def visit(observation, **kwargs):
    """Run preview generation for every FITS artifact of an observation.

    :param observation: the Observation whose planes and artifacts are
        visited.
    :param kwargs: 'working_directory' (optional, defaults to './') is
        where the science files are looked for, and 'cadc_client'
        (required) is handed on to _do_prev.
    :return: dict with the single key 'artifacts', holding the sum of the
        values returned by _do_prev.
    :raises mc.CadcException: if 'cadc_client' is missing, or if neither
        the artifact's file name nor its uncompressed file name exists in
        the working directory.
    """
    mc.check_param(observation, Observation)

    if 'cadc_client' not in kwargs:
        raise mc.CadcException('Need a cadc_client parameter.')
    cadc_client = kwargs['cadc_client']
    working_dir = kwargs.get('working_directory', './')

    fits_suffixes = ('.fits.gz', '.fits')
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            if not artifact.uri.endswith(fits_suffixes):
                continue
            file_id = ec.CaomName(artifact.uri).file_id
            science_fqn = os.path.join(
                working_dir, ec.CaomName(artifact.uri).file_name)
            if not os.path.exists(science_fqn):
                # fall back to the uncompressed name before giving up
                science_fqn = os.path.join(
                    working_dir, ec.CaomName(artifact.uri).uncomp_file_name)
                if not os.path.exists(science_fqn):
                    raise mc.CadcException(
                        f'{science_fqn} preview visit file not found')
            logging.debug(f'working on file {science_fqn}')
            count += _do_prev(
                file_id, science_fqn, working_dir, plane, cadc_client)

    logging.info(
        f'Completed preview augmentation for {observation.observation_id}.')
    return {'artifacts': count}
Ejemplo n.º 2
0
def visit(observation, **kwargs):
    """Generate previews for the science file of an observation and drop
    any '.jpg' artifacts from its planes.

    :param observation: the Observation whose planes and artifacts are
        visited.
    :param kwargs: 'working_directory' (optional, defaults to './'),
        'cadc_client' (optional, a warning is logged when absent),
        'stream', 'observable' and 'science_file' (all required).
    :return: dict with the single key 'artifacts', holding the sum of the
        values returned by _do_prev.
    :raises mc.CadcException: if 'stream', 'observable' or 'science_file'
        is missing.
    """
    mc.check_param(observation, Observation)

    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Visitor needs a cadc_client parameter to store images.')
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    science_file = kwargs.get('science_file')
    if science_file is None:
        # str.endswith(None) raises a TypeError deep inside the loop; fail
        # early with the same kind of error as the other required values
        raise mc.CadcException('Visitor needs a science_file parameter.')

    count = 0
    for plane in observation.planes.values():
        delete_list = []
        for artifact in plane.artifacts.values():
            if artifact.uri.endswith(science_file):
                count += _do_prev(artifact, plane, working_dir, cadc_client,
                                  stream, observable)
            if artifact.uri.endswith('.jpg'):
                delete_list.append(artifact.uri)

        # removal is deferred so the artifacts mapping is not modified
        # while it is being iterated
        for uri in delete_list:
            plane.artifacts.pop(uri)

    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
Ejemplo n.º 3
0
def read_file_list_from_archive(config):
    """Query the 'ad' web service for the file names of one archive.

    :param config: provides 'proxy_fqn' (the certificate used for the
        subject and for the request) and 'archive' (the archive name used
        in the query).
    :return: list of str, the whitespace-separated tokens of the response
        text with the first token (the column name) dropped.
    :raises mc.CadcException: if the request fails or the response status
        is not 200.
    """
    ad_client = net.BaseWsClient(
        resource_id='ivo://cadc.nrc.ca/ad',
        subject=net.Subject(certificate=config.proxy_fqn),
        agent='vlass2caom2/1.0',
        retry=True,
    )
    query_meta = (
        "SELECT fileName FROM archive_files WHERE archiveName = "
        f"'{config.archive}'"
    )
    request_params = {'QUERY': query_meta, 'LANG': 'ADQL', 'FORMAT': 'csv'}
    logging.debug(f'Query is {query_meta}')
    try:
        url = (
            f'https://{ad_client.host}/ad/sync?'
            f'{parse.urlencode(request_params)}'
        )
        response = ad_client.get(url, cert=config.proxy_fqn)
        if response.status_code != 200:
            # raised inside the try, so it is re-wrapped by the handler
            # below
            raise mc.CadcException(f'Query failure {response}')
        # ignore the column name as the first part of the response
        return response.text.split()[1:]
    except Exception as e:
        raise mc.CadcException(f'Failed ad content query: {e}')
Ejemplo n.º 4
0
def data_get(client, working_directory, file_name, archive, metrics):
    """
    Copy a file from CADC storage into the working directory. The working
    directory is assumed to exist and to be writeable.

    :param client: The CadcDataClient for read access to CADC storage.
    :param working_directory: Where 'file_name' will be written.
    :param file_name: What to copy from CADC storage.
    :param archive: Which archive to retrieve the file from.
    :param metrics: track success execution times, and failure counts.
    :raises mc.CadcException: if the retrieval raises, or if the file is
        not on disk afterwards.
    """
    begin = current()
    destination_fqn = os.path.join(working_directory, file_name)
    try:
        client.get_file(archive, file_name, destination=destination_fqn)
        retrieved = os.path.exists(destination_fqn)
        if not retrieved:
            raise mc.CadcException(
                f'ad retrieve failed. {destination_fqn} does not exist.'
            )
    except Exception as e:
        metrics.observe_failure('get', 'data', file_name)
        logging.debug(traceback.format_exc())
        raise mc.CadcException(
            f'Did not retrieve {destination_fqn} because {e}'
        )
    finish = current()
    metrics.observe(
        begin,
        finish,
        os.stat(destination_fqn).st_size,
        'get',
        'data',
        file_name,
    )
Ejemplo n.º 5
0
 def __init__(self, fname_on_disk=None, file_name=None, obs_id=None):
     """Derive the file id and the archive file name from a file name.

     :param fname_on_disk: file name as found on disk; used only when
         file_name is None, and always passed on to the parent class.
     :param file_name: file name; takes precedence over fname_on_disk.
     :param obs_id: stored on the instance after the parent initializer
         has run (the parent itself is given obs_id=None).
     :raises mc.CadcException: if neither name is provided, or the name
         is neither a '.fits' name nor a preview name.
     """
     # file_name wins when both names are supplied
     name = file_name if file_name is not None else fname_on_disk
     if name is None:
         raise mc.CadcException('Require file name.')

     self.file_id = GemName.get_file_id(name)
     if '.fits' in name:
         extension = 'fits'
     elif GemName.is_preview(name):
         extension = 'jpg'
     else:
         raise mc.CadcException(f'Unrecognized file name format {name}')
     self.fname_in_ad = f'{self.file_id}.{extension}'

     super(GemName, self).__init__(
         obs_id=None,
         collection=ARCHIVE,
         collection_pattern=GemName.GEM_NAME_PATTERN,
         fname_on_disk=fname_on_disk,
         scheme=SCHEME,
     )
     self.obs_id = obs_id
Ejemplo n.º 6
0
def visit(observation, **kwargs):
    """Update the position and FITS artifact metadata of every plane from
    the science file.

    :param observation: the Observation whose planes are visited.
    :param kwargs: 'working_directory' (optional, defaults to './') and
        'science_file' (required), the name of the file in that
        directory.
    :return: dict with the single key 'planes', the number of planes
        visited.
    :raises mc.CadcException: if 'science_file' is missing, or if a '.gz'
        science file is found neither compressed nor uncompressed.
    """
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'

    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'science_file' in kwargs:
        science_file = kwargs['science_file']
    else:
        raise mc.CadcException(
            'No science_file parameter provided to vistor '
            'for obs {}.'.format(observation.observation_id))

    science_fqn = os.path.join(working_dir, science_file)
    if not os.path.exists(science_fqn):
        if science_fqn.endswith('.gz'):
            # strip only the trailing '.gz'; str.replace would also remove
            # a '.gz' that appears earlier in the path, e.g. in a
            # directory name
            science_fqn = science_fqn[:-len('.gz')]
            if not os.path.exists(science_fqn):
                raise mc.CadcException(
                    '{} visit file not found'.format(science_fqn))

    science_fqn = _unzip(science_fqn)

    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        _update_plane_position(plane, science_fqn)
        _update_fits_artifact_metadata(plane, science_fqn, science_file)
        count += 1

    logging.info('Completed footprint augmentation for {}'.format(
        observation.observation_id))
    return {'planes': count}
Ejemplo n.º 7
0
def visit(observation, **kwargs):
    """Generate a thumbnail from the first artifact of every plane.

    :param observation: the Observation whose planes are visited.
    :param kwargs: 'working_directory' (optional, defaults to './'),
        'cadc_client' and 'stream' (both required).
    :return: dict with the single key 'artifacts', holding the sum of the
        values returned by _do_prev.
    :raises mc.CadcException: if 'cadc_client' or 'stream' is missing.
    """
    mc.check_param(observation, Observation)

    if 'cadc_client' not in kwargs:
        raise mc.CadcException('Visitor needs a cadc_client parameter.')
    if 'stream' not in kwargs:
        raise mc.CadcException('Visitor needs a stream parameter.')
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs['cadc_client']
    stream = kwargs['stream']

    count = 0
    for plane in observation.planes.values():
        # only the first artifact of a plane is looked at
        artifact = next(iter(plane.artifacts.values()), None)
        if artifact is None:
            continue
        file_id = ec.CaomName(artifact.uri).file_id
        logging.debug(f'Generate thumbnail for file id {file_id}')
        count += _do_prev(file_id, working_dir, plane, cadc_client, stream)

    logging.info(
        f'Completed preview augmentation for {observation.observation_id}.')
    return {'artifacts': count}
Ejemplo n.º 8
0
 def check(self, dest_fqn):
     """Check that a retrieved file can be read as FITS by astropy.

     Two checks are made: the HDU list and each HDU are verified, then
     the data of extension 0 is read. When either check fails, the file
     name is recorded as rejected (if an observable is set), the file is
     removed if it is there, and an exception is raised.

     :param dest_fqn: fully-qualified name of the file to check.
     :raises mc.CadcException: if either check fails.
     """
     from astropy.io import fits
     try:
         # the context manager closes the file even if verification raises
         with fits.open(
             dest_fqn, memmap=True, lazy_load_hdus=False
         ) as hdulist:
             hdulist.verify('warn')
             for h in hdulist:
                 h.verify('warn')
     except (fits.VerifyError, OSError) as e:
         if self._observable is not None:
             self._observable.rejected.record(
                 mc.Rejected.BAD_DATA, os.path.basename(dest_fqn)
             )
         if os.path.exists(dest_fqn):
             os.unlink(dest_fqn)
         # raise whether or not there was a file to remove, as the second
         # check below does
         raise mc.CadcException(
             f'astropy verify error {dest_fqn} when reading {e}'
         )
     # a second check that fails for some NEOSSat cases - if this works,
     # the file might have been correctly retrieved
     try:
         # ignore the return value - if the file is corrupted, the getdata
         # fails, which is the only interesting behaviour here
         fits.getdata(dest_fqn, ext=0)
     except (TypeError, OSError) as e:
         if self._observable is not None:
             self._observable.rejected.record(
                 mc.Rejected.BAD_DATA, os.path.basename(dest_fqn)
             )
         if os.path.exists(dest_fqn):
             os.unlink(dest_fqn)
         raise mc.CadcException(
             f'astropy getdata error {dest_fqn} when reading {e}'
         )
Ejemplo n.º 9
0
 def build(self, entry):
     """
     Build the StorageName for one unit of work.

     :param entry: a Gemini file name or observation ID, depending on
         the configuration
     :return: an instance of StorageName for use in execute_composable.
     :raises mc.CadcException: wraps any failure, including the case
         where no configured option applies.
     """
     self._logger.debug(f'Build a StorageName instance for {entry}.')
     try:
         config = self._config
         latest_client = config.features.supports_latest_client
         if (not latest_client
                 and mc.TaskType.INGEST_OBS in config.task_types
                 and '.fits' not in entry):
             # anything that is NOT ALOPEKE/ZORRO, which are the only
             # two instruments that change the behaviour of the
             # GemName constructor - and yeah, that abstraction is
             # leaking like a sieve.
             self._logger.debug('INGEST_OBS, hard-coded instrument.')
             result = gem_name.GemName(
                 obs_id=entry,
                 instrument=external_metadata.Inst.CIRPASS,
                 entry=entry,
             )
         else:
             # the instrument has to be read before the name is built
             if (mc.TaskType.SCRAPE in config.task_types
                     or config.use_local_files):
                 self._read_instrument_locally(entry)
             elif config.features.use_file_names:
                 self._read_instrument_remotely(entry)
             else:
                 raise mc.CadcException(
                     'The need has not been encountered in the real '
                     'world yet.'
                 )
             name_args = {
                 'file_name': entry,
                 'instrument': self._instrument,
             }
             if latest_client:
                 name_args['v_collection'] = gem_name.COLLECTION
                 name_args['v_scheme'] = gem_name.V_SCHEME
             result = gem_name.GemName(entry=entry, **name_args)
         self._logger.debug('Done build.')
         return result
     except Exception as e:
         self._logger.error(e)
         self._logger.debug(traceback.format_exc())
         raise mc.CadcException(e)
Ejemplo n.º 10
0
 def failure_action(self, original_fqn, destination_fqn, msg):
     """Remove the destination file if it is there, then raise.

     :param original_fqn: not used by this implementation.
     :param destination_fqn: the file to remove.
     :param msg: the content of the exception raised after the clean up.
     :raises mc.CadcException: always; it wraps the clean up error if the
         removal fails, and carries msg otherwise.
     """
     try:
         leftover = os.path.exists(destination_fqn)
         if leftover:
             os.unlink(destination_fqn)
     except Exception as e:
         cleanup_msg = (
             f'Failed to clean up {destination_fqn} after a verification '
             'error.'
         )
         self._logger.error(cleanup_msg)
         raise mc.CadcException(e)
     else:
         raise mc.CadcException(msg)
Ejemplo n.º 11
0
    def _update_time_bounds(self, observation, storage_name):
        """Replace the time axis range of a chunk with a set of bounds
        read from the first extension of the source file.

        The first entry of storage_name.sources_names is opened as FITS.
        Its HDU 1 must be a BINTABLE whose EXTNAME contains PROVENANCE;
        the STARTTIME and DURATION cells of the first table row supply
        the values. For every part keyed '0' in the observation, one
        CoordRange1D sample is added per value pair to the time axis
        bounds of the part's first chunk, and the axis range is cleared.

        :param observation: the Observation whose chunks are updated in
            place.
        :param storage_name: provides sources_names, whose first entry is
            the file to read.
        :raises mc.CadcException: if HDU 1 does not match the expected
            profile, or the two value lists have different lengths.
        """

        # whitespace-separated values, accumulated as text and split later
        lower_values = ''
        upper_values = ''
        with fits.open(storage_name.sources_names[0]) as fits_data:
            xtension = fits_data[1].header['XTENSION']
            extname = fits_data[1].header['EXTNAME']
            if 'BINTABLE' in xtension and 'PROVENANCE' in extname:
                # each value is prepended, so both lists end up in the
                # reverse of the order found in the table cell
                for ii in fits_data[1].data[0]['STARTTIME']:
                    lower_values = f'{ii} {lower_values}'
                # note that 'upper_values' holds the DURATION entries
                for ii in fits_data[1].data[0]['DURATION']:
                    upper_values = f'{ii} {upper_values} '
            else:
                raise mc.CadcException(
                    f'Opened a composite file that does not match the '
                    f'expected profile '
                    f'(XTENSION=BINTABLE/EXTNAME=PROVENANCE). '
                    f'{xtension} {extname}'
                )

        for plane in observation.planes:
            for artifact in observation.planes[plane].artifacts:
                parts = observation.planes[plane].artifacts[artifact].parts
                for p in parts:
                    # only the part keyed '0' is updated
                    if p == '0':
                        lower = lower_values.split()
                        upper = upper_values.split()
                        if len(lower) != len(upper):
                            raise mc.CadcException(
                                'Cannot make RefCoords with inconsistent '
                                'values.'
                            )
                        # only the first chunk of the part is updated
                        chunk = parts[p].chunks[0]
                        bounds = CoordBounds1D()
                        chunk.time.axis.bounds = bounds
                        for ii in range(len(lower)):
                            # NOTE(review): presumably converts a start
                            # time and a duration into MJD start and end
                            # values - confirm against ac.convert_time
                            mjd_start, mjd_end = ac.convert_time(
                                mc.to_float(lower[ii]), mc.to_float(upper[ii])
                            )
                            # every sample runs from pixel 0.5 to pixel 1.5
                            lower_refcoord = RefCoord(0.5, mjd_start)
                            upper_refcoord = RefCoord(1.5, mjd_end)
                            r = CoordRange1D(lower_refcoord, upper_refcoord)
                            bounds.samples.append(r)
                        # if execution has gotten to this point, remove range
                        # if it exists, since only one of bounds or range
                        # should be provided, and bounds is more specific. PD,
                        # slack, 2018-07-16
                        if chunk.time.axis.range is not None:
                            chunk.time.axis.range = None
Ejemplo n.º 12
0
    def _update_telescope_location(self, observation):
        """Set the geocentric location of the observation's telescope.

        The telescope is identified from the TELESCOP keyword of the first
        header. When OBS_LAT and OBS_LON are both present, the location is
        computed from them and the default elevation for the telescope;
        otherwise the default x, y and z values are used.

        :param observation: the Observation whose telescope is updated in
            place.
        :raises mc.CadcException: if observation is not an Observation, or
            the telescope name is not an expected one.
        """
        self._logger.debug('Begin _update_telescope_location')
        if not isinstance(observation, Observation):
            raise mc.CadcException('Input type is Observation.')

        primary_header = self._headers[0]
        telescope = primary_header.get('TELESCOP')
        if telescope is None:
            self._logger.warning(
                f'No telescope name. Could not set telescope '
                f'location for {observation.observation_id}'
            )
            return

        telescope = telescope.upper()
        if COLLECTION not in telescope and 'CTIO' not in telescope:
            raise mc.CadcException(f'Unexpected telescope name {telescope}')

        lat = primary_header.get('OBS_LAT')
        long = primary_header.get('OBS_LON')

        # make a reliable lookup value
        if COLLECTION in telescope:
            telescope = COLLECTION
        if 'CTIO' in telescope:
            telescope = 'CTIO'

        if lat is not None and long is not None:
            (
                observation.telescope.geo_location_x,
                observation.telescope.geo_location_y,
                observation.telescope.geo_location_z,
            ) = ac.get_location(
                lat, long, DEFAULT_GEOCENTRIC[telescope]['elevation']
            )
        else:
            # no usable geodetic values in the header, use the defaults
            observation.telescope.geo_location_x = (
                DEFAULT_GEOCENTRIC[telescope]['x'])
            observation.telescope.geo_location_y = (
                DEFAULT_GEOCENTRIC[telescope]['y'])
            observation.telescope.geo_location_z = (
                DEFAULT_GEOCENTRIC[telescope]['z'])

        self._logger.debug('Done _update_telescope_location')
Ejemplo n.º 13
0
def visit(observation, **kwargs):
    """Update the position of every chunk of an observation from the
    science file, then hand the footprint log files to
    _handle_footprint_logs.

    :param observation: the Observation whose chunks are visited.
    :param kwargs: 'working_directory' (optional, defaults to './'),
        'science_file' (required), and 'log_file_directory' (optional,
        defaults to None).
    :return: dict with the single key 'chunks', the number of chunks
        visited.
    :raises mc.CadcException: if 'science_file' is missing, or if a '.gz'
        science file is found neither compressed nor uncompressed.
    """
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'

    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'science_file' in kwargs:
        science_file = kwargs['science_file']
    else:
        raise mc.CadcException('No science_file parameter provided to vistor '
                               'for obs {}.'.format(
                                   observation.observation_id))
    # TODO - this moves location handling structures to other than the
    # main composable code - this could be MUCH better handled, just not
    # sure how right now
    log_file_directory = None
    if 'log_file_directory' in kwargs:
        log_file_directory = kwargs['log_file_directory']

    science_fqn = os.path.join(working_dir, science_file)
    if not os.path.exists(science_fqn):
        if science_fqn.endswith('.gz'):
            # strip only the trailing '.gz'; str.replace would also remove
            # a '.gz' that appears earlier in the path, e.g. in a
            # directory name
            science_fqn = science_fqn[:-len('.gz')]
            if not os.path.exists(science_fqn):
                raise mc.CadcException(
                    '{} visit file not found'.format(science_fqn))

    science_fqn = _unzip(science_fqn)

    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        for j in plane.artifacts:
            artifact = plane.artifacts[j]
            for k in artifact.parts:
                part = artifact.parts[k]
                for chunk in part.chunks:
                    _update_position(chunk, science_fqn)
                    count += 1

    return_file = '{}_footprint.txt'.format(observation.observation_id)
    return_string_file = '{}_footprint_returnstring.txt'.format(
        observation.observation_id)
    _handle_footprint_logs(log_file_directory, return_file)
    _handle_footprint_logs(log_file_directory, return_string_file)
    logging.info('Completed footprint augmentation for {}'.format(
        observation.observation_id))
    return {'chunks': count}
Ejemplo n.º 14
0
def get_obs_metadata(file_id):
    """
    Download the Gemini observation metadata for the given file ID and add
    it to the module-level 'om' collection. Nothing is returned.

    :param file_id: The file ID, appended to GEMINI_METADATA_URL to form
        the request URL.
    :raises mc.CadcException: if the request or the decoding of the JSON
        response fails.
    """
    global om
    logging.debug('Begin get_obs_metadata for {}'.format(file_id))
    gemini_url = '{}{}'.format(GEMINI_METADATA_URL, file_id)

    retries = 10
    retry = Retry(total=retries,
                  read=retries,
                  connect=retries,
                  backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    # Open the URL and fetch the JSON document for the observation. The
    # context manager closes the session, which the original never did.
    with requests.Session() as session:
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            response = session.get(gemini_url, timeout=20)
            try:
                metadata = response.json()
            finally:
                # release the connection even when the body is not JSON
                response.close()
        except Exception as e:
            raise mc.CadcException(
                'Unable to download Gemini observation metadata from {} '
                'because {}'.format(gemini_url, str(e)))
    om.add(metadata, file_id)
    logging.debug('End get_obs_metadata for {}'.format(file_id))
Ejemplo n.º 15
0
def get_obs_metadata(file_id):
    """
    Make the Gemini observation metadata for a file ID current in the
    module-level 'om' collection, querying the remote service only when
    the file ID is not already there. Nothing is returned.

    :param file_id: The file ID
    :raises mc.CadcException: if the JSON document that comes back has
        length zero.
    """
    logging.debug(f'Begin get_obs_metadata for {file_id}')
    global om
    if om.contains(file_id):
        om.reset_index(file_id)
        logging.debug(f'End get_obs_metadata for {file_id}')
        return

    gemini_url = f'{GEMINI_METADATA_URL}{file_id}'

    # Open the URL and fetch the JSON document for the observation
    response = None
    try:
        response = mc.query_endpoint_session(gemini_url, gofr.query_session)
        metadata = response.json()
    finally:
        if response is not None:
            response.close()

    if len(metadata) == 0:
        raise mc.CadcException(
            f'Could not find JSON record for {file_id} '
            f'at archive.gemini.edu.'
        )
    om.add(metadata, file_id)
    logging.debug(f'End get_obs_metadata for {file_id}')
Ejemplo n.º 16
0
def _get_uri(args):
    """Return what follows the first '/' of the first lineage entry.

    :param args: object with a 'lineage' attribute, a sequence of str.
    :return: str, the first lineage entry with everything up to and
        including its first '/' removed.
    :raises mc.CadcException: if args.lineage is empty or None.
    """
    if not args.lineage:
        raise mc.CadcException(
            f'Could not define uri from these args {args}')
    first_lineage = args.lineage[0]
    return first_lineage.split('/', 1)[1]
Ejemplo n.º 17
0
def visit(observation, **kwargs):
    """Run the footprint finder for every chunk of an observation.

    :param observation: the Observation whose chunks are visited.
    :param kwargs: 'working_directory' (optional, defaults to './'),
        'science_file' (required), and 'log_file_directory' (optional,
        defaults to None).
    :return: dict with the single key 'chunks', the number of chunks
        visited.
    :raises mc.CadcException: if 'science_file' is missing.
    """
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'

    science_file = kwargs.get('science_file')
    if science_file is None:
        raise mc.CadcException(
            f'No science_file parameter provided to vistor for obs '
            f'{observation.observation_id}.')
    working_dir = kwargs.get('working_directory', './')
    # TODO - this moves location handling structures to other than the
    # main composable code - this could be MUCH better handled, just not
    # sure how right now
    log_file_directory = kwargs.get('log_file_directory')
    science_fqn = os.path.join(working_dir, science_file)

    # lazily walk planes -> artifacts -> parts -> chunks
    all_chunks = (
        chunk
        for plane in observation.planes.values()
        for artifact in plane.artifacts.values()
        for part in artifact.parts.values()
        for chunk in part.chunks
    )
    count = 0
    for chunk in all_chunks:
        # -t 10 provides a margin of up to 10 pixels
        cc.exec_footprintfinder(
            chunk,
            science_fqn,
            log_file_directory,
            sn.VlassName.remove_extensions(science_file),
            '-t 10',
        )
        count += 1

    logging.info(
        f'Completed footprint augmentation for {observation.observation_id}')
    return {'chunks': count}
Ejemplo n.º 18
0
 def _failure_mock(ignore_url, ignore_local_fqn):
     """Stand-in for a retrieval call; always raises with a fixed 404
     message, whatever the arguments."""
     preview_url = 'https://archive.gemini.edu/preview/N20211007A0003b.fits'
     message = (
         'Could not retrieve /usr/src/app/N20211007A0003/'
         f'N20211007A0003b.jpg from {preview_url}. Failed '
         f'with 404 Client Error: Not Found for url: {preview_url}'
     )
     raise mc.CadcException(message)
Ejemplo n.º 19
0
def data_put_fqn(
    client,
    source_name,
    storage_name,
    stream='raw',
    metrics=None,
):
    """
    Make a copy of a locally available file by writing it to CADC. Assumes
    file and directory locations are correct. Requires a checksum comparison
    by the client.

    :param client: The CadcDataClient for write access to CADC storage.
    :param source_name: str fully-qualified
    :param storage_name: StorageName instance
    :param stream: str A relic of the old CADC storage.
    :param metrics: Tracking success execution times, and failure counts.
        May be None, in which case nothing is tracked.
    :raises mc.CadcException: if the put, or the size lookup that follows
        it, fails.
    """
    start = current()
    try:
        client.put_file(
            storage_name.archive,
            source_name,
            archive_stream=stream,
            mime_type=storage_name.mime_type,
            mime_encoding=storage_name.mime_encoding,
            md5_check=True,
        )
        file_size = os.stat(source_name).st_size
    except Exception as e:
        # metrics defaults to None; without the guard an AttributeError
        # raised here would hide the actual storage failure
        if metrics is not None:
            metrics.observe_failure('put', 'data', source_name)
        logging.debug(traceback.format_exc())
        raise mc.CadcException(f'Failed to store data with {e}')
    end = current()
    if metrics is not None:
        metrics.observe(start, end, file_size, 'put', 'data', source_name)
Ejemplo n.º 20
0
    def __init__(self,
                 fname_on_disk=None,
                 file_name=None,
                 obs_id=None,
                 file_id=None):
        """Derive the naming attributes from a '.'-separated file name.

        Only fname_on_disk is read; file_name, obs_id and file_id are
        accepted but not used here.

        :param fname_on_disk: path of the split product; its base name
            must have at least five '.'-separated pieces.
        :raises mc.CadcException: if the base name has fewer than five
            pieces.
        """
        super(AlmacaName, self).__init__(
            collection=ARCHIVE,
            collection_pattern='*',
            fname_on_disk=fname_on_disk,
        )
        self._file_name = os.path.basename(fname_on_disk)
        pieces = self._file_name.split('.')
        if len(pieces) < 5:
            raise mc.CadcException('Not a split product.')
        uid_piece, second_piece, third_piece, product_piece = pieces[:4]
        asdm_str = uid_piece.replace('uid___', '')
        self._obs_id = f'{asdm_str}.{second_piece}.{third_piece}'

        # TODO - hard-coded for single-band splitting testing right now
        self._science_goal_id = 'uid://A001/X88b/X21'
        self._group_id = 'uid://A001/X88b/X22'
        self._mous_id = 'uid://A001/X88b/X23'
        self._asdm_id = f'uid://{asdm_str.replace("_", "/")}'

        self._product_id = product_piece
        if '.CAL.' in fname_on_disk:
            self._intent = ProductType.CALIBRATION
        else:
            self._intent = ProductType.SCIENCE
        self._ms = fname_on_disk
        self._log_dir = '/data/calibrated'
        self._input_ms_metadata = (
            f'{self._log_dir}/{uid_piece}.ms.split.cal/md.pk'
        )
        self._logger = logging.getLogger(__name__)
        # NOTE(review): every new instance is logged at ERROR level -
        # confirm that this is intended
        self._logger.error(self)
Ejemplo n.º 21
0
 def _add_extensions(fname):
     """Return the '.gz' form of a file name.

     :param fname: str, a name ending in '.gz' or '.fits'.
     :return: fname unchanged if it ends in '.gz', fname with '.gz'
         appended if it ends in '.fits'.
     :raises mc.CadcException: for any other ending.
     """
     if fname.endswith('.gz'):
         return fname
     if fname.endswith('.fits'):
         return f'{fname}.gz'
     raise mc.CadcException(f'Unexpected file name {fname}')
Ejemplo n.º 22
0
def define_subject(config):
    """Pick the credentials to use from the content of a Config instance.

    A proxy certificate that exists on disk is preferred. Otherwise the
    netrc file, looked for in the working directory, is used.

    :param config: provides 'proxy_fqn', 'netrc_file' and
        'working_directory'.
    :return: a net.Subject, or None when a netrc file is configured but
        cannot be found.
    :raises mc.CadcException: if there is no usable proxy certificate and
        no netrc file is configured.
    """
    if config.proxy_fqn is not None and os.path.exists(config.proxy_fqn):
        logging.debug(
            f'Using proxy certificate {config.proxy_fqn} for credentials.'
        )
        return net.Subject(username=None, certificate=config.proxy_fqn)

    if config.netrc_file is None:
        logging.warning(
            f'Proxy certificate is {config.proxy_fqn}, netrc file is '
            f'{config.netrc_file}.'
        )
        raise mc.CadcException(
            'No credentials provided (proxy certificate or netrc file). '
            'Cannot create an anonymous subject.'
        )

    netrc_fqn = os.path.join(config.working_directory, config.netrc_file)
    if not os.path.exists(netrc_fqn):
        logging.warning(f'Cannot find netrc file {netrc_fqn}')
        return None

    logging.debug(f'Using netrc file {netrc_fqn} for credentials.')
    return net.Subject(username=None, certificate=None, netrc=netrc_fqn)
Ejemplo n.º 23
0
def update(observation, **kwargs):
    """Fill multiple CAOM model elements and/or attributes (an n:n
    relationship between TDM attributes and CAOM attributes). The
    signature is required for import_module loading and execution.

    :param observation A CAOM Observation model instance.
    :param **kwargs 'headers', 'fqn' and 'uri' are read; at least one of
        'fqn' or 'uri' is required, and 'fqn' wins when both are given.
    :return: the observation that was passed in.
    :raises mc.CadcException: if neither 'fqn' nor 'uri' is provided."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)

    headers, fqn, uri = (
        kwargs.get(key) for key in ('headers', 'fqn', 'uri')
    )

    phangs_name = None
    if uri is not None:
        phangs_name = PHANGSName(artifact_uri=uri)
    if fqn is not None:
        # replaces a name that was built from the uri
        phangs_name = PHANGSName(file_name=os.path.basename(fqn))
    if phangs_name is None:
        raise mc.CadcException(
            f'Need one of fqn or uri defined for '
            f'{observation.observation_id}'
        )

    _update_from_comment(observation, phangs_name, headers)

    logging.debug('Done update.')
    return observation
Ejemplo n.º 24
0
def test_pull_augmentation():
    """The pull visitor fetches the file over http and stores it, without
    changing the observation metadata or adding artifacts."""
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))

    cadc_client_mock = Mock()
    cadc_client_mock.return_value.data_get.return_value = mc.CadcException(
        'test')
    # no scheme from cadc client
    cadc_client_mock.get_file_info.return_value = {'md5sum': '1234'}

    kwargs = dict(
        working_directory=TEST_DATA_DIR,
        cadc_client=cadc_client_mock,
        stream='stream',
        observable=test_observable,
    )
    expected_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
    expected_fqn = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'

    with patch('caom2pipe.manage_composable.http_get') as http_mock, \
            patch('caom2pipe.manage_composable.data_put') as ad_put_mock:
        result = pull_augmentation.visit(obs, **kwargs)
        http_mock.assert_called_with(expected_url, expected_fqn)
        assert ad_put_mock.called, 'ad put mock not called'
        assert result is not None, 'expect a result'
        assert result['observation'] == 0, 'no updated metadata'
        assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, \
            'no new artifacts'
Ejemplo n.º 25
0
def get_pi_metadata(program_id):
    """
    Return the title and PI name for a Gemini program, using the
    module-level ``pm`` dict as a cache.

    On a cache miss the program info page is fetched from
    archive.gemini.edu (with retries) and the table cells of the page are
    parsed: the second cell is used as the title and the fourth as the
    PI name.

    :param program_id: program identifier, appended to the programinfo URL
    :return: dict with 'title' and 'pi_name' keys, or None when the page
        does not contain enough table cells to extract them
    :raises mc.CadcException: when the page cannot be downloaded
    """
    global pm
    if program_id in pm:
        return pm[program_id]

    program_url = 'https://archive.gemini.edu/programinfo/' + program_id

    # Open the URL and fetch the document for the program
    retries = 10
    retry = Retry(total=retries,
                  read=retries,
                  connect=retries,
                  backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session = requests.Session()
    try:
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            response = session.get(program_url, timeout=20)
            try:
                xml_metadata = response.text
            finally:
                # release the connection even if decoding the body fails
                response.close()
        except Exception as e:
            raise mc.CadcException(
                'Unable to download Gemini observation metadata from {} '
                'because {}'.format(program_url, str(e))) from e
    finally:
        # the session owns a connection pool; do not leak it
        session.close()

    metadata = None
    soup = BeautifulSoup(xml_metadata, 'lxml')
    tds = soup.find_all('td')
    # cells 1 and 3 are read below, so at least four cells are required
    if len(tds) > 3:
        title = tds[1].contents[0].replace('\n', ' ')
        pi_name = tds[3].contents[0]
        metadata = {'title': title, 'pi_name': pi_name}
        # only successful look-ups are cached, so failures are retried
        pm[program_id] = metadata
    logging.debug('End get_pi_metadata')
    return metadata
Ejemplo n.º 26
0
 def get_product_id(file_name):
     """
     Derive the product ID from the shape of a file name.

     The checks are ordered from most to least specific, because the
     shorter markers are substrings of the longer ones.

     :param file_name: str name of the file to classify
     :return: str product ID
     :raises mc.CadcException: when no known pattern matches
     """
     if file_name.startswith('component'):
         return 'component_image'

     if 'cont.taylor.0.restored' in file_name:
         # catalogues are distinguished from the image by their suffix
         if file_name.endswith('restored.components.csv'):
             return 'fine_source_catalog'
         if file_name.endswith('restored.islands.csv'):
             return 'coarse_source_catalog'
         return 'cont_taylor_0_restored'

     # first match wins, so keep the more specific markers first
     ordered_markers = (
         ('cont.taylor.0', 'cont_taylor_0'),
         ('cont.taylor.1.restored', 'cont_taylor_1_restored'),
         ('cont.taylor.1', 'cont_taylor_1'),
     )
     for marker, product_id in ordered_markers:
         if marker in file_name:
             return product_id

     if 'contcube' in file_name:
         if 'restored' in file_name:
             return 'contcube_restored'
         return 'contcube'

     raise mc.CadcException(
         'Could not guess product ID from file name {}'.format(
             file_name))
Ejemplo n.º 27
0
def _repair_instrument_name_for_svo(instrument):
    """
    Translate a Gemini instrument into the name the SVO Filter service
    uses for it.

    Most instruments map to their own ``value``. HRWFS maps to one of the
    two acquisition camera names depending on the 'telescope' entry of
    the module-level ``om`` lookup; F2 and FLAMINGOS have fixed
    replacements.

    :param instrument: the Gemini instrument (compared against Inst
        members, and its ``value`` is the default answer)
    :return: str instrument name as SVO spells it
    :raises mc.CadcException: for HRWFS when ``om`` has no 'telescope'
    """
    default_name = instrument.value

    if instrument == Inst.HRWFS:
        site = om.get('telescope')
        if site is None:
            data_label = om.get('data_label')
            raise mc.CadcException(
                '{}: No observatory information for {}'.format(
                    instrument, data_label))
        # anything that is not Gemini-South is treated as the north camera
        return 'AcqCam-S' if 'Gemini-South' == site else 'AcqCam-N'

    if instrument == Inst.F2:
        return 'Flamingos2'

    if instrument == Inst.FLAMINGOS:
        return 'Flamingos'

    return default_name
Ejemplo n.º 28
0
def _update_science_provenance(observation, headers):
    """
    Rebuild the observation members and the plane provenance inputs from
    the IN_* keywords of the first header.

    Each IN_* value is reduced to a base file name, turned into an
    observation ID by naming convention, and recorded both as an
    ObservationURI (member) and as a PlaneURI (provenance input). The
    collected sets are then applied with mc.update_typed_set.

    :param observation: observation to update; the plane keyed by
        observation.observation_id receives the provenance inputs
    :param headers: sequence of headers; only headers[0] is read
    :raises mc.CadcException: when an input name starts with neither
        'S' nor 'C'
    """
    member_uris = TypedSet(ObservationURI, )
    input_plane_uris = TypedSet(PlaneURI, )
    primary = headers[0]

    for key in primary:
        if not key.startswith('IN_'):
            continue

        stem = OmmName.remove_extensions(
            os.path.basename(primary.get(key)))
        if stem.startswith('S'):
            # starting 'S' means a science input, 'C' will mean cal
            input_id = '{}_SCI'.format(stem.replace('S', 'C', 1))
        elif stem.startswith('C'):
            input_id = '{}_CAL'.format(stem)
        else:
            raise mc.CadcException(
                'Unknown file naming pattern {}'.format(stem))

        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, input_id))
        # the product id is the same as the observation id for OMM
        input_plane_uris.add(PlaneURI.get_plane_uri(member_uri, input_id))
        member_uris.add(member_uri)

    mc.update_typed_set(observation.members, member_uris)
    target_plane = observation.planes[observation.observation_id]
    mc.update_typed_set(target_plane.provenance.inputs, input_plane_uris)
Ejemplo n.º 29
0
 def _get_obs_id(self, temp, file_name, entry):
     """
     These files are not available from archive.gemini.edu, so
     only ask for their metadata from CADC.

     Look-up order when connected: local files (only if the config says
     to use them), then VOS (only if ``temp.scheme`` is 'vos'), then
     CAOM2 queries - first with the production client against 'GEMINI',
     then with the sc2 client against 'GEMINIPROC'. When not connected,
     only the local check is made.

     :param temp: object with a ``scheme`` attribute, or None
         (presumably a parsed URL for the entry - confirm with caller)
     :param file_name: name of the file to find metadata for
     :param entry: passed through to ``_get_obs_id_from_vos``
     :return: the (possibly repaired) ``data_label`` of the metadata
     :raises mc.CadcException: when no source yields metadata
     """
     self._logger.debug(f'Begin _get_obs_id for file_name {file_name}')
     metadata = None
     if self._connected:
         if self._config.use_local_files:
             self._logger.debug(f'Check local {file_name}')
             metadata = em.defining_metadata_finder._check_local(file_name)
         if metadata is None and temp is not None and temp.scheme == 'vos':
             self._logger.debug('Check vos')
             metadata = self._get_obs_id_from_vos(entry)
         if metadata is None:
             # why the old collection name? Because it's better to
             # retrieve the metadata from the old sc2 collection than
             # by retrieving a header, and beat up CADC instead of
             # archive.gemini.edu
             #
             # The finder is shared, so remember its TAP client and put
             # it back in the finally clause no matter how the queries
             # end.
             original_client = em.defining_metadata_finder._tap_client
             try:
                 self._logger.debug(f'Check caom2 collection {COLLECTION}')
                 # uri = mc.build_uri(COLLECTION, file_name, CADC_SCHEME)
                 em.defining_metadata_finder._tap_client = (
                     self._prod_client
                 )
                 # try both URI spellings; the first hit wins
                 for uri in [
                     f'gemini:GEM/{file_name}',
                     f'gemini:GEMINI/{file_name}',
                 ]:
                     metadata = em.defining_metadata_finder._check_caom2(
                         uri, 'GEMINI'
                     )
                     if metadata is not None:
                         break
                 if metadata is None:
                     self._logger.debug(
                         f'Check caom2 collection GEMINIPROC'
                     )
                     # uri = mc.build_uri('GEMINI', file_name)
                     uri = f'ad:GEMINI/{file_name}'
                     # last resort: switch to the sc2 client for the
                     # GEMINIPROC query
                     em.defining_metadata_finder._tap_client = (
                         self._sc2_client
                     )
                     metadata = em.defining_metadata_finder._check_caom2(
                         uri, 'GEMINIPROC'
                     )
             finally:
                 em.defining_metadata_finder._tap_client = original_client
     else:
         self._logger.debug('Check unconnected local')
         metadata = em.defining_metadata_finder._check_local(file_name)
     if metadata is None:
         raise mc.CadcException(f'No metadata for {file_name}')
     if metadata.data_label is not None:
         # the repaired label is written back onto the metadata object
         metadata.data_label = repair_data_label(
             file_name, metadata.data_label
         )
     self._logger.debug(f'End _get_obs_id')
     return metadata.data_label
Ejemplo n.º 30
0
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.

    Planes with no release date, or a release date in the future, are
    skipped. Preview artifacts are skipped. Every other artifact is
    handed to mc.look_pull_and_put together with its recorded checksum.

    :param observation: the Observation to visit
    :param kwargs: recognised keys are
        working_directory - where files are placed, default './'
        cadc_client - required to do any work
        stream - required
        observable - required; supplies ``rejected`` and ``metrics``
    :return: None when no cadc_client is supplied; otherwise a dict
        {'observation': count}. The count is never incremented here, so
        it is always 0.
    :raises mc.CadcException: when stream or observable is missing
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update. Stopping pull visitor.')
        return
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')

    count = 0
    if observable.rejected.is_bad_metadata(observation.observation_id):
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    else:
        for plane in observation.planes.values():
            if (plane.data_release is None
                    or plane.data_release > datetime.utcnow()):
                logging.error(f'Plane {plane.product_id} is proprietary '
                              f'until {plane.data_release}. No file access.')
                continue

            for artifact in plane.artifacts.values():
                if gem_name.GemName.is_preview(artifact.uri):
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    file_url = f'{FILE_URL}/{f_name}'
                    mc.look_pull_and_put(f_name, working_dir, file_url,
                                         gem_name.ARCHIVE, stream, MIME_TYPE,
                                         cadc_client,
                                         artifact.content_checksum.checksum,
                                         observable.metrics)
                except Exception as e:
                    # failures that check_and_record recognises are
                    # recorded and swallowed; anything else propagates
                    # with its original traceback
                    if not (observable.rejected.check_and_record(
                            str(e), observation.observation_id)):
                        raise
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return {'observation': count}