Ejemplo n.º 1
0
    def _update_cal_provenance(self, observation):
        """Record calibration inputs named in the primary header.

        Every keyword of ``self._headers[0]`` that starts with ``F_ON`` or
        ``F_OFF`` is read as an input file name. The observation ID is
        ``C`` followed by the file's base name with extensions removed, and
        the product ID is that observation ID followed by ``_CAL``.

        :param observation: Observation whose ``members`` and whose planes'
            ``provenance.inputs`` are updated. A plane with no provenance is
            given ``Provenance('CPAPIR')`` first.
        """
        primary = self._headers[0]
        member_uris = TypedSet(
            ObservationURI,
        )
        input_uris = TypedSet(
            PlaneURI,
        )
        for keyword in primary:
            if not keyword.startswith(('F_ON', 'F_OFF')):
                continue
            stem = OmmName.remove_extensions(
                os.path.basename(primary.get(keyword))
            )
            obs_id = f'C{stem}'
            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, obs_id)
            )
            input_uris.add(
                PlaneURI.get_plane_uri(member_uri, f'{obs_id}_CAL')
            )
            member_uris.add(member_uri)

        # every plane of the observation receives the same set of inputs
        for plane in observation.planes.values():
            if plane.provenance is None:
                plane.provenance = Provenance('CPAPIR')
            mc.update_typed_set(plane.provenance.inputs, input_uris)

        mc.update_typed_set(observation.members, member_uris)
Ejemplo n.º 2
0
def _update_science_provenance(observation, headers):
    """Record the inputs listed in the ``IN_*`` keywords of the first header.

    Each ``IN_*`` value is reduced to its base name without extensions. A
    name starting with ``S`` maps to the ID ``C..._SCI`` (leading ``S``
    swapped for ``C``); a name starting with ``C`` maps to ``..._CAL``.

    :param observation: Observation whose ``members`` are updated, and whose
        plane keyed by ``observation.observation_id`` has its
        ``provenance.inputs`` updated.
    :param headers: sequence of FITS headers; only ``headers[0]`` is read.
    :raises mc.CadcException: if a base name starts with neither ``S`` nor
        ``C``.
    """
    primary = headers[0]
    plane_uris = TypedSet(PlaneURI, )
    member_uris = TypedSet(ObservationURI, )
    for keyword in primary:
        if not keyword.startswith('IN_'):
            continue
        base_name = OmmName.remove_extensions(
            os.path.basename(primary.get(keyword)))
        leading = base_name[:1]
        if leading == 'S':
            # a leading 'S' marks a science input, a leading 'C' marks a cal
            file_id = '{}_SCI'.format(base_name.replace('S', 'C', 1))
        elif leading == 'C':
            file_id = '{}_CAL'.format(base_name)
        else:
            raise mc.CadcException(
                'Unknown file naming pattern {}'.format(base_name))

        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        # for OMM the product id and the observation id are the same value
        plane_uris.add(PlaneURI.get_plane_uri(member_uri, file_id))
        member_uris.add(member_uri)

    mc.update_typed_set(observation.members, member_uris)
    science_plane = observation.planes[observation.observation_id]
    mc.update_typed_set(science_plane.provenance.inputs, plane_uris)
Ejemplo n.º 3
0
def _find_plane_provenance_single(plane_inputs, headers, lookup, collection,
                                  repair, obs_id):
    """
    :param plane_inputs TypedSet instance to add inputs to
    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    """
    for header in headers:
        for keyword in header:
            if keyword.startswith(lookup):
                value = header.get(keyword)
                prov_ids = repair(value, obs_id)
                for entry in prov_ids:
                    # 0 - observation
                    # 1 - plane
                    obs_member_uri_str = \
                        mc.CaomName.make_obs_uri_from_obs_id(
                            collection, entry[0])
                    obs_member_uri = ObservationURI(obs_member_uri_str)
                    plane_uri = PlaneURI.get_plane_uri(obs_member_uri,
                                                       entry[1])
                    plane_inputs.add(plane_uri)
                    logging.debug(f'Adding PlaneURI {plane_uri}')
                # because all the content gets processed with one
                # access to the keyword value, stop after one round
                break
Ejemplo n.º 4
0
def _update_plane_provenance(headers, lookup, collection, repair, obs_id,
                             plane_inputs):
    """Add inputs to a collection, based on a particular keyword prefix.

    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    :param plane_inputs TypedSet(PlaneURI,) to which new PlaneURI instances are
        added
    """
    for header in headers:
        for keyword in header:
            if keyword.startswith(lookup):
                value = header.get(keyword)
                prov_obs_id, prov_prod_id = repair(value, obs_id)
                if prov_obs_id is not None and prov_prod_id is not None:
                    obs_member_uri_str = \
                        mc.CaomName.make_obs_uri_from_obs_id(
                            collection, prov_obs_id
                        )
                    obs_member_uri = ObservationURI(obs_member_uri_str)
                    plane_uri = PlaneURI.get_plane_uri(obs_member_uri,
                                                       prov_prod_id)
                    plane_inputs.add(plane_uri)
                    logging.debug(f'Adding PlaneURI {plane_uri}')
def _do_provenance(working_directory, science_file, observation,
                   tap_client, plane_inputs, obs_members):
    """
    Add plane inputs and observation members from a PROVENANCE extension.

    DB 06-08-20
    Looking at the DATALAB values for the test set, these are now set to
    correctly identify the correct observation ID.
    e.g. rnN20140428S0174_dark.fits has DATALAB = GN-2014A-Q-85-16-006 since
    it is NOT derived from multiple observations.  rgnN20140428S0174_dark.fits
    (with the extra ‘g’) has DATALAB = GN-2014A-Q-85-16-006-DARK since it IS a
    new derived observation.   And inputs/members are in the PROVENANCE
    extension.

    DB 07-08-20
    All members + inputs in the extension are plane.provenance.inputs.

    Add the appropriate raw planes of the ‘member’ observations to the list
    of inputs for the derived observations, where 'appropriate' means
    find the ‘appropriate’ filename that identifies the plane of the inputs.

    :param working_directory: directory that contains ``science_file``.
    :param science_file: name of the FITS file to read.
    :param observation: observation being updated; members are only added
        when it is a DerivedObservation.
    :param tap_client: passed to external_metadata.get_obs_id_from_cadc.
    :param plane_inputs: collection with an ``add`` method; receives one
        PlaneURI for every listed file whose observation ID is found.
    :param obs_members: collection with an ``add`` method; receives an
        ObservationURI for every found file whose type is 'member'.
    :return: int, incremented once per plane input added and once more per
        member added; 0 when the file has no PROVENANCE extension.
    """
    logging.debug(f'Begin _do_provenance for {observation.observation_id}')
    count = 0
    fqn = os.path.join(working_directory, science_file)
    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised while resolving IDs.
    with fits.open(fqn) as hdus:
        if 'PROVENANCE' not in hdus:
            logging.warning(
                f'PROVENANCE extension not found in HDUs for {science_file}.')
            return count

        data = hdus['PROVENANCE'].data
        # the name of the type column is only known by its 'Type' prefix
        # NOTE(review): temp stays None if no column name starts with 'Type',
        # and data[None] is then used below - confirm that cannot happen.
        temp = None
        for entry in data.columns:
            if entry.name.startswith('Type'):
                temp = entry.name
                break
        for f_name, f_prov_type in zip(data['Filename'], data[temp]):
            f_id = gem_name.GemName.remove_extensions(f_name)
            for coll in ['GEMINI', 'GEMINIPROC']:
                obs_id = external_metadata.get_obs_id_from_cadc(
                    f_id, tap_client, coll)
                if obs_id is None:
                    continue
                logging.info(
                    f'Found observation ID {obs_id} for file {f_id}.')
                input_obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(
                    coll, obs_id)
                input_obs_uri = ObservationURI(input_obs_uri_str)
                plane_uri = PlaneURI.get_plane_uri(input_obs_uri, f_id)
                plane_inputs.add(plane_uri)
                count += 1
                if (f_prov_type == 'member'
                        and isinstance(observation, DerivedObservation)):
                    member_obs_uri_str = (
                        mc.CaomName.make_obs_uri_from_obs_id(coll, obs_id))
                    member_obs_uri = ObservationURI(member_obs_uri_str)
                    obs_members.add(member_obs_uri)
                    count += 1
                # the first collection that knows the file wins
                break
    logging.debug('End _do_provenance.')
    return count
Ejemplo n.º 6
0
    def _update_science_provenance(self, observation):
        """Record science/calibration inputs named in the primary header.

        Reads every ``IN_<n>`` and ``ID_<n>`` keyword of
        ``self._headers[0]`` whose suffix parses as an integer, derives an
        observation ID and a product ID from the file name it holds, and
        adds the resulting URIs to ``observation.members`` and to the
        ``provenance.inputs`` of every plane.

        :param observation: Observation to update.
        :raises mc.CadcException: if neither the base name nor the full
            value starts with ``S`` or ``C``.
        """
        primary = self._headers[0]
        plane_uris = TypedSet(
            PlaneURI,
        )
        member_uris = TypedSet(
            ObservationURI,
        )
        # Keyword values look like:
        # IN_00010= 'S/data/cpapir/data/101116/101116_0088.fits.fits.gz'
        # or
        # IN_00001= 'S050213_0278.fits.gz' /raw input file (1/5)
        # or, per DD - slack - 11-02-20, with the additional ID_ prefix,
        # which was an easier fix than changing the pipeline and all the
        # headers once more:
        # ID_00001= 'S/data/cpapir/data/101116/101116_0041.fits.fits.gz'
        for keyword in primary:
            if not keyword.startswith(('IN_', 'ID_')):
                continue
            try:
                int(keyword.split('_')[1])
            except ValueError:
                # non-numeric suffix, e.g. the keyword ID_PROG
                continue
            value = primary.get(keyword)
            base_name = OmmName.remove_extensions(os.path.basename(value))
            if base_name.startswith('S'):
                # a leading 'S' marks a science input, 'C' marks a cal
                obs_id = base_name.replace('S', 'C', 1)
                suffix = 'SCI'
            elif value.startswith('S'):
                # the 'S' marker sits on the directory part of the value
                obs_id = f'C{base_name}'
                suffix = 'SCI'
            elif base_name.startswith('C') or value.startswith('C'):
                obs_id = base_name
                suffix = 'CAL'
            else:
                raise mc.CadcException(
                    f'Unknown file naming pattern {base_name}'
                )
            file_id = f'{obs_id}_{suffix}'

            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, obs_id)
            )
            plane_uris.add(PlaneURI.get_plane_uri(member_uri, file_id))
            member_uris.add(member_uri)

        mc.update_typed_set(observation.members, member_uris)
        for plane in observation.planes.values():
            mc.update_typed_set(plane.provenance.inputs, plane_uris)
Ejemplo n.º 7
0
def make_plane_uri(obs_id, product_id, collection):
    """
    Common code to construct an ObservationURI and its PlaneURI.

    :param obs_id: str Observation.observationID for a CADC collection.
    :param product_id: str Plane.productID for a CADC collection.
    :param collection: str CADC collection.
    :return: tuple of (ObservationURI instance, PlaneURI instance)
    """
    obs_uri = ObservationURI(
        mc.CaomName.make_obs_uri_from_obs_id(collection, obs_id))
    return obs_uri, PlaneURI.get_plane_uri(obs_uri, product_id)
Ejemplo n.º 8
0
def _update_cal_provenance(observation, headers):
    """Record calibration inputs named in the first header.

    Each ``F_ON*`` / ``F_OFF*`` keyword value is reduced to its base name
    without extensions and turned into the ID ``C<base name>_CAL``, which is
    used as both the observation ID and the product ID.

    :param observation: Observation whose ``members`` and whose planes'
        ``provenance.inputs`` are updated. A plane with no provenance is
        given ``Provenance('CPAPIR')`` first.
    :param headers: sequence of FITS headers; only ``headers[0]`` is read.
    """
    primary = headers[0]
    member_uris = TypedSet(ObservationURI, )
    plane_uris = TypedSet(PlaneURI, )
    for keyword in primary:
        if not keyword.startswith(('F_ON', 'F_OFF')):
            continue
        stem = OmmName.remove_extensions(
            os.path.basename(primary.get(keyword)))
        file_id = 'C{}_CAL'.format(stem)

        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        # for OMM the product id and the observation id are the same value
        plane_uris.add(PlaneURI.get_plane_uri(member_uri, file_id))
        member_uris.add(member_uri)

    for plane in observation.planes.values():
        if plane.provenance is None:
            plane.provenance = Provenance('CPAPIR')
        mc.update_typed_set(plane.provenance.inputs, plane_uris)

    mc.update_typed_set(observation.members, member_uris)
Ejemplo n.º 9
0
def _update_catalog_plane(obs):
    """Fill in the metadata of every catalog plane of an observation.

    A plane is treated as a catalog plane when its ``product_id`` contains
    the text ``catalog``. Such planes get their release dates from the
    observation, a CATALOG data product type, a CALIBRATED calibration
    level, and a provenance built from the ``Plane.provenance.*`` values of
    ``catalog_blueprint``.

    :param obs: Observation to update; must not be None.
    """
    logging.debug(f'Begin _update_catalog_plane for {catalog_uri}.')
    assert obs is not None, 'Must have an observation to update'

    for plane in obs.planes.values():
        if 'catalog' not in plane.product_id:
            continue
        plane.meta_release = obs.meta_release
        # NOTE(review): data_release is copied from obs.meta_release -
        # confirm that this is intended.
        plane.data_release = obs.meta_release
        plane.data_product_type = DataProductType.CATALOG
        plane.calibration_level = CalibrationLevel.CALIBRATED

        provenance = Provenance(
            catalog_blueprint._get('Plane.provenance.name'))
        plane.provenance = provenance
        provenance.project = catalog_blueprint._get(
            'Plane.provenance.project')
        provenance.producer = catalog_blueprint._get(
            'Plane.provenance.producer')
        provenance.reference = catalog_blueprint._get(
            'Plane.provenance.reference')

        # the blueprint holds the inputs as whitespace-separated URIs
        raw_inputs = catalog_blueprint._get('Plane.provenance.inputs')
        if not raw_inputs:
            continue
        for uri in raw_inputs.split():
            plane.provenance.inputs.add(PlaneURI(str(uri)))
    logging.debug(f'Done _update_catalog_plane for {catalog_uri}.')
Ejemplo n.º 10
0
def _make_uris(obs_id, product_id):
    """Return the PlaneURI of ``product_id`` within observation ``obs_id``.

    The observation URI is built for the ``dn.COLLECTION`` collection.

    :param obs_id: observation ID used to build the observation URI.
    :param product_id: product ID of the plane.
    :return: PlaneURI instance
    """
    obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(dn.COLLECTION, obs_id)
    return PlaneURI.get_plane_uri(ObservationURI(obs_uri_str), product_id)
Ejemplo n.º 11
0
def build_observation(db_content, observation, md_name):
    """Add a calibrated visibility plane, with one artifact, to an observation.

    :param db_content: table indexed as ``db_content['Release date'][i]``;
        also passed to the index, observation and position helpers.
    :param observation: existing observation to extend, or None to have a
        new one created by ``_build_obs``.
    :param md_name: name handed to ``read_md_pk`` to load the override
        metadata; also passed to ``_build_obs`` and ``build_position``.
    :return: the observation, with the new plane added.
    :raises mc.CadcException: if ``db_content`` has no release date for the
        selected row.
    """
    md_override = read_md_pk(md_name)
    fqn = md_override.get('fqn')
    almaca_name = AlmacaName(fname_on_disk=fqn)
    field_index = _get_index(almaca_name, db_content)
    if observation is None:
        observation = _build_obs(md_override, db_content, fqn, field_index,
                                 almaca_name, md_name)

    # TODO hard-coded input plane
    provenance = get_provenance(almaca_name)
    raw_plane_uri = PlaneURI('caom:ALMA/A001_X88b_X23/A001_X88b_X23-raw')
    provenance.inputs.add(raw_plane_uri)

    # HK 07-02-20
    # For the very first entry, A002_Xb999fd_X602.SCI.J1851+0035, the time
    # bounds of all the second-level planes correspond to Oct 20, 2016, which
    # agrees with the observing date from listobs, but the top level plane
    # listed a metaRelease date of Oct 12, 2016. It doesn't make sense to
    # have the meta data released before the observation was even taken.
    # Using the 'end time' of the observation that's already pulled for a
    # lower plane as the top level metaRelease date is a good solution. NB:
    # since all spws are observed simultaneously, the answer is the same for
    # whichever of the [high/low]res_spw[X] entries supplies the information.
    ms_metadata = read_md_pk(almaca_name.input_ms_metadata)
    end_mjd = mc.to_float(ms_metadata.get('end_date'))
    end_time = time.Time(end_mjd, format='mjd')
    end_time.format = 'isot'
    meta_release_dt = mc.make_time(end_time.value)

    raw_release = db_content['Release date'][field_index]
    if raw_release is None:
        raise mc.CadcException('No release date for {}'.format(fqn))
    data_release_dt = time.Time(raw_release).to_datetime()

    # NOTE(review): this progress message is logged at ERROR level - confirm
    # that the severity is intended.
    logging.error('Add plane {} to {}'.format(almaca_name.product_id,
                                              almaca_name.obs_id))
    plane = Plane(product_id=almaca_name.product_id,
                  data_release=data_release_dt,
                  meta_release=meta_release_dt,
                  provenance=provenance)

    plane.position = build_position(db_content, field_index, md_name)
    plane.energy = build_energy(md_override)
    plane.polarization = None
    plane.time = build_time(md_override, almaca_name)

    # HK 14-08-2019
    # dataProductType should be 'visibility'
    plane.data_product_type = DataProductType.VISIBILITY
    plane.calibration_level = CalibrationLevel.CALIBRATED

    observation.planes.add(plane)
    observation.meta_release = plane.meta_release
    # TODO hard-coded
    observation.members.add(ObservationURI('caom:ALMA/A001_X88b_X23'))

    # HK 29-07-19
    # qa/ contains images, plots, and web page status views generated during
    # the original (non-CANFAR) calibration of the raw data. We may want to
    # consider retaining these files as well, as they give a more advanced
    # user an easier way to check on data quality, potential issues with
    # calibration, etc. I believe they come packaged with the rest of the
    # 'products' tarball on the archive, so they would be obtainable even if
    # we do not keep a copy. These files are fairly small.
    # TODO override.get('artifact_uri')
    plane.artifacts.add(
        Artifact(uri=almaca_name.uri,
                 product_type=almaca_name.intent,
                 release_type=ReleaseType.DATA,
                 content_type='application/x-tar',
                 content_length=None))
    return observation
Ejemplo n.º 12
0
 def input_uri(self):
     """Return the PlaneURI built from ARCHIVE and this instance's
     ``_obs_id`` and ``_product_id``."""
     plane_uri_str = mc.CaomName.make_plane_uri(
         ARCHIVE, self._obs_id, self._product_id)
     return PlaneURI(plane_uri_str)
def _do_provenance(
    working_directory,
    science_file,
    observation,
    tap_client,
    plane_inputs,
    obs_members,
    config,
):
    """
    Add plane inputs and observation members from a PROVENANCE extension.

    DB 06-08-20
    Looking at the DATALAB values for the test set, these are now set to
    correctly identify the correct observation ID.
    e.g. rnN20140428S0174_dark.fits has DATALAB = GN-2014A-Q-85-16-006 since
    it is NOT derived from multiple observations.  rgnN20140428S0174_dark.fits
    (with the extra ‘g’) has DATALAB = GN-2014A-Q-85-16-006-DARK since it IS a
    new derived observation.   And inputs/members are in the PROVENANCE
    extension.

    DB 07-08-20
    All members + inputs in the extension are plane.provenance.inputs.

    Add the appropriate raw planes of the ‘member’ observations to the list
    of inputs for the derived observations, where 'appropriate' means
    find the ‘appropriate’ filename that identifies the plane of the inputs.

    :param working_directory: directory that contains ``science_file``.
    :param science_file: name of the FITS file to read.
    :param observation: observation being updated; members are only added
        when it is a DerivedObservation.
    :param tap_client: not used in this function body.
    :param plane_inputs: collection with an ``add`` method; receives one
        PlaneURI for every listed file whose observation ID is found.
    :param obs_members: collection with an ``add`` method; receives an
        ObservationURI for every found file whose type is 'member'.
    :param config: passed to builder.GemProcBuilder.
    :return: int, incremented once per plane input added and once more per
        member added; 0 when the file has no PROVENANCE extension.
    """
    logging.debug(f'Begin _do_provenance for {observation.observation_id}')
    count = 0
    fqn = os.path.join(working_directory, science_file)
    # The context manager closes the file on every exit path, including the
    # early return below and any exception raised while resolving IDs.
    with fits.open(fqn) as hdus:
        if 'PROVENANCE' not in hdus:
            logging.warning(
                f'PROVENANCE extension not found in HDUs for {science_file}.')
            return count

        data = hdus['PROVENANCE'].data
        # the name of the type column is only known by its 'Type' prefix
        # NOTE(review): temp stays None if no column name starts with 'Type',
        # and data[None] is then used below - confirm that cannot happen.
        temp = None
        for entry in data.columns:
            if entry.name.startswith('Type'):
                temp = entry.name
                break
        name_builder = builder.GemProcBuilder(config)
        for f_name, f_prov_type in zip(data['Filename'], data[temp]):
            f_id = gem_name.GemName.remove_extensions(f_name)

            # GEMINICADC
            # the order of calls here is meant to put the least amount of
            # load on archive.gemini.edu
            #
            collection = builder.COLLECTION
            obs_id = name_builder._get_obs_id(None, f_name, None)
            if obs_id is None:
                # GEMINI
                collection = gem_name.COLLECTION
                uri = mc.build_uri(collection, f_name, gem_name.SCHEME)
                metadata = external_metadata.defining_metadata_finder.get(uri)
                if metadata is not None and metadata.data_label is not None:
                    obs_id = metadata.data_label
            if obs_id is None:
                # neither source knows this file, so it contributes nothing
                continue
            logging.info(f'Found observation ID {obs_id} for file {f_id}.')
            input_obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(
                collection, obs_id)
            input_obs_uri = ObservationURI(input_obs_uri_str)
            plane_uri = PlaneURI.get_plane_uri(input_obs_uri, f_id)
            plane_inputs.add(plane_uri)
            count += 1
            if (f_prov_type == 'member'
                    and isinstance(observation, DerivedObservation)):
                member_obs_uri_str = (mc.CaomName.make_obs_uri_from_obs_id(
                    collection, obs_id))
                member_obs_uri = ObservationURI(member_obs_uri_str)
                obs_members.add(member_obs_uri)
                count += 1
    logging.debug('End _do_provenance.')
    return count