def visit(observation, **kwargs):
    """Augment plane provenance inputs and observation members.

    Every artifact whose URI equals ``storage_name.file_uri`` triggers a
    call to ``_do_provenance``. The plane inputs gathered for a plane are
    merged into that plane's provenance (when it has one), and the
    gathered members are merged into a DerivedObservation's members.

    :param observation: Observation instance, checked with mc.check_param.
    :param kwargs:
        working_directory: handed to _do_provenance, defaults to './'
        storage_name: required, its file_uri and file_name are read
        observable: its metrics are handed to _do_members_metadata
        caom_repo_client: handed to _do_members_metadata, a warning is
            logged when it is missing
    :return: dict with the single key 'provenance'. The value is 0 when
        the configuration contains the SCRAPE task type or when
        _do_provenance was never called, otherwise it is the value
        returned by the most recent _do_provenance call.
    :raises mc.CadcException: when there is no storage_name in kwargs.
    """
    mc.check_param(observation, Observation)

    working_directory = kwargs.get('working_directory', './')
    storage_name = kwargs.get('storage_name')
    if storage_name is None:
        raise mc.CadcException(
            f'Must have a storage_name parameter for provenance_augmentation '
            f'for {observation.observation_id}')

    config = mc.Config()
    config.get_executors()
    if mc.TaskType.SCRAPE in config.task_types:
        logging.warning('Provenance augmentation does not work for SCRAPE.')
        return {'provenance': 0}

    tap_client = CadcTapClient(clc.define_subject(config), config.tap_id)

    augmented = 0
    member_uris = TypedSet(ObservationURI, )

    for plane in observation.planes.values():
        input_uris = TypedSet(PlaneURI, )
        for artifact in plane.artifacts.values():
            if storage_name.file_uri == artifact.uri:
                # the returned value replaces, rather than adds to, any
                # value from an earlier matching artifact
                augmented = _do_provenance(
                    working_directory,
                    storage_name.file_name,
                    observation,
                    tap_client,
                    input_uris,
                    member_uris,
                    config,
                )

        if plane.provenance is not None:
            plane.provenance.inputs.update(input_uris)

    if isinstance(observation, DerivedObservation):
        members = observation.members
        members.update(member_uris)
        if len(members) > 0:
            observable = kwargs.get('observable')
            caom_repo_client = kwargs.get('caom_repo_client')
            if caom_repo_client is None:
                # only a warning: the members metadata call still happens
                logging.warning(f'Warning: Must have a caom_repo_client for '
                                f'members metadata for '
                                f'{observation.observation_id}.')
            _do_members_metadata(
                observation,
                caom_repo_client,
                members,
                observable.metrics,
            )

    logging.info(
        f'Done provenance_augmentation for {observation.observation_id}')
    return {'provenance': augmented}
Ejemplo n.º 2
0
def update_plane_provenance_from_values(plane, repair, values, collection,
                                        obs_id):
    """Build PlaneURIs from a list of values and hand them to
    mc.update_typed_set together with plane.provenance.inputs.

    :param plane: Plane instance whose provenance inputs are updated
    :param repair: callable invoked as repair(value, obs_id), returning an
        (observation id, product id) pair. A pair containing None is
        skipped.
    :param values: iterable of values handed to repair, one at a time
    :param collection: handed to make_plane_uri
    :param obs_id: handed to repair as its second argument
    """
    collected = TypedSet(PlaneURI, )
    for raw in values:
        member_obs_id, member_product_id = repair(raw, obs_id)
        if member_obs_id is None or member_product_id is None:
            # repair could not identify both halves of the input
            continue
        _, uri = make_plane_uri(member_obs_id, member_product_id, collection)
        collected.add(uri)
        logging.debug(f'Adding PlaneURI {uri}')
    mc.update_typed_set(plane.provenance.inputs, collected)
Ejemplo n.º 3
0
    def _update_cal_provenance(self, observation):
        """Derive plane inputs and observation members from the F_ON* and
        F_OFF* keywords of the first header.

        Each keyword value is reduced to its base file name, prefixed
        with 'C', and used as the observation id of the member. The
        plane product id is that same name with '_CAL' appended.

        :param observation: Observation whose planes and members are
            updated. Planes without provenance get a 'CPAPIR' Provenance.
        """
        input_uris = TypedSet(
            PlaneURI,
        )
        member_uris = TypedSet(
            ObservationURI,
        )
        primary_header = self._headers[0]
        for keyword in primary_header:
            if not keyword.startswith(('F_ON', 'F_OFF')):
                continue
            value = primary_header.get(keyword)
            stripped = OmmName.remove_extensions(os.path.basename(value))
            member_obs_id = f'C{stripped}'
            product_id = f'{member_obs_id}_CAL'

            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, member_obs_id)
            )
            input_uris.add(PlaneURI.get_plane_uri(member_uri, product_id))
            member_uris.add(member_uri)

        for plane in observation.planes.values():
            if plane.provenance is None:
                plane.provenance = Provenance('CPAPIR')
            mc.update_typed_set(plane.provenance.inputs, input_uris)

        mc.update_typed_set(observation.members, member_uris)
Ejemplo n.º 4
0
def update_observation_members(observation):
    """Collect the observation URI of every plane provenance input and
    hand the collection to mc.update_typed_set with observation.members.

    Planes with no provenance, or with provenance inputs of None, are
    skipped.

    :param observation Observation instance to add members to
    """
    member_uris = TypedSet(ObservationURI, )
    for plane in observation.planes.values():
        provenance = plane.provenance
        if provenance is None or provenance.inputs is None:
            continue
        for plane_input in provenance.inputs:
            member_uri = plane_input.get_observation_uri()
            member_uris.add(member_uri)
            logging.debug(f'Adding Observation URI {member_uri}')
    mc.update_typed_set(observation.members, member_uris)
Ejemplo n.º 5
0
def _update_science_provenance(observation, headers):
    """Derive observation members and plane inputs from the IN_* keywords
    of the first header.

    :param observation: Observation to update. The plane keyed by
        observation.observation_id receives the plane inputs.
    :param headers: sequence of headers, only headers[0] is read
    :raises mc.CadcException: when the base file name of a keyword value
        starts with neither 'S' nor 'C'.
    """
    member_uris = TypedSet(ObservationURI, )
    input_uris = TypedSet(PlaneURI, )
    primary_header = headers[0]
    for keyword in primary_header:
        if not keyword.startswith('IN_'):
            continue
        value = primary_header.get(keyword)
        base_name = OmmName.remove_extensions(os.path.basename(value))
        if base_name.startswith('S'):
            # starting 'S' means a science input, 'C' will mean cal
            file_id = f"{base_name.replace('S', 'C', 1)}_SCI"
        elif base_name.startswith('C'):
            file_id = f'{base_name}_CAL'
        else:
            raise mc.CadcException(
                f'Unknown file naming pattern {base_name}')

        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        # the product id is the same as the observation id for OMM
        input_uris.add(PlaneURI.get_plane_uri(member_uri, file_id))
        member_uris.add(member_uri)

    mc.update_typed_set(observation.members, member_uris)
    science_plane = observation.planes[observation.observation_id]
    mc.update_typed_set(science_plane.provenance.inputs, input_uris)
Ejemplo n.º 6
0
    def _update_science_provenance(self, observation):
        """Derive observation members and plane inputs from the numbered
        IN_* and ID_* keywords of the first header.

        :param observation: Observation whose members are updated, and
            whose planes all receive the same set of plane inputs.
        :raises mc.CadcException: when neither the base file name nor the
            keyword value starts with 'S' or 'C'.
        """
        member_uris = TypedSet(
            ObservationURI,
        )
        input_uris = TypedSet(
            PlaneURI,
        )
        # values look like:
        # IN_00010= 'S/data/cpapir/data/101116/101116_0088.fits.fits.gz'
        # or
        # IN_00001= 'S050213_0278.fits.gz' /raw input file (1/5)
        # or
        # DD - slack - 11-02-20
        # Add this new prefix. This will be a much easier fix than changing the
        # pipeline and all the headers once more.
        #
        # ID_00001= 'S/data/cpapir/data/101116/101116_0041.fits.fits.gz'
        primary_header = self._headers[0]
        for keyword in primary_header:
            if not keyword.startswith(('IN_', 'ID_')):
                continue
            try:
                # only keywords with an integer after the prefix are inputs
                int(keyword.split('_')[1])
            except ValueError:
                # skip the keyword ID_PROG
                continue

            value = primary_header.get(keyword)
            member_obs_id = OmmName.remove_extensions(os.path.basename(value))
            if member_obs_id.startswith('S'):
                # starting 'S' means a science input, 'C' will mean cal
                member_obs_id = member_obs_id.replace('S', 'C', 1)
                suffix = 'SCI'
            elif value.startswith('S'):
                # the 'S' marker is on the path, not on the file name
                member_obs_id = f'C{member_obs_id}'
                suffix = 'SCI'
            elif member_obs_id.startswith('C') or value.startswith('C'):
                suffix = 'CAL'
            else:
                raise mc.CadcException(
                    f'Unknown file naming pattern {member_obs_id}'
                )
            product_id = f'{member_obs_id}_{suffix}'

            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, member_obs_id)
            )
            input_uris.add(PlaneURI.get_plane_uri(member_uri, product_id))
            member_uris.add(member_uri)

        mc.update_typed_set(observation.members, member_uris)
        for plane in observation.planes.values():
            mc.update_typed_set(plane.provenance.inputs, input_uris)
Ejemplo n.º 7
0
def update_observation_members_filtered(observation, filter_fn):
    """Add members to Observation from all its Planes, depending
    on the return of the filter_fn.

    The inputs of every plane that has provenance are considered. A
    previous version kept only the filtered inputs of the last such
    plane, silently dropping members that came from earlier planes.

    :param observation Observation instance to add members to
    :param filter_fn returns True if a plane input should also be an
        Observation member
    """
    members_inputs = TypedSet(ObservationURI, )
    for plane in observation.planes.values():
        provenance = plane.provenance
        if provenance is None or provenance.inputs is None:
            continue
        # accumulate per plane, so that no plane overwrites another
        for entry in filter(filter_fn, provenance.inputs):
            member_uri = entry.get_observation_uri()
            members_inputs.add(member_uri)
            logging.debug(f'Adding Observation URI {member_uri}')
    mc.update_typed_set(observation.members, members_inputs)
Ejemplo n.º 8
0
    def _update_observation_members(self, observation):
        """
        Set the Observation members from the provenance inputs of all its
        Planes, leaving out master bias inputs for master flats.

        Must filter results because:
        DB - 11-06-20
        For the spectra there is a minor issue with members for master flat,
        *_F, observations.  The master bias used in the processing, the *_B.fits
        file, shouldn't be a member for the master flats.

        The master bias is in the list of inputs though:  Inputs for master flat
        are the unprocessed flats and the master bias.  The master bias is
        subtracted pixel-by-pixel from each unprocessed flat as part of the
        processing before the flats are then co-added.

        The composite/derived master flats (F) and master biases (B) should
        never be members.  At least for any processing that is currently being
        done.  For now the only members should be those given by the
        NCOMBINE x ZERO_# or FLAT_# keyword values.

        The inputs of every plane that has provenance are considered. A
        previous version kept only the filtered inputs of the last such
        plane.

        :param observation: Observation instance to set members on
        """

        def filter_fun(x):
            # reject a master bias input only when the observation itself
            # is a master flat
            return not (
                DAOName.is_master_flat(observation.observation_id)
                and DAOName.is_master_bias(x.get_observation_uri().uri)
            )

        members_inputs = TypedSet(ObservationURI,)
        for plane in observation.planes.values():
            provenance = plane.provenance
            if provenance is None or provenance.inputs is None:
                continue
            # accumulate per plane, so that no plane overwrites another
            for entry in filter(filter_fun, provenance.inputs):
                member_uri = entry.get_observation_uri()
                members_inputs.add(member_uri)
                logging.debug(f'Adding Observation URI {member_uri}')
        mc.update_typed_set(observation.members, members_inputs)
Ejemplo n.º 9
0
def update_plane_provenance_from_values(plane, repair, values, collection,
                                        obs_id):
    """
    Build PlaneURIs from a list of input values and hand them to
    mc.update_typed_set together with plane.provenance.inputs.

    :param plane: Plane instance to add inputs to
    :param repair: The function to fix the input values, to ensure they
        match input observationID values. It is called as
        repair(value, obs_id) and returns an (observation id, product id)
        pair; a pair containing None is skipped.
    :param values: list of values to add as inputs, after repair
    :param collection: str The collection name for URI construction
    :param obs_id: str value handed to repair as its second argument
    :return:
    """
    logging.debug('Begin update_plane_provenance_from_values')
    collected = TypedSet(PlaneURI, )
    for raw in values:
        member_obs_id, member_product_id = repair(raw, obs_id)
        if member_obs_id is None or member_product_id is None:
            # repair could not identify both halves of the input
            continue
        _, uri = make_plane_uri(member_obs_id, member_product_id, collection)
        collected.add(uri)
        logging.debug(f'Adding PlaneURI {uri}')
    mc.update_typed_set(plane.provenance.inputs, collected)
    logging.debug('End update_plane_provenance_from_values')
Ejemplo n.º 10
0
def update_plane_provenance(plane, headers, lookup, collection, repair,
                            obs_id):
    """Find plane inputs based on a particular keyword prefix, and hand
    them to mc.update_typed_set together with plane.provenance.inputs.

    The search itself is delegated to _update_plane_provenance, which
    fills a new, empty TypedSet of PlaneURI.

    :param plane Plane instance to add inputs to
    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    """
    found = TypedSet(PlaneURI, )
    _update_plane_provenance(
        headers, lookup, collection, repair, obs_id, found)
    mc.update_typed_set(plane.provenance.inputs, found)
Ejemplo n.º 11
0
def update_plane_provenance_single(plane, headers, lookup, collection, repair,
                                   obs_id):
    """Find plane inputs based on a particular keyword prefix, and hand
    them to mc.update_typed_set together with plane.provenance.inputs.
    This differs from update_plane_provenance because all the values are
    in a single keyword, such as COMMENT or HISTORY.

    The search itself is delegated to _find_plane_provenance_single, which
    fills a new, empty TypedSet of PlaneURI.

    :param plane Plane instance to add inputs to
    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    """
    found = TypedSet(PlaneURI, )
    _find_plane_provenance_single(
        found, headers, lookup, collection, repair, obs_id)
    mc.update_typed_set(plane.provenance.inputs, found)
Ejemplo n.º 12
0
def _update_cal_provenance(observation, headers):
    """Derive plane inputs and observation members from the F_ON* and
    F_OFF* keywords of the first header.

    :param observation: Observation whose planes and members are updated.
        Planes without provenance get a 'CPAPIR' Provenance.
    :param headers: sequence of headers, only headers[0] is read
    """
    input_uris = TypedSet(PlaneURI, )
    member_uris = TypedSet(ObservationURI, )
    primary_header = headers[0]
    for keyword in primary_header:
        if not keyword.startswith(('F_ON', 'F_OFF')):
            continue
        value = primary_header.get(keyword)
        stripped = OmmName.remove_extensions(os.path.basename(value))
        file_id = f'C{stripped}_CAL'

        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        # the product id is the same as the observation id for OMM
        input_uris.add(PlaneURI.get_plane_uri(member_uri, file_id))
        member_uris.add(member_uri)

    for plane in observation.planes.values():
        if plane.provenance is None:
            plane.provenance = Provenance('CPAPIR')
        mc.update_typed_set(plane.provenance.inputs, input_uris)

    mc.update_typed_set(observation.members, member_uris)
Ejemplo n.º 13
0
def _default_missing_attrs(target, names):
    """Assign None to each attribute in names that target does not have."""
    for name in names:
        if not hasattr(target, name):
            setattr(target, name, None)


def _build_observation(args):
    """Build an observation from a jsonpickle file and write it out.

    The file ``{drao_dir}/{obs_id}.json``, with both parts coming from
    _get_name(args), is decoded with jsonpickle. Attributes that may be
    absent on the decoded objects are then defined, and several are reset
    unconditionally, before the result is written with
    mc.write_obs_to_file.

    :param args: object with the attributes in_obs_xml (optional, its
        .name is read as an existing observation) and out_obs_xml (the
        destination, required). It is also handed to _get_name.
    :return: 0
    :raises mc.CadcException: when the JSON file does not exist, or when
        args.out_obs_xml is not set.
    """
    # The Config instance itself is not used afterwards, so it is not
    # bound to a name.
    mc.Config().get_executors()

    existing = None
    if args.in_obs_xml:
        existing = mc.read_obs_from_file(args.in_obs_xml.name)

    drao_name, drao_dir = _get_name(args)
    json_fqn = f'{drao_dir}/{drao_name.obs_id}.json'
    # a progress message, not a failure, so it is not logged as an error
    logging.info(f'Looking for metadata in {json_fqn}')
    if not os.path.exists(json_fqn):
        raise mc.CadcException(
            f'Could not find {json_fqn}. Cannot continue without it.')

    with open(json_fqn) as f:
        js = f.read()

    # get the skeleton of the CAOM2 observation
    # NOTE(review): jsonpickle.decode can instantiate arbitrary classes
    # named in its input, so json_fqn has to come from a trusted source.
    jsonpickle.handlers.register(TypedOrderedDict, TypedOrderedDictHandler)
    jsonpickle.handlers.register(datetime, DateTimeHandler)
    obs = jsonpickle.decode(js)

    # add the bits of the CAOM2 observation that are required for a
    # structure that's acceptable to /ams - this mostly amounts to
    # ensuring that attributes have been defined on the 'un-pickled'

    _set_common(obs, existing)

    if obs._proposal is not None:
        _default_missing_attrs(obs._proposal, ('_project', '_name', '_title'))
        if not hasattr(obs._proposal, '_keywords'):
            obs._proposal._keywords = set()
    if obs._target is not None:
        _default_missing_attrs(
            obs._target,
            ('_target_type', '_standard', '_redshift', '_moving',
             '_target_id'),
        )
        obs._target._keywords = set()
    obs._requirements = None
    if obs._telescope is not None:
        obs._telescope._keywords = set()
    if obs._instrument is not None:
        obs._instrument._keywords = set()
    obs._environment = None
    _default_missing_attrs(obs, ('_meta_read_groups',))

    for plane in obs.planes.values():
        if existing is not None:
            _set_common(plane, existing.planes[plane.product_id])
        else:
            _set_common(plane, None)
        plane._acc_meta_checksum = None
        plane._metrics = None
        plane._quality = None
        # after this, every attribute named here exists, possibly as None
        _default_missing_attrs(
            plane,
            ('_data_read_groups', '_meta_read_groups', '_position',
             '_energy', '_polarization', '_time', '_custom'),
        )

        if plane._provenance is not None:
            plane._provenance._keywords = set()
            plane._provenance._inputs = TypedSet(PlaneURI, )
            _default_missing_attrs(plane._provenance, ('_run_id',))

        if plane._position is not None:
            plane._position._dimension = None
            plane._position._resolution = None
            plane._position._sample_size = None
            # Only touch _resolution_bounds on a real position: doing so
            # when _position is None raises AttributeError.
            _default_missing_attrs(plane._position, ('_resolution_bounds',))

        if plane._energy is not None:
            _default_missing_attrs(
                plane._energy,
                ('_sample_size', '_bandpass_name', '_transition',
                 '_resolving_power', '_resolving_power_bounds'),
            )
            if hasattr(plane._energy, '_em_band'):
                # carry the single _em_band value over into energy_bands
                plane._energy.energy_bands = caom_util.TypedSet(EnergyBand)
                plane._energy.energy_bands.add(plane._energy._em_band)

        if plane._time is not None:
            _default_missing_attrs(
                plane._time, ('_resolution', '_resolution_bounds'))

        for artifact in plane.artifacts.values():
            if existing is not None:
                _set_common(
                    artifact,
                    existing.planes[plane.product_id].artifacts[artifact.uri])
            else:
                _set_common(artifact, None)
            artifact._acc_meta_checksum = None
            artifact.parts = TypedOrderedDict(Part, )
            _default_missing_attrs(
                artifact, ('_content_release', '_content_read_groups'))

    if args.out_obs_xml:
        mc.write_obs_to_file(obs, args.out_obs_xml)
    else:
        raise mc.CadcException(f'No where to write for {obs.observation_id}')
    return 0