コード例 #1
0
def _do_provenance(working_directory, science_file, observation,
                   tap_client, plane_inputs, obs_members):
    """
    Read the PROVENANCE extension of a FITS file and record the files it
    lists as plane inputs and, where applicable, observation members.

    DB 06-08-20
    Looking at the DATALAB values for the test set, these are now set to
    correctly identify the correct observation ID.
    e.g. rnN20140428S0174_dark.fits has DATALAB = GN-2014A-Q-85-16-006 since
    it is NOT derived from multiple observations.  rgnN20140428S0174_dark.fits
    (with the extra ‘g’) has DATALAB = GN-2014A-Q-85-16-006-DARK since it IS a
    new derived observation.   And inputs/members are in the PROVENANCE
    extension.

    DB 07-08-20
    All members + inputs in the extension are plane.provenance.inputs.

    Add the appropriate raw planes of the ‘member’ observations to the list
    of inputs for the derived observations, where 'appropriate' means
    find the ‘appropriate’ filename that identifies the plane of the inputs.

    :param working_directory: directory that contains science_file
    :param science_file: name of the FITS file to read
    :param observation: observation being updated; members are only added
        when it is a DerivedObservation
    :param tap_client: handed to external_metadata.get_obs_id_from_cadc
    :param plane_inputs: collection that PlaneURI instances are added to
    :param obs_members: collection that ObservationURI instances are added to
    :return: number of add() calls made on plane_inputs and obs_members;
        0 when the file has no PROVENANCE extension
    """
    logging.debug(f'Begin _do_provenance for {observation.observation_id}')
    count = 0
    fqn = os.path.join(working_directory, science_file)
    # the context manager closes the file on every exit path, including the
    # early return below and any exception raised during the look-ups
    with fits.open(fqn) as hdus:
        if 'PROVENANCE' not in hdus:
            logging.warning(
                f'PROVENANCE extension not found in HDUs for {science_file}.')
            return count

        data = hdus['PROVENANCE'].data
        # the provenance type column is identified by its name prefix only
        temp = None
        for entry in data.columns:
            if entry.name.startswith('Type'):
                temp = entry.name
                break
        for f_name, f_prov_type in zip(data['Filename'], data[temp]):
            f_id = gem_name.GemName.remove_extensions(f_name)
            for coll in ['GEMINI', 'GEMINIPROC']:
                obs_id = external_metadata.get_obs_id_from_cadc(
                    f_id, tap_client, coll)
                if obs_id is None:
                    continue
                logging.info(f'Found observation ID {obs_id} for file {f_id}.')
                input_obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(
                    coll, obs_id)
                input_obs_uri = ObservationURI(input_obs_uri_str)
                plane_uri = PlaneURI.get_plane_uri(input_obs_uri, f_id)
                plane_inputs.add(plane_uri)
                count += 1
                if (f_prov_type == 'member'
                        and isinstance(observation, DerivedObservation)):
                    # a member has the same collection and observation ID
                    # as the input, so the URI string is reused
                    obs_members.add(ObservationURI(input_obs_uri_str))
                    count += 1
                # the first collection that knows the file wins
                break
    logging.debug('End _do_provenance.')
    return count
コード例 #2
0
ファイル: main_app.py プロジェクト: opencadc/omm2caom2
    def _update_cal_provenance(self, observation):
        """
        Use the F_ON*/F_OFF* keyword values of the first header as the
        provenance inputs of every plane, and as the observation members.

        :param observation: observation whose planes and members are updated
            in place. A plane without provenance gets a 'CPAPIR' Provenance.
        """
        primary_header = self._headers[0]
        cal_members = TypedSet(ObservationURI,)
        cal_inputs = TypedSet(PlaneURI,)

        for keyword in primary_header:
            if not keyword.startswith(('F_ON', 'F_OFF')):
                continue
            value = primary_header.get(keyword)
            # the member observation ID is the bare file name with a
            # leading 'C'; the product ID adds the '_CAL' suffix
            base_name = (
                f'C{OmmName.remove_extensions(os.path.basename(value))}'
            )
            file_id = f'{base_name}_CAL'
            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, base_name)
            )
            cal_inputs.add(PlaneURI.get_plane_uri(member_uri, file_id))
            cal_members.add(member_uri)

        for plane in observation.planes.values():
            if plane.provenance is None:
                plane.provenance = Provenance('CPAPIR')
            mc.update_typed_set(plane.provenance.inputs, cal_inputs)

        mc.update_typed_set(observation.members, cal_members)
コード例 #3
0
ファイル: caom_composable.py プロジェクト: opencadc/caom2pipe
def _find_plane_provenance_single(plane_inputs, headers, lookup, collection,
                                  repair, obs_id):
    """
    :param plane_inputs TypedSet instance to add inputs to
    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    """
    for header in headers:
        for keyword in header:
            if keyword.startswith(lookup):
                value = header.get(keyword)
                prov_ids = repair(value, obs_id)
                for entry in prov_ids:
                    # 0 - observation
                    # 1 - plane
                    obs_member_uri_str = \
                        mc.CaomName.make_obs_uri_from_obs_id(
                            collection, entry[0])
                    obs_member_uri = ObservationURI(obs_member_uri_str)
                    plane_uri = PlaneURI.get_plane_uri(obs_member_uri,
                                                       entry[1])
                    plane_inputs.add(plane_uri)
                    logging.debug(f'Adding PlaneURI {plane_uri}')
                # because all the content gets processed with one
                # access to the keyword value, stop after one round
                break
コード例 #4
0
def _update_science_provenance(observation, headers):
    """
    Use the IN_* keyword values of the first header as the observation
    members, and as the provenance inputs of the plane whose product ID is
    the observation ID.

    :param observation: observation updated in place
    :param headers: FITS headers; only headers[0] is read
    :raises mc.CadcException: when an input file name starts with neither
        'S' nor 'C'
    """
    primary_header = headers[0]
    science_inputs = TypedSet(PlaneURI, )
    science_members = TypedSet(ObservationURI, )

    for keyword in primary_header:
        if not keyword.startswith('IN_'):
            continue
        base_name = OmmName.remove_extensions(
            os.path.basename(primary_header.get(keyword)))
        if base_name.startswith('S'):
            # a leading 'S' is a science input, and is stored with a
            # leading 'C'
            file_id = '{}_SCI'.format(base_name.replace('S', 'C', 1))
        elif base_name.startswith('C'):
            # a leading 'C' is a calibration input
            file_id = '{}_CAL'.format(base_name)
        else:
            raise mc.CadcException(
                'Unknown file naming pattern {}'.format(base_name))

        # the product id is the same as the observation id for OMM
        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        science_inputs.add(PlaneURI.get_plane_uri(member_uri, file_id))
        science_members.add(member_uri)

    mc.update_typed_set(observation.members, science_members)
    target_plane = observation.planes[observation.observation_id]
    mc.update_typed_set(target_plane.provenance.inputs, science_inputs)
コード例 #5
0
def _update_plane_provenance(headers, lookup, collection, repair, obs_id,
                             plane_inputs):
    """Add inputs to a collection, based on a particular keyword prefix.

    :param headers FITS keyword headers that have lookup values.
    :param lookup The keyword pattern to find in the FITS header keywords for
        input files.
    :param collection The collection name for URI construction
    :param repair The function to fix input values, to ensure they match
        input observation ID values.
    :param obs_id String value for logging only.
    :param plane_inputs TypedSet(PlaneURI,) to which new PlaneURI instances are
        added
    """
    for header in headers:
        for keyword in header:
            if keyword.startswith(lookup):
                value = header.get(keyword)
                prov_obs_id, prov_prod_id = repair(value, obs_id)
                if prov_obs_id is not None and prov_prod_id is not None:
                    obs_member_uri_str = \
                        mc.CaomName.make_obs_uri_from_obs_id(
                            collection, prov_obs_id
                        )
                    obs_member_uri = ObservationURI(obs_member_uri_str)
                    plane_uri = PlaneURI.get_plane_uri(obs_member_uri,
                                                       prov_prod_id)
                    plane_inputs.add(plane_uri)
                    logging.debug(f'Adding PlaneURI {plane_uri}')
コード例 #6
0
ファイル: main_app.py プロジェクト: opencadc/omm2caom2
    def _update_science_provenance(self, observation):
        """
        Use the numbered IN_*/ID_* keyword values of the first header as the
        observation members, and as the provenance inputs of every plane.

        :param observation: observation updated in place
        :raises mc.CadcException: when neither the keyword value nor its
            base file name starts with 'S' or 'C'
        """
        primary_header = self._headers[0]
        science_inputs = TypedSet(
            PlaneURI,
        )
        science_members = TypedSet(
            ObservationURI,
        )
        # values look like:
        # IN_00010= 'S/data/cpapir/data/101116/101116_0088.fits.fits.gz'
        # or
        # IN_00001= 'S050213_0278.fits.gz' /raw input file (1/5)
        # or
        # DD - slack - 11-02-20
        # Add this new prefix. This will be a much easier fix than changing the
        # pipeline and all the headers once more.
        #
        # ID_00001= 'S/data/cpapir/data/101116/101116_0041.fits.fits.gz'
        for keyword in primary_header:
            if not keyword.startswith(('IN_', 'ID_')):
                continue
            try:
                int(keyword.split('_')[1])
            except ValueError:
                # only numbered keywords are inputs, so skip the keyword
                # ID_PROG
                continue
            value = primary_header.get(keyword)
            base_name = OmmName.remove_extensions(os.path.basename(value))
            if base_name.startswith('S') or value.startswith('S'):
                # starting 'S' means a science input, 'C' will mean cal
                if base_name.startswith('S'):
                    base_name = base_name.replace('S', 'C', 1)
                else:
                    # the 'S' is on the path, not on the file name
                    base_name = f'C{base_name}'
                file_id = f'{base_name}_SCI'
            elif base_name.startswith('C') or value.startswith('C'):
                file_id = f'{base_name}_CAL'
            else:
                raise mc.CadcException(
                    f'Unknown file naming pattern {base_name}'
                )

            member_uri = ObservationURI(
                mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, base_name)
            )
            science_inputs.add(PlaneURI.get_plane_uri(member_uri, file_id))
            science_members.add(member_uri)

        mc.update_typed_set(observation.members, science_members)
        for plane in observation.planes.values():
            mc.update_typed_set(plane.provenance.inputs, science_inputs)
コード例 #7
0
ファイル: caom_composable.py プロジェクト: opencadc/caom2pipe
def make_plane_uri(obs_id, product_id, collection):
    """
    Common code to construct a PlaneURI, and the ObservationURI it is built
    from.

    :param obs_id: str Observation.observationID for a CADC collection.
    :param product_id: str Plane.productID for a CADC collection.
    :param collection: str CADC collection.
    :return: tuple with ObservationURI, PlaneURI instance
    """
    observation_uri = ObservationURI(
        mc.CaomName.make_obs_uri_from_obs_id(collection, obs_id))
    return observation_uri, PlaneURI.get_plane_uri(observation_uri, product_id)
コード例 #8
0
def _update_cal_provenance(observation, headers):
    """
    Use the F_ON*/F_OFF* keyword values of the first header as the
    provenance inputs of every plane, and as the observation members.

    :param observation: observation updated in place. A plane without
        provenance gets a 'CPAPIR' Provenance.
    :param headers: FITS headers; only headers[0] is read
    """
    primary_header = headers[0]
    cal_members = TypedSet(ObservationURI, )
    cal_inputs = TypedSet(PlaneURI, )

    for keyword in primary_header:
        if not keyword.startswith(('F_ON', 'F_OFF')):
            continue
        base_name = OmmName.remove_extensions(
            os.path.basename(primary_header.get(keyword)))
        file_id = 'C{}_CAL'.format(base_name)

        # the product id is the same as the observation id for OMM
        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, file_id))
        cal_inputs.add(PlaneURI.get_plane_uri(member_uri, file_id))
        cal_members.add(member_uri)

    for plane in observation.planes.values():
        if plane.provenance is None:
            plane.provenance = Provenance('CPAPIR')
        mc.update_typed_set(plane.provenance.inputs, cal_inputs)

    mc.update_typed_set(observation.members, cal_members)
コード例 #9
0
ファイル: main_app.py プロジェクト: SharonGoliath/dao2caom2
def _make_uris(obs_id, product_id):
    """
    Build the PlaneURI for a product of an observation in dn.COLLECTION.

    :param obs_id: observation ID used for the observation URI
    :param product_id: product ID used for the plane URI
    :return: PlaneURI instance. The intermediate ObservationURI is not
        returned.
    """
    obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(dn.COLLECTION, obs_id)
    return PlaneURI.get_plane_uri(ObservationURI(obs_uri_str), product_id)
コード例 #10
0
def build_observation(db_content, observation, md_name):
    """
    Add a calibrated visibility plane, with a single tar artifact, to an
    observation, creating the observation first when there isn't one.

    :param db_content: table of archive query results, accessed by column
        name and then by row index
    :param observation: existing observation to extend, or None to have one
        built by _build_obs
    :param md_name: name handed to read_md_pk, build_position and _build_obs
        to locate the metadata for the file
    :return: the observation, with the new plane added
    :raises mc.CadcException: when db_content has no release date for the row
    """
    override = read_md_pk(md_name)

    fqn = override.get('fqn')
    almaca_name = AlmacaName(fname_on_disk=fqn)
    field_index = _get_index(almaca_name, db_content)
    if observation is None:
        observation = _build_obs(override, db_content, fqn, field_index,
                                 almaca_name, md_name)

    provenance = get_provenance(almaca_name)
    # TODO hard-coded
    provenance.inputs.add(
        PlaneURI('caom:ALMA/A001_X88b_X23/A001_X88b_X23-raw'))

    # HK 07-02-20
    # I'm looking at the very first entry, A002_Xb999fd_X602.SCI.J1851+0035.
    # The time bounds listed under all of the second-level planes correspond
    # to a date of Oct 20, 2016, which agrees with the observing date I pull
    # up on listobs.  But in the top level plane, the metaRelease date is
    # listed as Oct 12, 2016.  As we discussed earlier this week, it doesn't
    # make sense to have the meta data released before the observation was
    # even taken. Using the 'end time' of the observation that's already
    # pulled for a lower plane, and putting that as the metaRelease date in
    # the top level would be a good solution.  NB: since all spws are observed
    # simultaneously, you'll get the same answer for whichever of the
    # [high/low]res_spw[X] entries that you pull the information from.
    input_meta_data = read_md_pk(almaca_name.input_ms_metadata)
    meta_release = mc.to_float(input_meta_data.get('end_date'))
    # the end date is treated as an MJD value, and re-formatted as ISO
    meta_release = time.Time(meta_release, format='mjd')
    meta_release.format = 'isot'
    meta_release_dt = mc.make_time(meta_release.value)

    release_date = db_content['Release date'][field_index]
    if release_date is None:
        raise mc.CadcException('No release date for {}'.format(fqn))
    release_date = time.Time(release_date).to_datetime()

    # routine progress, so it does not belong at the ERROR level
    logging.info('Add plane {} to {}'.format(almaca_name.product_id,
                                             almaca_name.obs_id))
    plane = Plane(product_id=almaca_name.product_id,
                  data_release=release_date,
                  meta_release=meta_release_dt,
                  provenance=provenance)

    plane.position = build_position(db_content, field_index, md_name)
    plane.energy = build_energy(override)
    plane.polarization = None
    plane.time = build_time(override, almaca_name)

    # HK 14-08-2019
    # dataProductType should be 'visibility'
    plane.data_product_type = DataProductType.VISIBILITY
    plane.calibration_level = CalibrationLevel.CALIBRATED

    observation.planes.add(plane)
    observation.meta_release = plane.meta_release
    # TODO hard-coded
    observation.members.add(ObservationURI('caom:ALMA/A001_X88b_X23'))

    # HK 29-07-19
    # qa/ contains images, plots, and web page status views generated
    # during the original (non-CANFAR) calibration of the raw data.
    # We may want to consider retaining these files as well, as they give
    # a more advanced user an easier way to check on data quality,
    # potential issues with calibration, etc.  I believe they come
    # packaged with the rest of the 'products' tarball on the archive,
    # so they would be obtainable even if we do not keep a copy.  These
    # files are fairly small.
    # TODO override.get('artifact_uri')
    artifact = Artifact(uri=almaca_name.uri,
                        product_type=almaca_name.intent,
                        release_type=ReleaseType.DATA,
                        content_type='application/x-tar',
                        content_length=None)
    plane.artifacts.add(artifact)
    return observation
コード例 #11
0
def _build_obs(override, db_content, fqn, index, almaca_name, md_name):
    """
    Build a DerivedObservation from one row of the archive query results
    and the metadata overrides for a file.

    :param override: metadata overrides; 'field' and 'project' are read from
        it, and it is handed to _get_band_name
    :param db_content: table of archive query results, accessed by column
        name and then by row index
    :param fqn: file name, used only in the error message
    :param index: row of db_content to read
    :param almaca_name: provides obs_id and intent for the observation
    :param md_name: handed to _get_ra_dec to find the target position
    :return: DerivedObservation with a single hard-coded member
    :raises mc.CadcException: when db_content has no observation date for
        the row
    """
    raw_obs_date = db_content['Observation date'][index]
    if raw_obs_date is None:
        raise mc.CadcException('No observation date for {}'.format(fqn))
    obs_date = time.Time(raw_obs_date).to_datetime()

    # of_site('alma')
    # 2225015.30883296, -5440016.41799762, -2481631.27428014
    #
    array_size = db_content['Array'][index]
    telescope = Telescope(
        name="ALMA-{}".format(array_size),
        geo_location_x=2225142.18,
        geo_location_y=-5440307.37,
        geo_location_z=-2481029.852,
    )

    instrument = Instrument(name=_get_band_name(override))

    # HK - 14-08-19
    # target: this should be the science target / science fieldname and
    # not the calibrator fieldname, as it is presently (i.e., should be
    # J1851+0035, not J1924-2914).  Or, we may need to continue to leave
    # that field blank at this level.  A single calibrated measurement set
    # may contain multiple science target names, which would not be
    # properly captured at this level.  [For the raw data, the target
    # field is left blank]
    target = Target(
        name=override.get('field'),
        standard=False,
        moving=False,
        target_type=TargetType.OBJECT,
    )

    # HK - 07-02-20
    # Since we've changed what the base level observation is and can now list
    # a target name in the 'Derived Observation' base plane, I believe that
    # means we can also include that target's position under targetPosition.
    # You've already pulled this info out for the subsequent levels of the
    # hierarchy under 'position', so it's presumably fairly straightforward to
    # include the same info here.
    ra, dec = _get_ra_dec(md_name)
    target_position = TargetPosition(
        coordinates=Point(ra, dec),
        coordsys='ICRS',  # from listobs output
        equinox=2000.0,  # a guess by google
    )

    # db_content as votable:
    # >>> t.colnames
    # ['Project_code', 'Source_name', 'RA', 'Dec', 'Galactic_longitude',
    # 'Galactic_latitude', 'Band', 'Spatial_resolution',
    # 'Frequency_resolution', 'Array', 'Mosaic', 'Integration',
    # 'Release_date', 'Frequency_support', 'Velocity_resolution',
    # 'Pol_products', 'Observation_date', 'PI_name', 'SB_name',
    # 'Proposal_authors', 'Line_sensitivity__10_km_s_',
    # 'Continuum_sensitivity', 'PWV', 'Group_ous_id', 'Member_ous_id',
    # 'Asdm_uid', 'Project_title', 'Project_type', 'Scan_intent',
    # 'Field_of_view', 'Largest_angular_scale', 'QA2_Status', 'COUNT',
    # 'Science_keyword', 'Scientific_category', 'ASA_PROJECT_CODE']
    #
    # db_content.colnames as html:
    #
    # ['Project code', 'Source name', 'RA', 'Dec', 'Galactic longitude',
    # 'Galactic latitude', 'Band', 'Spatial resolution',
    # 'Frequency resolution', 'Array', 'Mosaic', 'Integration',
    # 'Release date', 'Frequency support', 'Velocity resolution',
    # 'Pol products', 'Observation date', 'PI name', 'SB name',
    # 'Proposal authors', 'Line sensitivity (10 km/s)',
    # 'Continuum sensitivity', 'PWV', 'Group ous id', 'Member ous id',
    # 'Asdm uid', 'Project title', 'Project type', 'Scan intent',
    # 'Field of view', 'Largest angular scale', 'QA2 Status', 'COUNT',
    # 'Science keyword', 'Scientific category', 'ASA_PROJECT_CODE']

    # HK - 14-08-19
    # can we include the project code, and not just the observation UID
    # somewhere in here?  For the raw data, it looks like the project
    # code was included as 'proposal: ID', whereas for the calibrated
    # measurement set, proposal: ID is now set to the UID and the project
    # code (2016.1.00010.S) is not captured anywhere.  Could the 'project'
    # field, currently set as 'null' be used for this?
    proposal = Proposal(
        id=db_content['Project code'][index],
        project=override.get('project'),
        pi_name=db_content['PI name'][index],
        title=db_content['Project title'][index],
    )

    science_keywords = db_content['Science keyword'][index]
    if science_keywords is not None:
        # the keywords are whitespace-separated in the query results
        proposal.keywords = set(science_keywords.split())

    environment = Environment()
    environment.tau = db_content['PWV'][index] / 0.935 + 0.35
    environment.wavelength_tau = 350 * units.um.to(units.meter)

    # anything that is not a science product is treated as calibration
    if almaca_name.intent is ProductType.SCIENCE:
        intent = ObservationIntentType.SCIENCE
    else:
        intent = ObservationIntentType.CALIBRATION

    #
    # PD, SG 15-08-19
    # make it a composite, algorithm name something like
    # 'target splitting'
    #
    algorithm = Algorithm(name='single band split')
    derived = DerivedObservation(
        collection=ARCHIVE,
        observation_id=almaca_name.obs_id,
        sequence_number=None,
        intent=intent,
        type="OBJECT",
        proposal=proposal,
        telescope=telescope,
        instrument=instrument,
        target=target,
        meta_release=obs_date,
        algorithm=algorithm,
        environment=environment,
        target_position=target_position,
    )
    member_uri_str = mc.CaomName.make_obs_uri_from_obs_id(
        'ALMA', 'A001_X88b_X23')
    derived.members.add(ObservationURI(member_uri_str))
    return derived
コード例 #12
0
def _do_provenance(
    working_directory,
    science_file,
    observation,
    tap_client,
    plane_inputs,
    obs_members,
    config,
):
    """
    Read the PROVENANCE extension of a FITS file and record the files it
    lists as plane inputs and, where applicable, observation members.

    DB 06-08-20
    Looking at the DATALAB values for the test set, these are now set to
    correctly identify the correct observation ID.
    e.g. rnN20140428S0174_dark.fits has DATALAB = GN-2014A-Q-85-16-006 since
    it is NOT derived from multiple observations.  rgnN20140428S0174_dark.fits
    (with the extra ‘g’) has DATALAB = GN-2014A-Q-85-16-006-DARK since it IS a
    new derived observation.   And inputs/members are in the PROVENANCE
    extension.

    DB 07-08-20
    All members + inputs in the extension are plane.provenance.inputs.

    Add the appropriate raw planes of the ‘member’ observations to the list
    of inputs for the derived observations, where 'appropriate' means
    find the ‘appropriate’ filename that identifies the plane of the inputs.

    :param working_directory: directory that contains science_file
    :param science_file: name of the FITS file to read
    :param observation: observation being updated; members are only added
        when it is a DerivedObservation
    :param tap_client: not used in this function
    :param plane_inputs: collection that PlaneURI instances are added to
    :param obs_members: collection that ObservationURI instances are added to
    :param config: handed to builder.GemProcBuilder
    :return: number of add() calls made on plane_inputs and obs_members;
        0 when the file has no PROVENANCE extension
    """
    logging.debug(f'Begin _do_provenance for {observation.observation_id}')
    count = 0
    fqn = os.path.join(working_directory, science_file)
    # the context manager closes the file on every exit path, including the
    # early return below and any exception raised during the look-ups
    with fits.open(fqn) as hdus:
        if 'PROVENANCE' not in hdus:
            logging.warning(
                f'PROVENANCE extension not found in HDUs for {science_file}.')
            return count

        data = hdus['PROVENANCE'].data
        # the provenance type column is identified by its name prefix only
        temp = None
        for entry in data.columns:
            if entry.name.startswith('Type'):
                temp = entry.name
                break
        name_builder = builder.GemProcBuilder(config)
        for f_name, f_prov_type in zip(data['Filename'], data[temp]):
            f_id = gem_name.GemName.remove_extensions(f_name)

            # GEMINICADC
            # the order of calls here is meant to put the least amount of
            # load on archive.gemini.edu
            #
            collection = builder.COLLECTION
            obs_id = name_builder._get_obs_id(None, f_name, None)
            if obs_id is None:
                # GEMINI
                collection = gem_name.COLLECTION
                uri = mc.build_uri(collection, f_name, gem_name.SCHEME)
                metadata = external_metadata.defining_metadata_finder.get(uri)
                if metadata is not None and metadata.data_label is not None:
                    obs_id = metadata.data_label
            if obs_id is None:
                # neither collection knows the file
                continue
            logging.info(f'Found observation ID {obs_id} for file {f_id}.')
            input_obs_uri_str = mc.CaomName.make_obs_uri_from_obs_id(
                collection, obs_id)
            input_obs_uri = ObservationURI(input_obs_uri_str)
            plane_uri = PlaneURI.get_plane_uri(input_obs_uri, f_id)
            plane_inputs.add(plane_uri)
            count += 1
            if (f_prov_type == 'member'
                    and isinstance(observation, DerivedObservation)):
                # a member has the same collection and observation ID as
                # the input, so the URI string is reused
                obs_members.add(ObservationURI(input_obs_uri_str))
                count += 1
    logging.debug('End _do_provenance.')
    return count