def _update_cal_provenance(self, observation):
    """Attach the calibration files named in the primary header as inputs.

    Every keyword of ``self._headers[0]`` that starts with ``F_ON`` or
    ``F_OFF`` is read as a file name. The base name of that file, with its
    extensions removed and prefixed with ``C``, is used as the observation
    id of the member; the same value with ``_CAL`` appended is used as the
    product id of the input plane.

    :param observation: observation whose ``members`` and whose planes'
        ``provenance.inputs`` are updated in place. A plane with no
        provenance is given a new ``Provenance('CPAPIR')`` first.
    """
    input_planes = TypedSet(PlaneURI)
    input_members = TypedSet(ObservationURI)
    primary_header = self._headers[0]

    for card_name in primary_header:
        if not card_name.startswith(('F_ON', 'F_OFF')):
            continue
        file_name = os.path.basename(primary_header.get(card_name))
        obs_id = f'C{OmmName.remove_extensions(file_name)}'
        product_id = f'{obs_id}_CAL'
        member_uri = ObservationURI(
            mc.CaomName.make_obs_uri_from_obs_id(COLLECTION, obs_id)
        )
        input_planes.add(PlaneURI.get_plane_uri(member_uri, product_id))
        input_members.add(member_uri)

    for plane in observation.planes.values():
        if plane.provenance is None:
            plane.provenance = Provenance('CPAPIR')
        mc.update_typed_set(plane.provenance.inputs, input_planes)

    mc.update_typed_set(observation.members, input_members)
def get_provenance(almaca_name):
    """Build the CASA ``Provenance`` for a calibrated ALMA product.

    The version and execution time are recovered from the
    ``casa-<timestamp>.log`` files found in ``almaca_name.log_dir``:

    - ``version`` is the text following ``'CASA Version '`` on the last
      line that contains it, with surrounding whitespace (including the
      line terminator) removed;
    - ``last_executed`` is derived from the ``<timestamp>`` portion of the
      log file name.

    Log files are visited in sorted name order, so when several are present
    the result no longer depends on the arbitrary order of ``os.listdir``.
    Both values stay ``None`` when the directory or the logs are missing.

    :param almaca_name: naming object providing ``log_dir`` and the
        ``_science_goal_id``, ``_group_id``, ``_mous_id`` and ``_asdm_id``
        identifiers, which are recorded as provenance keywords.
    :return: a ``Provenance`` named ``'CASA'``.
    """
    # HK 14-08-19
    # provenance: version - capture the information on what version of
    # CASA was used to run the calibration script. We might appreciate
    # having that information saved later on (as might an advanced user).
    # This would be possible to capture from the 'casa[date].log' file
    # generated automatically during processing - the second line
    # includes 'CASA version XXX'.
    version_result = None
    last_result = None
    log_dir = almaca_name.log_dir
    # routine progress information, so it does not belong at ERROR level
    logging.debug('checking {}'.format(log_dir))
    if os.path.exists(log_dir):
        for ii in sorted(os.listdir(log_dir)):
            if not (ii.startswith('casa-') and ii.endswith('.log')):
                continue
            log_fqn = os.path.join(log_dir, ii)
            if not os.path.exists(log_fqn):
                # e.g. a dangling symbolic link
                continue
            with open(log_fqn, 'r') as f:
                # stream the log instead of loading it all into memory
                for jj in f:
                    # test for the exact token that is split on, so a line
                    # without the trailing space cannot raise IndexError
                    if 'CASA Version ' in jj:
                        version_result = jj.split(
                            'CASA Version ')[1].strip()

            # get the timestamp from the filename, use it as the
            # 'last_executed'
            temp = ii.replace('casa-', '').replace('.log', '')
            last_result = datetime.fromtimestamp(mc.make_seconds(temp))
    # TODO time.Time(override.get('casa_run_date')).datetime

    # The rest of the MAG seemed less concerned about the various OUS IDs
    # being searchable within the archive. I think it would still be best
    # to include the information somewhere just in case. My guess is that
    # the ASDM UID is the most important one to be searchable, and that it
    # would also be quite appropriate to be listed as the 'reference' under
    # 'provenance'. (It might even eventually be linked directly to the
    # associated raw data file.) The rest of the science/group/member OUS
    # IDs could perhaps be listed within the keywords section like this:
    #
    # ScienceGoalOUSID: [ugly string]; GroupOUSID: [ugly string#2];
    # MemberOUSID: [ugly string#3] (or whatever formatting will work within
    # the keyword field).
    provenance = Provenance(name='CASA',
                            version=version_result,
                            last_executed=last_result,
                            reference='https://casa.nrao.edu/')
    provenance.keywords.add(
        f'ScienceGoalOUSID: {almaca_name._science_goal_id}')
    provenance.keywords.add(f'GroupOUSID: {almaca_name._group_id}')
    provenance.keywords.add(f'MemberOUSID: {almaca_name._mous_id}')
    provenance.keywords.add(f'ASDM ID: {almaca_name._asdm_id}')
    return provenance
def copy_provenance(from_provenance):
    """Create a new Provenance with the same content as an existing one.

    The name, version, project, producer, run_id, reference and
    last_executed values are handed to the constructor unchanged. Each entry
    of ``inputs`` and of ``keywords`` is then added to the new instance's
    own collections; the entries themselves are not cloned.

    :param from_provenance: Provenance to copy from
    :return: a new Provenance carrying the same values, inputs and keywords
    """
    constructor_fields = (
        'name',
        'version',
        'project',
        'producer',
        'run_id',
        'reference',
        'last_executed',
    )
    duplicate = Provenance(
        **{
            field: getattr(from_provenance, field)
            for field in constructor_fields
        }
    )
    for plane_uri in from_provenance.inputs:
        duplicate.inputs.add(plane_uri)
    for keyword in from_provenance.keywords:
        duplicate.keywords.add(keyword)
    return duplicate
def test_validate_observation():
    """validate() passes a minimal observation and raises on a keyword-laden one."""
    # A bare SimpleObservation is expected to validate without raising.
    validate(
        SimpleObservation(
            'test_collection', 'test_obs_id', Algorithm('test_name')
        )
    )

    derived = DerivedObservation(
        'test_collection',
        'test_obs_id',
        Algorithm('test_name'),
        proposal=Proposal('test_proposal'),
        telescope=Telescope('test_telescope'),
        instrument=Instrument('test_instrument'),
        target=Target('test_targets'),
    )
    # NOTE: set(<str>) yields a set of the string's individual characters,
    # not a one-element set holding the whole string.
    derived.algorithm.keywords = 'foo'
    derived.proposal.keywords = set('foo=42')
    derived.telescope.keywords = set('foo:42')
    derived.instrument.keywords.add("tick'marks")
    derived.target.keywords = set('has multiple spaces')

    plane = Plane('test_plane')
    plane.provenance = Provenance('test_provenance')
    plane.provenance.keywords.add('pipe|denied')
    derived.planes['test_plane'] = plane

    with pytest.raises(AssertionError):
        validate(derived)
def _update_cal_provenance(observation, headers):
    """Attach the calibration files named in the primary header as inputs.

    Every keyword of ``headers[0]`` that starts with ``F_ON`` or ``F_OFF``
    is read as a file name. The base name of that file, with its extensions
    removed, is wrapped as ``C<base>_CAL`` and used both as the observation
    id of the member and as the product id of the input plane.

    :param observation: observation whose ``members`` and whose planes'
        ``provenance.inputs`` are updated in place. A plane with no
        provenance is given a new ``Provenance('CPAPIR')`` first.
    :param headers: sequence of headers; only the first one is consulted.
    """
    input_planes = TypedSet(PlaneURI)
    input_members = TypedSet(ObservationURI)
    primary_header = headers[0]

    for card_name in primary_header:
        if not card_name.startswith(('F_ON', 'F_OFF')):
            continue
        stem = OmmName.remove_extensions(
            os.path.basename(primary_header.get(card_name))
        )
        cal_id = 'C{}_CAL'.format(stem)
        member_uri = ObservationURI(
            ec.CaomName.make_obs_uri_from_obs_id(COLLECTION, cal_id)
        )
        # the product id is the same as the observation id for OMM
        input_planes.add(PlaneURI.get_plane_uri(member_uri, cal_id))
        input_members.add(member_uri)

    for plane in observation.planes.values():
        if plane.provenance is None:
            plane.provenance = Provenance('CPAPIR')
        mc.update_typed_set(plane.provenance.inputs, input_planes)

    mc.update_typed_set(observation.members, input_members)
def _update_catalog_plane(obs):
    """Fill in release dates, type, level and provenance of catalog planes.

    Only planes whose ``product_id`` contains ``'catalog'`` are touched.
    Provenance values are looked up in ``catalog_blueprint``, and
    ``catalog_uri`` is used in the log messages; both names come from the
    enclosing scope.

    :param obs: observation to update in place; must not be None.
    """
    logging.debug(f'Begin _update_catalog_plane for {catalog_uri}.')
    assert obs is not None, 'Must have an observation to update'

    for plane in obs.planes.values():
        if 'catalog' not in plane.product_id:
            continue

        plane.meta_release = obs.meta_release
        # NOTE(review): data_release is also taken from obs.meta_release -
        # confirm that is the intended release date.
        plane.data_release = obs.meta_release
        plane.data_product_type = DataProductType.CATALOG
        plane.calibration_level = CalibrationLevel.CALIBRATED

        provenance = Provenance(
            catalog_blueprint._get('Plane.provenance.name')
        )
        plane.provenance = provenance
        provenance.project = catalog_blueprint._get(
            'Plane.provenance.project'
        )
        provenance.producer = catalog_blueprint._get(
            'Plane.provenance.producer'
        )
        provenance.reference = catalog_blueprint._get(
            'Plane.provenance.reference'
        )

        # the blueprint holds the inputs as one whitespace-separated string
        input_uris = catalog_blueprint._get('Plane.provenance.inputs')
        if input_uris:
            for uri in input_uris.split():
                plane.provenance.inputs.add(PlaneURI(str(uri)))

    logging.debug(f'Done _update_catalog_plane for {catalog_uri}.')
def _update_from_comment(observation, phangs_name, headers): # From ER: 04-03-21 # COMMENT Produced with PHANGS-ALMA pipeline version 4.0 Build 935 # - Provenance.version # COMMENT Galaxy properties from PHANGS sample table version 1.6 # COMMENT Calibration Level 4 (ANALYSIS_PRODUCT) # - Calibration level (either 3 or 4) # COMMENT PHANGS-ALMA Public Release 1 # - Provenance.project = PHANGS-ALMA # COMMENT Generated by the Physics at High Angular resolution # COMMENT in nearby GalaxieS (PHANGS) collaboration # - Provenance.organization = PHANGS # COMMENT Canonical Reference: Leroy et al. (2021), ApJ, Submitted # - Update to reference when accepted # COMMENT Release generated at 2021-03-04T07:28:10.245340 # - Provenance.lastExecuted # COMMENT Data from ALMA Proposal ID: 2017.1.00886.L # - Proposal.proposalID # COMMENT ALMA Proposal PI: Schinnerer, Eva # - Proposal.pi_name # COMMENT Observed in MJD interval [58077.386275,58081.464121] # COMMENT Observed in MJD interval [58290.770032,58365.629222] # COMMENT Observed in MJD interval [58037.515807,58047.541173] # COMMENT Observed in MJD interval [58353.589805,58381.654757] # COMMENT Observed in MJD interval [58064.3677,58072.458597] # COMMENT Observed in MJD interval [58114.347649,58139.301879] chunk = None for plane in observation.planes.values(): if plane.product_id != phangs_name.product_id: continue if plane.provenance is None: plane.provenance = Provenance(name='PHANGS-ALMA pipeline') for artifact in plane.artifacts.values(): if artifact.uri != phangs_name.file_uri: continue for part in artifact.parts.values(): chunk = part.chunks[0] break for entry in headers[0].get('COMMENT'): if 'pipeline version ' in entry: plane.provenance.version = entry.split(' version ')[1] elif 'Calibration Level' in entry: level = entry.split()[2] if level == '4': plane.calibration_level = CalibrationLevel.ANALYSIS_PRODUCT elif 'PHANGS-ALMA Public Release' in entry: plane.provenance.project = 'PHANGS-ALMA' elif 'in nearby GalaxieS (PHANGS) 
collaboration' in entry: plane.provenance.organization = 'PHANGS' elif 'Release generated at ' in entry: plane.provenance.last_executed = mc.make_time_tz( entry.split(' at ')[1]) elif 'Data from ALMA Proposal ID:' in entry: observation.proposal = Proposal(entry.split(':')[1].strip()) elif 'Canonical Reference: ' in entry: plane.provenance.producer = entry.split(': ')[1] elif 'ALMA Proposal PI:' in entry: observation.proposal.pi_name = entry.split(': ')[1] elif 'Observed in MJD interval ' in entry: if chunk is not None: bits = entry.split()[4].split(',') start_ref_coord = RefCoord( 0.5, mc.to_float(bits[0].replace('[', ''))) end_ref_coord = RefCoord( 1.5, mc.to_float(bits[1].replace(']', ''))) sample = CoordRange1D(start_ref_coord, end_ref_coord) if chunk.time is None: coord_bounds = CoordBounds1D() axis = CoordAxis1D(axis=Axis('TIME', 'd')) chunk.time = TemporalWCS(axis, timesys='UTC') chunk.time.axis.bounds = coord_bounds chunk.time.axis.bounds.samples.append(sample)
def stuff(args):
    """Build a SimpleObservation from TAOS HDF5 files.

    For every file in ``args.local`` the ``header``, ``image``, ``catalog``,
    ``imager``, ``moment``, ``window`` and ``/wcs/cdmatrix`` tables are read,
    and an observation with one plane, one artifact, one part (named '0')
    and three chunks (one per telescope index 0, 1, 2) is assembled.

    :param args: object providing ``local`` (file names), ``lineage``
        (entries whose text before the first '/' is the product id, indexed
        in step with ``local``) and ``observation`` (element 1 is used as
        the observation id).
    :return: the observation built for the last file in ``args.local``, or
        None when ``args.local`` is empty.
    """
    observation = None
    for position_in_list, file_name in enumerate(args.local):
        product_id = args.lineage[position_in_list].split('/')[0]

        # header columns: ['VERSION_MAJOR', 'VERSION_MINOR', 'TIME_IN_SEC',
        # 'TIME_IN_MICROSEC', 'RUN_ID', 'ORIGIN', 'OBSMODE', 'FIELD', 'RA',
        # 'DEC', 'EXPTIME', 'NUM_IMAGER']
        header_table = Table.read(file_name, format='hdf5', path='header')
        # RUN_ID decodes to a string such as 20190805T024026
        run_id = header_table['RUN_ID'].data[0].decode()
        release_date = datetime.strptime(run_id, '%Y%m%dT%H%M%S')

        # These tables are read but their content is not used below.
        # image columns: ['col0', 'col1', 'col2']
        # catalog columns: ['CAT_ID', 'GAIA_ID', '2MASS_ID', 'RA', 'DEC',
        # 'TAOS_MAG', 'GAIA_MAG', '2MASS_JMAG']
        # imager columns: ['TEL_ID', 'CAM_ID', 'IMGR_ID', 'XLOC', 'YLOC']
        # moment columns: ['col0', 'col1', 'col2']
        for unused_path in ('image', 'catalog', 'imager', 'moment'):
            Table.read(file_name, format='hdf5', path=unused_path)

        # window columns: ['X0', 'X1', 'Y0', 'Y1', 'XC', 'YC']
        window_table = Table.read(file_name, format='hdf5', path='window')
        # wcs columns: ['CRVAL1', 'CRVAL2', 'CRPIX1', 'CRPIX2', 'CD1_1',
        # 'CD1_2', 'CD2_1', 'CD2_2']
        wcs_table = Table.read(
            file_name, format='hdf5', path='/wcs/cdmatrix'
        )

        observation = SimpleObservation(
            collection=COLLECTION,
            observation_id=args.observation[1],
            sequence_number=None,
            intent=ObservationIntentType.SCIENCE,
            type='FIELD',
            proposal=Proposal(
                id=COLLECTION,
                pi_name=None,
                project=COLLECTION,
                title=None,
            ),
            telescope=Telescope(
                name='TAOS',
                geo_location_x=-2354953.99637757,
                geo_location_y=-4940160.3636381,
                geo_location_z=3270123.70695983,
            ),
            instrument=None,
            target=Target(
                name=str(header_table['FIELD'].data[0]),
                target_type=TargetType.FIELD,
                standard=None,
                redshift=None,
                keywords=None,
                moving=None,
            ),
            meta_release=release_date,
        )

        software_version = '{}.{}'.format(
            header_table['VERSION_MAJOR'].data[0],
            header_table['VERSION_MINOR'].data[0],
        )
        plane = Plane(
            product_id=product_id,
            data_release=release_date,
            meta_release=release_date,
            provenance=Provenance(
                name=COLLECTION,
                version=software_version,
                project=COLLECTION,
                producer=COLLECTION,
                run_id=run_id,
                reference='https://taos2.asiaa.sinica.edu.tw/',
                last_executed=release_date,
            ),
            data_product_type=DataProductType.IMAGE,
            calibration_level=CalibrationLevel.RAW_STANDARD,
        )

        artifact = mc.get_artifact_metadata(
            file_name,
            ProductType.SCIENCE,
            ReleaseType.DATA,
            mc.build_uri(COLLECTION, os.path.basename(file_name)),
        )

        # parts are always named '0'
        part = Part('0')
        # one chunk for each of the three telescopes
        for telescope_index in range(3):
            position_wcs = build_position(
                wcs_table, window_table, telescope_index
            )
            time_wcs = build_time(
                header_table['TIME_IN_SEC'].data[0],
                header_table['TIME_IN_MICROSEC'].data[0],
            )
            energy_wcs = build_energy()
            part.chunks.append(
                Chunk(
                    naxis=4,
                    position_axis_1=1,
                    position_axis_2=2,
                    energy_axis=3,
                    time_axis=4,
                    position=position_wcs,
                    energy=energy_wcs,
                    time=time_wcs,
                )
            )

        artifact.parts.add(part)
        plane.artifacts.add(artifact)
        observation.planes.add(plane)

    return observation