def test_caom_name():
    """Check the name attributes CaomName derives from a compressed-file URI."""
    test_subject = mc.CaomName(uri='ad:TEST/test_obs_id.fits.gz')
    assert test_subject.file_id == 'test_obs_id'
    assert test_subject.file_name == 'test_obs_id.fits.gz'
    assert test_subject.uncomp_file_name == 'test_obs_id.fits'

    # the observation URI is built by a class-level helper, no instance needed
    obs_uri = mc.CaomName.make_obs_uri_from_obs_id('TEST', 'test_obs_id')
    assert (obs_uri == 'caom:TEST/test_obs_id')
def get_version(entry):
    """Return the integer version embedded in a file name.

    :param entry: a URI or just the file name. Anything containing a '/'
        is treated as a URI and reduced to its file name with mc.CaomName.
    :return: the result of mc.to_int on the eighth dot-separated token of
        the file name, after every 'v' has been removed from it.
    """
    # file name looks like:
    # 'VLASS1.2.ql.T20t12.J092604+383000.10.2048.v2.I.iter1.image.
    # 'pbcor.tt0.rms.subim.fits'
    file_name = mc.CaomName(entry).file_name if '/' in entry else entry
    version_token = file_name.split('.')[7]
    return mc.to_int(version_token.replace('v', ''))
def __init__(
    self,
    obs_id=None,
    fname_on_disk=None,
    file_name=None,
    artifact_uri=None,
    entry=None,
):
    """Initialize the naming attributes from an observation ID or a file.

    When obs_id is provided, the file-related attributes are left as None.
    Otherwise the file name is taken from the first of file_name,
    fname_on_disk or artifact_uri that is not None, and the observation
    ID is derived from that file name.

    :param obs_id: observation ID. When None, at least one of the three
        file parameters must be provided.
    :param fname_on_disk: name of the file on disk; only the base name is
        kept as the file name.
    :param file_name: file name.
    :param artifact_uri: artifact URI, reduced to a file name with
        mc.CaomName.
    :param entry: forwarded to the parent initializer and recorded as the
        single source name.
    :raises mc.CadcException: when obs_id, file_name, fname_on_disk and
        artifact_uri are all None.
    """
    if obs_id is not None:
        # only the observation ID is known, so nothing file-related is set
        self.obs_id = obs_id
        self._file_name = None
        self._file_id = None
        self._product_id = None
        super().__init__(
            obs_id,
            COLLECTION,
            OmmName.OMM_NAME_PATTERN,
            entry=entry,
            scheme='ad',
        )
    else:
        self._file_name = OmmName._pick_file_name(
            obs_id, fname_on_disk, file_name, artifact_uri
        )
        self._file_id = OmmName.remove_extensions(self._file_name)
        # preview and thumbnail files share the product ID of the file
        # they were generated from
        without_thumb_suffix = self._file_id.replace('_prev_256', '')
        self._product_id = without_thumb_suffix.replace('_prev', '')
        obs_id = OmmName.get_obs_id(self._file_name)
        super().__init__(
            obs_id,
            COLLECTION,
            OmmName.OMM_NAME_PATTERN,
            fname_on_disk,
            entry=entry,
            scheme='ad',
        )
    self._source_names = [entry]
    self._destination_uris = [self.file_uri]
    self._logger = logging.getLogger(self.__class__.__name__)
    self._logger.debug(self)

@staticmethod
def _pick_file_name(obs_id, fname_on_disk, file_name, artifact_uri):
    """Return the file name from the first non-None source, else raise."""
    if file_name is not None:
        return OmmName._add_extensions(file_name)
    if fname_on_disk is not None:
        return os.path.basename(OmmName._add_extensions(fname_on_disk))
    if artifact_uri is not None:
        return mc.CaomName(artifact_uri).file_name
    raise mc.CadcException(
        f'Bad StorageName initialization for {obs_id}.'
    )
def __init__(self, obs_id=None, file_name=None, instrument=None,
             ad_uri=None, entry=None):
    """Initialize the naming attributes from a file name or an obs ID.

    When obs_id is None, the file name (given directly, or extracted from
    ad_uri when file_name is None) drives the file ID, the suffix, and the
    observation ID. When obs_id is provided, it is used as-is and every
    file-related attribute is set to None.

    :param obs_id: observation ID, or None to derive it from the file name.
    :param file_name: file name, including all extensions.
    :param instrument: value handed to md.Inst; only used when obs_id is
        None.
    :param ad_uri: URI consulted only when file_name is None.
    :param entry: forwarded to the parent initializer only when obs_id is
        None.
    """
    # set compression to an empty string so the file uri method still
    # works, since the file_name element will have all extensions,
    # including the .fz | .gz | '' to indicate compression type
    if obs_id is None:
        super(CFHTName, self).__init__(
            None, COLLECTION, CFHTName.CFHT_NAME_PATTERN, file_name,
            compression='', entry=entry)
        self._instrument = md.Inst(instrument)
        if ad_uri is not None and file_name is None:
            file_name = mc.CaomName(ad_uri).file_name
        self._file_name = file_name
        self._file_id = CFHTName.remove_extensions(file_name)
        # the suffix is the last character of the file ID
        self._suffix = self._file_id[-1]
        if self._instrument in [md.Inst.MEGAPRIME, md.Inst.MEGACAM]:
            # SF - slack - 02-04-20
            # - MegaCam - the logic should be probably be 2 planes: p
            # and o for science. - all cfht exposures are sorted by EXPNUM
            # if i understand their data acquisition. b,f,d,x should be 1
            # plane observations. - my assumption is that the b,f,d,x have
            # no reason to have a processed equivalent.
            if (self._suffix in ['b', 'd', 'f', 'x'] or
                    self._suffix.isnumeric()):
                self._obs_id = self._file_id
            else:
                self._obs_id = self._file_id[:-1]
        else:
            if self.is_simple and not self.is_master_cal:
                self.obs_id = self._file_id[:-1]
            else:
                self.obs_id = self._file_id
                # NOTE(review): nesting of this check under the branch
                # above is assumed - confirm against the formatted file
                if self.is_derived_sitelle:
                    self.obs_id = self.obs_id.replace(self._suffix, 'p')
    else:
        super(CFHTName, self).__init__(
            obs_id, COLLECTION, CFHTName.CFHT_NAME_PATTERN,
            compression='')
        self.obs_id = obs_id
        # nothing is known about a file, so each file-related attribute
        # is set exactly once
        self._instrument = None
        self._file_id = None
        self._file_name = None
        self._suffix = None
def get_time_box_work(self, prev_exec_time, exec_time):
    """
    Query for the artifacts of single-artifact planes in the configured
    collection whose plane data release falls after prev_exec_time and at
    or before exec_time.

    :param prev_exec_time start of the time box, as a POSIX timestamp (it
        is handed to datetime.fromtimestamp)
    :param exec_time end of the time box, as a POSIX timestamp
    :return: a deque of dsc.StateRunnerMeta, one per query row, built from
        the file name in the row's uri and the timestamp of the row's
        lastModified value.
    """
    self._logger.debug('Entering get_time_box_work')

    def _to_utc_str(posix_seconds):
        # datetime format 2019-12-01T00:00:00.000000
        as_utc = datetime.fromtimestamp(posix_seconds, tz=timezone.utc)
        return as_utc.strftime(mc.ISO_8601_FORMAT)

    prev_dt_str = _to_utc_str(prev_exec_time)
    exec_dt_str = _to_utc_str(exec_time)

    # the sub-select limits the planes to those with exactly one artifact
    query = (
        "SELECT A.uri, A.lastModified "
        "FROM caom2.Observation AS O "
        "JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        "JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        "WHERE P.planeID IN ( "
        " SELECT A.planeID "
        " FROM caom2.Observation AS O "
        " JOIN caom2.Plane AS P ON O.obsID = P.obsID "
        " JOIN caom2.Artifact AS A ON P.planeID = A.planeID "
        f" WHERE O.collection = '{self._config.collection}' "
        " GROUP BY A.planeID "
        " HAVING COUNT(A.artifactID) = 1 ) "
        f"AND P.dataRelease > '{prev_dt_str}' "
        f"AND P.dataRelease <= '{exec_dt_str}' "
        "ORDER BY O.maxLastModified ASC "
        ""
    )
    rows = clc.query_tap_client(query, self._query_client)

    # results look like:
    # gemini:GEM/N20191202S0125.fits, ISO 8601
    return deque(
        dsc.StateRunnerMeta(
            mc.CaomName(row['uri']).file_name,
            mc.make_time(row['lastModified']).timestamp(),
        )
        for row in rows
    )
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.

    :param observation: the Observation instance to visit.
    :param kwargs: 'cadc_client', 'stream' and 'observable' are consulted,
        plus the optional 'working_directory', which defaults to './'.
    :return: None when there is no cadc_client, otherwise a dict with the
        single key 'observation'.
    :raises mc.CadcException: when 'stream' or 'observable' is missing.
    """
    mc.check_param(observation, Observation)

    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update. Stopping pull visitor.')
        return None
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')

    obs_id = observation.observation_id
    # this counter is reported to the caller, and never incremented here
    count = 0
    if not observable.rejected.is_bad_metadata(obs_id):
        for plane in observation.planes.values():
            release = plane.data_release
            if release is None or release > datetime.utcnow():
                logging.error(f'Plane {plane.product_id} is proprietary '
                              f'until {plane.data_release}. No file access.')
                continue

            for artifact in plane.artifacts.values():
                if gem_name.GemName.is_preview(artifact.uri):
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    file_url = f'{FILE_URL}/{f_name}'
                    mc.look_pull_and_put(
                        f_name,
                        working_dir,
                        file_url,
                        gem_name.ARCHIVE,
                        stream,
                        MIME_TYPE,
                        cadc_client,
                        artifact.content_checksum.checksum,
                        observable.metrics,
                    )
                except Exception as e:
                    # failures the rejected record accounts for are
                    # swallowed, anything else is propagated
                    recorded = observable.rejected.check_and_record(
                        str(e), observation.observation_id)
                    if not recorded:
                        raise e
    else:
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')

    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return {'observation': count}
def _do_prev(artifact, plane, working_dir, cadc_client, stream, observable):
    """Generate, store, and record the preview and thumbnail for an artifact.

    The science file is read from working_dir, under the file name taken
    from the artifact URI. Both plots are written to working_dir as well.

    :return: the constant 2, after both _augment calls on the plane.
    """
    science_name = mc.CaomName(artifact.uri).file_name
    neoss_name = NEOSSatName(file_name=science_name)

    preview = neoss_name.prev
    thumb = neoss_name.thumb
    preview_fqn = os.path.join(working_dir, preview)
    thumb_fqn = os.path.join(working_dir, thumb)
    science_fqn = os.path.join(working_dir, science_name)

    # both plots come from the first extension of the science file
    image_data = fits.getdata(science_fqn, ext=0)
    image_header = fits.getheader(science_fqn, ext=0)
    for plot_fqn, plot_size in ((preview_fqn, 1024), (thumb_fqn, 256)):
        _generate_plot(plot_fqn, plot_size, image_data, image_header)

    prev_uri = neoss_name.prev_uri
    thumb_uri = neoss_name.thumb_uri
    _store_smalls(
        cadc_client,
        working_dir,
        preview,
        thumb,
        observable.metrics,
        stream,
    )
    _augment(plane, prev_uri, preview_fqn, ProductType.PREVIEW)
    _augment(plane, thumb_uri, thumb_fqn, ProductType.THUMBNAIL)
    return 2
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.

    :param observation: the Observation instance to visit.
    :param kwargs: 'clients', 'observable', 'metadata_reader' and
        'storage_name' are consulted, plus the optional
        'working_directory', which defaults to './'.
    :return: None when there are no clients, otherwise the observation.
    :raises mc.CadcException: when 'observable', 'metadata_reader' or
        'storage_name' is missing.
    """
    mc.check_param(observation, Observation)

    working_dir = kwargs.get('working_directory', './')
    clients = kwargs.get('clients')
    if clients is None:
        logging.warning('Need clients to update. Stopping pull visitor.')
        return
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    metadata_reader = kwargs.get('metadata_reader')
    if metadata_reader is None:
        raise mc.CadcException('Visitor needs a metadata_reader parameter.')
    storage_name = kwargs.get('storage_name')
    if storage_name is None:
        raise mc.CadcException('Visitor needs a storage_name parameter.')

    if observable.rejected.is_bad_metadata(observation.observation_id):
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    else:
        for plane in observation.planes.values():
            if (plane.data_release is None or
                    plane.data_release > datetime.utcnow()):
                logging.info(
                    f'Plane {plane.product_id} is proprietary. No file '
                    f'access.')
                continue

            for artifact in plane.artifacts.values():
                # compare file names, because part of this visitor is to
                # change the URIs
                artifact_f_name = artifact.uri.split('/')[-1]
                if artifact_f_name != storage_name.file_name:
                    logging.debug(
                        f'Leave {artifact.uri}, want {storage_name.file_uri}')
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    if '.jpg' not in f_name:
                        logging.debug(f'Checking for {f_name}')
                        file_url = f'{FILE_URL}/{f_name}'
                        fqn = os.path.join(working_dir, f_name)

                        # want to compare the checksum from the JSON, and the
                        # checksum at CADC storage - if they are not the same,
                        # retrieve the file from archive.gemini.edu again
                        json_md5sum = metadata_reader.file_info.get(
                            artifact.uri).md5sum
                        look_pull_and_put(artifact.uri, fqn, file_url,
                                          clients, json_md5sum)
                        if os.path.exists(fqn):
                            logging.info(
                                f'Removing local copy of {f_name} after '
                                f'successful storage call.')
                            os.unlink(fqn)
                except Exception as e:
                    # failures the rejected record accounts for are
                    # swallowed, anything else is propagated
                    if not (observable.rejected.check_and_record(
                            str(e), observation.observation_id)):
                        raise e
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return observation
def get_proposal_id(uri):
    """Return the first two dot-separated tokens of the file name in uri.

    :param uri: URI handed to mc.CaomName to obtain the file name.
    :return: the two tokens, joined with a '.'.
    """
    name_parts = mc.CaomName(uri).file_name.split('.')
    return f'{name_parts[0]}.{name_parts[1]}'
pass print(f'::: create observation {collection} {obs_id}') caom_client.create(actual_obs) print(f'::: read observation from sc2repo') obs_from_service = caom_client.read(collection, obs_id) mc.write_obs_to_file(obs_from_service, round_trip_fqn) try: msg = mc.compare_observations(round_trip_fqn, expected_fqn) print(msg) except Exception as e: print(f'comparison of {round_trip_fqn} and {expected_fqn} failed') print(e) for plane in obs_from_service.planes.values(): for artifact in plane.artifacts.values(): if '.fits' in artifact.uri: f_name = mc.CaomName(uri=artifact.uri).file_name todo_list.append(f_name) # check that no clean up occurred, because this was supposed to be # a SCRAPE + MODIFY configuration, where cleaning up doesn't make # sense if question(collection): for ii in ['/data/failure', '/data/success']: listing = os.listdir(ii) if len(listing) > 0: assert False, f'Bad cleanup. There should be no files in {ii}.' print('::: update the config for ingest') config = mc.Config() config.get_executors()
def get_target_name(uri):
    """Return the part of the file name in uri that precedes the first '.'.

    :param uri: URI handed to mc.CaomName to obtain the file name.
    """
    file_name = mc.CaomName(uri).file_name
    target_name, _, _ = file_name.partition('.')
    return target_name
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes (an n:n
    relationship between TDM attributes and CAOM attributes). Must have this
    signature for import_module loading and execution.

    Only the plane whose product_id matches the one derived from the uri or
    fqn keyword argument is modified. For each part of that plane's
    artifacts, the product type is set from the EXTNAME header keyword, and
    the chunks are either updated or removed, depending on the product type.

    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else. The keys read here are 'headers'
        (indexed by the integer value of each part name), 'fqn' and 'uri'.
        When both 'fqn' and 'uri' are provided, 'fqn' takes precedence.
    :return: the observation instance that was passed in.
    :raises mc.CadcException: when neither 'fqn' nor 'uri' is provided.
    """
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)

    headers = kwargs.get('headers')
    fqn = kwargs.get('fqn')
    uri = kwargs.get('uri')
    gem_proc_name = None
    # ok not to use builder here, since the obs_id value is never used later
    if uri is not None:
        temp = mc.CaomName(uri).file_name
        gem_proc_name = builder.GemProcName(entry=temp)
    # no elif: a name built from fqn replaces one built from uri
    if fqn is not None:
        gem_proc_name = builder.GemProcName(entry=fqn)
    if gem_proc_name is None:
        raise mc.CadcException(f'Need one of fqn or uri defined for '
                               f'{observation.observation_id}')

    for plane in observation.planes.values():
        # leave every plane other than the one for this file untouched
        if plane.product_id != gem_proc_name.product_id:
            continue

        for artifact in plane.artifacts.values():
            for part in artifact.parts.values():
                # the part name is used as the index into the headers
                idx = mc.to_int(part.name)
                header = headers[idx]
                extname = header.get('EXTNAME')
                # DB 22-07-20
                # There are a few other EXTNAME values to look at for
                # part.ProductType. MDF values would be ‘AUXILIARY’. The
                # ones currently called “CAL” are likely best set to ‘INFO’
                # since it contains info about datasets used to produce the
                # product.
                #
                # DB 07-08-20
                # EXTNAME in (‘DQ’, ‘VAR’) should both have
                # ProductType.NOISE. ‘CAL’ should no longer exist - it’s now
                # BPM.
                #
                # Default type is 'AUXILIARY', 'SCI' is type 'SCIENCE'
                if extname == 'SCI':
                    part.product_type = ProductType.SCIENCE
                elif extname in ['DQ', 'VAR']:
                    part.product_type = ProductType.NOISE
                else:
                    part.product_type = ProductType.AUXILIARY

                # NOTE(review): nothing above assigns ProductType.INFO, so
                # only SCIENCE parts can reach this branch - confirm that
                # the INFO entry is still wanted
                if part.product_type in [
                    ProductType.SCIENCE,
                    ProductType.INFO,
                ]:
                    for chunk in part.chunks:
                        # the filter name is the text before the first '_'
                        # in the FILTER keyword of the first header
                        filter_name = headers[0].get('FILTER').split('_')[0]
                        _update_energy(
                            chunk,
                            headers[idx],
                            filter_name,
                            observation.observation_id,
                        )
                        _update_time(part, chunk, headers[0],
                                     observation.observation_id)
                        if part.product_type == ProductType.SCIENCE:
                            _update_spatial_wcs(
                                part,
                                chunk,
                                headers,
                                observation.observation_id,
                            )
                            chunk.naxis = header.get('NAXIS')
                            # naxis is cleared again when the chunk has no
                            # position
                            if (chunk.position is None and
                                    chunk.naxis is not None):
                                chunk.naxis = None

                        if (chunk.time is not None and
                                chunk.time.axis is not None and
                                chunk.time.axis.function is not None and
                                chunk.time.axis.function.delta == 1.0):
                            # these are the default values, and they make
                            # the time range start in 1858
                            chunk.time = None
                else:
                    # DB 21-07-20
                    # ignore WCS information unless product type == SCIENCE
                    while len(part.chunks) > 0:
                        del part.chunks[-1]

    # fill in the PI name and title only when there is a proposal ID with
    # no PI name yet
    if (observation.proposal is not None and
            observation.proposal.id is not None and
            observation.proposal.pi_name is None):
        program = program_metadata.get_pi_metadata(observation.proposal.id)
        if program is not None:
            observation.proposal.pi_name = program.get('pi_name')
            observation.proposal.title = program.get('title')

    if isinstance(observation, SimpleObservation):
        # undo the observation-level metadata modifications for updated
        # Gemini records
        observation.algorithm = Algorithm(name='exposure')
    else:
        cc.update_observation_members(observation)
    logging.debug('Done update.')
    return observation
def get_proposal_id(self, ext):
    """Return the first two dot-separated tokens of the storage file name.

    :param ext: not used by this method.
    :return: the two tokens, joined with a '.'.
    """
    file_name = mc.CaomName(self._storage_name.file_uri).file_name
    name_parts = file_name.split('.')
    return f'{name_parts[0]}.{name_parts[1]}'
def test_omm_name_dots():
    """Check that dots inside the name survive the file_id extraction."""
    expected_file_id = 'C121121_J024345.57-021326.4_K_SCIRED'
    test_uri = f'ad:OMM/{expected_file_id}.fits.gz'
    actual_file_id = mc.CaomName(test_uri).file_id
    assert expected_file_id == actual_file_id, 'dots messing with things'