def visit(observation, **kwargs):
    """Augment plane provenance (version/reference) from NRAO weblog
    metadata, counting the artifacts updated.

    :param observation: A CAOM Observation model instance.
    :param kwargs: expects 'cadc_client' for input-metadata access; when it
        is absent the visitor logs a warning and does nothing.
    :return: the (possibly modified) observation.
    """
    mc.check_param(observation, Observation)
    cadc_client = kwargs.get('cadc_client')
    # bug fix: 'count' was initialized twice (again inside the else branch)
    count = 0
    if cadc_client is None:
        logging.warning('No cadc_client parameter, no connection for input '
                        'metadata. Stopping time_bounds_augmentation.')
    else:
        # conversation with JJK, 2018-08-08 - until such time as VLASS becomes
        # a dynamic collection, rely on the time information as provided for
        # all observations as retrieved on this date from:
        #
        # https://archive-new.nrao.edu/vlass/weblog/quicklook/*
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                # only fits artifacts have parts; skip previews/thumbnails
                if len(artifact.parts) > 0:
                    logging.debug(f'working on artifact {artifact.uri}')
                    version, reference = _augment_artifact(
                        observation.observation_id, artifact)
                    if version is not None:
                        plane.provenance.version = version
                    if reference is not None:
                        plane.provenance.reference = reference
                    count += 1
    logging.info(f'Completed time bounds augmentation for '
                 f'{observation.observation_id}')
    # module-level metadata cache reset; presumably consumed elsewhere -
    # TODO confirm the global's lifecycle
    global obs_metadata
    obs_metadata = None
    return observation
def visit(observation, **kwargs):
    """Set provenance version/reference on every plane from scraped NRAO
    weblog data, returning the number of artifacts visited."""
    mc.check_param(observation, Observation)
    # conversation with JJK, 2018-08-08 - until such time as VLASS becomes
    # a dynamic collection, rely on the time information as provided for all
    # observations as retrieved on this date from:
    #
    # https://archive-new.nrao.edu/vlass/weblog/quicklook/*
    #
    # The lowest-level index.html files are scraped to create a csv file
    # with observation ID, start time, end time, and exposure time.
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            logging.debug('working on artifact {}'.format(artifact.uri))
            version, reference = _augment(observation.observation_id,
                                          artifact)
            plane.provenance.version = version
            plane.provenance.reference = reference
            count += 1
    logging.info('Completed time bounds augmentation for {}'.format(
        observation.observation_id))
    return {'artifacts': count}
def visit(observation, **kwargs):
    """Generate previews for each local FITS file referenced by the
    observation's artifacts. Raises mc.CadcException when the expected
    file (compressed or uncompressed) is not on disk."""
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    if 'cadc_client' not in kwargs:
        raise mc.CadcException('Need a cadc_client parameter.')
    cadc_client = kwargs['cadc_client']
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            if not artifact.uri.endswith(('.fits.gz', '.fits')):
                continue
            caom_name = ec.CaomName(artifact.uri)
            file_id = caom_name.file_id
            science_fqn = os.path.join(working_dir, caom_name.file_name)
            if not os.path.exists(science_fqn):
                # fall back to the uncompressed name before giving up
                science_fqn = os.path.join(
                    working_dir, caom_name.uncomp_file_name)
                if not os.path.exists(science_fqn):
                    raise mc.CadcException(
                        '{} preview visit file not found'.format(
                            science_fqn))
            logging.debug('working on file {}'.format(science_fqn))
            count += _do_prev(file_id, science_fqn, working_dir, plane,
                              cadc_client)
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    # improvement: the 'headers' kwargs lookup is loop-invariant; the
    # original repeated it inside the innermost chunk loop
    headers = kwargs.get('headers')
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            for part in artifact.parts.values():
                for chunk in part.chunks:
                    if headers is not None:
                        chunk.position.resolution = get_position_resolution(
                            headers)
                    if chunk.energy is not None:
                        # A value of None per Chris, 2018-07-26
                        # Set the value to None here, because the
                        # blueprint is implemented to not set WCS
                        # information to None
                        chunk.energy.restfrq = None
    logging.debug('Done update.')
    return True
def _update_time(chunk, headers):
    """Create TemporalWCS information using FITS header information.
    This information should always be available from the file.

    :param chunk: caom2 Chunk to receive the TemporalWCS.
    :param headers: list of FITS headers; only headers[0] is read.
    """
    logging.debug('Begin _update_time.')
    mc.check_param(chunk, Chunk)
    # 'MJD_STAR' is the keyword as written in the files - TODO confirm
    # against an actual header dump
    mjd_start = headers[0].get('MJD_STAR')
    mjd_end = headers[0].get('MJD_END')
    if mjd_start is None or mjd_end is None:
        mjd_start, mjd_end = ac.find_time_bounds(headers)
    if mjd_start is None or mjd_end is None:
        chunk.time = None
        logging.debug('Cannot calculate mjd_start {} or mjd_end {}'.format(
            mjd_start, mjd_end))
    else:
        # bug fix: original logged mjd_start twice instead of mjd_end
        logging.debug('Calculating range with start {} and end {}.'.format(
            mjd_start, mjd_end))
        start = RefCoord(0.5, mjd_start)
        end = RefCoord(1.5, mjd_end)
        time_cf = CoordFunction1D(1, headers[0].get('TEFF'), start)
        time_axis = CoordAxis1D(Axis('TIME', 'd'), function=time_cf)
        time_axis.range = CoordRange1D(start, end)
        chunk.time = TemporalWCS(time_axis)
        chunk.time.exposure = headers[0].get('TEFF')
        chunk.time.resolution = 0.1
        chunk.time.timesys = 'UTC'
        chunk.time.trefpos = 'TOPOCENTER'
        chunk.time_axis = 4
    logging.debug('Done _update_time.')
def visit(observation, **kwargs):
    """Set or clear the observation-level Requirements flag based on the
    cached QA-rejected list, logging when the value changed."""
    mc.check_param(observation, Observation)
    # conversation with JJK, PD 2018-08-27 - use the observation-level
    # data quality flag.
    #
    # There's no header information, so get the list of QA rejected files
    # from URLs that look like this:
    #
    # https://archive-new.nrao.edu/vlass/quicklook/VLASS*/QA_REJECTED/#
    #
    # and compare against that list. The list gets items added/removed over
    # time.
    count = 0
    original = observation.requirements
    rejected = metadata.cache.is_qa_rejected(observation.observation_id)
    observation.requirements = Requirements(Status.FAIL) if rejected else None
    if observation.requirements != original:
        logging.warning(
            f'Changed requirements to {observation.requirements} '
            f'for {observation.observation_id}.'
        )
        count = 1
    logging.info(
        f'Completed quality augmentation for {observation.observation_id}'
    )
    return observation
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes (an n:n
    relationship between TDM attributes and CAOM attributes). Must have this
    signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    headers = kwargs.get('headers')
    fqn = kwargs.get('fqn')
    uri = kwargs.get('uri')
    # 'fqn' takes precedence over 'uri' when both are supplied
    if fqn is not None:
        phangs_name = PHANGSName(file_name=os.path.basename(fqn))
    elif uri is not None:
        phangs_name = PHANGSName(artifact_uri=uri)
    else:
        phangs_name = None
    if phangs_name is None:
        raise mc.CadcException(f'Need one of fqn or uri defined for '
                               f'{observation.observation_id}')
    _update_from_comment(observation, phangs_name, headers)
    logging.debug('Done update.')
    return observation
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    try:
        mc.check_param(observation, Observation)
        # NOTE(review): the body of this traversal is entirely commented
        # out, so the loop currently only binds 'p' and has no effect on
        # the observation - confirm whether it should be removed or the
        # commented logic re-enabled.
        for plane in observation.planes:
            for artifact in observation.planes[plane].artifacts:
                for part in observation.planes[plane].artifacts[artifact].parts:
                    p = observation.planes[plane].artifacts[artifact].parts[part]
                    # for chunk in p.chunks:
                    #     if 'headers' in kwargs:
                    #         headers = kwargs['headers']
                    #         chunk.position.resolution = get_position_resolution(
                    #             headers)
                    #
                    #     if chunk.energy is not None:
                    #         # A value of None per Chris, 2018-07-26
                    #         # Set the value to None here, because the
                    #         # blueprint is implemented to not set WCS
                    #         # information to None
                    #         chunk.energy.restfrq = None
        logging.debug('Done update.')
        return observation
    except mc.CadcException as e:
        # on failure, log and return None so the caller can skip ingestion
        tb = traceback.format_exc()
        logging.debug(tb)
        logging.error(e)
        logging.error(
            'Terminating ingestion for {}'.format(observation.observation_id))
        return None
def visit(observation, **kwargs):
    """Generate a thumbnail per plane from the plane's first artifact."""
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    if 'cadc_client' not in kwargs:
        raise mc.CadcException('Visitor needs a cadc_client parameter.')
    cadc_client = kwargs['cadc_client']
    if 'stream' not in kwargs:
        raise mc.CadcException('Visitor needs a stream parameter.')
    stream = kwargs['stream']
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            file_id = ec.CaomName(artifact.uri).file_id
            logging.debug('Generate thumbnail for file id {}'.format(file_id))
            count += _do_prev(file_id, working_dir, plane, cadc_client,
                              stream)
            # only the first artifact of a plane is used - presumably one
            # preview per plane; carried over from the original
            break
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def visit(observation, **kwargs):
    """Create previews for the science file in each plane and remove any
    .jpg artifacts encountered during the pass."""
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Visitor needs a cadc_client parameter to store images.')
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    science_file = kwargs.get('science_file')
    count = 0
    for plane in observation.planes.values():
        stale_previews = []
        for artifact in plane.artifacts.values():
            if artifact.uri.endswith(science_file):
                count += _do_prev(artifact, plane, working_dir, cadc_client,
                                  stream, observable)
            if artifact.uri.endswith('.jpg'):
                stale_previews.append(artifact.uri)
        # removal deferred to avoid mutating the dict while iterating
        for uri in stale_previews:
            plane.artifacts.pop(uri)
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def _update_energy(self, chunk):
    """Create SpectralWCS information using FITS headers, if available.

    If the WLEN and BANDPASS keyword values are set to the defaults, there
    is no energy information."""
    self._logger.debug('Begin _update_energy')
    mc.check_param(chunk, Chunk)
    header = self._headers[0]
    wlen = header.get('WLEN')
    bandpass = header.get('BANDPASS')
    no_energy = (wlen is None or wlen < 0
                 or bandpass is None or bandpass < 0)
    if no_energy:
        chunk.energy = None
        chunk.energy_axis = None
        self._logger.debug(
            f'Setting chunk energy to None because WLEN {wlen} and '
            f'BANDPASS {bandpass}'
        )
    else:
        axis = CoordAxis1D(Axis('WAVE', 'um'))
        axis.range = CoordRange1D(
            RefCoord(0.5, self.get_start_ref_coord_val(0)),
            RefCoord(1.5, self.get_end_ref_coord_val(0)),
        )
        chunk.energy = SpectralWCS(
            axis,
            specsys='TOPOCENT',
            ssysobs='TOPOCENT',
            ssyssrc='TOPOCENT',
            bandpass_name=header.get('FILTER'),
        )
        chunk.energy_axis = None
        self._logger.debug('Setting chunk energy range (CoordRange1D).')
def visit(observation, **kwargs):
    """
    Clean up the issue described here (multiple planes for the same photons):
    https://github.com/opencadc-metadata-curation/omm2caom2/issues/3

    Keeps only the plane whose non-preview artifact has the most recent
    CADC-storage ingestion timestamp; all other planes are removed.
    """
    mc.check_param(observation, Observation)
    logging.info(f'Begin cleanup augmentation for '
                 f'{observation.observation_id}')
    cadc_client = kwargs.get('cadc_client')
    count = 0
    if cadc_client is None:
        logging.warning(
            'Stopping. Need a CADC Client for cleanup augmentation.')
    else:
        if len(observation.planes) > 1:
            # from Daniel, Sylvie - 21-05-20
            # How to figure out which plane is newer:
            # SB - I do not think that we should use the “VERSION” keyword.
            # I think we must go with the ingested date.
            #
            # Daniel Durand
            # Might be better indeed. Need to compare the SCI and the REJECT
            # file and see which one is the latest
            latest_plane_id = None
            latest_timestamp = None
            # temp accumulates the product_ids of planes superseded by a
            # newer one; duplicates are removed via set() below
            temp = []
            for plane in observation.planes.values():
                for artifact in plane.artifacts.values():
                    # previews carry no useful ingestion timestamp
                    if OmmName.is_preview(artifact.uri):
                        continue
                    meta = cadc_client.info(artifact.uri)
                    if meta is None:
                        logging.warning(
                            f'Did not find {artifact.uri} in CADC storage.')
                    else:
                        if latest_plane_id is None:
                            # first candidate becomes the provisional keeper
                            latest_plane_id = plane.product_id
                            latest_timestamp = mc.make_time(meta.lastmod)
                        else:
                            current_timestamp = mc.make_time(meta.lastmod)
                            if current_timestamp > latest_timestamp:
                                # newer plane found: previous keeper is
                                # scheduled for deletion
                                latest_timestamp = current_timestamp
                                temp.append(latest_plane_id)
                                latest_plane_id = plane.product_id
                            else:
                                temp.append(plane.product_id)
            delete_list = list(set(temp))
            for entry in delete_list:
                logging.warning(f'Removing plane {entry} from observation '
                                f'{observation.observation_id}. There are '
                                f'duplicate photons.')
                count += 1
                observation.planes.pop(entry)
                # alerting is part of the contract for this cleanup
                _send_slack_message(entry)
    logging.info(f'Completed cleanup augmentation for '
                 f'{observation.observation_id}')
    return observation
def visit(observation, **kwargs):
    """
    NRAO reprocesses tile + image phase center files. This visitor ensures
    the respective artifacts are removed from the observations if old
    versions of those files are removed.
    """
    mc.check_param(observation, Observation)
    url = kwargs.get('url')
    if url is None:
        logging.error(f'Require url for cleanup augmentation of '
                      f'{observation.observation_id}')
        return
    count = 0
    for plane in observation.planes.values():
        candidates = []
        # SG - 25-03-20 - later versions of files are replacements, so just
        # automatically remove the 'older' artifacts.
        #
        # quicklook check is to cover the future case of having cubes in
        # the collection
        if (len(plane.artifacts) > 2
                and plane.product_id.endswith('quicklook')):
            # first - get the newest version
            max_version = 1
            for artifact in plane.artifacts.values():
                if len(artifact.parts) > 0:
                    # check only fits uris
                    max_version = max(
                        max_version, sn.VlassName.get_version(artifact.uri))
            # now collect the list of artifacts not at the maximum version
            for artifact in plane.artifacts.values():
                if len(artifact.parts) > 0:
                    # check only fits uris
                    if sn.VlassName.get_version(artifact.uri) != max_version:
                        candidates.append(artifact.uri)
                # SG - 03-02-21 - use the full fits filename plus
                # _prev/_prev_256 for the preview/thumbnail file names, so
                # need to clean up the preview obs_id-based artifact URIs.
                # The observation IDs are missing '.ql', so it's a safe
                # way to find the artifacts to be removed.
                if (artifact.uri.startswith(
                        f'ad:VLASS/{observation.observation_id}')
                        and artifact.uri.endswith('.jpg')):
                    candidates.append(artifact.uri)
        for entry in set(candidates):
            logging.warning(
                f'Removing artifact {entry} from observation '
                f'{observation.observation_id}, plane {plane.product_id}.')
            count += 1
            observation.planes[plane.product_id].artifacts.pop(entry)
    logging.info(
        f'Completed cleanup augmentation for {observation.observation_id}. '
        f'Remove {count} artifacts from the observation.')
    return {'artifacts': count}
def visit(observation, **kwargs):
    """Find provenance inputs for the plane matching storage_name, and for
    derived observations, member observation metadata.

    :param observation: A CAOM Observation model instance.
    :param kwargs: expects 'working_directory', 'storage_name', and, for
        derived observations, 'observable' and 'caom_repo_client'.
    :return: dict with the count of provenance updates made.
    """
    mc.check_param(observation, Observation)
    working_directory = kwargs.get('working_directory', './')
    storage_name = kwargs.get('storage_name')
    if storage_name is None:
        raise mc.CadcException(
            f'Must have a storage_name parameter for provenance_augmentation '
            f'for {observation.observation_id}')
    config = mc.Config()
    config.get_executors()
    if mc.TaskType.SCRAPE in config.task_types:
        # SCRAPE has no network access, so no TAP queries are possible
        logging.warning(f'Provenance augmentation does not work for SCRAPE.')
        return {'provenance': 0}
    subject = clc.define_subject(config)
    tap_client = CadcTapClient(subject, config.tap_id)
    count = 0
    obs_members = TypedSet(ObservationURI, )
    for plane in observation.planes.values():
        plane_inputs = TypedSet(PlaneURI, )
        for artifact in plane.artifacts.values():
            if storage_name.file_uri == artifact.uri:
                count = _do_provenance(
                    working_directory, storage_name.file_name, observation,
                    tap_client, plane_inputs, obs_members, config,
                )
        if plane.provenance is not None:
            plane.provenance.inputs.update(plane_inputs)
    if isinstance(observation, DerivedObservation):
        observation.members.update(obs_members)
        if len(observation.members) > 0:
            observable = kwargs.get('observable')
            caom_repo_client = kwargs.get('caom_repo_client')
            if caom_repo_client is None:
                logging.warning(f'Warning: Must have a caom_repo_client for '
                                f'members metadata for '
                                f'{observation.observation_id}.')
            else:
                # bug fix: previously _do_members_metadata was invoked even
                # when caom_repo_client was None, despite the warning
                _do_members_metadata(
                    observation, caom_repo_client, observation.members,
                    observable.metrics,
                )
    logging.info(
        f'Done provenance_augmentation for {observation.observation_id}')
    return {'provenance': count}
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.

    Currently only sets plane-level time information via _update_time; the
    position-bounds logic is commented-out work in progress.

    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    # NOTE(review): headers and fqn are extracted but never used below
    headers = None
    if 'headers' in kwargs:
        headers = kwargs['headers']
    fqn = None
    if 'fqn' in kwargs:
        fqn = kwargs['fqn']
    # from caom2 import shape, Point, Position
    #
    # HDU 0 in drao_60rad.mod.fits:
    # SIMPLE = T / conforms to FITS standard
    # BITPIX = -64 / array data type
    # NAXIS = 3 / number of array dimensions
    # NAXIS1 = 720
    # NAXIS2 = 360
    # NAXIS3 = 161
    # COMMENT FITS (Flexible Image Transport System) format is defined in 'Astronomy
    # COMMENT and Astrophysics', volume 376, page 359; bibcode: 2001A&A...376..359H
    # CTYPE1 = 'GLON-CAR' / x-axis
    # CTYPE2 = 'GLAT-CAR' / y-axis
    # CTYPE3 = 'RM ' / z-axis
    # CRVAL1 = 0.0 / reference pixel value
    # CRVAL2 = 0.0 / reference pixel value
    # CRVAL3 = -400. / reference pixel value
    # CRPIX1 = 360.5 / reference value
    # CRPIX2 = 181 / reference value
    # CRPIX3 = 1. / reference value
    # CDELT1 = -0.5 / Degrees/pixel
    # CDELT2 = 0.5 / Degrees/pixel
    # CDELT3 = 5. / Degrees/pixel
    # CUNIT1 = 'deg '
    # CUNIT2 = 'deg '
    # CUNIT3 = 'rad/m2 '
    for ii in observation.planes:
        plane = observation.planes[ii]
        # center = Point(0.0, 0.0)
        # width = 720 * 0.5
        # height = 360 * 0.5
        # plane.position = Position()
        # plane.position.bounds = shape.Box(center, width, height)
        # logging.error('set bounds')
        # sets plane-level temporal coverage from a hard-coded survey table
        _update_time(plane)
        # build_a_plane_position(plane)
    logging.debug('Done update.')
    return True
def visit(observation, **kwargs):
    """
    If there are artifacts with the same name, but different case, prefer
    the lower case artifact, and remove the upper-case one. Planes left
    empty by the removal are also removed.

    :param observation: Observation instance - check all its artifacts
    :param kwargs: unused
    :return: the modified observation
    """
    mc.check_param(observation, Observation)
    artifact_count = 0
    plane_count = 0
    if len(observation.planes.values()) > 1:
        all_artifact_keys = cc.get_all_artifact_keys(observation)
        all_artifact_keys_lower = [ii.lower() for ii in all_artifact_keys]
        set_artifact_keys_lower = set(all_artifact_keys_lower)
        delete_these_artifacts = []
        # a size mismatch means at least one key differs only by case
        if len(all_artifact_keys) != len(set_artifact_keys_lower):
            for entry in set_artifact_keys_lower:
                ignore_scheme, ignore_path, file_name = \
                    mc.decompose_uri(entry)
                file_id = obs_file_relationship.remove_extensions(file_name)
                # it's the suffix that has the different case, so use it
                # to figure out which artifacts shouldn't exist
                suffixes = obs_file_relationship.get_suffix(
                    file_id, observation.observation_id)
                for key in all_artifact_keys:
                    for suffix in suffixes:
                        if suffix.upper() in key:
                            # get the fits, previews, thumbnails as well
                            delete_these_artifacts.append(key)
        delete_these_planes = []
        for entry in delete_these_artifacts:
            for plane in observation.planes.values():
                if entry in plane.artifacts.keys():
                    plane.artifacts.pop(entry)
                    logging.info(f'Removing {entry} from {plane.product_id}.')
                    artifact_count += 1
                if len(plane.artifacts.keys()) == 0:
                    delete_these_planes.append(plane.product_id)
        for entry in set(delete_these_planes):
            observation.planes.pop(entry)
            logging.info(
                f'Removing {entry} from {observation.observation_id}.')
            plane_count += 1
    logging.info(
        f'Completed cleanup for {observation.observation_id}. Removed '
        f'{artifact_count} artifacts and {plane_count} planes.')
    # bug fix: a 'result' dict of the counts was built here but never used;
    # the function returns the observation, so the dead local is removed
    return observation
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    headers = None
    if 'headers' in kwargs:
        headers = kwargs['headers']
    fqn = None
    if 'fqn' in kwargs:
        fqn = kwargs['fqn']
    # bug fix: these two lines were logging.error leftovers from debugging;
    # downgraded to debug level
    logging.debug(type(headers))
    logging.debug(type(headers[0]))
    _update_telescope_location(observation, headers)
    for plane in observation.planes:
        for artifact in observation.planes[plane].artifacts:
            parts = observation.planes[plane].artifacts[artifact].parts
            for part in parts:
                p = parts[part]
                if len(p.chunks) == 0 and part == '0':
                    # always have a time axis, and usually an energy
                    # axis as well, so create a chunk for the zero-th part
                    p.chunks.append(Chunk())
                for chunk in p.chunks:
                    chunk.naxis = 4
                    chunk.product_type = get_product_type(headers[0])
                    _update_energy(chunk, headers)
                    _update_time(chunk, headers)
                    _update_position(chunk, headers)
    if observation.observation_id.endswith('_REJECT'):
        _update_requirements(observation)
    if (observation.instrument is None or observation.instrument.name is None
            or len(observation.instrument.name) == 0):
        _update_instrument_name(observation)
    if OmmName.is_composite(observation.observation_id):
        if OmmChooser().needs_delete(observation):
            observation = _update_observation_type(observation)
    # NOTE(review): placement of this call relative to the composite check
    # reconstructed from mangled source - confirm against history
    _update_provenance(observation, headers)
    # _update_time_bounds(observation, fqn)
    logging.debug('Done update.')
    return True
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    headers = None
    if 'headers' in kwargs:
        headers = kwargs['headers']
    fqn = None
    if 'fqn' in kwargs:
        fqn = kwargs['fqn']
    try:
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                temp_parts = TypedOrderedDict(Part, )
                # need to rename the BINARY TABLE extensions, which have
                # differently telemetry, and remove their chunks
                for part_key in ['1', '2', '3', '4', '5']:
                    if part_key in artifact.parts:
                        hdu_count = mc.to_int(part_key)
                        # pop, mutate, then re-add after the loop so the
                        # rename does not disturb the iteration below
                        temp = artifact.parts.pop(part_key)
                        temp.product_type = ProductType.AUXILIARY
                        temp.name = headers[hdu_count].get('EXTNAME')
                        while len(temp.chunks) > 0:
                            temp.chunks.pop()
                        temp_parts.add(temp)
                for part in artifact.parts.values():
                    if part.name == '0':
                        # the zero-th part carries the science data; build
                        # its WCS from the primary header
                        part.product_type = artifact.product_type
                        for chunk in part.chunks:
                            chunk.product_type = artifact.product_type
                            _build_chunk_energy(chunk, headers)
                            _build_chunk_position(
                                chunk, headers, observation.observation_id)
                            chunk.time_axis = None
                for part in temp_parts.values():
                    artifact.parts.add(part)
        logging.debug('Done update.')
    except Exception as e:
        # any failure voids the update; caller receives None
        logging.error(e)
        logging.debug(traceback.format_exc())
        observation = None
    return observation
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update. Stopping pull visitor.')
        return
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    # NOTE(review): 'count' is never incremented anywhere below, so the
    # returned value is always 0 - confirm whether pulls should be counted
    count = 0
    if observable.rejected.is_bad_metadata(observation.observation_id):
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    else:
        for plane in observation.planes.values():
            # proprietary planes (no release date, or future date) are skipped
            if (plane.data_release is None or
                    plane.data_release > datetime.utcnow()):
                logging.error(f'Plane {plane.product_id} is proprietary '
                              f'until {plane.data_release}. No file access.')
                continue
            for artifact in plane.artifacts.values():
                if gem_name.GemName.is_preview(artifact.uri):
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    file_url = '{}/{}'.format(FILE_URL, f_name)
                    # pulls the file only if CADC storage lacks it or the
                    # checksum differs
                    mc.look_pull_and_put(
                        f_name, working_dir, file_url, gem_name.ARCHIVE,
                        stream, MIME_TYPE, cadc_client,
                        artifact.content_checksum.checksum,
                        observable.metrics)
                except Exception as e:
                    # known/recorded failures are tolerated; anything new
                    # propagates
                    if not (observable.rejected.check_and_record(
                            str(e), observation.observation_id)):
                        raise e
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return {'observation': count}
def _update_position(chunk, headers):
    """Check that position information has been set appropriately.
    Reset to null if there's bad input data."""
    logging.debug('Begin _update_position')
    mc.check_param(chunk, Chunk)
    w = wcs.WCS(headers[0])
    partial_axis = (chunk.position is not None
                    and chunk.position.axis is not None
                    and chunk.position.axis.function is None)
    crval_at_origin = (numpy.allclose(w.wcs.crval[0], 0.)
                       and numpy.allclose(w.wcs.crval[1], 0))
    if partial_axis or crval_at_origin:
        chunk.position = None
        chunk.position_axis_1 = None
        chunk.position_axis_2 = None
        logging.debug('Removing the partial position record from the chunk.')
    logging.debug('End _update_position')
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)
    # extracted for eventual use; neither value is consumed yet
    headers = kwargs['headers'] if 'headers' in kwargs else None
    fqn = kwargs['fqn'] if 'fqn' in kwargs else None
    logging.debug('Done update.')
    return observation
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes, must
    have this signature for import_module loading and execution.
    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    try:
        plane_ids_to_delete = []
        mc.check_param(observation, Observation)
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                for part in artifact.parts.values():
                    for chunk in part.chunks:
                        if 'headers' in kwargs:
                            headers = kwargs['headers']
                            chunk.position.resolution = \
                                get_position_resolution(headers)
                        if chunk.energy is not None:
                            # A value of None per Chris, 2018-07-26
                            # Set the value to None here, because the
                            # blueprint is implemented to not set WCS
                            # information to None
                            chunk.energy.restfrq = None
            # only quicklook planes survive this update
            if not plane.product_id.endswith('quicklook'):
                plane_ids_to_delete.append(plane.product_id)
        for product_id in plane_ids_to_delete:
            # change handling of product ids - remove the version number
            logging.warning('Removing plane {} from {}'.format(
                product_id, observation.observation_id))
            observation.planes.pop(product_id)
        logging.debug('Done update.')
        return observation
    except mc.CadcException as e:
        # log and return None so the caller skips ingestion of this record
        tb = traceback.format_exc()
        logging.debug(tb)
        logging.error(e)
        logging.error('Terminating ingestion for {}'.format(
            observation.observation_id))
        return None
def visit(observation, **kwargs):
    """Apply the QA-rejected quality check across every artifact of the
    observation, returning the count of augmentations made."""
    mc.check_param(observation, Observation)
    # conversation with JJK, PD 2018-08-27 - use the observation-level
    # data quality flag.
    #
    # There's no header information, so get the list of QA rejected files
    # from here, and make a check against that static list
    #
    # https://archive-new.nrao.edu/vlass/quicklook/VLASS1.1/QA_REJECTED/
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            logging.debug('working on artifact {}'.format(artifact.uri))
            count += _augment(observation, artifact)
    logging.info('Completed quality augmentation for {}'.format(
        observation.observation_id))
    return {'observations': count}
def visit(observation, **kwargs):
    """Run ESPaDOnS energy augmentation on the artifact that matches the
    science file; only 'i' and 'p' suffix files qualify."""
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    science_file = kwargs.get('science_file')
    if science_file is None:
        raise mc.CadcException('Visitor needs a science_file parameter.')
    cfht_name = cn.CFHTName(file_name=science_file,
                            instrument=observation.instrument.name)
    count = 0
    applies = (cfht_name.instrument is md.Inst.ESPADONS
               and cfht_name.suffix in ['i', 'p'])
    if applies:
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                if cfht_name.file_uri == artifact.uri:
                    count += _do_energy(artifact, science_file, working_dir,
                                        cfht_name)
    logging.info(f'Completed ESPaDOnS energy augmentation for '
                 f'{observation.observation_id}.')
    return {'chunks': count}
def _update_position(self, plane, intent, chunk):
    """Check that position information has been set appropriately. Reset to
    null if there's bad input data. Science planes with no WCS solution are
    additionally flagged as JUNK.

    DD - 19-03-20 - slack
    There are OMM observations with no WCS information, because the
    astrometry software did not solve. The lack of solution may have been
    because of cloud cover or because a field is just not very populated
    with stars, like near the zenith, but the data still has value.

    The OMM opinion: When, for SCI files only, there is no WCS solution, it
    means that the image is really bad and we should classify it junk.

    :param plane: the Plane owning the chunk; may have its quality set.
    :param intent: ObservationIntentType; only SCIENCE triggers JUNK.
    :param chunk: the Chunk whose position may be cleared.
    """
    self._logger.debug('Begin _update_position')
    mc.check_param(chunk, Chunk)
    w = wcs.WCS(self._headers[0])
    # two failure modes: a partially-built position axis, or CRVAL at the
    # (0, 0) origin, which indicates an unsolved astrometric solution
    if (
        chunk.position is not None
        and chunk.position.axis is not None
        and chunk.position.axis.function is None
    ) or (
        numpy.allclose(w.wcs.crval[0], 0.0)
        and numpy.allclose(w.wcs.crval[1], 0)
    ):
        chunk.position = None
        chunk.position_axis_1 = None
        chunk.position_axis_2 = None
        if intent is ObservationIntentType.SCIENCE:
            self._logger.warning(
                f'No spatial WCS. Classifying plane '
                f'{plane.product_id} as JUNK.'
            )
            plane.quality = DataQuality(Quality.JUNK)
        self._logger.debug('Removing the partial position record from the chunk.')
    self._logger.debug('End _update_position')
def _update_time(plane):
    """Set plane-level temporal coverage from the published GMIMS survey
    observing sessions."""
    logging.debug('Begin _update_time')
    # dates are from the GMIMS paper The Global Magneto-Ionic Survey:
    # Polarimetry of the Southern Sky from 300 to 480 MHz
    #
    survey = [['2009-09-07', '2009-09-21'], ['2009-11-30', '2009-12-09'],
              ['2010-02-23', '2010-03-09'], ['2010-06-25', '2010-07-08'],
              ['2010-08-26', '2010-09-10'], ['2010-11-10', '2010-11-24'],
              ['2011-02-09', '2011-02-23'], ['2011-10-20', '2011-11-10'],
              ['2012-02-08', '2012-02-29'], ['2012-06-08', '2012-07-02']]
    mc.check_param(plane, Plane)
    samples = [
        ac.build_plane_time_sample(
            ac.get_datetime(begin), ac.get_datetime(end))
        for begin, end in survey
    ]
    survey_start = ac.get_datetime(survey[0][0])
    survey_end = ac.get_datetime(survey[9][1])
    interval = ac.build_plane_time_interval(survey_start, survey_end, samples)
    plane.time = caom_Time(bounds=interval, dimension=1)
    logging.debug('End _update_time')
def visit(observation, **kwargs):
    """Create previews/thumbnails for each publicly-released plane of the
    observation."""
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update preview records.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    count = 0
    for plane in observation.planes.values():
        public = (plane.data_release is not None
                  and plane.data_release <= datetime.utcnow())
        if not public:
            logging.info(f'Plane {plane.product_id} is proprietary. No '
                         f'preview access or thumbnail creation.')
            continue
        count += _do_prev(observation.observation_id, working_dir, plane,
                          cadc_client, observable)
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def visit(self, observation, **kwargs):
    """Remove deletion-candidate artifacts, then any planes left empty,
    reporting counts for both."""
    mc.check_param(observation, Observation)
    plane_count = 0
    artifact_count = 0
    empty_planes = []
    for plane in observation.planes.values():
        doomed = {artifact.uri for artifact in plane.artifacts.values()
                  if self.check_for_delete(artifact.uri, **kwargs)}
        for entry in doomed:
            self._logger.warning(
                f'Removing artifact {entry} from observation '
                f'{observation.observation_id}, plane {plane.product_id}.')
            artifact_count += 1
            observation.planes[plane.product_id].artifacts.pop(entry)
        if len(plane.artifacts) == 0:
            empty_planes.append(plane.product_id)
    for entry in set(empty_planes):
        self._logger.warning(f'Removing plane {entry} from observation '
                             f'{observation.observation_id}.')
        plane_count += 1
        observation.planes.pop(entry)
    self._logger.info(
        f'Completed artifact cleanup augmentation for '
        f'{observation.observation_id}. Removed {artifact_count} '
        f'artifacts, {plane_count} planes from the observation.')
    return {
        'artifacts': artifact_count,
        'planes': plane_count,
    }
def _update_time(self, chunk, obs_id):
    """Create TemporalWCS information using FITS header information.
    This information should always be available from the file.

    :param chunk: caom2 Chunk to receive the TemporalWCS.
    :param obs_id: observation id, used in error messages.
    :raises mc.CadcException: when the header time values are 'NaN'.
    """
    self._logger.debug('Begin _update_time.')
    mc.check_param(chunk, Chunk)
    # 'MJD_STAR' is the keyword as written in the files - TODO confirm
    # against an actual header dump
    mjd_start = self._headers[0].get('MJD_STAR')
    mjd_end = self._headers[0].get('MJD_END')
    if mjd_start is None or mjd_end is None:
        mjd_start, mjd_end = ac.find_time_bounds(self._headers)
    if mjd_start is None or mjd_end is None:
        chunk.time = None
        # bug fix: the message had a typo, 'MDJ_END' instead of 'MJD_END'
        self._logger.debug(
            f'Cannot calculate MJD_STAR {mjd_start} or MJD_END'
            f' {mjd_end}'
        )
    elif mjd_start == 'NaN' or mjd_end == 'NaN':
        raise mc.CadcException(
            f'Invalid time values MJD_STAR {mjd_start} or MJD_END '
            f'{mjd_end} for {obs_id}, stopping ingestion.'
        )
    else:
        self._logger.debug(
            f'Calculating range with start {mjd_start} and end {mjd_end}.'
        )
        start = RefCoord(0.5, mjd_start)
        end = RefCoord(1.5, mjd_end)
        time_cf = CoordFunction1D(1, self._headers[0].get('TEFF'), start)
        time_axis = CoordAxis1D(Axis('TIME', 'd'), function=time_cf)
        time_axis.range = CoordRange1D(start, end)
        chunk.time = TemporalWCS(time_axis)
        chunk.time.exposure = self._headers[0].get('TEFF')
        chunk.time.resolution = 0.1
        chunk.time.timesys = 'UTC'
        chunk.time.trefpos = 'TOPOCENTER'
        chunk.time_axis = None
    self._logger.debug('Done _update_time.')
def visit(observation, **kwargs):
    """Create previews/thumbnails for the publicly-released plane that
    matches storage_name.

    :param observation: A CAOM Observation model instance.
    :param kwargs: expects 'working_directory', 'clients', 'observable',
        and 'storage_name'.
    :return: the (possibly modified) observation.
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    clients = kwargs.get('clients')
    if clients is None or clients.data_client is None:
        logging.warning('Need a cadc_client to update preview records.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs a observable parameter.')
    storage_name = kwargs.get('storage_name')
    if storage_name is None:
        raise mc.CadcException('Visitor needs a storage_name parameter.')
    count = 0
    for plane in observation.planes.values():
        if (plane.data_release is None or
                plane.data_release > datetime.utcnow()):
            logging.info(f'Plane {plane.product_id} is proprietary. No '
                         f'preview access or thumbnail creation.')
            continue
        if plane.product_id != storage_name.product_id:
            continue
        count += _do_prev(
            observation.observation_id, working_dir, plane, clients,
            observable, storage_name,
        )
    # bug fixes: a 'result' dict was built but never used (removed), and
    # the completion message lacked a space before the count
    logging.info(
        f'Completed preview augmentation for {observation.observation_id}. '
        f'{count} artifacts modified.')
    return observation