def visit(observation, **kwargs):
    mc.check_param(observation, Observation)
    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'cadc_client' in kwargs:
        cadc_client = kwargs['cadc_client']
    else:
        raise mc.CadcException('Need a cadc_client parameter.')
    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        for j in plane.artifacts:
            artifact = plane.artifacts[j]
            if (artifact.uri.endswith('.fits.gz') or
                    artifact.uri.endswith('.fits')):
                file_id = ec.CaomName(artifact.uri).file_id
                file_name = ec.CaomName(artifact.uri).file_name
                science_fqn = os.path.join(working_dir, file_name)
                if not os.path.exists(science_fqn):
                    file_name = \
                        ec.CaomName(artifact.uri).uncomp_file_name
                    science_fqn = os.path.join(working_dir, file_name)
                    if not os.path.exists(science_fqn):
                        raise mc.CadcException(
                            '{} preview visit file not found'.format(
                                science_fqn))
                logging.debug('working on file {}'.format(science_fqn))
                count += _do_prev(file_id, science_fqn, working_dir,
                                  plane, cadc_client)
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def visit(observation, **kwargs):
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Visitor needs a cadc_client parameter to store images.')
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs an observable parameter.')
    science_file = kwargs.get('science_file')
    count = 0
    for plane in observation.planes.values():
        delete_list = []
        for artifact in plane.artifacts.values():
            if artifact.uri.endswith(science_file):
                count += _do_prev(artifact, plane, working_dir, cadc_client,
                                  stream, observable)
            if artifact.uri.endswith('.jpg'):
                delete_list.append(artifact.uri)
        for uri in delete_list:
            plane.artifacts.pop(uri)
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def read_file_list_from_archive(config):
    ad_resource_id = 'ivo://cadc.nrc.ca/ad'
    agent = 'vlass2caom2/1.0'
    subject = net.Subject(certificate=config.proxy_fqn)
    client = net.BaseWsClient(
        resource_id=ad_resource_id,
        subject=subject,
        agent=agent,
        retry=True,
    )
    query_meta = (f"SELECT fileName FROM archive_files WHERE archiveName = "
                  f"'{config.archive}'")
    data = {'QUERY': query_meta, 'LANG': 'ADQL', 'FORMAT': 'csv'}
    logging.debug(f'Query is {query_meta}')
    try:
        response = client.get(
            f'https://{client.host}/ad/sync?{parse.urlencode(data)}',
            cert=config.proxy_fqn,
        )
        if response.status_code == 200:
            # ignore the column name as the first part of the response
            artifact_files_list = response.text.split()[1:]
            return artifact_files_list
        else:
            raise mc.CadcException(f'Query failure {response}')
    except Exception as e:
        raise mc.CadcException(f'Failed ad content query: {e}')
def data_get(client, working_directory, file_name, archive, metrics):
    """
    Retrieve a local copy of a file available from CADC. Assumes the
    working directory location exists and is writeable.

    :param client: The CadcDataClient for read access to CADC storage.
    :param working_directory: Where 'file_name' will be written.
    :param file_name: What to copy from CADC storage.
    :param archive: Which archive to retrieve the file from.
    :param metrics: Tracks successful execution times and failure counts.
    """
    start = current()
    fqn = os.path.join(working_directory, file_name)
    try:
        client.get_file(archive, file_name, destination=fqn)
        if not os.path.exists(fqn):
            raise mc.CadcException(
                f'ad retrieve failed. {fqn} does not exist.'
            )
    except Exception as e:
        metrics.observe_failure('get', 'data', file_name)
        logging.debug(traceback.format_exc())
        raise mc.CadcException(f'Did not retrieve {fqn} because {e}')
    end = current()
    file_size = os.stat(fqn).st_size
    metrics.observe(start, end, file_size, 'get', 'data', file_name)
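# Hedged usage sketch, not from the source: one way data_get above might be
# called. Assumes the legacy cadcdata.CadcDataClient API and the Metrics
# class used in the tests; the certificate path, archive, and file name are
# placeholders.
def _example_data_get():
    from cadcdata import CadcDataClient
    from cadcutils import net

    client = CadcDataClient(
        net.Subject(certificate='/usr/src/app/cadcproxy.pem'))
    metrics = mc.Metrics(mc.Config())
    # writes /tmp/C190219_0270_SCI.fits.gz, or raises mc.CadcException
    data_get(client, '/tmp', 'C190219_0270_SCI.fits.gz', 'OMM', metrics)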
def __init__(self, fname_on_disk=None, file_name=None, obs_id=None):
    if file_name is not None:
        self.file_id = GemName.get_file_id(file_name)
        if '.fits' in file_name:
            self.fname_in_ad = '{}.fits'.format(self.file_id)
        elif GemName.is_preview(file_name):
            self.fname_in_ad = '{}.jpg'.format(self.file_id)
        else:
            raise mc.CadcException(
                'Unrecognized file name format {}'.format(file_name))
    elif fname_on_disk is not None:
        self.file_id = GemName.get_file_id(fname_on_disk)
        if '.fits' in fname_on_disk:
            self.fname_in_ad = '{}.fits'.format(self.file_id)
        elif GemName.is_preview(fname_on_disk):
            self.fname_in_ad = '{}.jpg'.format(self.file_id)
        else:
            raise mc.CadcException(
                'Unrecognized file name format {}'.format(fname_on_disk))
    else:
        raise mc.CadcException('Require file name.')
    super(GemName, self).__init__(
        obs_id=None,
        collection=ARCHIVE,
        collection_pattern=GemName.GEM_NAME_PATTERN,
        fname_on_disk=fname_on_disk,
        scheme=SCHEME)
    self.obs_id = obs_id
def visit(observation, **kwargs):
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'
    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'science_file' in kwargs:
        science_file = kwargs['science_file']
    else:
        raise mc.CadcException(
            'No science_file parameter provided to visitor '
            'for obs {}.'.format(observation.observation_id))
    science_fqn = os.path.join(working_dir, science_file)
    if not os.path.exists(science_fqn):
        if science_fqn.endswith('.gz'):
            science_fqn = science_fqn.replace('.gz', '')
            if not os.path.exists(science_fqn):
                raise mc.CadcException(
                    '{} visit file not found'.format(science_fqn))
    science_fqn = _unzip(science_fqn)
    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        _update_plane_position(plane, science_fqn)
        _update_fits_artifact_metadata(plane, science_fqn, science_file)
        count += 1
    logging.info('Completed footprint augmentation for {}'.format(
        observation.observation_id))
    return {'planes': count}
def visit(observation, **kwargs):
    mc.check_param(observation, Observation)
    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'cadc_client' in kwargs:
        cadc_client = kwargs['cadc_client']
    else:
        raise mc.CadcException('Visitor needs a cadc_client parameter.')
    if 'stream' in kwargs:
        stream = kwargs['stream']
    else:
        raise mc.CadcException('Visitor needs a stream parameter.')
    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        for j in plane.artifacts:
            artifact = plane.artifacts[j]
            file_id = ec.CaomName(artifact.uri).file_id
            logging.debug('Generate thumbnail for file id {}'.format(file_id))
            count += _do_prev(file_id, working_dir, plane, cadc_client, stream)
            break
    logging.info('Completed preview augmentation for {}.'.format(
        observation.observation_id))
    return {'artifacts': count}
def check(self, dest_fqn):
    from astropy.io import fits
    try:
        hdulist = fits.open(dest_fqn, memmap=True, lazy_load_hdus=False)
        hdulist.verify('warn')
        for h in hdulist:
            h.verify('warn')
        hdulist.close()
    except (fits.VerifyError, OSError) as e:
        if self._observable is not None:
            self._observable.rejected.record(
                mc.Rejected.BAD_DATA, os.path.basename(dest_fqn)
            )
        if os.path.exists(dest_fqn):
            os.unlink(dest_fqn)
        raise mc.CadcException(
            f'astropy verify error {dest_fqn} when reading {e}'
        )
    # a second check that fails for some NEOSSat cases - if this works,
    # the file might have been correctly retrieved
    try:
        # ignore the return value - if the file is corrupted, the getdata
        # fails, which is the only interesting behaviour here
        fits.getdata(dest_fqn, ext=0)
    except (TypeError, OSError) as e:
        if self._observable is not None:
            self._observable.rejected.record(
                mc.Rejected.BAD_DATA, os.path.basename(dest_fqn)
            )
        if os.path.exists(dest_fqn):
            os.unlink(dest_fqn)
        raise mc.CadcException(
            f'astropy getdata error {dest_fqn} when reading {e}'
        )
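# Hedged sketch, not from the source: exercises the same astropy checks that
# check() above relies on, against a minimal locally created FITS file. The
# path is a placeholder.
def _example_fits_verify(path='/tmp/example.fits'):
    import numpy as np
    from astropy.io import fits

    fits.PrimaryHDU(data=np.zeros((2, 2))).writeto(path, overwrite=True)
    with fits.open(path, memmap=True, lazy_load_hdus=False) as hdulist:
        hdulist.verify('warn')
        for h in hdulist:
            h.verify('warn')
    # getdata raises TypeError/OSError for the corrupted-file cases the
    # second check above guards against
    fits.getdata(path, ext=0)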
def build(self, entry):
    """
    :param entry: a Gemini file name or observation ID, depending on the
        configuration
    :return: an instance of StorageName for use in execute_composable.
    """
    self._logger.debug(f'Build a StorageName instance for {entry}.')
    try:
        if self._config.features.supports_latest_client:
            if (mc.TaskType.SCRAPE in self._config.task_types
                    or self._config.use_local_files):
                self._read_instrument_locally(entry)
                result = gem_name.GemName(
                    file_name=entry,
                    instrument=self._instrument,
                    v_collection=gem_name.COLLECTION,
                    v_scheme=gem_name.V_SCHEME,
                    entry=entry)
            elif self._config.features.use_file_names:
                self._read_instrument_remotely(entry)
                result = gem_name.GemName(
                    file_name=entry,
                    instrument=self._instrument,
                    v_collection=gem_name.COLLECTION,
                    v_scheme=gem_name.V_SCHEME,
                    entry=entry)
            else:
                raise mc.CadcException('The need has not been encountered '
                                       'in the real world yet.')
        else:
            if (mc.TaskType.INGEST_OBS in self._config.task_types
                    and '.fits' not in entry):
                # anything that is NOT ALOPEKE/ZORRO, which are the only
                # two instruments that change the behaviour of the
                # GemName constructor - and yeah, that abstraction is
                # leaking like a sieve.
                self._logger.debug('INGEST_OBS, hard-coded instrument.')
                instrument = external_metadata.Inst.CIRPASS
                result = gem_name.GemName(
                    obs_id=entry, instrument=instrument, entry=entry)
            elif (mc.TaskType.SCRAPE in self._config.task_types
                    or self._config.use_local_files):
                self._read_instrument_locally(entry)
                result = gem_name.GemName(
                    file_name=entry, instrument=self._instrument, entry=entry)
            elif self._config.features.use_file_names:
                self._read_instrument_remotely(entry)
                result = gem_name.GemName(
                    file_name=entry, instrument=self._instrument, entry=entry)
            else:
                raise mc.CadcException('The need has not been encountered '
                                       'in the real world yet.')
        self._logger.debug('Done build.')
        return result
    except Exception as e:
        self._logger.error(e)
        self._logger.debug(traceback.format_exc())
        raise mc.CadcException(e)
def failure_action(self, original_fqn, destination_fqn, msg):
    """Action taken on failure is completely dependent on where the file
    originated, and any cleanup configuration."""
    try:
        if os.path.exists(destination_fqn):
            os.unlink(destination_fqn)
    except Exception as e:
        self._logger.error(
            f'Failed to clean up {destination_fqn} after a verification '
            f'error.')
        raise mc.CadcException(e)
    raise mc.CadcException(msg)
def _update_time_bounds(self, observation, storage_name):
    """Add chunk time bounds to the chunk from the first part, by
    referencing information from the second header."""
    lower_values = ''
    upper_values = ''
    with fits.open(storage_name.sources_names[0]) as fits_data:
        xtension = fits_data[1].header['XTENSION']
        extname = fits_data[1].header['EXTNAME']
        if 'BINTABLE' in xtension and 'PROVENANCE' in extname:
            for ii in fits_data[1].data[0]['STARTTIME']:
                lower_values = f'{ii} {lower_values}'
            for ii in fits_data[1].data[0]['DURATION']:
                upper_values = f'{ii} {upper_values} '
        else:
            raise mc.CadcException(
                f'Opened a composite file that does not match the '
                f'expected profile '
                f'(XTENSION=BINTABLE/EXTNAME=PROVENANCE). '
                f'{xtension} {extname}'
            )
    for plane in observation.planes:
        for artifact in observation.planes[plane].artifacts:
            parts = observation.planes[plane].artifacts[artifact].parts
            for p in parts:
                if p == '0':
                    lower = lower_values.split()
                    upper = upper_values.split()
                    if len(lower) != len(upper):
                        raise mc.CadcException(
                            'Cannot make RefCoords with inconsistent '
                            'values.'
                        )
                    chunk = parts[p].chunks[0]
                    bounds = CoordBounds1D()
                    chunk.time.axis.bounds = bounds
                    for ii in range(len(lower)):
                        mjd_start, mjd_end = ac.convert_time(
                            mc.to_float(lower[ii]),
                            mc.to_float(upper[ii]),
                        )
                        lower_refcoord = RefCoord(0.5, mjd_start)
                        upper_refcoord = RefCoord(1.5, mjd_end)
                        r = CoordRange1D(lower_refcoord, upper_refcoord)
                        bounds.samples.append(r)
                    # if execution has gotten to this point, remove range
                    # if it exists, since only one of bounds or range
                    # should be provided, and bounds is more specific. PD,
                    # slack, 2018-07-16
                    if chunk.time.axis.range is not None:
                        chunk.time.axis.range = None
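# Hedged sketch, not from the source: building CAOM time bounds the way
# _update_time_bounds above does, from (STARTTIME, DURATION) pairs. Assumes
# ac.convert_time returns an (mjd_start, mjd_end) tuple, as its use above
# suggests.
def _example_time_bounds(start_duration_pairs):
    from caom2 import CoordBounds1D, CoordRange1D, RefCoord

    bounds = CoordBounds1D()
    for start, duration in start_duration_pairs:
        mjd_start, mjd_end = ac.convert_time(start, duration)
        bounds.samples.append(
            CoordRange1D(RefCoord(0.5, mjd_start), RefCoord(1.5, mjd_end)))
    return bounds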
def _update_telescope_location(self, observation):
    """Provide geocentric telescope location information, based on
    geodetic information from the headers."""
    self._logger.debug('Begin _update_telescope_location')
    if not isinstance(observation, Observation):
        raise mc.CadcException('Input type must be Observation.')
    telescope = self._headers[0].get('TELESCOP')
    if telescope is None:
        self._logger.warning(
            f'No telescope name. Could not set telescope '
            f'location for {observation.observation_id}'
        )
        return
    telescope = telescope.upper()
    if COLLECTION in telescope or 'CTIO' in telescope:
        lat = self._headers[0].get('OBS_LAT')
        long = self._headers[0].get('OBS_LON')
        # make a reliable lookup value
        if COLLECTION in telescope:
            telescope = COLLECTION
        if 'CTIO' in telescope:
            telescope = 'CTIO'
        if lat is None or long is None:
            observation.telescope.geo_location_x = DEFAULT_GEOCENTRIC[
                telescope
            ]['x']
            observation.telescope.geo_location_y = DEFAULT_GEOCENTRIC[
                telescope
            ]['y']
            observation.telescope.geo_location_z = DEFAULT_GEOCENTRIC[
                telescope
            ]['z']
        else:
            (
                observation.telescope.geo_location_x,
                observation.telescope.geo_location_y,
                observation.telescope.geo_location_z,
            ) = ac.get_location(
                lat, long, DEFAULT_GEOCENTRIC[telescope]['elevation']
            )
    else:
        raise mc.CadcException(f'Unexpected telescope name {telescope}')
    self._logger.debug('Done _update_telescope_location')
def visit(observation, **kwargs):
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'
    working_dir = './'
    if 'working_directory' in kwargs:
        working_dir = kwargs['working_directory']
    if 'science_file' in kwargs:
        science_file = kwargs['science_file']
    else:
        raise mc.CadcException('No science_file parameter provided to visitor '
                               'for obs {}.'.format(
                                   observation.observation_id))
    # TODO - this moves location handling structures to other than the
    # main composable code - this could be MUCH better handled, just not
    # sure how right now
    log_file_directory = None
    if 'log_file_directory' in kwargs:
        log_file_directory = kwargs['log_file_directory']
    science_fqn = os.path.join(working_dir, science_file)
    if not os.path.exists(science_fqn):
        if science_fqn.endswith('.gz'):
            science_fqn = science_fqn.replace('.gz', '')
            if not os.path.exists(science_fqn):
                raise mc.CadcException(
                    '{} visit file not found'.format(science_fqn))
    science_fqn = _unzip(science_fqn)
    count = 0
    for i in observation.planes:
        plane = observation.planes[i]
        for j in plane.artifacts:
            artifact = plane.artifacts[j]
            for k in artifact.parts:
                part = artifact.parts[k]
                for chunk in part.chunks:
                    _update_position(chunk, science_fqn)
                    count += 1
    return_file = '{}_footprint.txt'.format(observation.observation_id)
    return_string_file = '{}_footprint_returnstring.txt'.format(
        observation.observation_id)
    _handle_footprint_logs(log_file_directory, return_file)
    _handle_footprint_logs(log_file_directory, return_string_file)
    logging.info('Completed footprint augmentation for {}'.format(
        observation.observation_id))
    return {'chunks': count}
def get_obs_metadata(file_id):
    """
    Download the Gemini observation metadata for the given obs_id.

    :param file_id: The file ID
    :return: Dictionary of observation metadata.
    """
    logging.debug('Begin get_obs_metadata for {}'.format(file_id))
    gemini_url = '{}{}'.format(GEMINI_METADATA_URL, file_id)

    # Open the URL and fetch the JSON document for the observation
    session = requests.Session()
    retries = 10
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    try:
        response = session.get(gemini_url, timeout=20)
        metadata = response.json()
        response.close()
    except Exception as e:
        raise mc.CadcException(
            'Unable to download Gemini observation metadata from {} because {}'
            .format(gemini_url, str(e)))
    global om
    om.add(metadata, file_id)
    logging.debug('End get_obs_metadata for {}'.format(file_id))
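# Hedged sketch, not from the source: the retrying-session pattern used in
# get_obs_metadata above, factored out so other archive.gemini.edu requests
# could reuse it. The retry counts mirror the values above.
def _example_retry_session(retries=10, backoff_factor=0.5):
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(total=retries, read=retries, connect=retries,
                  backoff_factor=backoff_factor)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session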
def get_obs_metadata(file_id):
    """
    Download the Gemini observation metadata for the given obs_id.

    :param file_id: The file ID
    :return: Dictionary of observation metadata.
    """
    logging.debug('Begin get_obs_metadata for {}'.format(file_id))
    global om
    if om.contains(file_id):
        om.reset_index(file_id)
    else:
        gemini_url = '{}{}'.format(GEMINI_METADATA_URL, file_id)

        # Open the URL and fetch the JSON document for the observation
        response = None
        try:
            response = mc.query_endpoint_session(
                gemini_url, gofr.query_session
            )
            metadata = response.json()
        finally:
            if response is not None:
                response.close()
        if len(metadata) == 0:
            raise mc.CadcException(f'Could not find JSON record for {file_id} '
                                   f'at archive.gemini.edu.')
        om.add(metadata, file_id)
    logging.debug('End get_obs_metadata for {}'.format(file_id))
def _get_uri(args):
    if args.lineage:
        result = args.lineage[0].split('/', 1)[1]
    else:
        raise mc.CadcException(
            'Could not define uri from these args {}'.format(args))
    return result
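# Hedged worked example, not from the source: _get_uri above assumes
# caom2gen-style lineage entries of the form 'product_id/uri', so the URI is
# everything after the first '/'. The values here are placeholders.
def _example_get_uri():
    from argparse import Namespace

    args = Namespace(lineage=['raw/ad:OMM/C190219_0270_SCI.fits.gz'])
    assert _get_uri(args) == 'ad:OMM/C190219_0270_SCI.fits.gz'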
def visit(observation, **kwargs):
    assert observation is not None, 'Input parameter must have a value.'
    assert isinstance(observation, Observation), \
        'Input parameter must be an Observation'
    working_dir = kwargs.get('working_directory', './')
    science_file = kwargs.get('science_file')
    if science_file is None:
        raise mc.CadcException('No science_file parameter provided to visitor '
                               'for obs {}.'.format(
                                   observation.observation_id))
    # TODO - this moves location handling structures to other than the
    # main composable code - this could be MUCH better handled, just not
    # sure how right now
    log_file_directory = kwargs.get('log_file_directory')
    science_fqn = os.path.join(working_dir, science_file)
    count = 0
    for plane in observation.planes.values():
        for artifact in plane.artifacts.values():
            for part in artifact.parts.values():
                for chunk in part.chunks:
                    # -t 10 provides a margin of up to 10 pixels
                    cc.exec_footprintfinder(
                        chunk, science_fqn, log_file_directory,
                        sn.VlassName.remove_extensions(science_file),
                        '-t 10')
                    count += 1
    logging.info('Completed footprint augmentation for {}'.format(
        observation.observation_id))
    return {'chunks': count}
def _failure_mock(ignore_url, ignore_local_fqn):
    raise mc.CadcException(
        'Could not retrieve /usr/src/app/N20211007A0003/'
        'N20211007A0003b.jpg from '
        'https://archive.gemini.edu/preview/N20211007A0003b.fits. Failed '
        'with 404 Client Error: Not Found for url: '
        'https://archive.gemini.edu/preview/N20211007A0003b.fits')
def data_put_fqn(
    client, source_name, storage_name, stream='raw', metrics=None,
):
    """
    Make a copy of a locally available file by writing it to CADC. Assumes
    file and directory locations are correct. Requires a checksum comparison
    by the client.

    :param client: The CadcDataClient for write access to CADC storage.
    :param source_name: str fully-qualified name of the local file
    :param storage_name: StorageName instance
    :param stream: str A relic of the old CADC storage.
    :param metrics: Tracks successful execution times and failure counts.
    """
    start = current()
    try:
        client.put_file(
            storage_name.archive,
            source_name,
            archive_stream=stream,
            mime_type=storage_name.mime_type,
            mime_encoding=storage_name.mime_encoding,
            md5_check=True,
        )
        file_size = os.stat(source_name).st_size
    except Exception as e:
        metrics.observe_failure('put', 'data', source_name)
        logging.debug(traceback.format_exc())
        raise mc.CadcException(f'Failed to store data with {e}')
    end = current()
    metrics.observe(start, end, file_size, 'put', 'data', source_name)
def __init__(self, fname_on_disk=None, file_name=None, obs_id=None,
             file_id=None):
    super(AlmacaName, self).__init__(
        collection=ARCHIVE,
        collection_pattern='*',
        fname_on_disk=fname_on_disk)
    self._file_name = os.path.basename(fname_on_disk)
    temp = self._file_name.split('.')
    if len(temp) < 5:
        raise mc.CadcException('Not a split product.')
    asdm_str = temp[0].replace('uid___', '')
    self._obs_id = f'{asdm_str}.{temp[1]}.{temp[2]}'
    # TODO - hard-coded for single-band splitting testing right now
    self._science_goal_id = 'uid://A001/X88b/X21'
    self._group_id = 'uid://A001/X88b/X22'
    self._mous_id = 'uid://A001/X88b/X23'
    self._asdm_id = f'uid://{asdm_str.replace("_", "/")}'
    self._product_id = temp[3]
    self._intent = (ProductType.CALIBRATION if '.CAL.' in fname_on_disk
                    else ProductType.SCIENCE)
    self._ms = fname_on_disk
    self._log_dir = '/data/calibrated'
    self._input_ms_metadata = f'{self._log_dir}/{temp[0]}' \
                              f'.ms.split.cal/md.pk'
    self._logger = logging.getLogger(__name__)
    self._logger.error(self)
def _add_extensions(fname):
    if fname.endswith('.gz'):
        return fname
    elif fname.endswith('.fits'):
        return '{}.gz'.format(fname)
    else:
        raise mc.CadcException('Unexpected file name {}'.format(fname))
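# Worked example, not from the source: the expected behaviour of
# _add_extensions above; the file names are placeholders.
def _example_add_extensions():
    # a .fits name gains a .gz extension
    assert _add_extensions('C190219_0270_SCI.fits') == \
        'C190219_0270_SCI.fits.gz'
    # an already-compressed name is returned unchanged
    assert _add_extensions('C190219_0270_SCI.fits.gz') == \
        'C190219_0270_SCI.fits.gz'
    # anything else raises
    try:
        _add_extensions('C190219_0270_SCI.jpg')
    except mc.CadcException:
        pass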
def define_subject(config):
    """Common code to figure out which credentials to use based on the
    content of a Config instance."""
    subject = None
    if config.proxy_fqn is not None and os.path.exists(config.proxy_fqn):
        logging.debug(
            f'Using proxy certificate {config.proxy_fqn} for credentials.'
        )
        subject = net.Subject(username=None, certificate=config.proxy_fqn)
    elif config.netrc_file is not None:
        netrc_fqn = os.path.join(config.working_directory, config.netrc_file)
        if os.path.exists(netrc_fqn):
            logging.debug(f'Using netrc file {netrc_fqn} for credentials.')
            subject = net.Subject(
                username=None, certificate=None, netrc=netrc_fqn
            )
        else:
            logging.warning(f'Cannot find netrc file {netrc_fqn}')
    else:
        logging.warning(
            f'Proxy certificate is {config.proxy_fqn}, netrc file is '
            f'{config.netrc_file}.'
        )
        raise mc.CadcException(
            'No credentials provided (proxy certificate or netrc file). '
            'Cannot create an anonymous subject.'
        )
    return subject
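# Hedged usage sketch, not from the source: calling define_subject above with
# a Config whose proxy certificate exists on disk. Assumes Config exposes a
# settable proxy_fqn, as its read-only use above suggests; the path is a
# placeholder.
def _example_define_subject():
    config = mc.Config()
    config.proxy_fqn = '/usr/src/app/cadcproxy.pem'
    # returns a net.Subject built from the proxy certificate, or raises
    # mc.CadcException if no credentials can be found
    return define_subject(config)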
def update(observation, **kwargs):
    """Called to fill multiple CAOM model elements and/or attributes (an n:n
    relationship between TDM attributes and CAOM attributes). Must have this
    signature for import_module loading and execution.

    :param observation A CAOM Observation model instance.
    :param **kwargs Everything else."""
    logging.debug('Begin update.')
    mc.check_param(observation, Observation)

    headers = kwargs.get('headers')
    fqn = kwargs.get('fqn')
    uri = kwargs.get('uri')
    phangs_name = None
    if uri is not None:
        phangs_name = PHANGSName(artifact_uri=uri)
    if fqn is not None:
        phangs_name = PHANGSName(file_name=os.path.basename(fqn))
    if phangs_name is None:
        raise mc.CadcException(f'Need one of fqn or uri defined for '
                               f'{observation.observation_id}')

    _update_from_comment(observation, phangs_name, headers)
    logging.debug('Done update.')
    return observation
def test_pull_augmentation():
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    kwargs = {'working_directory': TEST_DATA_DIR,
              'cadc_client': cadc_client_mock,
              'stream': 'stream',
              'observable': test_observable}

    with patch('caom2pipe.manage_composable.http_get') as http_mock, \
            patch('caom2pipe.manage_composable.data_put') as ad_put_mock:
        cadc_client_mock.return_value.data_get.return_value = mc.CadcException(
            'test')
        # no scheme from cadc client
        cadc_client_mock.get_file_info.return_value = {'md5sum': '1234'}
        result = pull_augmentation.visit(obs, **kwargs)
        test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
        # assert_called_with raises on failure, so no trailing message needed
        http_mock.assert_called_with(test_url, test_prev)
        assert ad_put_mock.called, 'ad put mock not called'
        assert result is not None, 'expect a result'
        assert result['observation'] == 0, 'no updated metadata'
        assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, \
            'no new artifacts'
def get_pi_metadata(program_id):
    global pm
    if program_id in pm:
        metadata = pm[program_id]
    else:
        program_url = 'https://archive.gemini.edu/programinfo/' + program_id

        # Open the URL and fetch the JSON document for the observation
        session = requests.Session()
        retries = 10
        retry = Retry(total=retries, read=retries, connect=retries,
                      backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        try:
            response = session.get(program_url, timeout=20)
            xml_metadata = response.text
            response.close()
        except Exception as e:
            raise mc.CadcException(
                'Unable to download Gemini observation metadata from {} '
                'because {}'.format(program_url, str(e)))

        metadata = None
        soup = BeautifulSoup(xml_metadata, 'lxml')
        tds = soup.find_all('td')
        if len(tds) > 0:
            title = tds[1].contents[0].replace('\n', ' ')
            pi_name = tds[3].contents[0]
            metadata = {'title': title, 'pi_name': pi_name}
            pm[program_id] = metadata
    logging.debug('End get_pi_metadata')
    return metadata
def get_product_id(file_name):
    if file_name.startswith('component'):
        result = 'component_image'
    elif 'cont.taylor.0.restored' in file_name:
        if file_name.endswith('restored.components.csv'):
            result = 'fine_source_catalog'
        elif file_name.endswith('restored.islands.csv'):
            result = 'coarse_source_catalog'
        else:
            result = 'cont_taylor_0_restored'
    elif 'cont.taylor.0' in file_name:
        result = 'cont_taylor_0'
    elif 'cont.taylor.1.restored' in file_name:
        result = 'cont_taylor_1_restored'
    elif 'cont.taylor.1' in file_name:
        result = 'cont_taylor_1'
    elif 'restored' in file_name and 'contcube' in file_name:
        result = 'contcube_restored'
    elif 'contcube' in file_name:
        result = 'contcube'
    else:
        raise mc.CadcException(
            'Could not guess product ID from file name {}'.format(
                file_name))
    return result
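# Worked examples, not from the source: how get_product_id above classifies
# continuum products; the file names are placeholders containing the matched
# substrings, not real archive names.
def _example_get_product_id():
    assert get_product_id(
        'image.i.SB1234.cont.taylor.0.restored.fits'
    ) == 'cont_taylor_0_restored'
    assert get_product_id(
        'selavy.i.SB1234.cont.taylor.0.restored.components.csv'
    ) == 'fine_source_catalog'
    assert get_product_id(
        'weights.i.SB1234.cont.taylor.0.fits'
    ) == 'cont_taylor_0'
    assert get_product_id(
        'image.restored.i.SB1234.contcube.fits'
    ) == 'contcube_restored'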
def _repair_instrument_name_for_svo(instrument):
    """
    Instrument names from JSON/headers are not necessarily the same as the
    instrument names used by the SVO Filter service. Correlate the two here.

    :param instrument the Gemini version
    :return instrument the SVO version
    """
    result = instrument.value
    if instrument == Inst.HRWFS:
        telescope = om.get('telescope')
        if telescope is None:
            obs_id = om.get('data_label')
            raise mc.CadcException(
                '{}: No observatory information for {}'.format(
                    instrument, obs_id))
        else:
            if 'Gemini-South' == telescope:
                result = 'AcqCam-S'
            else:
                result = 'AcqCam-N'
    elif instrument == Inst.F2:
        result = 'Flamingos2'
    elif instrument == Inst.FLAMINGOS:
        result = 'Flamingos'
    return result
def _update_science_provenance(observation, headers):
    members_inputs = TypedSet(ObservationURI,)
    plane_inputs = TypedSet(PlaneURI,)
    for keyword in headers[0]:
        if keyword.startswith('IN_'):
            value = headers[0].get(keyword)
            base_name = OmmName.remove_extensions(os.path.basename(value))
            if base_name.startswith('S'):
                # starting 'S' means a science input, 'C' will mean cal
                file_id = '{}_SCI'.format(base_name.replace('S', 'C', 1))
            elif base_name.startswith('C'):
                file_id = '{}_CAL'.format(base_name)
            else:
                raise mc.CadcException(
                    'Unknown file naming pattern {}'.format(base_name))
            obs_member_uri_str = ec.CaomName.make_obs_uri_from_obs_id(
                COLLECTION, file_id)
            obs_member_uri = ObservationURI(obs_member_uri_str)
            # the product id is the same as the observation id for OMM
            plane_uri = PlaneURI.get_plane_uri(obs_member_uri, file_id)
            plane_inputs.add(plane_uri)
            members_inputs.add(obs_member_uri)
    mc.update_typed_set(observation.members, members_inputs)
    mc.update_typed_set(
        observation.planes[observation.observation_id].provenance.inputs,
        plane_inputs)
def _get_obs_id(self, temp, file_name, entry):
    """
    These files are not available from archive.gemini.edu, so only ask
    for their metadata from CADC.
    """
    self._logger.debug(f'Begin _get_obs_id for file_name {file_name}')
    metadata = None
    if self._connected:
        if self._config.use_local_files:
            self._logger.debug(f'Check local {file_name}')
            metadata = em.defining_metadata_finder._check_local(file_name)
        if metadata is None and temp is not None and temp.scheme == 'vos':
            self._logger.debug('Check vos')
            metadata = self._get_obs_id_from_vos(entry)
        if metadata is None:
            # why the old collection name? Because it's better to
            # retrieve the metadata from the old sc2 collection than
            # by retrieving a header, and beat up CADC instead of
            # archive.gemini.edu
            original_client = em.defining_metadata_finder._tap_client
            try:
                self._logger.debug(f'Check caom2 collection {COLLECTION}')
                # uri = mc.build_uri(COLLECTION, file_name, CADC_SCHEME)
                em.defining_metadata_finder._tap_client = (
                    self._prod_client
                )
                for uri in [
                    f'gemini:GEM/{file_name}',
                    f'gemini:GEMINI/{file_name}',
                ]:
                    metadata = em.defining_metadata_finder._check_caom2(
                        uri, 'GEMINI'
                    )
                    if metadata is not None:
                        break
                if metadata is None:
                    self._logger.debug(
                        f'Check caom2 collection GEMINIPROC'
                    )
                    # uri = mc.build_uri('GEMINI', file_name)
                    uri = f'ad:GEMINI/{file_name}'
                    em.defining_metadata_finder._tap_client = (
                        self._sc2_client
                    )
                    metadata = em.defining_metadata_finder._check_caom2(
                        uri, 'GEMINIPROC'
                    )
            finally:
                em.defining_metadata_finder._tap_client = original_client
    else:
        self._logger.debug('Check unconnected local')
        metadata = em.defining_metadata_finder._check_local(file_name)
    if metadata is None:
        raise mc.CadcException(f'No metadata for {file_name}')
    if metadata.data_label is not None:
        metadata.data_label = repair_data_label(
            file_name, metadata.data_label
        )
    self._logger.debug(f'End _get_obs_id')
    return metadata.data_label
def visit(observation, **kwargs):
    """
    If the observation says the data release date is past, attempt to
    retrieve the fits file if it is not already at CADC.
    """
    mc.check_param(observation, Observation)
    working_dir = kwargs.get('working_directory', './')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning('Need a cadc_client to update. Stopping pull visitor.')
        return
    stream = kwargs.get('stream')
    if stream is None:
        raise mc.CadcException('Visitor needs a stream parameter.')
    observable = kwargs.get('observable')
    if observable is None:
        raise mc.CadcException('Visitor needs an observable parameter.')

    count = 0
    if observable.rejected.is_bad_metadata(observation.observation_id):
        logging.info(f'Stopping visit for {observation.observation_id} '
                     f'because of bad metadata.')
    else:
        for plane in observation.planes.values():
            if (plane.data_release is None or
                    plane.data_release > datetime.utcnow()):
                logging.error(f'Plane {plane.product_id} is proprietary '
                              f'until {plane.data_release}. No file access.')
                continue
            for artifact in plane.artifacts.values():
                if gem_name.GemName.is_preview(artifact.uri):
                    continue
                try:
                    f_name = mc.CaomName(artifact.uri).file_name
                    file_url = '{}/{}'.format(FILE_URL, f_name)
                    mc.look_pull_and_put(
                        f_name, working_dir, file_url, gem_name.ARCHIVE,
                        stream, MIME_TYPE, cadc_client,
                        artifact.content_checksum.checksum,
                        observable.metrics)
                except Exception as e:
                    if not (observable.rejected.check_and_record(
                            str(e), observation.observation_id)):
                        raise e
    logging.info(f'Completed pull visitor for {observation.observation_id}.')
    return {'observation': count}
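# Hedged usage sketch, not from the source: the kwargs contract the pull
# visitor above expects, mirroring test_pull_augmentation earlier. The
# rejected-record path and stream name are placeholders; cadc_client is
# whatever client the caller already has.
def _example_pull_visit(observation, cadc_client):
    observable = mc.Observable(
        mc.Rejected('/usr/src/app/rejected.yml'), mc.Metrics(mc.Config()))
    kwargs = {
        'working_directory': '/usr/src/app',
        'cadc_client': cadc_client,
        'stream': 'default',
        'observable': observable,
    }
    return visit(observation, **kwargs)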