def build(self, entry):
    """
    Create the StorageName that represents a single unit of work.

    :param entry: a Gemini file name or observation ID, depending on the
        configuration
    :return: an instance of StorageName for use in execute_composable.
    :raises mc.CadcException: for a configuration that is not supported,
        and as the wrapper for any other exception raised while building.
    """
    self._logger.debug(f'Build a StorageName instance for {entry}.')

    def _named_by_file(with_version_info):
        # Read the instrument from the local or the remote source, as the
        # configuration dictates (presumably this populates
        # self._instrument), then name the work by its file name.
        if (
            mc.TaskType.SCRAPE in self._config.task_types
            or self._config.use_local_files
        ):
            self._read_instrument_locally(entry)
        elif self._config.features.use_file_names:
            self._read_instrument_remotely(entry)
        else:
            raise mc.CadcException(
                'The need has not been encountered in the real world yet.'
            )
        naming = {'file_name': entry, 'instrument': self._instrument}
        if with_version_info:
            # only the latest-client path passes collection and scheme
            naming['v_collection'] = gem_name.COLLECTION
            naming['v_scheme'] = gem_name.V_SCHEME
        naming['entry'] = entry
        return gem_name.GemName(**naming)

    try:
        if self._config.features.supports_latest_client:
            result = _named_by_file(with_version_info=True)
        elif (
            mc.TaskType.INGEST_OBS in self._config.task_types
            and '.fits' not in entry
        ):
            # The entry is treated as an observation ID. CIRPASS stands in
            # for "any instrument other than ALOPEKE/ZORRO": per the
            # original author's note, those two are the only instruments
            # that alter how the GemName constructor behaves.
            self._logger.debug('INGEST_OBS, hard-coded instrument.')
            result = gem_name.GemName(
                obs_id=entry,
                instrument=external_metadata.Inst.CIRPASS,
                entry=entry,
            )
        else:
            result = _named_by_file(with_version_info=False)
        self._logger.debug('Done build.')
        return result
    except Exception as e:
        # log the summary at error level, keep the stack for debug level
        self._logger.error(e)
        self._logger.debug(traceback.format_exc())
        raise mc.CadcException(e)
def test_preview_augment_delete_preview():
    # The plane begins with a preview artifact that represents a file which
    # does not exist, so the visit is expected to remove that artifact from
    # the CAOM observation.
    product_id = 'S20080610S0045'
    obs = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, 'GS-2008A-C-5-35-002.fits.xml')
    )
    assert len(obs.planes[product_id].artifacts) == 2, 'initial condition'

    # the product under test is already listed as having no preview
    rejected = mc.Rejected('/tmp/nonexistent')
    rejected.content = {
        'bad_metadata': [],
        'no_preview': [
            'S20080610S0043.jpg',
            'S20080610S0041.jpg',
            'S20080610S0044.jpg',
            'S20080610S0045.jpg',
        ],
    }
    observable = mc.Observable(rejected, mc.Metrics(mc.Config()))
    storage_name = gem_name.GemName(file_name=f'{product_id}.fits')

    obs = preview_augmentation.visit(
        obs,
        working_directory=TEST_DATA_DIR,
        clients=None,
        stream='stream',
        observable=observable,
        storage_name=storage_name,
    )

    assert obs is not None, 'expect a result'
    assert len(obs.planes[product_id].artifacts) == 1, 'post condition'
def build(self, entry):
    """
    Create the StorageName for one entry, and set its source_names and
    obs_id.

    :param entry: str a Gemini file name, or a fully-qualified file name
        on disk.
    :return: an instance of StorageName for use in execute_composable.
    :raises mc.CadcException: as the wrapper for any exception raised
        while building.
    """
    self._logger.debug(f'Build a StorageName instance for {entry}.')
    try:
        # basename() leaves a bare file name untouched
        f_name = path.basename(entry)
        if (
            mc.TaskType.SCRAPE in self._config.task_types
            or self._config.use_local_files
        ):
            # the entry itself, directories included, is the source
            self._logger.debug('Using entry for source.')
            result = gem_name.GemName(file_name=f_name)
            result.source_names = [entry]
        else:
            if '.fits' in entry or '.jpg' in entry:
                self._logger.debug('Using file_id for source.')
                result = gem_name.GemName(file_name=f_name)
            else:
                # No recognizable extension: this path keeps retries.txt
                # entries working, because retries.txt is written from
                # the source_names array, and for GemName that array
                # holds file_ids.
                #
                # It would be the wrong choice for a list of data labels,
                # but data label-based processing is essentially gone
                # from operations.
                self._logger.debug(
                    'entry might be file_id, try a made-up name.'
                )
                result = gem_name.GemName(file_name=f'{entry}.fits')
            result.source_names = [result.file_id]
        self._metadata_reader.set(result)
        # the StorageName is still only partially constructed here; the
        # obs_id is filled in from the data label lookup
        result.obs_id = self._metadata.data_label(result.file_uri)
        self._logger.debug('Done build.')
        return result
    except Exception as e:
        # log the summary at error level, keep the stack for debug level
        self._logger.error(e)
        self._logger.debug(traceback.format_exc())
        raise mc.CadcException(e)
def test_preview_augment(http_mock):
    # A successful visit should add two new artifacts to the plane: one for
    # a thumbnail and one for a preview.
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start without a left-over preview from a previous run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        # the CADC get fails, while the http retrieval is served by
        # _get_mock
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test'
        )
        http_mock.side_effect = _get_mock
        obs = preview_augmentation.visit(obs, **kwargs)

        test_url = f'{preview_augmentation.PREVIEW_URL}{TEST_PRODUCT_ID}.fits'
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises its own AssertionError on a mismatch;
        # a trailing ", 'message'" would only build a discarded tuple
        http_mock.assert_called_with(test_url, test_prev)
        assert cadc_client_mock.put.called, 'put mock not called'
        cadc_client_mock.put.assert_called_with(
            '/test_files',
            'cadc:GEMINI/GN2001BQ013-04_th.jpg',
        )

        assert obs is not None, 'expect a result'
        artifacts = obs.planes[TEST_PRODUCT_ID].artifacts
        assert len(artifacts) == 3, 'two new artifacts'
        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(
            COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg', 'cadc'
        )
        assert prev_uri in artifacts, 'no preview'
        assert thumb_uri in artifacts, 'no thumbnail'
    finally:
        # remove the preview file if it exists, pass or fail
        if os.path.exists(test_prev):
            os.unlink(test_prev)
def test_set(retrieve_json_mock, retrieve_headers_mock):
    # After set() is called for one StorageName, the reader should hold
    # exactly one json entry, one header entry and one file info entry.
    retrieve_json_mock.side_effect = gem_mocks.mock_get_obs_metadata
    retrieve_headers_mock.side_effect = gem_mocks._mock_headers

    storage_name = gem_name.GemName(file_name='N20030104S0065.fits')
    storage_name.obs_id = 'GN-CAL20030104-14-001'

    reader = gemini_metadata.GeminiMetadataReader(Mock(), Mock(), Mock())
    reader.set(storage_name)

    assert len(reader._json_metadata) == 1, 'json entries'
    assert len(reader._headers) == 1, 'header entries'
    assert len(reader._file_info) == 1, 'file info entries'
def build(self, entry):
    """
    Create the StorageName for a Gemini file, after retrieving the
    observation metadata for it.

    :param entry: a Gemini file name
    :return: an instance of StorageName for use in execute_composable.
    :raises NotImplementedError: when the configuration says to use local
        files.
    """
    if self._config.use_local_files:
        raise NotImplementedError(
            'The need has not been encountered in the real world.'
        )
    # the metadata is retrieved by file id, i.e. the name without extensions
    file_id = gem_name.GemName.remove_extensions(entry)
    external_metadata.get_obs_metadata(file_id)
    # presumably get_instrument() reads what get_obs_metadata() just
    # retrieved, so the call order matters - TODO confirm
    found_instrument = get_instrument()
    return gem_name.GemName(file_name=entry, instrument=found_instrument)
def test_preview_augment_known_no_preview():
    # The rejected file records that the preview is known not to exist, so
    # the visit should not attempt any retrieval, and the plane/artifact
    # structure should be left unchanged.
    try:
        obs = mc.read_obs_from_file(TEST_OBS_FILE)
        plane = obs.planes[TEST_PRODUCT_ID]
        plane.data_release = datetime.utcnow()
        assert len(plane.artifacts) == 1, 'initial condition'

        # begin with a fresh rejected record that lists only this preview
        if os.path.exists(REJECTED_FILE):
            os.unlink(REJECTED_FILE)
        rejected = mc.Rejected(REJECTED_FILE)
        rejected.record(mc.Rejected.NO_PREVIEW, f'{TEST_PRODUCT_ID}.jpg')
        observable = mc.Observable(rejected, mc.Metrics(mc.Config()))
        storage_name = gem_name.GemName(file_name=TEST_FP_FILE)

        data_client = Mock()
        clients = Mock()
        clients.data_client = data_client
        visit_args = dict(
            working_directory=TEST_DATA_DIR,
            clients=clients,
            stream='stream',
            observable=observable,
            storage_name=storage_name,
        )

        with patch('caom2pipe.manage_composable.http_get') as http_mock, \
                patch('caom2pipe.manage_composable.data_put') as ad_put_mock, \
                patch(
                    'caom2pipe.manage_composable.get_artifact_metadata'
                ) as art_mock, \
                patch('caom2pipe.manage_composable.exec_cmd') as exec_mock:
            data_client.return_value.data_get.return_value = mc.CadcException(
                'test'
            )
            obs = preview_augmentation.visit(obs, **visit_args)

            # none of the retrieval or storage paths should have been used
            assert not http_mock.called, 'http mock should not be called'
            assert not ad_put_mock.called, 'ad put mock should not be called'
            assert not art_mock.called, 'art mock should not be called'
            assert not exec_mock.called, 'exec mock should not be called'
            assert obs is not None, 'expect a result'
            assert (
                len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1
            ), 'no new artifacts'

        rejected.persist_state()
        assert os.path.exists(REJECTED_FILE)
    finally:
        # do not leave the rejected file behind for other tests
        if os.path.exists(REJECTED_FILE):
            os.unlink(REJECTED_FILE)
def test_pull_augmentation(http_mock, json_mock, header_mock, file_type_mock):
    # The pull visit should retrieve the file over http, put it to CADC
    # storage, and leave the plane with a single artifact whose uri is the
    # SCHEME/COLLECTION form of the original uri.
    obs = mc.read_obs_from_file(TEST_OBS_AD_URI_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    original_uri = 'gemini:GEMINI/GN2001BQ013-04.fits'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    assert (
        original_uri in obs.planes[TEST_PRODUCT_ID].artifacts.keys()
    ), 'initial condition'
    test_uri = f'{SCHEME}:{COLLECTION}/{TEST_PRODUCT_ID}.fits'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    cadc_client_mock.return_value.info.return_value = None
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock

    json_mock.side_effect = gem_mocks.mock_retrieve_json
    header_mock.side_effect = gem_mocks._mock_headers
    # "return_value" is the attribute Mock honours; the previous spelling,
    # "return_values", only created an unused attribute on the mock
    file_type_mock.return_value = 'application/fits'

    filter_cache = svofps.FilterMetadataCache(Mock())
    test_reader = gemini_metadata.GeminiFileMetadataReader(
        Mock(), Mock(), filter_cache
    )
    test_storage_name = gem_name.GemName(file_name='GN2001BQ013-04.fits')
    test_reader.set(test_storage_name)
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'clients': clients_mock,
        'observable': test_observable,
        'metadata_reader': test_reader,
        'storage_name': test_storage_name,
    }

    obs = pull_augmentation.visit(obs, **kwargs)

    test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
    test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
    # assert_called_with raises its own AssertionError on a mismatch; a
    # trailing ", 'message'" would only build a discarded tuple
    http_mock.assert_called_with(test_url, test_prev)
    assert cadc_client_mock.put.called, 'put mock not called'
    cadc_client_mock.put.assert_called_with(
        TEST_DATA_DIR, 'gemini:GEMINI/GN2001BQ013-04.fits'
    )
    assert obs is not None, 'expect a result'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'no new artifacts'
    try:
        obs.planes[TEST_PRODUCT_ID].artifacts[test_uri]
    except KeyError:
        # because CAOM does magic: the artifact may still be keyed by the
        # original uri, in which case its uri attribute is checked instead
        result = obs.planes[TEST_PRODUCT_ID].artifacts[original_uri]
        assert result.uri == test_uri, f'wrong uri {result.uri}'
def test_preview_augment_unknown_no_preview():
    # What happens when it is not yet known that there is no preview: the
    # http retrieval is attempted and fails, and nothing is stored.
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    # make sure the rejected file is empty
    if os.path.exists(REJECTED_FILE):
        os.unlink(REJECTED_FILE)
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'clients': clients_mock,
        'stream': 'stream',
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    with patch(
        'caom2pipe.manage_composable.http_get',
        side_effect=mc.CadcException(
            'Not Found for url: https://archive.gemini.edu/preview'
        ),
    ) as http_mock, patch(
        'caom2pipe.manage_composable.data_put'
    ) as ad_put_mock, patch(
        'caom2pipe.manage_composable.get_artifact_metadata'
    ) as art_mock, patch(
        'caom2pipe.manage_composable.exec_cmd'
    ) as exec_mock:
        # the CADC get fails as well as the http retrieval
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test'
        )
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect result'

        test_url = f'{preview_augmentation.PREVIEW_URL}{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.jpg'
        # assert_called_with raises its own AssertionError on a mismatch;
        # a trailing ", 'message'" would only build a discarded tuple
        http_mock.assert_called_with(test_url, test_prev)
        assert not ad_put_mock.called, 'ad put mock should not be called'
        assert not art_mock.called, 'art mock should not be called'
        assert not exec_mock.called, 'exec mock should not be called'
def _run_single():
    """
    Run the processing for a single entry.

    The file name is read from sys.argv[1] and the proxy from sys.argv[2].

    :return the value returned by rc.run_single. As originally documented,
        that is 0 if successful and -1 if there's any sort of failure, and
        the status is used by airflow for task instance management and
        reporting.
    :raises mc.CadcException: when the configuration does not use file
        names, since running by obs id is not implemented.
    """
    config = mc.Config()
    config.get_executors()
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    proxy = sys.argv[2]
    if config.features.run_in_airflow:
        # In airflow the argument is written into a temporary file, and the
        # configuration points at that file. proxy_file has to stay
        # referenced until this function returns, because the file is
        # removed once the object is closed or collected.
        proxy_file = tempfile.NamedTemporaryFile()
        mc.write_to_file(proxy_file.name, proxy)
        proxy = proxy_file.name
    config.proxy = proxy
    config.stream = 'default'

    if not config.features.use_file_names:
        raise mc.CadcException('No code to handle running GEM by obs id.')
    storage_name = gem_name.GemName(file_name=sys.argv[1])

    external_metadata.init_global(config=config)
    visitors = _define_meta_visitors(config)
    return rc.run_single(
        config,
        storage_name,
        main_app.APPLICATION,
        visitors,
        DATA_VISITORS,
    )
def test_preview_augment_failure(http_mock):
    # Mimic 'Not Found' behaviour from the archive. The visit should add no
    # new artifacts to the plane, and a second visit should not attempt the
    # retrieval again.
    def _failure_mock(ignore_url, ignore_local_fqn):
        raise mc.CadcException(
            'Could not retrieve /usr/src/app/N20211007A0003/'
            'N20211007A0003b.jpg from '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits. Failed '
            'with 404 Client Error: Not Found for url: '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits'
        )

    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start without a left-over preview from a previous run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test'
        )
        http_mock.side_effect = _failure_mock
        obs = preview_augmentation.visit(obs, **kwargs)

        test_url = f'{preview_augmentation.PREVIEW_URL}{TEST_PRODUCT_ID}.fits'
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises its own AssertionError on a mismatch;
        # a trailing ", 'message'" would only build a discarded tuple
        http_mock.assert_called_with(test_url, test_prev)
        assert not cadc_client_mock.put.called, 'put mock should not be called'
        assert obs is not None, 'expect a result'
        assert (
            len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1
        ), 'same as the pre-condition'

        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(
            COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg', 'cadc'
        )
        assert (
            prev_uri not in obs.planes[TEST_PRODUCT_ID].artifacts.keys()
        ), 'should be no preview'
        assert (
            thumb_uri not in obs.planes[TEST_PRODUCT_ID].artifacts
        ), 'should be no thumbnail'
        # NOTE(review): the message reads as if tracking is expected, yet
        # the assertion requires is_no_preview(prev_uri) to be falsy -
        # confirm which of the two is intended
        assert not (
            test_rejected.is_no_preview(prev_uri)
        ), 'preview should be tracked'
        assert http_mock.call_count == 1, 'wrong number of calls'

        # now try again to generate the preview, and ensure that the
        # rejected tracking is working: the call count must not increase
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect a result the second time'
        assert http_mock.call_count == 1, 'never even tried to retrieve it'
    finally:
        # remove the preview file if it exists, pass or fail
        if os.path.exists(test_prev):
            os.unlink(test_prev)