Ejemplo n.º 1
0
 def build(self, entry):
     """
     Create the StorageName instance that represents ``entry``.

     Which constructor arguments are used depends on the configuration:
     ``features.supports_latest_client``, the configured task types,
     ``use_local_files`` and ``features.use_file_names``.

     :param entry: a Gemini file name or observation ID, depending on
         the configuration
     :return: an instance of StorageName for use in execute_composable.
     :raises mc.CadcException: wraps any exception raised while building,
         including the one raised when no configured branch applies.
     """
     self._logger.debug(f'Build a StorageName instance for {entry}.')
     try:
         latest_client = self._config.features.supports_latest_client
         if (
             not latest_client
             and mc.TaskType.INGEST_OBS in self._config.task_types
             and '.fits' not in entry
         ):
             # Anything that is NOT ALOPEKE/ZORRO, the only two
             # instruments that change the behaviour of the GemName
             # constructor, can use a hard-coded instrument. Yes, that
             # abstraction leaks.
             self._logger.debug('INGEST_OBS, hard-coded instrument.')
             hard_coded = external_metadata.Inst.CIRPASS
             storage_name = gem_name.GemName(
                 obs_id=entry, instrument=hard_coded, entry=entry
             )
         else:
             # Populate self._instrument first; the source depends on
             # the configuration.
             if (
                 mc.TaskType.SCRAPE in self._config.task_types
                 or self._config.use_local_files
             ):
                 self._read_instrument_locally(entry)
             elif self._config.features.use_file_names:
                 self._read_instrument_remotely(entry)
             else:
                 raise mc.CadcException('The need has not been encountered '
                                        'in the real world yet.')
             name_kwargs = {
                 'file_name': entry,
                 'instrument': self._instrument,
             }
             if latest_client:
                 # only the latest client path passes collection/scheme
                 name_kwargs['v_collection'] = gem_name.COLLECTION
                 name_kwargs['v_scheme'] = gem_name.V_SCHEME
             name_kwargs['entry'] = entry
             storage_name = gem_name.GemName(**name_kwargs)
         self._logger.debug('Done build.')
         return storage_name
     except Exception as err:
         self._logger.error(err)
         self._logger.debug(traceback.format_exc())
         raise mc.CadcException(err)
Ejemplo n.º 2
0
def test_preview_augment_delete_preview():
    """
    The plane starts with a preview artifact that represents a
    non-existent file, so the visit is expected to remove that artifact
    from the CAOM observation.
    """
    product_id = 'S20080610S0045'
    observation = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, 'GS-2008A-C-5-35-002.fits.xml')
    )
    assert len(observation.planes[product_id].artifacts) == 2, (
        'initial condition'
    )

    # the preview for the product under test is recorded as known-missing
    rejected = mc.Rejected('/tmp/nonexistent')
    rejected.content = {
        'bad_metadata': [],
        'no_preview': [
            'S20080610S0043.jpg',
            'S20080610S0041.jpg',
            'S20080610S0044.jpg',
            'S20080610S0045.jpg',
        ],
    }
    metrics = mc.Metrics(mc.Config())
    observable = mc.Observable(rejected, metrics)
    storage_name = gem_name.GemName(file_name=f'{product_id}.fits')

    observation = preview_augmentation.visit(
        observation,
        working_directory=TEST_DATA_DIR,
        clients=None,
        stream='stream',
        observable=observable,
        storage_name=storage_name,
    )
    assert observation is not None, 'expect a result'
    assert len(observation.planes[product_id].artifacts) == 1, (
        'post condition'
    )
Ejemplo n.º 3
0
 def build(self, entry):
     """
     Create a GemName for ``entry`` and fill in its source names and
     observation ID.

     :param entry: str a Gemini file name, or a fully-qualified file
         name on disk.
     :return: an instance of StorageName for use in execute_composable.
     :raises mc.CadcException: wraps any exception raised while building.
     """
     self._logger.debug(f'Build a StorageName instance for {entry}.')
     try:
         # drop any leading directories; a bare name is left unchanged
         f_name = path.basename(entry)
         from_local_source = (
             mc.TaskType.SCRAPE in self._config.task_types
             or self._config.use_local_files
         )
         if from_local_source:
             self._logger.debug('Using entry for source.')
             storage_name = gem_name.GemName(file_name=f_name)
             storage_name.source_names = [entry]
         else:
             if '.fits' in entry or '.jpg' in entry:
                 self._logger.debug('Using file_id for source.')
                 candidate = f_name
             else:
                 # This case exists so that retries.txt entries are
                 # handled properly, as retries.txt uses the source_names
                 # array. For GemName, source_names is a list of file_ids.
                 #
                 # If the list of inputs is a list of data labels, this
                 # is the wrong thing to do, but there's really no data
                 # label-based processing left operationally.
                 self._logger.debug(
                     'entry might be file_id, try a made-up name.'
                 )
                 candidate = f'{entry}.fits'
             storage_name = gem_name.GemName(file_name=candidate)
             storage_name.source_names = [storage_name.file_id]
         self._metadata_reader.set(storage_name)
         # The StorageName instance is only partially constructed at
         # this point; the observation ID is filled in from metadata.
         storage_name.obs_id = self._metadata.data_label(
             storage_name.file_uri
         )
         self._logger.debug('Done build.')
         return storage_name
     except Exception as err:
         self._logger.error(err)
         self._logger.debug(traceback.format_exc())
         raise mc.CadcException(err)
Ejemplo n.º 4
0
def test_preview_augment(http_mock):
    """
    A successful preview retrieval should add two new artifacts to the
    plane: one for a thumbnail and one for a preview.

    The data client ``get`` is configured to raise, and ``http_mock``
    delegates to ``_get_mock``.

    :param http_mock: mock for the HTTP retrieval. NOTE(review): the patch
        decorator that supplies it is not visible here - confirm the
        patched target.
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start from a clean slate: no preview left over from an earlier run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        http_mock.side_effect = _get_mock
        obs = preview_augmentation.visit(obs, **kwargs)
        test_url = (f'{preview_augmentation.PREVIEW_URL}'
                    f'{TEST_PRODUCT_ID}.fits')
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises AssertionError itself on a mismatch;
        # it takes no failure message
        http_mock.assert_called_with(test_url, test_prev)
        assert cadc_client_mock.put.called, 'put mock not called'
        cadc_client_mock.put.assert_called_with(
            '/test_files',
            'cadc:GEMINI/GN2001BQ013-04_th.jpg',
        )
        assert obs is not None, 'expect a result'
        assert (len(
            obs.planes[TEST_PRODUCT_ID].artifacts) == 3), 'two new artifacts'
        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg',
                                 'cadc')
        assert (prev_uri
                in obs.planes[TEST_PRODUCT_ID].artifacts.keys()), 'no preview'
        assert (thumb_uri
                in obs.planes[TEST_PRODUCT_ID].artifacts), 'no thumbnail'
    finally:
        # do not leave a generated preview behind for other tests
        if os.path.exists(test_prev):
            os.unlink(test_prev)
Ejemplo n.º 5
0
def test_set(retrieve_json_mock, retrieve_headers_mock):
    """
    After GeminiMetadataReader.set is called for one StorageName, each of
    the reader's internal collections should hold exactly one entry.

    :param retrieve_json_mock: mock whose side effect is
        gem_mocks.mock_get_obs_metadata
    :param retrieve_headers_mock: mock whose side effect is
        gem_mocks._mock_headers
    """
    retrieve_json_mock.side_effect = gem_mocks.mock_get_obs_metadata
    retrieve_headers_mock.side_effect = gem_mocks._mock_headers

    storage_name = gem_name.GemName(file_name='N20030104S0065.fits')
    storage_name.obs_id = 'GN-CAL20030104-14-001'

    reader = gemini_metadata.GeminiMetadataReader(Mock(), Mock(), Mock())
    reader.set(storage_name)

    expectations = (
        ('_json_metadata', 'json entries'),
        ('_headers', 'header entries'),
        ('_file_info', 'file info entries'),
    )
    for attribute, message in expectations:
        assert len(getattr(reader, attribute)) == 1, message
Ejemplo n.º 6
0
    def build(self, entry):
        """
        Create the GemName for a Gemini file name.

        :param entry: a Gemini file name
        :return: an instance of StorageName for use in execute_composable.
        :raises NotImplementedError: when the configuration asks for local
            files.
        """
        if self._config.use_local_files:
            raise NotImplementedError('The need has not been encountered '
                                      'in the real world.')

        # The observation metadata is retrieved before get_instrument()
        # is called; presumably get_instrument() reads what this call
        # stored - confirm against external_metadata.
        file_id = gem_name.GemName.remove_extensions(entry)
        external_metadata.get_obs_metadata(file_id)
        return gem_name.GemName(file_name=entry, instrument=get_instrument())
Ejemplo n.º 7
0
def test_preview_augment_known_no_preview():
    """
    A rejected record says the preview is known not to exist, so the
    visit should not attempt any retrieval or storage, and the
    plane/artifact structure should be unchanged.
    """
    mc_module = 'caom2pipe.manage_composable'
    try:
        observation = mc.read_obs_from_file(TEST_OBS_FILE)
        observation.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
        assert len(observation.planes[TEST_PRODUCT_ID].artifacts) == 1, (
            'initial condition'
        )

        # begin with no rejected file, then record the missing preview
        if os.path.exists(REJECTED_FILE):
            os.unlink(REJECTED_FILE)
        rejected = mc.Rejected(REJECTED_FILE)
        rejected.record(mc.Rejected.NO_PREVIEW, f'{TEST_PRODUCT_ID}.jpg')
        observable = mc.Observable(rejected, mc.Metrics(mc.Config()))
        storage_name = gem_name.GemName(file_name=TEST_FP_FILE)

        data_client = Mock()
        clients = Mock()
        clients.data_client = data_client

        with patch(
            f'{mc_module}.http_get'
        ) as http_mock, patch(
            f'{mc_module}.data_put'
        ) as ad_put_mock, patch(
            f'{mc_module}.get_artifact_metadata'
        ) as art_mock, patch(
            f'{mc_module}.exec_cmd'
        ) as exec_mock:
            # NOTE(review): this makes data_get *return* an exception
            # instance rather than raise it (no side_effect) - confirm
            # that is the intent.
            data_client.return_value.data_get.return_value = (
                mc.CadcException('test')
            )
            observation = preview_augmentation.visit(
                observation,
                working_directory=TEST_DATA_DIR,
                clients=clients,
                stream='stream',
                observable=observable,
                storage_name=storage_name,
            )
            assert not http_mock.called, 'http mock should not be called'
            assert not ad_put_mock.called, 'ad put mock should not be called'
            assert not art_mock.called, 'art mock should not be called'
            assert not exec_mock.called, 'exec mock should not be called'
            assert observation is not None, 'expect a result'
            assert len(observation.planes[TEST_PRODUCT_ID].artifacts) == 1, (
                'no new artifacts'
            )

        rejected.persist_state()
        assert os.path.exists(REJECTED_FILE)
    finally:
        # remove the rejected file this test may have written
        if os.path.exists(REJECTED_FILE):
            os.unlink(REJECTED_FILE)
Ejemplo n.º 8
0
def test_pull_augmentation(http_mock, json_mock, header_mock, file_type_mock):
    """
    pull_augmentation.visit should retrieve the file over HTTP, store it
    with the data client, and leave the plane with a single artifact
    whose URI is the expected one.

    NOTE(review): the patch decorators that supply the mocks are not
    visible here - confirm the patched targets.

    :param http_mock: mock for the HTTP retrieval
    :param json_mock: mock whose side effect is gem_mocks.mock_retrieve_json
    :param header_mock: mock whose side effect is gem_mocks._mock_headers
    :param file_type_mock: mock that returns the file type
    """
    obs = mc.read_obs_from_file(TEST_OBS_AD_URI_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    original_uri = 'gemini:GEMINI/GN2001BQ013-04.fits'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    assert (
        original_uri
        in obs.planes[TEST_PRODUCT_ID].artifacts.keys()), 'initial condition'
    test_uri = f'{SCHEME}:{COLLECTION}/{TEST_PRODUCT_ID}.fits'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    cadc_client_mock.return_value.info.return_value = None
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    json_mock.side_effect = gem_mocks.mock_retrieve_json
    filter_cache = svofps.FilterMetadataCache(Mock())
    test_reader = gemini_metadata.GeminiFileMetadataReader(
        Mock(), Mock(), filter_cache)
    test_storage_name = gem_name.GemName(file_name='GN2001BQ013-04.fits')
    header_mock.side_effect = gem_mocks._mock_headers
    # 'return_value' (singular) is the attribute Mock honours; a misspelt
    # attribute is silently accepted and leaves the mock unconfigured
    file_type_mock.return_value = 'application/fits'
    test_reader.set(test_storage_name)
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'clients': clients_mock,
        'observable': test_observable,
        'metadata_reader': test_reader,
        'storage_name': test_storage_name,
    }

    obs = pull_augmentation.visit(obs, **kwargs)
    test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
    test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
    # assert_called_with raises AssertionError itself on a mismatch; it
    # takes no failure message
    http_mock.assert_called_with(test_url, test_prev)
    assert cadc_client_mock.put.called, 'put mock not called'
    cadc_client_mock.put.assert_called_with(
        TEST_DATA_DIR, 'gemini:GEMINI/GN2001BQ013-04.fits')
    assert obs is not None, 'expect a result'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'no new artifacts'
    try:
        # only the lookup matters here: a KeyError means the artifact is
        # still keyed by the original URI
        _ = obs.planes[TEST_PRODUCT_ID].artifacts[test_uri]
    except KeyError:
        # because CAOM does magic
        result = obs.planes[TEST_PRODUCT_ID].artifacts[original_uri]
        assert result.uri == test_uri, f'wrong uri {result.uri}'
Ejemplo n.º 9
0
def test_preview_augment_unknown_no_preview():
    """
    What happens when it's not known that there's no preview: the HTTP
    retrieval is attempted and fails with 'Not Found', and nothing is
    stored, no artifact metadata is requested, and no command is run.
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    # make sure the rejected file is empty
    if os.path.exists(REJECTED_FILE):
        os.unlink(REJECTED_FILE)
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')

    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'clients': clients_mock,
        'stream': 'stream',
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    not_found = mc.CadcException(
        'Not Found for url: https://archive.gemini.edu/preview')
    with patch(
        'caom2pipe.manage_composable.http_get', side_effect=not_found
    ) as http_mock, patch(
        'caom2pipe.manage_composable.data_put'
    ) as ad_put_mock, patch(
        'caom2pipe.manage_composable.get_artifact_metadata'
    ) as art_mock, patch(
        'caom2pipe.manage_composable.exec_cmd'
    ) as exec_mock:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect result'
        test_url = f'{preview_augmentation.PREVIEW_URL}{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.jpg'
        # assert_called_with raises AssertionError itself on a mismatch;
        # it takes no failure message
        http_mock.assert_called_with(test_url, test_prev)
        assert not ad_put_mock.called, 'ad put mock should not be called'
        assert not art_mock.called, 'art mock should not be called'
        assert not exec_mock.called, 'exec mock should not be called'
Ejemplo n.º 10
0
def _run_single():
    """
    Run the processing for a single entry.

    ``sys.argv[1]`` is the file name to process. ``sys.argv[2]`` is the
    proxy: when ``features.run_in_airflow`` is set, it is written to a
    temporary file and the configuration points at that file; otherwise
    it is used as the proxy value directly.

    :return: the value returned by rc.run_single. Per the original
        documentation this is 0 if successful and -1 if there's any sort
        of failure; the return status is used by airflow for task
        instance management and reporting.
    :raises mc.CadcException: when ``features.use_file_names`` is not set,
        as there is no code to handle running by observation ID.
    """
    config = mc.Config()
    config.get_executors()
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    proxy_file = None
    try:
        if config.features.run_in_airflow:
            # the temporary file must stay open (and so exist on disk)
            # until the run finishes
            proxy_file = tempfile.NamedTemporaryFile()
            mc.write_to_file(proxy_file.name, sys.argv[2])
            config.proxy = proxy_file.name
        else:
            config.proxy = sys.argv[2]
        config.stream = 'default'
        if config.features.use_file_names:
            storage_name = gem_name.GemName(file_name=sys.argv[1])
        else:
            raise mc.CadcException('No code to handle running GEM by obs id.')
        external_metadata.init_global(config=config)
        meta_visitors = _define_meta_visitors(config)
        return rc.run_single(config, storage_name, main_app.APPLICATION,
                             meta_visitors, DATA_VISITORS)
    finally:
        if proxy_file is not None:
            # Remove the proxy copy deterministically, including when an
            # exception propagates, rather than waiting for garbage
            # collection. Cleanup is best-effort: a failure here must not
            # replace the run's return status or its exception.
            try:
                proxy_file.close()
            except OSError:
                pass
Ejemplo n.º 11
0
def test_preview_augment_failure(http_mock):
    """
    Mimic 'Not Found' behaviour from the preview retrieval.

    This should result in no new artifacts being added to the plane, and
    a second visit should not attempt the retrieval again.

    :param http_mock: mock for the HTTP retrieval. NOTE(review): the patch
        decorator that supplies it is not visible here - confirm the
        patched target.
    """

    def _failure_mock(ignore_url, ignore_local_fqn):
        raise mc.CadcException(
            'Could not retrieve /usr/src/app/N20211007A0003/'
            'N20211007A0003b.jpg from '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits. Failed '
            'with 404 Client Error: Not Found for url: '
            'https://archive.gemini.edu/preview/N20211007A0003b.fits')

    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_metrics = mc.Metrics(test_config)
    test_observable = mc.Observable(test_rejected, test_metrics)
    cadc_client_mock = Mock()
    clients_mock = Mock()
    clients_mock.data_client = cadc_client_mock
    test_storage_name = gem_name.GemName(file_name=f'{TEST_PRODUCT_ID}.fits')
    kwargs = {
        'working_directory': '/test_files',
        'clients': clients_mock,
        'observable': test_observable,
        'storage_name': test_storage_name,
    }

    # start from a clean slate: no preview left over from an earlier run
    test_prev = f'/test_files/{TEST_PRODUCT_ID}.jpg'
    if os.path.exists(test_prev):
        os.unlink(test_prev)

    try:
        cadc_client_mock.get.side_effect = exceptions.UnexpectedException(
            'test')
        http_mock.side_effect = _failure_mock
        obs = preview_augmentation.visit(obs, **kwargs)
        test_url = (f'{preview_augmentation.PREVIEW_URL}'
                    f'{TEST_PRODUCT_ID}.fits')
        assert http_mock.called, 'http mock should be called'
        # assert_called_with raises AssertionError itself on a mismatch;
        # it takes no failure message
        http_mock.assert_called_with(test_url, test_prev)
        assert not cadc_client_mock.put.called, 'put mock should not be called'
        assert obs is not None, 'expect a result'
        assert (len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1
                ), 'same as the pre-condition'
        prev_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}.jpg', SCHEME)
        thumb_uri = mc.build_uri(COLLECTION, f'{TEST_PRODUCT_ID}_th.jpg',
                                 'cadc')
        assert (prev_uri not in obs.planes[TEST_PRODUCT_ID].artifacts.keys()
                ), 'should be no preview'
        assert (thumb_uri not in obs.planes[TEST_PRODUCT_ID].artifacts
                ), 'should be no thumbnail'
        # NOTE(review): the condition asserts that is_no_preview(prev_uri)
        # is False, while the message reads 'preview should be tracked' -
        # confirm which is intended (the lookup is by URI here).
        assert not (
            test_rejected.is_no_preview(prev_uri)), 'preview should be tracked'

        assert http_mock.call_count == 1, 'wrong number of calls'
        # now try again to generate the preview, and ensure that the
        # rejected tracking is working
        obs = preview_augmentation.visit(obs, **kwargs)
        assert obs is not None, 'expect a result the second time'
        assert http_mock.call_count == 1, 'never even tried to retrieve it'
    finally:
        # do not leave a generated preview behind for other tests
        if os.path.exists(test_prev):
            os.unlink(test_prev)