def test_omm_name():
    """Check the file URI and the preview/thumbnail names built by OmmName."""
    # A name given together with its .fits file is expected to resolve to the
    # gzipped file URI.
    sci_name = 'C121212_00001_SCI'
    expected_uri = 'ad:OMM/{}.fits.gz'.format(sci_name)
    sci_subject = OmmName(sci_name, '{}.fits'.format(sci_name))
    assert expected_uri == sci_subject.file_uri

    # Preview and thumbnail names are checked for a name given on its own.
    red_name = 'C121212_sh2-132_J_old_SCIRED'
    expected_prev = '{}_prev.jpg'.format(red_name)
    expected_thumb = '{}_prev_256.jpg'.format(red_name)
    assert expected_prev == OmmName(red_name).prev
    assert expected_thumb == OmmName(red_name).thumb
def _do_prev(file_id, science_fqn, working_dir, plane, cadc_client):
    """
    Create the preview and thumbnail jpg files for a science file with
    fitscut, then hand both to _augment for the given plane.

    :param file_id: identifier given to OmmName to obtain the preview and
        thumbnail file names and URIs.
    :param science_fqn: science file name appended to the fitscut commands.
    :param working_dir: directory in which the two images are written.
    :param plane: plane passed to _augment for both images.
    :param cadc_client: when not None, the two images are also passed to
        _store_smalls.
    :return: the constant 2 (presumably the number of artifacts handled -
        confirm with the caller).
    """
    omm_name = OmmName(file_id)
    preview = omm_name.prev
    preview_fqn = os.path.join(working_dir, preview)
    thumb = omm_name.thumb
    thumb_fqn = os.path.join(working_dir, thumb)

    # Each image is paired with the fitscut command that produces it; the
    # thumbnail command only differs by the fixed output size.
    image_commands = (
        (
            preview_fqn,
            'fitscut --all --autoscale=99.5 --asinh-scale --jpg --invert '
            '--compass {}',
        ),
        (
            thumb_fqn,
            'fitscut --all --output-size=256 --autoscale=99.5 '
            '--asinh-scale --jpg --invert --compass {}',
        ),
    )
    for image_fqn, command in image_commands:
        # Remove any earlier copy before the command output is redirected to
        # the same path.
        if os.access(image_fqn, os.F_OK):
            os.remove(image_fqn)
        mc.exec_cmd_redirect(command.format(science_fqn), image_fqn)

    _augment(plane, omm_name.prev_uri, preview_fqn, ProductType.PREVIEW)
    _augment(plane, omm_name.thumb_uri, thumb_fqn, ProductType.THUMBNAIL)

    if cadc_client is not None:
        _store_smalls(cadc_client, working_dir, preview, thumb)
    return 2
def test_visitor(test_name):
    """
    Run the fits2caom2 augmentation for one header file and compare the
    resulting observation with the matching '.expected.xml' file.

    When differences are found, the actual observation is written next to
    the expected one and the test fails with the list of differences.

    :param test_name: name of the '.header' test file.
    """
    science_file = basename(test_name).replace('.header', '.gz')
    storage_name = OmmName(file_name=science_file)
    uri = storage_name.file_uri

    # The reader is primed by hand so the visit finds the headers and the
    # file info under the file URI.
    file_info = FileInfo(id=uri, file_type='application/fits')
    headers = ac.make_headers_from_file(test_name)
    metadata_reader = rdc.FileMetadataReader()
    metadata_reader._headers = {uri: headers}
    metadata_reader._file_info = {uri: file_info}

    # An optional input observation is used when one exists for the product.
    input_file = f'{TEST_DATA_DIR}/in.{storage_name.product_id}.fits.xml'
    observation = (
        mc.read_obs_from_file(input_file) if exists(input_file) else None
    )
    observation = fits2caom2_augmentation.visit(
        observation,
        storage_name=storage_name,
        metadata_reader=metadata_reader,
    )

    expected_fqn = f'{TEST_DATA_DIR}/{storage_name.file_id}.expected.xml'
    expected = mc.read_obs_from_file(expected_fqn)
    compare_result = get_differences(expected, observation)
    if compare_result is None:
        return

    # Keep the actual observation beside the expected one for inspection.
    actual_fqn = expected_fqn.replace('expected', 'actual')
    mc.write_obs_to_file(observation, actual_fqn)
    compare_text = '\n'.join(compare_result)
    msg = (f'Differences found in observation {expected.observation_id}\n'
           f'{compare_text}')
    raise AssertionError(msg)
def visit(observation, **kwargs):
    """
    Clean up the issue described here (multiple planes for the same photons):
    https://github.com/opencadc-metadata-curation/omm2caom2/issues/3

    Every non-preview artifact is looked up in CADC storage. The plane that
    owns the most recently modified artifact is kept, and every other plane
    with an artifact found in storage is removed from the observation, with
    a Slack message sent for each removal.

    :param observation: Observation to clean up; it is modified in place.
    :param kwargs: 'cadc_client' is needed for any clean-up to happen. When
        it is missing a warning is logged and the observation is untouched.
    :return: the observation that was passed in.
    """
    mc.check_param(observation, Observation)
    logging.info(f'Begin cleanup augmentation for '
                 f'{observation.observation_id}')
    cadc_client = kwargs.get('cadc_client')
    if cadc_client is None:
        logging.warning(
            'Stopping. Need a CADC Client for cleanup augmentation.')
    elif len(observation.planes) > 1:
        # from Daniel, Sylvie - 21-05-20
        # How to figure out which plane is newer:
        # SB - I do not think that we should use the “VERSION” keyword.
        # I think we must go with the ingested date.
        #
        # Daniel Durand
        # Might be better indeed. Need to compare the SCI and the REJECT
        # file and see which one is the latest
        latest_plane_id = None
        latest_timestamp = None
        stale_plane_ids = set()
        for plane in observation.planes.values():
            for artifact in plane.artifacts.values():
                if OmmName.is_preview(artifact.uri):
                    continue
                meta = cadc_client.info(artifact.uri)
                if meta is None:
                    logging.warning(
                        f'Did not find {artifact.uri} in CADC storage.')
                    continue
                current_timestamp = mc.make_time(meta.lastmod)
                if latest_plane_id is None:
                    latest_plane_id = plane.product_id
                    latest_timestamp = current_timestamp
                elif current_timestamp > latest_timestamp:
                    # The previous front-runner has been superseded.
                    stale_plane_ids.add(latest_plane_id)
                    latest_plane_id = plane.product_id
                    latest_timestamp = current_timestamp
                else:
                    stale_plane_ids.add(plane.product_id)

        # The comparison is per artifact, so a plane with several stored
        # artifacts can be recorded as stale and still end up being the
        # newest one. The newest plane must never be removed.
        stale_plane_ids.discard(latest_plane_id)

        # Sorted so the log and Slack messages come out in a stable order.
        for entry in sorted(stale_plane_ids):
            logging.warning(f'Removing plane {entry} from observation '
                            f'{observation.observation_id}. There are '
                            f'duplicate photons.')
            observation.planes.pop(entry)
            _send_slack_message(entry)

    logging.info(f'Completed cleanup augmentation for '
                 f'{observation.observation_id}')
    return observation
def test_preview_augment_plane():
    """
    Run the preview augmentation twice on the same observation: first to add
    the preview and thumbnail artifacts, then again after their checksums
    have been overwritten, expecting the 'md5:' checksums to be back.
    """
    omm_name = OmmName(TEST_OBS, TEST_FILE)
    preview = os.path.join(TESTDATA_DIR, omm_name.prev)
    thumb = os.path.join(TESTDATA_DIR, omm_name.thumb)
    # Remove images left behind by an earlier run.
    for leftover in (preview, thumb):
        if os.path.exists(leftover):
            os.remove(leftover)

    def plane_artifacts():
        # Looked up on every use, so each check sees the current plane.
        return test_obs.planes[TEST_OBS].artifacts

    test_fqn = os.path.join(TESTDATA_DIR, omm_name.model_file_name)
    test_obs = mc.read_obs_from_file(test_fqn)
    assert len(plane_artifacts()) == 1

    preva = 'ad:OMM/C170324_0054_SCI_prev.jpg'
    thumba = 'ad:OMM/C170324_0054_SCI_prev_256.jpg'
    expected_prev = ChecksumURI('md5:f37d21c53055498d1b5cb7753e1c6d6f')
    expected_thumb = ChecksumURI('md5:19661c3c2508ecc22425ee2a05881ed4')

    test_kwargs = {'working_directory': TESTDATA_DIR, 'cadc_client': None}
    test_result = omm_preview_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected a visit return value'
    assert test_result['artifacts'] == 2
    assert len(plane_artifacts()) == 3
    assert os.path.exists(preview)
    assert os.path.exists(thumb)
    assert (
        plane_artifacts()[preva].content_checksum == expected_prev
    ), 'prev checksum failure'
    assert (
        plane_artifacts()[thumba].content_checksum == expected_thumb
    ), 'thumb checksum failure'

    # now do updates
    plane_artifacts()[preva].content_checksum = ChecksumURI(
        'f37d21c53055498d1b5cb7753e1c6d6f')
    plane_artifacts()[thumba].content_checksum = ChecksumURI(
        '19661c3c2508ecc22425ee2a05881ed4')
    test_result = omm_preview_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected update visit return value'
    assert test_result['artifacts'] == 2
    assert len(test_obs.planes) == 1
    assert len(plane_artifacts()) == 3
    assert os.path.exists(preview)
    assert os.path.exists(thumb)
    assert (
        plane_artifacts()[preva].content_checksum == expected_prev
    ), 'prev update failed'
    assert (
        plane_artifacts()[thumba].content_checksum == expected_thumb
    ), 'prev_256 update failed'
def test_omm_name():
    """
    Check the names produced by OmmBuilder and OmmName.

    The builder is exercised with a bare file name and with a '/tmp' path;
    both must give the same file URI and destination URI, while the source
    name keeps the entry as given. OmmName is then checked for the preview
    and thumbnail names, and for the observation ID of a name containing
    coordinates.
    """
    test_config = mc.Config()
    test_config.task_types = []
    test_config.use_local_files = True
    test_builder = OmmBuilder(test_config)

    test_name = 'C121212_00001_SCI'
    expected_uri = f'ad:OMM/{test_name}.fits.gz'
    for entry in [test_name, f'/tmp/{test_name}']:
        test_subject = test_builder.build(f'{entry}.fits')
        assert expected_uri == test_subject.file_uri
        assert (
            test_subject.source_names == [f'{entry}.fits']
        ), 'wrong source name'
        # This message used to repeat 'wrong source name', which pointed at
        # the wrong attribute when the destination check failed.
        assert (
            test_subject.destination_uris[0] == expected_uri
        ), 'wrong destination uri'

    # The original repeated this stanza twice with identical inputs; one
    # copy gives the same coverage.
    test_name = 'C121212_sh2-132_J_old_SCIRED'
    file_name = f'{test_name}_prev_256.jpg'
    assert f'{test_name}_prev.jpg' == OmmName(file_name=file_name).prev
    assert f'{test_name}_prev_256.jpg' == OmmName(file_name=file_name).thumb

    test_obs_id = 'C121121_J024345.57-021326.4_K'
    test_name = f'{test_obs_id}_SCIRED'
    file_name = f'{test_name}.fits.gz'
    assert f'{test_name}_prev.jpg' == OmmName(file_name=file_name).prev
    assert f'{test_name}_prev_256.jpg' == OmmName(file_name=file_name).thumb
    assert OmmName(file_name=file_name).obs_id == test_obs_id
def __init__(self, **kwargs):
    """
    Initialise the parent with COLLECTION and ReleaseType.DATA, then work
    out the storage name and the preview/thumbnail paths for the science
    file.

    :param kwargs: passed through unchanged to the parent initialiser.
    """
    super().__init__(COLLECTION, ReleaseType.DATA, **kwargs)
    # NOTE(review): _unzip is defined elsewhere; it runs before the science
    # file name is read below, so keep this ordering.
    self._unzip()
    self._storage_name = OmmName(file_name=self._science_file)

    image_names = self._storage_name
    work_dir = self._working_dir
    self._preview_fqn = os.path.join(work_dir, image_names.prev)
    self._thumb_fqn = os.path.join(work_dir, image_names.thumb)
    self._logger = logging.getLogger(__name__)
def test_footprint_update_position():
    """
    Check that the footprint augmentation fills in the position bounds of
    the science chunk, and that it raises when called without kwargs.
    """
    omm_name = OmmName(TEST_OBS, TEST_FILE)
    test_kwargs = {'science_file': omm_name.file_name}
    test_fqn = os.path.join(TESTDATA_DIR, omm_name.model_file_name)
    test_obs = mc.read_obs_from_file(test_fqn)

    science_artifact = test_obs.planes[TEST_OBS].artifacts[omm_name.file_uri]
    test_chunk = science_artifact.parts['0'].chunks[0]
    assert test_chunk.position.axis.bounds is None

    # expected failure due to required kwargs parameter
    with pytest.raises(mc.CadcException):
        omm_footprint_augmentation.visit(test_obs)

    test_kwargs['working_directory'] = TESTDATA_DIR
    test_result = omm_footprint_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected a visit return value'
    assert test_result['chunks'] == 1
    assert (
        test_chunk.position.axis.bounds is not None
    ), 'bound calculation failed'
def test_preview_augment_plane():
    """
    Run the preview augmentation twice for TEST_FILE: first to create the
    preview and thumbnail, then again after the two artifact checksums have
    been overwritten, expecting the original 'md5:' checksums to be back.
    No metrics history is expected, as the visit runs with no CADC client.
    """
    omm_name = OmmName(file_name=TEST_FILE, entry=TEST_FILE)
    product_id = omm_name.product_id
    preview = os.path.join(TEST_FILES_DIR, omm_name.prev)
    thumb = os.path.join(TEST_FILES_DIR, omm_name.thumb)
    # Remove images left behind by an earlier run.
    for leftover in (preview, thumb):
        if os.path.exists(leftover):
            os.remove(leftover)

    test_fqn = os.path.join(TEST_DATA_DIR, f'{product_id}.expected.xml')
    test_obs = mc.read_obs_from_file(test_fqn)
    assert len(test_obs.planes[product_id].artifacts) == 1

    preva = 'cadc:OMM/C170324_0054_SCI_prev.jpg'
    thumba = 'cadc:OMM/C170324_0054_SCI_prev_256.jpg'
    expected_prev = ChecksumURI('md5:f37d21c53055498d1b5cb7753e1c6d6f')
    expected_thumb = ChecksumURI('md5:19661c3c2508ecc22425ee2a05881ed4')

    test_config = mc.Config()
    test_config.observe_execution = True
    test_metrics = mc.Metrics(test_config)
    test_kwargs = {
        'working_directory': TEST_FILES_DIR,
        'cadc_client': None,
        'observable': mc.Observable(rejected=None, metrics=test_metrics),
        'storage_name': omm_name,
    }

    test_result = preview_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected a visit return value'
    assert os.path.exists(preview)
    assert os.path.exists(thumb)
    test_plane = test_result.planes[product_id]
    assert (
        test_plane.artifacts[preva].content_checksum == expected_prev
    ), 'prev checksum failure'
    assert (
        test_plane.artifacts[thumba].content_checksum == expected_thumb
    ), 'thumb checksum failure'

    # now do updates
    to_update = test_obs.planes[product_id].artifacts
    to_update[preva].content_checksum = ChecksumURI(
        'de9f39804f172682ea9b001f8ca11f15')
    to_update[thumba].content_checksum = ChecksumURI(
        'cd118dae04391f6bea93ba4bf2711adf')
    test_result = preview_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected update visit return value'
    assert len(test_result.planes[product_id].artifacts) == 3
    assert os.path.exists(preview)
    assert os.path.exists(thumb)
    # These checks deliberately reuse the plane from the first visit.
    assert (
        test_plane.artifacts[preva].content_checksum == expected_prev
    ), 'prev update failed'
    assert (
        test_plane.artifacts[thumba].content_checksum == expected_thumb
    ), 'prev_256 update failed'
    assert len(test_metrics.history) == 0, 'wrong history, client is not None'
def test_time_nan():
    """
    Check that Telescope.update raises CadcException for the
    C120712_NGC7790_H_SCIRED header.

    NOTE(review): going by the test name, the failure is presumably caused
    by time values that cannot be computed - confirm against Telescope.
    """
    test_obs = 'C120712_NGC7790_H_SCIRED'
    test_file = f'file://{TESTDATA_DIR}/{test_obs}.fits.header'
    omm_name = OmmName(file_name=f'{test_obs}.fits', entry=test_file)
    test_file_info = FileInfo(id=omm_name.file_uri)
    test_xml = f'{TESTDATA_DIR}/{test_obs}.xml'
    obs = mc.read_obs_from_file(test_xml)
    headers = data_util.get_local_file_headers(test_file)
    telescope = Telescope(omm_name.file_uri, headers)
    # Only the call belongs in the raises block. The former
    # 'assert result is None' came after the raising call, so it could never
    # run and 'result' was never bound.
    with pytest.raises(mc.CadcException):
        telescope.update(obs, omm_name, test_file_info)
def omm_run_single():
    """
    Run the pipeline for the single file named on the command line and exit
    with the result of ec.run_single.

    sys.argv[1] is the file name (or the name the observation ID is derived
    from). sys.argv[2] is the proxy: its content when running in airflow,
    otherwise the name of the proxy file.
    """
    import sys

    cli_args = sys.argv
    config = mc.Config()
    config.get_executors()
    config.collection = COLLECTION
    config.working_directory = '/usr/src/app'
    config.task_types = [mc.TaskType.INGEST, mc.TaskType.MODIFY]
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    features = config.features
    if features.run_in_airflow:
        # Keep the reference until exit: NamedTemporaryFile removes the file
        # once it is closed.
        proxy_file = tempfile.NamedTemporaryFile()
        mc.write_to_file(proxy_file.name, cli_args[2])
        config.proxy_fqn = proxy_file.name
    else:
        config.proxy_fqn = cli_args[2]
    config.stream = 'raw'

    storage_name = (
        OmmName(file_name=cli_args[1])
        if features.use_file_names
        else OmmName(obs_id=OmmName.remove_extensions(cli_args[1]))
    )
    sys.exit(
        ec.run_single(
            config,
            storage_name,
            APPLICATION,
            meta_visitors,
            data_visitors,
            OmmChooser(),
        )
    )
def test_footprint_update_position():
    """
    Check that the footprint augmentation fills in the position bounds of
    the science chunk for TEST_FILE, and that it raises when called without
    kwargs.
    """
    omm_name = OmmName(file_name=TEST_FILE, entry=TEST_FILE)
    product_id = omm_name.product_id
    test_kwargs = {'storage_name': omm_name}
    test_fqn = os.path.join(TEST_DATA_DIR, f'{product_id}.expected.xml')
    test_obs = mc.read_obs_from_file(test_fqn)

    science_artifact = test_obs.planes[product_id].artifacts[omm_name.file_uri]
    test_chunk = science_artifact.parts['0'].chunks[0]
    assert test_chunk.position.axis.bounds is None

    # expected failure due to required kwargs parameter
    with pytest.raises(mc.CadcException):
        footprint_augmentation.visit(test_obs)

    test_kwargs['working_directory'] = TEST_FILES_DIR
    test_result = footprint_augmentation.visit(test_obs, **test_kwargs)
    assert test_result is not None, 'expected a visit return value'
    assert (
        test_chunk.position.axis.bounds is not None
    ), 'bound calculation failed'
def test_is_valid():
    """
    Check OmmName.is_valid for a set of names, the observation ID derived
    from file names, and the names that OmmName is expected to reject.
    """
    # (name, expected validity), in the order the checks have always run.
    validity = (
        ('C121212_00001_SCI', True),
        ('c121212_00001_SCI', False),
        ('C121212_00001_CAL', True),
        ('c121212_00001_CAL', False),
        ('C121212_domeflat_K_CALRED', True),
        ('C121212_DOMEFLAT_K_CALRED', False),
        ('C121212_sh2-132_J_old_SCIRED', True),
        ('C121212_J0454+8024_J_SCIRED', True),
        ('C121212_00001_TEST', True),
        ('C121212_00001_FOCUS', True),
    )
    for name, expected in validity:
        assert bool(OmmName(name).is_valid()) is expected

    # The observation ID is the same with and without the .gz extension.
    for file_name in ('C121212_00001_SCI.fits', 'C121212_00001_SCI.fits.gz'):
        test_subject = OmmName(file_name=file_name)
        assert test_subject.is_valid()
        assert test_subject.obs_id == 'C121212_00001_SCI'

    test_subject = OmmName(fname_on_disk='C121212_00001_SCI.fits',
                           file_name='C121212_00001_SCI.fits.gz')
    assert test_subject.is_valid()
    assert test_subject.obs_id == 'C121212_00001_SCI'

    # NOTE(review): assumes all four calls belong to the raises block. If
    # so, only the first call that raises is actually exercised - confirm,
    # and consider one raises block per call.
    with pytest.raises(mc.CadcException):
        OmmName(file_name='C121212_00001_SCI')
        OmmName(fname_on_disk='C121212_00001_FOCUS')
        OmmName('C121212_00001_FOCUS.fits')
        OmmName('C121212_00001_FOCUS.fits.gz')
def test_is_valid():
    """
    Check OmmName.is_valid for a set of file names, the observation ID and
    file URI derived from file names, and the names that OmmName is expected
    to reject.
    """
    # (file name, expected validity), in the order the checks have always
    # run.
    validity = (
        ('C121212_00001_SCI.fits.gz', True),
        ('c121212_00001_SCI.fits.gz', False),
        ('C121212_00001_CAL.fits.gz', True),
        ('c121212_00001_CAL.fits.gz', False),
        ('C121212_domeflat_K_CALRED.fits.gz', True),
        ('C121212_DOMEFLAT_K_CALRED.fits.gz', False),
        ('C121212_sh2-132_J_old_SCIRED.fits.gz', True),
        ('C121212_J0454+8024_J_SCIRED.fits.gz', True),
        ('C121212_00001_TEST.fits.gz', True),
        ('C121212_00001_FOCUS.fits.gz', True),
        ('C121121_J024345.57-021326.4_K_SCIRED.fits.gz', True),
    )
    for file_name, expected in validity:
        assert bool(OmmName(file_name=file_name).is_valid()) is expected

    # The observation ID is the same with and without the .gz extension.
    for file_name in ('C121212_00001_SCI.fits', 'C121212_00001_SCI.fits.gz'):
        test_subject = OmmName(file_name=file_name)
        assert test_subject.is_valid()
        assert test_subject.obs_id == 'C121212_00001'

    test_subject = OmmName(
        fname_on_disk='C121212_00001_SCI.fits',
        file_name='C121212_00001_SCI.fits.gz',
    )
    assert test_subject.is_valid()
    assert test_subject.obs_id == 'C121212_00001'
    assert (
        test_subject.file_uri == 'ad:OMM/C121212_00001_SCI.fits.gz'
    ), 'wrong file uri'

    # NOTE(review): assumes all four calls belong to the raises block. If
    # so, only the first call that raises is actually exercised - confirm,
    # and consider one raises block per call.
    with pytest.raises(mc.CadcException):
        OmmName(file_name='C121212_00001_SCI')
        OmmName(fname_on_disk='C121212_00001_FOCUS')
        OmmName('C121212_00001_FOCUS.fits')
        OmmName('C121212_00001_FOCUS.fits.gz')