def vlass_run_single():
    """Ingest a single VLASS entry named on the command line.

    ``sys.argv[1]`` is the file name or the observation ID, depending on
    ``config.features.use_file_names``. ``sys.argv[2]`` is the proxy: when
    ``config.features.run_in_airflow`` is set its value is written to a
    temporary file that is used as the proxy file, otherwise it is used
    directly as the fully-qualified proxy file name.
    """
    import sys

    config = mc.Config()
    config.get_executors()
    config.collection = COLLECTION
    config.working_directory = '/usr/src/app'
    config.use_local_files = False
    config.logging_level = 'INFO'
    config.log_to_file = False
    config.task_types = [mc.TaskType.INGEST]
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    temp = None
    try:
        if config.features.run_in_airflow:
            # the temporary file has to outlive the run_single call, so it
            # is closed (and thereby removed) in the finally clause
            temp = tempfile.NamedTemporaryFile()
            mc.write_to_file(temp.name, sys.argv[2])
            config.proxy_fqn = temp.name
        else:
            config.proxy_fqn = sys.argv[2]
        config.stream = 'raw'
        entry = sys.argv[1]
        if config.features.use_file_names:
            vlass_name = VlassName(file_name=entry)
        else:
            vlass_name = VlassName(obs_id=entry)
        ec.run_single(
            config,
            vlass_name,
            APPLICATION,
            meta_visitors=visitors,
            data_visitors=None,
        )
    finally:
        if temp is not None:
            temp.close()
def test_builder(obs_metadata_mock, tap_client_mock):
    """Check the URIs GemObsIDBuilder produces for a file-name entry.

    With ``use_file_names`` enabled, the entry is built for both values of
    ``supports_latest_client`` and for the INGEST and SCRAPE task types.
    With ``use_file_names`` disabled, building the same entry is expected to
    raise ``mc.CadcException``.
    """
    obs_metadata_mock.side_effect = gem_mocks.mock_get_obs_metadata

    test_config = mc.Config()
    test_config.working_directory = '/test_files'
    test_config.proxy_fqn = os.path.join(
        gem_mocks.TEST_DATA_DIR, 'test_proxy.pem'
    )
    em.init_global(config=test_config)
    test_subject = builder.GemObsIDBuilder(test_config)
    test_entry = 'S20050825S0143.fits'

    for support in (False, True):
        test_config.features.supports_latest_client = support
        test_config.features.use_file_names = True
        for task_type in (mc.TaskType.INGEST, mc.TaskType.SCRAPE):
            test_config.task_types = [task_type]
            built = test_subject.build(test_entry)
            assert built is not None, f'expect a result support {support}'

            expected_path = COLLECTION if support else ARCHIVE
            expected_file_uri = f'{SCHEME}:{expected_path}/{test_entry}'
            assert built.file_uri == expected_file_uri, 'wrong file uri'
            expected_prev_uri = f'{SCHEME}:{expected_path}/{built.prev}'
            assert built.prev_uri == expected_prev_uri, 'wrong preview uri'

            expected_scheme = V_SCHEME if support else A_SCHEME
            expected_thumb_uri = (
                f'{expected_scheme}:{expected_path}/{built.thumb}'
            )
            assert built.thumb_uri == expected_thumb_uri, 'wrong thumb uri'

        # without file-name support the builder is expected to fail
        test_config.task_types = [mc.TaskType.INGEST]
        test_config.features.use_file_names = False
        with pytest.raises(mc.CadcException):
            test_result = test_subject.build(test_entry)
def _execute_and_check_list_for_validate(ftp_mock, source_list_fqn,
                                         result_count, cache_count):
    """Run ``scrape.list_for_validate`` against a mocked FTP host and check
    the results.

    :param ftp_mock: mock of the FTP context manager; its ``listdir`` and
        ``stat`` calls are answered by ``_list_dirs`` and ``_entry_stats``.
    :param source_list_fqn: file that is read back as YAML after the call.
    :param result_count: expected number of entries in that file.
    :param cache_count: expected number of entries returned by
        ``scrape._read_cache``.
    """
    data_dir = test_main_app.TEST_DATA_DIR
    # seed the directory listing with the canned test listing
    shutil.copy(
        os.path.join(data_dir, 'test_source_dir_listing.csv'),
        os.path.join(data_dir, scrape.NEOSSAT_DIR_LIST),
    )

    ftp_session = ftp_mock.return_value.__enter__.return_value
    ftp_session.listdir.side_effect = _list_dirs
    ftp_session.stat.side_effect = _entry_stats

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        test_config = mc.Config()
        test_config.get_executors()
        scrape.list_for_validate(test_config)

        listed = mc.read_as_yaml(source_list_fqn)
        assert listed is not None, 'expect a file record'
        assert len(listed) == result_count, 'wrong number of entries'
        expected_entry = f'{MOCK_DIR}/NEOS_SCI_2017213215701_cord.fits'
        assert expected_entry in listed, 'wrong content'

        cached = scrape._read_cache(test_config.working_directory)
        assert cached is not None, 'expected return value'
        assert len(cached) == cache_count, 'wrong number of cached entries'
        expected_cached = f'{MOCK_DIR}/NEOS_SCI_2017213215701.fits'
        assert expected_cached in cached, 'wrong content'
    finally:
        os.getcwd = getcwd_orig
def test_provenance_augmentation(dmf_mock, headers_mock, access_mock,
                                 builder_mock, repo_get_mock, test_fqn):
    """Check that ``provenance_augmentation.visit`` updates an observation.

    The observation is read from ``test_fqn``. After the visit, the result
    must report 2 provenance changes, the observation must have exactly one
    member, and ``target.moving`` must have changed from falsy to truthy.
    """
    builder_mock.return_value._get_obs_id.return_value = None
    access_mock.return_value = 'https://localhost'
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_config.task_types = [mc.TaskType.VISIT]
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    headers_mock.return_value.get_head.side_effect = _get_headers_mock
    dmf_mock.get.side_effect = _get_obs_id_mock
    repo_get_mock.side_effect = _repo_get_mock

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # everything that runs while os.getcwd is patched sits inside the try,
    # so the patch is undone even if the storage name cannot be built
    try:
        temp = os.path.basename(test_fqn).replace('.expected.xml', '.fits')
        test_storage_name = GemProcName(entry=temp)
        test_obs = mc.read_obs_from_file(test_fqn)
        assert not test_obs.target.moving, 'initial conditions moving target'
        kwargs = {
            'storage_name': test_storage_name,
            'working_directory': test_main_app.TEST_DATA_DIR,
            'observable': test_observable,
            'caom_repo_client': Mock(),
        }
        test_result = provenance_augmentation.visit(test_obs, **kwargs)
        assert test_result is not None, 'expect a result'
        assert test_result.get('provenance') == 2, 'wrong result'
        assert len(test_obs.members) == 1, 'wrong membership'
        assert test_obs.target.moving, 'should be changed'
    finally:
        os.getcwd = getcwd_orig
def test_omm_name():
    """Check the names and URIs derived by OmmBuilder and OmmName.

    Covers entries given as a bare name and as a path under ``/tmp``, plus
    the preview, thumbnail and observation ID derived from file names.
    """
    test_config = mc.Config()
    test_config.task_types = []
    test_config.use_local_files = True
    test_builder = OmmBuilder(test_config)

    test_name = 'C121212_00001_SCI'
    for entry in [test_name, f'/tmp/{test_name}']:
        test_subject = test_builder.build(f'{entry}.fits')
        assert f'ad:OMM/{test_name}.fits.gz' == test_subject.file_uri
        assert test_subject.source_names == [f'{entry}.fits'], (
            'wrong source name'
        )
        assert (
            test_subject.destination_uris[0] == f'ad:OMM/{test_name}.fits.gz'
        ), 'wrong destination uri'

    # a thumbnail file name resolves to the same preview / thumbnail pair
    test_name = 'C121212_sh2-132_J_old_SCIRED'
    file_name = f'{test_name}_prev_256.jpg'
    assert f'{test_name}_prev.jpg' == OmmName(file_name=file_name).prev
    assert f'{test_name}_prev_256.jpg' == OmmName(file_name=file_name).thumb

    test_obs_id = 'C121121_J024345.57-021326.4_K'
    test_name = f'{test_obs_id}_SCIRED'
    file_name = f'{test_name}.fits.gz'
    assert f'{test_name}_prev.jpg' == OmmName(file_name=file_name).prev
    assert f'{test_name}_prev_256.jpg' == OmmName(file_name=file_name).thumb
    assert OmmName(file_name=file_name).obs_id == test_obs_id
def _run_state():
    """Run incrementally, driven by the bookmark in the state file.

    The bookmark timestamp controls which files are retrieved from the CSA
    ftp host. Ingestion works from fully-qualified file names on that host,
    because those are difficult to reproduce otherwise.

    :return: whatever ``rc.run_by_state`` returns.
    """
    config = mc.Config()
    config.get_executors()

    bookmark = mc.State(config.state_fqn).get_bookmark(NEOS_BOOKMARK)
    # increment_time with a zero increment is used to obtain an object with
    # a timestamp() method from the bookmark value
    start_timestamp = mc.increment_time(bookmark, 0).timestamp()
    todo_list, max_timestamp = scrape.build_todo(
        start_timestamp, config.working_directory, config.state_fqn
    )

    return rc.run_by_state(
        config=config,
        name_builder=nbc.FileNameBuilder(NEOSSatName),
        command_name=APPLICATION,
        bookmark_name=NEOS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        end_time=datetime.fromtimestamp(max_timestamp),
        chooser=None,
        source=data_source.IncrementalSource(todo_list),
        store_transfer=tc.FtpTransfer(config.data_source),
    )
def test_store(put_mock):
    """Check that ``ec.Store`` fetches a VLASS URL and then stores the file.

    The transfer mock must be asked to copy the URL into the per-observation
    directory under ``/tmp``, and the put mock must receive the file name.
    """
    test_config = mc.Config()
    test_config.logging_level = 'ERROR'
    test_config.working_directory = '/tmp'
    test_url = (
        'https://archive-new.nrao.edu/vlass/quicklook/VLASS2.1/'
        'T10t12/VLASS2.1.ql.T10t12.J073401-033000.10.2048.v1/'
        'VLASS2.1.ql.T10t12.J073401-033000.10.2048.v1.I.iter1.image.'
        'pbcor.tt0.rms.subim.fits'
    )
    test_storage_name = VlassName(url=test_url, entry=test_url)
    transferrer = Mock()
    cred_param = Mock()
    cadc_data_client = Mock()
    caom_repo_client = Mock()
    observable = mc.Observable(
        mc.Rejected('/tmp/rejected.yml'), mc.Metrics(test_config)
    )
    test_subject = ec.Store(
        test_config,
        test_storage_name,
        APPLICATION,
        cred_param,
        cadc_data_client,
        caom_repo_client,
        observable,
        transferrer,
    )
    test_subject.execute(None)

    assert put_mock.called, 'expect a call'
    put_args, _ = put_mock.call_args
    assert put_args[2] == test_storage_name.file_name, 'wrong file name'

    assert transferrer.get.called, 'expect a transfer call'
    get_args, _ = transferrer.get.call_args
    assert get_args[0] == test_url, 'wrong source parameter'
    expected_destination = (
        f'/tmp/{test_storage_name.obs_id}/'
        f'{test_storage_name.file_name}'
    )
    assert get_args[1] == expected_destination, 'wrong destination parameter'
def test_look_pull_and_put_v(http_mock, mock_client):
    """Check that ``mc.look_pull_and_put_v`` pulls over HTTP and then copies.

    ``os.stat`` is replaced for the duration of the test so that any stat
    call reports a size of 1234, matching the mocked copy return value.
    """
    stat_orig = os.stat
    os.stat = Mock(return_value=Mock(st_size=1234))
    try:
        test_storage_name = 'cadc:GEMINI/TEST.fits'
        f_name = 'test_f_name.fits'
        url = f'https://localhost/{f_name}'
        test_config = mc.Config()
        test_config.observe_execution = True
        test_metrics = mc.Metrics(test_config)
        mock_client.get_node.side_effect = tc.mock_get_node
        mock_client.copy.return_value = 1234
        assert len(test_metrics.history) == 0, 'initial history conditions'
        assert len(test_metrics.failures) == 0, 'initial failure conditions'
        mc.look_pull_and_put_v(
            test_storage_name,
            f_name,
            tc.TEST_DATA_DIR,
            url,
            mock_client,
            'md5:01234',
            test_metrics,
        )
        test_fqn = os.path.join(tc.TEST_DATA_DIR, f_name)
        # assert_called_with raises AssertionError itself on a mismatch; it
        # takes no message argument
        mock_client.copy.assert_called_with(
            test_fqn, destination=test_storage_name
        )
        http_mock.assert_called_with(url, test_fqn)
        assert len(test_metrics.history) == 1, 'history conditions'
        assert len(test_metrics.failures) == 0, 'failure conditions'
    finally:
        os.stat = stat_orig
def test_run_by_builder(data_client_mock, exec_mock, query_endpoint_mock):
    """Check that ``composable._run`` processes one entry from the work file.

    A single VLASS file name is written to the configured work file, the
    run is expected to return 0, and ``exec_mock`` must have been called.
    """
    query_endpoint_mock.side_effect = test_scrape._query_endpoint
    data_client_mock.return_value.info.side_effect = _mock_get_file_info
    data_client_mock.return_value.get_head.side_effect = (
        ac.make_headers_from_file
    )
    exec_mock.return_value = 0

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # the outer try guarantees that the os.getcwd patch is undone, even when
    # configuration or work file creation fails
    try:
        test_config = mc.Config()
        test_config.get_executors()
        test_f_name = (
            'VLASS1.2.ql.T07t13.J083838-153000.10.2048.v1.I.iter1.'
            'image.pbcor.tt0.subim.fits'
        )
        try:
            with open(test_config.work_fqn, 'w') as f:
                f.write(f'{test_f_name}\n')

            # the equivalent of calling work.init_web_log()
            scrape.web_log_content['abc'] = 123

            # execution
            test_result = composable._run()
            assert test_result == 0, 'wrong result'
        finally:
            if os.path.exists(test_config.work_fqn):
                os.unlink(test_config.work_fqn)
    finally:
        os.getcwd = getcwd_orig

    assert exec_mock.called, 'expect to be called'
    # assert_called_with raises AssertionError itself; it takes no message
    exec_mock.assert_called_with(ANY)
def _run_by_public():
    """Process observations that have become public.

    These are observations that are public, but have no artifacts
    representing the previews in CAOM, or no FITS file in ad. Called as
    gem_run_public. Time-boxing is based on timestamps from a state.yml
    file. Call once/day, since data release timestamps have times of
    00:00:00.000.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)

    run_kwargs = {
        'config': config,
        'name_builder': nbc.FileNameBuilder(gem_name.GemName),
        'command_name': main_app.APPLICATION,
        'bookmark_name': data_source.GEM_BOOKMARK,
        'data_visitors': DATA_VISITORS,
        'end_time': None,
        'source': data_source.PublicIncremental(config),
        'chooser': None,
    }
    run_kwargs['meta_visitors'] = _define_meta_visitors(config)
    return rc.run_by_state(**run_kwargs)
def test_vault_list_dir_time_box_data_source():
    """Check that only nodes dated inside the time box are returned.

    Two file nodes are offered by the mocked vos client; only the one dated
    2020-09-15 19:55 falls between the previous and current execution times.
    """

    def _make_file_node(uri, props):
        # a bare object carrying the attributes set by this test
        node = type('', (), {})()
        node.props = props
        node.uri = uri
        node.isdir = Mock(return_value=False)
        return node

    node1 = _make_file_node(
        'vos://cadc.nrc.ca!vault/goliaths/moc/994898p_moc.fits',
        {'date': '2020-09-15 19:55:03.067000+00:00', 'size': 14},
    )
    node2 = _make_file_node(
        'vos://cadc.nrc.ca!vault/goliaths/moc/994899p_moc.fits',
        {'date': '2020-09-13 19:55:03.067000+00:00', 'size': 12},
    )

    def _glob_mock(ignore_source_directory):
        return [1, 2]

    def _get_node_mock(target):
        return node1 if target == 1 else node2

    test_vos_client = Mock()
    test_vos_client.glob.side_effect = _glob_mock
    test_vos_client.get_node.side_effect = _get_node_mock

    test_config = mc.Config()
    test_config.get_executors()
    test_config.data_sources = ['vos:goliaths/wrong']
    test_subject = dsc.VaultDataSource(test_vos_client, test_config)
    assert test_subject is not None, 'expect a test_subject'

    test_prev_exec_time = datetime(2020, 9, 15, 10, 0, 0, tzinfo=timezone.utc)
    test_exec_time = datetime(2020, 9, 16, 10, 0, 0, tzinfo=timezone.utc)
    test_result = test_subject.get_time_box_work(
        test_prev_exec_time, test_exec_time
    )
    assert test_result is not None, 'expect a test result'
    assert len(test_result) == 1, 'wrong number of results'

    first = test_result[0]
    assert (
        'vos://cadc.nrc.ca!vault/goliaths/moc/994898p_moc.fits'
        == first.entry_name
    ), 'wrong name result'
    expected_ts = datetime(2020, 9, 15, 19, 55, 3, 67000, tzinfo=timezone.utc)
    assert expected_ts == first.entry_ts, 'wrong ts result'
def test_vault_list_dir_data_source():
    """Check that ``VaultDataSource.get_work`` returns a single entry.

    Three names are listed and three nodes are handed out in order by the
    mocked vos client; with ``.fits`` as the only accepted extension, the
    expected work list is exactly ``vos:goliaths/wrong/abc.fits``.
    """

    def _query_mock(ignore_source_directory):
        return ['abc.txt', 'abc.fits', '900898p_moc.fits']

    def _make_node(uri, size):
        # a bare object carrying the attributes set by this test
        node = type('', (), {})()
        node.props = {'size': size}
        node.uri = uri
        return node

    node1 = _make_node(
        'vos://cadc.nrc.ca!vault/goliaths/wrong/900898p_moc.fits', 0
    )
    node2 = _make_node('vos://cadc.nrc.ca!vault/goliaths/moc/abc.fits', 12)
    node3 = _make_node('vos://cadc.nrc.ca!vault/goliaths/moc/abc.txt', 12)

    test_vos_client = Mock()
    test_vos_client.listdir.side_effect = _query_mock
    test_vos_client.get_node.side_effect = [node1, node2, node3]

    test_config = mc.Config()
    test_config.get_executors()
    test_config.data_sources = ['vos:goliaths/wrong']
    test_config.data_source_extensions = ['.fits']
    test_subject = dsc.VaultDataSource(test_vos_client, test_config)
    assert test_subject is not None, 'expect a test_subject'

    work = test_subject.get_work()
    assert work is not None, 'expect a test result'
    assert len(work) == 1, 'wrong number of results'
    assert 'vos:goliaths/wrong/abc.fits' in work, 'wrong result'
def _run_state():
    """Run incrementally, driven by the bookmark in the state file.

    The bookmark timestamp controls which quicklook files are retrieved
    from VLASS. Ingestion is based on URLs, because a URL that contains the
    phrase 'QA_REJECTED' is the only way to tell if the attribute
    'requirements' should be set to 'fail', or not.

    :return: whatever ``rc.run_by_state`` returns.
    """
    config = mc.Config()
    config.get_executors()

    bookmark = mc.State(config.state_fqn).get_bookmark(VLASS_BOOKMARK)
    # the bookmark may be a string, or maybe a datetime, depending on the
    # execution environment; a zero increment turns either into a datetime
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)

    source = data_source.NraoPage(todo_list)
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    run_kwargs = dict(
        config=config,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
    )
    return rc.run_by_state(store_transfer=tc.HttpTransfer(), **run_kwargs)
def _run():
    """Process the entries listed in a todo file.

    A Builder supplies the StorageName instances. This is important here,
    because the instrument name needs to be provided to the StorageName
    constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    config = mc.Config()
    config.get_executors()

    # time_bounds_augmentation and quality_augmentation depend on metadata
    # scraped from the NRAO site. That metadata only changes when a new
    # file is created, a new version of a file is created, or an old
    # version of a file is replaced. Unless the pipeline is both STORE'ing
    # and INGEST'ing, be polite to the NRAO site and skip the scrape.
    task_types = config.task_types
    storing_and_ingesting = (
        mc.TaskType.STORE in task_types and mc.TaskType.INGEST in task_types
    )
    if storing_and_ingesting:
        meta_visitors = META_VISITORS
    else:
        meta_visitors = [cleanup_augmentation]

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def test_aug_visit_works(query_endpoint_mock, get_mock):
    """Check that ``time_bounds_augmentation.visit`` adds time bounds.

    After the visit, the result must report 2 changed artifacts and the
    first chunk of part '0' must carry one bounds sample and an exposure of
    234.0.
    """
    get_mock.return_value.__enter__.return_value.raw = test_scrape.WL_INDEX
    query_endpoint_mock.side_effect = test_scrape._query_endpoint

    test_config = mc.Config()
    test_config.get_executors()
    work.init_web_log(mc.State(test_config.state_fqn), test_config)

    test_name = sn.VlassName(
        file_name='VLASS1.2.ql.T07t13.J081828-133000.10.2048.v1.I.iter1.'
                  'image.pbcor.tt0.subim.fits',
        entry='VLASS1.2.ql.T07t13.J081828-133000.10.2048.v1.I.iter1.'
              'image.pbcor.tt0.subim.fits',
    )
    test_obs = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, f'{test_name.obs_id}.xml')
    )
    assert test_obs is not None, 'unexpected None'

    visit_kwargs = {
        'working_directory': os.path.join(THIS_DIR, '../../data'),
        'cadc_client': Mock(),
    }
    test_result = time_bounds_augmentation.visit(test_obs, **visit_kwargs)
    assert test_obs is not None, 'unexpected modification'
    assert test_result is not None, 'should have a result status'
    assert len(test_result) == 1, 'modified artifacts count'
    assert test_result['artifacts'] == 2, 'artifact count'

    artifact = test_obs.planes[test_name.product_id].artifacts[
        test_name.file_uri
    ]
    chunk = artifact.parts['0'].chunks[0]
    assert chunk is not None
    assert chunk.time is not None, 'no time information'
    assert chunk.time.axis is not None, 'no axis information'
    assert chunk.time.axis.bounds is not None, 'no bounds information'
    sample_count = len(chunk.time.axis.bounds.samples)
    assert sample_count == 1, 'wrong amount of bounds info'
    assert chunk.time.exposure == 234.0, 'wrong exposure value'
def test_run(run_mock, access_mock):
    """Check that ``composable._run`` hands a StorageName to the run mock.

    The test runs from a temporary directory holding a written
    configuration, a proxy file and a one-line todo.txt. The first
    positional argument of the ``run_mock`` call must be a
    ``mc.StorageName`` for the listed file.
    """
    run_mock.return_value = 0
    access_mock.return_value = 'https://localhost'
    test_f_id = 'test_file_id'
    test_f_name = f'{test_f_id}.fits'

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # the outer try undoes the os.getcwd patch whatever happens; the inner
    # one leaves the temporary directory before it is removed
    try:
        config = mc.Config()
        config.get_executors()
        with TemporaryDirectory(dir=test_main_app.TEST_DATA_DIR) as temp_dir:
            os.chdir(temp_dir)
            try:
                config.working_directory = temp_dir
                config.log_file_directory = f'{temp_dir}/logs'
                config.rejected_directory = f'{temp_dir}/rejected'
                mc.Config.write_to_file(config)

                with open(f'{temp_dir}/test_proxy.pem', 'w') as f:
                    f.write('test content')
                with open(f'{temp_dir}/todo.txt', 'w') as f:
                    f.write(test_f_name)

                # execution
                test_result = composable._run()
                assert test_result == 0, 'wrong return value'
                assert run_mock.called, 'should have been called'
                args, kwargs = run_mock.call_args
                test_storage = args[0]
                assert isinstance(test_storage, mc.StorageName), type(
                    test_storage
                )
                assert test_storage.file_name == test_f_name, (
                    'wrong file name'
                )
                assert test_storage.source_names[0] == test_f_name, (
                    'wrong fname on disk'
                )
            finally:
                os.chdir(test_main_app.TEST_DATA_DIR)
    finally:
        os.getcwd = getcwd_orig
def _common_init():
    """Build the objects shared by the run entry points.

    The metadata reader is chosen from the configuration: a file reader
    (with a reduced visitor list) for local files or SCRAPE, a storage
    client reader when VISIT is the only task type, and the default reader
    otherwise.

    :return: the tuple (clients, config, metadata_reader, meta_visitors,
        name_builder).
    """
    config = mc.Config()
    config.get_executors()
    clients = GemClientCollection(config)

    gemini_session = mc.get_endpoint_session()
    provenance_finder = gemini_metadata.ProvenanceFinder(
        config, clients.query_client, gemini_session
    )
    svofps_session = mc.get_endpoint_session()
    filter_cache = svofps.FilterMetadataCache(svofps_session)
    clients.gemini_session = gemini_session
    clients.svo_session = svofps_session

    # the trailing constructor arguments are common to all three readers
    reader_args = (gemini_session, provenance_finder, filter_cache)
    meta_visitors = META_VISITORS
    if config.use_local_files or mc.TaskType.SCRAPE in config.task_types:
        metadata_reader = gemini_metadata.GeminiFileMetadataReader(
            *reader_args
        )
        meta_visitors = [
            fits2caom2_augmentation,
            preview_augmentation,
            cleanup_augmentation,
        ]
    elif [mc.TaskType.VISIT] == config.task_types:
        metadata_reader = gemini_metadata.GeminiStorageClientReader(
            clients.data_client, *reader_args
        )
    else:
        metadata_reader = gemini_metadata.GeminiMetadataReader(*reader_args)

    reader_lookup = gemini_metadata.GeminiMetadataLookup(metadata_reader)
    reader_lookup.reader = metadata_reader
    name_builder = builder.GemObsIDBuilder(
        config, metadata_reader, reader_lookup
    )
    return clients, config, metadata_reader, meta_visitors, name_builder
def _run_by_state():
    """Run incrementally, driven by the bookmark in the state file.

    The bookmark timestamp controls which quicklook files are retrieved
    from VLASS. Ingestion is based on URLs, because a URL that contains the
    phrase 'QA_REJECTED' is the only way to tell if the attribute
    'requirements' should be set to 'fail', or not.

    :return: whatever ``rc.run_by_state`` returns.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    # the bookmark may be a string, or maybe a datetime, depending on the
    # execution environment; a zero increment turns either into a datetime
    bookmark = state.get_bookmark(VLASS_BOOKMARK)
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)

    # the web log is only initialised when there is something to do
    if len(todo_list) > 0:
        state = mc.State(config.state_fqn)
        work.init_web_log(state, config)

    # still make all subsequent calls if len == 0, for consistent reporting
    return rc.run_by_state(
        config=config,
        command_name=sn.APPLICATION,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=builder.VlassInstanceBuilder(config),
        source=data_source.NraoPage(todo_list),
        end_time=max_date,
        store_transfer=tc.HttpTransfer(),
    )
def test_transfer_fails_fits_check():
    """Check the clean-up performed when a transferred file is a bad FITS.

    Test case - when the fits check fails, the file is cleaned up, if the
    configuration says it should be: the source is expected to be moved to
    the configured failure destination.
    """
    # NOTE(review): Mock(autospec=True) only sets an attribute named
    # 'autospec' on the mock, it does not apply a spec - confirm the intent
    vos_client_mock = Mock(autospec=True)

    def mock_copy(ignore_src, ignore_dst, send_md5=True):
        # stand in for the vos copy by placing a broken file on disk
        copyfile('/test_files/broken.fits', '/tmp/broken.fits')

    vos_client_mock.copy.side_effect = mock_copy
    test_config = mc.Config()
    test_config.cleanup_files_when_storing = True
    test_config.cleanup_failure_destination = 'vos:goliaths/dao_test/failure'
    test_subject = transfer.VoFitsCleanupTransfer(vos_client_mock, test_config)
    assert test_subject is not None, 'expect ctor to work'
    test_subject.observable = Mock(autospec=True)

    test_source = 'vos:goliaths/dao_test/broken.fits'
    test_destination = '/tmp/broken.fits'
    test_subject.get(test_source, test_destination)

    # assert_called_with raises AssertionError itself on a mismatch; it
    # takes no message argument
    assert vos_client_mock.copy.called, 'expect copy call'
    vos_client_mock.copy.assert_called_with(
        test_source, test_destination, send_md5=True
    )
    assert vos_client_mock.move.called, 'expect move call'
    vos_client_mock.move.assert_called_with(
        test_source, 'vos:goliaths/dao_test/failure/broken.fits'
    )
def test_config():
    """Build the ``mc.Config`` shared by tests.

    Files left at the failure, success and retry locations by earlier runs
    are removed before the configuration is returned.

    :return: the populated ``mc.Config`` instance.
    """
    data_dir = tc.TEST_DATA_DIR

    test_config = mc.Config()
    test_config.working_directory = tc.THIS_DIR
    test_config.collection = 'OMM'
    test_config.netrc_file = os.path.join(data_dir, 'test_netrc')
    test_config.work_file = 'todo.txt'
    test_config.logging_level = 'DEBUG'
    test_config.log_file_directory = data_dir
    test_config.failure_fqn = f'{data_dir}/fail.txt'
    test_config.failure_log_file_name = 'fail.txt'
    test_config.retry_fqn = f'{data_dir}/retry.txt'
    test_config.retry_file_name = 'retry.txt'
    test_config.success_fqn = f'{data_dir}/good.txt'
    test_config.success_log_file_name = 'good.txt'
    test_config.rejected_fqn = f'{data_dir}/rejected.yml'
    test_config.progress_fqn = f'{data_dir}/progress.txt'
    test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    test_config.features.run_in_airflow = False
    test_config.features.use_file_names = False
    test_config._report_fqn = (
        f'{test_config.log_file_directory}/' f'test_report.txt'
    )
    test_config.stream = 'TEST'

    stale_files = (
        test_config.failure_fqn,
        test_config.success_fqn,
        test_config.retry_fqn,
    )
    for stale_fqn in stale_files:
        if os.path.exists(stale_fqn):
            os.unlink(stale_fqn)
    return test_config
def test_storage_time_box_query(query_mock):
    """Check that ``QueryTimeBoxDataSource`` returns one entry per row.

    The mocked query returns a three-row table; the first work entry is
    expected to carry the first file name.
    """

    def _mock_query(arg1, arg2):
        return Table.read(
            'fileName,ingestDate\n'
            'NEOS_SCI_2015347000000_clean.fits,2019-10-23T16:27:19.000\n'
            'NEOS_SCI_2015347000000.fits,2019-10-23T16:27:27.000\n'
            'NEOS_SCI_2015347002200_clean.fits,2019-10-23T16:27:33.000\n'.split(
                '\n'
            ),
            format='csv',
        )

    query_mock.side_effect = _mock_query

    # capture both originals before patching anything, and do all the work
    # inside the try so that both patches are undone on any failure
    getcwd_orig = os.getcwd
    tap_client_ctor_orig = CadcTapClient.__init__
    os.getcwd = Mock(return_value=tc.TEST_DATA_DIR)
    CadcTapClient.__init__ = Mock(return_value=None)
    try:
        test_config = mc.Config()
        test_config.get_executors()
        utc_now = datetime.utcnow()
        prev_exec_date = utc_now - timedelta(seconds=3600)
        exec_date = utc_now - timedelta(seconds=1800)

        test_subject = dsc.QueryTimeBoxDataSource(test_config)
        test_result = test_subject.get_time_box_work(prev_exec_date, exec_date)
        assert test_result is not None, 'expect result'
        assert len(test_result) == 3, 'wrong number of results'
        assert (
            test_result[0].entry_name == 'NEOS_SCI_2015347000000_clean.fits'
        ), 'wrong results'
    finally:
        os.getcwd = getcwd_orig
        CadcTapClient.__init__ = tap_client_ctor_orig
def test_provenance_augmentation(obs_id_mock, repo_get_mock, headers_mock,
                                 test_fqn):
    """Check that ``provenance_augmentation.visit`` updates an observation.

    The observation is read from ``test_fqn``. After the visit, the result
    must report 2 provenance changes, the observation must have exactly one
    member, and ``target.moving`` must have changed from falsy to truthy.
    """
    test_observable = mc.Observable(
        mc.Rejected(REJECTED_FILE), mc.Metrics(mc.Config())
    )
    headers_mock.side_effect = _get_headers_mock
    repo_get_mock.side_effect = _repo_get_mock
    obs_id_mock.side_effect = _get_obs_id_mock

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        test_obs = mc.read_obs_from_file(test_fqn)
        assert not test_obs.target.moving, 'initial conditions moving target'

        science_file = os.path.basename(test_fqn).replace(
            '.expected.xml', '.fits'
        )
        visit_result = provenance_augmentation.visit(
            test_obs,
            science_file=science_file,
            working_directory=test_main_app.TEST_DATA_DIR,
            observable=test_observable,
            caom_repo_client=Mock(),
        )
        assert visit_result is not None, 'expect a result'
        assert visit_result.get('provenance') == 2, 'wrong result'
        assert len(test_obs.members) == 1, 'wrong membership'
        assert test_obs.target.moving, 'should be changed'
    finally:
        os.getcwd = getcwd_orig
def run_query():
    """Process every entry returned by a time-boxed ad query.

    :param sys.argv[1] the timestamp for the > comparison in the time-boxed
        query
    :param sys.argv[2] the timestamp for the <= comparison in the time-boxed
        query

    The function does not return: it ends with ``sys.exit``, using 0 when no
    files were found and otherwise the status from ``ec.run_by_file``. The
    exit status is used by airflow for task instance management and
    reporting.
    """
    prev_exec_date, exec_date = sys.argv[1], sys.argv[2]

    config = mc.Config()
    config.get()
    config.stream = 'default'
    file_list = mc.read_file_list_from_archive(
        config, APPLICATION, prev_exec_date, exec_date
    )

    # drop the timestamps so only the program name is left in sys.argv
    sys.argv = sys.argv[:1]

    exit_status = 0
    if len(file_list) > 0:
        mc.write_to_file(config.work_fqn, '\n'.join(file_list))
        exit_status |= ec.run_by_file(
            GemName,
            APPLICATION,
            COLLECTION,
            config.proxy_fqn,
            meta_visitors,
            data_visitors,
            archive=ARCHIVE,
        )
    sys.exit(exit_status)
def test_builder(file_info_mock, header_mock):
    """Check the URIs and observation ID built by GemObsIDBuilder.

    Each test entry is built for the INGEST and SCRAPE task types; the
    file, preview and thumbnail URIs must use the expected scheme and
    collection, and an observation ID must be set.
    """
    file_info_mock.side_effect = gem_mocks.mock_get_obs_metadata
    test_config = mc.Config()
    test_config.working_directory = '/test_files'
    test_config.proxy_fqn = os.path.join(
        gem_mocks.TEST_DATA_DIR, 'test_proxy.pem'
    )
    test_reader = gemini_metadata.GeminiMetadataReader(Mock(), Mock(), Mock())
    test_metadata = gemini_metadata.GeminiMetadataLookup(test_reader)
    test_subject = builder.GemObsIDBuilder(
        test_config, test_reader, test_metadata
    )
    test_entries = ['S20050825S0143.fits', 'TX20131117_raw.3002.fits']
    for test_entry in test_entries:
        for task_type in [mc.TaskType.INGEST, mc.TaskType.SCRAPE]:
            test_config.task_types = [task_type]
            test_result = test_subject.build(test_entry)
            assert test_result is not None, 'expect a result'
            assert (
                test_result.file_uri == f'{SCHEME}:{COLLECTION}/{test_entry}'
            ), 'wrong file uri'
            assert (
                test_result.prev_uri
                == f'{SCHEME}:{COLLECTION}/{test_result.prev}'
            ), 'wrong preview uri'
            assert (
                test_result.thumb_uri
                == f'{V_SCHEME}:{COLLECTION}/{test_result.thumb}'
            ), 'wrong thumb uri'
            assert test_result.obs_id is not None, 'expect an obs id'
def test_storage_name():
    """Check DraoSTName naming and the file-name lookup for an observation.

    A DraoSTName built from 'RN43.json' must be valid with obs_id 'RN43'
    and no product_id, file_uri or lineage. ``get_f_names`` must then
    return the four tar.gz names, which are created on disk when missing.
    """
    test_f_names = sorted(
        [
            'DRAO_ST_CGPS_RN43_20180715T1450_C21.tar.gz',
            'DRAO_ST_CGPS_RN43_20180715T1450_C74.tar.gz',
            'DRAO_ST_CGPS_RN43_20180715T1450_RAW.tar.gz',
            'DRAO_ST_CGPS_RN43_20180715T1450_S21.tar.gz',
        ]
    )

    test_subject = draost_name.DraoSTName(fname_on_disk='RN43.json')
    assert test_subject.is_valid(), 'should be valid'
    assert test_subject.obs_id == 'RN43', 'wrong obs_id'
    assert test_subject.product_id is None, 'not maintained by pipeline'
    assert test_subject.file_uri is None, 'not maintained by pipeline'
    assert test_subject.lineage is None, 'not maintained by pipeline'

    test_config = mc.Config()
    test_config.get_executors()
    test_config.working_directory = TEST_FILES_DIR

    # make sure each expected file exists, without touching existing ones
    for name in test_f_names:
        fqn = f'{TEST_FILES_DIR}/{name}'
        if os.path.exists(fqn):
            continue
        with open(fqn, 'w') as f:
            f.write('test content')

    found = draost_name.DraoSTName.get_f_names(
        test_subject.obs_id, test_config.working_directory
    )
    assert found == test_f_names, 'two ways to name'
def test_store(put_mock):
    """Check that ``ec.Store`` fetches a NEOSSat file and then stores it.

    The transfer mock must be asked to copy the fully-qualified source name
    into the per-observation directory under ``/tmp``, and the put mock
    must receive the file name.
    """
    test_config = mc.Config()
    test_config.logging_level = 'ERROR'
    test_config.working_directory = '/tmp'
    test_fqn = (
        '/users/OpenData_DonneesOuvertes/pub/NEOSSAT/ASTRO/2019/'
        '268/NEOS_SCI_2019268004930_clean.fits'
    )
    test_storage_name = NEOSSatName(file_name=test_fqn, entry=test_fqn)
    transferrer = Mock()
    cred_param = Mock()
    cadc_data_client = Mock()
    caom_repo_client = Mock()
    observable = mc.Observable(
        mc.Rejected('/tmp/rejected.yml'), mc.Metrics(test_config)
    )
    test_subject = ec.Store(
        test_config,
        test_storage_name,
        APPLICATION,
        cred_param,
        cadc_data_client,
        caom_repo_client,
        observable,
        transferrer,
    )
    test_subject.execute(None)
    assert put_mock.called, 'expect a call'
    args, kwargs = put_mock.call_args
    assert args[2] == test_storage_name.file_name, 'wrong file name'
    assert transferrer.get.called, 'expect a transfer call'
    args, kwargs = transferrer.get.call_args
    assert args[0] == test_fqn, 'wrong source parameter'
    assert args[1] == (
        f'/tmp/{test_storage_name.obs_id}/'
        f'{test_storage_name.file_name}'
    ), 'wrong destination parameter'
def test_preview_augment_delete_preview():
    """Check that an artifact for a rejected preview is removed.

    The plane starts with a preview artifact, but it represents a
    non-existent file, so the artifact is expected to be removed from the
    CAOM observation: the plane goes from two artifacts to one.
    """
    test_product_id = 'S20080610S0045'
    obs = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, 'GS-2008A-C-5-35-002.fits.xml')
    )
    assert len(obs.planes[test_product_id].artifacts) == 2, 'initial condition'

    test_rejected = mc.Rejected('/tmp/nonexistent')
    test_rejected.content = {
        'bad_metadata': [],
        'no_preview': [
            'S20080610S0043.jpg',
            'S20080610S0041.jpg',
            'S20080610S0044.jpg',
            'S20080610S0045.jpg',
        ],
    }
    test_observable = mc.Observable(test_rejected, mc.Metrics(mc.Config()))

    result = preview_augmentation.visit(
        obs,
        working_directory=TEST_DATA_DIR,
        cadc_client=None,
        stream='stream',
        observable=test_observable,
    )
    assert result is not None, 'expect a result'
    assert result['artifacts'] == 1, 'wrong result'
    assert len(obs.planes[test_product_id].artifacts) == 1, 'post condition'
def test_pull_v_augmentation(put_mock, http_mock):
    """Check that ``pull_v_augmentation.visit`` pulls and stores a file.

    The HTTP mock must be called for the product's FITS file, the put mock
    must receive the working directory, file name and storage URI, and the
    observation itself must be left without new artifacts.
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    test_uri = f'{SCHEME}:{COLLECTION}/{TEST_PRODUCT_ID}.fits'
    for plane in obs.planes.values():
        for artifact in plane.artifacts.values():
            artifact.uri = test_uri

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'cadc_client': cadc_client_mock,
        'observable': test_observable,
    }
    result = pull_v_augmentation.visit(obs, **kwargs)

    test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
    test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
    # assert_called_with raises AssertionError itself on a mismatch; it
    # takes no message argument
    http_mock.assert_called_with(test_url, test_prev)
    assert put_mock.called, 'put mock not called'
    # separate names, so the visit kwargs above are not overwritten
    put_args, _ = put_mock.call_args
    assert put_args[1] == TEST_DATA_DIR, 'wrong working dir'
    assert put_args[2] == f'{TEST_PRODUCT_ID}.fits', 'wrong file name'
    assert put_args[3] == test_uri, 'wrong storage name'
    assert result is not None, 'expect a result'
    assert result['observation'] == 0, 'no updated metadata'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'no new artifacts'
def test_pull_augmentation():
    """Check that ``pull_augmentation.visit`` pulls and stores a file.

    With ``http_get`` and ``data_put`` patched, the HTTP mock must be
    called for the product's FITS file, the put mock must be called, and
    the observation must be left without new artifacts.
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    kwargs = {
        'working_directory': TEST_DATA_DIR,
        'cadc_client': cadc_client_mock,
        'stream': 'stream',
        'observable': test_observable,
    }

    with patch('caom2pipe.manage_composable.http_get') as http_mock, \
            patch('caom2pipe.manage_composable.data_put') as ad_put_mock:
        cadc_client_mock.return_value.data_get.return_value = (
            mc.CadcException('test')
        )
        # no scheme from cadc client
        cadc_client_mock.get_file_info.return_value = {'md5sum': '1234'}
        result = pull_augmentation.visit(obs, **kwargs)

        test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
        # assert_called_with raises AssertionError itself on a mismatch; it
        # takes no message argument
        http_mock.assert_called_with(test_url, test_prev)
        assert ad_put_mock.called, 'ad put mock not called'
        assert result is not None, 'expect a result'
        assert result['observation'] == 0, 'no updated metadata'
        assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, (
            'no new artifacts'
        )
def test_data_visit_params():
    """Check the keyword arguments ``ec.DataVisit`` passes to a visitor.

    The per-observation working directory ``/tmp/abc`` is removed before
    the test when it exists, and emptied and removed again afterwards.
    """
    test_wd = '/tmp/abc'
    if os.path.exists(test_wd):
        if os.path.isdir(test_wd):
            os.rmdir(test_wd)
        else:
            os.unlink(test_wd)

    storage_name = mc.StorageName(
        obs_id='abc',
        fname_on_disk='abc.fits.gz',
        source_names=['vos:DAO/incoming/abc.fits.gz'],
        destination_uris=['ad:TEST/abc.fits.gz'],
    )

    test_config = mc.Config()
    test_config.task_types = [mc.TaskType.MODIFY]
    test_config.working_directory = '/tmp'
    test_config.logging_level = 'DEBUG'

    # NOTE(review): Mock(autospec=True) only sets an attribute named
    # 'autospec' on each mock, it does not apply a spec - confirm the intent
    test_cadc_client = Mock(autospec=True)
    test_caom_client = Mock(autospec=True)
    test_caom_client.read.side_effect = _read_obs2
    data_visitor = Mock(autospec=True)
    test_data_visitors = [data_visitor]
    test_observable = Mock(autospec=True)
    test_transferrer = Mock(autospec=True)

    try:
        test_config.use_local_files = False
        test_subject = ec.DataVisit(
            test_config,
            storage_name,
            test_cadc_client,
            test_caom_client,
            test_data_visitors,
            test_config.task_types[0],
            test_observable,
            test_transferrer,
        )
        assert test_subject is not None, 'broken ctor'
        test_subject.execute(context=None)
        assert data_visitor.visit.called, 'expect visit call'
        # assert_called_with raises AssertionError itself on a mismatch; it
        # takes no message argument
        data_visitor.visit.assert_called_with(
            ANY,
            working_directory='/tmp/abc',
            storage_name=storage_name,
            log_file_directory=None,
            cadc_client=ANY,
            caom_repo_client=ANY,
            stream=None,
            observable=ANY,
        )
        data_visitor.visit.reset_mock()
    finally:
        if os.path.exists(test_wd):
            # os.listdir returns bare names, so they have to be joined to
            # the directory before they can be removed
            for f in os.listdir(test_wd):
                os.unlink(os.path.join(test_wd, f))
            os.rmdir(test_wd)