def _run_by_public():
    """Process observations that are public, but have no artifacts
    representing the previews in CAOM, or no FITS file in ad.

    Invoked as gem_run_public. The time-boxing relies on timestamps read
    from a state.yml file. Run it once/day, because data release timestamps
    carry times of 00:00:00.000.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)

    # Collaborators are created in the same order as before, then handed
    # to the state-based runner in a single keyword mapping.
    file_name_builder = nbc.FileNameBuilder(gem_name.GemName)
    public_source = data_source.PublicIncremental(cfg)
    visitors = _define_meta_visitors(cfg)
    run_kwargs = {
        'config': cfg,
        'name_builder': file_name_builder,
        'command_name': main_app.APPLICATION,
        'bookmark_name': data_source.GEM_BOOKMARK,
        'meta_visitors': visitors,
        'data_visitors': DATA_VISITORS,
        'end_time': None,
        'source': public_source,
        'chooser': None,
    }
    return rc.run_by_state(**run_kwargs)
def test_builder(obs_metadata_mock, tap_client_mock):
    """Check the URIs GemObsIDBuilder produces for a file name.

    The build is exercised with and without latest-client support, for the
    INGEST and SCRAPE task types. It must raise CadcException once file
    names are no longer in use.
    """
    obs_metadata_mock.side_effect = gem_mocks.mock_get_obs_metadata

    config = mc.Config()
    config.working_directory = '/test_files'
    config.proxy_fqn = os.path.join(gem_mocks.TEST_DATA_DIR, 'test_proxy.pem')
    em.init_global(config=config)
    subject = builder.GemObsIDBuilder(config)

    entry = 'S20050825S0143.fits'
    for support in (False, True):
        config.features.supports_latest_client = support
        config.features.use_file_names = True
        for task_type in (mc.TaskType.INGEST, mc.TaskType.SCRAPE):
            config.task_types = [task_type]
            built = subject.build(entry)
            assert built is not None, (
                f'expect a result support {support}'
            )
            # the storage location depends on the client support flag
            expected_path = COLLECTION if support else ARCHIVE
            assert built.file_uri == (
                f'{SCHEME}:{expected_path}/{entry}'
            ), 'wrong file uri'
            assert built.prev_uri == (
                f'{SCHEME}:{expected_path}/{built.prev}'
            ), 'wrong preview uri'
            # only the thumbnail scheme changes with the support flag
            expected_scheme = V_SCHEME if support else A_SCHEME
            assert built.thumb_uri == (
                f'{expected_scheme}:{expected_path}/{built.thumb}'
            ), 'wrong thumb uri'

        # without file names the builder is expected to refuse the entry
        config.task_types = [mc.TaskType.INGEST]
        config.features.use_file_names = False
        with pytest.raises(mc.CadcException):
            built = subject.build(entry)
def test_get_obs_metadata_not_at_gemini(tap_client_mock, session_mock):
    """Check that get_obs_metadata raises CadcException when the mocked
    session reports the record as not found.
    """
    session_mock.get.side_effect = gem_mocks.mock_session_get_not_found

    test_config = mc.Config()
    test_config.working_directory = gem_mocks.TEST_DATA_DIR
    test_config.proxy_file_name = 'test_proxy.pem'
    ext_md.init_global(config=test_config)

    # `match` is a regular expression searched for in the exception text.
    # The pattern has no placeholders, so a raw string replaces the
    # f-string, and the never-read result of the call is not bound.
    with pytest.raises(
        mc.CadcException, match=r'Could not find JSON record *'
    ):
        ext_md.get_obs_metadata('test_file_id')
def _run():
    """Drive the pipeline from a todo file that lists file names.

    File names are used even though Gemini provides information about
    existing data referenced by observation ID.

    :return whatever rc.run_by_todo returns.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    obs_id_builder = builder.GemObsIDBuilder(cfg)
    visitors = _define_meta_visitors(cfg)
    return rc.run_by_todo(
        cfg,
        obs_id_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=visitors,
    )
def _run():
    """Identify the work to be done from a todo file, and do it.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    run_kwargs = {
        'config': cfg,
        'name_builder': builder.GemProcBuilder(cfg),
        'command_name': main_app.APPLICATION,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
    }
    return rc.run_by_todo(**run_kwargs)
def test_repair_provenance(gem_mock, tap_mock):
    """Check main_app._repair_provenance_value against every entry of
    test_subjects: the second element of an entry is the value to repair,
    the first is the expected file id.
    """
    copyfile(
        f'{gem_mocks.TEST_DATA_DIR}/from_paul.txt',
        '/app/data/from_paul.txt',
    )
    # os.getcwd is swapped for a mock for the duration of the test, and put
    # back in the finally clause
    original_getcwd = os.getcwd
    os.getcwd = Mock(return_value=gem_mocks.TEST_DATA_DIR)
    try:
        gem_mock.side_effect = gem_mocks.mock_get_obs_metadata
        tap_mock.side_effect = gem_mocks.mock_query_tap
        external_metadata.set_ofr(None)
        external_metadata.get_gofr()

        config = mc.Config()
        config.get_executors()
        external_metadata.init_global(config=config)

        for entry in test_subjects:
            _, repaired_fid = main_app._repair_provenance_value(
                entry[1], 'test obs'
            )
            assert repaired_fid is not None, 'failed lookup {}'.format(entry)
            assert repaired_fid == entry[0], 'error {}'.format(entry[1])
    finally:
        os.getcwd = original_getcwd
def test_preview_augmentation(data_client_mock, tap_mock):
    """Run preview_augmentation.visit on a processed observation read from
    an expected-XML file, and check the reported artifact count.

    The visit itself is timed, and any exception it raises is logged with
    its traceback before the test is failed.
    """
    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # Everything after the patch runs inside try/finally, so a failure
    # during set-up (reading the observation, creating the config, building
    # the storage name) cannot leave os.getcwd mocked for later tests.
    try:
        tap_mock.side_effect = _tap_mock
        data_client_mock.return_value.info.side_effect = (
            test_main_app._get_file_info
        )

        test_f_id = 'rnN20140428S0181_ronchi'
        test_f_name = f'{test_f_id}.fits'
        test_obs = mc.read_obs_from_file(
            f'{test_main_app.TEST_DATA_DIR}/{test_f_id}.expected.xml'
        )
        test_rejected = mc.Rejected(REJECTED_FILE)
        test_config = mc.Config()
        test_config.get_executors()
        test_observable = mc.Observable(
            test_rejected, mc.Metrics(test_config)
        )
        external_metadata.init_global(test_config)
        test_builder = builder.GemProcBuilder(test_config)
        test_fqn = os.path.join(test_main_app.TEST_DATA_DIR, test_f_name)
        test_storage_name = test_builder.build(test_fqn)
        kwargs = {
            'working_directory': TEST_FILES_DIR,
            'cadc_client': None,
            'stream': 'stream',
            'observable': test_observable,
            'storage_name': test_storage_name,
        }

        # Only the visit is guarded by the logging handler; set-up errors
        # propagate unchanged, exactly as they did before.
        try:
            start_ts = datetime.utcnow().timestamp()
            test_result = preview_augmentation.visit(test_obs, **kwargs)
            end_ts = datetime.utcnow().timestamp()
            # logged at error level so the timing shows up in test output
            logging.error(
                f'{test_f_name} execution time {end_ts - start_ts}'
            )
        except Exception as e:
            logging.error(e)
            logging.error(traceback.format_exc())
            assert False
    finally:
        os.getcwd = getcwd_orig

    assert test_result is not None, 'expect a result'
    assert test_result.get('artifacts') == 2, 'wrong result'
def test_caching_relationship(tap_mock, get_obs_mock):
    """Check where CachingObsFileRelationship.get_obs_id finds its answer,
    and how name_list grows as a result.

    Three look-ups are made: one answered by the mocked Gemini metadata
    call, one answered by the mocked TAP query, and one already present in
    the file.
    """
    shutil.copyfile(
        f'{gem_mocks.TEST_DATA_DIR}/from_paul.txt',
        '/app/data/from_paul.txt',
    )
    original_getcwd = os.getcwd
    os.getcwd = Mock(return_value=gem_mocks.TEST_DATA_DIR)
    try:
        config = mc.Config()
        config.get_executors()
        ext_md.init_global(config=config)
        initial_length = 525
        tap_mock.side_effect = gem_mocks._query_mock_none
        get_obs_mock.side_effect = gem_mocks.mock_get_obs_metadata
        relationship = ext_md.CachingObsFileRelationship()
        relationship.tap_client = Mock()

        # test an entry that's not in the file, not at CADC, is at
        # archive.gemini.edu
        assert (
            len(relationship.name_list) == initial_length
        ), 'bad initial length'
        obs_id = relationship.get_obs_id('N20200210S0077')
        assert obs_id is not None, 'expect a gemini result'
        assert obs_id == 'GN-CAL20200210-22-076', 'wrong gemini result'
        assert (
            len(relationship.name_list) == initial_length + 1
        ), 'bad updated length from Gemini'

        # entry is not in file, but is at CADC
        tap_mock.side_effect = gem_mocks.mock_query_tap
        obs_id = relationship.get_obs_id('x')
        assert obs_id is not None, 'expect a cadc result'
        assert obs_id == 'test_data_label', 'wrong cadc result'
        assert (
            len(relationship.name_list) == initial_length + 2
        ), 'bad updated length from cadc'

        # entry is in file
        obs_id = relationship.get_obs_id('N20170616S0540')
        assert obs_id is not None, 'expect a file result'
        assert obs_id == 'GN-CAL20170616-11-022', 'wrong file result'
        assert (
            len(relationship.name_list) == initial_length + 2
        ), 'bad updated length from file'
    finally:
        os.getcwd = original_getcwd
def _run_remote():
    """Identify the work to be done from a todo file, with a vault
    directory listing as the data source and a VOSpace transfer for the
    files.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    proc_builder = builder.GemProcBuilder(cfg)

    # one VOSpace client is shared by the transfer and the listing source
    vault_client = Client(vospace_certfile=cfg.proxy_fqn)
    fits_transfer = tc.VoFitsTransfer(vault_client)
    vault_source = dsc.VaultListDirDataSource(vault_client, cfg)

    run_kwargs = {
        'config': cfg,
        'name_builder': proc_builder,
        'command_name': main_app.APPLICATION,
        'source': vault_source,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'store_transfer': fits_transfer,
    }
    return rc.run_by_todo(**run_kwargs)
def _run_by_incremental():
    """Do incremental processing of the observations posted on the site
    archive.gemini.edu.

    TODO in the future this will depend on the incremental query endpoint.

    The end of the time box is the 'end_timestamp' value of the GEM
    bookmark in the state file, or the current time when that value is
    absent.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()

    state = mc.State(cfg.state_fqn)
    bookmark = state.bookmarks.get(data_source.GEM_BOOKMARK)
    end_timestamp_s = bookmark.get('end_timestamp', datetime.now())
    end_timestamp_dt = mc.make_time_tz(end_timestamp_s)
    logging.info(f'{main_app.APPLICATION} will end at {end_timestamp_s}')

    external_metadata.init_global(config=cfg)
    file_name_builder = nbc.FileNameBuilder(gem_name.GemName)
    archive_source = data_source.IncrementalSource()
    visitors = _define_meta_visitors(cfg)
    result = rc.run_by_state(
        config=cfg,
        name_builder=file_name_builder,
        command_name=main_app.APPLICATION,
        bookmark_name=data_source.GEM_BOOKMARK,
        meta_visitors=visitors,
        data_visitors=DATA_VISITORS,
        end_time=end_timestamp_dt,
        source=archive_source,
        chooser=None,
    )

    if not archive_source.max_records_encountered:
        return result

    # the source hit its record limit: make noise, and report failure
    logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    logging.warning('Encountered maximum records!!')
    logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    result |= -1
    return result
def _run_single():
    """Do the processing for a single entry.

    The file name is read from sys.argv[1]. sys.argv[2] is used as the
    proxy itself, or, when running in airflow, as the content written to a
    temporary proxy file.

    :return 0 if successful, -1 if there's any sort of failure. Return
        status is used by airflow for task instance management and
        reporting.
    :raises mc.CadcException when the configuration does not use file
        names.
    """
    cfg = mc.Config()
    cfg.get_executors()
    cfg.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    if cfg.features.run_in_airflow:
        # the file object stays bound to a local name until the function
        # returns, which keeps the temporary file open for the whole run
        proxy_file = tempfile.NamedTemporaryFile()
        mc.write_to_file(proxy_file.name, sys.argv[2])
        cfg.proxy = proxy_file.name
    else:
        cfg.proxy = sys.argv[2]
    cfg.stream = 'default'

    if not cfg.features.use_file_names:
        raise mc.CadcException('No code to handle running GEM by obs id.')
    single_name = gem_name.GemName(file_name=sys.argv[1])

    external_metadata.init_global(config=cfg)
    visitors = _define_meta_visitors(cfg)
    return rc.run_single(
        cfg,
        single_name,
        main_app.APPLICATION,
        visitors,
        DATA_VISITORS,
    )
def test_main_app_v(client_mock, tap_mock, gemini_client_mock, gemini_pi_mock,
                    svofps_mock, cadc_client_mock, get_file_info_mock,
                    test_name):
    """Run main_app.to_caom2 for one test file and compare the observation
    it writes with the expected observation.

    :param test_name: path of the test file; its directory and base name
        select the input, actual and expected files.
    :raises AssertionError carrying the comparison result, when the
        comparison reports differences.
    """
    # client_mock present because of global in external_metadata
    cadc_client_mock.get_node.side_effect = gem_mocks.mock_get_node
    gemini_client_mock.side_effect = gem_mocks.mock_get_obs_metadata
    gemini_pi_mock.side_effect = gem_mocks.mock_get_pi_metadata
    svofps_mock.side_effect = gem_mocks.mock_get_votable
    tap_mock.side_effect = gem_mocks.mock_query_tap
    get_file_info_mock.return_value.get_file_info.side_effect = \
        gem_mocks.mock_get_file_info
    # os.getcwd is replaced by a mock returning the si_config directory
    # (presumably where mc.Config looks for its configuration - TODO
    # confirm), and restored in the finally clause.
    getcwd_orig = os.getcwd
    os.getcwd = Mock(
        return_value=os.path.join(gem_mocks.TEST_DATA_DIR, 'si_config'))
    try:
        test_config = mc.Config()
        test_config.get_executors()
        test_config.features.supports_latest_client = True
        em.set_ofr(None)
        em.init_global(test_config)
        # refresh the copy under /app/data only when its size differs from
        # the test data version
        test_data_size = os.stat(
            os.path.join(gem_mocks.TEST_DATA_DIR, 'from_paul.txt'))
        app_size = os.stat('/app/data/from_paul.txt')
        if test_data_size.st_size != app_size.st_size:
            copyfile(os.path.join(gem_mocks.TEST_DATA_DIR, 'from_paul.txt'),
                     '/app/data/from_paul.txt')
        basename = os.path.basename(test_name)
        dirname = os.path.dirname(test_name)
        file_id = _get_file_id(basename)
        obs_id = _get_obs_id(file_id)
        product_id = file_id
        lineage = _get_lineage(dirname, basename, test_config)
        input_file = '{}.in.xml'.format(product_id)
        actual_fqn = _get_actual_file_name(dirname, product_id)
        local = _get_local(test_name)
        plugin = gem_mocks.PLUGIN

        # remove the output of any earlier run
        if os.path.exists(actual_fqn):
            os.remove(actual_fqn)

        # with an input observation file present, pass it with --in,
        # otherwise name the collection and observation ID with
        # --observation
        if os.path.exists(os.path.join(dirname, input_file)):
            sys.argv = \
                ('{} --quiet --no_validate --local {} '
                 '--plugin {} --module {} --in {}/{} --out {} --lineage {}'.
                 format(main_app.APPLICATION, local, plugin, plugin, dirname,
                        input_file, actual_fqn, lineage)).split()
        else:
            sys.argv = \
                ('{} --quiet --no_validate --local {} '
                 '--plugin {} --module {} --observation {} {} --out {} '
                 '--lineage {}'.
                 format(main_app.APPLICATION, local, plugin, plugin,
                        main_app.COLLECTION, obs_id, actual_fqn,
                        lineage)).split()
        print(sys.argv)
        main_app.to_caom2()
        expected_fqn = _get_expected_file_name(dirname, product_id)

        # a result other than None is raised as the failure
        compare_result = _new_si_compare_differences(actual_fqn, expected_fqn,
                                                     test_config)
        if compare_result is not None:
            raise AssertionError(compare_result)
        # assert False  # cause I want to see logging messages
    finally:
        os.getcwd = getcwd_orig