def _run_by_state():
    """Retrieve and ingest VLASS quicklook files, bounded by a bookmark.

    The timestamp bookmarked in the state file is the starting point for
    listing the files to retrieve. Work is identified by URL, because the
    presence of the phrase 'QA_REJECTED' in a URL is the only way to tell
    whether the attribute 'requirements' should be set to 'fail'.

    :return: whatever rc.run_by_state returns.
    """
    config = mc.Config()
    config.get_executors()

    bookmark = mc.State(config.state_fqn).get_bookmark(VLASS_BOOKMARK)
    # the bookmark may be a string or a datetime, depending on the
    # execution environment; incrementing by 0 is used to get a datetime
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)

    if len(todo_list) > 0:
        # NOTE(review): the state is constructed a second time here rather
        # than reusing the instance the bookmark was read from - confirm
        # whether that is required
        state = mc.State(config.state_fqn)
        work.init_web_log(state, config)

    # everything below runs even when there is nothing to do, so that
    # reporting stays consistent
    source = data_source.NraoPage(todo_list)
    name_builder = builder.VlassInstanceBuilder(config)
    return rc.run_by_state(
        config=config,
        command_name=sn.APPLICATION,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
        store_transfer=tc.HttpTransfer(),
    )
def _run():
    """Process the observations listed in a todo file.

    A Builder is supplied so that StorageName instances can be created;
    the original note says this matters because the instrument name has
    to be provided to the StorageName constructor.

    :return: the value of rc.run_by_todo. Per the original note, 0 if
        successful, -1 if there's any sort of failure; the status is used
        by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()

    # time_bounds_augmentation and quality_augmentation rely on metadata
    # scraped from the NRAO site. That metadata only changes when a file
    # is created, re-versioned, or replaced. When the pipeline is not
    # STORE'ing from the source, neither the files nor their metadata are
    # changing, so stay polite to the NRAO site and skip the scrape.
    stores = mc.TaskType.STORE in config.task_types
    if stores and mc.TaskType.INGEST in config.task_types:
        meta_visitors = META_VISITORS
    else:
        meta_visitors = [cleanup_augmentation]

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def _run_state():
    """Retrieve and ingest VLASS quicklook files, bounded by a bookmark.

    The timestamp bookmarked in the state file is the starting point for
    listing the files to retrieve. Work is identified by URL, because the
    presence of the phrase 'QA_REJECTED' in a URL is the only way to tell
    whether the attribute 'requirements' should be set to 'fail'.

    :return: whatever rc.run_by_state returns.
    """
    config = mc.Config()
    config.get_executors()

    bookmark = mc.State(config.state_fqn).get_bookmark(VLASS_BOOKMARK)
    # the bookmark may be a string or a datetime, depending on the
    # execution environment; incrementing by 0 is used to get a datetime
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)

    source = data_source.NraoPage(todo_list)
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    return rc.run_by_state(
        config=config,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
        store_transfer=tc.HttpTransfer(),
    )
def _run_single():
    """Process one entry; a single file name is expected on the command line.

    The first command-line argument is handed to an EntryBuilder for
    storage_name.VlassName, and the resulting name is processed.

    :return: whatever rc.run_single returns.
    """
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    entry = sys.argv[1]
    vlass_name = name_builder.build(entry)
    return rc.run_single(
        storage_name=vlass_name,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def _run_single():
    """Process the single entry named by the first command-line argument.

    The feature flags in the configuration decide how that argument is
    handed to sn.VlassName: as a file name when use_file_names is set,
    otherwise as a URL when use_urls is set, otherwise as an obs_id. In
    every case the same value is also passed as the entry.

    :return: whatever rc.run_single returns.
    """
    config = mc.Config()
    config.get_executors()

    # pick the keyword the argument is passed under; file names take
    # precedence over URLs
    features = config.features
    if features.use_file_names:
        id_keyword = 'file_name'
    elif features.use_urls:
        id_keyword = 'url'
    else:
        id_keyword = 'obs_id'

    entry = sys.argv[1]
    vlass_name = sn.VlassName(**{id_keyword: entry}, entry=entry)
    return rc.run_single(
        config=config,
        storage_name=vlass_name,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def _run():
    """Process the observations listed in a todo file.

    A Builder is supplied so that StorageName instances can be created;
    the original note says this matters because the instrument name has
    to be provided to the StorageName constructor. The web log is
    initialised from the state file before the run starts.

    :return: the value of rc.run_by_todo. Per the original note, 0 if
        successful, -1 if there's any sort of failure; the status is used
        by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    work.init_web_log(mc.State(config.state_fqn), config)
    name_builder = builder.VlassInstanceBuilder(config)
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def test_http_transfer(get_mock):
    """Check how HttpTransfer.get calls the mocked retrieval function.

    The call is expected to raise mc.CadcException, and the mock is
    expected to have been called with the source and the destination as
    its second and third positional arguments.

    :param get_mock: mock standing in for the retrieval call; presumably
        injected by a patch decorator or fixture outside this view.
    """
    source_url = 'http://localhost/test_file.fits'
    destination_fqn = '/tmp/test_file.fits'

    # make sure something exists at the destination before the transfer
    if not os.path.exists(destination_fqn):
        with open(destination_fqn, 'w') as f_out:
            f_out.write('test content')
    get_mock.side_effect = Mock(autospec=True)

    config = mc.Config()
    config.working_directory = test_conf.TEST_DATA_DIR
    config.netrc_file = 'test_netrc'
    config.rejected_fqn = '/tmp/rejected.yml'
    rejected = mc.Rejected(config.rejected_fqn)
    metrics = mc.Metrics(config)
    observable = mc.Observable(rejected, metrics)

    transfer = tc.HttpTransfer()
    assert transfer is not None, 'expect a result'
    transfer.observable = observable

    with pytest.raises(mc.CadcException):
        transfer.get(source_url, destination_fqn)

    assert get_mock.called, 'should have been called'
    # only the positional arguments of the last call are inspected
    args, _ = get_mock.call_args
    assert args[1] == source_url, 'wrong source name'
    assert args[2] == destination_fqn, 'wrong dest name'