def test_choose_exceptions(test_config):
    """Building an organizer and choosing executors must raise.

    With ``init_local_files`` switched off and SCRAPE as the only task
    type, constructing ``OrganizeExecutes`` and calling ``choose`` is
    expected to end in ``mc.CadcException``.
    """
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.init_local_files = False
    with pytest.raises(mc.CadcException):
        # construction stays inside the context manager, so a failure in
        # either the constructor or choose() satisfies the expectation
        organizer = ec.OrganizeExecutes(
            test_config, 'command name', [], []
        )
        organizer.choose(tc.TestStorageName())
def test_organize_executes_chooser(test_config):
    """Check the executor picked for INGEST when a chooser is supplied.

    With ``use_local_files`` on, the single executor must be a
    ``LocalMetaDeleteCreate``; with it off, a ``MetaDeleteCreate``. Each
    ``choose`` call must invoke ``read`` on the CAOM client.
    """
    test_obs_id = tc.TestStorageName()
    test_config.use_local_files = True
    test_config.log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.features.supports_composite = True
    caom_client = Mock(autospec=True)
    caom_client.read.side_effect = _read_obs2
    test_config.task_types = [mc.TaskType.INGEST]
    test_chooser = tc.TestChooser()

    test_oe = ec.OrganizeExecutes(
        test_config,
        'command_name',
        [],
        [],
        test_chooser,
        cadc_client=Mock(autospec=True, return_value=None),
        caom_client=caom_client,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 1
    assert isinstance(executors[0], ec.LocalMetaDeleteCreate)
    assert executors[0].stream == 'TEST', 'stream'
    assert executors[0].working_dir == os.path.join(
        tc.THIS_DIR, 'test_obs_id'
    ), 'working_dir'
    assert caom_client.read.called, 'read should be called'

    # reset_mock() clears the recorded calls (side_effect is retained), so
    # the 'called' assertion below really checks the second choose().
    # The former .reset() was an auto-created child mock and a no-op.
    caom_client.read.reset_mock()

    test_config.use_local_files = False
    test_config.task_types = [mc.TaskType.INGEST]
    test_oe = ec.OrganizeExecutes(
        test_config,
        'command_name',
        [],
        [],
        test_chooser,
        cadc_client=Mock(autospec=True, return_value=None),
        caom_client=caom_client,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 1
    assert isinstance(executors[0], ec.MetaDeleteCreate)
    assert caom_client.read.called, 'read should be called'
def run_single(
    config,
    storage_name,
    command_name,
    meta_visitors,
    data_visitors,
    chooser=None,
    store_transfer=None,
    modify_transfer=None,
):
    """Run the pipeline for exactly one StorageName instance.

    :param config mc.Config
    :param storage_name instance of StorageName for the collection
    :param command_name extension of fits2caom2 for the collection
    :param meta_visitors List of metadata visit methods.
    :param data_visitors List of data visit methods.
    :param chooser OrganizeChooser instance for detailed CaomExecute
        descendant choices
    :param store_transfer Transfer extension that identifies how to
        retrieve data from a source for storage at CADC, probably an HTTP
        or FTP site. No default is guessed for this one.
    :param modify_transfer Transfer extension that identifies how to
        retrieve data from a source for modification of CAOM2 metadata.
        By this time, files are usually stored at CADC, so it's probably
        a CadcTransfer instance, but this allows for the case that a file
        is never stored at CADC. When it is not provided, the value is
        resolved by _set_modify_transfer.
    :return the value returned by OrganizeExecutes.do_one
    """
    # TODO - this does not follow the current implementation pattern -
    # maybe there's a rethink required
    # missing the metrics and the reporting
    client_collection = cc.ClientCollection(config)
    data_client = client_collection.data_client
    resolved_modify_transfer = _set_modify_transfer(
        modify_transfer, config, data_client
    )
    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        resolved_modify_transfer,
        data_client,
        client_collection.metadata_client,
    )
    # a single entry is the whole of the work
    organizer.complete_record_count = 1
    organizer.choose(storage_name)
    outcome = organizer.do_one(storage_name)
    logging.debug(f'run_single result is {outcome}')
    return outcome
def test_do_one(test_config):
    """do_one with an empty task list reports -1.

    The call is made twice against the same organizer: first with the
    configuration as provided, then after switching
    ``features.use_clients`` on.
    """
    test_config.task_types = []
    organizer = ec.OrganizeExecutes(test_config, 'test2caom2', [], [])

    for enable_clients in (False, True):
        if enable_clients:
            test_config.features.use_clients = True
        outcome = organizer.do_one(tc.TestStorageName())
        assert outcome is not None
        assert outcome == -1
def test_storage_name_failure(test_config):
    """Choosing with an invalid storage name creates the report files.

    Before ``choose`` none of the success, failure or retry files may
    exist; afterwards all three must exist.
    """

    class TestStorageNameFails(tc.TestStorageName):
        """A TestStorageName whose validity check always fails."""

        def __init__(self):
            super().__init__()

        def is_valid(self):
            return False

    report_attributes = ('success_fqn', 'failure_fqn', 'retry_fqn')

    test_config.log_to_file = True
    for attribute in report_attributes:
        assert not os.path.exists(getattr(test_config, attribute))

    organizer = ec.OrganizeExecutes(test_config, 'command name', [], [])
    organizer.choose(TestStorageNameFails())

    for attribute in report_attributes:
        assert os.path.exists(getattr(test_config, attribute))
def test_organize_executes_client_visit(test_config):
    """A lone VISIT task yields one MetaVisit executor.

    Clients are enabled and local files are disabled. Choosing the
    executor must not trigger a ``read`` on the CAOM client mock.
    """
    storage_name = tc.TestStorageName()
    test_config.features.use_clients = True
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.use_local_files = False

    caom_client_mock = Mock(autospec=True)
    caom_client_mock.read.side_effect = _read_obs2

    organizer = ec.OrganizeExecutes(
        test_config,
        'command_name',
        [],
        [],
        cadc_client=Mock(autospec=True),
        caom_client=caom_client_mock,
    )
    chosen = organizer.choose(storage_name)

    assert chosen is not None
    assert len(chosen) == 1
    assert isinstance(chosen[0], ec.MetaVisit)
    assert not caom_client_mock.read.called, 'mock should not be called?'
def run_by_state(
    config=None,
    name_builder=None,
    command_name=None,
    bookmark_name=None,
    meta_visitors=None,
    data_visitors=None,
    end_time=None,
    chooser=None,
    source=None,
    modify_transfer=None,
    store_transfer=None,
    clients=None,
):
    """A default implementation for using the StateRunner.

    :param config Config instance. When None, a new mc.Config is created
        and get_executors() is called on it.
    :param name_builder NameBuilder extension that creates an instance of
        a StorageName extension, from an entry from a DataSourceComposable
        listing. Defaults to a StorageNameInstanceBuilder for
        config.collection.
    :param command_name string that represents the specific pipeline
        application name
    :param bookmark_name string that represents the state.yml lookup value
    :param meta_visitors list of modules with visit methods, that expect
        the metadata of a work file to exist on disk. None means an empty
        list.
    :param data_visitors list of modules with visit methods, that expect
        the work file to exist on disk. None means an empty list.
    :param end_time datetime for stopping a run, should be in UTC.
        Defaults to get_utc_now_tz().
    :param chooser OrganizerChooser, if there's strange rules about file
        naming.
    :param source DataSourceComposable extension that identifies work to
        be done. Defaults to ListDirTimeBoxDataSource when
        config.use_local_files is set, otherwise QueryTimeBoxDataSourceTS.
    :param modify_transfer Transfer extension that identifies how to
        retrieve data from a source for modification of CAOM2 metadata.
        By this time, files are usually stored at CADC, so it's probably
        a CadcTransfer instance, but this allows for the case that a file
        is never stored at CADC. When it is not provided, the value is
        resolved by _set_modify_transfer.
    :param store_transfer Transfer extension that identifies how to
        retrieve data from a source for storage at CADC, probably an HTTP
        or FTP site. No default is guessed for this one.
    :param clients instance of ClientsCollection, if one was required.
        Defaults to a new cc.ClientCollection for config.
    :return the result of StateRunner.run(), combined with the result of
        StateRunner.run_retry() using |=
    """
    # Use fresh lists per call: a mutable default in the signature would be
    # shared between every invocation that relies on it.
    meta_visitors = [] if meta_visitors is None else meta_visitors
    data_visitors = [] if data_visitors is None else data_visitors

    if config is None:
        config = mc.Config()
        config.get_executors()
    _set_logging(config)

    if clients is None:
        clients = cc.ClientCollection(config)

    if name_builder is None:
        name_builder = name_builder_composable.StorageNameInstanceBuilder(
            config.collection
        )

    if source is None:
        if config.use_local_files:
            source = data_source_composable.ListDirTimeBoxDataSource(
                config, recursive=config.recurse_data_sources
            )
        else:
            source = data_source_composable.QueryTimeBoxDataSourceTS(config)

    if end_time is None:
        end_time = get_utc_now_tz()

    modify_transfer = _set_modify_transfer(
        modify_transfer, config, clients.data_client
    )

    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        modify_transfer,
        clients.data_client,
        clients.metadata_client,
    )

    runner = StateRunner(
        config, organizer, name_builder, source, bookmark_name, end_time
    )
    result = runner.run()
    # fold the retry outcome into the overall result
    result |= runner.run_retry()
    runner.report()
    return result
def run_by_state_ad(
    config=None,
    name_builder=None,
    command_name=None,
    bookmark_name=None,
    meta_visitors=None,
    data_visitors=None,
    end_time=None,
    chooser=None,
    source=None,
    transferrer=None,
):
    """A default implementation for using the StateRunner.

    This variant obtains its clients from _set_clients, and defaults the
    source to a QueryTimeBoxDataSource.

    :param config Config instance. When None, a new mc.Config is created
        and get_executors() is called on it.
    :param name_builder NameBuilder extension that creates an instance of
        a StorageName extension, from an entry from a DataSourceComposable
        listing. Defaults to a StorageNameInstanceBuilder for
        config.collection.
    :param command_name string that represents the specific pipeline
        application name
    :param bookmark_name string that represents the state.yml lookup value
    :param meta_visitors list of modules with visit methods, that expect
        the metadata of a work file to exist on disk. None means an empty
        list.
    :param data_visitors list of modules with visit methods, that expect
        the work file to exist on disk. None means an empty list.
    :param end_time datetime for stopping a run, should be in UTC.
        Defaults to get_utc_now().
    :param chooser OrganizerChooser, if there's strange rules about file
        naming.
    :param source DataSourceComposable extension that identifies work to
        be done. Defaults to QueryTimeBoxDataSource.
    :param transferrer Transfer extension that identifies how to retrieve
        data from a source. Defaults to Transfer when
        config.use_local_files is set, otherwise CadcTransfer.
    :return the result of StateRunner.run(), combined with the result of
        StateRunner.run_retry() using |=
    """
    # Use fresh lists per call: a mutable default in the signature would be
    # shared between every invocation that relies on it.
    meta_visitors = [] if meta_visitors is None else meta_visitors
    data_visitors = [] if data_visitors is None else data_visitors

    if config is None:
        config = mc.Config()
        config.get_executors()
    _set_logging(config)

    cadc_client, caom_client = _set_clients(config)

    if name_builder is None:
        name_builder = name_builder_composable.StorageNameInstanceBuilder(
            config.collection
        )

    if source is None:
        source = data_source_composable.QueryTimeBoxDataSource(config)

    if end_time is None:
        end_time = get_utc_now()

    if transferrer is None:
        if config.use_local_files:
            transferrer = transfer_composable.Transfer()
        else:
            transferrer = transfer_composable.CadcTransfer()

    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        transferrer,
        cadc_client=cadc_client,
        caom_client=caom_client,
    )

    runner = StateRunner(
        config, organizer, name_builder, source, bookmark_name, end_time
    )
    result = runner.run()
    # fold the retry outcome into the overall result
    result |= runner.run_retry()
    runner.report()
    return result
def run_by_todo(
    config=None,
    name_builder=None,
    chooser=None,
    command_name=None,
    source=None,
    meta_visitors=None,
    data_visitors=None,
    modify_transfer=None,
    store_transfer=None,
):
    """A default implementation for using the TodoRunner.

    :param config Config instance. When None, a new mc.Config is created
        and get_executors() is called on it.
    :param name_builder NameBuilder extension that creates an instance of
        a StorageName extension, from an entry from a DataSourceComposable
        listing. Defaults to a StorageNameInstanceBuilder for
        config.collection.
    :param chooser OrganizerChooser, if there's strange rules about file
        naming.
    :param command_name string that represents the specific pipeline
        application name
    :param source DataSource implementation, if there's a special data
        source. Defaults to ListDirDataSource when config.use_local_files
        is set, otherwise TodoFileDataSource.
    :param meta_visitors list of modules with visit methods, that expect
        the metadata of a work file to exist on disk. None means an empty
        list.
    :param data_visitors list of modules with visit methods, that expect
        the work file to exist on disk. None means an empty list.
    :param modify_transfer Transfer extension that identifies how to
        retrieve data from a source for modification of CAOM2 metadata.
        By this time, files are usually stored at CADC, so it's probably
        a CadcTransfer instance, but this allows for the case that a file
        is never stored at CADC. When it is not provided, the value is
        resolved by _set_modify_transfer.
    :param store_transfer Transfer extension that identifies how to
        retrieve data from a source for storage at CADC, probably an HTTP
        or FTP site. No default is guessed for this one.
    :return the result of TodoRunner.run(), combined with the result of
        TodoRunner.run_retry() using |=
    """
    # Use fresh lists per call: a mutable default in the signature would be
    # shared between every invocation that relies on it.
    meta_visitors = [] if meta_visitors is None else meta_visitors
    data_visitors = [] if data_visitors is None else data_visitors

    if config is None:
        config = mc.Config()
        config.get_executors()
    _set_logging(config)

    cadc_client, caom_client = _set_clients(config)

    if name_builder is None:
        name_builder = name_builder_composable.StorageNameInstanceBuilder(
            config.collection
        )

    if source is None:
        if config.use_local_files:
            source = data_source_composable.ListDirDataSource(
                config, chooser
            )
        else:
            source = data_source_composable.TodoFileDataSource(config)

    modify_transfer = _set_modify_transfer(modify_transfer, config)

    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        modify_transfer,
        cadc_client=cadc_client,
        caom_client=caom_client,
    )

    runner = TodoRunner(config, organizer, name_builder, source)
    result = runner.run()
    # fold the retry outcome into the overall result
    result |= runner.run_retry()
    runner.report()
    return result
def test_organize_executes_client_do_one(test_config):
    """Walk OrganizeExecutes.choose through several task-type mixes.

    For each combination of ``task_types`` and ``use_local_files`` the
    test checks how many executors are chosen and their types. The final
    INGEST_OBS case also checks several attributes of the executor and
    the organizer's ``todo_fqn``.

    ``mc.exec_cmd_info`` is replaced with a mock for the duration of the
    test and restored afterwards.
    """
    test_obs_id = tc.TestStorageName()
    test_config.use_local_files = True
    test_config.log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.success_log_file_name = 'success_log.txt'
    test_config.failure_log_file_name = 'failure_log.txt'
    test_config.features.use_clients = True
    test_config.retry_file_name = 'retries.txt'
    exec_cmd_orig = mc.exec_cmd_info
    repo_client_mock = Mock(autospec=True)
    repo_client_mock.read.return_value = None

    try:
        mc.exec_cmd_info = Mock(
            return_value='INFO:cadc-data:info\n'
            'File C170324_0054_SCI_prev.jpg:\n'
            ' archive: OMM\n'
            ' encoding: None\n'
            ' lastmod: Mon, 25 Jun 2018 16:52:07 GMT\n'
            ' md5sum: f37d21c53055498d1b5cb7753e1c6d6f\n'
            ' name: C120902_sh2-132_J_old_'
            'SCIRED.fits.gz\n'
            ' size: 754408\n'
            ' type: image/jpeg\n'
            ' umd5sum: 704b494a972eed30b18b817e243ced7d\n'
            ' usize: 754408\n'.encode('utf-8')
        )

        # SCRAPE only
        test_config.task_types = [mc.TaskType.SCRAPE]
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=None,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 1
        assert isinstance(executors[0], ec.ScrapeUpdate)

        # STORE + INGEST + MODIFY, local files
        test_config.task_types = [
            mc.TaskType.STORE,
            mc.TaskType.INGEST,
            mc.TaskType.MODIFY,
        ]
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=None,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 3
        assert isinstance(executors[0], ec.Store), type(executors[0])
        assert isinstance(executors[1], ec.LocalMetaCreate)
        assert isinstance(executors[2], ec.LocalDataVisit)
        assert repo_client_mock.read.called, 'mock should be called'
        # NOTE(review): .reset() is an auto-created child mock, not
        # reset_mock(); it does not clear 'called' and its return value is
        # always truthy, so this and the identical statements below pass
        # unconditionally. Left as-is because a real reset may expose
        # paths that never call read - confirm before changing.
        assert repo_client_mock.read.reset()

        # INGEST + MODIFY, no local files
        test_config.use_local_files = False
        test_config.task_types = [mc.TaskType.INGEST, mc.TaskType.MODIFY]
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=None,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 2
        assert isinstance(executors[0], ec.MetaCreate)
        assert isinstance(executors[1], ec.DataVisit)
        assert repo_client_mock.read.called, 'mock should be called'

        # INGEST + MODIFY, local files
        test_config.use_local_files = True
        test_config.task_types = [mc.TaskType.INGEST, mc.TaskType.MODIFY]
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=None,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 2
        assert isinstance(executors[0], ec.LocalMetaCreate)
        assert isinstance(executors[1], ec.LocalDataVisit)
        assert repo_client_mock.read.called, 'mock should be called'
        assert repo_client_mock.read.reset()

        # SCRAPE + MODIFY, local files
        repo_client_mock.read.side_effect = _read_obs2
        test_config.task_types = [mc.TaskType.SCRAPE, mc.TaskType.MODIFY]
        test_config.use_local_files = True
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=None,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 2
        assert isinstance(executors[0], ec.ScrapeUpdate)
        assert isinstance(executors[1], ec.DataScrape)
        assert repo_client_mock.read.called, 'mock should be called'
        assert repo_client_mock.read.reset()

        # INGEST with a chooser, no local files
        test_config.task_types = [mc.TaskType.INGEST]
        test_config.use_local_files = False
        test_chooser = tc.TestChooser()
        # NOTE(review): this class-level replacement is never undone, so
        # it remains in place for tests that run afterwards.
        ec.CaomExecute.repo_cmd_get_client = Mock(
            return_value=_read_obs(None)
        )
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            chooser=test_chooser,
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 1
        assert isinstance(executors[0], ec.MetaDeleteCreate)
        assert repo_client_mock.read.called, 'mock should be called'
        assert repo_client_mock.read.reset()

        # INGEST_OBS, no local files
        test_config.task_types = [mc.TaskType.INGEST_OBS]
        test_config.use_local_files = False
        ec.CaomExecute.repo_cmd_get_client = Mock(
            return_value=_read_obs(test_obs_id)
        )
        test_oe = ec.OrganizeExecutes(
            test_config,
            TEST_APP,
            [],
            [],
            cadc_client=Mock(autospec=True),
            caom_client=repo_client_mock,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 1
        assert isinstance(executors[0], ec.MetaUpdateObservation)
        assert repo_client_mock.read.called, 'mock should be called'
        assert executors[0].url == 'https://test_url/', 'url'
        assert executors[0].fname is None, 'file name'
        assert executors[0].stream == 'TEST', 'stream'
        assert executors[0].lineage is None, 'lineage'
        assert executors[0].external_urls_param == '', 'external_url_params'
        assert (
            executors[0].working_dir == f'{tc.THIS_DIR}/test_obs_id'
        ), 'working_dir'
        assert test_oe.todo_fqn == f'{tc.THIS_DIR}/todo.txt', 'wrong todo'
    finally:
        # put the real function back; the previous code assigned to
        # mc.exec_cmd_orig, which left mc.exec_cmd_info mocked
        mc.exec_cmd_info = exec_cmd_orig
def test_organize_executes_chooser(test_config):
    """Check the executor picked for INGEST when a chooser is supplied.

    With ``use_local_files`` on, the single executor must be a
    ``LocalMetaDeleteCreate``; with it off, a ``MetaDeleteCreate``. Each
    ``choose`` call must invoke ``read`` on the CAOM client.

    ``mc.exec_cmd_info`` is replaced with a mock for the duration of the
    test and restored afterwards.
    """
    test_obs_id = tc.TestStorageName()
    test_config.use_local_files = True
    test_config.log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.features.supports_composite = True
    exec_cmd_orig = mc.exec_cmd_info
    caom_client = Mock(autospec=True)
    caom_client.read.side_effect = _read_obs2

    try:
        mc.exec_cmd_info = Mock(
            return_value='INFO:cadc-data:info\n'
            'File C170324_0054_SCI_prev.jpg:\n'
            ' archive: OMM\n'
            ' encoding: None\n'
            ' lastmod: Mon, 25 Jun 2018 16:52:07 GMT\n'
            ' md5sum: f37d21c53055498d1b5cb7753e1c6d6f\n'
            ' name: C120902_sh2-132_J_old_'
            'SCIRED.fits.gz\n'
            ' size: 754408\n'
            ' type: image/jpeg\n'
            ' umd5sum: 704b494a972eed30b18b817e243ced7d\n'
            ' usize: 754408\n'.encode('utf-8')
        )

        test_config.task_types = [mc.TaskType.INGEST]
        test_chooser = tc.TestChooser()
        test_oe = ec.OrganizeExecutes(
            test_config,
            'command_name',
            [],
            [],
            test_chooser,
            cadc_client=Mock(autospec=True, return_value=None),
            caom_client=caom_client,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 1
        assert isinstance(executors[0], ec.LocalMetaDeleteCreate)
        assert executors[0].fname == 'test_obs_id.fits', 'file name'
        assert executors[0].stream == 'TEST', 'stream'
        assert executors[0].working_dir == tc.THIS_DIR, 'working_dir'
        assert caom_client.read.called, 'read should be called'

        # reset_mock() clears the recorded calls (side_effect is retained),
        # so the 'called' assertion below really checks the second
        # choose(). The former .reset() was an auto-created child mock and
        # a no-op.
        caom_client.read.reset_mock()

        test_config.use_local_files = False
        test_config.task_types = [mc.TaskType.INGEST]
        test_oe = ec.OrganizeExecutes(
            test_config,
            'command_name',
            [],
            [],
            test_chooser,
            cadc_client=Mock(autospec=True, return_value=None),
            caom_client=caom_client,
        )
        executors = test_oe.choose(test_obs_id)
        assert executors is not None
        assert len(executors) == 1
        assert isinstance(executors[0], ec.MetaDeleteCreate)
        assert caom_client.read.called, 'read should be called'
    finally:
        # put the real function back; the previous code assigned to
        # mc.exec_cmd_orig, which left mc.exec_cmd_info mocked
        mc.exec_cmd_info = exec_cmd_orig
def test_organize_executes_client_do_one(test_config):
    """Walk OrganizeExecutes.choose through several task-type mixes.

    For each combination of ``task_types`` and ``use_local_files`` the
    test checks how many executors are chosen and their types. The final
    INGEST_OBS case also checks several attributes of the executor and
    the organizer's ``todo_fqn``.
    """
    test_obs_id = tc.TestStorageName()
    test_config.use_local_files = True
    test_config.log_file_directory = os.path.join(tc.THIS_DIR, 'logs')
    test_config.success_log_file_name = 'success_log.txt'
    test_config.failure_log_file_name = 'failure_log.txt'
    test_config.features.use_clients = True
    test_config.retry_file_name = 'retries.txt'
    repo_client_mock = Mock(autospec=True)
    repo_client_mock.read.return_value = None

    # SCRAPE only
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=None,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 1
    assert isinstance(executors[0], ec.ScrapeUpdate), f'{type(executors[0])}'

    # STORE + INGEST + MODIFY, local files
    test_config.task_types = [
        mc.TaskType.STORE,
        mc.TaskType.INGEST,
        mc.TaskType.MODIFY,
    ]
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=None,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 3
    # no parentheses around "condition, message": a parenthesised pair is
    # a non-empty tuple, which is always truthy
    assert isinstance(executors[0], ec.Store), type(executors[0])
    assert isinstance(executors[1], ec.LocalMetaCreate)
    assert isinstance(executors[2], ec.LocalDataVisit)
    assert repo_client_mock.read.called, 'mock should be called'
    # NOTE(review): .reset() is an auto-created child mock, not
    # reset_mock(); it does not clear 'called' and its return value is
    # always truthy, so this and the identical statements below pass
    # unconditionally. Left as-is because a real reset may expose paths
    # that never call read - confirm before changing.
    assert repo_client_mock.read.reset()

    # INGEST + MODIFY, no local files
    test_config.use_local_files = False
    test_config.task_types = [mc.TaskType.INGEST, mc.TaskType.MODIFY]
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=None,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 2
    assert isinstance(executors[0], ec.MetaCreate)
    assert isinstance(executors[1], ec.DataVisit)
    assert repo_client_mock.read.called, 'mock should be called'

    # INGEST + MODIFY, local files
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.INGEST, mc.TaskType.MODIFY]
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=None,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 2
    assert isinstance(executors[0], ec.LocalMetaCreate)
    assert isinstance(executors[1], ec.LocalDataVisit)
    assert repo_client_mock.read.called, 'mock should be called'
    assert repo_client_mock.read.reset()

    # SCRAPE + MODIFY, local files
    repo_client_mock.read.side_effect = _read_obs2
    test_config.task_types = [mc.TaskType.SCRAPE, mc.TaskType.MODIFY]
    test_config.use_local_files = True
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=None,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 2
    assert isinstance(executors[0], ec.ScrapeUpdate)
    assert isinstance(executors[1], ec.DataScrape)
    assert repo_client_mock.read.called, 'mock should be called'
    assert repo_client_mock.read.reset()

    # INGEST with a chooser, no local files
    test_config.task_types = [mc.TaskType.INGEST]
    test_config.use_local_files = False
    test_chooser = tc.TestChooser()
    # NOTE(review): this class-level replacement is never undone, so it
    # remains in place for tests that run afterwards.
    ec.CaomExecute.repo_cmd_get_client = Mock(return_value=_read_obs(None))
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        chooser=test_chooser,
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 1
    assert isinstance(executors[0], ec.MetaDeleteCreate)
    assert repo_client_mock.read.called, 'mock should be called'
    assert repo_client_mock.read.reset()

    # INGEST_OBS, no local files
    test_config.task_types = [mc.TaskType.INGEST_OBS]
    test_config.use_local_files = False
    ec.CaomExecute.repo_cmd_get_client = Mock(
        return_value=_read_obs(test_obs_id)
    )
    test_oe = ec.OrganizeExecutes(
        test_config,
        TEST_APP,
        [],
        [],
        cadc_client=Mock(autospec=True),
        caom_client=repo_client_mock,
    )
    executors = test_oe.choose(test_obs_id)
    assert executors is not None
    assert len(executors) == 1
    assert isinstance(executors[0], ec.MetaUpdateObservation)
    assert repo_client_mock.read.called, 'mock should be called'
    assert executors[0].stream == 'TEST', 'stream'
    assert executors[0].external_urls_param == '', 'external_url_params'
    assert (
        executors[0].working_dir == f'{tc.THIS_DIR}/test_obs_id'
    ), 'working_dir'
    assert test_oe.todo_fqn == f'{tc.THIS_DIR}/todo.txt', 'wrong todo'