def test_clients(get_access_mock):
    """ClientCollection construction should succeed with a test Config
    whose resource and TAP ids point at the test service."""
    get_access_mock.return_value = 'https://localhost'
    cfg = mc.Config()
    cfg.get_executors()
    cfg.resource_id = 'ivo://cadc.nrc.ca/test'
    cfg.tap_id = 'ivo://cadc.nrc.ca/test'
    subject = clc.ClientCollection(cfg)
    assert subject is not None, 'ctor failure'
def _common():
    """Assemble the collaborators shared by the pipeline entry points.

    Returns a 4-tuple of (config, clients, name_builder, metadata_reader);
    the metadata reader is only created when local files are in use,
    otherwise it is None.
    """
    cfg = mc.Config()
    cfg.get_executors()
    clients = clc.ClientCollection(cfg)
    # A file-based reader only makes sense when the files are on disk.
    reader = rdc.FileMetadataReader() if cfg.use_local_files else None
    builder = nbc.EntryBuilder(dao_name.DAOName)
    return cfg, clients, builder, reader
def run_single(
    config,
    storage_name,
    command_name,
    meta_visitors,
    data_visitors,
    chooser=None,
    store_transfer=None,
    modify_transfer=None,
):
    """Process a single entry by StorageName detail.

    :param config mc.Config
    :param storage_name instance of StorageName for the collection
    :param command_name extension of fits2caom2 for the collection
    :param meta_visitors List of metadata visit methods.
    :param data_visitors List of data visit methods.
    :param chooser OrganizeChooser instance for detailed CaomExecute
        descendant choices
    :param store_transfer Transfer extension that identifies how to retrieve
        data from a source for storage at CADC, probably an HTTP or FTP site.
        Don't try to guess what this one is.
    :param modify_transfer Transfer extension that identifies how to retrieve
        data from a source for modification of CAOM2 metadata. By this time,
        files are usually stored at CADC, so it's probably a CadcTransfer
        instance, but this allows for the case that a file is never stored
        at CADC. Try to guess what this one is.
    """
    # TODO - this does not follow the current implementation pattern -
    # maybe there's a rethink required
    # missing the metrics and the reporting
    # logging.debug(f'Begin run_single {config.work_fqn}')
    clients = cc.ClientCollection(config)
    # Fall back to a guessed transfer implementation when none was supplied.
    modify_transfer = _set_modify_transfer(
        modify_transfer, config, clients.data_client
    )
    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        modify_transfer,
        clients.data_client,
        clients.metadata_client,
    )
    # Exactly one record is processed by this entry point.
    organizer.complete_record_count = 1
    organizer.choose(storage_name)
    result = organizer.do_one(storage_name)
    logging.debug(f'run_single result is {result}')
    return result
def run_by_state(
    config=None,
    name_builder=None,
    command_name=None,
    bookmark_name=None,
    meta_visitors=None,
    data_visitors=None,
    end_time=None,
    chooser=None,
    source=None,
    modify_transfer=None,
    store_transfer=None,
    clients=None,
):
    """A default implementation for using the StateRunner.

    :param config Config instance
    :param name_builder NameBuilder extension that creates an instance of
        a StorageName extension, from an entry from a DataSourceComposable
        listing
    :param command_name string that represents the specific pipeline
        application name
    :param bookmark_name string that represents the state.yml lookup value
    :param meta_visitors list of modules with visit methods, that expect
        the metadata of a work file to exist on disk
    :param data_visitors list of modules with visit methods, that expect the
        work file to exist on disk
    :param end_time datetime for stopping a run, should be in UTC.
    :param chooser OrganizerChooser, if there's strange rules about file
        naming.
    :param source DataSourceComposable extension that identifies work to be
        done.
    :param modify_transfer Transfer extension that identifies how to retrieve
        data from a source for modification of CAOM2 metadata. By this time,
        files are usually stored at CADC, so it's probably a CadcTransfer
        instance, but this allows for the case that a file is never stored
        at CADC. Try to guess what this one is.
    :param store_transfer Transfer extension that identifies how to retrieve
        data from a source for storage at CADC, probably an HTTP or FTP site.
        Don't try to guess what this one is.
    :param clients instance of ClientsCollection, if one was required
    """
    # Avoid mutable default arguments: materialize the visitor lists here
    # so each invocation gets its own fresh, empty list.
    meta_visitors = [] if meta_visitors is None else meta_visitors
    data_visitors = [] if data_visitors is None else data_visitors
    if config is None:
        config = mc.Config()
        config.get_executors()
    _set_logging(config)
    if clients is None:
        clients = cc.ClientCollection(config)
    if name_builder is None:
        name_builder = name_builder_composable.StorageNameInstanceBuilder(
            config.collection
        )
    if source is None:
        # Local files are listed from disk; otherwise query the time-boxed
        # service for the work to be done.
        if config.use_local_files:
            source = data_source_composable.ListDirTimeBoxDataSource(
                config, recursive=config.recurse_data_sources
            )
        else:
            source = data_source_composable.QueryTimeBoxDataSourceTS(config)
    if end_time is None:
        end_time = get_utc_now_tz()
    modify_transfer = _set_modify_transfer(
        modify_transfer, config, clients.data_client
    )
    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        modify_transfer,
        clients.data_client,
        clients.metadata_client,
    )
    runner = StateRunner(
        config, organizer, name_builder, source, bookmark_name, end_time
    )
    result = runner.run()
    # Accumulate the retry outcome into the overall exit status.
    result |= runner.run_retry()
    runner.report()
    return result
ignore_scheme, ignore_path, f_name = mc.decompose_uri(uri) print(f':::Looking for {obs_id} and {f_name}') else: print( f':::No observation records found for collection {archive} from service {service}' ) sys.exit(-1) obs = caom_client.read(archive, obs_id) obs_fqn = f'/usr/src/app/expected.{obs_id}.xml' mc.write_obs_to_file(obs, obs_fqn) print(f':::2 - Get {f_name}') config = mc.Config() config.get_executors() clients = clc.ClientCollection(config) metrics = mc.Metrics(config) data_location = '/usr/src/app' if cleans_up: data_location = '/data' for ii in ['/data/success', '/data/failure', '/data']: with os.scandir(ii) as it: for jj in it: if not jj.is_dir(): os.unlink(os.path.join(ii, jj)) if collection == 'GEM': uri = uri.replace('gemini:GEM/', 'gemini:GEMINI/') clients.data_client.get(data_location, uri) print( ':::3 - Update config.yml to say task types are scrape and modify, and use local files.'