def guess_dataset_ios(path: str) -> List[Tuple[DatasetIO, float]]:
    """
    Guess suitable DatasetIO objects for a file system path
    or URL given by *path*.

    Returns a list of (DatasetIO, fitness) tuples, sorted by descending
    fitness values. Fitness values are in the interval (0, 1].
    The first entry is the most appropriate DatasetIO object.

    :param path: A file system path or URL.
    :return: A list of (DatasetIO, fitness) tuples.
    """
    if os.path.isfile(path):
        input_type = "file"
    elif os.path.isdir(path):
        input_type = "dir"
    elif path.find("://") > 0:
        input_type = "url"
    else:
        input_type = None
    dataset_ios = get_extension_registry().find_components(
        EXTENSION_POINT_DATASET_IOS)
    dataset_io_fitness_list = []
    for dataset_io in dataset_ios:
        fitness = dataset_io.fitness(path, path_type=input_type)
        if fitness > 0.0:
            dataset_io_fitness_list.append((dataset_io, fitness))
    dataset_io_fitness_list.sort(key=lambda item: -item[1])
    return dataset_io_fitness_list
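

# Usage sketch: a minimal example of how guess_dataset_ios() might be called.
# The path below is hypothetical; an empty result means no DatasetIO reported
# a positive fitness for the given path.
def _example_guess_dataset_ios():
    guesses = guess_dataset_ios('demo/cube.zarr')  # hypothetical path
    if guesses:
        dataset_io, fitness = guesses[0]  # best match comes first
        print(f'best match: {dataset_io.name} (fitness={fitness:.2f})')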


def query_dataset_io(
        filter_fn: Callable[[DatasetIO], bool] = None) -> List[DatasetIO]:
    dataset_ios = get_extension_registry().find_components(
        EXTENSION_POINT_DATASET_IOS)
    if filter_fn is None:
        return dataset_ios
    return list(filter(filter_fn, dataset_ios))


def _dump_extensions(point: str) -> int:
    count = 0
    for extension in get_extension_registry().find_extensions(point):
        print(f' {extension.name:>24s} '
              f'{extension.metadata.get("description", "<no description>")}')
        count += 1
    return count


def store_info(store_id: str,
               store_params: List[str],
               show_params: bool,
               show_openers: bool,
               show_writers: bool,
               show_data_ids: bool,
               use_json_format: bool):
    """
    Show data store information.

    Dumps detailed data store information in human readable form
    or as JSON, when using the --json option.

    You can obtain valid STORE names using command "xcube store list".

    Note some stores require provision of parameters PARAMS
    when using one of the options --openers/-O, --writers/-W,
    or --data/-D.
    To find out which parameters are available use the command
    with just the --params/-P option first.
    """
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_DATA_STORES, store_id)
    from xcube.core.store import get_data_store_params_schema
    from xcube.core.store import MutableDataStore
    params_schema = get_data_store_params_schema(store_id)
    description = extension.metadata.get('description')
    requires_store_instance = any((show_openers, show_writers, show_data_ids))
    data_store = _new_data_store(store_id, store_params) \
        if requires_store_instance else None
    if use_json_format:
        d = dict()
        d['store_id'] = store_id
        if description:
            d['description'] = description
        if show_params:
            d['params_schema'] = params_schema.to_dict()
        if show_openers:
            d['opener_ids'] = data_store.get_data_opener_ids()
        if show_writers and isinstance(data_store, MutableDataStore):
            d['writer_ids'] = data_store.get_data_writer_ids()
        if show_data_ids:
            d['data_ids'] = list(data_store.get_data_ids())
        print(json.dumps(d, indent=2))
    else:
        print(f'\nData store description:')
        print(f'  {description or _NO_DESCRIPTION}')
        if show_params:
            print(_format_params_schema(params_schema))
        if show_openers:
            print(f'\nData openers:')
            _dump_store_openers(data_store)
        if show_writers:
            if isinstance(data_store, MutableDataStore):
                print(f'\nData writers:')
                _dump_store_writers(data_store)
            else:
                print(f'No writers available, because data store'
                      f' "{store_id}" is not mutable.')
        if show_data_ids:
            print(f'\nData resources:')
            count = _dump_store_data_ids(data_store)
            print(f'{count} data resource{"s" if count != 1 else ""} found.')
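

# Usage sketch (shell): typical invocations of the command above, assuming it
# is registered as "xcube store info" and that "file" is a valid STORE name
# (obtainable via "xcube store list"). Only options named in the docstring
# are used:
#
#   xcube store info file --params
#   xcube store info file --params --json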


def _format_info():
    from xcube.util.plugin import get_extension_registry
    iproc_extensions = get_extension_registry().find_extensions(
        EXTENSION_POINT_INPUT_PROCESSORS)
    dsio_extensions = get_extension_registry().find_extensions(
        EXTENSION_POINT_DATASET_IOS,
        lambda e: 'w' in e.metadata.get('modes', set()))

    help_text = '\nInput processors to be used with option --proc:\n'
    help_text += _format_input_processors(iproc_extensions)
    help_text += '\nFor more input processors use existing "xcube-gen-..." plugins ' \
                 "from the xcube's GitHub organisation or write your own plugin.\n"
    help_text += '\n'
    help_text += '\nOutput formats to be used with option --format:\n'
    help_text += _format_dataset_ios(dsio_extensions)
    help_text += '\n'

    return help_text


def _dump_named_extensions(point: str, names: Sequence[str]) -> int:
    count = 0
    for name in names:
        extension = get_extension_registry().get_extension(point, name)
        if extension:
            print(f' {name:>24s} '
                  f'{extension.metadata.get("description", _NO_DESCRIPTION)}')
        else:
            print(f' {name:>24s} {_UNKNOWN_EXTENSION}')
        count += 1
    return count


def find_data_writer_extensions(
        predicate: ExtensionPredicate = None,
        extension_registry: Optional[ExtensionRegistry] = None
) -> List[Extension]:
    """
    Get registered data writer extensions using the optional
    filter function *predicate*.

    :param predicate: An optional filter function.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :return: List of matching extensions.
    """
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.find_extensions(EXTENSION_POINT_DATA_WRITERS,
                                              predicate=predicate)
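

# Usage sketch: pass a predicate to narrow the result, e.g. keep only writer
# extensions whose name mentions "zarr" (the filter criterion is illustrative).
def _example_find_zarr_writer_extensions():
    return find_data_writer_extensions(
        predicate=lambda extension: 'zarr' in extension.name)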


def find_data_store_extensions(
        predicate: ExtensionPredicate = None,
        extension_registry: Optional[ExtensionRegistry] = None
) -> List[Extension]:
    """
    Find data store extensions using the optional filter
    function *predicate*.

    :param predicate: An optional filter function.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :return: List of data store extensions.
    """
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.find_extensions(EXTENSION_POINT_DATA_STORES,
                                              predicate=predicate)


def writer_info(writer_id: str):
    """
    Show data writer information.

    You can obtain valid WRITER names using command "xcube io writer list".
    """
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_DATA_WRITERS, writer_id)
    description = extension.metadata.get('description')
    if description:
        print(description)
    from xcube.core.store import new_data_writer
    writer_ = new_data_writer(writer_id)
    params_schema = writer_.get_write_data_params_schema()
    print(_format_params_schema(params_schema))


def find_dataset_io(format_name: str,
                    modes: Iterable[str] = None,
                    default: DatasetIO = None) -> Optional[DatasetIO]:
    modes = set(modes) if modes else None
    format_name = format_name.lower()
    dataset_ios = get_extension_registry().find_components(
        EXTENSION_POINT_DATASET_IOS)
    # First pass: match by format name.
    for dataset_io in dataset_ios:
        # noinspection PyUnresolvedReferences
        if format_name == dataset_io.name.lower():
            # noinspection PyTypeChecker
            if not modes or modes.issubset(dataset_io.modes):
                return dataset_io
    # Second pass: match by filename extension.
    for dataset_io in dataset_ios:
        # noinspection PyUnresolvedReferences
        if format_name == dataset_io.ext.lower():
            # noinspection PyTypeChecker
            if not modes or modes.issubset(dataset_io.modes):
                return dataset_io
    return default
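

# Usage sketch: resolve a DatasetIO by format name or filename extension,
# requiring write support; returns None when nothing matches, since no
# default is given. The format name "zarr" is illustrative.
def _example_find_writable_dataset_io():
    return find_dataset_io('zarr', modes=['w'])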


def new_data_writer(writer_id: str,
                    extension_registry: Optional[ExtensionRegistry] = None,
                    **writer_params) -> 'DataWriter':
    """
    Get an instance of the data writer identified by *writer_id*.

    The optional, extra writer parameters *writer_params* may be used
    by data store (``xcube.core.store.DataStore``) implementations
    so they can share their internal state with the writer.

    :param writer_id: The data writer identifier.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :param writer_params: Extra writer parameters.
    :return: A data writer instance.
    """
    assert_given(writer_id, 'writer_id')
    extension_registry = extension_registry or get_extension_registry()
    return extension_registry.get_component(EXTENSION_POINT_DATA_WRITERS,
                                            writer_id)(**writer_params)
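

# Usage sketch: instantiate a writer and query its write-parameter schema,
# as writer_info() above does. The writer id below is hypothetical.
def _example_new_data_writer():
    writer = new_data_writer('dataset:zarr:posix')  # hypothetical writer id
    return writer.get_write_data_params_schema()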


def get_data_store_class(
        data_store_id: str,
        extension_registry: Optional[ExtensionRegistry] = None
) -> Union[Type['DataStore'], Type['MutableDataStore']]:
    """
    Get the class for the data store identified by *data_store_id*.

    :param data_store_id: A data store identifier.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :return: The class for the data store.
    """
    extension_registry = extension_registry or get_extension_registry()
    if not extension_registry.has_extension(EXTENSION_POINT_DATA_STORES,
                                            data_store_id):
        raise DataStoreError(f'Unknown data store "{data_store_id}"'
                             f' (may be due to missing xcube plugin)')
    return extension_registry.get_component(EXTENSION_POINT_DATA_STORES,
                                            data_store_id)
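

# Usage sketch: resolve the store class first, e.g. to check whether a store
# supports writing before instantiating it. The store id is hypothetical.
def _example_store_is_mutable(data_store_id: str = 'directory') -> bool:
    from xcube.core.store import MutableDataStore  # imported as in store_info
    return issubclass(get_data_store_class(data_store_id), MutableDataStore)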


def new_data_opener(opener_id: str,
                    extension_registry: Optional[ExtensionRegistry] = None,
                    **opener_params) -> 'DataOpener':
    """
    Get an instance of the data opener identified by *opener_id*.

    The optional, extra opener parameters *opener_params* may be used
    by data store (``xcube.core.store.DataStore``) implementations
    so they can share their internal state with the opener.

    :param opener_id: The data opener identifier.
    :param extension_registry: Optional extension registry.
        If not given, the global extension registry will be used.
    :param opener_params: Extra opener parameters.
    :return: A data opener instance.
    """
    assert_given(opener_id, 'opener_id')
    extension_registry = extension_registry or get_extension_registry()
    if not extension_registry.has_extension(EXTENSION_POINT_DATA_OPENERS,
                                            opener_id):
        raise DataStoreError(f'A data opener named'
                             f' {opener_id!r} is not registered')
    return extension_registry.get_component(EXTENSION_POINT_DATA_OPENERS,
                                            opener_id)(**opener_params)
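

# Usage sketch: instantiate an opener, handling the DataStoreError raised
# for unregistered ids as documented above. The opener id is hypothetical.
def _example_new_data_opener():
    try:
        return new_data_opener('dataset:zarr:posix')  # hypothetical opener id
    except DataStoreError as error:
        print(f'opener not available: {error}')
        return None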


def get_extension(name: str):
    return get_extension_registry().get_extension(
        EXTENSION_POINT_DATASET_IOS, name)


def dump(output_file_path: str,
         config_file_path: Optional[str],
         type_specifier: Optional[str]):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for a store's data resources
    for given data stores into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory
    data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."
    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import time

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name: DataStoreConfig(
                extension.name,
                title=extension.metadata.get('title'),
                description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    stores = []
    for store_instance_id in store_pool.store_instance_ids:
        t0 = time.perf_counter()
        print(f'Generating entries for store "{store_instance_id}"...')
        try:
            store_instance = store_pool.get_store(store_instance_id)
        except BaseException as error:
            print(f'error: cannot open store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue
        try:
            search_result = [
                dsd.to_dict()
                for dsd in store_instance.search_data(
                    type_specifier=type_specifier)
            ]
        except BaseException as error:
            print(f'error: cannot search store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue
        store_config = store_pool.get_store_config(store_instance_id)
        stores.append(dict(store_instance_id=store_instance_id,
                           store_id=store_instance_id,
                           title=store_config.title,
                           description=store_config.description,
                           type_specifier=type_specifier,
                           datasets=search_result))
        print('Done after {:.2f} seconds'.format(time.perf_counter() - t0))

    with open(output_file_path, 'w') as fp:
        json.dump(dict(stores=stores), fp, indent=2)

    print(f'Dumped {len(stores)} store(s) to {output_file_path}.')


from xcube.version import version


# noinspection PyShadowingBuiltins,PyUnusedLocal
@click.group(name='xcube')
@click.version_option(version)
@cli_option_traceback
@cli_option_scheduler
def cli(traceback=False, scheduler=None):
    """
    xcube Toolkit
    """


# Add registered CLI commands
for command in get_extension_registry().find_components(
        EXTENSION_POINT_CLI_COMMANDS):
    cli.add_command(command)


def main(args=None):
    # noinspection PyBroadException
    ctx_obj = new_cli_ctx_obj()
    try:
        exit_code = cli.main(args=args, obj=ctx_obj, standalone_mode=False)
    except Exception as e:
        exit_code = handle_cli_exception(
            e, traceback_mode=ctx_obj.get("traceback", False))
    sys.exit(exit_code)
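

# Usage sketch: a conventional entry-point guard, assuming this module is
# executed directly as a script; main() hands control to click and exits
# with the command's exit code via sys.exit().
if __name__ == '__main__':
    main()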


def find_input_processor_class(name: str):
    extension = get_extension_registry().get_extension(
        EXTENSION_POINT_INPUT_PROCESSORS, name)
    if not extension:
        return None
    return extension.component


def dump(output_file_path: Optional[str],
         config_file_path: Optional[str],
         data_type: Optional[str],
         short_form: bool,
         include_props: str,
         exclude_props: str,
         csv_format: bool,
         yaml_format: bool,
         json_format: bool):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for a store's data resources
    for given data stores into a JSON, YAML, or CSV file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory
    data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."
    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import yaml
    import json
    import os.path

    # Determine output format and file extension from the format flags,
    # falling back to the extension of the given output path, then to JSON.
    if csv_format:
        output_format = 'csv'
        ext = '.csv'
    elif yaml_format:
        output_format = 'yaml'
        ext = '.yml'
    elif json_format:
        output_format = 'json'
        ext = '.json'
    elif output_file_path is not None:
        path_no_ext, ext = os.path.splitext(output_file_path)
        if ext in ('.csv', '.txt'):
            output_format = 'csv'
        elif ext in ('.yaml', '.yml'):
            output_format = 'yaml'
        else:
            output_format = 'json'
    else:
        output_format = 'json'
        ext = '.json'

    if output_file_path is None:
        path_no_ext, _ = os.path.splitext(_DEFAULT_DUMP_OUTPUT)
        output_file_path = path_no_ext + ext

    include_props = _parse_props(include_props) if include_props else None
    exclude_props = _parse_props(exclude_props) if exclude_props else None

    if short_form:
        short_include_props = _parse_props(_SHORT_INCLUDE)
        include_props = include_props or {}
        for data_key in ('store', 'data', 'var'):
            include_props[data_key] = include_props.get(
                data_key, set()).union(short_include_props[data_key])

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name: DataStoreConfig(
                extension.name,
                title=extension.metadata.get('title'),
                description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    dump_data = _get_store_data_var_tuples(
        store_pool, data_type, include_props, exclude_props)

    if output_format == 'csv':
        # Flatten (store, data, var) tuples into rows with prefixed columns.
        column_names = None
        column_names_set = None
        rows = []
        for store_dict, data_dict, var_dict in dump_data:
            if store_dict is None:
                break
            row = {}
            row.update({'store.' + k: v for k, v in store_dict.items()})
            row.update({'data.' + k: v for k, v in data_dict.items()})
            row.update({'var.' + k: v for k, v in var_dict.items()})
            rows.append(row)
            if column_names_set is None:
                column_names = list(row.keys())
                column_names_set = set(column_names)
            else:
                for k in row.keys():
                    if k not in column_names_set:
                        column_names.append(k)
                        column_names_set.add(k)

        def format_cell_value(value: Any) -> str:
            return str(value) if value is not None else ''

        sep = '\t'
        with open(output_file_path, 'w') as fp:
            if column_names:
                fp.write(sep.join(column_names) + '\n')
            for row in rows:
                fp.write(sep.join(map(
                    format_cell_value,
                    tuple(row.get(k) for k in column_names))) + '\n')

        print(f'Dumped {len(rows)} store entry/ies to {output_file_path}.')
    else:
        # Group the flat (store, data, var) tuples back into a nested
        # stores -> data -> data_vars structure for JSON/YAML output.
        last_store_dict = None
        last_data_dict = None
        vars_list = []
        data_list = []
        store_list = []
        for store_dict, data_dict, var_dict in dump_data:
            if data_dict is not last_data_dict or data_dict is None:
                if last_data_dict is not None:
                    last_data_dict['data_vars'] = vars_list
                    vars_list = []
                    data_list.append(last_data_dict)
                last_data_dict = data_dict
            if store_dict is not last_store_dict or store_dict is None:
                if last_store_dict is not None:
                    last_store_dict['data'] = data_list
                    data_list = []
                    store_list.append(last_store_dict)
                last_store_dict = store_dict
            if var_dict:
                vars_list.append(var_dict)

        with open(output_file_path, 'w') as fp:
            if output_format == 'json':
                json.dump(dict(stores=store_list), fp, indent=2)
            else:
                yaml.dump(dict(stores=store_list), fp, indent=2)

        print(f'Dumped entries of {len(store_list)} store(s)'
              f' to {output_file_path}.')