Example #1
def main(gen_config_path: str,
         store_configs_path: str = None,
         verbose: bool = False):
    """
    Generator tool for data cubes.

    Creates cube views from one or more cube stores, resamples them to a common grid,
    optionally applies a cube transformation,
    and writes the resulting cube to a target cube store.

    *gen_config_path* is the path to the cube generator configuration. It may be provided as a JSON or
    YAML file (file extensions ".json" or ".yaml"). If the *gen_config_path* argument is omitted, the
    cube generator configuration is expected to be piped in as a JSON string.

    *store_configs_path* is the path to a JSON file with data store configurations. It is a mapping of
    names to configured stores. Entries are dictionaries with a mandatory "store_id" property, which is
    the name of a registered xcube data store. The optional "store_params" property may define
    data-store-specific parameters.

    :param gen_config_path: Path to the cube generator configuration, given as a JSON or YAML file
        (file extensions ".json" or ".yaml"). If this argument is omitted, the configuration is
        expected to be piped in as a JSON string.
    :param store_configs_path: A JSON file that maps store names to parameterized stores.
    :param verbose: Whether to output progress information to stdout.
    """

    store_pool = DataStorePool.from_file(
        store_configs_path) if store_configs_path else DataStorePool()

    gen_config = GenConfig.from_file(gen_config_path, verbose=verbose)

    if gen_config.callback_config:
        ApiProgressCallbackObserver(gen_config.callback_config).activate()
    if verbose:
        ConsoleProgressObserver().activate()

    with observe_progress('Generating cube', 100) as cm:
        cm.will_work(10)
        cubes = open_cubes(gen_config.input_configs,
                           cube_config=gen_config.cube_config,
                           store_pool=store_pool)

        cm.will_work(10)
        cube = resample_and_merge_cubes(cubes,
                                        cube_config=gen_config.cube_config)

        cm.will_work(80)
        data_id = write_cube(cube,
                             output_config=gen_config.output_config,
                             store_pool=store_pool)

    if verbose:
        print('Cube "{}" generated within {:.2f} seconds'.format(
            str(data_id), cm.state.total_time))
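
For reference, the *store_configs_path* file described in the docstring above is a plain JSON mapping of store names to store configurations, each with a mandatory "store_id" and optional "store_params". A minimal sketch, reusing the "memory" store id that also appears in the tests below (store names and the root path are illustrative):

{
    "ram-1": {
        "store_id": "memory",
        "store_params": {
            "root": "/path/to/data-1"
        }
    },
    "ram-2": {
        "store_id": "memory"
    }
}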
Example #2
 def test_from_yaml_file(self):
     store_configs = {
         "ram-1": {
             "store_id": "memory"
         },
         "ram-2": {
             "store_id": "memory"
         }
     }
     path = 'test-store-configs.yaml'
     with open(path, 'w') as fp:
         yaml.dump(store_configs, fp, indent=2)
     try:
         pool = DataStorePool.from_file(path)
         self.assertIsInstance(pool, DataStorePool)
         self.assertEqual(['ram-1', 'ram-2'], pool.store_instance_ids)
     finally:
         import os
         os.remove(path)
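
Outside of a unit test, the same round trip is only a few lines. A minimal sketch using only the calls shown in these examples (the file path is illustrative, and the file is assumed to contain a store-configs mapping like the ones above):

from xcube.core.store import DataStorePool

# Load the pool from a JSON or YAML store-configs file.
pool = DataStorePool.from_file('store-configs.yaml')

# List the configured store instances and their configurations.
for store_instance_id in pool.store_instance_ids:
    config = pool.get_store_config(store_instance_id)
    print(store_instance_id, config.to_dict())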
Example #3
 def _assert_file_ok(self,
                     format_name: str,
                     root_1="/root1",
                     root_2="/root2",
                     use_env_vars=False):
     if use_env_vars:
         store_configs = self._get_test_config(
             root_1='${_TEST_ROOT_1}',
             root_2='${_TEST_ROOT_2}'
         )
         import os
         os.environ['_TEST_ROOT_1'] = root_1
         os.environ['_TEST_ROOT_2'] = root_2
     else:
         store_configs = self._get_test_config(
             root_1=root_1,
             root_2=root_2
         )
     path = 'test-store-configs.' + format_name
     with open(path, 'w') as fp:
         mod = yaml if format_name == 'yaml' else json
         mod.dump(store_configs, fp, indent=2)
     try:
         pool = DataStorePool.from_file(path)
         self.assertIsInstance(pool, DataStorePool)
         self.assertEqual(['ram-1', 'ram-2'], pool.store_instance_ids)
         config_1 = pool.get_store_config('ram-1')
         self.assertIsInstance(config_1, DataStoreConfig)
         self.assertEqual(
             {'store_id': 'memory',
              'store_params': {'root': root_1}},
             config_1.to_dict())
         config_2 = pool.get_store_config('ram-2')
         self.assertIsInstance(config_2, DataStoreConfig)
         self.assertEqual(
             {'store_id': 'memory',
              'store_params': {'root': root_2}},
             config_2.to_dict())
     finally:
         import os
         os.remove(path)
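
Example #3 also exercises environment-variable substitution: "${VAR}" placeholders inside the store configuration are resolved from the environment when DataStorePool.from_file() loads the file. A sketch of such a configuration in YAML (store names and variable names are illustrative):

ram-1:
  store_id: memory
  store_params:
    root: ${MY_ROOT_1}
ram-2:
  store_id: memory
  store_params:
    root: ${MY_ROOT_2}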
Example #4
def dump(output_file_path: Optional[str], config_file_path: Optional[str],
         data_type: Optional[str], short_form: bool, include_props: str,
         exclude_props: str, csv_format: bool, yaml_format: bool,
         json_format: bool):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for a store's data resources
    for the given data stores into a JSON, YAML, or CSV file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import yaml
    import json
    import os.path

    if csv_format:
        output_format = 'csv'
        ext = '.csv'
    elif yaml_format:
        output_format = 'yaml'
        ext = '.yml'
    elif json_format:
        output_format = 'json'
        ext = '.json'
    elif output_file_path is not None:
        path_no_ext, ext = os.path.splitext(output_file_path)
        if ext in ('.csv', '.txt'):
            output_format = 'csv'
        elif ext in ('.yaml', '.yml'):
            output_format = 'yaml'
        else:
            output_format = 'json'
    else:
        output_format = 'json'
        ext = '.json'

    if output_file_path is None:
        path_no_ext, _ = os.path.splitext(_DEFAULT_DUMP_OUTPUT)
        output_file_path = path_no_ext + ext

    include_props = _parse_props(include_props) if include_props else None
    exclude_props = _parse_props(exclude_props) if exclude_props else None

    if short_form:
        short_include_props = _parse_props(_SHORT_INCLUDE)
        include_props = include_props or {}
        for data_key in ('store', 'data', 'var'):
            include_props[data_key] = include_props.get(data_key, set()).union(
                short_include_props[data_key])

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    dump_data = _get_store_data_var_tuples(store_pool, data_type,
                                           include_props, exclude_props)

    if output_format == 'csv':
        column_names = None
        column_names_set = None
        rows = []
        for store_dict, data_dict, var_dict in dump_data:
            if store_dict is None:
                break
            row = {}
            row.update({'store.' + k: v for k, v in store_dict.items()})
            row.update({'data.' + k: v for k, v in data_dict.items()})
            row.update({'var.' + k: v for k, v in var_dict.items()})
            rows.append(row)
            if column_names_set is None:
                column_names = list(row.keys())
                column_names_set = set(column_names)
            else:
                for k in row.keys():
                    if k not in column_names_set:
                        column_names.append(k)
                        column_names_set.add(k)

        def format_cell_value(value: Any) -> str:
            return str(value) if value is not None else ''

        sep = '\t'
        with open(output_file_path, 'w') as fp:
            if column_names:
                fp.write(sep.join(column_names) + '\n')
                for row in rows:
                    fp.write(
                        sep.join(
                            map(format_cell_value,
                                tuple(row.get(k)
                                      for k in column_names))) + '\n')

        print(f'Dumped {len(rows)} store entry/ies to {output_file_path}.')

    else:
        last_store_dict = None
        last_data_dict = None
        vars_list = []
        data_list = []
        store_list = []
        for store_dict, data_dict, var_dict in dump_data:
            if data_dict is not last_data_dict or data_dict is None:
                if last_data_dict is not None:
                    last_data_dict['data_vars'] = vars_list
                    vars_list = []
                    data_list.append(last_data_dict)
                last_data_dict = data_dict
            if store_dict is not last_store_dict or store_dict is None:
                if last_store_dict is not None:
                    last_store_dict['data'] = data_list
                    data_list = []
                    store_list.append(last_store_dict)
                last_store_dict = store_dict
            if var_dict:
                vars_list.append(var_dict)

        with open(output_file_path, 'w') as fp:
            if output_format == 'json':
                json.dump(dict(stores=store_list), fp, indent=2)
            else:
                yaml.dump(dict(stores=store_list), fp, indent=2)

        print(
            f'Dumped entries of {len(store_list)} store(s) to {output_file_path}.'
        )
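
The CONFIG file sketched in the docstring can list any number of stores; each entry has the same shape as the store-configs mappings in the earlier examples. A hedged YAML sketch with two "directory" stores (names, titles, and paths are illustrative):

this_dir:
    title: Current Dir
    description: A store that represents my current directory
    store_id: "directory"
    store_params:
        base_dir: "."
data_dir:
    title: Data Dir
    description: A second directory store
    store_id: "directory"
    store_params:
        base_dir: "/path/to/data"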
Example #5
File: io.py Project: micder/xcube
def dump(output_file_path: str, config_file_path: Optional[str],
         type_specifier: Optional[str]):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for a store's data resources
    for the given data stores into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import time

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    stores = []
    for store_instance_id in store_pool.store_instance_ids:
        t0 = time.perf_counter()
        print(f'Generating entries for store "{store_instance_id}"...')
        try:
            store_instance = store_pool.get_store(store_instance_id)
        except BaseException as error:
            print(f'error: cannot open store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        try:
            search_result = [
                dsd.to_dict() for dsd in store_instance.search_data(
                    type_specifier=type_specifier)
            ]
        except BaseException as error:
            print(f'error: cannot search store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        store_config = store_pool.get_store_config(store_instance_id)
        stores.append(
            dict(store_instance_id=store_instance_id,
                 store_id=store_config.store_id,
                 title=store_config.title,
                 description=store_config.description,
                 type_specifier=type_specifier,
                 datasets=search_result))
        print('Done after {:.2f} seconds'.format(time.perf_counter() - t0))

    with open(output_file_path, 'w') as fp:
        json.dump(dict(stores=stores), fp, indent=2)

    print(f'Dumped {len(stores)} store(s) to {output_file_path}.')
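
For orientation, the JSON written by this older variant contains one entry per store, assembled from the fields set in the loop above; the "datasets" list holds the dictionaries produced by the to_dict() calls on the search results. An illustrative sketch of the structure (all values are placeholders):

{
  "stores": [
    {
      "store_instance_id": "this_dir",
      "store_id": "directory",
      "title": "Current Dir",
      "description": "A store that represents my current directory",
      "type_specifier": "dataset",
      "datasets": [
        {"...": "..."}
      ]
    }
  ]
}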