Example #1
    def test_to_dict(self):
        self.assertEqual({}, DataStorePool().to_dict())
        self.assertEqual({'ram': {'store_id': 'memory'},
                          'dir': {'store_id': 'file',
                                  'store_params': {'base_dir': 'bibo'}}},
                         DataStorePool({'ram': DataStoreConfig(store_id='memory'),
                                        'dir': DataStoreConfig(store_id='file',
                                                               store_params=dict(base_dir="bibo"))
                                        }).to_dict())
Example #2
def main(gen_config_path: str,
         store_configs_path: str = None,
         verbose: bool = False):
    """
    Generator tool for data cubes.

    Creates cube views from one or more cube stores, resamples them to a common grid,
    optionally performs some cube transformation,
    and writes the resulting cube to some target cube store.

    *gen_config_path* is the cube generator configuration. It may be provided as a JSON or YAML file
    (file extensions ".json" or ".yaml"). If the *gen_config_path* argument is omitted, it is expected that
    the cube generator configuration is piped as a JSON string.

    *store_configs_path* is a path to a JSON file with data store configurations. It is a mapping of names to
    configured stores. Entries are dictionaries that have a mandatory "store_id" property, which is the name of a
    registered xcube data store. The optional "store_params" property may define data store specific parameters.

    :param gen_config_path: Cube generation configuration. It may be provided as a JSON or YAML file
        (file extensions ".json" or ".yaml"). If the *gen_config_path* argument is omitted, it is expected that
        the cube generator configuration is piped as a JSON string.
    :param store_configs_path: A JSON file that maps store names to parameterized stores.
    :param verbose: Whether to output progress information to stdout.
    """

    store_pool = DataStorePool.from_file(
        store_configs_path) if store_configs_path else DataStorePool()

    gen_config = GenConfig.from_file(gen_config_path, verbose=verbose)

    if gen_config.callback_config:
        ApiProgressCallbackObserver(gen_config.callback_config).activate()
    if verbose:
        ConsoleProgressObserver().activate()

    with observe_progress('Generating cube', 100) as cm:
        cm.will_work(10)
        cubes = open_cubes(gen_config.input_configs,
                           cube_config=gen_config.cube_config,
                           store_pool=store_pool)

        cm.will_work(10)
        cube = resample_and_merge_cubes(cubes,
                                        cube_config=gen_config.cube_config)

        cm.will_work(80)
        data_id = write_cube(cube,
                             output_config=gen_config.output_config,
                             store_pool=store_pool)

    if verbose:
        print('Cube "{}" generated within {:.2f} seconds'.format(
            str(data_id), cm.state.total_time))
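The *store_configs_path* file described above is a JSON mapping of store names to configured stores. A minimal sketch of the equivalent in-memory form, as accepted by DataStorePool.from_dict(); the store name and root path are made up for illustration:

store_configs = {
    "my-local": {
        "store_id": "file",
        "store_params": {"root": "/path/to/cubes"}
    }
}
store_pool = DataStorePool.from_dict(store_configs)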
Example #3
    def test_from_dict_with_bad_dicts(self):
        store_configs = {"dir": {}}
        with self.assertRaises(jsonschema.exceptions.ValidationError) as cm:
            DataStorePool.from_dict(store_configs)
        self.assertTrue("'store_id' is a required property"
                        in f'{cm.exception}',
                        msg=f'{cm.exception}')

        store_configs = {"dir": {"store_id": 10}}
        with self.assertRaises(jsonschema.exceptions.ValidationError) as cm:
            DataStorePool.from_dict(store_configs)
        self.assertTrue("Failed validating 'type' in schema"
                        in f'{cm.exception}',
                        msg=f'{cm.exception}')
Example #4
    def test_multi_stores_with_params(self):
        """Just test many stores at once"""
        store_configs = {
            "ram-1": {
                "store_id": "memory",
            },
            "ram-2": {
                "store_id": "memory",
            },
            "local-1": {
                "store_id": "file",
                "store_params": {
                    "root": "/home/bibo/datacubes-1",
                }
            },
            "local-2": {
                "store_id": "file",
                "store_params": {
                    "root": "/home/bibo/datacubes-2",
                }
            },
        }
        pool = DataStorePool.from_dict(store_configs)
        self.assertIsInstance(pool, DataStorePool)
        self.assertEqual(["local-1", "local-2", "ram-1", "ram-2"],
                         pool.store_instance_ids)
        for instance_id in pool.store_instance_ids:
            self.assertTrue(pool.has_store_instance(instance_id))
            self.assertIsInstance(pool.get_store_config(instance_id),
                                  DataStoreConfig)
            self.assertIsInstance(pool.get_store(instance_id), DataStore)
Example #5
    def test_get_store_instance_id(self):
        store_params_1 = {
            "root": "./bibo"
        }
        ds_config_1 = DataStoreConfig(store_id='file',
                                      store_params=store_params_1)
        ds_configs = {'dir-1': ds_config_1}
        pool = DataStorePool(ds_configs)

        store_params_2 = {
            "root": "./babo"
        }
        ds_config_2 = DataStoreConfig(store_id='file',
                                      store_params=store_params_2)
        ds_config_3 = DataStoreConfig(store_id='file',
                                      store_params=store_params_1,
                                      title='A third configuration')

        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1))
        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1,
                                                             strict_check=True))

        self.assertIsNone(pool.get_store_instance_id(ds_config_2))

        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_3))
        self.assertIsNone(pool.get_store_instance_id(ds_config_3,
                                                     strict_check=True))
Example #6
    def test_close_all_stores(self):
        store_configs = {
            "ram-1": {
                "store_id": "memory",
            },
        }
        pool = DataStorePool.from_dict(store_configs)
        # Smoke test: we do not expect any visible state changes
        # after close_all_stores()
        pool.close_all_stores()
Example #7
    def test_get_data_store_instance_from_pool(self):
        pool = DataStorePool({
            'dir':
            DataStoreConfig('directory', store_params=dict(base_dir='.'))
        })
        instance = get_data_store_instance('@dir', store_pool=pool)
        self.assertIsInstance(instance.store, DirectoryDataStore)
        instance2 = get_data_store_instance('@dir', store_pool=pool)
        self.assertIs(instance, instance2)
Example #8
    def test_from_dict_no_store_params(self):
        store_configs = {
            "ram-1": {
                "store_id": "memory"
            }
        }
        pool = DataStorePool.from_dict(store_configs)
        self.assertIsInstance(pool, DataStorePool)
        self.assertEqual(["ram-1"], pool.store_instance_ids)
        self.assertIsInstance(pool.get_store_config('ram-1'), DataStoreConfig)
Example #9
    def test_get_data_store_instance_from_pool(self):
        pool = DataStorePool({
            'dir': DataStoreConfig('file',
                                   store_params=dict(root='.'))
        })
        instance = get_data_store_instance('@dir', store_pool=pool)
        self.assertTrue(hasattr(instance.store, 'root'))
        # noinspection PyUnresolvedReferences
        self.assertTrue(os.path.isabs(instance.store.root))
        self.assertTrue(os.path.isdir(instance.store.root))
        instance2 = get_data_store_instance('@dir', store_pool=pool)
        self.assertIs(instance, instance2)
Example #10
    def test_get_data_store_instance_from_pool_with_params(self):
        pool = DataStorePool({
            '@dir':
            DataStoreConfig('directory', store_params=dict(base_dir='.'))
        })
        with self.assertRaises(ValueError) as cm:
            get_data_store_instance('@dir',
                                    store_pool=pool,
                                    store_params={'thres': 5})
        self.assertEqual(
            'store_params cannot be given, with store_id ("@dir") referring to a configured store',
            f'{cm.exception}')
Example #11
    def test_get_data_store_instance_from_pool_with_params(self):
        pool = DataStorePool({
            '@dir': DataStoreConfig('file',
                                    store_params=dict(root='.'))
        })
        with self.assertRaises(ValueError) as cm:
            get_data_store_instance(
                '@dir', store_pool=pool, store_params={'auto_mkdir': True}
            )
        self.assertEqual('store_params cannot be given,'
                         ' with store_id ("@dir") referring'
                         ' to a configured store',
                         f'{cm.exception}')
Example #12
    def __init__(self,
                 store_pool: DataStorePool = None,
                 raise_on_error: bool = False,
                 verbosity: int = 0):
        super().__init__(raise_on_error=raise_on_error, verbosity=verbosity)
        if store_pool is not None:
            assert_instance(store_pool, DataStorePool, 'store_pool')

        self._store_pool = store_pool if store_pool is not None \
            else DataStorePool()
        self._generated_data_id: Optional[str] = None
        self._generated_cube: Optional[xr.Dataset] = None
        self._generated_gm: Optional[GridMapping] = None
Example #13
    def test_get_store(self):
        store_configs = {
            "dir-1": {
                "store_id": "directory",
                "store_params": {
                    "base_dir": "bibo"
                }
            },
        }
        pool = DataStorePool.from_dict(store_configs)
        store = pool.get_store('dir-1')
        self.assertIsInstance(store, DirectoryDataStore)
        self.assertEqual('bibo', store.base_dir)
        # Should stay the same instance
        self.assertIs(store, pool.get_store('dir-1'))
        self.assertIs(store, pool.get_store('dir-1'))
Example #14
    def get_data_store_pool(self) -> Optional[DataStorePool]:
        data_store_configs = self._config.get('DataStores', [])
        if not data_store_configs or self._data_store_pool:
            return self._data_store_pool
        if not isinstance(data_store_configs, list):
            raise ServiceConfigError('DataStores must be a list')
        store_configs: Dict[str, DataStoreConfig] = {}
        for data_store_config_dict in data_store_configs:
            store_instance_id = data_store_config_dict.get('Identifier')
            store_id = data_store_config_dict.get('StoreId')
            store_params = data_store_config_dict.get('StoreParams', {})
            dataset_configs = data_store_config_dict.get('Datasets')
            store_config = DataStoreConfig(store_id,
                                           store_params=store_params,
                                           user_data=dataset_configs)
            store_configs[store_instance_id] = store_config
        self._data_store_pool = DataStorePool(store_configs)
        return self._data_store_pool
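A hedged sketch of the 'DataStores' configuration consumed by the method above; the identifier, store parameters, and dataset entry are made up for illustration:

config = {
    'DataStores': [
        {
            'Identifier': 'local',
            'StoreId': 'file',
            'StoreParams': {'root': '/data/cubes'},
            'Datasets': [{'Path': 'cube-1.zarr'}],
        }
    ]
}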
Example #15
    def test_get_store(self):
        store_configs = {
            "dir-1": {
                "store_id": "file",
                "store_params": {
                    "root": "./bibo"
                }
            },
        }
        pool = DataStorePool.from_dict(store_configs)
        store = pool.get_store('dir-1')
        self.assertTrue(hasattr(store, 'root'))
        # noinspection PyUnresolvedReferences
        self.assertTrue(os.path.isabs(store.root))
        self.assertFalse(os.path.exists(store.root))
        # Should stay the same instance
        self.assertIs(store, pool.get_store('dir-1'))
        self.assertIs(store, pool.get_store('dir-1'))
Example #16
    def test_from_yaml_file(self):
        store_configs = {
            "ram-1": {
                "store_id": "memory"
            },
            "ram-2": {
                "store_id": "memory"
            }
        }
        path = 'test-store-configs.yaml'
        with open(path, 'w') as fp:
            yaml.dump(store_configs, fp, indent=2)
        try:
            pool = DataStorePool.from_file(path)
            self.assertIsInstance(pool, DataStorePool)
            self.assertEqual(['ram-1', 'ram-2'], pool.store_instance_ids)
        finally:
            import os
            os.remove(path)
Example #17
    def test_normalize(self):
        pool = DataStorePool({
            '@dir': DataStoreConfig('directory',
                                    store_params=dict(root='.'))
        })
        file_path = '_test-data-stores-pool.json'
        with open(file_path, 'w') as fp:
            json.dump(pool.to_dict(), fp)
        try:
            pool_1 = DataStorePool.normalize(file_path)
            self.assertIsInstance(pool_1, DataStorePool)
            pool_2 = DataStorePool.normalize(pool_1)
            self.assertIs(pool_2, pool_1)
            pool_3 = DataStorePool.normalize(pool_2.to_dict())
            self.assertIsInstance(pool_3, DataStorePool)
        finally:
            os.remove(file_path)

        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            DataStorePool.normalize(42)
Example #18
    def _assert_file_ok(self,
                        format_name: str,
                        root_1="/root1",
                        root_2="/root2",
                        use_env_vars=False):
        if use_env_vars:
            store_configs = self._get_test_config(
                root_1='${_TEST_ROOT_1}',
                root_2='${_TEST_ROOT_2}'
            )
            import os
            os.environ['_TEST_ROOT_1'] = root_1
            os.environ['_TEST_ROOT_2'] = root_2
        else:
            store_configs = self._get_test_config(
                root_1=root_1,
                root_2=root_2
            )
        path = 'test-store-configs.' + format_name
        with open(path, 'w') as fp:
            mod = yaml if format_name == 'yaml' else json
            mod.dump(store_configs, fp, indent=2)
        try:
            pool = DataStorePool.from_file(path)
            self.assertIsInstance(pool, DataStorePool)
            self.assertEqual(['ram-1', 'ram-2'], pool.store_instance_ids)
            config_1 = pool.get_store_config('ram-1')
            self.assertIsInstance(config_1, DataStoreConfig)
            self.assertEqual(
                {'store_id': 'memory',
                 'store_params': {'root': root_1}},
                config_1.to_dict())
            config_2 = pool.get_store_config('ram-2')
            self.assertIsInstance(config_2, DataStoreConfig)
            self.assertEqual(
                {'store_id': 'memory',
                 'store_params': {'root': root_2}},
                config_2.to_dict())
        finally:
            import os
            os.remove(path)
Example #19
    def _maybe_assign_store_instance_ids(self):
        assignable_dataset_configs = [
            dc for dc in self._dataset_configs if 'StoreInstanceId' not in dc
            and dc.get('FileSystem', 'file') in NON_MEMORY_FILE_SYSTEMS
        ]
        # split into sublists according to file system and non-root store params
        config_lists = []
        for config in assignable_dataset_configs:
            store_params = self._get_other_store_params_than_root(config)
            file_system = config.get('FileSystem', 'file')
            appended = False
            for config_list in config_lists:
                if config_list[0] == file_system and \
                        config_list[1] == store_params:
                    config_list[2].append(config)
                    appended = True
                    break
            if not appended:
                config_lists.append((file_system, store_params, [config]))

        data_store_pool = self.get_data_store_pool()
        if not data_store_pool:
            data_store_pool = self._data_store_pool = DataStorePool()

        for file_system, store_params, config_list in config_lists:
            # Retrieve paths per configuration
            paths = [dc['Path'] for dc in config_list]
            paths.sort()
            # Determine common prefixes of paths (and call them roots)
            prefixes = _get_common_prefixes(paths)
            if len(prefixes) < 1:
                roots = ['']
            else:
                # perform further step to merge prefixes with same start
                prefixes = list(set(prefixes))
                prefixes.sort()
                roots = []
                root_candidate = prefixes[0]
                for root in prefixes[1:]:
                    common_root = os.path.commonprefix([root_candidate, root])
                    if _is_not_empty(common_root):
                        root_candidate = common_root
                    else:
                        roots.append(root_candidate)
                        root_candidate = root
                roots.append(root_candidate)
            for root in roots:
                # ensure root does not end with full or partial directory
                # or file name
                while not root.endswith("/") and not root.endswith("\\") and \
                        len(root) > 0:
                    root = root[:-1]
                if root.endswith("/") or root.endswith("\\"):
                    root = root[:-1]
                abs_root = root
                # For local file systems: Determine absolute root from base dir
                fs_protocol = FS_TYPE_TO_PROTOCOL.get(file_system, file_system)
                if fs_protocol == 'file' and not os.path.isabs(abs_root):
                    abs_root = os.path.join(self._base_dir, abs_root)
                    abs_root = os.path.normpath(abs_root)
                store_params_for_root = store_params.copy()
                store_params_for_root['root'] = abs_root
                # See if there already is a store with this configuration
                data_store_config = DataStoreConfig(
                    store_id=fs_protocol, store_params=store_params_for_root)
                store_instance_id = data_store_pool.\
                    get_store_instance_id(data_store_config)
                if not store_instance_id:
                    # Create new store with new unique store instance id
                    counter = 1
                    while data_store_pool.has_store_instance(
                            f'{fs_protocol}_{counter}'):
                        counter += 1
                    store_instance_id = f'{fs_protocol}_{counter}'
                    data_store_pool.add_store_config(store_instance_id,
                                                     data_store_config)
                for config in config_list:
                    if config['Path'].startswith(root):
                        config['StoreInstanceId'] = store_instance_id
                        new_path = config['Path'][len(root):]
                        while new_path.startswith("/") or \
                                new_path.startswith("\\"):
                            new_path = new_path[1:]
                        config['Path'] = new_path
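The prefix-merging step in the method above can be illustrated standalone. This sketch uses made-up paths and replaces the helper _is_not_empty() with a plain truthiness check:

import os.path

prefixes = sorted({'data/cube-1.zarr', 'data/cube-2.zarr', 'other/cube-3.zarr'})
roots = []
root_candidate = prefixes[0]
for prefix in prefixes[1:]:
    common_root = os.path.commonprefix([root_candidate, prefix])
    if common_root:  # stands in for _is_not_empty(common_root)
        root_candidate = common_root
    else:
        roots.append(root_candidate)
        root_candidate = prefix
roots.append(root_candidate)
print(roots)  # ['data/cube-', 'other/cube-3.zarr']
# The subsequent loop in the method then trims each root back to a
# directory boundary, so 'data/cube-' becomes 'data'.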
Example #20
    def new(cls,
            service_config: Optional[ServiceConfigLike] = None,
            stores_config: Optional[DataStorePoolLike] = None,
            raise_on_error: bool = False,
            verbosity: int = 0,
            **kwargs) -> 'CubeGenerator':
        """
        Create a new cube generator from given configurations.

        If *service_config* is given, it describes a remote xcube
        generator service; otherwise a local cube generator is
        configured using the optional *stores_config*.

        The *service_config* parameter can be passed in different ways:

        * An instance of :class:ServiceConfig.
        * A ``str``. Then it is interpreted as a path to a YAML or JSON file
          and the remote configuration is loaded from this file.
          The file content may include template variables that are
          interpolated by environment variables,
          e.g. "${XCUBE_GEN_CLIENT_SECRET}".
        * A ``dict``. Then it is interpreted as a remote configuration
          JSON object.

        If *stores_config* is given, it describes a pool of data stores to be
        used as input and output for the cube generator. *stores_config*
        is a mapping of store instance identifiers to configured store
        instances. A store instance is a dictionary that has a mandatory
        "store_id" property, which is the name of a registered xcube data
        store, as well as an optional "store_params" property that may
        define data store specific parameters.

        Similar to *service_config*, the *stores_config* parameter
        can be passed in different ways:

        * An instance of :class:DataStorePool.
        * A ``str``. Then it is interpreted as a YAML or JSON file path
          and the stores configuration is loaded from this file.
        * A ``dict``. Then it is interpreted as a stores configuration
          JSON object.

        The *service_config* and *stores_config* parameters cannot
        both be given.

        :param service_config: Service configuration.
        :param stores_config: Data stores configuration.
        :param raise_on_error: Whether to raise a CubeGeneratorError
            exception on generator failures. If False, the default,
            the returned result will have the "status" field set to "error"
            while other fields such as "message", "traceback", "output"
            provide more failure details.
        :param verbosity: Level of verbosity, 0 means off.
        :param kwargs: Extra arguments passed to the generator constructors.
        """
        if service_config is not None:
            from .remote.config import ServiceConfig
            from .remote.generator import RemoteCubeGenerator
            assert_true(
                stores_config is None,
                'service_config and stores_config cannot be'
                ' given at the same time.')
            assert_instance(service_config,
                            (str, dict, ServiceConfig, type(None)),
                            'service_config')
            service_config = ServiceConfig.normalize(service_config) \
                if service_config is not None else None
            return RemoteCubeGenerator(service_config=service_config,
                                       raise_on_error=raise_on_error,
                                       verbosity=verbosity,
                                       **kwargs)
        else:
            from .local.generator import LocalCubeGenerator
            assert_instance(stores_config,
                            (str, dict, DataStorePool, type(None)),
                            'stores_config')
            store_pool = DataStorePool.normalize(stores_config) \
                if stores_config is not None else None
            return LocalCubeGenerator(store_pool=store_pool,
                                      raise_on_error=raise_on_error,
                                      verbosity=verbosity)
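A hedged usage sketch of the factory above, passing *stores_config* in its dict form; the instance name and root path are made up for illustration:

generator = CubeGenerator.new(
    stores_config={
        'target': {
            'store_id': 'file',
            'store_params': {'root': '/data/cubes'},
        }
    },
    verbosity=1,
)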
Example #21
    def test_default_constr(self):
        pool = DataStorePool()
        self.assertEqual([], pool.store_instance_ids)
        self.assertEqual([], pool.store_configs)
Example #22
    def test_from_dict_empty(self):
        pool = DataStorePool.from_dict({})
        self.assertIsInstance(pool, DataStorePool)
        self.assertEqual([], pool.store_instance_ids)
        self.assertEqual([], pool.store_configs)
Example #23
    def test_add_remove_store_config(self):
        pool = DataStorePool()
        self.assertEqual([], pool.store_instance_ids)
        pool.add_store_config('mem-1', DataStoreConfig('memory'))
        self.assertEqual(['mem-1'], pool.store_instance_ids)
        pool.add_store_config('mem-2', DataStoreConfig('memory'))
        self.assertEqual(['mem-1', 'mem-2'], pool.store_instance_ids)
        pool.add_store_config('mem-1', DataStoreConfig('memory'))
        self.assertEqual(['mem-1', 'mem-2'], pool.store_instance_ids)
        pool.remove_store_config('mem-1')
        self.assertEqual(['mem-2'], pool.store_instance_ids)
        pool.remove_store_config('mem-2')
        self.assertEqual([], pool.store_instance_ids)
Example #24
def dump(output_file_path: str, config_file_path: Optional[str],
         type_specifier: Optional[str]):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for each store's data
    resources into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import time

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    stores = []
    for store_instance_id in store_pool.store_instance_ids:
        t0 = time.perf_counter()
        print(f'Generating entries for store "{store_instance_id}"...')
        try:
            store_instance = store_pool.get_store(store_instance_id)
        except BaseException as error:
            print(f'error: cannot open store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        try:
            search_result = [
                dsd.to_dict() for dsd in store_instance.search_data(
                    type_specifier=type_specifier)
            ]
        except BaseException as error:
            print(f'error: cannot search store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        store_config = store_pool.get_store_config(store_instance_id)
        stores.append(
            dict(store_instance_id=store_instance_id,
                 store_id=store_instance_id,
                 title=store_config.title,
                 description=store_config.description,
                 type_specifier=type_specifier,
                 datasets=search_result))
        print('Done after {:.2f} seconds'.format(time.perf_counter() - t0))

    with open(output_file_path, 'w') as fp:
        json.dump(dict(stores=stores), fp, indent=2)

    print(f'Dumped {len(stores)} store(s) to {output_file_path}.')
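The YAML configuration shown in the docstring can be loaded with DataStorePool.from_file(); a minimal sketch, assuming the configuration was saved to a made-up path 'my-stores.yaml':

store_pool = DataStorePool.from_file('my-stores.yaml')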
Example #25
def dump(output_file_path: Optional[str], config_file_path: Optional[str],
         data_type: Optional[str], short_form: bool, include_props: str,
         exclude_props: str, csv_format: bool, yaml_format: bool,
         json_format: bool):
    """
    Dump metadata of given data stores.

    Dumps data store metadata and metadata for each store's data
    resources into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import yaml
    import json
    import os.path

    if csv_format:
        output_format = 'csv'
        ext = '.csv'
    elif yaml_format:
        output_format = 'yaml'
        ext = '.yml'
    elif json_format:
        output_format = 'json'
        ext = '.json'
    elif output_file_path is not None:
        path_no_ext, ext = os.path.splitext(output_file_path)
        if ext in ('.csv', '.txt'):
            output_format = 'csv'
        elif ext in ('.yaml', '.yml'):
            output_format = 'yaml'
        else:
            output_format = 'json'
    else:
        output_format = 'json'
        ext = '.json'

    if output_file_path is None:
        path_no_ext, _ = os.path.splitext(_DEFAULT_DUMP_OUTPUT)
        output_file_path = path_no_ext + ext

    include_props = _parse_props(include_props) if include_props else None
    exclude_props = _parse_props(exclude_props) if exclude_props else None

    if short_form:
        short_include_props = _parse_props(_SHORT_INCLUDE)
        include_props = include_props or {}
        for data_key in ('store', 'data', 'var'):
            include_props[data_key] = include_props.get(data_key, set()).union(
                short_include_props[data_key])

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    dump_data = _get_store_data_var_tuples(store_pool, data_type,
                                           include_props, exclude_props)

    if output_format == 'csv':
        column_names = None
        column_names_set = None
        rows = []
        for store_dict, data_dict, var_dict in dump_data:
            if store_dict is None:
                break
            row = {}
            row.update({'store.' + k: v for k, v in store_dict.items()})
            row.update({'data.' + k: v for k, v in data_dict.items()})
            row.update({'var.' + k: v for k, v in var_dict.items()})
            rows.append(row)
            if column_names_set is None:
                column_names = list(row.keys())
                column_names_set = set(column_names)
            else:
                for k in row.keys():
                    if k not in column_names_set:
                        column_names.append(k)
                        column_names_set.add(k)

        def format_cell_value(value: Any) -> str:
            return str(value) if value is not None else ''

        sep = '\t'
        with open(output_file_path, 'w') as fp:
            if column_names:
                fp.write(sep.join(column_names) + '\n')
                for row in rows:
                    fp.write(
                        sep.join(
                            map(format_cell_value,
                                tuple(row.get(k)
                                      for k in column_names))) + '\n')

        print(f'Dumped {len(rows)} store entry/ies to {output_file_path}.')

    else:
        last_store_dict = None
        last_data_dict = None
        vars_list = []
        data_list = []
        store_list = []
        for store_dict, data_dict, var_dict in dump_data:
            if data_dict is not last_data_dict or data_dict is None:
                if last_data_dict is not None:
                    last_data_dict['data_vars'] = vars_list
                    vars_list = []
                    data_list.append(last_data_dict)
                last_data_dict = data_dict
            if store_dict is not last_store_dict or store_dict is None:
                if last_store_dict is not None:
                    last_store_dict['data'] = data_list
                    data_list = []
                    store_list.append(last_store_dict)
                last_store_dict = store_dict
            if var_dict:
                vars_list.append(var_dict)

        with open(output_file_path, 'w') as fp:
            if output_format == 'json':
                json.dump(dict(stores=store_list), fp, indent=2)
            else:
                yaml.dump(dict(stores=store_list), fp, indent=2)

        print(
            f'Dumped entries of {len(store_list)} store(s) to {output_file_path}.'
        )
Example #26
    def test_get_store_error(self):
        pool = DataStorePool()
        with self.assertRaises(DataStoreError) as cm:
            pool.get_store('dir-1')
        self.assertEqual('Configured data store instance "dir-1" not found.',
                         f'{cm.exception}')