Example #1
    def test_get_store_instance_id(self):
        store_params_1 = {
            "root": "./bibo"
        }
        ds_config_1 = DataStoreConfig(store_id='file',
                                      store_params=store_params_1)
        ds_configs = {'dir-1': ds_config_1}
        pool = DataStorePool(ds_configs)

        store_params_2 = {
            "root": "./babo"
        }
        ds_config_2 = DataStoreConfig(store_id='file',
                                      store_params=store_params_2)
        ds_config_3 = DataStoreConfig(store_id='file',
                                      store_params=store_params_1,
                                      title='A third configuration')

        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1))
        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_1,
                                                             strict_check=True))

        self.assertIsNone(pool.get_store_instance_id(ds_config_2))

        self.assertEqual('dir-1', pool.get_store_instance_id(ds_config_3))
        self.assertIsNone(pool.get_store_instance_id(ds_config_3,
                                                     strict_check=True))
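
The test above shows the two matching modes of get_store_instance_id(): by
default only store_id and store_params are compared, so ds_config_3 (same
parameters, but an extra title) still resolves to 'dir-1', while with
strict_check=True title and description must match as well and the lookup
returns None. A minimal sketch of the same behavior, assuming the imports
used elsewhere in these examples:

    from xcube.core.store import DataStoreConfig, DataStorePool

    config = DataStoreConfig(store_id='file', store_params={'root': './bibo'})
    pool = DataStorePool({'dir-1': config})

    titled = DataStoreConfig(store_id='file',
                             store_params={'root': './bibo'},
                             title='A third configuration')
    assert pool.get_store_instance_id(titled) == 'dir-1'  # loose match
    assert pool.get_store_instance_id(titled, strict_check=True) is None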
Example #2
    def test_to_dict(self):
        self.assertEqual({}, DataStorePool().to_dict())
        self.assertEqual({'ram': {'store_id': 'memory'},
                          'dir': {'store_id': 'file',
                                  'store_params': {'base_dir': 'bibo'}}},
                         DataStorePool({'ram': DataStoreConfig(store_id='memory'),
                                        'dir': DataStoreConfig(store_id='file',
                                                               store_params=dict(base_dir="bibo"))
                                        }).to_dict())
Example #3
    def test_to_dict(self):
        store_config = DataStoreConfig('directory',
                                       store_params={'base_dir': '.'},
                                       title='Local',
                                       description='Local files')
        self.assertEqual({'description': 'Local files',
                          'name': 'Local',
                          'store_id': 'directory',
                          'store_params': {'base_dir': '.'}},
                         store_config.to_dict())
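
Note that this snippet's to_dict() serializes the title under the 'name' key,
whereas DataStoreConfig.from_dict() in Example #9 reads it from 'title'; the
snippets apparently come from different xcube versions, so the two key sets
should not be assumed to be symmetric.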
Example #4
    def test_constructor_asserts(self):
        with self.assertRaises(ValueError) as cm:
            DataStoreConfig('')
        self.assertEqual('store_id must be given', f'{cm.exception}')

        with self.assertRaises(TypeError) as cm:
            # noinspection PyTypeChecker
            DataStoreConfig('directory', store_params=[1, 'B'])
        self.assertEqual("store_params must be an instance of <class 'dict'>",
                         f'{cm.exception}')
Example #5
    def test_from_dict_with_invalid_cost_params(self):
        with self.assertRaises(DataStoreError):
            DataStoreConfig.from_dict({'description': 'Local files',
                                       'title': 'Local',
                                       'store_id': 'file',
                                       'store_params': {'root': '.'},
                                       'cost_params': {
                                           # Required:
                                           # 'input_pixels_per_punit': 10,
                                           # 'output_pixels_per_punit': 20,
                                       }})
Example #6
    def test_add_remove_store_config(self):
        pool = DataStorePool()
        self.assertEqual([], pool.store_instance_ids)
        pool.add_store_config('mem-1', DataStoreConfig('memory'))
        self.assertEqual(['mem-1'], pool.store_instance_ids)
        pool.add_store_config('mem-2', DataStoreConfig('memory'))
        self.assertEqual(['mem-1', 'mem-2'], pool.store_instance_ids)
        pool.add_store_config('mem-1', DataStoreConfig('memory'))
        self.assertEqual(['mem-1', 'mem-2'], pool.store_instance_ids)
        pool.remove_store_config('mem-1')
        self.assertEqual(['mem-2'], pool.store_instance_ids)
        pool.remove_store_config('mem-2')
        self.assertEqual([], pool.store_instance_ids)
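
Note that re-adding a configuration under an existing instance id ('mem-1'
above) leaves the id list unchanged; the test does not assert whether the
stored configuration is replaced or kept.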
Example #7
    def test_get_data_store_instance_from_pool(self):
        pool = DataStorePool({
            'dir':
            DataStoreConfig('directory', store_params=dict(base_dir='.'))
        })
        instance = get_data_store_instance('@dir', store_pool=pool)
        self.assertIsInstance(instance.store, DirectoryDataStore)
        instance2 = get_data_store_instance('@dir', store_pool=pool)
        self.assertIs(instance, instance2)
Example #8
    def test_constructor_and_instance_props(self):
        store_config = DataStoreConfig('file',
                                       store_params={'root': '.'},
                                       title='Local',
                                       description='Local files')
        self.assertEqual('file', store_config.store_id)
        self.assertEqual({'root': '.'}, store_config.store_params)
        self.assertEqual('Local', store_config.title)
        self.assertEqual('Local files', store_config.description)
Example #9
    def test_from_dict(self):
        store_config = DataStoreConfig.from_dict({
            'description': 'Local files',
            'title': 'Local',
            'store_id': 'file',
            'store_params': {'root': '.'}
        })
        self.assertIsInstance(store_config, DataStoreConfig)
        self.assertEqual('file', store_config.store_id)
        self.assertEqual({'root': '.'}, store_config.store_params)
        self.assertEqual('Local', store_config.title)
        self.assertEqual('Local files', store_config.description)
Example #10
    def test_get_data_store_instance_from_pool_with_params(self):
        pool = DataStorePool({
            '@dir':
            DataStoreConfig('directory', store_params=dict(base_dir='.'))
        })
        with self.assertRaises(ValueError) as cm:
            get_data_store_instance('@dir',
                                    store_pool=pool,
                                    store_params={'thres': 5})
        self.assertEqual(
            'store_params cannot be given, with store_id ("@dir") referring to a configured store',
            f'{cm.exception}')
Example #11
    def test_get_data_store_instance_from_pool(self):
        pool = DataStorePool({
            'dir': DataStoreConfig('file',
                                   store_params=dict(root='.'))
        })
        instance = get_data_store_instance('@dir', store_pool=pool)
        self.assertTrue(hasattr(instance.store, 'root'))
        # noinspection PyUnresolvedReferences
        self.assertTrue(os.path.isabs(instance.store.root))
        self.assertTrue(os.path.isdir(instance.store.root))
        instance2 = get_data_store_instance('@dir', store_pool=pool)
        self.assertIs(instance, instance2)
Example #12
    def test_get_data_store_instance_from_pool_with_params(self):
        pool = DataStorePool({
            '@dir': DataStoreConfig('file',
                                    store_params=dict(root='.'))
        })
        with self.assertRaises(ValueError) as cm:
            get_data_store_instance(
                '@dir', store_pool=pool, store_params={'auto_mkdir': True}
            )
        self.assertEqual('store_params cannot be given,'
                         ' with store_id ("@dir") referring'
                         ' to a configured store',
                         f'{cm.exception}')
Example #13
    def test_from_dict_with_valid_cost_params(self):
        store_config = DataStoreConfig.from_dict({
            'description': 'Local files',
            'title': 'Local',
            'store_id': 'file',
            'store_params': {'root': '.'},
            'cost_params': {
                'input_pixels_per_punit': 500,
                'output_pixels_per_punit': 100,
                'input_punits_weight': 1.1,
            }
        })
        self.assertIsInstance(store_config, DataStoreConfig)
        self.assertEqual('file', store_config.store_id)
        self.assertEqual({'root': '.'}, store_config.store_params)
        self.assertEqual('Local', store_config.title)
        self.assertEqual('Local files', store_config.description)
Example #14
    def get_data_store_pool(self) -> Optional[DataStorePool]:
        data_store_configs = self._config.get('DataStores', [])
        if not data_store_configs or self._data_store_pool:
            # Nothing is configured, or the pool has already been built:
            # return the cached value (which may be None).
            return self._data_store_pool
        if not isinstance(data_store_configs, list):
            raise ServiceConfigError('DataStores must be a list')
        store_configs: Dict[str, DataStoreConfig] = {}
        # Turn each 'DataStores' entry into a DataStoreConfig,
        # keyed by its 'Identifier'.
        for data_store_config_dict in data_store_configs:
            store_instance_id = data_store_config_dict.get('Identifier')
            store_id = data_store_config_dict.get('StoreId')
            store_params = data_store_config_dict.get('StoreParams', {})
            dataset_configs = data_store_config_dict.get('Datasets')
            store_config = DataStoreConfig(store_id,
                                           store_params=store_params,
                                           user_data=dataset_configs)
            store_configs[store_instance_id] = store_config
        self._data_store_pool = DataStorePool(store_configs)
        return self._data_store_pool
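
For reference, the 'DataStores' configuration this method consumes looks
roughly like the following; the key names are taken from the .get() calls
above, while the concrete values are hypothetical:

    config = {
        'DataStores': [
            {
                'Identifier': 'local',         # becomes the store instance id
                'StoreId': 'file',             # which data store to open
                'StoreParams': {'root': '.'},  # passed through as store_params
                'Datasets': [],                # attached as user_data
            }
        ]
    }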
Example #15
    def test_normalize(self):
        pool = DataStorePool({
            '@dir': DataStoreConfig('directory',
                                    store_params=dict(root='.'))
        })
        file_path = '_test-data-stores-pool.json'
        with open(file_path, 'w') as fp:
            json.dump(pool.to_dict(), fp)
        try:
            pool_1 = DataStorePool.normalize(file_path)
            self.assertIsInstance(pool_1, DataStorePool)
            pool_2 = DataStorePool.normalize(pool_1)
            self.assertIs(pool_2, pool_1)
            pool_3 = DataStorePool.normalize(pool_2.to_dict())
            self.assertIsInstance(pool_3, DataStorePool)
        finally:
            os.remove(file_path)

        with self.assertRaises(TypeError):
            # noinspection PyTypeChecker
            DataStorePool.normalize(42)
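
As the test shows, DataStorePool.normalize() accepts a file path (the pool is
read from the JSON file), an existing DataStorePool (returned as-is), or a
plain dict, and raises TypeError for anything else.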
Example #16
def dump(output_file_path: Optional[str], config_file_path: Optional[str],
         data_type: Optional[str], short_form: bool, include_props: str,
         exclude_props: str, csv_format: bool, yaml_format: bool,
         json_format: bool):
    """
    Dump metadata of given data stores.

    Dumps data store metadata, and the metadata of each store's data
    resources, for the given data stores into a JSON, YAML, or CSV file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import yaml
    import json
    import os.path

    if csv_format:
        output_format = 'csv'
        ext = '.csv'
    elif yaml_format:
        output_format = 'yaml'
        ext = '.yml'
    elif json_format:
        output_format = 'json'
        ext = '.json'
    elif output_file_path is not None:
        path_no_ext, ext = os.path.splitext(output_file_path)
        if ext in ('.csv', '.txt'):
            output_format = 'csv'
        elif ext in ('.yaml', '.yml'):
            output_format = 'yaml'
        else:
            output_format = 'json'
    else:
        output_format = 'json'
        ext = '.json'

    if output_file_path is None:
        path_no_ext, _ = os.path.splitext(_DEFAULT_DUMP_OUTPUT)
        output_file_path = path_no_ext + ext

    include_props = _parse_props(include_props) if include_props else None
    exclude_props = _parse_props(exclude_props) if exclude_props else None

    if short_form:
        short_include_props = _parse_props(_SHORT_INCLUDE)
        include_props = include_props or {}
        for data_key in ('store', 'data', 'var'):
            include_props[data_key] = include_props.get(data_key, set()).union(
                short_include_props[data_key])

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    dump_data = _get_store_data_var_tuples(store_pool, data_type,
                                           include_props, exclude_props)

    if output_format == 'csv':
        column_names = None
        column_names_set = None
        rows = []
        for store_dict, data_dict, var_dict in dump_data:
            if store_dict is None:
                break
            row = {}
            row.update({'store.' + k: v for k, v in store_dict.items()})
            row.update({'data.' + k: v for k, v in data_dict.items()})
            row.update({'var.' + k: v for k, v in var_dict.items()})
            rows.append(row)
            if column_names_set is None:
                column_names = list(row.keys())
                column_names_set = set(column_names)
            else:
                for k in row.keys():
                    if k not in column_names_set:
                        column_names.append(k)
                        column_names_set.add(k)

        def format_cell_value(value: Any) -> str:
            return str(value) if value is not None else ''

        sep = '\t'
        with open(output_file_path, 'w') as fp:
            if column_names:
                fp.write(sep.join(column_names) + '\n')
                for row in rows:
                    fp.write(
                        sep.join(
                            map(format_cell_value,
                                tuple(row.get(k)
                                      for k in column_names))) + '\n')

        print(f'Dumped {len(rows)} store entry/ies to {output_file_path}.')

    else:
        last_store_dict = None
        last_data_dict = None
        vars_list = []
        data_list = []
        store_list = []
        for store_dict, data_dict, var_dict in dump_data:
            if data_dict is not last_data_dict or data_dict is None:
                if last_data_dict is not None:
                    last_data_dict['data_vars'] = vars_list
                    vars_list = []
                    data_list.append(last_data_dict)
                last_data_dict = data_dict
            if store_dict is not last_store_dict or store_dict is None:
                if last_store_dict is not None:
                    last_store_dict['data'] = data_list
                    data_list = []
                    store_list.append(last_store_dict)
                last_store_dict = store_dict
            if var_dict:
                vars_list.append(var_dict)

        with open(output_file_path, 'w') as fp:
            if output_format == 'json':
                json.dump(dict(stores=store_list), fp, indent=2)
            else:
                yaml.dump(dict(stores=store_list), fp, indent=2)

        print(
            f'Dumped entries of {len(store_list)} store(s) to {output_file_path}.'
        )
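
The format selection above has a fixed precedence: an explicit CSV/YAML/JSON
flag wins; otherwise the extension of the output path decides ('.csv' or
'.txt' selects CSV, '.yaml' or '.yml' selects YAML); JSON is the fallback,
and _DEFAULT_DUMP_OUTPUT (defined elsewhere in the module) supplies the file
name when none is given.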
Example #17
def dump(output_file_path: str, config_file_path: Optional[str],
         type_specifier: Optional[str]):
    """
    Dump metadata of given data stores.

    Dumps data store metadata, and the metadata of each store's data
    resources, for the given data stores into a JSON file.
    Data stores may be selected and configured by a configuration file CONFIG,
    which may have JSON or YAML format.
    For example, this YAML configuration configures a single directory data store:

    \b
    this_dir:
        title: Current Dir
        description: A store that represents my current directory
        store_id: "directory"
        store_params:
            base_dir: "."

    """
    from xcube.core.store import DataStoreConfig
    from xcube.core.store import DataStorePool
    import time

    if config_file_path:
        store_pool = DataStorePool.from_file(config_file_path)
    else:
        extensions = get_extension_registry().find_extensions(
            EXTENSION_POINT_DATA_STORES)
        store_configs = {
            extension.name:
            DataStoreConfig(extension.name,
                            title=extension.metadata.get('title'),
                            description=extension.metadata.get('description'))
            for extension in extensions
            if extension.name not in ('memory', 'directory', 's3')
        }
        store_pool = DataStorePool(store_configs)

    stores = []
    for store_instance_id in store_pool.store_instance_ids:
        t0 = time.perf_counter()
        print(f'Generating entries for store "{store_instance_id}"...')
        try:
            store_instance = store_pool.get_store(store_instance_id)
        except BaseException as error:
            print(f'error: cannot open store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        try:
            search_result = [
                dsd.to_dict() for dsd in store_instance.search_data(
                    type_specifier=type_specifier)
            ]
        except BaseException as error:
            print(f'error: cannot search store "{store_instance_id}": {error}',
                  file=sys.stderr)
            continue

        store_config = store_pool.get_store_config(store_instance_id)
        stores.append(
            dict(store_instance_id=store_instance_id,
                 store_id=store_instance_id,
                 title=store_config.title,
                 description=store_config.description,
                 type_specifier=type_specifier,
                 datasets=search_result))
        print('Done after {:.2f} seconds'.format(time.perf_counter() - t0))

    with open(output_file_path, 'w') as fp:
        json.dump(dict(stores=stores), fp, indent=2)

    print(f'Dumped {len(stores)} store(s) to {output_file_path}.')
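
This is a simpler variant of the dump command in Example #16: it always
writes JSON, filters data resources by type_specifier rather than data_type,
times each store with time.perf_counter(), and skips stores that fail to open
or to search instead of aborting.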
Example #18
    def _maybe_assign_store_instance_ids(self):
        assignable_dataset_configs = [
            dc for dc in self._dataset_configs if 'StoreInstanceId' not in dc
            and dc.get('FileSystem', 'file') in NON_MEMORY_FILE_SYSTEMS
        ]
        # split into sublists according to file system and non-root store params
        config_lists = []
        for config in assignable_dataset_configs:
            store_params = self._get_other_store_params_than_root(config)
            file_system = config.get('FileSystem', 'file')
            appended = False
            for config_list in config_lists:
                if config_list[0] == file_system and \
                        config_list[1] == store_params:
                    config_list[2].append(config)
                    appended = True
                    break
            if not appended:
                config_lists.append((file_system, store_params, [config]))

        data_store_pool = self.get_data_store_pool()
        if not data_store_pool:
            data_store_pool = self._data_store_pool = DataStorePool()

        for file_system, store_params, config_list in config_lists:
            # Retrieve paths per configuration
            paths = [dc['Path'] for dc in config_list]
            paths.sort()
            # Determine common prefixes of paths (and call them roots)
            prefixes = _get_common_prefixes(paths)
            if len(prefixes) < 1:
                roots = ['']
            else:
                # perform further step to merge prefixes with same start
                prefixes = list(set(prefixes))
                prefixes.sort()
                roots = []
                root_candidate = prefixes[0]
                for root in prefixes[1:]:
                    common_root = os.path.commonprefix([root_candidate, root])
                    if _is_not_empty(common_root):
                        root_candidate = common_root
                    else:
                        roots.append(root_candidate)
                        root_candidate = root
                roots.append(root_candidate)
            for root in roots:
                # ensure root does not end with full or partial directory
                # or file name
                while not root.endswith("/") and not root.endswith("\\") and \
                        len(root) > 0:
                    root = root[:-1]
                if root.endswith("/") or root.endswith("\\"):
                    root = root[:-1]
                abs_root = root
                # For local file systems: Determine absolute root from base dir
                fs_protocol = FS_TYPE_TO_PROTOCOL.get(file_system, file_system)
                if fs_protocol == 'file' and not os.path.isabs(abs_root):
                    abs_root = os.path.join(self._base_dir, abs_root)
                    abs_root = os.path.normpath(abs_root)
                store_params_for_root = store_params.copy()
                store_params_for_root['root'] = abs_root
                # See if there already is a store with this configuration
                data_store_config = DataStoreConfig(
                    store_id=fs_protocol, store_params=store_params_for_root)
                store_instance_id = data_store_pool.get_store_instance_id(
                    data_store_config)
                if not store_instance_id:
                    # Create new store with new unique store instance id
                    counter = 1
                    while data_store_pool.has_store_instance(
                            f'{fs_protocol}_{counter}'):
                        counter += 1
                    store_instance_id = f'{fs_protocol}_{counter}'
                    data_store_pool.add_store_config(store_instance_id,
                                                     data_store_config)
                for config in config_list:
                    if config['Path'].startswith(root):
                        config['StoreInstanceId'] = store_instance_id
                        new_path = config['Path'][len(root):]
                        while new_path.startswith("/") or \
                                new_path.startswith("\\"):
                            new_path = new_path[1:]
                        config['Path'] = new_path
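
In outline, the method groups the not-yet-assigned dataset configurations by
file system and non-root store parameters, derives common path prefixes to
serve as store roots, reuses a pooled store whose DataStoreConfig already
matches (via get_store_instance_id(), cf. Example #1) or registers a new one
under a unique '<protocol>_<n>' instance id, and finally rewrites each
configuration's Path relative to its root.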