Example #1
 def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
     if self.has_data(data_id, type_specifier=str(TYPE_SPECIFIER_CUBE)):
         return str(TYPE_SPECIFIER_DATASET), str(TYPE_SPECIFIER_CUBE)
     if self.has_data(data_id, type_specifier=str(TYPE_SPECIFIER_DATASET)):
         return str(TYPE_SPECIFIER_DATASET),
     raise DataStoreError(
         f'Data resource "{data_id}" does not exist in store')
Example #2
 def delete_data(self, data_id: str):
     path = self._resolve_data_id_to_path(data_id)
     try:
         self._s3.delete(path, recursive=True)
         self.deregister_data(data_id)
     except ValueError as e:
         raise DataStoreError(f'{e}') from e
Example #3
 def write_data(self,
                data: Any,
                data_id: str = None,
                writer_id: str = None,
                replace: bool = False,
                **write_params) -> str:
     assert_instance(data, (xr.Dataset, MultiLevelDataset))
     if not writer_id:
         if isinstance(data, MultiLevelDataset):
             predicate = get_data_accessor_predicate(
                 type_specifier=TYPE_SPECIFIER_MULTILEVEL_DATASET,
                 format_id='levels',
                 storage_id=_STORAGE_ID)
         elif isinstance(data, xr.Dataset):
             predicate = get_data_accessor_predicate(
                 type_specifier=TYPE_SPECIFIER_DATASET,
                 format_id='zarr',
                 storage_id=_STORAGE_ID)
         else:
             raise DataStoreError(f'Unsupported data type "{type(data)}"')
         extensions = find_data_writer_extensions(predicate=predicate)
         writer_id = extensions[0].name
     data_id = self._ensure_valid_data_id(data_id, data)
     path = self._resolve_data_id_to_path(data_id)
     self._new_s3_writer(writer_id).write_data(data,
                                               data_id=path,
                                               replace=replace,
                                               **write_params)
     self.register_data(data_id, data)
     return data_id
Example #4
 def describe_data(self,
                   data_id: str,
                   type_specifier: str = None) -> DataDescriptor:
     self._assert_valid_data_id(data_id)
     actual_type_specifier = self._get_type_specifier_for_data_id(data_id)
     if actual_type_specifier is not None:
         if type_specifier is None or actual_type_specifier.satisfies(
                 type_specifier):
             data = self.open_data(data_id)
             return new_data_descriptor(data_id, data, require=True)
         else:
             raise DataStoreError(
                 f'Type specifier "{type_specifier}" cannot be satisfied'
                 f' by type specifier "{actual_type_specifier}" of data resource "{data_id}"'
             )
     else:
         raise DataStoreError(f'Data resource "{data_id}" not found')
Example #5
 def describe_data(self, data_id: str) -> DatasetDescriptor:
     self._assert_valid_data_id(data_id)
     try:
         ds_metadata = self._cci_odp.get_dataset_metadata(data_id)
         return self._get_data_descriptor_from_metadata(
             data_id, ds_metadata)
     except ValueError:
         raise DataStoreError(
             f'Cannot describe metadata. "{data_id}" does not seem to be a valid identifier.'
         )
Example #6
 def _get_accessor_id_parts(cls,
                            data_id: str,
                            require=True) -> Optional[Tuple[str, str, str]]:
     assert_given(data_id, 'data_id')
     _, ext = os.path.splitext(data_id)
     accessor_id_parts = _FILENAME_EXT_TO_ACCESSOR_ID_PARTS.get(ext)
     if not accessor_id_parts and require:
         raise DataStoreError(
             f'A dataset named "{data_id}" is not supported')
     return accessor_id_parts
Example #7
 def get_data_opener_ids(
     self,
     data_id: str = None,
     type_specifier: str = None,
 ) -> Tuple[str, ...]:
     self._assert_valid_type_specifier(type_specifier)
     if data_id is not None and not self.has_data(data_id):
         raise DataStoreError(
             f'Data Resource "{data_id}" is not available.')
     may_be_cube = data_id is None or self.has_data(
         data_id, str(TYPE_SPECIFIER_CUBE))
     if type_specifier:
         if TYPE_SPECIFIER_CUBE.is_satisfied_by(type_specifier):
             if not may_be_cube:
                 raise DataStoreError(
                     f'Data Resource "{data_id}" is not available '
                     f'as specified type "{type_specifier}".')
             return CUBE_OPENER_ID,
     if may_be_cube:
         return DATASET_OPENER_ID, CUBE_OPENER_ID
     return DATASET_OPENER_ID,
Example #8
 def describe_data(self,
                   data_id: str,
                   type_specifier: str = None) -> DataDescriptor:
     self._assert_valid_data_id(data_id)
     if type_specifier is not None:
         data_type_specifier = get_type_specifier(self._data_dict[data_id])
         if data_type_specifier is None or not data_type_specifier.satisfies(
                 type_specifier):
             raise DataStoreError(
                 f'Type specifier "{type_specifier}" cannot be satisfied'
                 f' by type specifier "{data_type_specifier}" of data resource "{data_id}"'
             )
     return new_data_descriptor(data_id, self._data_dict[data_id])
Example #9
 def write_data(self,
                data: Any,
                data_id: str = None,
                writer_id: str = None,
                replace: bool = False,
                **write_params) -> str:
     self._assert_empty_params(write_params, 'write_params')
     data_id = self._ensure_valid_data_id(data_id)
     if data_id in self._data_dict and not replace:
         raise DataStoreError(
             f'Data resource "{data_id}" already exists in store')
     self._data_dict[data_id] = data
     return data_id
Example #10
 def open_data(self, data_id: str, **open_params) -> xr.Dataset:
     s3 = self._s3
     if s3 is None:
         s3, open_params = self.consume_s3fs_params(open_params)
     bucket_name, open_params = self.consume_bucket_name_param(open_params)
     try:
         return xr.open_zarr(
             s3fs.S3Map(root=f'{bucket_name}/{data_id}'
                        if bucket_name else data_id,
                        s3=s3,
                        check=False), **open_params)
     except ValueError as e:
         raise DataStoreError(f'{e}') from e
Example #11
 def _get_dataset_and_collection_metadata(
         self,
         data_id: str) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]:
     dataset_metadata = SentinelHubMetadata().datasets.get(data_id)
     if dataset_metadata is None:
         raise DataStoreError(f'Dataset "{data_id}" not found.')
     if self._sentinel_hub is not None:
         # If we are connected to the API, we may also have collection metadata
         collection_name = dataset_metadata.get('collection_name')
         if collection_name is not None:
             for collection_metadata in self._sentinel_hub.collections():
                 if collection_name == collection_metadata.get('id'):
                     return dataset_metadata, collection_metadata
     return dataset_metadata, None
Example #12
    def test_get_filename_ext(self):
        import xarray as xr
        import geopandas as gpd
        from xcube.core.mldataset import BaseMultiLevelDataset

        dataset = xr.Dataset()
        self.assertEqual('.zarr', self.store._get_filename_ext(dataset))
        frame = gpd.GeoDataFrame()
        self.assertEqual('.geojson', self.store._get_filename_ext(frame))
        mldataset = BaseMultiLevelDataset(base_dataset=dataset)
        self.assertEqual('.levels', self.store._get_filename_ext(mldataset))

        self.assertIsNone(self.store._get_filename_ext(None))
        self.assertIsNone(
            self.store._get_filename_ext(DataStoreError('A nonsense object')))
Example #13
 def _get_opener(self,
                 opener_id: str = None,
                 type_specifier: str = None) -> CciOdpDataOpener:
     self._assert_valid_opener_id(opener_id)
     self._assert_valid_type_specifier(type_specifier)
     if type_specifier:
         if TYPE_SPECIFIER_CUBE.is_satisfied_by(type_specifier):
             type_opener_id = CUBE_OPENER_ID
         else:
             type_opener_id = DATASET_OPENER_ID
         if opener_id and opener_id != type_opener_id:
             raise DataStoreError(
                 f'Invalid combination of opener_id "{opener_id}" '
                 f'and type_specifier "{type_specifier}"')
         opener_id = type_opener_id
     if opener_id == CUBE_OPENER_ID:
         return self._cube_opener
     return self._dataset_opener
Example #14
def open_cubes(input_configs: Sequence[InputConfig],
               cube_config: CubeConfig,
               store_pool: DataStorePool = None):
    cubes = []
    all_cube_params = cube_config.to_dict()
    with observe_progress('Opening input(s)', len(input_configs)) as progress:
        for input_config in input_configs:
            open_params = {}
            opener_id = input_config.opener_id
            if input_config.store_id:
                store_instance = get_data_store_instance(
                    input_config.store_id,
                    store_params=input_config.store_params,
                    store_pool=store_pool)
                store = store_instance.store
                if opener_id is None:
                    opener_ids = store.get_data_opener_ids(
                        data_id=input_config.data_id,
                        type_specifier=TYPE_SPECIFIER_CUBE)
                    if not opener_ids:
                        raise DataStoreError(
                            f'Data store "{input_config.store_id}" does not support data cubes'
                        )
                    opener_id = opener_ids[0]
                opener = store
                open_params.update(opener_id=opener_id,
                                   **input_config.open_params)
            else:
                opener = new_data_opener(opener_id)
                open_params.update(**input_config.store_params,
                                   **input_config.open_params)
            open_params_schema = opener.get_open_data_params_schema(
                input_config.data_id)
            cube_params = {
                k: v
                for k, v in all_cube_params.items()
                if k in open_params_schema.properties
            }
            cube = opener.open_data(input_config.data_id, **open_params,
                                    **cube_params)
            cubes.append(cube)
            progress.worked(1)

    return cubes
Example #15
 def _get_accessor_extensions(self,
                              data_id: str,
                              get_data_accessor_extensions,
                              require=True) -> List[Extension]:
     accessor_id_parts = self._get_accessor_id_parts(data_id,
                                                     require=require)
     if not accessor_id_parts:
         return []
     type_specifier, format_id, storage_id = accessor_id_parts
     predicate = get_data_accessor_predicate(type_specifier=type_specifier,
                                             format_id=format_id,
                                             storage_id=storage_id)
     extensions = get_data_accessor_extensions(predicate)
     if not extensions:
         if require:
             raise DataStoreError(
                 f'No accessor found for data resource "{data_id}"')
         return []
     return extensions
Example #16
 def write_data(self,
                data: xr.Dataset,
                data_id: str,
                replace=False,
                **write_params):
     assert_instance(data, xr.Dataset, 'data')
     s3 = self._s3
     if s3 is None:
         s3, write_params = self.consume_s3fs_params(write_params)
     bucket_name, write_params = self.consume_bucket_name_param(
         write_params)
     try:
         data.to_zarr(s3fs.S3Map(
             root=f'{bucket_name}/{data_id}' if bucket_name else data_id,
             s3=s3,
             check=False),
                      mode='w' if replace else None,
                      **write_params)
     except ValueError as e:
         raise DataStoreError(f'{e}') from e
Example #17
    def search_data(self,
                    type_specifier: str = None,
                    **search_params) -> Iterator[DataDescriptor]:
        """
        Search the data store.

        The default implementation returns all data resources that may be filtered using
        the optional *type_specifier*.

        :param type_specifier: Type specifier to filter returned data resources.
        :param search_params: Not supported (yet)
        :return: an iterator of :class:`DataDescriptor` instances
        """
        if search_params:
            raise DataStoreError(
                f'Unsupported search parameters: {", ".join(search_params.keys())}'
            )
        for data_id, _ in self.get_data_ids(type_specifier=type_specifier,
                                            include_titles=False):
            yield self.describe_data(data_id)
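
The docstring above notes that arbitrary search parameters are rejected by this default implementation. A minimal caller-side sketch of that behaviour, assuming a concrete store instance and using a hypothetical bbox keyword purely for illustration:

# Hypothetical usage sketch: the default search_data() shown above raises
# DataStoreError for any unsupported keyword argument. Because search_data()
# is a generator function, the error surfaces on iteration, here via list().
try:
    descriptors = list(store.search_data(type_specifier='dataset',
                                         bbox=(0.0, 40.0, 10.0, 50.0)))
except DataStoreError as e:
    print(f'Search rejected: {e}')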
Example #18
 def delete_data(self, data_id: str):
     if not os.path.exists(data_id):
         raise DataStoreError(f'A dataset named "{data_id}" does not exist')
     rimraf(data_id)
Example #19
 def _assert_empty_params(self, params: Optional[Mapping[str, Any]],
                          name: str):
     if params:
         param_names = ', '.join(map(lambda k: f'"{k}"', params.keys()))
         raise DataStoreError(f'Unsupported {name} {param_names}')
Example #20
 def _assert_valid_opener_id(self, opener_id):
     if opener_id is not None and opener_id != DATASET_OPENER_ID and opener_id != CUBE_OPENER_ID:
         raise DataStoreError(
             f'Data opener identifier must be "{DATASET_OPENER_ID}" or "{CUBE_OPENER_ID}", '
             f'but got "{opener_id}"')
Example #21
 def get_type_specifiers_for_data(self, data_id: str) -> Tuple[str, ...]:
     if not self.has_data(data_id):
         raise DataStoreError(
             f'"{data_id}" is not provided by this data store')
     data_type_specifier, _, _ = self._get_accessor_id_parts(data_id)
     return data_type_specifier,
Example #22
 def _assert_not_closed(self):
     if self._s3 is None:
         raise DataStoreError('Data store already closed.')
Example #23
 def _assert_valid_data_id(self, data_id):
     if not self.has_data(data_id):
         raise DataStoreError(
             f'Data resource "{data_id}" does not exist in store')
Example #24
 def _assert_valid_data_id(self, data_id: str):
     if data_id not in self.dataset_names:
         raise DataStoreError(
             f'Cannot describe metadata of data resource "{data_id}", '
             f'as it cannot be accessed by data accessor "{self._id}".')
Example #25
 def _assert_valid_data_id(self, data_id):
     assert_given(data_id, 'data_id')
     if data_id not in self._data_dict:
         raise DataStoreError(
             f'Data resource "{data_id}" does not exist in store')
Example #26
 def _validate_type_specifier(type_specifier: Union[str, TypeSpecifier]):
     if not CDSDataStore._is_type_specifier_satisfied(type_specifier):
         raise DataStoreError(
             f'Supplied type specifier "{type_specifier}" is not compatible '
             f'with "{TYPE_SPECIFIER_CUBE}."')
Example #27
 def _assert_valid_type_specifier(cls, type_specifier):
     if not cls._is_valid_type_specifier(type_specifier):
         raise DataStoreError(
             f'Type Specifier must be "{TYPE_SPECIFIER_DATASET}" or "{TYPE_SPECIFIER_CUBE}", '
             f'but got "{type_specifier}"')
Example #28
 def _assert_valid_opener_id(opener_id):
     if opener_id is not None and opener_id != CDS_DATA_OPENER_ID:
         raise DataStoreError(
             f'Data opener identifier must be "{CDS_DATA_OPENER_ID}"'
             f'but got "{opener_id}"')