Example #1
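Unit test: decode_cube raises DatasetIsNotACubeError when the dataset contains no grid mapping at all.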
 def test_no_grid_mapping(self):
     dataset = xr.Dataset(dict(a=[1, 2, 3], b=0.5))
     with self.assertRaises(DatasetIsNotACubeError) as cm:
         decode_cube(dataset)
     self.assertEqual(
         "Failed to detect grid mapping:"
         " cannot find any grid mapping in dataset", f'{cm.exception}')
Example #2
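Unit test: with force_non_empty=True, decode_cube raises DatasetIsNotACubeError if the dataset has no data variables with cube dimensions.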
 def test_no_cube_vars_found(self):
     dataset = new_cube()
     self.assertEqual(set(), set(dataset.data_vars))
     with self.assertRaises(DatasetIsNotACubeError) as cm:
         decode_cube(dataset, force_non_empty=True)
     self.assertEqual(
         "No variables found with dimensions"
         " ('time', [...] 'lat', 'lon')"
         " or dimension sizes too small", f'{cm.exception}')
Example #3
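Unit test: with force_geographic=True, decode_cube rejects a dataset whose grid mapping uses a non-geographic CRS (here EPSG:25832, ETRS89 / UTM zone 32N).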
 def test_grid_mapping_not_geographic(self):
     dataset = new_cube(x_name='x',
                        y_name='y',
                        variables=dict(a=0.5),
                        crs='epsg:25832')
     with self.assertRaises(DatasetIsNotACubeError) as cm:
         decode_cube(dataset, force_geographic=True)
     self.assertEqual(
         "Grid mapping must use geographic CRS,"
         " but was 'ETRS89 / UTM zone 32N'", f'{cm.exception}')
Example #4
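Unit test: a dataset that already is a cube is returned unchanged, together with its GridMapping and an empty remainder dataset.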
 def test_cube_stays_cube(self):
     dataset = new_cube(variables=dict(a=1, b=2, c=3))
     cube, grid_mapping, rest = decode_cube(dataset)
     self.assertIs(dataset, cube)
     self.assertIsInstance(grid_mapping, GridMapping)
     self.assertTrue(grid_mapping.crs.is_geographic)
     self.assertIsInstance(rest, xr.Dataset)
     self.assertEqual(set(), set(rest.data_vars))
Example #5
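Cube-generator method that opens a dataset either through a configured data store or a data opener, turns it into a cube and grid mapping via decode_cube while reporting progress, and re-raises store and decode errors as CubeGeneratorError.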
    def open_cube(self, input_config: InputConfig) -> TransformedCube:
        cube_config = self._cube_config
        cube_params = cube_config.to_dict()
        opener_id = input_config.opener_id
        store_params = input_config.store_params or {}
        open_params = input_config.open_params or {}

        with observe_progress('reading cube', 3) as observer:
            try:
                if input_config.store_id:
                    store_instance = get_data_store_instance(
                        input_config.store_id,
                        store_params=store_params,
                        store_pool=self._store_pool)
                    store = store_instance.store
                    if opener_id is None:
                        opener_id = self._get_opener_id(input_config, store)
                    opener = store
                    open_params = dict(open_params)
                    open_params['opener_id'] = opener_id
                else:
                    opener = new_data_opener(opener_id)
                    open_params = dict(open_params)
                    open_params.update(store_params)

                open_params_schema = opener.get_open_data_params_schema(
                    input_config.data_id)

                dataset_open_params = {
                    k: v
                    for k, v in cube_params.items()
                    if k in open_params_schema.properties
                }

                observer.worked(1)

                dataset = opener.open_data(input_config.data_id, **open_params,
                                           **dataset_open_params)
                observer.worked(1)

            except DataStoreError as dse:
                raise CubeGeneratorError(f'{dse}', status_code=400) from dse

            # Turn dataset into cube and grid_mapping
            try:
                cube, gm, _ = decode_cube(dataset, normalize=True)
            except DatasetIsNotACubeError as e:
                raise CubeGeneratorError(f'{e}') from e
            observer.worked(1)

        if dataset_open_params:
            drop_names = [
                k for k in dataset_open_params.keys()
                if k not in _STEADY_CUBE_CONFIG_NAMES
            ]
            cube_config = cube_config.drop_props(drop_names)

        return cube, gm, cube_config
Example #6
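Constructor of a multi-level dataset wrapper that decodes the base dataset into a cube and grid mapping (requiring non-empty cube variables) before delegating to the parent class.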
 def __init__(self,
              base_dataset: xr.Dataset,
              tile_grid: TileGrid = None,
              ds_id: str = None):
     assert_instance(base_dataset, xr.Dataset, name='base_dataset')
     self._base_cube, grid_mapping, _ = decode_cube(base_dataset,
                                                    force_non_empty=True)
     super().__init__(grid_mapping=grid_mapping,
                      tile_grid=tile_grid,
                      ds_id=ds_id)
Example #7
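Server helper that opens a dataset either from a configured data store or from Python code, uses decode_cube to normalize plain datasets into non-empty, geographic cubes wrapped as multi-level datasets, and optionally applies an augmentation script.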
 def _open_ml_dataset(self, dataset_config: DatasetConfigDict) \
         -> MultiLevelDataset:
     ds_id: str = dataset_config.get('Identifier')
     store_instance_id = dataset_config.get('StoreInstanceId')
     if store_instance_id:
         data_store_pool = self.get_data_store_pool()
         data_store = data_store_pool.get_store(store_instance_id)
         data_id = dataset_config.get('Path')
         open_params = dataset_config.get('StoreOpenParams') or {}
         # Inject chunk_cache_capacity into open parameters
         chunk_cache_capacity = self.get_dataset_chunk_cache_capacity(
             dataset_config)
         if chunk_cache_capacity \
                 and (data_id.endswith('.zarr')
                      or data_id.endswith('.levels')) \
                 and 'cache_size' not in open_params:
             open_params['cache_size'] = chunk_cache_capacity
         with self.measure_time(tag=f"opened dataset {ds_id!r}"
                                f" from data store"
                                f" {store_instance_id!r}"):
             dataset = data_store.open_data(data_id, **open_params)
         if isinstance(dataset, MultiLevelDataset):
             ml_dataset = dataset
         else:
             cube, _, _ = decode_cube(dataset,
                                      normalize=True,
                                      force_non_empty=True,
                                      force_geographic=True)
             ml_dataset = BaseMultiLevelDataset(cube, ds_id=ds_id)
     else:
         fs_type = dataset_config.get('FileSystem')
         if fs_type != 'memory':
             raise ServiceConfigError(f"Invalid FileSystem {fs_type!r}"
                                      f" in dataset configuration"
                                      f" {ds_id!r}")
         with self.measure_time(tag=f"opened dataset {ds_id!r}"
                                f" from {fs_type!r}"):
             ml_dataset = _open_ml_dataset_from_python_code(
                 self, dataset_config)
     augmentation = dataset_config.get('Augmentation')
     if augmentation:
         script_path = self.get_config_path(
             augmentation,
             f"'Augmentation' of dataset configuration {ds_id}")
         input_parameters = augmentation.get('InputParameters')
         callable_name = augmentation.get('Function', COMPUTE_VARIABLES)
         ml_dataset = augment_ml_dataset(ml_dataset,
                                         script_path,
                                         callable_name,
                                         self.get_ml_dataset,
                                         self.set_ml_dataset,
                                         input_parameters=input_parameters,
                                         exception_type=ServiceConfigError)
     return ml_dataset
Example #8
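Unit test: encode_cube is the inverse of decode_cube; re-encoding the cube, grid mapping, and remainder reproduces the original data variables, including the crs variable.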
 def test_encode_is_inverse(self):
     dataset = new_cube(variables=dict(a=1, b=2, c=3),
                        x_name='x',
                        y_name='y')
     dataset = dataset.assign(
         d=xr.DataArray([8, 9, 10], dims='level'),
         crs=xr.DataArray(0, attrs=pyproj.CRS.from_string('CRS84').to_cf()),
     )
     cube, grid_mapping, rest = decode_cube(dataset)
     dataset2 = encode_cube(cube, grid_mapping, rest)
     self.assertEqual(set(dataset.data_vars), set(dataset2.data_vars))
     self.assertIn('crs', dataset2.data_vars)
Example #9
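Unit test: variables without cube dimensions (here d and crs) are dropped from the returned cube and collected in the remainder dataset, while the grid mapping reflects the dataset's CRS.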
 def test_no_cube_vars_are_dropped(self):
     dataset = new_cube(variables=dict(a=1, b=2, c=3))
     dataset = dataset.assign(
         d=xr.DataArray([8, 9, 10], dims='level'),
         crs=xr.DataArray(0, attrs=pyproj.CRS.from_string('CRS84').to_cf()),
     )
     self.assertEqual({'a', 'b', 'c', 'd', 'crs'}, set(dataset.data_vars))
     cube, grid_mapping, rest = decode_cube(dataset)
     self.assertIsInstance(cube, xr.Dataset)
     self.assertIsInstance(grid_mapping, GridMapping)
     self.assertEqual({'a', 'b', 'c'}, set(cube.data_vars))
     self.assertEqual(pyproj.CRS.from_string('CRS84'), grid_mapping.crs)
     self.assertIsInstance(rest, xr.Dataset)
     self.assertEqual({'d', 'crs'}, set(rest.data_vars))