def test_no_grid_mapping(self):
    dataset = xr.Dataset(dict(a=[1, 2, 3], b=0.5))
    with self.assertRaises(DatasetIsNotACubeError) as cm:
        decode_cube(dataset)
    self.assertEqual("Failed to detect grid mapping:"
                     " cannot find any grid mapping in dataset",
                     f'{cm.exception}')
def test_no_cube_vars_found(self):
    dataset = new_cube()
    self.assertEqual(set(), set(dataset.data_vars))
    with self.assertRaises(DatasetIsNotACubeError) as cm:
        decode_cube(dataset, force_non_empty=True)
    self.assertEqual("No variables found with dimensions"
                     " ('time', [...] 'lat', 'lon')"
                     " or dimension sizes too small",
                     f'{cm.exception}')
def test_grid_mapping_not_geographic(self):
    dataset = new_cube(x_name='x', y_name='y',
                       variables=dict(a=0.5),
                       crs='epsg:25832')
    with self.assertRaises(DatasetIsNotACubeError) as cm:
        decode_cube(dataset, force_geographic=True)
    self.assertEqual("Grid mapping must use geographic CRS,"
                     " but was 'ETRS89 / UTM zone 32N'",
                     f'{cm.exception}')
def test_cube_stays_cube(self):
    dataset = new_cube(variables=dict(a=1, b=2, c=3))
    cube, grid_mapping, rest = decode_cube(dataset)
    self.assertIs(dataset, cube)
    self.assertIsInstance(grid_mapping, GridMapping)
    self.assertTrue(grid_mapping.crs.is_geographic)
    self.assertIsInstance(rest, xr.Dataset)
    self.assertEqual(set(), set(rest.data_vars))
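
# Usage sketch (not part of the original sources): it only combines the
# decode_cube() flags exercised in the tests above and in the code below.
# The import path and the wrapper name `open_as_cube` are assumptions, not
# confirmed by this section.
#
#   from xcube.core.cube import decode_cube, DatasetIsNotACubeError
#
#   def open_as_cube(dataset: xr.Dataset):
#       try:
#           # Split into (cube, grid mapping, non-cube rest); fail early if the
#           # dataset has no grid mapping, no cube variables, or a projected CRS.
#           return decode_cube(dataset,
#                              normalize=True,
#                              force_non_empty=True,
#                              force_geographic=True)
#       except DatasetIsNotACubeError as error:
#           raise ValueError(f'not a data cube: {error}') from error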
def open_cube(self, input_config: InputConfig) -> TransformedCube:
    cube_config = self._cube_config
    cube_params = cube_config.to_dict()
    opener_id = input_config.opener_id
    store_params = input_config.store_params or {}
    open_params = input_config.open_params or {}
    with observe_progress('reading cube', 3) as observer:
        try:
            if input_config.store_id:
                # Open through a data store from the pool.
                store_instance = get_data_store_instance(
                    input_config.store_id,
                    store_params=store_params,
                    store_pool=self._store_pool)
                store = store_instance.store
                if opener_id is None:
                    opener_id = self._get_opener_id(input_config, store)
                opener = store
                open_params = dict(open_params)
                open_params['opener_id'] = opener_id
            else:
                # Open through a standalone data opener.
                opener = new_data_opener(opener_id)
                open_params = dict(open_params)
                open_params.update(store_params)
            # Pass on only those cube-config parameters that the opener
            # actually declares in its open-parameters schema.
            open_params_schema = opener.get_open_data_params_schema(
                input_config.data_id)
            dataset_open_params = {
                k: v for k, v in cube_params.items()
                if k in open_params_schema.properties
            }
            observer.worked(1)
            dataset = opener.open_data(input_config.data_id,
                                       **open_params,
                                       **dataset_open_params)
            observer.worked(1)
        except DataStoreError as dse:
            raise CubeGeneratorError(f'{dse}', status_code=400) from dse

        # Turn dataset into cube and grid_mapping
        try:
            cube, gm, _ = decode_cube(dataset, normalize=True)
        except DatasetIsNotACubeError as e:
            raise CubeGeneratorError(f'{e}') from e
        observer.worked(1)

    if dataset_open_params:
        # Parameters already consumed by the opener need not be applied
        # again in later processing steps.
        drop_names = [k for k in dataset_open_params.keys()
                      if k not in _STEADY_CUBE_CONFIG_NAMES]
        cube_config = cube_config.drop_props(drop_names)

    return cube, gm, cube_config
def __init__(self,
             base_dataset: xr.Dataset,
             tile_grid: TileGrid = None,
             ds_id: str = None):
    assert_instance(base_dataset, xr.Dataset, name='base_dataset')
    self._base_cube, grid_mapping, _ = decode_cube(base_dataset,
                                                   force_non_empty=True)
    super().__init__(grid_mapping=grid_mapping,
                     tile_grid=tile_grid,
                     ds_id=ds_id)
def _open_ml_dataset(self, dataset_config: DatasetConfigDict) \
        -> MultiLevelDataset:
    ds_id: str = dataset_config.get('Identifier')
    store_instance_id = dataset_config.get('StoreInstanceId')
    if store_instance_id:
        data_store_pool = self.get_data_store_pool()
        data_store = data_store_pool.get_store(store_instance_id)
        data_id = dataset_config.get('Path')
        open_params = dataset_config.get('StoreOpenParams') or {}
        # Inject chunk_cache_capacity into open parameters
        chunk_cache_capacity = self.get_dataset_chunk_cache_capacity(
            dataset_config)
        if chunk_cache_capacity \
                and (data_id.endswith('.zarr')
                     or data_id.endswith('.levels')) \
                and 'cache_size' not in open_params:
            open_params['cache_size'] = chunk_cache_capacity
        with self.measure_time(tag=f"opened dataset {ds_id!r}"
                                   f" from data store"
                                   f" {store_instance_id!r}"):
            dataset = data_store.open_data(data_id, **open_params)
        if isinstance(dataset, MultiLevelDataset):
            ml_dataset = dataset
        else:
            cube, _, _ = decode_cube(dataset,
                                     normalize=True,
                                     force_non_empty=True,
                                     force_geographic=True)
            ml_dataset = BaseMultiLevelDataset(cube, ds_id=ds_id)
    else:
        fs_type = dataset_config.get('FileSystem')
        if fs_type != 'memory':
            raise ServiceConfigError(f"Invalid FileSystem {fs_type!r}"
                                     f" in dataset configuration"
                                     f" {ds_id!r}")
        with self.measure_time(tag=f"opened dataset {ds_id!r}"
                                   f" from {fs_type!r}"):
            ml_dataset = _open_ml_dataset_from_python_code(
                self, dataset_config)
    augmentation = dataset_config.get('Augmentation')
    if augmentation:
        script_path = self.get_config_path(
            augmentation,
            f"'Augmentation' of dataset configuration {ds_id}")
        input_parameters = augmentation.get('InputParameters')
        callable_name = augmentation.get('Function', COMPUTE_VARIABLES)
        ml_dataset = augment_ml_dataset(ml_dataset,
                                        script_path,
                                        callable_name,
                                        self.get_ml_dataset,
                                        self.set_ml_dataset,
                                        input_parameters=input_parameters,
                                        exception_type=ServiceConfigError)
    return ml_dataset
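
# Sketch of the dataset-configuration shape consumed by _open_ml_dataset()
# (inferred from its .get() calls above; all values are placeholders, and the
# keys inside 'Augmentation' other than 'Function' and 'InputParameters' are
# not confirmed by this section):
#
#   dataset_config = {
#       'Identifier': 'my_dataset',          # ds_id
#       'StoreInstanceId': 'my_store',       # selects the data-store branch
#       'Path': 'cube.zarr',                 # data_id within the store
#       'StoreOpenParams': {},               # passed to data_store.open_data()
#       # 'FileSystem': 'memory',            # only read when StoreInstanceId is absent
#       'Augmentation': {                    # optional post-processing script
#           'Function': 'compute_variables', # defaults to COMPUTE_VARIABLES
#           'InputParameters': {},
#       },
#   }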
def test_encode_is_inverse(self):
    dataset = new_cube(variables=dict(a=1, b=2, c=3),
                       x_name='x', y_name='y')
    dataset = dataset.assign(
        d=xr.DataArray([8, 9, 10], dims='level'),
        crs=xr.DataArray(0, attrs=pyproj.CRS.from_string('CRS84').to_cf()),
    )
    cube, grid_mapping, rest = decode_cube(dataset)
    dataset2 = encode_cube(cube, grid_mapping, rest)
    self.assertEqual(set(dataset.data_vars), set(dataset2.data_vars))
    self.assertIn('crs', dataset2.data_vars)
def test_no_cube_vars_are_dropped(self):
    dataset = new_cube(variables=dict(a=1, b=2, c=3))
    dataset = dataset.assign(
        d=xr.DataArray([8, 9, 10], dims='level'),
        crs=xr.DataArray(0, attrs=pyproj.CRS.from_string('CRS84').to_cf()),
    )
    self.assertEqual({'a', 'b', 'c', 'd', 'crs'}, set(dataset.data_vars))
    cube, grid_mapping, rest = decode_cube(dataset)
    self.assertIsInstance(cube, xr.Dataset)
    self.assertIsInstance(grid_mapping, GridMapping)
    self.assertEqual({'a', 'b', 'c'}, set(cube.data_vars))
    self.assertEqual(pyproj.CRS.from_string('CRS84'), grid_mapping.crs)
    self.assertIsInstance(rest, xr.Dataset)
    self.assertEqual({'d', 'crs'}, set(rest.data_vars))