def test_it(self):
    ds = _get_test_dataset()
    ml_ds = BaseMultiLevelDataset(ds)
    self.assertIsInstance(ml_ds.ds_id, str)
    self.assertEqual(3, ml_ds.num_levels)
    self.assertEqual(
        TileGrid(3, 2, 1, 180, 180, (-180, -90, 180, 90), inv_y=False),
        ml_ds.tile_grid
    )
    ds0 = ml_ds.get_dataset(0)
    self.assertIs(ds, ds0)
    ds1 = ml_ds.get_dataset(1)
    self.assertIsNot(ds, ds1)
    self.assertEqual({'time': 14, 'lat': 360, 'lon': 720}, ds1.dims)
    ds2 = ml_ds.get_dataset(2)
    self.assertIsNot(ds, ds2)
    self.assertEqual({'time': 14, 'lat': 180, 'lon': 360}, ds2.dims)
    self.assertEqual([ds0, ds1, ds2], ml_ds.datasets)
    ml_ds.close()
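# A minimal sketch of what the _get_test_dataset() fixture could look like,
# reconstructed from the assertions in the tests above (the actual helper
# lives elsewhere in the test module and may differ): a global 1440 x 720
# dataset with 14 time steps and latitude bounds. The variable name "noise"
# is hypothetical.
import numpy as np
import pandas as pd
import xarray as xr

def _get_test_dataset() -> xr.Dataset:
    res = 0.25  # 0.25-degree cells: 1440 x 720 covers the globe
    lon = np.arange(-180 + res / 2, 180, res)        # 1440 cell centers
    lat = np.arange(-90 + res / 2, 90, res)          # 720 cell centers
    time = pd.date_range('2022-01-01', periods=14)   # 14 time steps
    # Lower/upper cell boundaries, giving the 'bnds' dimension of size 2
    lat_bnds = np.stack([lat - res / 2, lat + res / 2], axis=1)
    noise = xr.DataArray(
        np.zeros((14, 720, 1440), dtype=np.float32),
        dims=('time', 'lat', 'lon'),
        coords=dict(time=time, lat=lat, lon=lon),
    )
    return xr.Dataset(
        data_vars=dict(noise=noise),
        coords=dict(lat_bnds=(('lat', 'bnds'), lat_bnds)),
    )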
def test_it(self):
    ds = _get_test_dataset()
    ml_ds = BaseMultiLevelDataset(ds)
    self.assertIsInstance(ml_ds.ds_id, str)
    self.assertEqual(3, ml_ds.num_levels)
    self.assertIsInstance(ml_ds.tile_grid, ImageTileGrid)
    self.assertEqual((180, 180), ml_ds.tile_grid.tile_size)
    self.assertEqual(3, ml_ds.tile_grid.num_levels)
    ds0 = ml_ds.get_dataset(0)
    self.assertIsNot(ds, ds0)
    self.assertEqual(
        {'time': 14, 'lat': 720, 'lon': 1440, 'bnds': 2},
        ds0.dims
    )
    ds1 = ml_ds.get_dataset(1)
    self.assertIsNot(ds, ds1)
    self.assertEqual({'time': 14, 'lat': 360, 'lon': 720}, ds1.dims)
    ds2 = ml_ds.get_dataset(2)
    self.assertIsNot(ds, ds2)
    self.assertEqual({'time': 14, 'lat': 180, 'lon': 360}, ds2.dims)
    self.assertEqual([ds0, ds1, ds2], ml_ds.datasets)
    ml_ds.close()
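# A standalone sketch (not part of the test suite) of the level sizes the
# assertions above imply: each pyramid level halves the spatial resolution
# of the 1440 x 720 base dataset, so three levels are produced in total.
for level in range(3):
    print(f'level {level}: {1440 >> level} x {720 >> level}')
# level 0: 1440 x 720
# level 1: 720 x 360
# level 2: 360 x 180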
def write_data(self,
               data: Union[xr.Dataset, MultiLevelDataset],
               data_id: str,
               replace: bool = False,
               **write_params) -> str:
    assert_instance(data, (xr.Dataset, MultiLevelDataset), name='data')
    assert_instance(data_id, str, name='data_id')
    tile_size = write_params.pop('tile_size', None)
    if isinstance(data, MultiLevelDataset):
        ml_dataset = data
        if tile_size:
            warnings.warn('tile_size is ignored for multi-level datasets')
    else:
        base_dataset: xr.Dataset = data
        if tile_size:
            assert_instance(tile_size, int, name='tile_size')
            gm = GridMapping.from_dataset(base_dataset)
            x_name, y_name = gm.xy_dim_names
            base_dataset = base_dataset.chunk({
                x_name: tile_size,
                y_name: tile_size
            })
        ml_dataset = BaseMultiLevelDataset(base_dataset)
    fs, root, write_params = self.load_fs(write_params)
    consolidated = write_params.pop('consolidated', True)
    use_saved_levels = write_params.pop('use_saved_levels', False)
    base_dataset_id = write_params.pop('base_dataset_id', None)
    if use_saved_levels:
        ml_dataset = BaseMultiLevelDataset(ml_dataset.get_dataset(0),
                                           tile_grid=ml_dataset.tile_grid)
    path_class = get_fs_path_class(fs)
    data_path = path_class(data_id)
    fs.mkdirs(str(data_path), exist_ok=replace)
    for index in range(ml_dataset.num_levels):
        level_dataset = ml_dataset.get_dataset(index)
        if base_dataset_id and index == 0:
            # Write file "0.link" instead of copying
            # level zero dataset to "0.zarr".
            # Compute a relative base dataset path first
            base_dataset_path = path_class(root, base_dataset_id)
            data_parent_path = data_path.parent
            try:
                base_dataset_path = base_dataset_path.relative_to(
                    data_parent_path)
            except ValueError as e:
                raise DataStoreError(
                    f'invalid base_dataset_id: {base_dataset_id}') from e
            base_dataset_path = '..' / base_dataset_path
            # Then write relative base dataset path into link file
            link_path = data_path / f'{index}.link'
            with fs.open(str(link_path), mode='w') as fp:
                fp.write(f'{base_dataset_path}')
        else:
            # Write level "{index}.zarr"
            level_path = data_path / f'{index}.zarr'
            zarr_store = fs.get_mapper(str(level_path), create=True)
            try:
                level_dataset.to_zarr(zarr_store,
                                      mode='w' if replace else None,
                                      consolidated=consolidated,
                                      **write_params)
            except ValueError as e:
                # TODO: remove already written data!
                raise DataStoreError(
                    f'Failed to write dataset {data_id}: {e}') from e
            if use_saved_levels:
                level_dataset = xr.open_zarr(zarr_store,
                                             consolidated=consolidated)
                ml_dataset.set_dataset(index, level_dataset)
    return data_id
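# A minimal usage sketch for write_data(), assuming it belongs to a
# filesystem data store; new_fs_data_store, the root path, and the data ids
# below are assumptions for illustration, not taken from the code above.
from xcube.core.store import new_fs_data_store

store = new_fs_data_store('file', root='/tmp/cubes')  # hypothetical setup
dataset = _get_test_dataset()

# Write a multi-level pyramid; tile_size re-chunks the base dataset
# to 512 x 512 spatial tiles before the levels are derived.
store.write_data(dataset, 'demo.levels', tile_size=512, replace=True)

# Alternatively, reference an existing base dataset via a "0.link" file
# instead of copying level zero into "0.zarr".
store.write_data(dataset, 'demo_linked.levels',
                 base_dataset_id='demo_base.zarr')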