Example #1
    def test_it(self):
        ds = _get_test_dataset()

        ml_ds = BaseMultiLevelDataset(ds)

        self.assertIsInstance(ml_ds.ds_id, str)

        self.assertEqual(3, ml_ds.num_levels)
        self.assertEqual(
            TileGrid(3, 2, 1, 180, 180, (-180, -90, 180, 90), inv_y=False),
            ml_ds.tile_grid)

        ds0 = ml_ds.get_dataset(0)
        self.assertIs(ds, ds0)

        ds1 = ml_ds.get_dataset(1)
        self.assertIsNot(ds, ds1)
        self.assertEqual({'time': 14, 'lat': 360, 'lon': 720}, ds1.dims)

        ds2 = ml_ds.get_dataset(2)
        self.assertIsNot(ds, ds2)
        self.assertEqual({'time': 14, 'lat': 180, 'lon': 360}, ds2.dims)

        self.assertEqual([ds0, ds1, ds2], ml_ds.datasets)

        ml_ds.close()
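
The test above references a module-level helper `_get_test_dataset()`. A minimal sketch of such a helper, assuming a global 1440 x 720 grid with 14 time steps (the base shape implied by the level dimensions asserted above; the variable names are illustrative):

    import numpy as np
    import xarray as xr

    def _get_test_dataset() -> xr.Dataset:
        # Hypothetical fixture: a 0.25-degree global grid whose
        # downsampled levels match the dims asserted in the test
        # (level 1: 720 x 360, level 2: 360 x 180).
        width, height, num_times = 1440, 720, 14
        res = 360.0 / width
        lon = np.linspace(-180 + res / 2, 180 - res / 2, width)
        lat = np.linspace(-90 + res / 2, 90 - res / 2, height)
        noise = xr.DataArray(np.random.rand(num_times, height, width),
                             dims=('time', 'lat', 'lon'))
        return xr.Dataset(data_vars=dict(noise=noise),
                          coords=dict(time=np.arange(num_times),
                                      lat=lat, lon=lon))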
Example #2
    def test_it(self):
        ds = _get_test_dataset()

        ml_ds = BaseMultiLevelDataset(ds)

        self.assertIsInstance(ml_ds.ds_id, str)

        self.assertEqual(3, ml_ds.num_levels)
        self.assertIsInstance(ml_ds.tile_grid, ImageTileGrid)
        self.assertEqual((180, 180), ml_ds.tile_grid.tile_size)
        self.assertEqual(3, ml_ds.tile_grid.num_levels)

        ds0 = ml_ds.get_dataset(0)
        self.assertIsNot(ds, ds0)
        self.assertEqual({
            'time': 14,
            'lat': 720,
            'lon': 1440,
            'bnds': 2
        }, ds0.dims)

        ds1 = ml_ds.get_dataset(1)
        self.assertIsNot(ds, ds1)
        self.assertEqual({'time': 14, 'lat': 360, 'lon': 720}, ds1.dims)

        ds2 = ml_ds.get_dataset(2)
        self.assertIsNot(ds, ds2)
        self.assertEqual({'time': 14, 'lat': 180, 'lon': 360}, ds2.dims)

        self.assertEqual([ds0, ds1, ds2], ml_ds.datasets)

        ml_ds.close()
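
The two tests exercise the same behaviour against different revisions of the tile-grid API: Example #1 expects a `TileGrid` value object and `get_dataset(0)` to return the base dataset itself (`assertIs`), while Example #2 expects an `ImageTileGrid` instance, a copy at level zero (`assertIsNot`), and a base dataset that carries coordinate bounds (the `bnds` dimension). A minimal usage sketch outside the test harness, assuming the `_get_test_dataset()` helper sketched earlier:

    # Hypothetical walk over the pyramid: level 0 is the base
    # resolution, each further level halves width and height.
    ml_ds = BaseMultiLevelDataset(_get_test_dataset())
    for level in range(ml_ds.num_levels):
        level_ds = ml_ds.get_dataset(level)
        print(level, level_ds.dims['lon'], level_ds.dims['lat'])
    ml_ds.close()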
Example #3
    def write_data(self,
                   data: Union[xr.Dataset, MultiLevelDataset],
                   data_id: str,
                   replace: bool = False,
                   **write_params) -> str:
        assert_instance(data, (xr.Dataset, MultiLevelDataset), name='data')
        assert_instance(data_id, str, name='data_id')
        tile_size = write_params.pop('tile_size', None)
        if isinstance(data, MultiLevelDataset):
            ml_dataset = data
            if tile_size:
                warnings.warn('tile_size is ignored for multi-level datasets')
        else:
            base_dataset: xr.Dataset = data
            if tile_size:
                assert_instance(tile_size, int, name='tile_size')
                gm = GridMapping.from_dataset(base_dataset)
                x_name, y_name = gm.xy_dim_names
                base_dataset = base_dataset.chunk({
                    x_name: tile_size,
                    y_name: tile_size
                })
            ml_dataset = BaseMultiLevelDataset(base_dataset)
        fs, root, write_params = self.load_fs(write_params)
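        # Writer-specific options are popped from write_params here;
        # whatever remains is passed through to to_zarr() below.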
        consolidated = write_params.pop('consolidated', True)
        use_saved_levels = write_params.pop('use_saved_levels', False)
        base_dataset_id = write_params.pop('base_dataset_id', None)

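        # If saved levels are to be reused, rebuild the pyramid from
        # level zero only; each level written below is then re-read
        # from storage and fed back via set_dataset(), so later levels
        # derive from the data actually saved.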
        if use_saved_levels:
            ml_dataset = BaseMultiLevelDataset(ml_dataset.get_dataset(0),
                                               tile_grid=ml_dataset.tile_grid)

        path_class = get_fs_path_class(fs)
        data_path = path_class(data_id)
        fs.mkdirs(str(data_path), exist_ok=replace)

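        # Write one dataset per pyramid level: either a "{index}.zarr"
        # Zarr dataset or, for level zero with a given base_dataset_id,
        # a "{index}.link" file pointing at the existing base dataset.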
        for index in range(ml_dataset.num_levels):
            level_dataset = ml_dataset.get_dataset(index)
            if base_dataset_id and index == 0:
                # Write file "0.link" instead of copying
                # level zero dataset to "0.zarr".

                # Compute a relative base dataset path first
                base_dataset_path = path_class(root, base_dataset_id)
                data_parent_path = data_path.parent
                try:
                    base_dataset_path = base_dataset_path.relative_to(
                        data_parent_path)
                except ValueError as e:
                    raise DataStoreError(
                        f'invalid base_dataset_id: {base_dataset_id}') from e
                base_dataset_path = '..' / base_dataset_path

                # Then write relative base dataset path into link file
                link_path = data_path / f'{index}.link'
                with fs.open(str(link_path), mode='w') as fp:
                    fp.write(f'{base_dataset_path}')
            else:
                # Write level "{index}.zarr"
                level_path = data_path / f'{index}.zarr'
                zarr_store = fs.get_mapper(str(level_path), create=True)
                try:
                    level_dataset.to_zarr(zarr_store,
                                          mode='w' if replace else None,
                                          consolidated=consolidated,
                                          **write_params)
                except ValueError as e:
                    # TODO: remove already written data!
                    raise DataStoreError(f'Failed to write'
                                         f' dataset {data_id}: {e}') from e
                if use_saved_levels:
                    level_dataset = xr.open_zarr(zarr_store,
                                                 consolidated=consolidated)
                    ml_dataset.set_dataset(index, level_dataset)

        return data_id
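
A usage sketch for this writer, assuming it sits behind an xcube filesystem data store (the store id `'file'`, the root path, and the data id are illustrative; `_get_test_dataset()` is the helper sketched earlier):

    # Hypothetical end-to-end call: writes one "{index}.zarr" per
    # pyramid level into the "cube.levels" directory.
    from xcube.core.store import new_data_store

    store = new_data_store('file', root='/path/to/data')
    data_id = store.write_data(_get_test_dataset(),
                               'cube.levels',
                               replace=True,
                               tile_size=512)  # chunks the base dataset
    print(data_id)  # -> 'cube.levels'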