Exemple #1
0
 def test_not_chunked(self):
     """Expect a ValueError when the dataset is passed without chunking."""
     time_labels = ["2010-01-01", "2010-01-02"]
     # Unlike the sibling tests, no ``.chunk(...)`` is applied here.
     plain_dataset = new_test_dataset(time_labels,
                                      precipitation=0.4,
                                      temperature=275.2)

     with self.assertRaises(ValueError) as raised:
         get_empty_dataset_chunks(plain_dataset)

     # The error text is part of the checked contract.
     self.assertEqual('data array not chunked', str(raised.exception))
Exemple #2
0
def _prune(input_path: str = None, dry_run: bool = False, monitor=None):
    """Delete the block files of all empty chunks of a cube in ZARR format.

    :param input_path: Path of the cube to prune. Its guessed format
        must equal ``FORMAT_NAME_ZARR``.
    :param dry_run: Forwarded unchanged to ``_delete_block_file``.
    :param monitor: Callable receiving one progress message per call.
        It is also forwarded to ``_delete_block_file``. If ``None``,
        progress messages are discarded.
    :raises click.ClickException: If the guessed input format is not ZARR.
    """
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_cube

    if monitor is None:
        # The declared default is None, yet the monitor is called
        # unconditionally below; without a fallback the default would
        # fail with "TypeError: 'NoneType' object is not callable".
        def monitor(*_args, **_kwargs):
            pass

    input_format = guess_dataset_format(input_path)
    if input_format != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a cube in ZARR format")

    monitor(f'Opening cube from {input_path!r}...')
    with open_cube(input_path) as cube:
        monitor('Identifying empty blocks...')
        empty_chunks = get_empty_dataset_chunks(cube)

    # Deletion runs after the cube has been closed again.
    num_deleted = 0
    for var_name, chunk_indices in empty_chunks.items():
        monitor(
            f'Deleting {len(chunk_indices)} empty block file(s) for variable {var_name!r}...'
        )
        for chunk_index in chunk_indices:
            ok = _delete_block_file(input_path, var_name, chunk_index, dry_run,
                                    monitor)
            # Only deletions reported as successful are counted.
            if ok:
                num_deleted += 1

    monitor(f'Done, {num_deleted} block file(s) deleted.')
Exemple #3
0
 def test_non_empty(self):
     """Expect no empty chunk indices when both variables hold numbers."""
     cube = new_test_dataset(["2010-01-01", "2010-01-02"],
                             precipitation=0.4,
                             temperature=275.2)
     chunked_cube = cube.chunk(dict(time=1, lat=90, lon=90))

     # Each variable is still listed, but with an empty tuple of indices.
     expected = {'precipitation': (), 'temperature': ()}
     self.assertEqual(expected, get_empty_dataset_chunks(chunked_cube))
Exemple #4
0
 def test_not_chunked(self):
     """Expect a lazy iterator with no empty chunks for an unchunked dataset.

     The result must be an iterator (not a list or tuple) that yields
     one ``(variable name, chunk indices)`` pair per variable, each
     with no chunk indices.
     """
     # ``collections.Iterator`` was removed in Python 3.10; the ABC is
     # only available from ``collections.abc``.
     from collections.abc import Iterator

     dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                                precipitation=0.4,
                                temperature=275.2)
     empty_dataset_chunks = get_empty_dataset_chunks(dataset)
     self.assertIsInstance(empty_dataset_chunks, Iterator)
     self.assertNotIsInstance(empty_dataset_chunks, (list, tuple))
     # Materialize the inner iterables so they can be compared to tuples.
     self.assertEqual([('precipitation', ()),
                       ('temperature', ())],
                      [(v, tuple(c)) for v, c in empty_dataset_chunks])
Exemple #5
0
 def test_all_empty(self):
     """Expect every chunk index to be reported when all values are NaN."""
     dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                                precipitation=np.nan,
                                temperature=np.nan)
     dataset = dataset.chunk(dict(time=1, lat=90, lon=90))

     # The same 16 chunk indices are expected for both variables.
     every_chunk_index = (
         (0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
         (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
         (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3),
         (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, 3),
     )
     expected = {
         'precipitation': every_chunk_index,
         'temperature': every_chunk_index,
     }
     self.assertEqual(expected, get_empty_dataset_chunks(dataset))
Exemple #6
0
 def test_all_empty(self):
     """Expect a lazy iterator reporting every chunk when all values are NaN.

     The result must be an iterator (not a list or tuple) that yields
     one ``(variable name, chunk indices)`` pair per variable, each
     listing all 16 chunk indices.
     """
     # ``collections.Iterator`` was removed in Python 3.10; the ABC is
     # only available from ``collections.abc``.
     from collections.abc import Iterator

     dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                                precipitation=np.nan,
                                temperature=np.nan) \
         .chunk(dict(time=1, lat=90, lon=90))
     empty_dataset_chunks = get_empty_dataset_chunks(dataset)
     self.assertIsInstance(empty_dataset_chunks, Iterator)
     self.assertNotIsInstance(empty_dataset_chunks, (list, tuple))

     # Both variables are expected to report the same chunk indices,
     # in this order.
     all_chunk_indices = (
         (0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3),
         (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3),
         (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3),
         (1, 1, 0), (1, 1, 1), (1, 1, 2), (1, 1, 3),
     )
     # Materialize the inner iterables so they can be compared to tuples.
     self.assertEqual([('precipitation', all_chunk_indices),
                       ('temperature', all_chunk_indices)],
                      [(v, tuple(c)) for v, c in empty_dataset_chunks])
Exemple #7
0
def _prune(input_path: str, dry_run: bool, monitor: Monitor):
    """Delete the block files of all empty chunks of a Zarr dataset.

    :param input_path: Path of the dataset to prune. Its guessed format
        must equal ``FORMAT_NAME_ZARR``.
    :param dry_run: Forwarded unchanged to ``_delete_block_file``.
    :param monitor: Callable invoked with a message and an integer
        (1 for overall progress, 2 for per-variable messages); it is
        also forwarded to ``_delete_block_file``.
    :raises click.ClickException: If the guessed input format is not Zarr.
    """
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format, open_dataset

    if guess_dataset_format(input_path) != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a dataset in Zarr format")

    monitor(f'Opening dataset from {input_path!r}...', 1)
    total_removed = 0
    with open_dataset(input_path) as dataset:
        monitor('Identifying empty chunks...', 1)
        for var_name, chunk_indices in get_empty_dataset_chunks(dataset):
            # ``seen`` stays 0 when the variable yields no empty chunk.
            seen = 0
            removed = 0
            for seen, chunk_index in enumerate(chunk_indices, start=1):
                if seen == 1:
                    # Announce the variable once, on its first empty chunk.
                    monitor(
                        f'Found empty chunks in variable {var_name!r}, '
                        f'deleting block files...', 2)
                if _delete_block_file(input_path, var_name, chunk_index,
                                      dry_run, monitor):
                    removed += 1

            if seen == 0:
                # Nothing found for this variable, so nothing to report.
                continue

            if removed > 0:
                monitor(
                    f'Deleted {removed} block file(s) '
                    f'for variable {var_name!r}.', 2)
            else:
                monitor(
                    f'No block files for variable {var_name!r} '
                    f'could be deleted.', 2)
            total_removed += removed

    monitor(f'Done, {total_removed} block file(s) deleted total.', 1)