Exemple #1
0
 def test_iter_chunk_keys_with_more_base_dims(self):
   """Checks iter_chunk_keys when `base` has a dimension the chunks lack.

   Only 'x' is chunked here, while `base` also supplies 'y'; the expected
   keys all carry 'y': 0 and step 'x' from 30 to 33.
   """
   chunks = {'x': (3, 3)}
   offsets = {'x': 30, 'y': 0}
   actual = sorted(core.iter_chunk_keys(chunks, base=offsets))
   expected = [
       xarray_beam.ChunkKey({'x': x_start, 'y': 0}) for x_start in (30, 33)
   ]
   self.assertEqual(actual, expected)
Exemple #2
0
  def test_chunks_to_zarr(self):
    """Writes one chunk with ChunksToZarr, reads it back, and checks errors.

    The sub-tests cover every combination of passing / omitting a template
    and `zarr_chunks`. The trailing checks expect ValueError for a template
    without Dask chunks, a template whose index differs from the chunk, and
    a chunk that brings a new index.
    """
    source = xarray.Dataset(
        {'foo': ('x', np.arange(0, 60, 10))},
        coords={'x': np.arange(6)},
    )
    lazy_template = source.chunk()
    pairs = [
        (xarray_beam.ChunkKey({'x': 0}), source),
    ]

    def write_then_open(*args, **kwargs):
      # Each call writes `pairs` to its own fresh directory and reopens it.
      store = self.create_tempdir().full_path
      pairs | xarray_beam.ChunksToZarr(store, *args, **kwargs)
      return xarray.open_zarr(store, consolidated=True)

    with self.subTest('no template'):
      reopened = write_then_open()
      xarray.testing.assert_identical(source, reopened)

    with self.subTest('with template'):
      reopened = write_then_open(lazy_template)
      xarray.testing.assert_identical(source, reopened)

    with self.subTest('with zarr_chunks and with template'):
      reopened = write_then_open(lazy_template, {'x': 3})
      xarray.testing.assert_identical(source, reopened)
      self.assertEqual(reopened.chunks, {'x': (3, 3)})

    with self.subTest('with zarr_chunks and no template'):
      reopened = write_then_open(zarr_chunks={'x': 3})
      xarray.testing.assert_identical(source, reopened)
      self.assertEqual(reopened.chunks, {'x': (3, 3)})

    # A template that was never chunked is rejected at construction time.
    store = self.create_tempdir().full_path
    no_dask_message = 'template does not have any variables chunked with Dask'
    with self.assertRaisesRegex(ValueError, no_dask_message):
      xarray_beam.ChunksToZarr(store, source)

    # A template whose 'x' coordinate differs from the chunk's is rejected.
    store = self.create_tempdir().full_path
    mismatched = lazy_template.assign_coords(x=np.zeros(6))
    index_message = 'template and chunk indexes do not match'
    with self.assertRaisesRegex(ValueError, index_message):
      pairs | xarray_beam.ChunksToZarr(store, mismatched)

    # A chunk with an extra indexed dimension 'z' is rejected.
    expanded_pairs = [
        (xarray_beam.ChunkKey({'x': 0}), source.expand_dims(z=[1, 2])),
    ]
    store = self.create_tempdir().full_path
    new_index_message = 'unexpected new indexes found in chunk'
    with self.assertRaisesRegex(ValueError, new_index_message):
      expanded_pairs | xarray_beam.ChunksToZarr(store, mismatched)
Exemple #3
0
 def test_comparison(self):
   """Checks `<` / `>` between ChunkKeys and the errors for bad operands.

   Comparing against a string must raise TypeError, and comparing against a
   key with different dimensions must raise ValueError.
   """
   lower = xarray_beam.ChunkKey({'x': 0, 'y': 10})

   with self.assertRaises(TypeError):
     _ = lower < 'foo'

   with self.assertRaisesRegex(ValueError, 'Dimensions must match'):
     _ = lower < xarray_beam.ChunkKey({'x': 0})

   higher = xarray_beam.ChunkKey({'x': 0, 'y': 20})
   self.assertLess(lower, higher)
   self.assertGreater(higher, lower)
Exemple #4
0
 def test_use_as_beam_key(self):
   """Checks that equal ChunkKeys are grouped together by beam.GroupByKey.

   The third input key lists its dimensions in a different order from the
   first; the expected output places its value in the same group.
   """
   make_key = xarray_beam.ChunkKey
   keyed_values = [
       (make_key({'x': 0, 'y': 1}), 1),
       (make_key({'x': 0, 'y': 2}), 2),
       (make_key({'y': 1, 'x': 0}), 3),
   ]
   grouped = keyed_values | beam.GroupByKey()
   self.assertEqual(
       grouped,
       [
           (make_key({'x': 0, 'y': 1}), [1, 3]),
           (make_key({'x': 0, 'y': 2}), [2]),
       ],
   )
Exemple #5
0
  def test_operators(self):
    """Exercises the `|` and `-` operators of ChunkKey.

    `|` with a dict is expected to add the new offset, `-` with a set is
    expected to drop that dimension; a string operand must raise TypeError
    and an unknown dimension must raise ValueError.
    """
    base_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    with_z = base_key | {'z': 100}
    self.assertEqual(
        with_z, xarray_beam.ChunkKey({'x': 0, 'y': 10, 'z': 100})
    )

    without_x = base_key - {'x'}
    self.assertEqual(without_x, xarray_beam.ChunkKey({'y': 10}))

    with self.assertRaises(TypeError):
      _ = base_key - 'x'

    with self.assertRaisesRegex(ValueError, 'not found'):
      _ = base_key - {'z'}
Exemple #6
0
 def test_mapping(self):
   """Checks the read-only mapping interface of ChunkKey.

   Covers keys(), values(), membership tests and item lookup.
   """
   chunk_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

   dims = list(chunk_key.keys())
   self.assertEqual(dims, ['x', 'y'])

   offsets = list(chunk_key.values())
   self.assertEqual(offsets, [0, 10])

   self.assertIn('x', chunk_key)
   self.assertNotIn('z', chunk_key)
   self.assertEqual(chunk_key['x'], 0)
Exemple #7
0
  def test_immutability(self):
    """Checks that a ChunkKey rejects item assignment and works as a dict key."""
    frozen_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})

    # Item assignment must not be supported.
    with self.assertRaises(TypeError):
      frozen_key['z'] = 100

    # Using the key in a dict requires it to be hashable.
    lookup = {}
    lookup[frozen_key] = 'foo'
    self.assertEqual(lookup[frozen_key], 'foo')
Exemple #8
0
  def test_dataset_to_chunks_whole(self):
    """Checks that DatasetToChunks can emit the dataset as a single chunk.

    Both `chunks={'x': -1}` and an empty `chunks` mapping are expected to
    yield one (key, dataset) pair at offset x=0.
    """
    dataset = xarray.Dataset({'foo': ('x', np.arange(6))})
    expected = [(xarray_beam.ChunkKey({'x': 0}), dataset)]

    for chunk_spec in ({'x': -1}, {}):
      pipeline = test_util.EagerPipeline()
      to_chunks = xarray_beam.DatasetToChunks(dataset, chunks=chunk_spec)
      actual = pipeline | to_chunks
      self.assertIdenticalChunks(actual, expected)
Exemple #9
0
  def test_dataset_to_chunks_multiple(self):
    """Checks that DatasetToChunks splits a dataset into two chunks along 'x'.

    The same two chunks are expected whether the chunking comes from a
    pre-chunked dataset (with or without `num_threads=2`) or from an
    explicit `chunks` argument.
    """
    dataset = xarray.Dataset({'foo': ('x', np.arange(6))})
    first_half = (xarray_beam.ChunkKey({'x': 0}), dataset.head(x=3))
    second_half = (xarray_beam.ChunkKey({'x': 3}), dataset.tail(x=3))
    expected = [first_half, second_half]

    # Chunking taken from the dataset itself.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(dataset.chunk({'x': 3}))
    self.assertIdenticalChunks(pipeline | to_chunks, expected)

    # Same input, but with two threads requested.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(
        dataset.chunk({'x': 3}), num_threads=2
    )
    self.assertIdenticalChunks(pipeline | to_chunks, expected)

    # Chunking supplied explicitly for an unchunked dataset.
    pipeline = test_util.EagerPipeline()
    to_chunks = xarray_beam.DatasetToChunks(dataset, chunks={'x': 3})
    self.assertIdenticalChunks(pipeline | to_chunks, expected)
Exemple #10
0
  def test_to_slices(self):
    """Checks ChunkKey.to_slices for complete, extra and missing entries.

    Entries for dimensions not in the key are expected to be ignored. A
    missing entry for 'x' (offset 0) is expected to give `slice(None)`,
    while a missing entry for 'y' (offset 10) must raise ValueError.
    """
    chunk_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})
    full_slices = {'x': slice(0, 5, 1), 'y': slice(10, 20, 1)}

    self.assertEqual(chunk_key.to_slices({'x': 5, 'y': 10}), full_slices)

    with_extra = chunk_key.to_slices({'x': 5, 'y': 10, 'extra_key': 100})
    self.assertEqual(with_extra, full_slices)

    only_y = chunk_key.to_slices({'y': 10})
    self.assertEqual(only_y, {'x': slice(None), 'y': slice(10, 20, 1)})

    with self.assertRaisesRegex(ValueError, 'non-zero offset'):
      chunk_key.to_slices({'x': 5})
Exemple #11
0
 def test_iter_chunk_keys(self):
   """Checks that iter_chunk_keys yields one key per chunk combination.

   With two chunks along 'x' and three along 'y', six keys are expected,
   compared here in sorted order.
   """
   chunks = {'x': (3, 3), 'y': (2, 2, 2)}
   actual = sorted(core.iter_chunk_keys(chunks))
   expected = [
       xarray_beam.ChunkKey({'x': x_start, 'y': y_start})
       for x_start in (0, 3)
       for y_start in (0, 2, 4)
   ]
   self.assertEqual(actual, expected)
Exemple #12
0
 def test_repr(self):
   """Checks that repr() of a ChunkKey shows its offsets as a dict."""
   chunk_key = xarray_beam.ChunkKey({'x': 0, 'y': 10})
   self.assertEqual(repr(chunk_key), "ChunkKey({'x': 0, 'y': 10})")