def test_append(self):
    """Appending three single-step datasets to one path must yield a
    merged dataset with a 3-step time axis."""
    ds_io = MemDatasetIO()
    datasets = [
        new_test_dataset('2017-02-01', 180, temperature=1.2, precipitation=2.1),
        new_test_dataset('2017-02-02', 180, temperature=2.3, precipitation=3.2),
        new_test_dataset('2017-02-03', 180, temperature=3.4, precipitation=4.3),
    ]
    for dataset in datasets:
        ds_io.append(dataset, 'test.nc')
    merged = ds_io._datasets.get('test.nc')
    self.assertIsNotNone(merged)
    self.assertIn('time', merged)
    self.assertIn('temperature', merged)
    self.assertEqual(('time', 'lat', 'lon'), merged.temperature.dims)
    self.assertEqual((3, 180, 360), merged.temperature.shape)
    # time coordinate must be the three appended dates, in order
    expected_time = xr.DataArray(
        pd.to_datetime(['2017-02-01', '2017-02-02', '2017-02-03']))
    np.testing.assert_equal(expected_time.values, merged.time.values)
def test_not_chunked(self):
    """An un-chunked dataset must be rejected with a ValueError."""
    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=0.4,
                               temperature=275.2)
    with self.assertRaises(ValueError) as cm:
        get_empty_dataset_chunks(dataset)
    self.assertEqual('data array not chunked', str(cm.exception))
def test_chunk_dataset(self):
    """chunk_dataset() must write format-specific chunking encodings:
    'chunks' for zarr, 'chunksizes' for netcdf4, nothing otherwise."""
    dataset = new_test_dataset([
        "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2010-01-05"
    ],
                               precipitation=0.4,
                               temperature=275.2)

    # zarr format: encoding key is 'chunks'
    chunked = chunk_dataset(dataset,
                            chunk_sizes=dict(time=1, lat=10, lon=20),
                            format_name="zarr")
    for var_name in ('precipitation', 'temperature'):
        self.assertEqual({'chunks': (1, 10, 20)}, chunked[var_name].encoding)

    # netcdf4 format: encoding key is 'chunksizes'
    chunked = chunk_dataset(dataset,
                            chunk_sizes=dict(time=1, lat=20, lon=40),
                            format_name="netcdf4")
    for var_name in ('precipitation', 'temperature'):
        self.assertEqual({'chunksizes': (1, 20, 40)}, chunked[var_name].encoding)

    # no format: no chunking encoding is written at all
    chunked = chunk_dataset(dataset, chunk_sizes=dict(time=1, lat=20, lon=40))
    for var_name in ('precipitation', 'temperature'):
        self.assertEqual({}, chunked[var_name].encoding)
def test_vars_to_dim(self):
    """vars_to_dim() on the xcube accessor must return an xr.Dataset."""
    dataset = new_test_dataset([
        "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2010-01-05"
    ],
                               precipitation=0.4,
                               temperature=275.2)
    result = dataset.xcube.vars_to_dim()
    self.assertIsInstance(result, xr.Dataset)
def test_non_empty(self):
    """Chunked data with no NaN chunks must map each variable to an
    empty tuple of chunk indices."""
    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=0.4,
                               temperature=275.2)
    dataset = dataset.chunk(dict(time=1, lat=90, lon=90))
    result = get_empty_dataset_chunks(dataset)
    expected = {'precipitation': (), 'temperature': ()}
    self.assertEqual(expected, result)
def test_unchunk_dataset(self):
    """Re-chunking for zarr without chunk sizes must drop the 'chunks'
    encoding while leaving other encodings (e.g. _FillValue) intact."""
    dataset = new_test_dataset([
        "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2010-01-05"
    ],
                               precipitation=0.4,
                               temperature=275.2)
    # simulate previously chunked variables with extra encoding entries
    for var in dataset.data_vars.values():
        var.encoding.update({"chunks": (5, 180, 360), "_FillValue": -999.0})
    chunked = chunk_dataset(dataset, format_name="zarr")
    for var_name in ('precipitation', 'temperature'):
        self.assertEqual({"_FillValue": -999.0}, chunked[var_name].encoding)
def test_not_chunked(self):
    """For an un-chunked dataset, get_empty_dataset_chunks() must yield
    lazily (an iterator, not a list/tuple) with an empty chunk-index
    tuple per variable."""
    # collections.Iterator was a deprecated alias removed in Python 3.10;
    # collections.abc.Iterator is the correct ABC.
    import collections.abc
    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=0.4,
                               temperature=275.2)
    empty_dataset_chunks = get_empty_dataset_chunks(dataset)
    self.assertIsInstance(empty_dataset_chunks, collections.abc.Iterator)
    self.assertFalse(isinstance(empty_dataset_chunks, (list, tuple)))
    self.assertEqual([('precipitation', ()), ('temperature', ())],
                     [(v, tuple(c)) for v, c in empty_dataset_chunks])
def test_dump_dataset(self):
    """dump_dataset() must render the dataset (or selected variables)
    and include encoding sections only when show_var_encoding=True."""
    dataset = new_test_dataset([
        "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2010-01-05"
    ],
                               precipitation=0.4,
                               temperature=275.2)
    for var in dataset.variables.values():
        var.encoding.update({"_FillValue": 999.0})
    # (removed leftover debug print(dataset.dims) — tests should not print)

    # default: no encoding info in the dump
    text = dump_dataset(dataset)
    self.assertIn("<xarray.Dataset>", text)
    self.assertIn("Dimensions:        (lat: 180, lon: 360, time: 5)\n", text)
    self.assertIn("Coordinates:\n", text)
    self.assertIn("  * lon            (lon) float64 ", text)
    self.assertIn("Data variables:\n", text)
    self.assertIn("    precipitation  (time, lat, lon) float64 ", text)
    self.assertNotIn("Encoding for coordinate variable 'lat':\n", text)
    self.assertNotIn("Encoding for data variable 'temperature':\n", text)
    self.assertNotIn("    _FillValue:  999.0\n", text)

    # show_var_encoding=True: encoding sections appear
    text = dump_dataset(dataset, show_var_encoding=True)
    self.assertIn("<xarray.Dataset>", text)
    self.assertIn("Dimensions:        (lat: 180, lon: 360, time: 5)\n", text)
    self.assertIn("Coordinates:\n", text)
    self.assertIn("  * lon            (lon) float64 ", text)
    self.assertIn("Data variables:\n", text)
    self.assertIn("    precipitation  (time, lat, lon) float64 ", text)
    self.assertIn("Encoding for coordinate variable 'lat':\n", text)
    self.assertIn("Encoding for data variable 'temperature':\n", text)
    self.assertIn("    _FillValue:  999.0\n", text)

    # variable selection: dump only 'precipitation', no encoding
    text = dump_dataset(dataset, ["precipitation"])
    self.assertIn(
        "<xarray.DataArray 'precipitation' (time: 5, lat: 180, lon: 360)>\n",
        text)
    self.assertNotIn("Encoding:\n", text)
    self.assertNotIn("    _FillValue:  999.0", text)

    # variable selection with encoding
    text = dump_dataset(dataset, ["precipitation"], show_var_encoding=True)
    self.assertIn(
        "<xarray.DataArray 'precipitation' (time: 5, lat: 180, lon: 360)>\n",
        text)
    self.assertIn("Encoding:\n", text)
    self.assertIn("    _FillValue:  999.0", text)
def test_levels(self):
    """levels() must produce a 3-level spatial pyramid whose spatial
    extent halves per level while the time axis stays intact."""
    dataset = new_test_dataset([
        "2010-01-01", "2010-01-02", "2010-01-03", "2010-01-04", "2010-01-05"
    ],
                               precipitation=0.4,
                               temperature=275.2)
    levels = dataset.xcube.levels(spatial_tile_shape=(45, 45))
    self.assertIsInstance(levels, list)
    self.assertEqual(3, len(levels))
    for level in levels:
        self.assertIsInstance(level, xr.Dataset)
        self.assertIn("precipitation", level)
        self.assertIn("temperature", level)
    self.assertEqual([(5, 180, 360), (5, 90, 180), (5, 45, 90)],
                     [level.precipitation.shape for level in levels])
    # every level is tiled into 45x45 spatial chunks with time chunked by 1
    self.assertEqual([((1, 1, 1, 1, 1),
                       (45, 45, 45, 45),
                       (45, 45, 45, 45, 45, 45, 45, 45)),
                      ((1, 1, 1, 1, 1),
                       (45, 45),
                       (45, 45, 45, 45)),
                      ((1, 1, 1, 1, 1),
                       (45,),
                       (45, 45))],
                     [level.precipitation.chunks for level in levels])
def setUp(self) -> None:
    """Build a 30-step input cube with irregular time spacing
    (1/1/3/4/2-day cycle), chunked as (time=1, lat=90, lon=180)."""
    num_times = 30
    periods = ['1D', '1D', '3D', '4D', '2D']
    t = pd.to_datetime('2017-07-01T10:30:15Z', utc=True)
    time = []
    for i in range(num_times):
        time.append(t.isoformat())
        # advance by the cycling period pattern
        t += pd.to_timedelta(periods[i % len(periods)])
    # linearly increasing temperature, decreasing precipitation
    temperature = tuple(272 + 0.1 * i for i in range(num_times))
    precipitation = tuple(120 - 0.2 * i for i in range(num_times))
    input_cube = new_test_dataset(time,
                                  temperature=temperature,
                                  precipitation=precipitation)
    self.input_cube = chunk_dataset(input_cube,
                                    chunk_sizes=dict(time=1, lat=90, lon=180))
def test_all_empty(self):
    """With all-NaN data every chunk of every variable must be reported
    as empty."""
    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=np.nan,
                               temperature=np.nan)
    dataset = dataset.chunk(dict(time=1, lat=90, lon=90))
    # chunk grid: 2 time chunks x 2 lat chunks (180/90) x 4 lon chunks (360/90)
    all_indices = tuple((t, y, x)
                        for t in range(2)
                        for y in range(2)
                        for x in range(4))
    self.assertEqual({'precipitation': all_indices,
                      'temperature': all_indices},
                     get_empty_dataset_chunks(dataset))
def test_resample(self):
    """3-day 'max' resampling of a 30-step daily cube must yield 10
    steps, each the maximum of its 3-day window."""
    num_times = 30
    # daily dates 2017-07-01..2017-07-30 with linearly varying values
    time, temperature, precipitation = zip(
        *[(('2017-07-0%s' if i < 9 else '2017-07-%s') % (i + 1),
           272 + 0.1 * i, 120 - 0.2 * i) for i in range(num_times)])
    ds1 = new_test_dataset(time,
                           temperature=temperature,
                           precipitation=precipitation)
    ds2 = resample(ds1, '3D', 'max')
    self.assertIsNot(ds2, ds1)
    self.assertIn('time', ds2)
    self.assertIn('temperature', ds2)
    self.assertIn('precipitation', ds2)
    self.assertEqual(('time', ), ds2.time.dims)
    self.assertEqual(('time', 'lat', 'lon'), ds2.temperature.dims)
    self.assertEqual(('time', 'lat', 'lon'), ds2.precipitation.dims)
    # use integer division: shapes are tuples of ints, num_times / 3
    # would compare a float against them
    self.assertEqual((num_times // 3, ), ds2.time.shape)
    self.assertEqual((num_times // 3, 180, 360), ds2.temperature.shape)
    self.assertEqual((num_times // 3, 180, 360), ds2.precipitation.shape)
    np.testing.assert_equal(
        ds2.time.values,
        np.array(
            pd.to_datetime([
                '2017-07-01', '2017-07-04', '2017-07-07', '2017-07-10',
                '2017-07-13', '2017-07-16', '2017-07-19', '2017-07-22',
                '2017-07-25', '2017-07-28'
            ])))
    # 'max' picks the last (largest) value of each 3-day window
    np.testing.assert_allclose(
        ds2.temperature.values[..., 0, 0],
        np.array([
            272.2, 272.5, 272.8, 273.1, 273.4, 273.7, 274., 274.3, 274.6,
            274.9
        ]))
    # precipitation decreases, so 'max' picks the first value of each window
    np.testing.assert_allclose(
        ds2.precipitation.values[..., 0, 0],
        np.array([
            120., 119.4, 118.8, 118.2, 117.6, 117., 116.4, 115.8, 115.2,
            114.6
        ]))
def test_all_empty(self):
    """With all-NaN chunked data, get_empty_dataset_chunks() must yield
    lazily and report every chunk index of every variable."""
    # collections.Iterator was a deprecated alias removed in Python 3.10;
    # collections.abc.Iterator is the correct ABC.
    import collections.abc
    dataset = new_test_dataset(["2010-01-01", "2010-01-02"],
                               precipitation=np.nan,
                               temperature=np.nan) \
        .chunk(dict(time=1, lat=90, lon=90))
    empty_dataset_chunks = get_empty_dataset_chunks(dataset)
    self.assertIsInstance(empty_dataset_chunks, collections.abc.Iterator)
    self.assertFalse(isinstance(empty_dataset_chunks, (list, tuple)))
    # chunk grid: 2 time chunks x 2 lat chunks (180/90) x 4 lon chunks (360/90)
    all_indices = tuple((t, y, x)
                        for t in range(2)
                        for y in range(2)
                        for x in range(4))
    self.assertEqual([('precipitation', all_indices),
                      ('temperature', all_indices)],
                     [(v, tuple(c)) for v, c in empty_dataset_chunks])