def test_trim_timing_info(self):
    ds = create_test_data(0)
    from xbout.load import _BOUT_PER_PROC_VARIABLES

    # remove a couple of entries from _BOUT_PER_PROC_VARIABLES so we test that
    # _trim does not fail if not all of them are present
    _BOUT_PER_PROC_VARIABLES = _BOUT_PER_PROC_VARIABLES[:-2]

    for v in _BOUT_PER_PROC_VARIABLES:
        ds[v] = 42.0
    ds = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1)

    expected = create_test_data(0)
    xrt.assert_equal(ds, expected)
def test_dask_distributed_read_netcdf_integration_test(loop, engine, autoclose,
                                                       nc_format):
    if engine == 'h5netcdf' and autoclose:
        pytest.skip('h5netcdf does not support autoclose')
    if nc_format not in NC_FORMATS[engine]:
        pytest.skip('invalid format for engine')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:
                original = create_test_data()
                original.to_netcdf(filename, engine=engine, format=nc_format)

                with xr.open_dataset(filename,
                                     chunks=chunks,
                                     engine=engine,
                                     autoclose=autoclose) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def test_no_trim(self):
    ds = create_test_data(0)
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
    actual = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1)
    xrt.assert_equal(actual, ds)
def test_dask_distributed_netcdf_roundtrip(
        loop, tmp_netcdf_filename, engine, nc_format):

    if engine not in ENGINES:
        pytest.skip('engine not available')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            original = create_test_data().chunk(chunks)

            if engine == 'scipy':
                with pytest.raises(NotImplementedError):
                    original.to_netcdf(tmp_netcdf_filename,
                                       engine=engine, format=nc_format)
                return

            original.to_netcdf(tmp_netcdf_filename,
                               engine=engine, format=nc_format)

            with xr.open_dataset(tmp_netcdf_filename,
                                 chunks=chunks, engine=engine) as restored:
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
def test_infer_boundaries_2d_parallelization_doublenull(
        self, xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries):
    """
    Numbering scheme for nxpe=3, nype=4

    y  9 10 11
    ^  6  7  8
    |  3  4  5
    |  0  1  2
     -----> x
    """
    ds = create_test_data(0)
    ds["jyseps2_1"] = 3
    ds["jyseps1_2"] = 11
    ds["ny_inner"] = 8
    ds["MYSUB"] = 4
    ds["PE_XIND"] = xproc
    ds["PE_YIND"] = yproc
    actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
        ds, nxpe, nype)

    assert actual_lower_boundaries == lower_boundaries
    assert actual_upper_boundaries == upper_boundaries
# The (c, s, a, b) signature and the bare `yield` suggest this runs under
# distributed's coroutine-style test harness (e.g. @gen_cluster(client=True)),
# which supplies the client, scheduler and workers and drives the coroutine.
def test_async(c, s, a, b):
    x = create_test_data()
    assert not dask.is_dask_collection(x)
    y = x.chunk({'dim2': 4}) + 10
    assert dask.is_dask_collection(y)
    assert dask.is_dask_collection(y.var1)
    assert dask.is_dask_collection(y.var2)

    z = y.persist()
    assert str(z)

    assert dask.is_dask_collection(z)
    assert dask.is_dask_collection(z.var1)
    assert dask.is_dask_collection(z.var2)

    # persist() keeps z a dask collection but schedules the work on the
    # cluster, so its graph shrinks and it becomes backed by futures
    assert len(y.__dask_graph__()) > len(z.__dask_graph__())
    assert not futures_of(y)
    assert futures_of(z)

    future = c.compute(z)
    w = yield future
    assert not dask.is_dask_collection(w)

    assert_allclose(x + 10, w)

    assert s.tasks
def test_dask_distributed_zarr_integration_test(loop, consolidated, compute):
    if consolidated:
        pytest.importorskip("zarr", minversion="2.2.1.dev2")
        write_kwargs = {"consolidated": True}
        read_kwargs = {"backend_kwargs": {"consolidated": True}}
    else:
        write_kwargs = read_kwargs = {}
    chunks = {"dim1": 4, "dim2": 3, "dim3": 5}
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix=".zarrc") as filename:
                maybe_futures = original.to_zarr(filename, compute=compute,
                                                 **write_kwargs)
                if not compute:
                    # with compute=False, to_zarr returns a delayed object and
                    # the write only happens once it is computed
                    maybe_futures.compute()
                with xr.open_dataset(filename, chunks="auto", engine="zarr",
                                     **read_kwargs) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def test_infer_boundaries_2d_parallelization_doublenull_by_filenum(
        self, xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries):
    """
    Numbering scheme for nxpe=3, nype=4

    y  9 10 11
    ^  6  7  8
    |  3  4  5
    |  0  1  2
     -----> x
    """
    filenum = yproc * nxpe + xproc
    ds = create_test_data(0)
    ds["jyseps2_1"] = 3
    ds["jyseps1_2"] = 11
    ds["ny_inner"] = 8
    ds["MYSUB"] = 4
    ds.encoding["source"] = "folder0/BOUT.dmp." + str(filenum) + ".nc"
    actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
        ds, nxpe, nype)

    assert actual_lower_boundaries == lower_boundaries
    assert actual_upper_boundaries == upper_boundaries
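# The *_doublenull boundary-inference tests use the same processor layout
# expressed two ways: one sets PE_XIND/PE_YIND directly, the other encodes
# them in the dump-file number via filenum = yproc * nxpe + xproc. A minimal
# sketch of the inverse mapping (split_filenum is a hypothetical helper for
# illustration, not part of xbout's API):
def split_filenum(filenum, nxpe):
    yproc, xproc = divmod(filenum, nxpe)
    return xproc, yproc


# e.g. split_filenum(7, nxpe=3) == (1, 2): file 7 sits at x=1, y=2 in the
# nxpe=3, nype=4 grid drawn in the docstrings above.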
def test_trim_guards(self):
    ds = create_test_data(0)
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
    actual = _trim(ds, guards={'time': 2}, keep_boundaries={}, nxpe=1, nype=1)
    selection = {'time': slice(2, -2)}
    expected = ds.isel(**selection)
    xrt.assert_equal(expected, actual)
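# The trimming tests expect _trim to slice `width` guard cells off each end of
# a guarded dimension unless that edge carries a kept boundary. A minimal
# sketch of just the slicing step, assuming one process per direction (an
# illustration, not xbout's actual _trim implementation):
def trim_guards(ds, guards):
    selection = {dim: slice(width, -width if width != 0 else None)
                 for dim, width in guards.items()}
    return ds.isel(**selection)


# e.g. trim_guards(ds, {'time': 2}) matches the expected
# ds.isel(time=slice(2, -2)) in test_trim_guards above.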
def test_dask_distributed_integration_test(loop, engine):
    with cluster() as (s, _):
        with distributed.Client(s['address'], loop=loop):
            original = create_test_data()
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
                original.to_netcdf(filename, engine=engine)
                with xr.open_dataset(filename,
                                     chunks=3, engine=engine) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def create_test_dataarray_attrs(seed=0, var='var1'):
    da = create_test_data(seed)[var]
    da.attrs = {'attr1': 5, 'attr2': 'history',
                'attr3': {'nested': 'more_info'}}
    return da
def create_test_dataset_attrs(seed=0):
    ds = create_test_data(seed)
    ds.attrs = {'attr1': 5, 'attr2': 'history',
                'attr3': {'nested': 'more_info'}}
    return ds
def test_dask_distributed_integration_test(loop, engine):
    with cluster() as (s, _):
        with distributed.Client(('127.0.0.1', s['port']), loop=loop):
            original = create_test_data()
            with create_tmp_file() as filename:
                original.to_netcdf(filename, engine=engine)
                restored = xr.open_dataset(filename, chunks=3, engine=engine)
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
def test_dask_distributed_zarr_integration_test(loop):
    with cluster() as (s, _):
        with distributed.Client(s['address'], loop=loop):
            original = create_test_data()
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
                original.to_zarr(filename)
                with xr.open_zarr(filename) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def test_dask_distributed_netcdf_integration_test_not_implemented(loop, engine):
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:
                original = create_test_data().chunk(chunks)

                with raises_regex(NotImplementedError, 'distributed'):
                    original.to_netcdf(filename, engine=engine)
def test_dask_distributed_zarr_integration_test(loop):
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 5}
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix='.zarr') as filename:
                original.to_zarr(filename)
                with xr.open_zarr(filename) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def test_keep_yboundaries(self):
    ds = create_test_data(0)
    ds = ds.rename({'dim2': 'y'})
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
    ds['jyseps2_1'] = 8
    ds['jyseps1_2'] = 8
    actual = _trim(ds, guards={'y': 2}, keep_boundaries={'y': True}, nxpe=1,
                   nype=1)
    expected = ds  # Should be unchanged
    xrt.assert_equal(expected, actual)
def test_keep_yboundaries_doublenull_by_filenum(self, filenum, lower, upper):
    ds = create_test_data(0)
    ds = ds.rename({'dim2': 'y'})
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.' + str(filenum) + '.nc'
    ds['jyseps2_1'] = 3
    ds['jyseps1_2'] = 11
    ds['ny_inner'] = 8
    ds['MYSUB'] = 4
    actual = _trim(ds, guards={'y': 2}, keep_boundaries={'y': True}, nxpe=1,
                   nype=4)
    expected = ds  # unchanged only when both boundaries are kept
    if not lower:
        expected = expected.isel(y=slice(2, None, None))
    if not upper:
        expected = expected.isel(y=slice(None, -2, None))
    xrt.assert_equal(expected, actual)
def test_dask_distributed_read_netcdf_integration_test(
        loop, tmp_netcdf_filename, engine, nc_format):

    if engine not in ENGINES:
        pytest.skip('engine not available')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            original = create_test_data()
            original.to_netcdf(tmp_netcdf_filename,
                               engine=engine, format=nc_format)

            with xr.open_dataset(tmp_netcdf_filename,
                                 chunks=chunks, engine=engine) as restored:
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
def test_dask_distributed_netcdf_roundtrip(monkeypatch, loop, engine,
                                           autoclose, nc_format):
    # Disable HDF5 file locking so the freshly written file can be re-opened
    # by the workers (locking can spuriously fail on some filesystems)
    monkeypatch.setenv('HDF5_USE_FILE_LOCKING', 'FALSE')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:
                original = create_test_data().chunk(chunks)
                original.to_netcdf(filename, engine=engine, format=nc_format)

                with xr.open_dataset(filename,
                                     chunks=chunks,
                                     engine=engine,
                                     autoclose=autoclose) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
def test_dask_distributed_zarr_integration_test(loop, consolidated, compute):
    if consolidated:
        pytest.importorskip('zarr', minversion="2.2.1.dev2")
        write_kwargs = dict(consolidated=True)
        read_kwargs = dict(consolidated=True)
    else:
        write_kwargs = read_kwargs = {}
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 5}
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix='.zarrc') as filename:
                maybe_futures = original.to_zarr(filename, compute=compute,
                                                 **write_kwargs)
                if not compute:
                    maybe_futures.compute()
                with xr.open_zarr(filename, **read_kwargs) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
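# Note: this older variant passes consolidated= straight to xr.open_zarr,
# whereas the test_dask_distributed_zarr_integration_test version further up
# routes the same option through backend_kwargs on
# xr.open_dataset(..., engine="zarr").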
def test_infer_boundaries_2d_parallelization(
        self, xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries):
    """
    Numbering scheme for nxpe=3, nype=4

    y  9 10 11
    ^  6  7  8
    |  3  4  5
    |  0  1  2
     -----> x
    """
    ds = create_test_data(0)
    ds['jyseps2_1'] = 0
    ds['jyseps1_2'] = 0
    ds['PE_XIND'] = xproc
    ds['PE_YIND'] = yproc
    actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
        ds, nxpe, nype)

    assert actual_lower_boundaries == lower_boundaries
    assert actual_upper_boundaries == upper_boundaries
def test_trim_ghosts(self):
    ds = create_test_data(0)
    actual = _trim(ds, ghosts={'time': 2})
    selection = {'time': slice(2, -2)}
    expected = ds.isel(**selection)
    xrt.assert_equal(expected, actual)
def test_no_trim(self):
    ds = create_test_data(0)
    actual = _trim(ds)
    xrt.assert_equal(actual, ds)