Example #1
    def test_trim_timing_info(self):
        ds = create_test_data(0)
        from xbout.load import _BOUT_PER_PROC_VARIABLES

        # remove a couple of entries from _BOUT_PER_PROC_VARIABLES so we test that _trim
        # does not fail if not all of them are present
        _BOUT_PER_PROC_VARIABLES = _BOUT_PER_PROC_VARIABLES[:-2]

        for v in _BOUT_PER_PROC_VARIABLES:
            ds[v] = 42.0
        ds = _trim(ds, guards={}, keep_boundaries={}, nxpe=1, nype=1)

        expected = create_test_data(0)
        xrt.assert_equal(ds, expected)
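
Context for the snippet above: `_BOUT_PER_PROC_VARIABLES` names the per-processor timing/bookkeeping variables that `_trim` drops while combining dump files. A hedged guess at its shape, for illustration only (the authoritative list lives in xbout.load and may differ):

# Illustrative only: the real list is defined in xbout.load.
_BOUT_PER_PROC_VARIABLES = [
    "wall_time", "wtime", "wtime_rhs", "wtime_invert",
    "wtime_comms", "wtime_io", "wtime_per_rhs",
]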
Example #2
def test_dask_distributed_read_netcdf_integration_test(loop, engine, autoclose,
                                                       nc_format):

    if engine == 'h5netcdf' and autoclose:
        pytest.skip('h5netcdf does not support autoclose')

    if nc_format not in NC_FORMATS[engine]:
        pytest.skip('invalid format for engine')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:

                original = create_test_data()
                original.to_netcdf(filename, engine=engine, format=nc_format)

                with xr.open_dataset(filename,
                                     chunks=chunks,
                                     engine=engine,
                                     autoclose=autoclose) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
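
The skip guard above indexes `NC_FORMATS[engine]`, so `NC_FORMATS` must map each engine name to the netCDF formats it supports. A plausible sketch of that mapping (the format lists are assumed, not copied from xarray's test suite):

# Assumed engine-to-format mapping consumed by the skip guard above.
NC_FORMATS = {
    'netcdf4': ['NETCDF3_CLASSIC', 'NETCDF3_64BIT', 'NETCDF4_CLASSIC', 'NETCDF4'],
    'scipy': ['NETCDF3_CLASSIC', 'NETCDF3_64BIT'],
    'h5netcdf': ['NETCDF4'],
}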
Example #3
def test_no_trim(self):
    ds = create_test_data(0)
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
    actual = _trim(ds, guards={}, keep_boundaries={}, nxpe=1,
                   nype=1)
    xrt.assert_equal(actual, ds)
Example #4
def test_dask_distributed_netcdf_roundtrip(
        loop, tmp_netcdf_filename, engine, nc_format):

    if engine not in ENGINES:
        pytest.skip('engine not available')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):

            original = create_test_data().chunk(chunks)

            if engine == 'scipy':
                with pytest.raises(NotImplementedError):
                    original.to_netcdf(tmp_netcdf_filename,
                                       engine=engine, format=nc_format)
                return

            original.to_netcdf(tmp_netcdf_filename,
                               engine=engine, format=nc_format)

            with xr.open_dataset(tmp_netcdf_filename,
                                 chunks=chunks, engine=engine) as restored:
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
Example #5
    def test_infer_boundaries_2d_parallelization_doublenull(
            self, xproc, yproc, nxpe, nype, lower_boundaries,
            upper_boundaries):
        """
        Numbering scheme for nxpe=3, nype=4

        y  9 10 11
        ^  6 7  8
        |  3 4  5
        |  0 1  2
         -----> x
        """

        ds = create_test_data(0)
        ds["jyseps2_1"] = 3
        ds["jyseps1_2"] = 11
        ds["ny_inner"] = 8
        ds["MYSUB"] = 4
        ds["PE_XIND"] = xproc
        ds["PE_YIND"] = yproc
        actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
            ds, nxpe, nype)

        assert actual_lower_boundaries == lower_boundaries
        assert actual_upper_boundaries == upper_boundaries
Example #6
def test_async(c, s, a, b):
    x = create_test_data()
    assert not dask.is_dask_collection(x)
    y = x.chunk({'dim2': 4}) + 10
    assert dask.is_dask_collection(y)
    assert dask.is_dask_collection(y.var1)
    assert dask.is_dask_collection(y.var2)

    z = y.persist()
    assert str(z)

    assert dask.is_dask_collection(z)
    assert dask.is_dask_collection(z.var1)
    assert dask.is_dask_collection(z.var2)
    assert len(y.__dask_graph__()) > len(z.__dask_graph__())

    assert not futures_of(y)
    assert futures_of(z)

    future = c.compute(z)
    w = yield future
    assert not dask.is_dask_collection(w)
    assert_allclose(x + 10, w)

    assert s.tasks
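
The `(c, s, a, b)` signature and the `yield future` pattern suggest this coroutine runs under distributed's `@gen_cluster(client=True)` decorator, which injects a client, scheduler, and two workers. The persist-versus-compute distinction it checks can be reproduced without any cluster; a minimal sketch using the default local scheduler:

import dask
import dask.array as da

x = da.ones((8, 8), chunks=4) + 10
assert dask.is_dask_collection(x)    # still lazy

y = x.persist()                      # computation starts, result is still a dask collection
assert dask.is_dask_collection(y)

z = y.compute()                      # concrete numpy array
assert not dask.is_dask_collection(z)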
Example #7
def test_dask_distributed_zarr_integration_test(loop, consolidated, compute):
    if consolidated:
        pytest.importorskip("zarr", minversion="2.2.1.dev2")
        write_kwargs = {"consolidated": True}
        read_kwargs = {"backend_kwargs": {"consolidated": True}}
    else:
        write_kwargs = read_kwargs = {}
    chunks = {"dim1": 4, "dim2": 3, "dim3": 5}
    with cluster() as (s, [a, b]):
        with Client(s["address"], loop=loop):
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix=".zarrc") as filename:
                maybe_futures = original.to_zarr(filename,
                                                 compute=compute,
                                                 **write_kwargs)
                if not compute:
                    maybe_futures.compute()
                with xr.open_dataset(filename,
                                     chunks="auto",
                                     engine="zarr",
                                     **read_kwargs) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
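
With `compute=False`, `to_zarr` returns a lazy handle (a dask Delayed) and writes nothing until it is computed, which is why the test calls `maybe_futures.compute()` before reading the store back. The pattern in isolation (store path assumed for illustration):

ds = create_test_data().chunk({"dim1": 4})
delayed = ds.to_zarr("example.zarr", compute=False)  # no data written yet
delayed.compute()                                    # performs the write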
Example #8
    def test_infer_boundaries_2d_parallelization_doublenull_by_filenum(
            self, xproc, yproc, nxpe, nype, lower_boundaries,
            upper_boundaries):
        """
        Numbering scheme for nxpe=3, nype=4

        y  9 10 11
        ^  6 7  8
        |  3 4  5
        |  0 1  2
         -----> x
        """

        filenum = yproc * nxpe + xproc

        ds = create_test_data(0)
        ds["jyseps2_1"] = 3
        ds["jyseps1_2"] = 11
        ds["ny_inner"] = 8
        ds["MYSUB"] = 4
        ds.encoding["source"] = "folder0/BOUT.dmp." + str(filenum) + ".nc"
        actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
            ds, nxpe, nype)

        assert actual_lower_boundaries == lower_boundaries
        assert actual_upper_boundaries == upper_boundaries
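
The `filenum = yproc * nxpe + xproc` line implements the row-major numbering in the docstring; inverting it recovers a processor's grid position from its dump-file number. A small sketch (`proc_from_filenum` is a name invented here for illustration):

def proc_from_filenum(filenum, nxpe):
    # Inverse of filenum = yproc * nxpe + xproc (row-major, x fastest).
    return filenum % nxpe, filenum // nxpe

# With nxpe=3, BOUT.dmp.7.nc belongs to processor (x=1, y=2),
# the middle of the y=2 row ("6 7 8") in the docstring.
assert proc_from_filenum(7, nxpe=3) == (1, 2)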
Example #9
def test_trim_guards(self):
    ds = create_test_data(0)
    # Manually add filename - encoding normally added by xr.open_dataset
    ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'
    actual = _trim(ds, guards={'time': 2}, keep_boundaries={},
                   nxpe=1, nype=1)
    selection = {'time': slice(2, -2)}
    expected = ds.isel(**selection)
    xrt.assert_equal(expected, actual)
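
The expected value makes the core of guard trimming explicit: drop `guards[dim]` cells from each end of every guarded dimension. A minimal stand-in for that step (not xbout's actual `_trim`, which also handles boundaries and per-processor variables):

def strip_guards(ds, guards):
    # Drop `width` guard cells from both ends of each guarded dimension.
    selection = {dim: slice(width, -width)
                 for dim, width in guards.items() if width > 0}
    return ds.isel(**selection)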
Example #10
def test_dask_distributed_integration_test(loop, engine):
    with cluster() as (s, _):
        with distributed.Client(s['address'], loop=loop):
            original = create_test_data()
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
                original.to_netcdf(filename, engine=engine)
                with xr.open_dataset(filename, chunks=3, engine=engine) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
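
The `cluster()` context manager and `loop` fixture used throughout these tests come from distributed's testing utilities; `cluster()` yields a scheduler-info dict plus worker dicts, hence `s['address']`. The supporting imports look roughly like this (assembled from usage above, not copied from any one test module):

import dask.array as da
import distributed
from distributed import Client
from distributed.utils_test import cluster, loop  # noqa: F401  (loop is a pytest fixture)
import xarray as xr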
Example #11
def create_test_dataarray_attrs(seed=0, var='var1'):
    da = create_test_data(seed)[var]
    da.attrs = {
        'attr1': 5,
        'attr2': 'history',
        'attr3': {
            'nested': 'more_info'
        }
    }
    return da
Example #12
def create_test_dataset_attrs(seed=0):
    ds = create_test_data(seed)
    ds.attrs = {
        'attr1': 5,
        'attr2': 'history',
        'attr3': {
            'nested': 'more_info'
        }
    }
    return ds
Example #13
def test_dask_distributed_integration_test(loop, engine):
    with cluster() as (s, _):
        with distributed.Client(('127.0.0.1', s['port']), loop=loop):
            original = create_test_data()
            with create_tmp_file() as filename:
                original.to_netcdf(filename, engine=engine)
                restored = xr.open_dataset(filename, chunks=3, engine=engine)
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
Example #14
def test_dask_distributed_zarr_integration_test(loop):
    with cluster() as (s, _):
        with distributed.Client(s['address'], loop=loop):
            original = create_test_data()
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
                original.to_zarr(filename)
                with xr.open_zarr(filename) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
Example #15
def test_dask_distributed_netcdf_integration_test_not_implemented(loop, engine):
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:

                original = create_test_data().chunk(chunks)

                with raises_regex(NotImplementedError, 'distributed'):
                    original.to_netcdf(filename, engine=engine)
Example #16
def test_dask_distributed_zarr_integration_test(loop):
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 5}
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix='.zarr') as filename:
                original.to_zarr(filename)
                with xr.open_zarr(filename) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
Example #17
    def test_keep_yboundaries(self):
        ds = create_test_data(0)
        ds = ds.rename({'dim2': 'y'})

        # Manually add filename - encoding normally added by xr.open_dataset
        ds.encoding['source'] = 'folder0/BOUT.dmp.0.nc'

        ds['jyseps2_1'] = 8
        ds['jyseps1_2'] = 8

        actual = _trim(ds, guards={'y': 2}, keep_boundaries={'y': True}, nxpe=1, nype=1)
        expected = ds  # Should be unchanged
        xrt.assert_equal(expected, actual)
Example #18
    def test_keep_yboundaries_doublenull_by_filenum(self, filenum, lower, upper):
        ds = create_test_data(0)
        ds = ds.rename({'dim2': 'y'})

        # Manually add filename - encoding normally added by xr.open_dataset
        ds.encoding['source'] = 'folder0/BOUT.dmp.'+str(filenum)+'.nc'

        ds['jyseps2_1'] = 3
        ds['jyseps1_2'] = 11
        ds['ny_inner'] = 8
        ds['MYSUB'] = 4

        actual = _trim(ds, guards={'y': 2}, keep_boundaries={'y': True}, nxpe=1, nype=4)
        expected = ds  # Should be unchanged
        if not lower:
            expected = expected.isel(y=slice(2, None, None))
        if not upper:
            expected = expected.isel(y=slice(None, -2, None))
        xrt.assert_equal(expected, actual)
Example #19
def test_dask_distributed_read_netcdf_integration_test(
        loop, tmp_netcdf_filename, engine, nc_format):

    if engine not in ENGINES:
        pytest.skip('engine not available')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):

            original = create_test_data()
            original.to_netcdf(tmp_netcdf_filename,
                               engine=engine, format=nc_format)

            with xr.open_dataset(tmp_netcdf_filename,
                                 chunks=chunks,
                                 engine=engine) as restored:
                assert isinstance(restored.var1.data, da.Array)
                computed = restored.compute()
                assert_allclose(original, computed)
Example #20
def test_dask_distributed_netcdf_roundtrip(monkeypatch, loop,
                                           engine, autoclose, nc_format):

    monkeypatch.setenv('HDF5_USE_FILE_LOCKING', 'FALSE')

    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 6}

    with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as filename:
        with cluster() as (s, [a, b]):
            with Client(s['address'], loop=loop) as c:

                original = create_test_data().chunk(chunks)
                original.to_netcdf(filename, engine=engine, format=nc_format)

                with xr.open_dataset(filename,
                                     chunks=chunks,
                                     engine=engine,
                                     autoclose=autoclose) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
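
The monkeypatch line is just a scoped way of setting an environment variable: it disables HDF5's file locking so several workers can read the same file. The plain equivalent, which must run before h5py/netCDF4 first open a file:

import os

os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'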
Example #21
def test_dask_distributed_zarr_integration_test(loop, consolidated, compute):
    if consolidated:
        pytest.importorskip('zarr', minversion="2.2.1.dev2")
        write_kwargs = dict(consolidated=True)
        read_kwargs = dict(consolidated=True)
    else:
        write_kwargs = read_kwargs = {}
    chunks = {'dim1': 4, 'dim2': 3, 'dim3': 5}
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop):
            original = create_test_data().chunk(chunks)
            with create_tmp_file(allow_cleanup_failure=ON_WINDOWS,
                                 suffix='.zarrc') as filename:
                maybe_futures = original.to_zarr(filename, compute=compute,
                                                 **write_kwargs)
                if not compute:
                    maybe_futures.compute()
                with xr.open_zarr(filename, **read_kwargs) as restored:
                    assert isinstance(restored.var1.data, da.Array)
                    computed = restored.compute()
                    assert_allclose(original, computed)
Example #22
    def test_infer_boundaries_2d_parallelization(
            self, xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries):
        """
        Numbering scheme for nxpe=3, nype=4

        y  9 10 11
        ^  6 7  8
        |  3 4  5
        |  0 1  2
         -----> x
        """

        ds = create_test_data(0)
        ds['jyseps2_1'] = 0
        ds['jyseps1_2'] = 0
        ds['PE_XIND'] = xproc
        ds['PE_YIND'] = yproc
        actual_lower_boundaries, actual_upper_boundaries = _infer_contains_boundaries(
            ds, nxpe, nype)

        assert actual_lower_boundaries == lower_boundaries
        assert actual_upper_boundaries == upper_boundaries
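
Arguments like `xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries` are supplied by a parametrize decorator on the test. A hedged sketch of what it plausibly looks like for this single-null case, assuming `_infer_contains_boundaries` returns per-axis dicts (values invented for the two corner processors of the 3x4 grid):

@pytest.mark.parametrize(
    'xproc, yproc, nxpe, nype, lower_boundaries, upper_boundaries',
    [
        (0, 0, 3, 4, {'x': True, 'y': True}, {'x': False, 'y': False}),
        (2, 3, 3, 4, {'x': False, 'y': False}, {'x': True, 'y': True}),
    ],
)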
Example #23
def test_trim_ghosts(self):
    ds = create_test_data(0)
    actual = _trim(ds, ghosts={'time': 2})
    selection = {'time': slice(2, -2)}
    expected = ds.isel(**selection)
    xrt.assert_equal(expected, actual)
Example #24
def test_no_trim(self):
    ds = create_test_data(0)
    actual = _trim(ds)
    xrt.assert_equal(actual, ds)