def test_argmin_max(dim_num, dtype, contains_nan, dask, func, skipna, aggdim):
    # pandas-dev/pandas#16830: we do not check consistency with pandas;
    # we just make sure that da[da.argmin()] == da.min()

    if aggdim == 'y' and dim_num < 2:
        pytest.skip('dim not in this test')

    if dask and not has_dask:
        pytest.skip('requires dask')

    if contains_nan:
        if not skipna:
            pytest.skip("numpy's argmin (not nanargmin) does not handle "
                        "object-dtype")
        if skipna and np.dtype(dtype).kind in 'iufc':
            pytest.skip("numpy's nanargmin raises ValueError for all nan axis")
    da = construct_dataarray(dim_num, dtype, contains_nan=contains_nan,
                             dask=dask)

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'All-NaN slice')

        if aggdim == 'y' and contains_nan and skipna:
            with pytest.raises(ValueError):
                actual = da.isel(**{
                    aggdim: getattr(da, 'arg' + func)(
                        dim=aggdim, skipna=skipna).compute()})
            return

        actual = da.isel(**{aggdim: getattr(da, 'arg' + func)
                            (dim=aggdim, skipna=skipna).compute()})
        expected = getattr(da, func)(dim=aggdim, skipna=skipna)
        assert_allclose(actual.drop(actual.coords),
                        expected.drop(expected.coords))
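
# A minimal, self-contained sketch of the identity the test above enforces,
# using plain xarray on hypothetical data (no construct_dataarray helper):
# indexing by argmin along a dim recovers min along that dim.
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.RandomState(0).rand(3, 4), dims=('x', 'y'))
xr.testing.assert_allclose(da.isel(y=da.argmin(dim='y')), da.min(dim='y'))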
Example #2
def test_multiple_dims(dtype, dask, func):
    if dask and not has_dask:
        pytest.skip('requires dask')
    da = construct_dataarray(3, dtype, contains_nan=True, dask=dask)

    actual = getattr(da, func)(('x', 'y'))
    expected = getattr(getattr(da, func)('x'), func)('y')
    assert_allclose(actual, expected)
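
# Sketch of the property exercised by test_multiple_dims, with plain xarray
# instead of the construct_dataarray helper: reducing over several dims at
# once matches reducing over them one at a time.
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(24.0).reshape(2, 3, 4), dims=('x', 'y', 'z'))
xr.testing.assert_allclose(da.sum(('x', 'y')), da.sum('x').sum('y'))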
Example #3
def test_notransform():
    # regression test for https://github.com/pydata/xarray/issues/1686
    # Create a geotiff file
    with warnings.catch_warnings():
        # rasterio throws a NotGeoreferencedWarning here, which is
        # expected since we test rasterio's defaults in this case.
        warnings.filterwarnings(
            "ignore",
            category=UserWarning,
            message="Dataset has no geotransform set",
        )
        with create_tmp_file(suffix=".tif") as tmp_file:
            # data
            nx, ny, nz = 4, 3, 3
            data = np.arange(nx * ny * nz, dtype=rasterio.float32).reshape(nz, ny, nx)
            with rasterio.open(
                tmp_file,
                "w",
                driver="GTiff",
                height=ny,
                width=nx,
                count=nz,
                dtype=rasterio.float32,
            ) as s:
                s.descriptions = ("nx", "ny", "nz")
                s.units = ("cm", "m", "km")
                s.write(data)

            # Tests
            expected = DataArray(
                data,
                dims=("band", "y", "x"),
                coords={
                    "band": [1, 2, 3],
                    "y": [0.5, 1.5, 2.5],
                    "x": [0.5, 1.5, 2.5, 3.5],
                },
            )
            expected.coords[DEFAULT_GRID_MAP] = xr.Variable((), 0)
            expected.coords[DEFAULT_GRID_MAP].attrs[
                "GeoTransform"
            ] = "0.0 1.0 0.0 0.0 0.0 1.0"

            with rioxarray.open_rasterio(tmp_file) as rioda:
                assert_allclose(rioda, expected)
                assert rioda.attrs["scale_factor"] == 1.0
                assert rioda.attrs["add_offset"] == 0.0
                assert rioda.attrs["long_name"] == ("nx", "ny", "nz")
                assert rioda.attrs["units"] == ("cm", "m", "km")
                assert isinstance(rioda.rio.resolution(), tuple)
                assert isinstance(rioda.rio._cached_transform(), Affine)
Example #4
    def test_no_mftime(self):
        # rasterio can accept "filename" arguments that are actually URLs,
        # including paths to remote files.
        # In issue #1816, we found that these caused dask to break, because
        # the modification time was used to determine the dask token. This
        # test ensures we can still chunk such files when reading with
        # rasterio.
        with create_tmp_geotiff(
            8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong"
        ) as (tmp_file, expected):
            with mock.patch("os.path.getmtime", side_effect=OSError):
                with xr.open_rasterio(tmp_file, chunks=(1, 2, 2)) as actual:
                    assert isinstance(actual.data, da.Array)
                    assert_allclose(actual, expected)
def test_transpose():

    # test on DataArray
    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={'sample': range(100), 'feature': range(10)},
        dims=('sample', 'feature')
    )

    Xt_da, estimator = transpose(
        X_da, order=['feature', 'sample'], return_estimator=True)

    xrt.assert_allclose(Xt_da, X_da.transpose())

    Xt_da = estimator.inverse_transform(Xt_da)

    xrt.assert_allclose(Xt_da, X_da)

    # test on Dataset with subset of dimensions
    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feat_1', 'feat_2'],
                   np.random.random((100, 10, 5))),
         'var_2': (['feat_2', 'sample'],
                   np.random.random((5, 100)))},
        coords={'sample': range(100), 'feat_1': range(10), 'feat_2': range(5)}
    )

    Xt_ds, estimator = transpose(
        X_ds, order=['sample', 'feat_2'], return_estimator=True)

    xrt.assert_allclose(Xt_ds, X_ds.transpose('sample', 'feat_1', 'feat_2'))

    Xt_ds = estimator.inverse_transform(Xt_ds)

    xrt.assert_allclose(Xt_ds, X_ds)
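
# The transpose round-trip relied on above, in plain xarray (no
# sklearn-xarray estimator): transposing back restores the original layout.
import numpy as np
import xarray as xr

X = xr.DataArray(np.random.random((5, 3)), dims=('sample', 'feature'))
xr.testing.assert_allclose(
    X, X.transpose('feature', 'sample').transpose('sample', 'feature'))
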
def test_split():

    # test on DataArray with number of samples multiple of new length
    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={
            "sample": range(100),
            "feature": range(10),
            "coord_1": (["sample", "feature"], np.tile("Test", (100, 10))),
        },
        dims=("sample", "feature"),
    )

    estimator = Splitter(
        new_dim="split_sample",
        new_len=5,
        reduce_index="subsample",
        axis=1,
        keep_coords_as="sample_coord",
    )

    Xt_da = estimator.fit_transform(X_da)

    assert Xt_da.shape == (20, 5, 10)
    npt.assert_allclose(Xt_da[0, :, 0], X_da[:5, 0])

    Xit_da = estimator.inverse_transform(Xt_da)

    xrt.assert_allclose(X_da, Xit_da)

    # test on Dataset with number of samples NOT multiple of new length
    X_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], np.random.random((100, 10)))},
        coords={
            "sample": range(100),
            "feature": range(10)
        },
    )

    Xt_ds = split(
        X_ds,
        new_dim="split_sample",
        new_len=7,
        reduce_index="head",
        axis=1,
        new_index_func=None,
    )

    assert Xt_ds["var_1"].shape == (14, 7, 10)
    npt.assert_allclose(Xt_ds.var_1[0, :, 0], X_ds.var_1[:7, 0])
def test_reduce():

    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={
            "sample": range(100),
            "feature": range(10)
        },
        dims=("sample", "feature"),
    )

    Xt_da = reduce(X_da)

    xrt.assert_allclose(Xt_da, X_da.reduce(np.linalg.norm, dim="feature"))
Example #8
    def test_caching(self):
        with create_tmp_geotiff(
            8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong"
        ) as (tmp_file, expected):
            # Cache is the default
            with xr.open_rasterio(tmp_file) as actual:

                # This should cache everything
                assert_allclose(actual, expected)

                # once cached, non-windowed indexing should become possible
                ac = actual.isel(x=[2, 4])
                ex = expected.isel(x=[2, 4])
                assert_allclose(ac, ex)
def test_min_count(dim_num, dtype, dask, func, aggdim, contains_nan, skipna):
    if dask and not has_dask:
        pytest.skip("requires dask")

    da = construct_dataarray(dim_num, dtype, contains_nan=contains_nan, dask=dask)
    min_count = 3

    # If using Dask, the function call should be lazy.
    with raise_if_dask_computes():
        actual = getattr(da, func)(dim=aggdim, skipna=skipna, min_count=min_count)

    expected = series_reduce(da, func, skipna=skipna, dim=aggdim, min_count=min_count)
    assert_allclose(actual, expected)
    assert_dask_array(actual, dask)
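
# Hedged illustration of the `min_count` semantics tested above, with plain
# xarray (no fixtures): under skipna=True a reduction yields NaN wherever
# fewer than `min_count` valid (non-NaN) values contribute.
import numpy as np
import xarray as xr

da = xr.DataArray([1.0, np.nan, np.nan], dims='x')
assert float(da.sum(dim='x', skipna=True, min_count=1)) == 1.0
assert np.isnan(float(da.sum(dim='x', skipna=True, min_count=2)))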
Example #10
    def test_notransform(self):
        # regression test for https://github.com/pydata/xarray/issues/1686
        # Create a geotiff file
        with warnings.catch_warnings():
            # rasterio throws a NotGeoreferencedWarning here, which is
            # expected since we test rasterio's defaults in this case.
            warnings.filterwarnings(
                "ignore",
                category=UserWarning,
                message="Dataset has no geotransform set",
            )
            with create_tmp_file(suffix=".tif") as tmp_file:
                # data
                nx, ny, nz = 4, 3, 3
                data = np.arange(nx * ny * nz, dtype=rasterio.float32).reshape(
                    nz, ny, nx
                )
                with rasterio.open(
                    tmp_file,
                    "w",
                    driver="GTiff",
                    height=ny,
                    width=nx,
                    count=nz,
                    dtype=rasterio.float32,
                ) as s:
                    s.descriptions = ("nx", "ny", "nz")
                    s.units = ("cm", "m", "km")
                    s.write(data)

                # Tests
                expected = DataArray(
                    data,
                    dims=("band", "y", "x"),
                    coords={
                        "band": [1, 2, 3],
                        "y": [0.5, 1.5, 2.5],
                        "x": [0.5, 1.5, 2.5, 3.5],
                    },
                )
                with xr.open_rasterio(tmp_file) as rioda:
                    assert_allclose(rioda, expected)
                    assert rioda.attrs["scales"] == (1.0, 1.0, 1.0)
                    assert rioda.attrs["offsets"] == (0.0, 0.0, 0.0)
                    assert rioda.attrs["descriptions"] == ("nx", "ny", "nz")
                    assert rioda.attrs["units"] == ("cm", "m", "km")
                    assert isinstance(rioda.attrs["res"], tuple)
                    assert isinstance(rioda.attrs["is_tiled"], np.uint8)
                    assert isinstance(rioda.attrs["transform"], tuple)
                    assert len(rioda.attrs["transform"]) == 6
def test_wrapped_transformer():

    from sklearn.preprocessing import StandardScaler

    X = xr.DataArray(
        np.random.random((100, 10)),
        coords={'sample': range(100), 'feature': range(10)},
        dims=['sample', 'feature']
    )

    estimator = wrap(StandardScaler()).partial_fit(X)
    estimator.partial_fit(X)

    assert_allclose(X, estimator.inverse_transform(estimator.transform(X)))
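
# The round-trip identity behind test_wrapped_transformer, shown with plain
# scikit-learn and numpy (no `wrap`): inverse_transform undoes transform up
# to floating-point error.
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.random((100, 10))
scaler = StandardScaler().fit(X)
np.testing.assert_allclose(X, scaler.inverse_transform(scaler.transform(X)))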
Example #12
def test_ENVI_tags():
    # Create an ENVI file with some tags in the ENVI namespace
    # this test uses a custom driver, so we can't use create_tmp_geotiff
    with create_tmp_file(suffix=".dat") as tmp_file:
        # data
        nx, ny, nz = 4, 3, 3
        data = np.arange(nx * ny * nz, dtype=rasterio.float32).reshape(nz, ny, nx)
        transform = from_origin(5000, 80000, 1000, 2000.0)
        with rasterio.open(
            tmp_file,
            "w",
            driver="ENVI",
            height=ny,
            width=nx,
            count=nz,
            crs="EPSG:32618",
            transform=transform,
            dtype=rasterio.float32,
        ) as s:
            s.update_tags(
                ns="ENVI",
                description="{Tagged file}",
                wavelength="{123.000000, 234.234000, 345.345678}",
                fwhm="{1.000000, 0.234000, 0.000345}",
            )
            s.write(data)
            dx, dy = s.res[0], -s.res[1]
            crs_wkt = s.crs.to_wkt()

        # Tests
        coords = {
            "band": [1, 2, 3],
            "y": -np.arange(ny) * 2000 + 80000 + dy / 2,
            "x": np.arange(nx) * 1000 + 5000 + dx / 2,
            "wavelength": ("band", np.array([123, 234.234, 345.345678])),
            "fwhm": ("band", np.array([1, 0.234, 0.000345])),
        }
        expected = DataArray(data, dims=("band", "y", "x"), coords=coords)
        expected.coords[DEFAULT_GRID_MAP] = xr.Variable((), 0)
        expected.coords[DEFAULT_GRID_MAP].attrs["crs_wkt"] = crs_wkt

        with rioxarray.open_rasterio(tmp_file) as rioda:
            assert_allclose(rioda, expected)
            assert rioda.rio.crs == crs_wkt
            assert isinstance(rioda.rio._cached_transform(), Affine)
            # from ENVI tags
            assert isinstance(rioda.attrs["description"], str)
            assert isinstance(rioda.attrs["map_info"], str)
            assert isinstance(rioda.attrs["samples"], str)
Example #13
def test_chunks_with_mask_and_scale():
    with create_tmp_geotiff(
        10, 10, 4, transform_args=[1, 2, 0.5, 2.0], crs="EPSG:4326"
    ) as (tmp_file, expected):
        # Chunk at open time
        with rioxarray.open_rasterio(
            tmp_file, mask_and_scale=True, chunks=(1, 2, 2)
        ) as actual:
            assert isinstance(actual.data, dask.array.Array)
            assert "open_rasterio" in actual.data.name

            # do some arithmetic
            ac = actual.mean().compute()
            ex = expected.mean()
            assert_allclose(ac, ex)
def test_min_count(dim_num, dtype, dask, func, aggdim):
    if dask and not has_dask:
        pytest.skip("requires dask")

    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
    min_count = 3

    actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)
    expected = series_reduce(da,
                             func,
                             skipna=True,
                             dim=aggdim,
                             min_count=min_count)
    assert_allclose(actual, expected)
    assert_dask_array(actual, dask)
def test_min_count(dim_num, dtype, dask, func, aggdim):
    if dask and not has_dask:
        pytest.skip("requires dask")

    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
    min_count = 3

    actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)

    if LooseVersion(pd.__version__) >= LooseVersion("0.22.0"):
        # min_count is only implemented in pandas >= 0.22
        expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count)
        assert_allclose(actual, expected)

    assert_dask_array(actual, dask)
Example #16
    def test_multi_loc(self):
        data_type = '3-hour'
        years = [1995]
        lat_lon = ((44.624076, -124.280097), (43.489171, -125.152137))
        parameters = 'mean_absolute_period'
        wave_multiloc, meta = wave.io.hindcast.request_wpto_point_data(
            data_type, parameters, lat_lon, years)
        dir_multiyear, meta_dir = wave.io.hindcast.request_wpto_directional_spectrum(
            lat_lon, year='1995')
        dir_multiyear = dir_multiyear.sel(
            time_index=slice(dir_multiyear.time_index[0],
                             dir_multiyear.time_index[99]))
        dir_multiyear = dir_multiyear.rename_vars({87: '87', 58: '58'})

        assert_frame_equal(self.ml, wave_multiloc)
        assert_frame_equal(self.ml_meta, meta)
        xrt.assert_allclose(self.my_dir, dir_multiyear)
        assert_frame_equal(self.my_dir_meta, meta_dir)
def test_min_count_nd(dtype, dask, func):
    if dask and not has_dask:
        pytest.skip("requires dask")

    min_count = 3
    dim_num = 3
    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
    actual = getattr(da, func)(dim=["x", "y", "z"],
                               skipna=True,
                               min_count=min_count)
    # Supplying all dims is equivalent to supplying `...` or `None`
    expected = getattr(da, func)(dim=..., skipna=True, min_count=min_count)

    assert_allclose(actual, expected)
    assert_dask_array(actual, dask)
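
# Quick standalone check of the equivalence noted in the comment above:
# listing every dim explicitly matches passing `...` (all dims).
import numpy as np
import xarray as xr

da = xr.DataArray(np.ones((2, 3)), dims=('x', 'y'))
xr.testing.assert_allclose(da.sum(dim=['x', 'y']), da.sum(dim=...))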
Example #18
def test_compute_perfect_model_different_dims_quite_close(
    perfectModelEnsemble_initialized_control, ):
    """Tests nearly equal dim=['init','member'] and dim='member'."""
    stack_dims_true = perfectModelEnsemble_initialized_control.verify(
        comparison="m2c",
        metric="rmse",
        dim=["init", "member"],
    )["tos"]
    stack_dims_false = perfectModelEnsemble_initialized_control.verify(
        comparison="m2c",
        metric="rmse",
        dim="member",
    ).mean(["init"])["tos"]
    # no more than 10% difference
    assert_allclose(stack_dims_true, stack_dims_false, rtol=0.1, atol=0.03)
def test_calculate_rel_vorticity():
    datadict = datasets()
    coords = datadict["coords"]
    ds_b = datadict["B"]
    grid_b = Grid(ds_b, coords=coords)

    ds_c = datadict["C"]
    grid_c = Grid(ds_c, coords=coords)

    test_b = (grid_b.diff(grid_b.interp(ds_b.v * ds_b.dy_ne, "Y"), "X") -
              grid_b.diff(grid_b.interp(ds_b.u * ds_b.dx_ne, "X"),
                          "Y")) / ds_b.area_t

    zeta_b = calculate_rel_vorticity(
        grid_b,
        ds_b.u,
        ds_b.v,
        ds_b.dx_ne,
        ds_b.dy_ne,
        ds_b.area_t,
        gridtype=None,
    )

    test_c = (grid_c.diff(ds_c.v * ds_c.dy_n, "X") -
              grid_c.diff(ds_c.u * ds_c.dx_e, "Y")) / ds_c.area_ne

    zeta_c = calculate_rel_vorticity(
        grid_c,
        ds_c.u,
        ds_c.v,
        ds_c.dx_e,
        ds_c.dy_n,
        ds_c.area_ne,
        gridtype=None,
    )

    assert_allclose(test_b, zeta_b)
    assert_allclose(test_c, zeta_c)
    with pytest.raises(RuntimeError):
        zeta_c = calculate_rel_vorticity(
            grid_b,
            ds_c.u,
            ds_c.v,
            ds_c.dx_n,  # wrong coordinate
            ds_c.dy_n,
            ds_c.area_ne,
            gridtype=None,
        )
Example #20
def test_custom_metric_passed_to_bootstrap(
        perfectModelEnsemble_initialized_control):
    """Test custom metric in PerfectModelEnsemble.bootstrap."""
    comparison = "e2c"
    np.random.seed(42)
    kwargs = dict(comparison=comparison,
                  iterations=ITERATIONS,
                  dim="init",
                  resample_dim="init")
    actual = perfectModelEnsemble_initialized_control.bootstrap(metric=my_mse,
                                                                **kwargs)

    expected = perfectModelEnsemble_initialized_control.bootstrap(metric="mse",
                                                                  **kwargs)

    assert_allclose(actual, expected, rtol=0.1, atol=1)
Example #21
def test_min_count(dim_num, dtype, dask, func, aggdim):
    if dask and not has_dask:
        pytest.skip('requires dask')

    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
    min_count = 3

    actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count)

    if LooseVersion(pd.__version__) >= LooseVersion('0.22.0'):
        # min_count is only implemented in pandas >= 0.22
        expected = series_reduce(da, func, skipna=True, dim=aggdim,
                                 min_count=min_count)
        assert_allclose(actual, expected)

    assert_dask_array(actual, dask)
Example #22
def test_stats_functions_dask_chunks(PM_da_control_3d, func, step):
    """Check whether selected stats functions be chunked and computed along other
    dim."""
    dim = "time"
    for chunk_dim in PM_da_control_3d.isel({dim: 0}).dims:
        control_chunked = PM_da_control_3d.chunk({chunk_dim: step})
        res_chunked = func(control_chunked, dim=dim)
        res = func(PM_da_control_3d, dim=dim)
        # check for chunks
        assert dask.is_dask_collection(res_chunked)
        assert res_chunked.chunks is not None
        # check for no chunks
        assert not dask.is_dask_collection(res)
        assert res.chunks is None
        # check for identical result
        assert_allclose(res, res_chunked.compute())
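
# The invariance these dask tests exercise, in miniature (assumes dask is
# installed): chunking along one dim must not change a reduction along
# another, and the chunked result stays lazy until computed.
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.RandomState(0).rand(4, 6), dims=('y', 'time'))
res = da.mean('time')
res_chunked = da.chunk({'y': 2}).mean('time')
assert res_chunked.chunks is not None
xr.testing.assert_allclose(res, res_chunked.compute())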
Example #23
def test_add_bounds(obj, dims):
    expected = {}
    expected["lat"] = xr.concat(
        [
            obj.lat.copy(data=np.arange(76.25, 16.0, -2.5)),
            obj.lat.copy(data=np.arange(73.75, 13.6, -2.5)),
        ],
        dim="bounds",
    )
    expected["lon"] = xr.concat(
        [
            obj.lon.copy(data=np.arange(198.75, 325 - 1.25, 2.5)),
            obj.lon.copy(data=np.arange(201.25, 325 + 1.25, 2.5)),
        ],
        dim="bounds",
    )
    t0 = pd.Timestamp("2013-01-01")
    t1 = pd.Timestamp("2013-01-01 18:00")
    dt = "6h"
    dtb2 = pd.Timedelta("3h")
    expected["time"] = xr.concat(
        [
            obj.time.copy(
                data=pd.date_range(start=t0 - dtb2, end=t1 - dtb2, freq=dt)),
            obj.time.copy(
                data=pd.date_range(start=t0 + dtb2, end=t1 + dtb2, freq=dt)),
        ],
        dim="bounds",
    )
    expected["lat"].attrs.clear()
    expected["lon"].attrs.clear()
    expected["time"].attrs.clear()

    added = obj.cf.add_bounds(dims)
    if isinstance(dims, str):
        dims = (dims, )

    for dim in dims:
        name = f"{dim}_bounds"
        assert name in added.coords
        assert added[dim].attrs["bounds"] == name
        assert_allclose(added[name].reset_coords(drop=True), expected[dim])

    # Test multiple dimensions
    assert not {"x1_bounds", "x2_bounds"} <= set(multiple.variables)
    assert {"x1_bounds", "x2_bounds"} <= set(
        multiple.cf.add_bounds("X").variables)
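
# The bounds construction mirrored by `expected` above, reduced to numpy:
# for a regularly spaced coordinate, cell bounds sit half a grid step to
# either side of each center.
import numpy as np

centers = np.arange(0.5, 4.5, 1.0)  # cell centers [0.5, 1.5, 2.5, 3.5]
step = centers[1] - centers[0]
np.testing.assert_allclose(centers - step / 2, [0.0, 1.0, 2.0, 3.0])
np.testing.assert_allclose(centers + step / 2, [1.0, 2.0, 3.0, 4.0])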
Example #24
def test_utm():
    with create_tmp_geotiff() as (tmp_file, expected):
        with rioxarray.open_rasterio(tmp_file) as rioda:
            assert_allclose(rioda, expected)
            assert rioda.attrs["scale_factor"] == 1.0
            assert rioda.attrs["add_offset"] == 0.0
            assert rioda.attrs["long_name"] == ("d1", "d2", "d3")
            assert rioda.attrs["units"] == ("u1", "u2", "u3")
            assert rioda.rio.crs == expected.rio.crs
            assert_array_equal(rioda.rio.resolution(), expected.rio.resolution())
            assert isinstance(rioda.rio._cached_transform(), Affine)
            assert rioda.rio.nodata is None

        # Check no parse coords
        with rioxarray.open_rasterio(tmp_file, parse_coordinates=False) as rioda:
            assert "x" not in rioda.coords
            assert "y" not in rioda.coords
Example #25
    def test_chunks(self):
        with create_tmp_geotiff(
            8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong"
        ) as (tmp_file, expected):
            # Chunk at open time
            with xr.open_rasterio(tmp_file, chunks=(1, 2, 2)) as actual:
                assert isinstance(actual.data, da.Array)
                assert "open_rasterio" in actual.data.name

                # do some arithmetic
                ac = actual.mean()
                ex = expected.mean()
                assert_allclose(ac, ex)

                ac = actual.sel(band=1).mean(dim="x")
                ex = expected.sel(band=1).mean(dim="x")
                assert_allclose(ac, ex)
Example #26
def test_true_phase():
    """Test if true phase"""
    f0 = 2.0
    T = 4.0
    dx = 0.02
    x = np.arange(-8 * T, 5 * T + dx, dx)  # uncentered and odd number of points
    y = np.cos(2 * np.pi * f0 * x)
    y[np.abs(x) >= (T / 2.0)] = 0.0
    s = xr.DataArray(y, dims=("x",), coords={"x": x})
    lag = x[len(x) // 2]
    f = np.fft.fftfreq(len(x), dx)
    expected = np.fft.fft(np.fft.ifftshift(y)) * np.exp(-1j * 2.0 * np.pi * f * lag)
    expected = xr.DataArray(expected, dims="freq_x", coords={"freq_x": f})
    output = xrft.dft(
        s, dim="x", true_phase=True, true_amplitude=False, shift=False, prefix="freq_"
    )
    xrt.assert_allclose(expected, output)
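
# The Fourier shift theorem used to build `expected` above, as a pure-numpy
# sketch on a hypothetical signal: delaying a periodic signal by `shift`
# samples multiplies its DFT by exp(-2j*pi*f*shift*dx).
import numpy as np

n, dx = 64, 0.5
x = np.arange(n) * dx
y = np.exp(-((x - x.mean()) ** 2))
f = np.fft.fftfreq(n, dx)
shift = 3  # circular shift, in samples
np.testing.assert_allclose(
    np.fft.fft(np.roll(y, shift)),
    np.fft.fft(y) * np.exp(-2j * np.pi * f * shift * dx),
    atol=1e-12)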
Example #27
def test_stats_functions_dask_single_chunk(PM_da_control_3d, func):
    """Test stats functions when single chunk not along dim."""
    step = -1  # single chunk
    for chunk_dim in PM_da_control_3d.dims:
        control_chunked = PM_da_control_3d.chunk({chunk_dim: step})
        for dim in PM_da_control_3d.dims:
            if dim != chunk_dim:
                res_chunked = func(control_chunked, dim=dim)
                res = func(PM_da_control_3d, dim=dim)
                # check for chunks
                assert dask.is_dask_collection(res_chunked)
                assert res_chunked.chunks is not None
                # check for no chunks
                assert not dask.is_dask_collection(res)
                assert res.chunks is None
                # check for identical result
                assert_allclose(res, res_chunked.compute())
Example #28
def test_split():

    # test on DataArray with number of samples multiple of new length
    X_da = xr.DataArray(np.random.random((100, 10)),
                        coords={
                            'sample': range(100),
                            'feature': range(10),
                            'coord_1': (['sample', 'feature'],
                                        np.tile('Test', (100, 10)))
                        },
                        dims=('sample', 'feature'))

    estimator = Splitter(new_dim='split_sample',
                         new_len=5,
                         reduce_index='subsample',
                         axis=1,
                         keep_coords_as='sample_coord')

    Xt_da = estimator.fit_transform(X_da)

    assert Xt_da.shape == (20, 5, 10)
    npt.assert_allclose(Xt_da[0, :, 0], X_da[:5, 0])

    Xit_da = estimator.inverse_transform(Xt_da)

    xrt.assert_allclose(X_da, Xit_da)

    # test on Dataset with number of samples NOT multiple of new length
    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feature'], np.random.random((100, 10)))},
        coords={
            'sample': range(100),
            'feature': range(10)
        })

    Xt_ds = split(X_ds,
                  new_dim='split_sample',
                  new_len=7,
                  reduce_index='head',
                  axis=1,
                  new_index_func=None)

    assert Xt_ds['var_1'].shape == (14, 7, 10)
    npt.assert_allclose(Xt_ds.var_1[0, :, 0], X_ds.var_1[:7, 0])
def test_compute_perfect_model_stack_dims_True_and_False_quite_close(
        pm_da_ds1d, pm_da_control1d):
    """Test whether dim=['init','member'] for stack_dims=False and
    dim='member' for stack_dims=True give similar results."""
    stack_dims_true = compute_perfect_model(
        pm_da_ds1d,
        pm_da_control1d,
        comparison='m2c',
        metric='rmse',
        dim=['init', 'member'],
    )
    stack_dims_false = compute_perfect_model(pm_da_ds1d,
                                             pm_da_control1d,
                                             comparison='m2c',
                                             metric='rmse',
                                             dim='member').mean(['init'])
    # no more than 10% difference
    assert_allclose(stack_dims_true, stack_dims_false, rtol=0.1, atol=0.03)
Example #30
def test_detrend_2D(array_dims, array_shape, chunks, detrend_type,
                    trend_amplitude):
    da_original = noise(array_dims, array_shape)
    da_trend = (da_original + trend_amplitude["x"] * da_original["x"] +
                trend_amplitude["y"] * da_original["y"])
    if chunks:
        da_trend = da_trend.chunk(chunks)

    detrend_dim = ["y", "x"]
    detrended = detrend(da_trend, detrend_dim, detrend_type=detrend_type)
    assert detrended.chunks == da_trend.chunks
    if detrend_type is None:
        xrt.assert_equal(detrended, da_trend)
    elif detrend_type == "constant":
        xrt.assert_allclose(detrended,
                            da_trend - da_trend.mean(dim=detrend_dim))
    elif detrend_type == "linear":
        xrt.assert_allclose(detrended, da_original)
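
# What "linear" detrending should do, shown in 1D with plain numpy: an exact
# linear trend is recovered by a least-squares fit and subtracts to zero.
import numpy as np

x = np.arange(50.0)
trend = 2.0 + 0.3 * x
coeffs = np.polyfit(x, trend, 1)  # recovers [0.3, 2.0]
np.testing.assert_allclose(coeffs, [0.3, 2.0], atol=1e-10)
np.testing.assert_allclose(trend - np.polyval(coeffs, x), 0.0, atol=1e-10)
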
def test_w_mean(axis, metric_list, gridtype):
    fail_metric_list = ["fail"]
    ds = datasets()[gridtype]
    grid = Grid(ds)
    for var in ds.data_vars:
        metric = _find_metric(ds[var], metric_list)
        dim = _find_dim(grid, ds[var], axis)
        a = w_mean(grid, ds[var], axis, metric_list, verbose=True)
        if dim is None:  # no dimension found, return the input arrays
            b = ds[var]
        else:
            b = weighted_mean(ds[var], ds[metric], dim=dim)
        assert_allclose(a, b)

        # the original array should be returned if a non-matching metric
        # list is supplied
        a_fail = w_mean(grid, ds[var], axis, fail_metric_list)
        assert_allclose(a_fail, ds[var])
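
# The weighted mean w_mean is checked against, stripped to numpy:
# sum(v * w) / sum(w), i.e. numpy's average with weights.
import numpy as np

v = np.array([1.0, 2.0, 3.0])
w = np.array([0.5, 1.0, 1.5])
np.testing.assert_allclose((v * w).sum() / w.sum(), np.average(v, weights=w))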
Example #32
def test_custom_metric_passed_to_compute(PM_da_initialized_1d,
                                         PM_da_control_1d, comparison):
    """Test custom metric in compute_perfect_model."""
    actual = compute_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric=my_mse,
    )

    expected = compute_perfect_model(
        PM_da_initialized_1d,
        PM_da_control_1d,
        comparison=comparison,
        metric='mse',
    )

    assert_allclose(actual, expected)
Example #33
def test_stats_functions_dask_many_chunks(PM_da_control_3d, func):
    """Check whether selected stats functions be chunked in multiple chunks and
     computed along other dim."""
    step = 1
    for chunk_dim in PM_da_control_3d.dims:
        control_chunked = PM_da_control_3d.chunk({chunk_dim: step})
        for dim in PM_da_control_3d.dims:
            if dim != chunk_dim and dim in control_chunked.dims:
                res_chunked = func(control_chunked, dim=dim)
                res = func(PM_da_control_3d, dim=dim)
                # check for chunks
                assert dask.is_dask_collection(res_chunked)
                assert res_chunked.chunks is not None
                # check for no chunks
                assert not dask.is_dask_collection(res)
                assert res.chunks is None
                # check for identical result
                assert_allclose(res, res_chunked.compute())
Example #34
    def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08,
                              decode_bytes=True):
        __tracebackhide__ = True  # noqa: F841
        assert_allclose(d1, d2, rtol=rtol, atol=atol,
                        decode_bytes=decode_bytes)
Example #35
    def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08):
        assert_allclose(ar1, ar2, rtol=rtol, atol=atol)
Example #36
    def assertDatasetAllClose(self, d1, d2, rtol=1e-05, atol=1e-08):
        assert_allclose(d1, d2, rtol=rtol, atol=atol)
Example #37
    def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
        assert_allclose(v1, v2, rtol=rtol, atol=atol)
Example #38
def test_min_count_dataset(func):
    da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False)
    ds = Dataset({'var1': da}, coords={'scalar': 0})
    actual = getattr(ds, func)(dim='x', skipna=True, min_count=3)['var1']
    expected = getattr(ds['var1'], func)(dim='x', skipna=True, min_count=3)
    assert_allclose(actual, expected)
def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):

    if aggdim == 'y' and dim_num < 2:
        pytest.skip('dim not in this test')

    if dtype == np.bool_ and func == 'mean':
        pytest.skip('numpy does not support this')

    if dask and not has_dask:
        pytest.skip('requires dask')

    rtol = 1e-04 if dtype == np.float32 else 1e-05

    da = construct_dataarray(dim_num, dtype, contains_nan=True, dask=dask)
    axis = None if aggdim is None else da.get_axis_num(aggdim)

    # TODO: remove these after resolving
    # https://github.com/dask/dask/issues/3245
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', 'All-NaN slice')
        warnings.filterwarnings('ignore', 'invalid value encountered in')

        if (LooseVersion(np.__version__) >= LooseVersion('1.13.0') and
                da.dtype.kind == 'O' and skipna):
            # NumPy < 1.13 does not handle object-dtype arrays.
            try:
                if skipna:
                    expected = getattr(np, 'nan{}'.format(func))(da.values,
                                                                 axis=axis)
                else:
                    expected = getattr(np, func)(da.values, axis=axis)

                actual = getattr(da, func)(skipna=skipna, dim=aggdim)
                assert np.allclose(actual.values, np.array(expected),
                                   rtol=1.0e-4, equal_nan=True)
            except (TypeError, AttributeError, ZeroDivisionError):
                # TODO currently, numpy does not support some methods such as
                # nanmean for object dtype
                pass

        # make sure the results are compatible with pandas'
        actual = getattr(da, func)(skipna=skipna, dim=aggdim)
        if func == 'var':
            expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
                                     ddof=0)
            assert_allclose(actual, expected, rtol=rtol)
            # also check ddof!=0 case
            actual = getattr(da, func)(skipna=skipna, dim=aggdim, ddof=5)
            expected = series_reduce(da, func, skipna=skipna, dim=aggdim,
                                     ddof=5)
            assert_allclose(actual, expected, rtol=rtol)
        else:
            expected = series_reduce(da, func, skipna=skipna, dim=aggdim)
            assert_allclose(actual, expected, rtol=rtol)

        # make sure the dtype argument is respected
        if func not in ['max', 'min']:
            actual = getattr(da, func)(skipna=skipna, dim=aggdim, dtype=float)
            assert actual.dtype == float

        # without nan
        da = construct_dataarray(dim_num, dtype, contains_nan=False, dask=dask)
        actual = getattr(da, func)(skipna=skipna)
        expected = getattr(np, 'nan{}'.format(func))(da.values)
        if actual.dtype == object:
            assert actual.values == np.array(expected)
        else:
            assert np.allclose(actual.values, np.array(expected), rtol=rtol)
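
# The ddof convention the var branch above accounts for, in miniature:
# xarray's var defaults to ddof=0 while pandas' Series.var defaults to
# ddof=1, so the comparison passes ddof explicitly.
import numpy as np
import pandas as pd
import xarray as xr

vals = np.array([1.0, 2.0, 4.0])
da = xr.DataArray(vals, dims='x')
np.testing.assert_allclose(da.var('x').item(), pd.Series(vals).var(ddof=0))
np.testing.assert_allclose(da.var('x', ddof=1).item(), pd.Series(vals).var())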
Example #40
    def assertVariableAllClose(self, v1, v2, rtol=1e-05, atol=1e-08):
        __tracebackhide__ = True  # noqa: F841
        assert_allclose(v1, v2, rtol=rtol, atol=atol)
Example #41
def test_generate_axis():
    a = generate_axis(ds_original, 'X', 'lon', 'lon',
                      pos_from='center',
                      pos_to='right',
                      pad=None,
                      boundary_discontinuity=360)
    b = generate_axis(ds_original, 'Y', 'lat', 'lat',
                      pos_from='center',
                      pos_to='left',
                      pad=None,
                      boundary_discontinuity=180)
    c = generate_axis(ds_original, 'Z', 'z', 'z',
                      pos_from='center',
                      pos_to='left',
                      pad='auto')
    d = generate_axis(ds_original_1D, 'Z', 'z', 'z',
                      pos_from='left',
                      pos_to='center',
                      pad=1.0+dz,
                      new_name='test')
    e = generate_axis(ds_original_left, 'Z', 'z', 'z',
                      pos_from='left',
                      pos_to='center',
                      pad='auto')

    assert_allclose(a['lon_right'], ds_out_right['lon_right'])
    assert_allclose(b['lat_left'], ds_out_left['lat_left'])
    assert_allclose(c['z_left'], ds_out_left['z_left'])
    assert_allclose(d['test'], ds_original_1D_padded['test'])
    assert_allclose(e['z_center'], ds_out_center['z_center'])

    # Multidim cases
    aa = generate_axis(a, 'X', 'llon', 'lon',
                       pos_from='center',
                       pos_to='right',
                       pad=None,
                       boundary_discontinuity=360,
                       attrs_from_scratch=False)
    bb = generate_axis(b, 'Y', 'llat', 'lat',
                       pos_from='center',
                       pos_to='left',
                       pad=None,
                       boundary_discontinuity=180,
                       attrs_from_scratch=False)
    ee = generate_axis(e, 'Z', 'zz', 'z',
                       pos_from='left',
                       pos_to='center',
                       pad='auto',
                       attrs_from_scratch=False)
    assert_allclose(aa['llon_right'], ds_out_right['llon_right'])
    assert_allclose(bb['llat_left'], ds_out_left['llat_left'])
    assert_allclose(ee['zz_center'], ds_out_center['zz_center'])

    with pytest.raises(RuntimeError):
        # Check that generate_axis fails when a DataArray is passed
        # instead of a Dataset
        generate_axis(c['somedata'], 'Z', 'zz', 'z',
                      pos_from='left',
                      pos_to='center',
                      pad='auto',
                      attrs_from_scratch=False)
    with pytest.raises(RuntimeError):
        generate_axis(c, 'Z', 'zz', 'z', pad='auto',
                      boundary_discontinuity=360)
    with pytest.raises(RuntimeError):
        generate_axis(c, 'Z', 'zz', 'z', pad=None,
                      boundary_discontinuity=None)
Example #42
    def assertDataArrayAllClose(self, ar1, ar2, rtol=1e-05, atol=1e-08,
                                decode_bytes=True):
        __tracebackhide__ = True  # noqa: F841
        assert_allclose(ar1, ar2, rtol=rtol, atol=atol,
                        decode_bytes=decode_bytes)