def test_groupwise():
    """Exercise group-wise processing on a Dataset carrying a grouping coordinate.

    Two things are checked:

    * ``preprocess`` accepts a ``groupby`` argument together with sklearn's
      ``scale`` (the call only has to succeed; its result is not inspected).
    * ``split`` with ``groupby`` returns the expected shape, and the estimator
      it hands back can invert the transformation.
    """
    from sklearn.preprocessing import scale

    n_samples, n_features = 100, 10

    # Two groups of unequal size (51 vs. 49 samples) along the sample axis.
    group_labels = ["a"] * 51 + ["b"] * 49
    cyclic_labels = list(range(10)) * 10

    values = np.random.random((n_samples, n_features))
    dataset = xr.Dataset(
        {"var_1": (["sample", "feature"], values)},
        coords={
            "sample": range(n_samples),
            "feature": range(n_features),
            "coord_1": (["sample"], group_labels),
            "coord_2": (["sample"], cyclic_labels),
        },
    )

    # Wrapped sklearn estimator: smoke test only, the output is discarded.
    preprocess(dataset, scale, groupby="coord_1")

    # Newly defined estimator: split each group into chunks of length 5.
    chunked, splitter = split(
        dataset,
        new_dim="split_sample",
        new_len=5,
        groupby="coord_1",
        keep_coords_as="initial_sample",
        return_estimator=True,
    )

    # NOTE(review): 19 presumably is 10 full chunks from group "a" plus 9 from
    # group "b", with leftover samples dropped -- confirm against ``split``.
    assert chunked.var_1.shape == (19, 10, 5)

    restored = splitter.inverse_transform(chunked)

    # 19 chunks of 5 samples each map back to 95 rows.
    assert restored.var_1.shape == (95, 10)
def test_preprocess():
    """Check that ``preprocess`` with sklearn's ``scale`` matches ``scale`` itself.

    The comparison is made once for a DataArray input and once for a Dataset
    input, each holding random data with ``sample`` and ``feature`` dimensions.
    """
    from sklearn.preprocessing import scale

    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={'sample': range(100), 'feature': range(10)},
        dims=('sample', 'feature')
    )

    # Build the expected result on an independent copy. Binding the name
    # straight to ``X_da`` and then assigning ``.data`` would overwrite the
    # input in place, so ``preprocess`` would be handed already-scaled data and
    # the assertion below would hold even if ``preprocess`` did nothing.
    Xt_da_gt = X_da.copy()
    Xt_da_gt.data = scale(X_da)

    Xt_da = preprocess(X_da, scale)

    xrt.assert_allclose(Xt_da, Xt_da_gt)

    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feature'], np.random.random((100, 10)))},
        coords={'sample': range(100), 'feature': range(10)}
    )

    Xt_ds = preprocess(X_ds, scale)

    # Reference for the Dataset case: ``scale`` applied to every data variable.
    xrt.assert_allclose(Xt_ds, X_ds.apply(scale))
def test_preprocess():
    """Check that ``preprocess`` with sklearn's ``scale`` matches ``scale`` itself.

    The comparison is made once for a DataArray input and once for a Dataset
    input, each holding random data with ``sample`` and ``feature`` dimensions.
    """
    from sklearn.preprocessing import scale

    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={
            "sample": range(100),
            "feature": range(10)
        },
        dims=("sample", "feature"),
    )

    # Build the expected result on an independent copy. Binding the name
    # straight to ``X_da`` and then assigning ``.data`` would overwrite the
    # input in place, so ``preprocess`` would be handed already-scaled data and
    # the assertion below would hold even if ``preprocess`` did nothing.
    Xt_da_gt = X_da.copy()
    Xt_da_gt.data = scale(X_da)

    Xt_da = preprocess(X_da, scale)

    xrt.assert_allclose(Xt_da, Xt_da_gt)

    X_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], np.random.random((100, 10)))},
        coords={
            "sample": range(100),
            "feature": range(10)
        },
    )

    Xt_ds = preprocess(X_ds, scale)

    # Reference for the Dataset case: ``scale`` applied to every data variable.
    xrt.assert_allclose(Xt_ds, X_ds.apply(scale))
# Example #4
# 0
def test_groupwise():
    """Run ``preprocess`` and ``split`` with a ``groupby`` coordinate.

    ``preprocess`` is only required to run without raising when grouping by
    ``coord_1``. ``split`` is additionally checked for the shape of its output
    and for the shape obtained after inverting it with the returned estimator.
    """
    from sklearn.preprocessing import scale

    # Per-sample labels: an uneven two-group split and a repeating 0..9 cycle.
    labels_by_group = ['a'] * 51 + ['b'] * 49
    labels_cyclic = list(range(10)) * 10

    data_vars = {
        'var_1': (['sample', 'feature'], np.random.random((100, 10))),
    }
    coordinates = {
        'sample': range(100),
        'feature': range(10),
        'coord_1': (['sample'], labels_by_group),
        'coord_2': (['sample'], labels_cyclic),
    }
    ds = xr.Dataset(data_vars, coords=coordinates)

    # Wrapped sklearn estimator -- smoke test, return value intentionally unused.
    preprocess(ds, scale, groupby='coord_1')

    # Newly defined estimator.
    split_ds, split_estimator = split(
        ds,
        new_dim='split_sample',
        new_len=5,
        groupby='coord_1',
        keep_coords_as='initial_sample',
        return_estimator=True,
    )

    # NOTE(review): 19 chunks presumably come from 10 complete chunks in group
    # 'a' and 9 in group 'b' -- verify against the ``split`` implementation.
    expected_split_shape = (19, 10, 5)
    assert split_ds.var_1.shape == expected_split_shape

    merged_ds = split_estimator.inverse_transform(split_ds)

    # 19 chunks * 5 samples per chunk = 95 rows after inversion.
    expected_merged_shape = (95, 10)
    assert merged_ds.var_1.shape == expected_merged_shape