def test_groupwise():
    """Run ``preprocess`` and ``split`` group-wise over ``coord_1``.

    The fixture is a 100 x 10 dataset whose ``coord_1`` coordinate labels
    the first 51 samples ``"a"`` and the remaining 49 samples ``"b"``.
    """

    from sklearn.preprocessing import scale

    group_labels = ["a"] * 51 + ["b"] * 49
    cyclic_labels = list(range(10)) * 10

    values = np.random.random((100, 10))
    sample_coords = {
        "sample": range(100),
        "feature": range(10),
        "coord_1": (["sample"], group_labels),
        "coord_2": (["sample"], cyclic_labels),
    }
    dataset = xr.Dataset(
        {"var_1": (["sample", "feature"], values)},
        coords=sample_coords,
    )

    # Wrapped sklearn function: the result is not inspected, the call only
    # has to complete without raising.
    preprocess(dataset, scale, groupby="coord_1")

    # Transformation implemented by the package itself, applied per group.
    split_options = {
        "new_dim": "split_sample",
        "new_len": 5,
        "groupby": "coord_1",
        "keep_coords_as": "initial_sample",
        "return_estimator": True,
    }
    chunked, splitter = split(dataset, **split_options)

    # 19 chunks of length 5 -- presumably 10 from the 51 "a" samples and 9
    # from the 49 "b" samples, with incomplete chunks dropped (TODO confirm).
    assert chunked["var_1"].shape == (19, 10, 5)

    restored = splitter.inverse_transform(chunked)

    # Only 19 * 5 = 95 samples come back, not the original 100.
    assert restored["var_1"].shape == (95, 10)
def test_groupwise():
    """Check that ``preprocess`` and ``split`` accept a ``groupby`` coordinate.

    A random 100 x 10 dataset carries two per-sample coordinates; only
    ``coord_1`` (51 ``'a'`` labels followed by 49 ``'b'`` labels) is used
    for grouping.
    """

    from sklearn.preprocessing import scale

    sample_dim = ["sample"]
    coord_1 = ["a"] * 51 + ["b"] * 49
    coord_2 = list(range(10)) * 10

    data = np.random.random((100, 10))
    source = xr.Dataset(
        {"var_1": (["sample", "feature"], data)},
        coords={
            "sample": range(100),
            "feature": range(10),
            "coord_1": (sample_dim, coord_1),
            "coord_2": (sample_dim, coord_2),
        },
    )

    # Wrapped sklearn estimator; the transformed dataset is not checked.
    _scaled = preprocess(source, scale, groupby="coord_1")

    # Newly defined estimator, returned alongside the transformed data so
    # that the inverse transformation can be exercised below.
    pieces, estimator = split(
        source,
        new_dim="split_sample",
        new_len=5,
        groupby="coord_1",
        keep_coords_as="initial_sample",
        return_estimator=True,
    )
    expected_split_shape = (19, 10, 5)
    assert pieces.var_1.shape == expected_split_shape

    # Inverting yields 95 samples (19 chunks x 5), fewer than the 100 inputs.
    rebuilt = estimator.inverse_transform(pieces)
    expected_rebuilt_shape = (95, 10)
    assert rebuilt.var_1.shape == expected_rebuilt_shape
def test_split():
    """Exercise ``Splitter`` on a DataArray and ``split`` on a Dataset.

    The first half uses a chunk length (5) that divides the 100 samples and
    checks the round trip through ``inverse_transform``; the second half
    uses a chunk length (7) that does not divide the sample count.
    """

    # --- DataArray, sample count is a multiple of the new length ---------
    da_values = np.random.random((100, 10))
    da_coords = {
        "sample": range(100),
        "feature": range(10),
        "coord_1": (["sample", "feature"], np.tile("Test", (100, 10))),
    }
    source_da = xr.DataArray(
        da_values, coords=da_coords, dims=("sample", "feature")
    )

    splitter_options = {
        "new_dim": "split_sample",
        "new_len": 5,
        "reduce_index": "subsample",
        "axis": 1,
        "keep_coords_as": "sample_coord",
    }
    splitter = Splitter(**splitter_options)

    chunked_da = splitter.fit_transform(source_da)

    # 100 samples / 5 per chunk = 20 chunks, new dimension inserted at axis 1.
    assert chunked_da.shape == (20, 5, 10)
    # The first chunk of feature 0 holds the first five samples.
    npt.assert_allclose(chunked_da[0, :, 0], source_da[:5, 0])

    roundtrip_da = splitter.inverse_transform(chunked_da)

    xrt.assert_allclose(source_da, roundtrip_da)

    # --- Dataset, sample count is NOT a multiple of the new length -------
    ds_values = np.random.random((100, 10))
    source_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], ds_values)},
        coords={"sample": range(100), "feature": range(10)},
    )

    chunked_ds = split(
        source_ds,
        new_dim="split_sample",
        new_len=7,
        reduce_index="head",
        axis=1,
        new_index_func=None,
    )

    # 14 chunks of 7 cover 98 of the 100 samples.
    chunked_var = chunked_ds["var_1"]
    assert chunked_var.shape == (14, 7, 10)
    npt.assert_allclose(chunked_ds.var_1[0, :, 0], source_ds.var_1[:7, 0])
# Example #4
# 0
def test_split():
    """Test splitting along the sample dimension with and without remainder.

    Covers the ``Splitter`` estimator (including its inverse transform) on
    a DataArray and the ``split`` convenience function on a Dataset.
    """

    dims = ("sample", "feature")

    # Case 1: DataArray whose 100 samples divide evenly into chunks of 5.
    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={
            "sample": range(100),
            "feature": range(10),
            "coord_1": (["sample", "feature"], np.tile("Test", (100, 10))),
        },
        dims=dims,
    )

    estimator = Splitter(
        new_dim="split_sample",
        new_len=5,
        reduce_index="subsample",
        axis=1,
        keep_coords_as="sample_coord",
    )
    Xt_da = estimator.fit_transform(X_da)

    expected_da_shape = (20, 5, 10)
    assert Xt_da.shape == expected_da_shape
    first_chunk = Xt_da[0, :, 0]
    first_samples = X_da[:5, 0]
    npt.assert_allclose(first_chunk, first_samples)

    # Inverting the split must reproduce the input DataArray.
    Xit_da = estimator.inverse_transform(Xt_da)
    xrt.assert_allclose(X_da, Xit_da)

    # Case 2: Dataset whose 100 samples do not divide evenly into chunks
    # of 7; the expected shape keeps 14 chunks (98 samples).
    variables = {"var_1": (["sample", "feature"], np.random.random((100, 10)))}
    X_ds = xr.Dataset(
        variables,
        coords={"sample": range(100), "feature": range(10)},
    )

    split_options = dict(
        new_dim="split_sample",
        new_len=7,
        reduce_index="head",
        axis=1,
        new_index_func=None,
    )
    Xt_ds = split(X_ds, **split_options)

    expected_ds_shape = (14, 7, 10)
    assert Xt_ds["var_1"].shape == expected_ds_shape
    npt.assert_allclose(Xt_ds.var_1[0, :, 0], X_ds.var_1[:7, 0])