def test_groupwise():
    """Exercise group-wise application of ``preprocess`` and ``split``.

    A 100 x 10 dataset is built whose samples carry two additional
    coordinates. ``preprocess`` is first run with scikit-learn's ``scale``
    grouped by ``coord_1``; then ``split`` is run grouped by ``coord_1`` and
    the shapes of the split result and of its inverse transform are checked.
    """
    from sklearn.preprocessing import scale

    n_samples, n_features = 100, 10

    # Two groups of unequal size (51 "a" samples, 49 "b" samples).
    group_labels = ["a"] * 51 + ["b"] * 49
    cyclic_labels = list(range(10)) * 10

    values = np.random.random((n_samples, n_features))
    coordinates = {
        "sample": range(n_samples),
        "feature": range(n_features),
        "coord_1": (["sample"], group_labels),
        "coord_2": (["sample"], cyclic_labels),
    }
    X_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], values)},
        coords=coordinates,
    )

    # Wrapped scikit-learn function: only checked to run without raising.
    preprocess(X_ds, scale, groupby="coord_1")

    # Estimator defined by this package, applied per group.
    split_options = dict(
        new_dim="split_sample",
        new_len=5,
        groupby="coord_1",
        keep_coords_as="initial_sample",
        return_estimator=True,
    )
    split_ds, splitter = split(X_ds, **split_options)
    # 19 chunks matches 10 + 9 complete chunks of length 5 from the 51- and
    # 49-sample groups (presumably incomplete chunks are dropped -- confirm
    # against ``split``).
    assert split_ds.var_1.shape == (19, 10, 5)

    restored_ds = splitter.inverse_transform(split_ds)
    assert restored_ds.var_1.shape == (95, 10)
def test_groupwise():
    """Check that ``preprocess`` and ``split`` work with ``groupby``.

    Builds a 100 x 10 dataset with two per-sample coordinates, runs
    ``preprocess`` with scikit-learn's ``scale`` grouped by ``coord_1``, then
    splits the dataset per ``coord_1`` group and verifies the shapes of the
    split result and of its inverse transform.
    """
    from sklearn.preprocessing import scale

    # Two groups of unequal size along the sample dimension.
    coord_1 = ['a'] * 51 + ['b'] * 49
    coord_2 = list(range(10)) * 10

    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feature'], np.random.random((100, 10)))},
        coords={
            'sample': range(100),
            'feature': range(10),
            'coord_1': (['sample'], coord_1),
            'coord_2': (['sample'], coord_2),
        },
    )

    # test wrapped sklearn estimator
    # The result is intentionally not bound: nothing is asserted on it, the
    # call only has to complete without raising.
    preprocess(X_ds, scale, groupby='coord_1')

    # test newly defined estimator
    Xt_ds2, estimator = split(
        X_ds,
        new_dim='split_sample',
        new_len=5,
        groupby='coord_1',
        keep_coords_as='initial_sample',
        return_estimator=True,
    )
    # 19 chunks is consistent with 10 + 9 complete chunks of length 5 from
    # the 51- and 49-sample groups (presumably remainders are dropped --
    # confirm against ``split``).
    assert Xt_ds2.var_1.shape == (19, 10, 5)

    Xt_ds2 = estimator.inverse_transform(Xt_ds2)
    assert Xt_ds2.var_1.shape == (95, 10)
def test_split():
    """Check ``Splitter`` on a DataArray and ``split`` on a Dataset.

    The DataArray case uses a sample count (100) that is a multiple of the
    new length (5) and verifies a full round trip through
    ``inverse_transform``. The Dataset case uses a new length (7) that does
    not divide the sample count and verifies only the forward transform.
    """
    n_samples, n_features = 100, 10

    # Case 1: DataArray, number of samples is a multiple of the new length.
    data_array_values = np.random.random((n_samples, n_features))
    data_array_coords = {
        "sample": range(n_samples),
        "feature": range(n_features),
        "coord_1": (
            ["sample", "feature"],
            np.tile("Test", (n_samples, n_features)),
        ),
    }
    X_da = xr.DataArray(
        data_array_values,
        coords=data_array_coords,
        dims=("sample", "feature"),
    )

    splitter_params = dict(
        new_dim="split_sample",
        new_len=5,
        reduce_index="subsample",
        axis=1,
        keep_coords_as="sample_coord",
    )
    splitter = Splitter(**splitter_params)

    split_da = splitter.fit_transform(X_da)
    assert split_da.shape == (20, 5, 10)

    # The first chunk must hold the first five samples of the first feature.
    first_chunk = split_da[0, :, 0]
    first_samples = X_da[:5, 0]
    npt.assert_allclose(first_chunk, first_samples)

    roundtrip_da = splitter.inverse_transform(split_da)
    xrt.assert_allclose(X_da, roundtrip_da)

    # Case 2: Dataset, number of samples is NOT a multiple of the new length.
    dataset_values = np.random.random((n_samples, n_features))
    X_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], dataset_values)},
        coords={"sample": range(n_samples), "feature": range(n_features)},
    )

    split_ds = split(
        X_ds,
        new_dim="split_sample",
        new_len=7,
        reduce_index="head",
        axis=1,
        new_index_func=None,
    )
    # 14 chunks of 7 cover 98 of the 100 samples.
    assert split_ds["var_1"].shape == (14, 7, 10)
    npt.assert_allclose(split_ds.var_1[0, :, 0], X_ds.var_1[:7, 0])
def test_split():
    """Verify splitting along the sample dimension for both xarray types.

    First a ``Splitter`` is fitted on a DataArray whose sample count is
    divisible by ``new_len`` and the result is inverted and compared with the
    input. Then the ``split`` function is applied to a Dataset whose sample
    count is not divisible by ``new_len`` and the output shape and first
    chunk are checked.
    """
    # --- DataArray, sample count divisible by the new length -------------
    label_grid = np.tile('Test', (100, 10))
    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords=dict(
            sample=range(100),
            feature=range(10),
            coord_1=(['sample', 'feature'], label_grid),
        ),
        dims=('sample', 'feature'),
    )

    estimator = Splitter(
        new_dim='split_sample',
        new_len=5,
        reduce_index='subsample',
        axis=1,
        keep_coords_as='sample_coord',
    )
    chunked = estimator.fit_transform(X_da)
    assert chunked.shape == (20, 5, 10)
    npt.assert_allclose(chunked[0, :, 0], X_da[:5, 0])

    # Inverting the split must give back the original array.
    recovered = estimator.inverse_transform(chunked)
    xrt.assert_allclose(X_da, recovered)

    # --- Dataset, sample count NOT divisible by the new length -----------
    variables = {
        'var_1': (['sample', 'feature'], np.random.random((100, 10))),
    }
    X_ds = xr.Dataset(
        variables,
        coords=dict(sample=range(100), feature=range(10)),
    )

    keyword_args = {
        'new_dim': 'split_sample',
        'new_len': 7,
        'reduce_index': 'head',
        'axis': 1,
        'new_index_func': None,
    }
    chunked_ds = split(X_ds, **keyword_args)

    # 100 samples with new_len=7: the asserted result has 14 chunks.
    assert chunked_ds['var_1'].shape == (14, 7, 10)
    npt.assert_allclose(chunked_ds.var_1[0, :, 0], X_ds.var_1[:7, 0])