def test_groupwise():
    """Run ``preprocess`` and ``split`` group-wise over the ``coord_1`` coordinate."""
    from sklearn.preprocessing import scale

    # Two sample-level coordinates: an uneven 51/49 partition into "a"/"b"
    # and a repeating 0..9 pattern.
    group_labels = ["a"] * 51 + ["b"] * 49
    cyclic_labels = list(range(10)) * 10

    data_vars = {"var_1": (["sample", "feature"], np.random.random((100, 10)))}
    coords = {
        "sample": range(100),
        "feature": range(10),
        "coord_1": (["sample"], group_labels),
        "coord_2": (["sample"], cyclic_labels),
    }
    X_ds = xr.Dataset(data_vars, coords=coords)

    # Wrapped sklearn function: the result is discarded, so this only checks
    # that the group-wise call runs without raising.
    preprocess(X_ds, scale, groupby="coord_1")

    # Estimator defined by the package itself.
    split_options = {
        "new_dim": "split_sample",
        "new_len": 5,
        "groupby": "coord_1",
        "keep_coords_as": "initial_sample",
        "return_estimator": True,
    }
    split_ds, estimator = split(X_ds, **split_options)
    # 19 chunks of length 5 are expected -- presumably 10 from the 51 "a"
    # samples and 9 from the 49 "b" samples, leftovers dropped (TODO confirm).
    assert split_ds.var_1.shape == (19, 10, 5)

    # Undoing the split is expected to yield 19 * 5 = 95 samples.
    restored_ds = estimator.inverse_transform(split_ds)
    assert restored_ds.var_1.shape == (95, 10)
def test_preprocess():
    """Check ``preprocess`` with sklearn's ``scale`` on a DataArray and a Dataset.

    The DataArray result is compared against ``scale`` applied directly to the
    underlying data; the Dataset result is compared against ``scale`` applied
    to every data variable.
    """
    from sklearn.preprocessing import scale

    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={'sample': range(100), 'feature': range(10)},
        dims=('sample', 'feature'),
    )

    # Build the expected result on a copy.  Binding ``Xt_da_gt = X_da`` and
    # then assigning ``.data`` would overwrite the input in place, so
    # ``preprocess`` would be handed already-scaled data.
    Xt_da_gt = X_da.copy()
    Xt_da_gt.data = scale(X_da)

    Xt_da = preprocess(X_da, scale)

    xrt.assert_allclose(Xt_da, Xt_da_gt)

    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feature'], np.random.random((100, 10)))},
        coords={'sample': range(100), 'feature': range(10)},
    )

    Xt_ds = preprocess(X_ds, scale)

    # NOTE(review): newer xarray releases recommend ``Dataset.map`` over
    # ``Dataset.apply``; kept as-is to avoid raising the minimum version.
    xrt.assert_allclose(Xt_ds, X_ds.apply(scale))
def test_preprocess():
    """Check ``preprocess`` with sklearn's ``scale`` on a DataArray and a Dataset.

    The DataArray result is compared against ``scale`` applied directly to the
    underlying data; the Dataset result is compared against ``scale`` applied
    to every data variable.
    """
    from sklearn.preprocessing import scale

    X_da = xr.DataArray(
        np.random.random((100, 10)),
        coords={"sample": range(100), "feature": range(10)},
        dims=("sample", "feature"),
    )

    # Build the expected result on a copy.  Binding ``Xt_da_gt = X_da`` and
    # then assigning ``.data`` would overwrite the input in place, so
    # ``preprocess`` would be handed already-scaled data.
    Xt_da_gt = X_da.copy()
    Xt_da_gt.data = scale(X_da)

    Xt_da = preprocess(X_da, scale)

    xrt.assert_allclose(Xt_da, Xt_da_gt)

    X_ds = xr.Dataset(
        {"var_1": (["sample", "feature"], np.random.random((100, 10)))},
        coords={"sample": range(100), "feature": range(10)},
    )

    Xt_ds = preprocess(X_ds, scale)

    # NOTE(review): newer xarray releases recommend ``Dataset.map`` over
    # ``Dataset.apply``; kept as-is to avoid raising the minimum version.
    xrt.assert_allclose(Xt_ds, X_ds.apply(scale))
def test_groupwise():
    """Smoke-test group-wise ``preprocess`` and the ``split`` round trip."""
    from sklearn.preprocessing import scale

    sample_dim = ['sample']

    # Grouping coordinate with uneven groups (51 x 'a', 49 x 'b') plus a
    # second coordinate cycling through 0..9.
    labels_ab = ['a'] * 51 + ['b'] * 49
    labels_cycle = list(range(10)) * 10

    values = np.random.random((100, 10))
    X_ds = xr.Dataset(
        {'var_1': (['sample', 'feature'], values)},
        coords={
            'sample': range(100),
            'feature': range(10),
            'coord_1': (sample_dim, labels_ab),
            'coord_2': (sample_dim, labels_cycle),
        },
    )

    # Wrapped sklearn function; the return value is intentionally unused, so
    # only "does not raise" is verified here.
    preprocess(X_ds, scale, groupby='coord_1')

    # Estimator implemented by the package itself.
    split_result = split(
        X_ds,
        new_dim='split_sample',
        new_len=5,
        groupby='coord_1',
        keep_coords_as='initial_sample',
        return_estimator=True,
    )
    Xt_split, estimator = split_result
    # Expected: 19 windows of 5 samples -- presumably 10 + 9 per group with
    # the remainders discarded (TODO confirm against ``split``).
    assert Xt_split.var_1.shape == (19, 10, 5)

    # The inverse transform should flatten back to 19 * 5 = 95 samples.
    Xt_restored = estimator.inverse_transform(Xt_split)
    assert Xt_restored.var_1.shape == (95, 10)