def test_df_values(self):
    """Check that MinMaxScaler agrees when fed ``X`` versus ``df``.

    Two independent ``dpp.MinMaxScaler`` instances are fitted, one on ``X``
    and one on ``df``. Their fitted attributes, their ``transform`` output
    and their ``fit_transform`` output must all match.

    NOTE(review): ``X`` and ``df`` are fixtures defined outside this block;
    presumably they hold the same data as an array and as a dataframe --
    confirm against the module setup.
    """
    array_scaler = dpp.MinMaxScaler()
    frame_scaler = dpp.MinMaxScaler()

    scaled_array = array_scaler.fit_transform(X)
    scaled_frame = frame_scaler.fit_transform(df)

    fitted_attrs = ("data_min_", "data_max_", "data_range_", "scale_", "min_")
    for name in fitted_attrs:
        from_array = getattr(array_scaler, name)
        # The dataframe-fitted attribute is unwrapped through ``.values``
        # before being compared with the array-fitted one.
        from_frame = getattr(frame_scaler, name).values
        assert_eq_ar(from_array, from_frame)

    transformed_array = array_scaler.transform(X)
    transformed_frame = frame_scaler.transform(df).values
    assert_eq_ar(transformed_array, transformed_frame)

    # Unwrap the fit_transform result only when it exposes ``.values``.
    scaled_frame = getattr(scaled_frame, "values", scaled_frame)
    assert_eq_ar(scaled_array, scaled_frame)
def test_basic(self):
    """Fit ``dpp.MinMaxScaler`` and ``spp.MinMaxScaler`` and compare them.

    The ``dpp`` estimator is fitted on ``X`` directly, the ``spp`` estimator
    on ``X.compute()``. The two are then compared with
    ``assert_estimator_equal``, leaving ``n_samples_seen_`` out of the
    comparison.
    """
    ours = dpp.MinMaxScaler()
    reference = spp.MinMaxScaler()

    ours.fit(X)
    reference.fit(X.compute())

    assert_estimator_equal(ours, reference, exclude="n_samples_seen_")
def test_df_values(self):
    """Check that array- and dataframe-fitted MinMaxScalers are interchangeable.

    One ``dpp.MinMaxScaler`` is fitted on ``X`` and another on ``df``. The
    test asserts that their fitted attributes match, that each scaler
    produces the same transform for either input, and that the two
    ``fit_transform`` results are equal.

    NOTE(review): ``X`` and ``df`` come from outside this block; presumably
    they are the same data in array and dataframe form -- confirm.
    """
    array_scaler = dpp.MinMaxScaler()
    frame_scaler = dpp.MinMaxScaler()

    scaled_array = array_scaler.fit_transform(X)
    scaled_frame = frame_scaler.fit_transform(df)

    fitted_attrs = ("data_min_", "data_max_", "data_range_", "scale_", "min_")
    for name in fitted_attrs:
        from_array = getattr(array_scaler, name)
        from_frame = getattr(frame_scaler, name).values
        assert_eq_ar(from_array, from_frame)

    # Cross-check every scaler/input pairing; dataframe outputs are
    # unwrapped with ``.values`` before comparison.
    assert_eq_ar(array_scaler.transform(X), frame_scaler.transform(X))
    assert_eq_ar(array_scaler.transform(df).values, frame_scaler.transform(X))
    assert_eq_ar(array_scaler.transform(X), frame_scaler.transform(df).values)

    assert_eq_ar(scaled_array, scaled_frame.values)
def test_df_column_slice(self):
    """Check that ``columns=`` scales only the selected dataframe columns.

    ``dpp.MinMaxScaler(columns=mask)`` is applied to ``df2`` and compared
    with ``spp.MinMaxScaler`` fitted on every column of ``df2.compute()``:

    * the masked columns must equal the matching columns of the reference
      output;
    * all other columns must be left identical to the input.
    """
    mask = ["3", "4"]
    # Look up where each masked column sits inside ``df2``. The reference
    # scaler returns a plain matrix covering *all* columns, so it has to be
    # indexed by position in ``df2``, not by position within ``mask``
    # (which would always be 0, 1, ...).
    all_columns = list(df2.columns)
    mask_ix = [all_columns.index(name) for name in mask]

    a = dpp.MinMaxScaler(columns=mask)
    b = spp.MinMaxScaler()

    dfa = a.fit_transform(df2).compute()
    mxb = b.fit_transform(df2.compute())

    assert isinstance(dfa, pd.DataFrame)
    assert_eq_ar(dfa[mask].values, mxb[:, mask_ix])
    assert_eq_df(dfa.drop(mask, axis=1), df2.drop(mask, axis=1).compute())
def test_df_inverse_transform(self):
    """Round-trip ``df2`` through a column-restricted MinMaxScaler.

    Scaling columns ``"3"`` and ``"4"`` with ``fit_transform`` and then
    applying ``inverse_transform`` must give back a frame equal to
    ``df2`` once both sides are computed.
    """
    columns_to_scale = ["3", "4"]
    scaler = dpp.MinMaxScaler(columns=columns_to_scale)

    scaled = scaler.fit_transform(df2)
    round_tripped = scaler.inverse_transform(scaled).compute()
    expected = df2.compute()

    assert_eq_df(round_tripped, expected)
def test_inverse_transform(self):
    """Round-trip ``X`` through MinMaxScaler and expect the original back.

    ``inverse_transform(fit_transform(X))`` is computed and compared with
    ``X.compute()``.
    """
    scaler = dpp.MinMaxScaler()

    scaled = scaler.fit_transform(X)
    round_tripped = scaler.inverse_transform(scaled).compute()
    expected = X.compute()

    assert_eq_ar(round_tripped, expected)
def dataScaling(users_genres):
    """Return ``users_genres`` rescaled by a freshly fitted MinMaxScaler.

    A new ``preprocessing.MinMaxScaler`` is fitted on ``users_genres`` and
    then used to transform that same data.

    NOTE(review): ``preprocessing`` is imported outside this block --
    presumably ``sklearn.preprocessing``; confirm.
    """
    min_max = preprocessing.MinMaxScaler()
    min_max.fit(users_genres)
    scaled = min_max.transform(users_genres)
    return scaled
def test_df_values(self):
    """Check that MinMaxScaler yields the same values for ``X`` and ``df``.

    The same ``dpp.MinMaxScaler`` instance is fit-transformed on ``X`` and
    then on ``df``; both results are computed and compared.
    """
    a = dpp.MinMaxScaler()
    assert_eq_ar(
        a.fit_transform(X).compute(),
        # ``DataFrame.as_matrix()`` was deprecated in pandas 0.23 and removed
        # in 1.0; ``.values`` is the equivalent supported accessor.
        a.fit_transform(df).compute().values,
    )
def test_df_inverse_transform(self):
    """Round-trip ``df2`` through a column-restricted MinMaxScaler.

    Scaling columns ``"3"`` and ``"4"`` and then inverting must give a
    result that is still a dask collection and that equals ``df2``.
    """
    mask = ["3", "4"]
    a = dpp.MinMaxScaler(columns=mask)
    result = a.inverse_transform(a.fit_transform(df2))
    # Fixed the misspelled ``is_dask_colelction``, which raised
    # AttributeError before the assertion could be evaluated.
    assert dask.is_dask_collection(result)
    assert_eq_df(result, df2)
def test_inverse_transform(self):
    """Round-trip ``X`` through MinMaxScaler without computing it.

    The result of ``inverse_transform(fit_transform(X))`` must be a dask
    collection and must compare equal to ``X``.
    """
    scaler = dpp.MinMaxScaler()

    scaled = scaler.fit_transform(X)
    round_tripped = scaler.inverse_transform(scaled)

    assert dask.is_dask_collection(round_tripped)
    assert_eq_ar(round_tripped, X)