예제 #1
0
    def test_basic(self):
        """Fit both PolynomialFeatures flavours and compare their fitted state.

        ``dpp.PolynomialFeatures`` is fitted on ``X`` while
        ``spp.PolynomialFeatures`` is fitted on ``X.compute()``; the
        ``_transformer`` held by the former must equal the latter.
        """
        wrapped = dpp.PolynomialFeatures()
        wrapped.fit(X)

        reference = spp.PolynomialFeatures()
        reference.fit(X.compute())

        # The comparison targets the inner ``_transformer`` object, not the
        # ``dpp`` estimator itself.
        assert_estimator_equal(wrapped._transformer, reference)
예제 #2
0
 def test_transformer_params(self):
     """Check that constructor parameters reach the inner ``_transformer``.

     Non-default values are used for ``degree``, ``interaction_only`` and
     ``include_bias``; after fitting, each must match on both objects.
     """
     settings = {
         "degree": 3,
         "interaction_only": True,
         "include_bias": False,
     }
     poly = dpp.PolynomialFeatures(**settings)
     poly.fit(X)

     inner = poly._transformer
     assert inner.degree == poly.degree
     # Boolean flags are compared by identity, not just equality.
     assert inner.interaction_only is poly.interaction_only
     assert inner.include_bias is poly.include_bias
예제 #3
0
 def test_df_transform_index(self, daskify):
     """Check a degree-1 transform of a resampled frame against its input.

     ``daskify`` selects whether the copied ``df`` is used as is or after
     ``.compute()``. The frame is passed through ``sample(frac=1.0)`` before
     being transformed with ``preserve_dataframe=True``.
     """
     source = copy(df)
     shuffled = (source if daskify else source.compute()).sample(frac=1.0)

     poly = dpp.PolynomialFeatures(preserve_dataframe=True, degree=1)
     transformed = poly.fit_transform(shuffled)

     # The first output column is skipped before comparing -- presumably the
     # bias column added by the transformer (TODO confirm).
     assert_eq_df(transformed.iloc[:, 1:], shuffled, check_dtype=False)
예제 #4
0
    def test_df_transform(self, daskify):
        """Compare dataframe-preserving, default and ``spp`` transforms.

        ``daskify`` selects whether ``df`` is used as is or after
        ``.compute()``. When it is truthy, the results of both ``dpp``
        estimators must be dask collections, and the computed frame output
        must match the output for the computed input.
        """
        frame = df if daskify else df.compute()

        keep_frame = dpp.PolynomialFeatures(preserve_dataframe=True)
        plain = dpp.PolynomialFeatures()
        reference = spp.PolynomialFeatures()

        out_frame = keep_frame.fit_transform(frame)
        out_plain = plain.fit_transform(frame)
        out_reference = reference.fit_transform(frame)

        if daskify:
            # Refits ``keep_frame`` on the computed input; ``out_frame`` was
            # already produced above.
            out_computed = keep_frame.fit_transform(frame.compute())
            for lazy_result in (out_frame, out_plain):
                assert dask.is_dask_collection(lazy_result)
            # The index is reset before comparing against ``out_computed``.
            assert_eq_df(
                out_frame.compute().reset_index(drop=True), out_computed
            )

        for expected in (out_reference, out_plain):
            assert_eq_ar(out_frame.values, expected)
예제 #5
0
    def test_array_transform(self):
        """Compare ``fit_transform`` on ``X`` with ``spp`` on ``X.compute()``.

        The fitted estimators must be equal, the ``dpp`` output must be a
        dask collection, and both outputs must hold equal values.
        """
        lazy_poly = dpp.PolynomialFeatures()
        lazy_out = lazy_poly.fit_transform(X)

        reference = spp.PolynomialFeatures()
        reference_out = reference.fit_transform(X.compute())

        assert_estimator_equal(lazy_poly, reference)
        assert dask.is_dask_collection(lazy_out)
        assert_eq_ar(lazy_out, reference_out)
예제 #6
0
    def test_input_types(self):
        """Check that fitting on different input containers gives equal estimators.

        The inputs are ``df``, ``df.values``, ``df.compute()`` and
        ``df.compute().values``; each pair of fits is passed to
        ``assert_estimator_equal``.
        """
        poly = dpp.PolynomialFeatures()
        reference = spp.PolynomialFeatures()

        lazy_values = df.values
        computed_frame = df.compute()
        computed_values = computed_frame.values

        # Each row: (left estimator, left input, right estimator, right input).
        # NOTE(review): in the first three rows both sides are fitted on the
        # same ``poly`` instance; if ``fit`` returns the estimator itself
        # (the usual scikit-learn convention -- confirm), those rows compare
        # ``poly`` with itself.
        comparisons = (
            (poly, df, poly, computed_frame),
            (poly, df, poly, computed_values),
            (poly, lazy_values, poly, computed_values),
            (poly, df, reference, computed_frame),
            (poly, df, reference, computed_values),
        )
        for left, left_input, right, right_input in comparisons:
            assert_estimator_equal(left.fit(left_input), right.fit(right_input))
예제 #7
0
    def test_transform_array(self):
        """Compare ``fit_transform`` outputs for ``X.compute()`` and ``X``.

        One ``dpp.PolynomialFeatures`` instance is fitted twice, first on
        ``X.compute()`` and then on ``X``; both outputs must equal the
        ``spp.PolynomialFeatures`` output for ``X.compute()``.
        """
        expected = spp.PolynomialFeatures().fit_transform(X.compute())

        poly = dpp.PolynomialFeatures()
        # Input already computed; the output is compared as returned.
        from_computed = poly.fit_transform(X.compute())
        # Input passed as is; the output is computed before comparison.
        from_lazy = poly.fit_transform(X).compute()

        for actual in (from_computed, from_lazy):
            assert_eq_ar(actual, expected)
예제 #8
0
 def test_transformed_shape(self):
     """Check that every supported input transforms to the expected width.

     The expected column count is the number of names returned by
     ``get_feature_names()`` after fitting on ``X``.
     """
     poly = dpp.PolynomialFeatures()
     poly.fit(X)
     expected_cols = len(poly.get_feature_names())

     # Built from ``df.values``; these stand for inputs whose row counts
     # are not known up front.
     unknown_rows_array = df.values
     unknown_rows_frame = unknown_rows_array.to_dask_dataframe(
         columns=df.columns
     )

     inputs = (
         X,                   # dask array
         X.compute(),         # numpy array
         df,                  # dask dataframe
         df.compute(),        # pandas dataframe
         unknown_rows_array,  # dask array with nan rows
         unknown_rows_frame,  # dask dataframe with nan rows
     )
     for data in inputs:
         assert poly.transform(data).shape[1] == expected_cols