Ejemplo n.º 1
0
    def test_basic(self, daskify, values):
        """Encode ``dummy[['A', 'D']]`` and verify the round trip.

        The transformed output must equal a frame holding ``D`` followed by
        the ``uint8`` columns ``A_a``, ``A_b`` and ``A_c``; feeding that
        output back through ``inverse_transform`` must reproduce the input.

        Parameters
        ----------
        daskify
            When truthy, the input is wrapped with ``dd.from_pandas`` (two
            partitions) and both frames are computed before the final
            comparison.
        values
            When truthy, ``inverse_transform`` is given ``.values`` of the
            transformed output instead of the transformed frame itself.
        """
        frame = dummy[['A', 'D']]
        if daskify:
            frame = dd.from_pandas(frame, 2)

        encoder = dpp.DummyEncoder().fit(frame)
        encoded = encoder.transform(frame)

        # Expected uint8 indicator columns; "D" is listed first.
        indicator_bits = {
            "A_a": [1, 0, 0, 1],
            "A_b": [0, 1, 0, 0],
            "A_c": [0, 0, 1, 0],
        }
        expected_data = {"D": np.array([1, 2, 3, 4])}
        for column, bits in indicator_bits.items():
            expected_data[column] = np.array(bits, dtype='uint8')
        expected = pd.DataFrame(
            expected_data,
            columns=['D', 'A_a', 'A_b', 'A_c'],
        )

        assert_eq(encoded, expected)

        inverse_input = encoded.values if values else encoded
        restored = encoder.inverse_transform(inverse_input)

        if daskify:
            frame = frame.compute()
            restored = restored.compute()

        tm.assert_frame_equal(restored, frame)
Ejemplo n.º 2
0
    def test_encode_subset_of_columns(self, daskify):
        """Encode only column ``B`` of ``dummy[["A", "B"]]`` and round-trip it.

        The encoder is built with ``columns=["B"]``. The transformed output
        must equal a frame whose ``A`` column is an ordered categorical and
        whose remaining columns are the ``uint8`` columns ``B_a``, ``B_b``
        and ``B_c``; ``inverse_transform`` must reproduce the input.

        Parameters
        ----------
        daskify
            When truthy, the input is wrapped with ``dd.from_pandas`` (two
            partitions) and both frames are computed before the final
            comparison.
        """
        frame = dummy[["A", "B"]]
        if daskify:
            frame = dd.from_pandas(frame, 2)

        encoder = dpp.DummyEncoder(columns=["B"]).fit(frame)
        encoded = encoder.transform(frame)

        # "A" is expected unchanged; only "B" becomes indicator columns.
        indicator_bits = {
            "B_a": [1, 0, 0, 1],
            "B_b": [0, 1, 0, 0],
            "B_c": [0, 0, 1, 0],
        }
        expected_data = {
            "A": pd.Categorical(["a", "b", "c", "a"], ordered=True),
        }
        for column, bits in indicator_bits.items():
            expected_data[column] = np.array(bits, dtype="uint8")
        expected = pd.DataFrame(
            expected_data,
            columns=["A", "B_a", "B_b", "B_c"],
        )

        assert_eq_df(encoded, expected)

        restored = encoder.inverse_transform(encoded)

        if daskify:
            frame = frame.compute()
            restored = restored.compute()

        tm.assert_frame_equal(restored, frame)
Ejemplo n.º 3
0
 def test_inverse_transform(self):
     """Check ``inverse_transform`` undoes ``transform`` on a dask frame.

     The round trip is asserted twice: once passing the transformed frame
     and once passing its ``.values``.
     """
     source = pd.DataFrame(
         {"A": np.arange(10), "B": pd.Categorical(["a"] * 4 + ["b"] * 6)}
     )
     ddf = dd.from_pandas(source, npartitions=2)

     encoder = dpp.DummyEncoder()
     encoder.fit(ddf)

     from_frame = encoder.inverse_transform(encoder.transform(ddf))
     assert_eq_df(ddf, from_frame)

     from_values = encoder.inverse_transform(encoder.transform(ddf).values)
     assert_eq_df(ddf, from_values)
Ejemplo n.º 4
0
    def test_drop_first(self, daskify):
        """Check ``DummyEncoder(drop_first=True)`` on ``dummy``.

        The transformed output must have exactly 8 columns, and
        ``inverse_transform`` must give back a frame equal to ``dummy``.

        Parameters
        ----------
        daskify
            When truthy, ``dummy`` is wrapped with ``dd.from_pandas`` (two
            partitions) and the result is computed before the comparison.
        """
        frame = dd.from_pandas(dummy, 2) if daskify else dummy

        encoder = dpp.DummyEncoder(drop_first=True)
        encoded = encoder.fit_transform(frame)
        assert len(encoded.columns) == 8

        restored = encoder.inverse_transform(encoded)
        if daskify:
            # Both collections are computed together; only the restored
            # frame is used by the comparison below.
            restored, _ = compute(restored, frame)
        tm.assert_frame_equal(restored, dummy)
Ejemplo n.º 5
0
 def test_transform_raises(self):
     """Check ``transform`` rejects a frame missing a fitted column.

     The encoder is fitted on ``dummy``; transforming ``dummy`` without
     column ``B`` must raise ``ValueError`` with the expected message.
     """
     encoder = dpp.DummyEncoder()
     encoder.fit(dummy)

     without_b = dummy.drop("B", axis='columns')
     with pytest.raises(ValueError) as excinfo:
         encoder.transform(without_b)

     assert excinfo.match("Columns of 'X' do not match the training")
Ejemplo n.º 6
0
 def test_da(self):
     """Check ``fit_transform`` on a dask frame returns a ``dd.DataFrame``."""
     ddf = dd.from_pandas(dummy, npartitions=2)
     encoded = dpp.DummyEncoder().fit_transform(ddf)
     assert isinstance(encoded, dd.DataFrame)
Ejemplo n.º 7
0
 def test_transform_explicit_columns(self):
     """Check ``transform`` rejects a missing column with explicit ``columns``.

     The encoder is built with ``columns=["A", "B", "C"]`` and fitted on
     ``dummy``; transforming ``dummy`` without column ``B`` must raise
     ``ValueError`` with the expected message.
     """
     encoder = dpp.DummyEncoder(columns=["A", "B", "C"])
     encoder.fit(dummy)

     without_b = dummy.drop("B", axis="columns")
     with pytest.raises(ValueError) as excinfo:
         encoder.transform(without_b)

     assert excinfo.match("Columns of 'X' do not match the training")