Esempio n. 1
0
    def test_transform_before_fit_produces_error(self):
        """Calling ``transform`` on a MapToInt that was never fitted must raise."""
        column_values = ["one", "two", "one", "four", "six", "two", "one", "one"]
        frame = pd.DataFrame({"one": column_values})
        unfitted_mapper = preprocessing.MapToInt("one")

        # No fit() call on purpose: transform is expected to refuse to run.
        with pytest.raises(NotFittedError):
            unfitted_mapper.transform(frame)
Esempio n. 2
0
 def test_mapping_with_nans(self):
     """Fitting on a column that contains NaN must not add NaN to the mapping."""
     column_values = ["one", "two", "one", "four", "six", np.nan, "one", "one"]
     frame = pd.DataFrame({"one": column_values})
     mapper = preprocessing.MapToInt("one")
     mapper.fit(frame)
     # Only the four real labels are expected as keys, numbered by first appearance.
     assert mapper.mapping == {"one": 0, "two": 1, "four": 2, "six": 3}
Esempio n. 3
0
    def test_fit_bad_colname_produces_error(self):
        """Fitting against a column name absent from the frame must raise KeyError."""
        column_values = ["one", "two", "one", "four", "six", "two", "one", "one"]
        frame = pd.DataFrame({"one": column_values})
        # "blahblahblah" is deliberately not a column of ``frame``.
        mapper = preprocessing.MapToInt("blahblahblah")

        with pytest.raises(KeyError):
            mapper.fit(frame)
Esempio n. 4
0
 def test_transform_copy(self):
     """With ``copy=True``, ``transform`` must leave the input frame unchanged."""
     input_df = pd.DataFrame({"one": ["one", "two", "one", "four",
                                      "six", "two", "one", "one"]})
     # Snapshot taken before fit/transform so any in-place mutation is detected.
     expected_df = input_df.copy()
     mti = preprocessing.MapToInt("one", copy=True)
     mti.fit(input_df)
     # The returned frame is not inspected here; only the input's integrity is.
     mti.transform(input_df)
     # pd.testing is the public location of assert_frame_equal;
     # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
     pd.testing.assert_frame_equal(input_df, expected_df)
Esempio n. 5
0
 def test_transform_inplace(self):
     """With ``copy=False``, ``transform`` must rewrite the input frame itself."""
     input_df = pd.DataFrame({"one": ["one", "two", "one", "four",
                                      "six", "two", "one", "one"]})
     mti = preprocessing.MapToInt("one", copy=False)
     mti.fit(input_df)
     # Return value intentionally ignored: the assertion targets input_df.
     mti.transform(input_df)
     expected_df = pd.DataFrame({"one": [0, 1, 0, 2, 3, 1, 0, 0]})
     # pd.testing is the public location of assert_frame_equal;
     # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
     pd.testing.assert_frame_equal(input_df, expected_df)
Esempio n. 6
0
 def test_transform_with_nans(self):
     """NaN entries in the mapped column must stay NaN after ``transform``."""
     input_df = pd.DataFrame({"one": ["one", "two", "one", "four",
                                      "six", "two", np.nan, "one"]})
     mti = preprocessing.MapToInt("one")
     mti.fit(input_df)
     transformed_df = mti.transform(input_df)
     # The NaN at position 6 is expected to pass through unmapped.
     expected_df = pd.DataFrame({"one": [0, 1, 0, 2, 3, 1, np.nan, 0]})
     # pd.testing is the public location of assert_frame_equal;
     # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
     pd.testing.assert_frame_equal(transformed_df, expected_df)
Esempio n. 7
0
    def test_map_to_int_to_onehot(self):
        """MapToInt chained into OneHotEncoderFromDataFrame yields one-hot columns."""
        fit_df = pd.DataFrame({"quarter": ["Q1", "Q1", "Q1", "Q2", "Q2"]})
        transform_df = fit_df.copy()

        mti = preprocessing.MapToInt("quarter", copy=True)
        ohe = preprocessing.OneHotEncoderFromDataFrame(
            categorical_feature_names=["quarter"], copy=True)
        # Step order matters: labels are mapped to ints before one-hot encoding.
        pipe = Pipeline(steps=[("one", mti), ("two", ohe)])
        pipe.fit(fit_df)
        output_df = pipe.transform(transform_df)

        # Two distinct quarters -> two indicator columns of floats.
        expected_df = pd.DataFrame({"onehot_col1": [1.0, 1, 1, 0, 0],
                                    "onehot_col2": [0.0, 0, 0, 1, 1]})
        # pd.testing is the public location of assert_frame_equal;
        # pd.util.testing was deprecated in pandas 1.0 and removed in 2.0.
        pd.testing.assert_frame_equal(output_df, expected_df)