예제 #1
0
    def test_to_and_from_json_booleans(self):
        # training data
        d = {"col1": ["a", "a", "c"], "col2": [True, True, False]}
        df = pd.DataFrame(data=d)
        # fit binarizer
        lb1 = LabelBinarizer()
        lb1.fit(df, "col1")
        lb2 = LabelBinarizer()
        lb2.fit(df, "col2")
        # test data
        d_test = {
            "col1": ["c", "c", "a"],
            "col2": [False, False, True],
            "col3": [2, 3, 4],
        }
        df_test = pd.DataFrame(data=d_test)
        # to json and from json
        new_lb1 = LabelBinarizer()
        new_lb2 = LabelBinarizer()
        new_lb1.from_json(lb1.to_json())
        new_lb2.from_json(json.loads(json.dumps(lb2.to_json(), indent=4)))

        # transform
        df_test = new_lb1.transform(df_test, "col1")
        df_test = new_lb2.transform(df_test, "col2")
        # for binary column, only one value is left, old column should be deleted
        self.assertTrue("col1_c" in df_test.columns)
        self.assertTrue("col1" not in df_test.columns)
        self.assertEqual(2, np.sum(df_test["col1_c"]))
        # for multiple value colum, all columns should be added
        self.assertTrue("col2_True" in df_test.columns)
        self.assertTrue("col2" not in df_test.columns)
        self.assertEqual(1, np.sum(df_test["col2_True"]))
        # do not touch continuous attribute
        self.assertTrue("col3" in df_test.columns)
예제 #2
0
    def inverse_transform(self, X):
        for column, lbl_params in self._convert_params.items():
            if "unique_values" in lbl_params and "new_columns" in lbl_params:
                # convert to one hot
                lbl = LabelBinarizer()
                lbl.from_json(lbl_params)
                X = lbl.inverse_transform(X, column)  # should raise exception
            else:
                # convert to integer
                lbl = LabelEncoder()
                lbl.from_json(lbl_params)
                X.loc[:, column] = lbl.inverse_transform(X.loc[:, column])

        return X
예제 #3
0
    def transform(self, X):
        if (self._convert_method == PreprocessingCategorical.CONVERT_LOO
                and self._columns):
            return self._enc.transform(X)
        else:
            for column, lbl_params in self._convert_params.items():
                if "unique_values" in lbl_params and "new_columns" in lbl_params:
                    # convert to one hot
                    lbl = LabelBinarizer()
                    lbl.from_json(lbl_params)
                    X = lbl.transform(X, column)
                else:
                    # convert to integer
                    lbl = LabelEncoder()
                    lbl.from_json(lbl_params)
                    X.loc[:, column] = lbl.transform(X.loc[:, column])

            return X