def inverse_transform(self, X):
    """Reverse the stored categorical conversions on ``X``.

    Iterates over ``self._convert_params`` (column name -> saved converter
    parameters). Parameters that carry both ``"unique_values"`` and
    ``"new_columns"`` are restored with a ``LabelBinarizer`` (one-hot
    conversion); every other entry is restored with a ``LabelEncoder``
    (integer conversion).

    Args:
        X: Data to restore. In the integer branch the column is written
            back in place through ``X.loc``; in the one-hot branch ``X`` is
            rebound to whatever the binarizer returns.

    Returns:
        ``X`` after all recorded columns have been processed.
    """
    for col_name, saved_params in self._convert_params.items():
        was_one_hot = "unique_values" in saved_params and "new_columns" in saved_params

        if was_one_hot:
            binarizer = LabelBinarizer()
            binarizer.from_json(saved_params)
            # NOTE(review): the original author annotated this call with
            # "should raise exception". Confirm that
            # LabelBinarizer.inverse_transform really accepts a second
            # ``column`` argument before relying on this branch.
            X = binarizer.inverse_transform(X, col_name)
            continue

        # Integer-encoded column: decode the values and overwrite in place.
        encoder = LabelEncoder()
        encoder.from_json(saved_params)
        X.loc[:, col_name] = encoder.inverse_transform(X.loc[:, col_name])

    return X
def test_inverse_transform(self):
    """Round-trip two string columns through ``LabelBinarizer``.

    For each column a fresh binarizer is fitted and applied, the expected
    ``<column>_<value>`` indicator columns are checked, and after
    ``inverse_transform`` the probed indicator column must be gone.
    """
    frame = pd.DataFrame(data={"col1": ["a", "a", "c"], "col2": ["w", "e", "d"]})

    # (source column, indicator columns that must exist, indicator column
    #  whose sum is checked and which must vanish after the inverse step)
    scenarios = [
        ("col1", ["col1_c"], "col1_c"),
        ("col2", ["col2_w", "col2_e", "col2_d"], "col2_w"),
    ]

    for source_col, expected_indicators, probe in scenarios:
        binarizer = LabelBinarizer()
        binarizer.fit(frame, source_col)
        encoded = binarizer.transform(frame, source_col)

        for indicator in expected_indicators:
            self.assertIn(indicator, encoded.columns)
        # The probed value occurs exactly once in its source column.
        self.assertEqual(np.sum(encoded[probe]), 1)

        restored = binarizer.inverse_transform(encoded)
        self.assertNotIn(probe, restored.columns)