def test_to_and_from_json_convert_integers(self):
    """Check that a CONVERT_INTEGER encoder survives a JSON round trip.

    One encoder is fitted on the frame and a second one is rebuilt purely
    from the first one's ``to_json()`` output. The rebuilt encoder must
    keep every column and map the string columns to the expected integer
    codes.
    """
    # training data
    d = {
        "col1": [1, 2, 3],
        "col2": ["a", "a", "c"],
        "col3": [1, 1, 3],
        "col4": ["a", "b", "c"],
    }
    df = pd.DataFrame(data=d)

    cat1 = PreprocessingCategorical(
        df.columns, PreprocessingCategorical.CONVERT_INTEGER
    )
    cat1.fit(df)

    # cat2 is never fitted: everything it knows comes from cat1's JSON.
    cat2 = PreprocessingCategorical(
        df.columns, PreprocessingCategorical.CONVERT_INTEGER
    )
    cat2.from_json(cat1.to_json())
    df = cat2.transform(df)

    # assertIn reports the missing column and the available columns on
    # failure, unlike assertTrue(col in ...), which only says "False".
    for col in ["col1", "col2", "col3", "col4"]:
        self.assertIn(col, df.columns)

    # Expected integer codes per string column, in row order.
    expected_codes = {
        "col2": [0, 0, 1],
        "col4": [0, 1, 2],
    }
    for col, codes in expected_codes.items():
        for row, code in enumerate(codes):
            self.assertEqual(df[col][row], code)
def from_json(self, data_json):
    """Rebuild the preprocessing steps described by ``data_json``.

    Only keys present in ``data_json`` are applied; attributes belonging
    to absent keys are left untouched.

    Keys read:
        remove_columns: stored as-is in ``_remove_columns``.
        missing_values, categorical, scale: each an iterable of
            per-step payloads; one step object is created per payload
            and restored through its own ``from_json``.
        categorical_y: payload for the target encoder. A payload that
            contains ``"new_columns"`` is restored with
            ``LabelBinarizer``, any other with ``LabelEncoder``.
        scale_y: payload for the target ``Scale`` step.
        ml_task: written into ``self._params["ml_task"]``.
    """
    if "remove_columns" in data_json:
        self._remove_columns = data_json["remove_columns"]

    if "missing_values" in data_json:
        self._missing_values = []
        for payload in data_json["missing_values"]:
            step = PreprocessingMissingValues()
            step.from_json(payload)
            self._missing_values.append(step)

    if "categorical" in data_json:
        self._categorical = []
        for payload in data_json["categorical"]:
            step = PreprocessingCategorical()
            step.from_json(payload)
            self._categorical.append(step)

    if "scale" in data_json:
        self._scale = []
        for payload in data_json["scale"]:
            step = Scale()
            step.from_json(payload)
            self._scale.append(step)

    if "categorical_y" in data_json:
        target_payload = data_json["categorical_y"]
        # The presence of "new_columns" selects the binarizer.
        self._categorical_y = (
            LabelBinarizer()
            if "new_columns" in target_payload
            else LabelEncoder()
        )
        self._categorical_y.from_json(target_payload)

    if "scale_y" in data_json:
        self._scale_y = Scale()
        self._scale_y.from_json(data_json["scale_y"])

    if "ml_task" in data_json:
        self._params["ml_task"] = data_json["ml_task"]
def from_json(self, data_json):
    """Rebuild the full preprocessing state described by ``data_json``.

    ``_params``, ``_add_random_feature`` and ``_drop_features`` are always
    assigned (falling back to the current params, ``False`` and ``[]``
    respectively). Every other attribute is only touched when its key is
    present in ``data_json``.

    Keys read:
        params: replaces ``self._params`` by reference, so a later
            ``ml_task`` write also lands in the caller's dict.
        remove_columns: stored as-is in ``_remove_columns``.
        missing_values, categorical, datetime_transforms,
        text_transforms, scale: each an iterable of per-step payloads;
            one step object is created per payload and restored through
            its own ``from_json``.
        golden_features: payload for a ``GoldenFeaturesTransformer``.
        categorical_y: payload for the target encoder. A payload that
            contains ``"new_columns"`` is restored with
            ``LabelBinarizer``, any other with ``LabelEncoder``.
        scale_y: payload for the target ``Scale`` step.
        ml_task: written into ``self._params["ml_task"]``.
        add_random_feature, drop_features: optional flags/lists.
    """
    current_params = self._params
    self._params = data_json.get("params", current_params)

    if "remove_columns" in data_json:
        self._remove_columns = data_json["remove_columns"]

    if "missing_values" in data_json:
        self._missing_values = []
        for payload in data_json["missing_values"]:
            step = PreprocessingMissingValues()
            step.from_json(payload)
            self._missing_values.append(step)

    if "categorical" in data_json:
        self._categorical = []
        for payload in data_json["categorical"]:
            step = PreprocessingCategorical()
            step.from_json(payload)
            self._categorical.append(step)

    if "datetime_transforms" in data_json:
        self._datetime_transforms = []
        for payload in data_json["datetime_transforms"]:
            step = DateTimeTransformer()
            step.from_json(payload)
            self._datetime_transforms.append(step)

    if "text_transforms" in data_json:
        self._text_transforms = []
        for payload in data_json["text_transforms"]:
            step = TextTransformer()
            step.from_json(payload)
            self._text_transforms.append(step)

    if "golden_features" in data_json:
        self._golden_features = GoldenFeaturesTransformer()
        self._golden_features.from_json(data_json["golden_features"])

    if "scale" in data_json:
        self._scale = []
        for payload in data_json["scale"]:
            step = Scale()
            step.from_json(payload)
            self._scale.append(step)

    if "categorical_y" in data_json:
        target_payload = data_json["categorical_y"]
        # The presence of "new_columns" selects the binarizer.
        self._categorical_y = (
            LabelBinarizer()
            if "new_columns" in target_payload
            else LabelEncoder()
        )
        self._categorical_y.from_json(target_payload)

    if "scale_y" in data_json:
        self._scale_y = Scale()
        self._scale_y.from_json(data_json["scale_y"])

    if "ml_task" in data_json:
        # Must run after _params is rebound above so the value lands in
        # the freshly loaded params dict.
        self._params["ml_task"] = data_json["ml_task"]

    self._add_random_feature = data_json.get("add_random_feature", False)
    self._drop_features = data_json.get("drop_features", [])