def testInputArray(self):
    """Fit on plain NumPy arrays and check that transform returns an ndarray."""
    # 200 rows, 3 normally distributed features and a random binary target.
    features = np.random.normal(loc=0, scale=1, size=(200, 3))
    labels = np.random.randint(0, 2, size=200)

    transformer = LastochkaTransformer()
    transformer.fit(features, labels)
    transformed = transformer.transform(features)

    self.assertIsInstance(transformed, np.ndarray)
def testSpecial(self):
    """Check that the special values given for PTRATIO are the keys of its stats.

    A binary target is built by thresholding the Boston target at its median.
    The transformer is fitted with ``specials`` for the ``PTRATIO`` column, and
    the keys of that column's ``specials_stats`` must equal the supplied list,
    in the same order.
    """
    # NOTE(review): presumably sklearn.datasets.load_boston, which was removed
    # in scikit-learn 1.2 -- confirm the pinned version.
    boston = load_boston()
    X = pd.DataFrame(boston["data"], columns=boston["feature_names"])
    target = boston["target"]
    y = (target >= np.median(target)).astype(int)

    specs = {"PTRATIO": [20.2, 14.7]}
    lastochka = LastochkaTransformer(
        verbose=True, n_final=3, n_initial=10, specials=specs
    )
    lastochka.fit(X, y)
    lastochka.transform(X)

    observed = list(lastochka.get_transformer("PTRATIO").specials_stats.keys())
    # assertListEqual reports the differing elements on failure, unlike
    # assertTrue(a == b), which only says "False is not true".
    self.assertListEqual(observed, specs["PTRATIO"])
def testVerbose(self):
    """Smoke test: fit on a frame with numeric and string columns.

    There are no assertions; the test passes if ``fit`` does not raise.
    NOTE(review): the name suggests verbose mode, but ``verbose=False`` is
    passed -- confirm which setting is intended.
    """
    n_samples = 100
    features, labels = make_classification(
        n_samples=n_samples,
        n_features=5,
        n_informative=2,
        random_state=42,
    )

    names = ['X%i' % idx for idx in range(5)]
    frame = pd.DataFrame(features, columns=names)
    # Extra string-valued column drawn at random from five letters.
    frame["CAT"] = np.random.choice(list("ABCDF"), n_samples)

    transformer = LastochkaTransformer(verbose=False, n_final=3, n_initial=10)
    transformer.fit(frame, labels)
def testMissing(self):
    """Check that a column containing NaNs gets a missing-value WoE.

    NaNs are injected into the ``ZN`` column of the Boston data. After
    fit/transform, the ``ZN`` transformer's ``missing_woe_value`` must not be
    ``None``.
    """
    # NOTE(review): presumably sklearn.datasets.load_boston, which was removed
    # in scikit-learn 1.2 -- confirm the pinned version.
    boston = load_boston()
    X = pd.DataFrame(boston["data"], columns=boston["feature_names"])

    # Fixed seed so the same rows are blanked on every run. np.random.choice
    # samples with replacement by default, so fewer than 200 distinct rows
    # may end up as NaN.
    np.random.seed(2)
    indexes = np.random.choice(X.index.tolist(), 200)
    X.loc[indexes, "ZN"] = np.nan

    target = boston["target"]
    y = (target >= np.median(target)).astype(int)

    lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
    lastochka.fit(X, y)
    lastochka.transform(X)

    # The dedicated assertion gives a clearer failure message than
    # assertTrue(x is not None).
    self.assertIsNotNone(lastochka.get_transformer("ZN").missing_woe_value)
def testCategory(self):
    """Check that a string-typed column is handled by a CategoryOptimizer.

    ``RAD`` is replaced by its string representation in ``RAD_CAT``. After
    fit/transform, the ``RAD_CAT`` transformer's ``optimizer_instance`` must
    be a ``CategoryOptimizer``.
    """
    boston = load_boston()
    frame = pd.DataFrame(boston["data"], columns=boston["feature_names"])

    # Swap the numeric RAD column for a string-valued copy.
    frame["RAD_CAT"] = frame["RAD"].astype(str)
    frame = frame.drop("RAD", axis=1)

    target = boston["target"]
    threshold = np.median(target)
    labels = (target >= threshold).astype(int)

    transformer = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
    transformer.fit(frame, labels)
    transformer.transform(frame)

    rad_transformer = transformer.get_transformer("RAD_CAT")
    self.assertIsInstance(rad_transformer.optimizer_instance, CategoryOptimizer)