Exemple #1
0
    def testNumeric(self):
        """Check that transformed numeric columns only hold fitted bin values.

        A synthetic classification dataset is split into train and test
        parts, a ``LastochkaTransformer`` -> ``LogisticRegression`` pipeline
        is fitted on the train part, and then, for every column, the set of
        unique transformed values (train and test) is compared with the
        ``"woe_value"`` entries of that column's optimizer ``bin_stats``.
        """
        n_samples = 200
        n_features = 10  # single source of truth for data width and column names

        X, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_informative=2, random_state=42)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=0.3, random_state=42)
        column_names = ['X%i' % i for i in range(n_features)]

        D_train = pd.DataFrame(X_train, columns=column_names)
        D_test = pd.DataFrame(X_test, columns=column_names)

        lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
        log = LogisticRegression()

        pipe = Pipeline(steps=[
                ('lastochka', lastochka),
                ('log', log)])

        # Fitting the pipeline fits the `lastochka` step object used below.
        pipe.fit(D_train, y_train)
        X_w = lastochka.transform(D_train)
        X_wt = lastochka.transform(D_test)

        for variable in column_names:
            vt = lastochka.get_transformer(variable)
            acceptable_values = set(vt.optimizer_instance.bin_stats["woe_value"])
            # assertSetEqual reports the differing elements on failure,
            # unlike assertTrue(a == b), which only says "False is not true".
            self.assertSetEqual(
                acceptable_values, set(X_w[variable].unique()),
                msg="train values mismatch for %s" % variable)
            self.assertSetEqual(
                acceptable_values, set(X_wt[variable].unique()),
                msg="test values mismatch for %s" % variable)
Exemple #2
0
 def testSpecial(self):
     """Check that the configured special values are tracked for PTRATIO.

     Fits and applies a ``LastochkaTransformer`` configured with
     ``specials`` for the ``PTRATIO`` column, then verifies that the keys
     of that column's ``specials_stats`` equal the configured values, in
     the same order.
     """
     # NOTE(review): load_boston was removed in scikit-learn 1.2; this test
     # needs an older scikit-learn or a replacement dataset.
     _boston = load_boston()
     X = pd.DataFrame(_boston["data"], columns=_boston["feature_names"])
     # Binary target: 1 when the value is at or above the median.
     y = (_boston["target"] >= np.median(_boston["target"])).astype(int)
     specs = {"PTRATIO": [20.2, 14.7]}
     lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10, specials=specs)
     lastochka.fit(X, y)
     lastochka.transform(X)
     tracked_specials = list(lastochka.get_transformer("PTRATIO").specials_stats.keys())
     # assertEqual shows both lists on failure instead of a bare boolean.
     self.assertEqual(tracked_specials, specs["PTRATIO"])
Exemple #3
0
 def testMissing(self):
     """Check that a column with NaNs gets a ``missing_woe_value``.

     NaNs are injected into the ``ZN`` column at randomly chosen rows, a
     ``LastochkaTransformer`` is fitted and applied, and the ``ZN``
     transformer's ``missing_woe_value`` is asserted to be set.
     """
     # NOTE(review): load_boston was removed in scikit-learn 1.2; this test
     # needs an older scikit-learn or a replacement dataset.
     _boston = load_boston()
     X = pd.DataFrame(_boston["data"], columns=_boston["feature_names"])
     np.random.seed(2)  # fixed seed so the same rows are blanked every run
     # np.random.choice samples with replacement by default, so fewer than
     # 200 distinct rows may end up as NaN.
     indexes = np.random.choice(X.index.tolist(), 200)
     X.loc[indexes, "ZN"] = np.nan
     # Binary target: 1 when the value is at or above the median.
     y = (_boston["target"] >= np.median(_boston["target"])).astype(int)
     lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
     lastochka.fit(X, y)
     lastochka.transform(X)
     self.assertIsNotNone(lastochka.get_transformer("ZN").missing_woe_value)
Exemple #4
0
 def testCategory(self):
     """Check that a string-typed column is handled by ``CategoryOptimizer``.

     The ``RAD`` column is replaced by its string representation under the
     name ``RAD_CAT``; after fitting and transforming, the optimizer
     attached to the ``RAD_CAT`` transformer must be a
     ``CategoryOptimizer`` instance.
     """
     # NOTE(review): load_boston was removed in scikit-learn 1.2; this test
     # needs an older scikit-learn or a replacement dataset.
     dataset = load_boston()
     target = dataset["target"]
     y = (target >= np.median(target)).astype(int)

     X = pd.DataFrame(dataset["data"], columns=dataset["feature_names"])
     # Remove the numeric column and append its string form as the last column.
     X["RAD_CAT"] = X.pop("RAD").astype(str)

     lastochka = LastochkaTransformer(verbose=True, n_final=3, n_initial=10)
     lastochka.fit(X, y)
     lastochka.transform(X)

     rad_transformer = lastochka.get_transformer("RAD_CAT")
     self.assertIsInstance(rad_transformer.optimizer_instance, CategoryOptimizer)