def test_clone(self):
     """Clone a stacking transform and compare the copy with the source."""
     estimators = [
         LinearRegression(normalize=True),
         DecisionTreeClassifier(max_depth=3),
     ]
     stacking = SkBaseTransformStacking(estimators, 'predict')
     duplicate = clone(stacking)
     # exc=True is passed through to test_equality as in every other call.
     stacking.test_equality(duplicate, exc=True)
 def test_pipeline_with_params(self):
     """Export nested parameters from one pipeline and load them in another."""
     intercept_key = (
         'skbasetransformstacking__models_0__model__fit_intercept')
     normalize_key = (
         'skbasetransformstacking__models_0__model__normalize')

     # Source pipeline: the linear model is created with normalize=True.
     source = make_pipeline(
         SkBaseTransformStacking([
             LinearRegression(normalize=True),
             DecisionTreeClassifier(max_depth=3),
         ]),
         DecisionTreeRegressor())
     exported = source.get_params(deep=True)
     self.assertIn(intercept_key, exported)
     self.assertEqual(exported[normalize_key], True)

     # Target pipeline starts from different settings, then receives the
     # exported parameters through set_params.
     target = make_pipeline(
         SkBaseTransformStacking([
             LinearRegression(normalize=False),
             DecisionTreeClassifier(max_depth=2),
         ]),
         DecisionTreeRegressor())
     target.set_params(**exported)
     reloaded = target.get_params()
     self.assertIn(intercept_key, reloaded)
     self.assertEqual(reloaded[normalize_key], True)
Beispiel #3
0
 def test_pipeline_with_two_transforms(self):
     """Stack two scalers before a decision tree; check accuracy and repr."""
     iris = load_iris()
     features, labels = iris.data, iris.target
     X_train, X_test, y_train, y_test = train_test_split(features, labels)

     stacking = SkBaseTransformStacking([Normalizer(), MinMaxScaler()])
     pipeline = make_pipeline(stacking, DecisionTreeClassifier())
     pipeline.fit(X_train, y_train)

     # The accuracy computed from the predictions is asserted equal to
     # the value returned by the pipeline's own score method.
     manual_accuracy = accuracy_score(y_test, pipeline.predict(X_test))
     self.assertGreater(manual_accuracy, 0.8)
     self.assertEqual(manual_accuracy, pipeline.score(X_test, y_test))

     self.assertStartsWith(
         "SkBaseTransformStacking([Normalizer(", repr(stacking))
    def test_pickle(self):
        """Check a fitted pipeline predicts the same after a pickle round trip."""
        iris = load_iris()
        X, y = iris.data, iris.target
        stacking = SkBaseTransformStacking([
            LinearRegression(normalize=True),
            DecisionTreeClassifier(max_depth=3),
        ])
        model = make_pipeline(stacking, DecisionTreeRegressor())
        model.fit(X, y)
        expected = model.predict(X)

        # Dump into an in-memory buffer, then reload from a fresh stream
        # built over the same bytes.
        buffer = BytesIO()
        pickle.dump(model, buffer)
        restored = pickle.load(BytesIO(buffer.getvalue()))

        self.assertEqualArray(expected, restored.predict(X))
 def test_pipeline_with_two_classifiers(self):
     """Stack two classifiers before a decision tree; check accuracy and repr.

     The iris data is split with train_test_split, the pipeline is fitted
     on the training part, and the accuracy on the held-out part must
     exceed 0.8 and equal the value returned by ``pipe.score``.
     """
     data = load_iris()
     X, y = data.data, data.target
     X_train, X_test, y_train, y_test = train_test_split(X, y)
     conv = SkBaseTransformStacking(
         [LogisticRegression(), DecisionTreeClassifier()])
     pipe = make_pipeline(conv, DecisionTreeClassifier())
     pipe.fit(X_train, y_train)
     pred = pipe.predict(X_test)
     score = accuracy_score(y_test, pred)
     self.assertGreater(score, 0.8)
     score2 = pipe.score(X_test, y_test)
     self.assertEqual(score, score2)
     rp = repr(conv)
     # Match only up to the opening parenthesis of the first estimator.
     # Which constructor arguments scikit-learn prints in an estimator repr
     # depends on its print_changed_only setting, so a prefix that spells
     # out default values (C=1.0, class_weight=None, ...) is brittle.
     self.assertStartsWith(
         'SkBaseTransformStacking([LogisticRegression(', rp)
    def test_grid(self):
        """Grid-search the stacked tree depth and compare predictions to y."""
        iris = load_iris()
        X, y = iris.data, iris.target
        stacking = SkBaseTransformStacking([
            LinearRegression(normalize=True),
            DecisionTreeClassifier(max_depth=3),
        ])
        model = make_pipeline(stacking, DecisionTreeRegressor())

        # The pipeline must expose at least one parameter.
        self.assertGreater(len(model.get_params(True)), 0)

        # Search over max_depth of the second stacked model.
        depth_key = 'skbasetransformstacking__models_1__model__max_depth'
        search = GridSearchCV(model, {depth_key: [2, 3]})
        search.fit(X, y)

        self.assertEqualArray(y, search.predict(X))
Beispiel #7
0
    def test_pipeline_wines(self):
        """Fit a stacked classifier on the wines data and check its scores."""
        wines = load_wines_dataset(shuffle=True)
        X = wines.drop(['quality', 'color'], axis=1)
        y = wines['quality']
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        stacking = SkBaseTransformStacking(
            [LogisticRegression(n_jobs=1)], 'decision_function')
        model = make_pipeline(stacking, RandomForestClassifier())
        try:
            model.fit(X_train, y_train)
        except AttributeError:
            # The error is tolerated only when sklver compares below "0.24";
            # otherwise it is propagated.
            if compare_module_version(sklver, "0.24") < 0:
                return
            raise

        # AUC of the highest predicted probability against whether the
        # prediction matched the true label.
        is_correct = y_test == model.predict(X_test)
        top_proba = model.predict_proba(X_test).max(axis=1)
        auc_pipe = roc_auc_score(is_correct, top_proba)
        acc = model.score(X_test, y_test)
        accu = accuracy_score(y_test, model.predict(X_test))
        self.assertGreater(auc_pipe, 0.6)
        for value in (acc, accu):
            self.assertGreater(value, 0.5)

        # Grid search with an empty parameter grid and a single named scorer.
        scoring = {'acc': make_scorer(accuracy_score)}
        grid = GridSearchCV(estimator=model, param_grid={}, cv=3,
                            refit='acc', scoring=scoring)
        grid.fit(X, y)
        best = grid.best_estimator_
        first_step = grid.best_estimator_.steps[0][1]
        self.assertEqual(first_step.method, 'decision_function')

        res = cross_val_score(model, X, y, cv=5)
        # Scores of the refitted best estimator, before and after fitting
        # it again on the training split only.
        acc1 = best.score(X_test, y_test)
        accu1 = accuracy_score(y_test, best.predict(X_test))

        best.fit(X_train, y_train)
        acc2 = best.score(X_test, y_test)
        accu2 = accuracy_score(y_test, best.predict(X_test))

        self.assertGreater(res.min(), 0.5)
        self.assertGreater(min([acc2, accu2, acc1, accu1]), 0.5)
 def test_pipeline_with_two_classifiers(self):
     """Stack two classifiers before a decision tree; check accuracy and repr."""
     iris = load_iris()
     features, labels = iris.data, iris.target
     X_train, X_test, y_train, y_test = train_test_split(features, labels)

     stacking = SkBaseTransformStacking(
         [LogisticRegression(n_jobs=1), DecisionTreeClassifier()])
     pipeline = make_pipeline(stacking, DecisionTreeClassifier())
     try:
         pipeline.fit(X_train, y_train)
     except AttributeError:
         # The error is tolerated only when sklver compares below "0.24";
         # otherwise it is propagated.
         if compare_module_version(sklver, "0.24") < 0:
             return
         raise

     # The accuracy computed from the predictions is asserted equal to
     # the value returned by the pipeline's own score method.
     manual_accuracy = accuracy_score(y_test, pipeline.predict(X_test))
     self.assertGreater(manual_accuracy, 0.8)
     self.assertEqual(manual_accuracy, pipeline.score(X_test, y_test))

     self.assertStartsWith(
         'SkBaseTransformStacking([LogisticRegression(', repr(stacking))