Ejemplos de StackingCVClassifier en Python, ejemplos de mlxtend.classifier.StackingCVClassifier en Python

Ejemplo n.º 1

0

Mostrar archivo

def test_use_clones():
    """Check how ``use_clones`` affects the estimators passed to the stack.

    After fitting with ``use_clones=True`` the original random forest is
    expected to still raise ``NotFittedError``; after fitting with
    ``use_clones=False`` the same instance is called for prediction.
    """
    np.random.seed(123)
    X, y = iris_data()

    meta_lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()

    cloning_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                         use_clones=True,
                                         meta_classifier=meta_lr,
                                         shuffle=False)
    cloning_stack.fit(X, y)

    # The caller's forest must not have been fitted by the stack above.
    unfitted_message = (
        "This RandomForestClassifier instance is not fitted yet."
        " Call 'fit' with appropriate arguments"
        " before using this estimator.")
    assert_raises(exceptions.NotFittedError, unfitted_message,
                  forest.predict, X)

    in_place_stack = StackingCVClassifier(classifiers=[forest, bayes],
                                          use_probas=True,
                                          use_clones=False,
                                          meta_classifier=meta_lr,
                                          shuffle=False)
    in_place_stack.fit(X, y)

    # Must not raise now that the stack worked on the original instance.
    forest.predict(X)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: StackingDemo.py Proyecto: JYLFamily/Python_Study_Note

    def function_set(self):
        """Create the base learners, the stacked classifier and its grid search.

        Stores the parameter grid, the three base estimators, the
        ``StackingCVClassifier`` and the ``GridSearchCV`` wrapper on the
        instance; nothing is fitted here.
        """
        # Candidate values for every hyper-parameter that is searched.
        lr_c_grid = list(np.linspace(start=0.1, stop=10, num=5))
        gb_rate_grid = list(np.linspace(start=0.1, stop=1, num=10))
        rf_trees_grid = list(range(5, 16))
        meta_c_grid = list(np.linspace(start=0.1, stop=10, num=5))

        # NOTE: the parameter names must be spelled exactly like this.
        self.__params = {
            "logisticregression__C": lr_c_grid,
            "gradientboostingclassifier__learning_rate": gb_rate_grid,
            "randomforestclassifier__n_estimators": rf_trees_grid,
            "meta-logisticregression__C": meta_c_grid,
        }

        # Base learners; the logistic regression also serves as meta-learner.
        self.__lr = LogisticRegression()
        self.__gb = GradientBoostingClassifier()
        self.__rf = RandomForestClassifier()

        base_learners = [self.__lr, self.__gb, self.__rf]
        self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                           meta_classifier=self.__lr,
                                           use_probas=True,
                                           cv=5,
                                           use_features_in_secondary=True,
                                           verbose=1)

        self.__grid = GridSearchCV(estimator=self.__sclf,
                                   param_grid=self.__params,
                                   cv=5,
                                   refit=True)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_get_params():
    """Check the top-level parameter names exposed by ``get_params``."""
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=1)
    bayes = GaussianNB()
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=meta_lr)

    # Keep only the part before the first '__' so nested parameters
    # collapse onto the name of the component that owns them.
    prefixes = {name.split('__')[0] for name in stack.get_params()}
    got = sorted(prefixes)

    expect = [
        'classifiers',
        'cv',
        'drop_last_proba',
        'gaussiannb',
        'kneighborsclassifier',
        'meta_classifier',
        'n_jobs',
        'pre_dispatch',
        'random_state',
        'randomforestclassifier',
        'shuffle',
        'store_train_meta_features',
        'stratify',
        'use_clones',
        'use_features_in_secondary',
        'use_probas',
        'verbose',
    ]
    assert got == expect, got

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_sparse_inputs_with_features_in_secondary():
    """Fit on dense and on CSR-sparse data with ``use_features_in_secondary``.

    In both cases the training accuracy, rounded to two decimals, must
    equal the value expected for the installed scikit-learn version.
    """
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42,
                                 use_features_in_secondary=True)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)

    # The expected score differs between scikit-learn releases.
    old_sklearn = Version(sklearn_version) < Version("0.21")
    expected_value = 1.0 if old_sklearn else 0.99

    # dense input
    stack.fit(X_train, y_train)
    dense_score = round(stack.score(X_train, y_train), 2)
    assert dense_score == expected_value, dense_score

    # sparse input (scored against the dense training matrix)
    stack.fit(sparse.csr_matrix(X_train), y_train)
    sparse_score = round(stack.score(X_train, y_train), 2)
    assert sparse_score == expected_value, sparse_score

Ejemplo n.º 5

0

Mostrar archivo

def test_train_meta_features_():
    """Check the shape of the stored training meta-features.

    With two base classifiers and ``store_train_meta_features=True`` the
    fitted stack must expose one row per training sample and two columns.
    """
    knn = KNeighborsClassifier()
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)

    split = train_test_split(X_iris, y_iris, test_size=0.3)
    X_train, y_train = split[0], split[2]

    stack.fit(X_train, y_train)

    n_train = X_train.shape[0]
    assert stack.train_meta_features_.shape == (n_train, 2)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: venkatesh-1729/mlxtend

def test_verbose():
    """Smoke test: fitting with ``verbose=3`` must complete without error."""
    np.random.seed(123)
    meta_lr = LogisticRegression()
    forest = RandomForestClassifier()
    bayes = GaussianNB()
    base_learners = [forest, bayes]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=3)
    features, labels = iris.data, iris.target
    stack.fit(features, labels)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_verbose():
    """Smoke test: fitting with ``verbose=3`` must complete without error."""
    np.random.seed(123)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    base_learners = [forest, bayes]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=3)
    stack.fit(X_iris, y_iris)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_no_weight_support():
    """Fitting with ``sample_weight`` must raise ``TypeError`` for this stack.

    The base classifiers include a ``KNeighborsClassifier``; the test name
    suggests it is the estimator lacking weight support.
    """
    n_samples = len(y_iris)
    weights = np.array([random.random() for _ in range(n_samples)])

    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    knn = KNeighborsClassifier()
    stack = StackingCVClassifier(classifiers=[forest, bayes, knn],
                                 meta_classifier=meta_lr,
                                 shuffle=False)

    with pytest.raises(TypeError):
        stack.fit(X_iris, y_iris, sample_weight=weights)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: venkatesh-1729/mlxtend

def test_train_meta_features_():
    """Check the shape of the stored training meta-features.

    With two base classifiers and ``store_train_meta_features=True`` the
    fitted stack must expose one row per training sample and two columns.
    """
    knn = KNeighborsClassifier()
    meta_lr = LogisticRegression()
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)

    # Only the training portion of the split is needed.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3)
    stack.fit(X_train, y_train)

    expected_shape = (X_train.shape[0], 2)
    assert stack.train_meta_features_.shape == expected_shape

Ejemplo n.º 10

0

Mostrar archivo

def test_verbose():
    """Smoke test: fitting with ``verbose=3`` must complete without error."""
    np.random.seed(123)

    meta_lr = LogisticRegression()
    base_learners = [RandomForestClassifier(), GaussianNB()]

    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=3)
    stack.fit(iris.data, iris.target)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_no_weight_support_meta():
    """Fitting with ``sample_weight`` must raise ``TypeError`` for this stack.

    Here the meta-classifier is a ``KNeighborsClassifier``; the test name
    suggests it is the estimator lacking weight support.
    """
    n_samples = len(y_iris)
    weights = np.array([random.random() for _ in range(n_samples)])

    knn_meta = KNeighborsClassifier()
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 meta_classifier=knn_meta,
                                 shuffle=False)

    with pytest.raises(TypeError):
        stack.fit(X_iris, y_iris, sample_weight=weights)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: vladimiralencar/mlxtend

def test_train_meta_features_():
    """Check the shape of the stored training meta-features.

    With two base classifiers and ``store_train_meta_features=True`` the
    fitted stack must expose one row per training sample and two columns.
    """
    base_learners = [KNeighborsClassifier(), GaussianNB()]
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=base_learners,
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)

    split = train_test_split(X, y, test_size=0.3)
    X_train, y_train = split[0], split[2]
    stack.fit(X_train, y_train)

    meta_shape = stack.train_meta_features_.shape
    assert meta_shape == (X_train.shape[0], 2)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: data_modeling.py Proyecto: Data-Science-Project-G13/monitoring-athletes-performance

 def _build_model(self, X_train, y_train):
     """Fit and return a stacked classifier trained on the given data.

     Five base learners (k-NN, random forest, logistic regression, a
     polynomial-kernel SVC and XGBoost) feed a Gaussian naive Bayes
     meta-classifier.
     """
     nearest = KNeighborsClassifier(n_neighbors=1)
     forest = RandomForestClassifier(max_depth=3,
                                     max_features=6,
                                     n_estimators=50,
                                     random_state=0)
     poly_svc = svm.SVC(C=1.0, kernel='poly', degree=5)
     booster = XGBClassifier(alpha=15,
                             colsample_bytree=0.1,
                             learning_rate=1,
                             max_depth=5,
                             reg_lambda=10.0)
     bayes_meta = GaussianNB()
     logit = LogisticRegression(C=10.0,
                                dual=False,
                                max_iter=100,
                                solver='lbfgs')

     base_learners = [nearest, forest, logit, poly_svc, booster]
     stack = StackingCVClassifier(classifiers=base_learners,
                                  meta_classifier=bayes_meta,
                                  random_state=42)
     stack.fit(X_train, y_train)
     return stack

Ejemplo n.º 14

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_predict_meta_features():
    """Check that ``predict`` returns one value per test sample."""
    knn = KNeighborsClassifier()
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    bayes = GaussianNB()
    X_train, X_test, y_train, _ = train_test_split(X_iris, y_iris,
                                                   test_size=0.3)

    # Default configuration: class labels are used (use_probas not set).
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 store_train_meta_features=True)
    stack.fit(X_train, y_train)

    predictions = stack.predict(X_test)
    n_test = X_test.shape[0]
    assert predictions.shape == (n_test,)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: stacking_baseline.py Proyecto: dujijundavid/Home_Default_Risk

 def model_fit(self):
     """Create the base learners and fit the stacked classifier.

     Four tree-based learners feed a logistic-regression meta-classifier
     that is trained on their predicted probabilities with 3-fold CV.
     """
     self.__ef = ExtraTreesClassifier(n_jobs=-1)
     self.__rf = RandomForestClassifier(n_jobs=-1)
     self.__lr = LogisticRegression()
     self.__gb = GradientBoostingClassifier()
     self.__xgb = XGBClassifier(n_jobs=-1, missing=-999.0)

     base_learners = [self.__ef, self.__rf, self.__gb, self.__xgb]
     self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                        meta_classifier=self.__lr,
                                        use_probas=True,
                                        cv=3)

     # `.values` is passed rather than the feature/label objects themselves.
     train_features = self.__application_train_feature.values
     train_labels = self.__application_train_label.values
     self.__sclf.fit(train_features, train_labels)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: vladimiralencar/mlxtend

def test_pandas():
    """Fit on a DataFrame; if a ``KeyError`` is raised, check its message.

    The test passes silently when fitting succeeds.
    """
    frame = pd.DataFrame(X)
    meta_lr = LogisticRegression()
    base_learners = [RandomForestClassifier(), GaussianNB()]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)
    expected_fragment = 'are NumPy arrays. If X and y are pandas DataFrames'
    try:
        stack.fit(frame, iris.target)
    except KeyError as err:
        assert expected_fragment in str(err)

Ejemplo n.º 17

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: zmandyhe/mlxtend

def test_pandas():
    """Fit on a DataFrame; if a ``KeyError`` is raised, check its message.

    The test passes silently when fitting succeeds.
    """
    frame = pd.DataFrame(X_iris)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)
    expected_fragment = 'are NumPy arrays. If X and y are pandas DataFrames'
    try:
        stack.fit(frame, y_iris)
    except KeyError as err:
        assert expected_fragment in str(err)

Ejemplo n.º 18

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: venkatesh-1729/mlxtend

def test_pandas():
    """Fit on a DataFrame; if a ``KeyError`` is raised, check its message.

    The test passes silently when fitting succeeds.
    """
    frame = pd.DataFrame(X)
    meta_lr = LogisticRegression()
    forest = RandomForestClassifier()
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)
    try:
        stack.fit(frame, iris.target)
    except KeyError as err:
        message = str(err)
        assert 'are NumPy arrays. If X and y are pandas DataFrames' in message

Ejemplo n.º 19

0

Mostrar archivo

def test_meta_feat_reordering():
    """Check that stored meta-features line up with the training labels.

    With ``shuffle=True`` the ROC AUC between ``y_train`` and the second
    meta-feature column must round to 0.88.
    """
    knn = KNeighborsClassifier()
    meta_lr = LogisticRegression()
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 shuffle=True,
                                 store_train_meta_features=True)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)
    stack.fit(X_train, y_train)

    second_column = stack.train_meta_features_[:, 1]
    auc = roc_auc_score(y_train, second_column)
    assert round(auc, 2) == 0.88

Ejemplo n.º 20

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: vladimiralencar/mlxtend

def test_list_of_lists():
    """Fit on a plain list of rows; if ``TypeError`` is raised, check it.

    The test passes silently when fitting succeeds.
    """
    rows = list(X)
    meta_lr = LogisticRegression()
    base_learners = [RandomForestClassifier(), GaussianNB()]
    stack = StackingCVClassifier(classifiers=base_learners,
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    expected_fragment = 'are NumPy arrays. If X and y are lists'
    try:
        stack.fit(rows, iris.target)
    except TypeError as err:
        assert expected_fragment in str(err)

Ejemplo n.º 21

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: venkatesh-1729/mlxtend

def test_list_of_lists():
    """Fit on a plain list of rows; if ``TypeError`` is raised, check it.

    The test passes silently when fitting succeeds.
    """
    rows = list(X)
    meta_lr = LogisticRegression()
    forest = RandomForestClassifier()
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    try:
        stack.fit(rows, iris.target)
    except TypeError as err:
        message = str(err)
        assert 'are NumPy arrays. If X and y are lists' in message

Ejemplo n.º 22

0

Mostrar archivo

Archivo: StackingDemo.py Proyecto: JYLFamily/Python_Study_Note

 def pick_the_best_function(self):
     """Fit the stack with fixed hyper-parameters and print its test ROC AUC.

     The AUC is computed from the second column of ``predict_proba`` on the
     test set.
     """
     self.__lr = LogisticRegression(C=0.1)
     self.__gb = GradientBoostingClassifier(learning_rate=0.1)
     self.__rf = RandomForestClassifier(n_estimators=5)

     # The logistic regression is both a base learner and the meta-learner.
     base_learners = [self.__lr, self.__gb, self.__rf]
     self.__sclf = StackingCVClassifier(classifiers=base_learners,
                                        meta_classifier=self.__lr,
                                        use_probas=True,
                                        cv=5,
                                        use_features_in_secondary=True,
                                        verbose=1)
     self.__sclf.fit(self.__train, self.__train_label)

     second_column = self.__sclf.predict_proba(self.__test)[:, 1]
     test_auc = roc_auc_score(self.__test_label, second_column)
     print(test_auc)

Ejemplo n.º 23

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: zmandyhe/mlxtend

def test_list_of_lists():
    """Fit on a plain list of rows; if ``TypeError`` is raised, check it.

    The test passes silently when fitting succeeds.
    """
    rows = list(X_iris)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr,
                                 shuffle=False,
                                 verbose=0)

    expected_fragment = 'are NumPy arrays. If X and y are lists'
    try:
        stack.fit(rows, y_iris)
    except TypeError as err:
        assert expected_fragment in str(err)

Ejemplo n.º 24

0

Mostrar archivo

def test_not_fitted():
    """An unfitted stack must raise ``NotFittedError`` from its predictors.

    ``predict``, ``predict_proba`` and ``predict_meta_features`` are each
    checked, in that order, against the same error message.
    """
    np.random.seed(123)
    meta_lr = LogisticRegression()
    forest = RandomForestClassifier()
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[forest, bayes],
                                 use_probas=True,
                                 meta_classifier=meta_lr, shuffle=False)

    expected_message = (
        "This StackingCVClassifier instance is not fitted yet."
        " Call 'fit' with appropriate arguments"
        " before using this method.")

    prediction_methods = (stack.predict,
                          stack.predict_proba,
                          stack.predict_meta_features)
    for method in prediction_methods:
        assert_raises(NotFittedError, expected_message, method, iris.data)

Ejemplo n.º 25

0

Mostrar archivo

Archivo: plain_classifiers.py Proyecto: fletch22/nba_win_predictor

def get_stacked_classifiers():
    """Return the base classifiers plus a stacked ensemble built on them.

    The base estimators come from ``get_classifiers()``. A
    ``StackingCVClassifier`` with a multinomial logistic-regression
    meta-classifier is built on top of them and added as the last entry.

    Returns
    -------
    tuple of (list, list)
        ``(classifiers, names)``: two parallel lists. The last classifier
        is the stacked ensemble and the last name is
        ``CLF_TYPES.StackingCVClassifier``.
    """
    dict_clfs = get_classifiers()

    kwargs = {
        'C': 100.0,
        'dual': False,
        'fit_intercept': True,
        'multi_class': 'multinomial',
        'penalty': 'l2',
        'solver': 'saga'
    }
    lr = LogisticRegression(**kwargs)

    names = list(dict_clfs.keys())
    base_classifiers = [dict_clfs[clf_name] for clf_name in names]

    # To debug with a single base classifier, slice `names` and
    # `base_classifiers` to [0:1] here.

    clf_stacked = StackingCVClassifier(classifiers=base_classifiers,
                                       use_probas=True,
                                       use_features_in_secondary=True,
                                       meta_classifier=lr)

    # Build the returned list separately. Appending the ensemble to the
    # list it was constructed with would place it inside its own
    # `classifiers` parameter (a self-reference).
    names.append(CLF_TYPES.StackingCVClassifier)
    classifiers = base_classifiers + [clf_stacked]

    return classifiers, names

Ejemplo n.º 26

0

Mostrar archivo

def stacking_clf_and_gridsearch_and_same_algo_multi_times(X, y):
    """Grid-search a stack in which the same classifier is listed twice.

    Prints the mean/std test score and parameters of every grid point,
    followed by the best parameters and the best score.
    """
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=RANDOM_SEED)
    meta_lr = LogisticRegression()

    # The k-NN classifier appears twice among the base learners.
    stack = StackingCVClassifier(classifiers=[knn, knn, forest],
                                 meta_classifier=meta_lr,
                                 random_state=RANDOM_SEED)

    # Repeated estimators get a numeric suffix in their parameter names.
    param_grid = {
        'kneighborsclassifier-1__n_neighbors': [1, 5],
        'kneighborsclassifier-2__n_neighbors': [1, 5],
        'randomforestclassifier__n_estimators': [10, 50],
        'meta_classifier__C': [1, 10]
    }
    grid = model_selection.GridSearchCV(estimator=stack,
                                        param_grid=param_grid,
                                        cv=5,
                                        refit=True)
    grid.fit(X, y)

    results = grid.cv_results_
    rows = zip(results['mean_test_score'],
               results['std_test_score'],
               results['params'])
    for mean_score, std_score, candidate in rows:
        print('%0.3f +/- %0.2f %r' % (mean_score, std_score, candidate))

    print('Best paras: %s' % grid.best_params_)
    print('Best scores: %0.3f' % grid.best_score_)

Ejemplo n.º 27

0

Mostrar archivo

def simple_stacking_cv_classification_and_gridsearch(X, y):
    """Grid-search a three-learner stack and print the results.

    Prints the mean test score, half the test-score standard deviation and
    the parameters of every grid point, then the best parameters and score.
    """
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=RANDOM_SEED)
    bayes = GaussianNB()
    meta_lr = LogisticRegression()

    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=meta_lr,
                                 random_state=RANDOM_SEED)
    param_grid = {
        'kneighborsclassifier__n_neighbors': [1, 5],
        'randomforestclassifier__n_estimators': [10, 50],
        'meta_classifier__C': [1, 10]
    }
    grid = model_selection.GridSearchCV(estimator=stack,
                                        param_grid=param_grid,
                                        cv=5,
                                        refit=True)
    grid.fit(X, y)

    results = grid.cv_results_
    rows = zip(results['mean_test_score'],
               results['std_test_score'],
               results['params'])
    for mean_score, std_score, candidate in rows:
        # The printed spread is half of the reported standard deviation.
        half_std = std_score / 2.0
        print("%0.3f +/- %0.2f %r" % (mean_score, half_std, candidate))

    print('Best parameters: %s' % grid.best_params_)
    print('Accuracy: %0.2f' % grid.best_score_)

Ejemplo n.º 28

0

Mostrar archivo

Archivo: CDH_C_ST_LG_PSO.py Proyecto: FutureLYU/iNuc-en-NDPE

def aimfuc(k):
    """Objective function: negated mean CV accuracy of a stacked classifier.

    For every row ``i`` of ``k``, ``k[i, 0]`` is used as the ``C`` value of
    the logistic-regression meta-classifier. The stack is scored with
    6-fold cross-validated accuracy on ``X_train`` / ``Y_train``.

    Returns
    -------
    numpy.ndarray
        One value per row of ``k``: the negated mean accuracy.
    """
    n_candidates = k.shape[0]
    obj = np.ones(n_candidates)

    for idx in range(n_candidates):
        meta_lr = LogisticRegression(C=k[idx, 0],
                                     solver='liblinear',
                                     random_state=RANDOM_SEED,
                                     penalty='l2')
        base_learners = [clf_SVM, clf_RF, clf_MLP, clf_Ad]
        stack = StackingCVClassifier(classifiers=base_learners,
                                     meta_classifier=meta_lr,
                                     use_probas=True)

        fold_scores = cross_val_score(stack,
                                      X_train,
                                      Y_train,
                                      cv=6,
                                      scoring='accuracy',
                                      n_jobs=-1)
        # Sign is flipped -- presumably because the caller minimises the
        # objective; confirm against the optimiser that calls this.
        obj[idx] = -fold_scores.mean()
    return obj

Ejemplo n.º 29

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: vladimiralencar/mlxtend

def test_get_params():
    """Check the top-level parameter names exposed by ``get_params``."""
    knn = KNeighborsClassifier(n_neighbors=1)
    forest = RandomForestClassifier(random_state=1)
    bayes = GaussianNB()
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=[knn, forest, bayes],
                                 meta_classifier=meta_lr)

    # Keep only the part before the first '__' so nested parameters
    # collapse onto the name of the component that owns them.
    prefixes = {name.split('__')[0] for name in stack.get_params()}
    got = sorted(prefixes)

    expect = [
        'classifiers',
        'cv',
        'gaussiannb',
        'kneighborsclassifier',
        'meta-logisticregression',
        'meta_classifier',
        'randomforestclassifier',
        'refit',
        'shuffle',
        'store_train_meta_features',
        'stratify',
        'use_features_in_secondary',
        'use_probas',
        'verbose',
    ]
    assert got == expect, got

Ejemplo n.º 30

0

Mostrar archivo

Archivo: stacking_cv.py Proyecto: hikekang/hkx_tf_practice

def fun2():
    """Compare three classifiers and their stacked ensemble by 3-fold CV.

    Prints the mean accuracy and its standard deviation for k-NN, random
    forest, naive Bayes and the ``StackingCVClassifier`` built from them.

    ``X`` and ``y`` are not defined in this function; they are expected to
    be available from the surrounding scope.
    """
    # Each name is imported once; imports the function never used
    # (StackingClassifier, numpy, warnings) have been dropped.
    from sklearn import model_selection
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from mlxtend.classifier import StackingCVClassifier

    clf1 = KNeighborsClassifier(n_neighbors=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    lr = LogisticRegression()

    sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                                use_probas=True,
                                meta_classifier=lr,
                                random_state=42)

    print('3-fold cross validation:\n')

    for clf, label in zip([clf1, clf2, clf3, sclf],
                          ['KNN',
                           'Random Forest',
                           'Naive Bayes',
                           'StackingClassifier']):
        scores = model_selection.cross_val_score(clf, X, y,
                                                 cv=3, scoring='accuracy')
        print("Accuracy: %0.2f (+/- %0.2f) [%s]"
              % (scores.mean(), scores.std(), label))

Ejemplo n.º 31

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_works_with_df_if_fold_indexes_missing():
    """Regression test: fitting works when pandas labels differ from positions.

    The training data may carry index labels that cannot be addressed with
    the positional indexes produced by the cv splitter (e.g. skf), for
    example:

    + fold outputs that are not neatly consecutive
      (i.e. [341, 345, 543, ...] instead of [0, 1, ... n]);
    + labels that start at a number greater than the size of the input
      (the case exercised here, with an offset of 1000).

    Training data sometimes has ids that carry other information, and
    selecting rows based on cv should not break.

    This is fixed in the code using `safe_indexing`.
    """
    np.random.seed(123)
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42,
                                 use_features_in_secondary=True)

    # Shift the index labels so that they never coincide with positions.
    feature_index = np.arange(X_breast.shape[0]) + 1000
    label_index = np.arange(y_breast.shape[0]) + 1000
    X_modded = pd.DataFrame(X_breast, index=feature_index)
    y_modded = pd.Series(y_breast, index=label_index)

    X_train, _, y_train, _ = train_test_split(X_modded,
                                              y_modded,
                                              test_size=0.3)

    # dense input
    stack.fit(X_train, y_train)
    train_score = round(stack.score(X_train, y_train), 2)
    assert train_score == 0.99, train_score

Ejemplo n.º 32

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_meta_feat_reordering():
    """Check that stored meta-features line up with the training labels.

    With ``shuffle=True`` the ROC AUC between ``y_train`` and the second
    meta-feature column must round to the value expected for the
    installed scikit-learn version.
    """
    knn = KNeighborsClassifier()
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    bayes = GaussianNB()
    stack = StackingCVClassifier(classifiers=[knn, bayes],
                                 meta_classifier=meta_lr,
                                 shuffle=True,
                                 random_state=42,
                                 store_train_meta_features=True)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              random_state=0,
                                              test_size=0.3)
    stack.fit(X_train, y_train)

    # The expected score differs between scikit-learn releases.
    old_sklearn = Version(sklearn_version) < Version("0.21")
    expected_value = 0.86 if old_sklearn else 0.87

    second_column = stack.train_meta_features_[:, 1]
    rounded_auc = round(roc_auc_score(y_train, second_column), 2)
    assert rounded_auc == expected_value, rounded_auc

Ejemplo n.º 33

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: NextNight/mlxtend

def test_get_params():
    """Check the top-level parameter names exposed by ``get_params``."""
    base_learners = [
        KNeighborsClassifier(n_neighbors=1),
        RandomForestClassifier(random_state=1),
        GaussianNB(),
    ]
    meta_lr = LogisticRegression()
    stack = StackingCVClassifier(classifiers=base_learners,
                                 meta_classifier=meta_lr)

    # Reduce every (possibly nested) parameter name to the component
    # before the first '__', de-duplicate and sort.
    got = sorted({key.split('__')[0] for key in stack.get_params()})

    expect = [
        'classifiers', 'cv', 'gaussiannb', 'kneighborsclassifier',
        'meta-logisticregression', 'meta_classifier',
        'randomforestclassifier', 'refit', 'shuffle',
        'store_train_meta_features', 'stratify',
        'use_features_in_secondary', 'use_probas', 'verbose',
    ]
    assert got == expect, got

Ejemplo n.º 34

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_sparse_inputs():
    """Fit on dense and on CSR-sparse training data.

    In both cases the training accuracy must round to 0.99.
    """
    np.random.seed(123)
    forest = RandomForestClassifier(n_estimators=10)
    meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stack = StackingCVClassifier(classifiers=[forest, forest],
                                 meta_classifier=meta_lr,
                                 random_state=42)
    X_train, _, y_train, _ = train_test_split(X_breast, y_breast,
                                              test_size=0.3)

    # dense input
    stack.fit(X_train, y_train)
    dense_score = stack.score(X_train, y_train)
    assert round(dense_score, 2) == 0.99

    # sparse input (scored against the dense training matrix)
    stack.fit(sparse.csr_matrix(X_train), y_train)
    sparse_score = stack.score(X_train, y_train)
    assert round(sparse_score, 2) == 0.99

Ejemplo n.º 35

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_no_weight_support_with_no_weight():
    """Fitting without ``sample_weight`` must succeed for both stacks.

    ``KNeighborsClassifier`` is used once as the meta-classifier and once
    as a base classifier.
    """
    logit = LogisticRegression(multi_class='ovr', solver='liblinear')
    forest = RandomForestClassifier(n_estimators=10)
    bayes = GaussianNB()
    knn = KNeighborsClassifier()

    # k-NN as the meta-classifier.
    knn_on_top = StackingCVClassifier(classifiers=[logit, forest, bayes],
                                      meta_classifier=knn,
                                      shuffle=False)
    knn_on_top.fit(X_iris, y_iris)

    # k-NN among the base classifiers.
    knn_in_base = StackingCVClassifier(classifiers=[logit, knn, bayes],
                                       meta_classifier=forest,
                                       shuffle=False)
    knn_in_base.fit(X_iris, y_iris)

Ejemplo n.º 36

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_sample_weight():
    """Check the effect of ``sample_weight`` on the predicted probabilities.

    Unit weights must give (almost) the same probabilities as no weights,
    while random weights must change them.
    """

    def _fitted_probas(**fit_kwargs):
        # Re-seed and rebuild everything so that the runs differ only in
        # the keyword arguments passed to ``fit``.
        np.random.seed(123)
        meta_lr = LogisticRegression(multi_class='ovr', solver='liblinear')
        forest = RandomForestClassifier(n_estimators=10)
        bayes = GaussianNB()
        stack = StackingCVClassifier(classifiers=[forest, bayes],
                                     meta_classifier=meta_lr,
                                     shuffle=False)
        return stack.fit(X_iris, y_iris, **fit_kwargs).predict_proba(X_iris)

    # with no weight given
    prob1 = _fitted_probas()

    # with weight = 1
    unit_weights = np.ones(len(y_iris))
    prob2 = _fitted_probas(sample_weight=unit_weights)

    # with random weight
    random.seed(87)
    random_weights = np.array([random.random() for _ in range(len(y_iris))])
    prob3 = _fitted_probas(sample_weight=random_weights)

    diff12 = np.max(np.abs(prob1 - prob2))
    diff23 = np.max(np.abs(prob2 - prob3))
    assert diff12 < 1e-3, "max diff is %.4f" % diff12
    assert diff23 > 1e-3, "max diff is %.4f" % diff23

Ejemplo n.º 37

0

Mostrar archivo

Archivo: test_stacking_cv_classifier.py Proyecto: rasbt/mlxtend

def test_StackingClassifier_drop_last_proba():
    """Check the meta-feature width with and without ``drop_last_proba``.

    Two logistic-regression base learners with ``use_probas=True`` must
    give 6 columns when nothing is dropped, 4 columns with
    ``drop_last_proba=True`` on the full iris data, and 2 columns when
    fitted on the first 100 samples (only 2 classes).
    """
    np.random.seed(123)
    logit = LogisticRegression(solver='liblinear',
                               multi_class='ovr')
    first_two = X_iris[:2]

    # Keep every probability column.
    keep_all = StackingCVClassifier(classifiers=[logit, logit],
                                    use_probas=True,
                                    drop_last_proba=False,
                                    meta_classifier=logit)
    keep_all.fit(X_iris, y_iris)
    shape_all = keep_all.predict_meta_features(first_two).shape
    assert shape_all == (2, 6)

    # Drop the last probability column of each base learner.
    drop_last = StackingCVClassifier(classifiers=[logit, logit],
                                     use_probas=True,
                                     drop_last_proba=True,
                                     meta_classifier=logit)
    drop_last.fit(X_iris, y_iris)
    shape_dropped = drop_last.predict_meta_features(first_two).shape
    assert shape_dropped == (2, 4), shape_dropped

    # Same setting, fitted on the first 100 samples only.
    drop_last_binary = StackingCVClassifier(classifiers=[logit, logit],
                                            use_probas=True,
                                            drop_last_proba=True,
                                            meta_classifier=logit)
    drop_last_binary.fit(X_iris[0:100], y_iris[0:100])  # only 2 classes
    shape_binary = drop_last_binary.predict_meta_features(first_two).shape
    assert shape_binary == (2, 2), shape_binary