def test_sparse_inputs_with_features_in_secondary():
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf],
                                 meta_classifier=lr,
                                 random_state=42,
                                 use_features_in_secondary=True)
    X_train, X_test, y_train, y_test = train_test_split(X_breast, y_breast,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)

    if Version(sklearn_version) < Version("0.21"):
        expected_value = 1.0
    else:
        expected_value = 0.99

    assert round(stclf.score(X_train, y_train), 2) == expected_value, \
        round(stclf.score(X_train, y_train), 2)

    # sparse
    stclf.fit(sparse.csr_matrix(X_train), y_train)

    if Version(sklearn_version) < Version("0.21"):
        expected_value = 1.0
    else:
        expected_value = 0.99
    assert round(stclf.score(X_train, y_train), 2) == expected_value, \
        round(stclf.score(X_train, y_train), 2)

def test_sparse_inputs_with_features_in_secondary():
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf],
                                 meta_classifier=lr,
                                 random_state=42,
                                 use_features_in_secondary=True)
    X_train, X_test, y_train, y_test = train_test_split(X_breast,
                                                        y_breast,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)

    expected_value = 1.0

    assert round(stclf.score(X_train, y_train), 2) == expected_value, \
        round(stclf.score(X_train, y_train), 2)

    # sparse
    stclf.fit(sparse.csr_matrix(X_train), y_train)

    if Version(sklearn_version) < Version("0.21"):
        expected_value = 1.0
    elif Version(sklearn_version) < Version("0.22"):
        expected_value = 0.99
    else:
        expected_value = 1.00

    assert round(stclf.score(X_train, y_train), 2) == expected_value, \
        round(stclf.score(X_train, y_train), 2)
Example #3
def test_sparse_inputs():
    rf = RandomForestClassifier(random_state=1)
    lr = LogisticRegression()
    stclf = StackingCVClassifier(classifiers=[rf, rf], meta_classifier=lr)
    X_train, X_test, y_train, y_test = train_test_split(X_breast,
                                                        y_breast,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99

    # sparse
    stclf.fit(sparse.csr_matrix(X_train), y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99
Example #4
def test_sparse_inputs():
    np.random.seed(123)
    rf = RandomForestClassifier(n_estimators=10)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf], meta_classifier=lr)
    X_train, X_test, y_train, y_test = train_test_split(X_breast,
                                                        y_breast,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99

    # sparse
    stclf.fit(sparse.csr_matrix(X_train), y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99

def test_sparse_inputs():
    np.random.seed(123)
    rf = RandomForestClassifier(n_estimators=10)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf],
                                 meta_classifier=lr,
                                 random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X_breast, y_breast,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99

    # sparse
    stclf.fit(sparse.csr_matrix(X_train), y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99

def test_works_with_df_if_fold_indexes_missing():
    """This is a regression test to make sure fitting will still work even if
    training data has ids that cannot be indexed using the indexes from the cv
    (e.g. skf)

    Some possibilities:
    + Output of the folds are not neatly consecutive (i.e. [341, 345, 543, ...]
      instead of [0, 1, ... n])
    + Indexes just start from some number greater than the size of the input
      (see test case)

    Training data sometimes has ids that carry other information, and selection
    of rows based on cv should not break.

    This is fixed in the code using `safe_indexing`
    """

    np.random.seed(123)
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf],
                                 meta_classifier=lr,
                                 random_state=42,
                                 use_features_in_secondary=True)

    X_modded = pd.DataFrame(X_breast,
                            index=np.arange(X_breast.shape[0]) + 1000)
    y_modded = pd.Series(y_breast, index=np.arange(y_breast.shape[0]) + 1000)

    X_train, X_test, y_train, y_test = train_test_split(X_modded,
                                                        y_modded,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)

    if Version(sklearn_version) < Version("0.22"):
        assert round(stclf.score(X_train, y_train), 2) == 0.99, \
            round(stclf.score(X_train, y_train), 2)
    else:
        assert round(stclf.score(X_train, y_train), 2) == 0.98, \
            round(stclf.score(X_train, y_train), 2)

def test_works_with_df_if_fold_indexes_missing():
    """This is a regression test to make sure fitting will still work even if
    training data has ids that cannot be indexed using the indexes from the cv
    (e.g. skf)

    Some possibilities:
    + Output of the folds are not neatly consecutive (i.e. [341, 345, 543, ...]
      instead of [0, 1, ... n])
    + Indexes just start from some number greater than the size of the input
      (see test case)

    Training data sometimes has ids that carry other information, and selection
    of rows based on cv should not break.

    This is fixed in the code using `safe_indexing`
    """

    np.random.seed(123)
    rf = RandomForestClassifier(n_estimators=10, random_state=42)
    lr = LogisticRegression(multi_class='ovr', solver='liblinear')
    stclf = StackingCVClassifier(classifiers=[rf, rf],
                                 meta_classifier=lr,
                                 random_state=42,
                                 use_features_in_secondary=True)

    X_modded = pd.DataFrame(X_breast,
                            index=np.arange(X_breast.shape[0]) + 1000)
    y_modded = pd.Series(y_breast,
                         index=np.arange(y_breast.shape[0]) + 1000)

    X_train, X_test, y_train, y_test = train_test_split(X_modded,
                                                        y_modded,
                                                        test_size=0.3)

    # dense
    stclf.fit(X_train, y_train)
    assert round(stclf.score(X_train, y_train), 2) == 0.99, \
        round(stclf.score(X_train, y_train), 2)
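
The docstring above points at the actual mechanism: cross-validators emit
positional indices, so label-based selection on a DataFrame with an offset
index raises KeyError, while positional selection works. A minimal
standalone sketch of that distinction (`X` and `fold_idx` below are
illustrative, not taken from the tests):

import numpy as np
import pandas as pd

X = pd.DataFrame(np.random.rand(5, 2), index=np.arange(5) + 1000)
fold_idx = np.array([0, 2, 4])  # positional indices from a CV splitter

# X.loc[fold_idx]           # would raise KeyError: no labels 0, 2, 4
rows = X.iloc[fold_idx]     # positional selection ignores the labels
print(rows.index.tolist())  # [1000, 1002, 1004]

sklearn's `safe_indexing` helper performs this kind of position-based
selection uniformly for DataFrames, arrays, and lists, which is what the
fix relies on.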
Example #8
vc = VotingClassifier(estimators, voting='hard')
vc.fit(X_train, y_train)
vc.score(X_test, y_test)

# stacking
clf1 = KNeighborsClassifier(n_neighbors=10)
clf2 = GaussianNB()
clf3 = RandomForestClassifier(random_state=46)
lr = LogisticRegression()

sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                            meta_classifier=lr,
                            random_state=46)

sclf.fit(X_train, y_train)
a = sclf.score(X_test, y_test)
print(a)
sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                            meta_classifier=lr,
                            random_state=46,
                            use_probas=True)

sclf.fit(X_train, y_train)
b = sclf.score(X_test, y_test)

print(b)

# Check multicollinearity among features via variance inflation factors
# (VIF_i = 1 / (1 - R_i^2), where R_i^2 regresses feature i on the rest).
from statsmodels.stats.outliers_influence import variance_inflation_factor

variance_inflation_factor(X_train.values, 0)

for i in range(len(X_train.columns)):
    print(variance_inflation_factor(X_train.values, i))
Example #9
    clfs = [
        xgb.XGBClassifier(max_depth=6, n_estimators=100, num_round=5),
        RandomForestClassifier(n_estimators=100, max_depth=6, oob_score=True),
        GradientBoostingClassifier(learning_rate=0.3,
                                   max_depth=6,
                                   n_estimators=100)
    ]
    clf2 = LogisticRegression(C=0.5, max_iter=100)
    #============================================================================#
    from mlxtend.classifier import StackingClassifier, StackingCVClassifier
    sclf = StackingClassifier(classifiers=clfs, meta_classifier=clf2)
    sclf.fit(X_train, Y_train)
    print(sclf.score(X_train, Y_train))
    sclf_pre = sclf.predict(X_test)
    sclf_sub = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": sclf_pre
    })
    sclf_sub.to_csv("../data/sclf_sub.csv", index=False)
    #===============================================================================#
    sclf2 = StackingCVClassifier(classifiers=clfs, meta_classifier=clf2, cv=5)
    x = np.array(X_train)
    y = np.array(Y_train).flatten()
    sclf2.fit(x, y)
    print(sclf2.score(x, y))
    sclf2_pre = sclf2.predict(np.array(X_test))
    sclf2_sub = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": sclf2_pre
    })
    sclf2_sub.to_csv("../data/sclf2_sub.csv", index=False)
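
This example fits both StackingClassifier and StackingCVClassifier on the
same base learners; the difference is where the meta-learner's training
data comes from. A minimal, self-contained sketch of that contrast on
synthetic data (none of the names below come from the example above):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from mlxtend.classifier import StackingClassifier, StackingCVClassifier

Xs, ys = make_classification(n_samples=300, random_state=0)
base = [DecisionTreeClassifier(random_state=0)]
meta = LogisticRegression()

# StackingClassifier: the meta-learner sees in-sample base predictions,
# so the training score tends to be optimistically inflated.
plain = StackingClassifier(classifiers=base, meta_classifier=meta)
# StackingCVClassifier: the meta-learner sees out-of-fold predictions,
# which curbs that leakage.
cved = StackingCVClassifier(classifiers=base, meta_classifier=meta,
                            cv=5, random_state=0)
print(plain.fit(Xs, ys).score(Xs, ys))
print(cved.fit(Xs, ys).score(Xs, ys))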
Example #10
# x_train = x_train.reset_index(drop=True)
# x_vali = x_vali.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
# y_vali = y_vali.reset_index(drop=True)
"""=====================================================================================================================
2 模型融合;
学习参考:https://blog.csdn.net/LAW_130625/article/details/78573736
"""

lr_clf = clfs["lr"]  # meta_classifier
svm_clf = clfs["svm_ploy"]
rf_clf = clfs["rf"]
xgb_clf = clfs["xgb"]
lgb_clf = clfs["lgb"]

sclf = StackingCVClassifier(
    classifiers=[lr_clf, svm_clf, rf_clf, xgb_clf, lgb_clf],
    meta_classifier=lr_clf,
    use_probas=True,
    verbose=3)

sclf.fit(x_train, y_train)

print("测试模型 & 模型参数如下:\n{0}".format(sclf))
print("=" * 20)
pre_train = sclf.predict(x_train)
print("训练集正确率: {0:.4f}".format(sclf.score(x_train, y_train)))
print("训练集f1分数: {0:.4f}".format(f1_score(y_train, pre_train)))
print("训练集auc分数: {0:.4f}".format(roc_auc_score(y_train, pre_train)))
Example #11
# create voting classifier
vc = VotingClassifier(estimators)
vc.fit(X_train, y_train)
vc.score(X_test, y_test)

# Simple Stacking CV classifier
clf1 = KNeighborsClassifier(n_neighbors=10)
clf2 = RandomForestClassifier(random_state=42)
clf3 = GaussianNB()
lr = LogisticRegression()

sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                            meta_classifier=lr,
                            random_state=42)
sclf.fit(X_train, y_train)
sclf.score(X_test, y_test)

# Stacking classifier using probabilities as meta-features
clf1 = KNeighborsClassifier(n_neighbors=10)
clf2 = RandomForestClassifier(random_state=42)
clf3 = GaussianNB()
lr = LogisticRegression()

sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3],
                            meta_classifier=lr,
                            use_probas=True,
                            random_state=42)
sclf.fit(X_train, y_train)
sclf.score(X_test, y_test)
# In[ ]:

from sklearn import svm, model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from mlxtend.classifier import StackingCVClassifier

clf1 = svm.SVC(C=1, gamma=0.1)
clf2 = MLPClassifier(hidden_layer_sizes=(50, ), max_iter=600, alpha=1)
clf3 = DecisionTreeClassifier(max_depth=10, min_samples_split=4)
clf4 = RandomForestClassifier(n_estimators=250,
                              max_depth=10,
                              min_samples_split=4,
                              criterion='gini')
lr = LogisticRegression()

sclf = StackingCVClassifier(classifiers=[clf1, clf2, clf3, clf4],
                            meta_classifier=lr)

X_train2, X_cv, y_train2, y_cv = train_test_split(X_train,
                                                  y_train,
                                                  test_size=0.33,
                                                  random_state=0)

# .values converts the DataFrames to NumPy arrays before fitting.
sclf.fit(X_train2.values, y_train2.values)
print("[Stacking] score on training data is %0.2f"
      % sclf.score(X_train2.values, y_train2.values))
print("[Stacking] score on the cross-validation data is %0.2f"
      % sclf.score(X_cv.values, y_cv.values))
Example #13
# y is the label column (assumed, from context, to be column 0 of train_np)
y = train_np[:, 0]
# X holds the feature values (the remaining columns)
X = train_np[:, 1:]

# train_x, valid_x, train_y, valid_y = train_test_split(X, y, test_size=0.3)
test_df = test_data
test_np = test_df.values
test_x = test_np[:, 0:]

lr = LogisticRegression(C=0.8, penalty='l2', tol=1e-6)
DTree = DecisionTreeClassifier(max_depth=20)
rfc = RandomForestClassifier(n_estimators=5000)

xgbc = XGBClassifier(learning_rate=0.001, n_estimators=5000, max_depth=30,
                     objective='binary:logitraw')
gbc = GradientBoostingClassifier(learning_rate=0.001, n_estimators=5000,
                                 max_depth=30)

sclf = StackingCVClassifier(classifiers=[lr, DTree, rfc,
                                         SVC(probability=True)],
                            meta_classifier=xgbc, use_probas=True)
sclf.fit(X, y)
print(sclf.score(X, y))

test_id = pd.read_csv('data/origin/test.csv')

predictions = sclf.predict(test_x)
result = pd.DataFrame({'PassengerId': test_id['PassengerId'].values,
                       'Survived': predictions.astype(np.int32)})
result.to_csv("data/predictions/stacking_test4.csv", index=False)

answer = pd.read_csv('data/predictions/submission.csv')
answer_np = answer['Survived'].values
print('acc = %.5f' % accuracy_score(answer_np, predictions))

# from calculate_acc import calculate_acc
# calculate_acc(predictions)