Example #1
    def stacking(self):
        train_data, test_data = self.Extract_feature.extract_count()
        from sklearn.svm import SVC
        from sklearn.pipeline import make_pipeline
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import classification_report
        from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
        import xgboost as xgb
        from mlxtend.classifier import StackingClassifier
        from sklearn import model_selection

        svc = make_pipeline(SVC(C=2.1, gamma=0.005))
        rforest = make_pipeline(
            RandomForestClassifier(random_state=0, n_estimators=6))
        Gboost = GradientBoostingClassifier(n_estimators=500,
                                            learning_rate=0.01,
                                            max_depth=12,
                                            max_features="sqrt",
                                            min_samples_leaf=15,
                                            min_samples_split=97,
                                            random_state=200)
        model_xgb = xgb.XGBClassifier(colsample_bytree=0.4603,
                                      gamma=10,
                                      learning_rate=0.01,
                                      max_depth=11,
                                      n_estimators=500,
                                      reg_alpha=0.01,
                                      reg_lambda=5,
                                      subsample=0.5213,
                                      random_state=1024,
                                      n_jobs=-1)

        lr = LogisticRegression()
        classifiers = [rforest, svc, Gboost, model_xgb, lr]
        stregr = StackingClassifier(classifiers=classifiers,
                                    meta_classifier=lr)
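        # fit() trains each base classifier on the full training set, then
        # trains the meta-classifier on their predicted labels (the default
        # use_probas=False behavior).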
        stregr.fit(train_data, self.train_label)

        prediction = stregr.predict(test_data)
        classification = classification_report(y_true=self.test_label,
                                               y_pred=prediction)
        print("classification:{}".format(classification))
        print("Test set score: {}".format(stregr.score(test_data, self.test_label)))
        for clf, label in zip(
            [rforest, svc, Gboost, lr, model_xgb, stregr],
            ['rf', 'svc', 'gboost', 'lr', 'xgb', 'stackingclassifier']):
            scores = model_selection.cross_val_score(clf,
                                                     train_data,
                                                     self.train_label,
                                                     cv=3,
                                                     scoring='accuracy')
            print("Accuracy: %0.2f (+/- %0.2f) [%s]" %
                  (scores.mean(), scores.std(), label))
Example #2
def test_fit_base_estimators_false():
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()

    clf1.fit(X, y)
    clf2.fit(X, y)

    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta,
                              fit_base_estimators=False)
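    # With fit_base_estimators=False, the pre-fitted clf1 and clf2 are reused
    # as-is; fit() only trains the meta-classifier.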

    sclf.fit(X, y)
    assert round(sclf.score(X, y), 2) == 0.98
Example #3
lr = LogisticRegression()
slf = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=lr)

slf.fit(X_train, y_train)

# cross-validation (sklearn.cross_validation was removed; use
# cross_val_score from sklearn.model_selection)
scores = cross_val_score(gbm, X, y, cv=5)
# per-model test scores
dtc_score = dtc.score(X_test, y_test)
rfc_score = rfc.score(X_test, y_test)
#gbc_score = cross_val_score(gbc,X,y,cv=10)
gbc_score = gbc.score(X_test, y_test)
gbm_score = gbm.score(X_test, y_test)

knn_score = knn.score(X_test, y_test)
slf_score = slf.score(X_test, y_test)
# print the results
print(np.mean(scores))
#print (dtc_score)
print(rfc_score)
print(gbc_score)
print(gbm_score)
#result1 = (418-np.sum(abs(gbc_predict-data_gender)))/418
#result2 = (418-np.sum(abs(xgb_predict-data_gender)))/418
#print(result1)
#print(result2)
#print(gbc_predict.shape)
#print(data_gender.shape)
#data_train.info()
# visualization
Example #4
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=54,
                                                    shuffle=True)
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from mlxtend.classifier import StackingClassifier
'''
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
gb=GradientBoostingClassifier(n_estimators=682)
bg=BaggingClassifier(n_estimators=682)
xg=xgb.XGBClassifier(n_estimators=682)
iso=IsolationForest(n_estimators=682)
ada=AdaBoostClassifier(xg,n_estimators=682,algorithm='SAMME.R')
'''
ext = ExtraTreesClassifier(n_estimators=682,
                           min_samples_split=2,
                           random_state=8)
rf = RandomForestClassifier(n_estimators=682,
                            max_depth=None,
                            min_samples_split=2,
                            random_state=8)
clf = StackingClassifier(classifiers=[ext], meta_classifier=rf)
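# With a single base classifier, the random-forest meta-learner is trained
# only on ext's predicted labels.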
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
Example #5
label = 'stacking'
sclf.fit(X_train_standar, y_train)
score_stacking = cross_val_score(sclf,
                                 X_train_standar,
                                 y_train,
                                 scoring='accuracy')
score_f1 = cross_val_score(sclf, X_train_standar, y_train, scoring='f1')
score_mean_sclf = score_stacking.mean()
print('stacking final mean score is %.2f' % score_mean_sclf)

print('accuracy: %.2f (+/- %.2f) [%s]' %
      (score_stacking.mean(), score_stacking.std(), label))

result_stacking = sclf.predict(X_test_stander)
result_stacking_proba = sclf.predict_proba(X_test_stander)
clf_stacking_test_score = sclf.score(X_test_stander, y_test)

precision, recall, thresholds = precision_recall_curve(
    y_test, result_stacking_proba[:, 1])
pred_at_threshold = result_stacking_proba[:, 1] >= 0.8
print(classification_report(y_test, pred_at_threshold, target_names=['0', '1']))

# ==============================================================================
# Model persistence
# os.chdir(u'D:\【01】行健金融\【01】数据中心\【05】数据分析项目\【03】2018\May\规则引擎_分期商城_风控+授信')
# joblib.dump(sclf, 'stackingpkl.pkl')
# joblib.dump(scaler, 'scaler.pkl')

# ==============================================================================

Example #6
time2 = time.time()

# Assumed loop header: svm_clf, rf_clf and the cross_val_score call below are
# reconstructions (the original snippet began mid-loop); only the loop body
# and the SVM/RF/Stacking labels come from the source.
clf_cv_mean, clf_cv_std = [], []
for clf, label in zip([svm_clf, rf_clf, sclf], ['SVM', 'RF', 'Stacking']):
    scores = cross_val_score(clf, X, y, cv=3, scoring='accuracy')
    print("Accuracy: %.2f (+/- %.2f) [%s]" %
          (scores.mean(), scores.std(), label))
    clf_cv_mean.append(scores.mean())
    clf_cv_std.append(scores.std())

    clf.fit(X, y)

#plot classifier accuracy
(_, caps, _) = plt.errorbar(range(3),
                            clf_cv_mean,
                            yerr=clf_cv_std,
                            c='blue',
                            fmt='-o',
                            capsize=5)
for cap in caps:
    cap.set_markeredgewidth(1)
plt.xticks(range(3), ['SVM', 'RF', 'Stacking'])
plt.ylabel('Accuracy')
plt.xlabel('Classifier')
plt.title('Stacking Ensemble')
plt.show()

import pickle
pickle.dump(sclf, open('stack_model_identify.sav', 'wb'))

pred = sclf.predict(X_test)
print(pred)

scores = sclf.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * scores))
Example #7
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
clfs = [
    LogisticRegression(C=0.5, max_iter=100),
    xgb.XGBClassifier(max_depth=6, n_estimators=100),
    RandomForestClassifier(n_estimators=100, max_depth=6, oob_score=True),
    GradientBoostingClassifier(learning_rate=0.3,
                               max_depth=6,
                               n_estimators=100)
]
clf2 = LogisticRegression(C=0.5, max_iter=100)
#============================================================================#
from mlxtend.classifier import StackingClassifier, StackingCVClassifier
sclf = StackingClassifier(classifiers=clfs, meta_classifier=clf2)
sclf.fit(X_train, Y_train)
print(sclf.score(X_train, Y_train))
sclf_pre = sclf.predict(X_test)
sclf_sub = pd.DataFrame({
    "PassengerId": test_df["PassengerId"],
    "Survived": sclf_pre
})
sclf_sub.to_csv("../data/sclf_sub.csv", index=False)
#===============================================================================#
# StackingCVClassifier builds its meta-features from out-of-fold predictions
# (cv=5 here), which reduces overfitting compared with StackingClassifier.
sclf2 = StackingCVClassifier(classifiers=clfs, meta_classifier=clf2, cv=5)
x = np.array(X_train)
y = np.array(Y_train).flatten()
sclf2.fit(x, y)
print(sclf2.score(x, y))
sclf2_pre = sclf2.predict(np.array(X_test))
sclf2_sub = pd.DataFrame({
    "PassengerId": test_df["PassengerId"],
    "Survived": sclf2_pre
})  # assumed completion: the snippet was cut off mid-dict; mirrors sclf_sub above
Example #8
class Classifier(object):
    """
    LR, Bayes, SVM, XGB, decision tree, random forest, ...
    """

    def __init__(self, model_name):
        self.model_name = model_name
        self.model = None

    def save_model(self):
        with open('./model/' + self.model_name + '.pkl', 'wb') as fw:
            pickle.dump(self.model, fw)

    def load_model(self):
        with open('./model/' + self.model_name + '.pkl', 'rb') as fr:
            self.model = pickle.load(fr)

    def eval_prob(self, X_test):
        return self.model.predict_proba(X_test)

    def cls(self, X_train, X_test, y_train, y_test):
        if self.model_name == 'lr':
            self.model = LogisticRegression()
        elif self.model_name == 'xgb':
            self.model = XGBClassifier()
        elif self.model_name == 'svm':
            self.model = SVC(kernel='linear')
        elif self.model_name == 'bayes':
            # self.model = GaussianNB()
            self.model = BernoulliNB()  # best performer
            # self.model = MultinomialNB()
        elif self.model_name == 'rf':
            """On top of bagging, random forests also randomly subsample features at each split"""
            self.model = RandomForestClassifier()
        elif self.model_name == 'dt':
            self.model = DecisionTreeClassifier()
        elif self.model_name == 'et':
            self.model = ExtraTreesClassifier()
        elif self.model_name == 'ensemble':
            model1 = LogisticRegression()
            model2 = BernoulliNB()
            model3 = RandomForestClassifier()
            model4 = DecisionTreeClassifier()
            model5 = ExtraTreesClassifier()
            model6 = GradientBoostingClassifier()
            # self.model = VotingClassifier(estimators=[('lr', model1), ('dt', model4)])

            # self.model = BaggingClassifier(model4, n_estimators=100, max_samples=0.3)  # dt 进行bag == rf

            self.model = StackingClassifier(classifiers=[model3, model4, model5, model6],
                                            use_probas=True,
                                            average_probas=False,
                                            meta_classifier=model1)
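            # use_probas=True passes predicted class probabilities (rather
            # than labels) to the meta-classifier; average_probas=False stacks
            # each base model's probabilities instead of averaging them.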
            # self.model = GradientBoostingClassifier()

            #  grid-search over the parameters
            # params = {'lr__C': [1.0, 100.0], 'rf__n_estimators': [20, 200]}
            # self.model = GridSearchCV(estimator=model_ensemble, param_grid=params, cv=5)

        X_train = np.array(X_train)
        y_train = np.array(y_train)
        self.model.fit(X_train, y_train)

        print(self.model.score(X_test, y_test))
        print(classification_report(y_test, self.model.predict(X_test)))
Example #9
# (the snippet begins mid-loop: stacking_dic maps each model's flag to the
# number of feature columns that model uses)
    stacking_dic[flag] = tmp
    out[flag] = (tmp, id)
print(stacking_dic)
print(out)


def correspond(flag):
    if flag == 'lr':
        return LogisticRegression(random_state=1113)
    elif flag == 'svm':
        return svm.SVC(random_state=1113)
    elif flag == 'rf':
        return RandomForestClassifier(random_state=1113)
    elif flag == 'extra':
        return ExtraTreesClassifier(random_state=1113)
    elif flag == 'gbdt':
        return GradientBoostingClassifier(random_state=1113)
    elif flag == 'adaboost':
        # gradient boosting with exponential loss recovers AdaBoost
        return GradientBoostingClassifier(random_state=1113, loss='exponential')


pipe = []
for item in stacking_dic:
    pipe.append(make_pipeline(
        ColumnSelector(cols=list(range(stacking_dic[item]))),
        correspond(item)))
sclf = StackingClassifier(classifiers=pipe,
                          meta_classifier=LogisticRegression())
sclf.fit(data, mark)
joblib.dump(sclf, 'sclf.model')  # dump after fitting so the saved model is trained
print('accuracy:', sclf.score(data, mark))

# accuracy == 0.732394366197
Example #10
print("\n Gradient Boosting: ", classifier6.score(x, y))

from sklearn.tree import DecisionTreeClassifier
classifier7 = DecisionTreeClassifier(criterion='entropy')
classifier7.fit(x, y)
y_pred7 = classifier7.predict(x_test)
print("\n Decision Tree: ", classifier7.score(x, y))

from mlxtend.classifier import StackingClassifier
classifier8 = StackingClassifier(classifiers=[
    classifier1, classifier3, classifier4, classifier5, classifier6,
    classifier7
],
                                 meta_classifier=classifier2)
classifier8.fit(x, y)
print("\n Stacking Classifier: ", classifier8.score(x, y))
y_pred8 = classifier8.predict(x_test)
'''
from sklearn import model_selection

for clf, label in zip([classifier1,classifier3,classifier4,
                                    classifier5,classifier6, classifier7,
                                    classifier8],
                       ['SVC', 
                       'KNN', 
                       'Naive Bayes',
                       'Random Forests',
                       'Gradient Boosting',
                       'Decision Tree',
                       'Stacking Classifier'
                       ]):
Example #11
class Danny_ML_CLF:
    def __init__(self):
        self.X = ''
        self.y = ''

        self.svm = ''
        self.tree = ''
        self.bayes = ''
        self.knn = ''
        self.xgb = ''
        self.stacking = ''
        self.voting = ''
        self.bagging = ''
        self.rf = ''  # random forest
        self.adaboost = ''

        self.svm_pred = ''
        self.tree_pred = ''
        self.bayes_pred = ''
        self.knn_pred = ''
        self.xgb_pred = ''
        self.stacking_pred = ''
        self.voting_pred = ''
        self.bagging_pred = ''
        self.rf_pred = ''
        self.adaboost_pred = ''

        self.svm_report = ''
        self.tree_report = ''
        self.bayes_report = ''
        self.knn_report = ''
        self.xgb_report = ''
        self.stacking_report = ''
        self.voting_report = ''
        self.bagging_report = ''
        self.rf_report = ''
        self.adaboost_report = ''

        self.svm_cm = ''
        self.tree_cm = ''
        self.bayes_cm = ''
        self.knn_cm = ''
        self.xgb_cm = ''
        self.stacking_cm = ''
        self.voting_cm = ''
        self.bagging_cm = ''
        self.rf_cm = ''
        self.adaboost_cm = ''

        self.svm_score = ''
        self.tree_score = ''
        self.bayes_score = ''
        self.knn_score = ''
        self.xgb_score = ''
        self.stacking_score = ''
        self.voting_score = ''
        self.bagging_score = ''
        self.rf_score = ''
        self.adaboost_score = ''

    def Fit_value(self, x, y):
        self.X = x
        self.y = y

    def Split_data(self, raw_X, raw_y, test_size, Standard=True):
        train_X, test_X, train_y, test_y = train_test_split(
            raw_X, raw_y, test_size=test_size, shuffle=True)
        if Standard:
            sc = StandardScaler()
            sc.fit(train_X)
            train_X = sc.transform(train_X)
            test_X = sc.transform(test_X)
        self.X = train_X
        self.y = train_y
        return train_X, test_X, train_y, test_y

    def SVM(self, C=1, kernel='rbf'):
        self.svm = SVC(C=C, kernel=kernel, degree=5, probability=True)
        self.svm.fit(self.X, self.y)

    def SVM_predict(self, pred_x):
        self.svm_pred = self.svm.predict(pred_x)
        return self.svm_pred

    def Tree(self, criterion='gini', max_depth=5):
        self.tree = DecisionTreeClassifier(criterion=criterion,
                                           max_depth=max_depth)
        self.tree.fit(self.X, self.y)

    def Tree_predict(self, pred_x):
        self.tree_pred = self.tree.predict(pred_x)
        return self.tree_pred

    def Bayes(self):
        self.bayes = GaussianNB()
        self.bayes.fit(self.X, self.y)

    def Bayes_predict(self, pred_x):
        self.bayes_pred = self.bayes.predict(pred_x)
        return self.bayes_pred

    def KNN(self, n_neighbors=3, weights='distance'):
        self.knn = KNeighborsClassifier(n_neighbors=n_neighbors,
                                        weights=weights)
        self.knn.fit(self.X, self.y)

    def KNN_predict(self, pred_x):
        self.knn_pred = self.knn.predict(pred_x)
        return self.knn_pred

    def XGB(self):
        self.xgb = xgb.XGBClassifier()
        self.xgb.fit(self.X, self.y)

    def XGB_prediction(self, pred_x):
        self.xgb_pred = self.xgb.predict(pred_x)
        return self.xgb_pred

    def Stacking(self):
        meta_clf = LogisticRegression()
        self.stacking = StackingClassifier(
            classifiers=[self.svm, self.tree, self.bayes, self.knn, self.xgb],
            meta_classifier=meta_clf)
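        # By default (fit_base_estimators=True), StackingClassifier refits
        # every base model inside fit(), even though they were trained above.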
        self.stacking.fit(self.X, self.y)

    def Stacking_prediction(self, pred_x):
        self.stacking_pred = self.stacking.predict(pred_x)
        return self.stacking_pred

    def Voting(self):
        self.voting = VotingClassifier(estimators=[('svm', self.svm),
                                                   ('tree', self.tree),
                                                   ('bayes', self.bayes),
                                                   ('knn', self.knn),
                                                   ('xgb', self.xgb)],
                                       voting='soft',
                                       weights=[1, 1, 1, 1, 1])
        self.voting.fit(self.X, self.y)

    def Voting_prediction(self, pred_x):
        self.voting_pred = self.voting.predict(pred_x)
        return self.voting_pred

    def Bagging(self, n_estimators=100, oob_score=False):
        self.bagging = BaggingClassifier(n_estimators=n_estimators,
                                         oob_score=oob_score)
        self.bagging.fit(self.X, self.y)

    def Bagging_prediction(self, pred_x):
        self.bagging_pred = self.bagging.predict(pred_x)
        return self.bagging_pred

    def RF(self,
           n_estimators=200,
           criterion='gini',
           max_features='sqrt',  # 'auto' is deprecated; 'sqrt' is equivalent for classifiers
           oob_score=False):
        self.rf = RandomForestClassifier(n_estimators=n_estimators,
                                         criterion=criterion,
                                         max_features=max_features,
                                         oob_score=oob_score)
        self.rf.fit(self.X, self.y)

    def RF_prediction(self, pred_x):
        self.rf_pred = self.rf.predict(pred_x)
        return self.rf_pred

    def Adaboost(self, n_estimators=100):
        self.adaboost = AdaBoostClassifier(n_estimators=n_estimators)
        self.adaboost.fit(self.X, self.y)

    def Adaboost_prediction(self, pred_x):
        self.adaboost_pred = self.adaboost.predict(pred_x)
        return self.adaboost_pred

    def Train(self):
        self.SVM()
        self.Tree()
        self.Bayes()
        self.KNN()
        self.XGB()
        self.Stacking()
        self.Voting()
        self.Bagging()
        self.RF()
        self.Adaboost()

    def Report(self, test_X, test_y, labels, show_cm=True):
        self.SVM_predict(test_X)
        self.Tree_predict(test_X)
        self.Bayes_predict(test_X)
        self.KNN_predict(test_X)
        self.XGB_prediction(test_X)
        self.Stacking_prediction(test_X)
        self.Voting_prediction(test_X)
        self.Bagging_prediction(test_X)
        self.RF_prediction(test_X)
        self.Adaboost_prediction(test_X)

        self.svm_score = self.svm.score(test_X, test_y)
        self.tree_score = self.tree.score(test_X, test_y)
        self.bayes_score = self.bayes.score(test_X, test_y)
        self.knn_score = self.knn.score(test_X, test_y)
        self.xgb_score = self.xgb.score(test_X, test_y)
        self.stacking_score = self.stacking.score(test_X, test_y)
        self.voting_score = self.voting.score(test_X, test_y)
        self.bagging_score = self.bagging.score(test_X, test_y)
        self.rf_score = self.rf.score(test_X, test_y)
        self.adaboost_score = self.adaboost.score(test_X, test_y)

        self.svm_report = metrics.classification_report(test_y, self.svm_pred)
        self.tree_report = metrics.classification_report(
            test_y, self.tree_pred)
        self.bayes_report = metrics.classification_report(
            test_y, self.bayes_pred)
        self.knn_report = metrics.classification_report(test_y, self.knn_pred)
        self.xgb_report = metrics.classification_report(test_y, self.xgb_pred)
        self.voting_report = metrics.classification_report(
            test_y, self.voting_pred)
        self.stacking_report = metrics.classification_report(
            test_y, self.stacking_pred)
        self.bagging_report = metrics.classification_report(
            test_y, self.bagging_pred)
        self.rf_report = metrics.classification_report(test_y, self.rf_pred)
        self.adaboost_report = metrics.classification_report(
            test_y, self.adaboost_pred)

        self.svm_cm = metrics.confusion_matrix(test_y,
                                               self.svm_pred,
                                               labels=labels)
        self.tree_cm = metrics.confusion_matrix(test_y,
                                                self.tree_pred,
                                                labels=labels)
        self.bayes_cm = metrics.confusion_matrix(test_y,
                                                 self.bayes_pred,
                                                 labels=labels)
        self.knn_cm = metrics.confusion_matrix(test_y,
                                               self.knn_pred,
                                               labels=labels)
        self.xgb_cm = metrics.confusion_matrix(test_y,
                                               self.xgb_pred,
                                               labels=labels)
        self.stacking_cm = metrics.confusion_matrix(test_y,
                                                    self.stacking_pred,
                                                    labels=labels)
        self.voting_cm = metrics.confusion_matrix(test_y,
                                                  self.voting_pred,
                                                  labels=labels)
        self.bagging_cm = metrics.confusion_matrix(test_y,
                                                   self.bagging_pred,
                                                   labels=labels)
        self.rf_cm = metrics.confusion_matrix(test_y,
                                              self.rf_pred,
                                              labels=labels)
        self.adaboost_cm = metrics.confusion_matrix(test_y,
                                                    self.adaboost_pred,
                                                    labels=labels)

        if show_cm:
            self.plot_confusion_matrix(self.svm_cm,
                                       classes=labels,
                                       title='SVM')
            self.plot_confusion_matrix(self.tree_cm,
                                       classes=labels,
                                       title='Tree')
            self.plot_confusion_matrix(self.bayes_cm,
                                       classes=labels,
                                       title='Bayes')
            self.plot_confusion_matrix(self.knn_cm,
                                       classes=labels,
                                       title='KNN')
            self.plot_confusion_matrix(self.xgb_cm,
                                       classes=labels,
                                       title='XGB')
            self.plot_confusion_matrix(self.stacking_cm,
                                       classes=labels,
                                       title='Stacking')
            self.plot_confusion_matrix(self.voting_cm,
                                       classes=labels,
                                       title='Voting')
            self.plot_confusion_matrix(self.bagging_cm,
                                       classes=labels,
                                       title='Bagging')
            self.plot_confusion_matrix(self.rf_cm, classes=labels, title='RF')
            self.plot_confusion_matrix(self.adaboost_cm,
                                       classes=labels,
                                       title='Adaboost')

    def History(self):
        print('******************\nSVM : ', self.svm_report)
        print('******************\nTree : ', self.tree_report)
        print('******************\nBayes : ', self.bayes_report)
        print('******************\nKNN : ', self.knn_report)
        print('******************\nXGB : ', self.xgb_report)
        print('******************\nStacking : ', self.stacking_report)
        print('******************\nVoting : ', self.voting_report)
        print('******************\nBagging : ', self.bagging_report)
        print('******************\nRF : ', self.rf_report)
        print('******************\nAdaboost : ', self.adaboost_report)

    def Score(self):
        print('SVM Score : ', self.svm_score)
        print('Tree Score : ', self.tree_score)
        print('Bayes Score : ', self.bayes_score)
        print('KNN Score : ', self.knn_score)
        print('XGB Score : ', self.xgb_score)
        print('Stacking Score : ', self.stacking_score)
        print('Voting Score : ', self.voting_score)
        print('Bagging Score : ', self.bagging_score)
        print('RF Score : ', self.rf_score)
        print('Adaboost Score : ', self.adaboost_score)

    def Report2txt(self, filename):
        f = open(filename, 'w')
        f.write('SVM Score : ' + str(self.svm_score) + '\n')
        f.write('Tree Score : ' + str(self.tree_score) + '\n')
        f.write('Bayes Score : ' + str(self.bayes_score) + '\n')
        f.write('KNN Score : ' + str(self.knn_score) + '\n')
        f.write('XGB Score : ' + str(self.xgb_score) + '\n')
        f.write('Stacking Score : ' + str(self.stacking_score) + '\n')
        f.write('Voting Score : ' + str(self.voting_score) + '\n')
        f.write('Bagging Score : ' + str(self.bagging_score) + '\n')
        f.write('RF Score : ' + str(self.rf_score) + '\n')
        f.write('Adaboost Score : ' + str(self.adaboost_score) + '\n')
        f.write('******************\nSVM : ' + str(self.svm_report) + '\n')
        f.write('******************\nTree : ' + str(self.tree_report) + '\n')
        f.write('******************\nBayes : ' + str(self.bayes_report) + '\n')
        f.write('******************\nKNN : ' + str(self.knn_report) + '\n')
        f.write('******************\nXGB : ' + str(self.xgb_report) + '\n')
        f.write('******************\nStacking : ' + str(self.stacking_report) +
                '\n')
        f.write('******************\nVoting : ' + str(self.voting_report) +
                '\n')
        f.write('******************\nBagging : ' + str(self.bagging_report) +
                '\n')
        f.write('******************\nRF : ' + str(self.rf_report) + '\n')
        f.write('******************\nAdaboost : ' + str(self.adaboost_report) +
                '\n')
        f.close()

    def plot_confusion_matrix(self,
                              cm,
                              classes,
                              normalize=False,
                              title='Confusion matrix',
                              cmap=plt.cm.Blues):
        """
        Print and plot the confusion matrix.
        Normalization can be applied by setting `normalize=True`.
        """
        # Normalize before plotting so the rendered image matches the printout.
        if normalize:
            cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
            print("Normalized confusion matrix")
        else:
            print(title, ' Confusion matrix, without normalization')

        print(cm)

        plt.figure()
        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j,
                     i,
                     cm[i, j],
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.tight_layout()
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()
Example #12
from sklearn.naive_bayes import GaussianNB
from mlxtend.classifier import StackingClassifier
lr = LogisticRegression()
knn = KNeighborsClassifier()
svm = SVC(probability=True)
NB = GaussianNB()
DT = DecisionTreeClassifier()

sclf = StackingClassifier(classifiers=[svm, lr, knn, DT, rand_GSOpt],
                          use_probas=True,
                          meta_classifier=rand_GSOpt)
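# Note: rand_GSOpt, defined earlier in the original script (presumably a
# grid-searched model), serves both as a base classifier and as the
# meta-classifier here.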
sclf.fit(x_train, y_train)
sclf_y_pred = sclf.predict(x_test)
print('Accuracy of the stacked ensemble classifier on the test set: {:.2f}'.
      format(sclf.score(x_test, y_test)))

print('sclf_F1 Score =',
      sklearn.metrics.f1_score(y_test, sclf_y_pred, average="macro"))
print('sclf_Precision =',
      sklearn.metrics.precision_score(y_test, sclf_y_pred, average="macro"))
print('sclf_Recall =',
      sklearn.metrics.recall_score(y_test, sclf_y_pred, average="macro"))

from xgboost import XGBClassifier
XGBoost = XGBClassifier(learning_rate=0.2, max_depth=2, n_estimators=43)
XGBoost.fit(x_train, y_train)
XGBoost_y_pred = XGBoost.predict(x_test)

print('Accuracy of XGBoost on the test set: {:.2f}'.format(
    XGBoost.score(x_test, y_test)))  # assumed completion of the truncated line
Example #13
from sklearn.datasets import load_iris
from mlxtend.classifier import StackingClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

iris = load_iris()
X = iris.data
y = iris.target

pipe1 = make_pipeline(ColumnSelector(cols=(0, 2)),
                      LogisticRegression())
pipe2 = make_pipeline(ColumnSelector(cols=(1, 2, 3)),
                      LogisticRegression())

sclf = StackingClassifier(classifiers=[pipe1, pipe2],
                          meta_classifier=LogisticRegression())
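# Each pipeline sees only its ColumnSelector subset of the iris features, so
# the two logistic regressions are trained on different feature views.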

sclf.fit(X, y)
print(sclf.score(X, y))