Example #1
    def __train_stacks__(self, train_input, test_input, train_output,
                         test_output):
        scaler = MinMaxScaler()
        scaler.fit(train_input)
        scaled_train_input = scaler.transform(train_input)
        scaled_test_input = scaler.transform(test_input)
        clf1 = MLPClassifier(hidden_layer_sizes=[50, 50, 50, 50],
                             activation='logistic')
        clf2 = DecisionTreeClassifier()
        clf3 = SVC()
        clf4 = QuadraticDiscriminantAnalysis()
        clf5 = KNeighborsClassifier()
        clf6 = RandomForestClassifier()
        clf7 = LinearSVC()
        clf8 = LinearDiscriminantAnalysis()
        sclf1 = StackingClassifier(classifiers=[clf2, clf2, clf2, clf2],
                                   meta_classifier=clf6)
        sclf2 = StackingClassifier(classifiers=[clf7, clf7, clf7, clf7],
                                   meta_classifier=clf3)
        sclf3 = StackingClassifier(classifiers=[clf8, clf8, clf8, clf8],
                                   meta_classifier=clf4)
        sclf4 = StackingClassifier(classifiers=[clf5, clf5, clf5, clf5],
                                   meta_classifier=clf5)
        main_clf = StackingClassifier(classifiers=[sclf1, sclf2, sclf3, sclf4],
                                      meta_classifier=clf1)
        ada_main = AdaBoostClassifier(main_clf)
        # Mean 4-fold CV accuracy on the scaled training data (computed for
        # reference; note that it is not returned by this method).
        training_scr = np.mean(
            cross_val_score(ada_main, scaled_train_input, train_output, cv=4))
        ada_main.fit(scaled_train_input, train_output)
        pred_output = ada_main.predict(scaled_test_input)

        return ada_main, pred_output
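For context, here is a minimal, self-contained sketch of the same pattern — a StackingClassifier boosted by AdaBoost — on the iris data; the dataset and the reduced estimator counts are illustrative assumptions, not part of the original class:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from mlxtend.classifier import StackingClassifier

X, y = load_iris(return_X_y=True)
# Inner stack: four decision trees feeding a random-forest meta-learner.
inner = StackingClassifier(classifiers=[DecisionTreeClassifier() for _ in range(4)],
                           meta_classifier=RandomForestClassifier())
# AdaBoost can boost the whole stack because StackingClassifier accepts
# sample_weight in fit() and exposes predict_proba().
boosted = AdaBoostClassifier(inner, n_estimators=10)
boosted.fit(X, y)
print(boosted.score(X, y))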
Example #2
    def fit(self, x, y):
        '''
        Fit every registered base model, then stack them under the chosen
        meta-classifier.
        '''
        model_list = []
        basic_cls = ['logistic', 'knn', 'svm', 'dt', 'rf', 'adaBoost', 'gbm', 'xgb', 'bp']
        for model_name in self.listModelName:
            if model_name in basic_cls:
                cls = cls_model(model_name, isGridSearch=self.isGridSearch)
                if model_name in self.dict_para:
                    # If the user supplied a custom parameter range, apply it.
                    cls.set_parameters(self.dict_para[model_name])
                # Fit the base model.
                cls.fit(x, y)
                model_list.append(cls.cls_model)
                self.train_model[model_name] = cls

        if self.meta_reg == 'logistic':
            meta_cls = linear_model.LogisticRegression()
        elif self.meta_reg == 'knn':
            meta_cls = KNeighborsClassifier()
        else:
            # Guard against an otherwise unbound meta_cls below.
            raise ValueError('unsupported meta_reg: %r' % self.meta_reg)

        self.stack = StackingClassifier(classifiers=model_list, meta_classifier=meta_cls)
        self.stack.fit(x.values, y.values.reshape(len(y)))
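Stripped of the project-specific cls_model wrapper, the core of what this method assembles is a plain mlxtend stack; a minimal hedged sketch on iris (the estimator choices here are assumptions):

from sklearn import linear_model
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from mlxtend.classifier import StackingClassifier

X, y = load_iris(return_X_y=True)
base_models = [KNeighborsClassifier(), DecisionTreeClassifier()]
stack = StackingClassifier(classifiers=base_models,
                           meta_classifier=linear_model.LogisticRegression())
stack.fit(X, y)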
Example #3
    def __init__(self,
                 alpha=0.1,
                 n_jobs=-1,
                 max_features='sqrt',
                 n_estimators=1000,
                 RandomForest=True,
                 KMeansFeatures=True,
                 NaiveBayes=True):
        """
        INPUT:
        - alpha = Additive laplace smoothing parameter for NaiveBayes
        - n_jobs = Number of jobs to run RFC on
        - max_features = Number of features to consider on RFC
        - n_estimators = Number of trees in RFC
        - RandomForest = Bool, run RFC
        - KMeansFeatures = Bool, include K means features in RFC
        - NaiveBayes = Bool, run MNB

        ATTRIBUTES:
        - RFC = Random Forest Classifier
        - MNB = Multinomial Naive Bayes Classifier
        """
        self.RFC = RandomForestClassifier(n_jobs=n_jobs,
                                          max_features=max_features,
                                          n_estimators=n_estimators)
        self.MNB = MultinomialNB(alpha=alpha)
        self.LogR = LogisticRegression()
        self.STK = StackingClassifier(classifiers=[self.RFC, self.MNB],
                                      meta_classifier=self.LogR,
                                      use_probas=True)

        self.RandomForest = RandomForest
        self.KMeansFeatures = KMeansFeatures
        self.NaiveBayes = NaiveBayes
Example #4
def create_stacked(dataset, x_train, y_train):
    # Convert string labels to integer indices, mutating in place.
    for i, y in enumerate(dataset.y_true):
        dataset.y_true[i] = dataset.class_labels.index(y)

    for i, y in enumerate(y_train):
        y_train[i] = dataset.class_labels.index(y)
    dataset.class_labels = list(range(len(dataset.class_labels)))

    clf1 = RandomForestClassifier(n_estimators=1000,
                                  n_jobs=-1,
                                  random_state=42)
    clf2 = KNeighborsClassifier(n_neighbors=10)
    clf3 = GaussianNB()
    clf4 = MLPClassifier(activation='relu',
                         max_iter=100000,
                         hidden_layer_sizes=(50, 50, 50, 50, 50))
    clf5 = MLPClassifier(activation='relu',
                         max_iter=1000000,
                         hidden_layer_sizes=(500, 500))
    clf6 = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=42)
    clf_meta = LogisticRegression()
    clf = StackingClassifier(classifiers=[clf1, clf2, clf3, clf4, clf5, clf6],
                             meta_classifier=clf_meta,
                             use_probas=True)

    clf.fit(x_train, y_train)

    return clf
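A hedged usage sketch for create_stacked — the dataset argument only needs mutable y_true and class_labels attributes, so a SimpleNamespace stands in for the project's real dataset class here (the function's own scikit-learn imports are assumed to be in scope):

from types import SimpleNamespace
from sklearn.datasets import load_iris

iris = load_iris()
labels = [iris.target_names[t] for t in iris.target]
dataset = SimpleNamespace(y_true=list(labels),
                          class_labels=list(iris.target_names))
clf = create_stacked(dataset, iris.data, list(labels))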
Example #5
def test_StackingClassifier_avg_vs_concat():
    np.random.seed(123)
    lr1 = LogisticRegression(solver='liblinear', multi_class='ovr')
    sclf1 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               average_probas=True,
                               meta_classifier=lr1)

    sclf1.fit(X, y)
    r1 = sclf1.predict_meta_features(X[:2])
    assert r1.shape == (2, 3)
    assert_almost_equal(np.sum(r1[0]), 1.0, decimal=6)
    assert_almost_equal(np.sum(r1[1]), 1.0, decimal=6)

    sclf2 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               average_probas=False,
                               meta_classifier=lr1)

    sclf2.fit(X, y)
    r2 = sclf2.predict_meta_features(X[:2])
    assert r2.shape == (2, 6)
    assert_almost_equal(np.sum(r2[0]), 2.0, decimal=6)
    assert_almost_equal(np.sum(r2[1]), 2.0, decimal=6)
    # The two stacked blocks come from identical classifiers, so they match.
    assert np.array_equal(r2[0][:3], r2[0][3:])
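The bare X and y used by this test (and several below) are module-level fixtures in the mlxtend test suite; a hedged reconstruction, matching the test_verbose variant further down that calls iris_data() explicitly:

import numpy as np
from mlxtend.data import iris_data

X, y = iris_data()
y2 = np.column_stack((y, y))  # assumed two-column target for the multivariate tests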
Example #6
class ClassifierBlender:
    def __init__(self, x_train, x_test, y_train, y_test=None):
        x_train.drop(['Unnamed: 0', 'Id'], axis=1, inplace=True)
        x_test.drop(['Unnamed: 0', 'Id'], axis=1, inplace=True)
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train['y'].values
        # The original tested self.y_train here, which is never None at this
        # point; the intended check is on the optional y_test argument.
        if y_test is not None:
            self.y_test = y_test['y'].values

    def clf_blend(self):
        # NB: the base estimators and the meta-estimator here are regressors;
        # mlxtend's StackingRegressor is the natural fit for this setup.
        meta_clf = LinearRegression()
        clf1 = model.svm_regressor()
        clf2 = model.randomforest_regressor()
        clf3 = model.xgb_regressor()
        self.blend = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                        meta_classifier=meta_clf)
        self.blend.fit(self.x_train, self.y_train)
        return self.blend

    def score(self):
        scores = cross_val_score(self.blend,
                                 X=self.x_train,
                                 y=self.y_train,
                                 cv=10,
                                 verbose=2)
        return scores

    def prediction(self):
        y_pred = self.blend.predict(self.x_test)
        return y_pred
Example #7
def stacking_clf(train_x, train_y):
    clf1 = RandomForestClassifier(n_estimators=300,
                                  max_features="sqrt",
                                  min_samples_split=20,
                                  min_samples_leaf=15,
                                  max_depth=6,
                                  bootstrap=True,
                                  n_jobs=8)
    clf2 = svm.SVC(C=10)
    clf3 = xgb.XGBClassifier(n_estimators=300,
                             learning_rate=0.1,
                             n_jobs=8,
                             objective="multi:softmax",
                             colsample_bylevel=0.8,
                             reg_lambda=1,
                             max_depth=6,
                             min_child_weight=1)

    clf4 = GradientBoostingClassifier(n_estimators=300,
                                      learning_rate=0.1,
                                      min_samples_split=20,
                                      min_samples_leaf=15,
                                      max_depth=6,
                                      max_features="sqrt")

    clf5 = LogisticRegression(penalty='l2', C=100, multi_class='ovr')

    sclf = StackingClassifier(
        classifiers=[clf1, clf3, clf4],
        meta_classifier=clf5,
    )
    sclf.fit(train_x, train_y)
    return sclf
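A hedged usage sketch for the function above — train_x and train_y are assumed to be array-likes loaded elsewhere; cross_val_score refits clones of the returned stack:

from sklearn.model_selection import cross_val_score

sclf = stacking_clf(train_x, train_y)
scores = cross_val_score(sclf, train_x, train_y, cv=5)
print('CV accuracy: %.3f +/- %.3f' % (scores.mean(), scores.std()))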
Example #8
    def model_cross_validation(self, model, best_params):
        print('Model Cross Validation')
        print('Start: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        lr = self.model_init(model)
        clf1 = self.model_init('KNN')
        clf2 = self.model_init('RFC')
        clf3 = self.model_init('GNB')
        sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                  meta_classifier=lr)
        sclf.set_params(**best_params)

        train_data = self.train.values.copy()
        train_label = self.train_label['label'].values.copy()
        train_label = train_label.reshape(train_label.shape[0])

        scores = cross_val_score(sclf, train_data, train_label, cv=5,
                                 scoring='roc_auc', n_jobs=3)

        print(sclf)
        print(scores)
        print(np.mean(scores))
        print('Model: {0} ; Train: {1}'.format(model, np.mean(scores)))
        print('End: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        return np.mean(scores)
Example #9
    @classmethod  # run() calls cls(), so this was presumably a classmethod in the original
    def run(cls) -> StackingClassifier:
        """
        Run a Stacking Classifier using all registered models
        """
        sc = cls()
        X, y = sc.load_train()

        # Define the StackingClassifier using all models registered.
        classifiers = [Model() for Model in sc._models if Model.__name__ != 'DumbModel']

        clf = StackingClassifier(classifiers=classifiers,
                                 meta_classifier=LogisticRegression(),
                                 verbose=1,
                                 average_probas=False,
                                 use_probas=True
                                 )

        # Run cross-val to get an idea of what to expect for final output
        #scores = cross_val_score(clf, X.copy(), y.copy(), scoring='neg_log_loss', cv=2)

        #print('\n---------\nCross validation (3) --> StackingClassifier - Avg Log Loss: {:.8f} - STD: {:.4f}\n---------'
        #      .format(scores.mean(), scores.std())
        #      )

        # Finally, refit clf to entire dataset
        print('Fitting Stacking Classifier to entire training dataset...')
        clf.fit(X.copy(), y.copy())
        return clf
Example #10
def stacking(classifier, optimalRF, optimalKNN, task=0):
    if task == 0:
        for x in range(1, 51):
            classifier.predict(
                StackingClassifier(classifiers=[
                    RandomForestClassifier(optimalRF, n_jobs=8),
                    KNeighborsClassifier(optimalKNN, n_jobs=8)
                ],
                                   meta_classifier=KNeighborsClassifier(
                                       x, n_jobs=8)))
    elif task == 2:
        classifier.predict(
            StackingClassifier(classifiers=[
                RandomForestClassifier(optimalRF, n_jobs=8),
                KNeighborsClassifier(optimalKNN, n_jobs=8)
            ],
                               meta_classifier=tree.DecisionTreeClassifier()))
    elif task == 5:
        classifier.predict(
            StackingClassifier(classifiers=[
                RandomForestClassifier(optimalRF, n_jobs=8),
                KNeighborsClassifier(optimalKNN, n_jobs=8)
            ],
                               meta_classifier=svm.SVC(kernel='linear')))
    elif task == 6:
        classifier.predict(
            StackingClassifier(classifiers=[
                RandomForestClassifier(optimalRF, n_jobs=8),
                KNeighborsClassifier(optimalKNN, n_jobs=8)
            ],
                               meta_classifier=svm.SVC(kernel='rbf')))
Ejemplo n.º 11
0
def stacking2():
    from sklearn.datasets import load_iris
    from mlxtend.classifier import StackingClassifier
    from mlxtend.feature_selection import ColumnSelector
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn import model_selection

    iris = load_iris()
    X = iris.data
    y = iris.target

    pipe1 = make_pipeline(ColumnSelector(cols=(0, 2)), LogisticRegression())
    pipe2 = make_pipeline(ColumnSelector(cols=(1, 2, 3)), LogisticRegression())
    sclf = StackingClassifier(classifiers=[pipe1, pipe2],
                              meta_classifier=LogisticRegression(),
                              use_features_in_secondary=True,
                              store_train_meta_features=True)
    sclf.fit(X, y)
    scores = model_selection.cross_val_score(sclf,
                                             X,
                                             y,
                                             cv=5,
                                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))
Example #12
def model_stack2():
    _, test_df, train_label = data_process.get_person_data()
    train_data, test_data = data_process.get_scale_data()
    X_train, X_val, y_train, y_val = train_test_split(train_data,
                                                      train_label,
                                                      test_size=0.2,
                                                      random_state=66)
    id_list = list(test_df.pop('ID'))
    model1 = gbt.XGBRegressor(n_estimators=1000,
                              subsample=0.8,
                              learning_rate=0.25,
                              objective='reg:linear')
    model2 = gbt.XGBRegressor(n_estimators=1000,
                              subsample=0.8,
                              learning_rate=0.25,
                              objective='reg:gamma')
    model3 = gbt.XGBRegressor(n_estimators=1000,
                              subsample=0.8,
                              learning_rate=0.25,
                              objective='reg:tweedie')
    model4 = svm.SVR()
    # NB: the base estimators here are regressors and model3 doubles as its
    # own meta-estimator; mlxtend's StackingRegressor is the natural fit for
    # this setup. The X_train/X_val split above is computed but never used.
    stack_model = StackingClassifier(
        classifiers=[model1, model2, model3, model4], meta_classifier=model3)
    stack_model.fit(train_data, train_label)
    yHat = stack_model.predict(test_data)
    result = pd.DataFrame({'id': id_list, 'yhat': yHat})
    result.to_csv('result/result6.csv',
                  index=False,
                  header=None,
                  encoding='utf-8')
Example #13
def stacking(para, X, y):
    stack_lvl_0 = StackingClassifier(classifiers=para["lvl_0"],
                                     meta_classifier=para["top"])
    stack_lvl_1 = StackingClassifier(classifiers=para["lvl_1"],
                                     meta_classifier=stack_lvl_0)
    scores = cross_val_score(stack_lvl_1, X, y, cv=3)

    return scores.mean()
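A hedged sketch of the para dict this function expects — the lvl_0, lvl_1, and top keys come from the function body, while the concrete estimators and data are assumptions (StackingClassifier and cross_val_score are assumed imported as above):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
para = {
    "lvl_0": [GaussianNB(), KNeighborsClassifier()],  # inner base learners
    "top": LogisticRegression(),                      # meta-classifier of the inner stack
    "lvl_1": [KNeighborsClassifier(), GaussianNB()],  # outer base learners
}
print(stacking(para, X, y))  # mean 3-fold CV accuracy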
Example #14
 def blending(self):
     meta_clf = LogisticRegression()
     clf1 = model.svm_classifier()
     clf2 = model.dt_classifier()
     # reg3 = model.xgb_classifier()
     self.blend = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta_clf)
     self.blend.fit(self.x_train, self.y_train)
     return self.blend
Example #15
 def Stacking(self):
     meta_clf = LogisticRegression()
     self.stacking = StackingClassifier(classifiers=[self.svm,
                                                     self.tree,
                                                     self.bayes,
                                                     self.knn,
                                                     self.xgb], meta_classifier=meta_clf)
     self.stacking.fit(self.X, self.y)
Example #16
def stacking_prediction2(m1, m2, meta):
    # model_train, model_test = stacking(clf, Xtrain2,ytrain2, Xtest2)
    # model.fit(model_train, ytrain2)
    tr, ts = scaling(Xtrain2,Xtest2,MaxAbsScaler())
    m = StackingClassifier(classifiers=[m1, m2],meta_classifier=meta) 
    m.fit(tr, ytrain2)
    predict_mm = m.predict(ts)
    return predict_mm
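The scaling helper used above is project-specific; a plausible minimal definition, assuming it fits the scaler on the training split only and transforms both splits:

def scaling(train, test, scaler):
    # Fit on the training data only to avoid leaking test-set statistics.
    scaler.fit(train)
    return scaler.transform(train), scaler.transform(test)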
Example #17
def test_weight_unsupported_no_weight():
    # This is okay since we do not pass sample weight
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    clf3 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=meta)
    sclf.fit(X, y)
Example #18
def test_multivariate_class():
    np.random.seed(123)
    meta = KNeighborsClassifier()
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    y_pred = sclf.fit(X, y2).predict(X)
    ca = .973
    assert round((y_pred == y2).mean(), 3) == ca
Example #20
def data_ensemble(cancer_type,feat):
	data_dir = "/home/ubuntu/cancer/"
	data_file = data_dir + cancer_type + "_matrix.csv"
	features = data_dir + cancer_type + "_output.txt"
	output_file = data_dir + cancer_type + "_accuracy.txt"
	file = open(features, "r")
	o_file = open(output_file, "w")
	line = file.readline()
	line = file.readline()
	df = pd.read_csv(data_file)
	df = shuffle(df)
	file_ids=df.pop('file_id')
	y = df.pop('label').values
	dataf=df.pop(line[:-1])
	#dataframe consisting of only important features
	for x in range(feat):
		line = file.readline()
		dataf=np.column_stack((dataf,df.pop(line[:-1])))
	X=normalize(dataf)
	X=scale(X)
	pca=PCA()
	pca.fit(X)
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
	# multiple classifiers
	clf1 = RandomForestClassifier(random_state=1,n_estimators=100)
	clf2 = GradientBoostingClassifier(n_estimators=1200,subsample=0.5,random_state=3)
	clf3 = SVC(gamma='auto')
	clf4 = KNeighborsClassifier(n_neighbors=1)
	clf5 = DecisionTreeClassifier(random_state=0)
	lr = LogisticRegression(solver='lbfgs')
	#stacking for data ensemble
	sclf = StackingClassifier(classifiers=[clf1, clf2, clf3, clf4, clf5], meta_classifier=lr)
	clf1.fit(X_train,y_train)
	clf2.fit(X_train,y_train)
	clf3.fit(X_train,y_train)
	clf4.fit(X_train,y_train)
	clf5.fit(X_train,y_train)
	sclf.fit(X_train,y_train)
	y_test_predict=sclf.predict(X_test)
	precision = precision_score(y_test, y_test_predict)
	accuracy = accuracy_score(y_test, y_test_predict)
	f1 = f1_score(y_test, y_test_predict)
	recall = recall_score(y_test, y_test_predict)
	scores = [precision, accuracy, f1, recall]
	labels = ['RF', 'GBDT', 'SVM', 'KNN', 'DT', 'Stacking']
	clf_list = [clf1, clf2, clf3, clf4, clf5, sclf]
	# Per-classifier metrics from the confusion matrix (the original shadowed
	# the label list with the loop variable of the same name).
	for clf, label in zip(clf_list, labels):
		y_test_predict = clf.predict(X_test)
		tn, fp, fn, tp = confusion_matrix(y_test, y_test_predict).ravel()
		specificity = tn / (tn+fp)
		recall = tp / (tp+fn)
		precision = tp / (tp+fp)
		accuracy = (tp + tn) / (tp+tn+fp+fn)
		f1 = 2*tp / (2*tp+fp+fn)
		o_file.write("\nAccuracy: %.2f [%s] \nPrecision: %.2f [%s] \nRecall: %.2f [%s] \nF1 score: %.2f [%s] \nSpecificity: %.2f [%s]\n" %(accuracy,label,precision, label, recall, label, f1, label, specificity, label))
Example #21
def test_verbose():
    np.random.seed(123)
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              use_probas=True,
                              meta_classifier=meta,
                              verbose=3)
    sclf.fit(iris.data, iris.target)
Example #22
def stackingPerformanceEditor():
    nb_clf = GaussianNB()
    # Renamed from svm_clf/mlp_clff in the original: this estimator is a
    # random forest, and it doubles as the meta-classifier below.
    rf_clf = RandomForestClassifier(n_estimators=100, max_depth=400, random_state=5)
    mlp_clf = MLPClassifier(hidden_layer_sizes=(500, 500))

    stack_clf = StackingClassifier(classifiers=[nb_clf, rf_clf, mlp_clf],
                                   meta_classifier=rf_clf)
    stack_clf.fit(Xtrain2, ytrain2)
    return accuracy_score(ytest, stack_clf.predict(Xtest2))
Example #23
def test_weight_unsupported_no_weight():
    # This is okay since we do not pass sample weight
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    clf3 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=meta)
    sclf.fit(X, y)
Example #25
def test_multivariate_class():
    np.random.seed(123)
    meta = KNeighborsClassifier()
    clf1 = RandomForestClassifier()
    clf2 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta)
    y_pred = sclf.fit(X, y2).predict(X)
    ca = .973
    assert round((y_pred == y2).mean(), 3) == ca
Example #26
def test_sample_weight():
    # Make sure that:
    #    prediction with weight
    # != prediction with no weight
    # == prediction with weight ones
    random.seed(87)
    w = np.array([random.random() for _ in range(len(y))])

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    prob1 = sclf.fit(X, y, sample_weight=w).predict_proba(X)

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    prob2 = sclf.fit(X, y, sample_weight=None).predict_proba(X)

    maxdiff = np.max(np.abs(prob1 - prob2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2], meta_classifier=meta)
    prob3 = sclf.fit(X, y, sample_weight=np.ones(len(y))).predict_proba(X)

    maxdiff = np.max(np.abs(prob2 - prob3))
    assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff
Example #27
 def stacking(self):
     from sklearn.svm import SVC
     from sklearn.pipeline import make_pipeline
     from sklearn.preprocessing import RobustScaler, MinMaxScaler
     from sklearn.preprocessing import StandardScaler
     from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
     from lightgbm import LGBMClassifier
     import xgboost as xgb
     from mlxtend.classifier import StackingClassifier
     import scipy as sc
     svc = make_pipeline(SVC(kernel='rbf', C=2.8, gamma=2))
     rf = RandomForestClassifier(random_state=590, n_estimators=6)
     GBoost = GradientBoostingClassifier(n_estimators=500,
                                         learning_rate=0.01,
                                         max_depth=12,
                                         max_features='sqrt',
                                         min_samples_leaf=15,
                                         min_samples_split=97,
                                         random_state=200)
     model_xgb = xgb.XGBClassifier(colsample_bytree=0.4603,
                                   gamma=10,
                                   learning_rate=0.01,
                                   max_depth=11,
                                   min_child_weight=1.7817,
                                   n_estimators=500,
                                   reg_alpha=0.01,
                                   reg_lambda=5,
                                   subsample=0.5213,
                                   silent=1,
                                   seed=1024,
                                   nthread=-1)
     # The original passed objective='regression' to LGBMClassifier; that is
     # a regression objective and invalid for a classifier, so it is dropped.
     model_lgb = LGBMClassifier(num_leaves=5,
                                learning_rate=0.05,
                                n_estimators=550,
                                max_bin=25,
                                bagging_fraction=1,
                                bagging_freq=5,
                                feature_fraction=0.7,
                                feature_fraction_seed=9,
                                bagging_seed=9,
                                min_data_in_leaf=42,
                                min_sum_hessian_in_leaf=40)
     classifiers = [rf, svc, GBoost, model_lgb, model_xgb]
     stregr = StackingClassifier(classifiers=classifiers,
                                 meta_classifier=model_xgb,
                                 verbose=1)
     stregr.fit(self.X_train, self.y_train)
     print(
         "the model is stregr and the valid's f1 is: ",
         f1_score(self.y_test, stregr.predict(self.X_test),
                  average="macro"))
     # print("the model is stregr and the valid's precision_score is: ", precision_score(self.y_test, stregr.predict(self.X_test),average="macro"))
     # print("the model is stregr and the valid's recall_score is: ", recall_score(self.y_test, stregr.predict(self.X_test),average="macro"))
     return stregr
Example #28
def test_weight_unsupported_no_weight():
    # This is okay since we do not pass sample weight
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    clf3 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=meta)
    sclf.fit(X, y)
Example #29
    def train_model(self, X, y):
        print('>> KFOLD Iteration <<')
        # Define Models
        m1 = CatBoostClassifier(custom_loss=['Accuracy'], random_seed=42, logging_level='Silent')
        m2 = AdaBoostClassifier(n_estimators=500)
        m3 = XGBClassifier()
        meta = LogisticRegression(random_state=0, solver='lbfgs', multi_class='ovr')

        model = StackingClassifier(classifiers=[m1, m2, m3], meta_classifier=meta)
        model = model.fit(X, y)
        return model
Example #30
def test_train_meta_features_():
    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    stclf = StackingClassifier(classifiers=[knn, gnb],
                               meta_classifier=lr,
                               store_train_meta_features=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    stclf.fit(X_train, y_train)
    train_meta_features = stclf.train_meta_features_
    assert train_meta_features.shape == (X_train.shape[0], 2)
Example #31
def train3():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target

    pipe1 = make_pipeline(ColumnSelector(cols=(0, 2)), LogisticRegression())
    pipe2 = make_pipeline(ColumnSelector(cols=(1, 2, 3)), LogisticRegression())

    sclf = StackingClassifier(classifiers=[pipe1, pipe2], meta_classifier=LogisticRegression())

    sclf.fit(x, y)
Example #32
def test_verbose():
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear', multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              use_probas=True,
                              meta_classifier=meta,
                              verbose=3)
    X, y = iris_data()
    sclf.fit(X, y)
Example #34
def test_weight_unsupported():
    # Expect a TypeError, since KNN's fit() does not support sample_weight.
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    clf3 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=meta)
    random.seed(87)
    w = np.array([random.random() for _ in range(len(y))])
    # The original had a misspelled sample_seight kwarg and no raises guard;
    # pytest.raises matches the variant of this test further down.
    with pytest.raises(TypeError):
        sclf.fit(X, y, sample_weight=w)
Example #35
def test_verbose():
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              use_probas=True,
                              meta_classifier=meta,
                              verbose=3)
    X, y = iris_data()
    sclf.fit(X, y)
Example #36
def test_predict_meta_features():
    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    X_train, X_test, y_train,  y_test = train_test_split(X, y, test_size=0.3)

    #  test default (class labels)
    stclf = StackingClassifier(classifiers=[knn, gnb],
                               meta_classifier=lr,
                               store_train_meta_features=True)
    stclf.fit(X_train, y_train)
    test_meta_features = stclf.predict(X_test)
    assert test_meta_features.shape == (X_test.shape[0],)
Example #37
def test_train_meta_features_():
    np.random.seed(123)
    knn = KNeighborsClassifier()
    lr = LogisticRegression(solver='liblinear',
                            multi_class='ovr')
    gnb = GaussianNB()
    stclf = StackingClassifier(classifiers=[knn, gnb],
                               meta_classifier=lr,
                               store_train_meta_features=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    stclf.fit(X_train, y_train)
    train_meta_features = stclf.train_meta_features_
    assert train_meta_features.shape == (X_train.shape[0], 2)
Example #38
def test_StackingClassifier_avg_vs_concat():
    np.random.seed(123)
    lr1 = LogisticRegression()
    sclf1 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               average_probas=True,
                               meta_classifier=lr1)

    sclf1.fit(X, y)
    r1 = sclf1._predict_meta_features(X[:2])
    assert r1.shape == (2, 3)
    assert_almost_equal(np.sum(r1[0]), 1.0, places=6)
    assert_almost_equal(np.sum(r1[1]), 1.0, places=6)

    sclf2 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               average_probas=False,
                               meta_classifier=lr1)

    sclf2.fit(X, y)
    r2 = sclf2._predict_meta_features(X[:2])
    assert r2.shape == (2, 6)
    assert_almost_equal(np.sum(r2[0]), 2.0, places=6)
    assert_almost_equal(np.sum(r2[1]), 2.0, places=6)
    assert np.array_equal(r2[0][:3], r2[0][3:])
Example #39
def test_use_features_in_secondary_predict_proba():
    np.random.seed(123)
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              use_features_in_secondary=True,
                              meta_classifier=meta)

    sclf.fit(X, y)
    idx = [0, 1, 2]
    y_pred = sclf.predict_proba(X[idx])[:, 0]
    expect = np.array([0.911, 0.829, 0.885])
    np.testing.assert_almost_equal(y_pred, expect, 3)
Example #40
def test_weight_unsupported():
    # Error since KNN does not support sample_weight
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    clf3 = KNeighborsClassifier()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=meta)
    random.seed(87)
    w = np.array([random.random() for _ in range(len(y))])

    with pytest.raises(TypeError):
        sclf.fit(X, y, sample_weight=w)  # 'sample_seight' in the original was a typo
Example #41
def test_use_features_in_secondary_sparse_input_predict_proba():
    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    sclf = StackingClassifier(classifiers=[clf1],
                              use_features_in_secondary=True,
                              meta_classifier=meta)

    sclf.fit(sparse.csr_matrix(X), y)
    idx = [0, 1, 2]
    y_pred = sclf.predict_proba(
        sparse.csr_matrix(X[idx])
    )[:, 0]
    expect = np.array([0.910, 0.829, 0.882])
    np.testing.assert_almost_equal(y_pred, expect, 3)
Example #42
def test_use_features_in_secondary_predict_proba():
    np.random.seed(123)
    X, y = iris_data()
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr',
                              random_state=1)
    clf1 = RandomForestClassifier(n_estimators=10, random_state=1)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              use_features_in_secondary=True,
                              meta_classifier=meta)

    sclf.fit(X, y)
    idx = [0, 1, 2]
    y_pred = sclf.predict_proba(X[idx])[:, 0]
    expect = np.array([0.916, 0.828, 0.889])
    np.testing.assert_almost_equal(y_pred, expect, 3)
Example #43
def model_processing(X_train,X_test,y_train,y_test):
    log_reg = LogisticRegression(C=0.01, penalty='l2')
    svc = SVC(C=0.7, kernel='linear')
    tree_clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=5)
    rf_clf = RandomForestClassifier(n_estimators=70,criterion='entropy', max_features='auto',min_samples_leaf=6)
    xgb = XGBClassifier(gamma=0.3, max_depth=4, min_child_weight=8,reg_alpha=0.05)
    
    sclf = StackingClassifier(classifiers=[log_reg,svc,tree_clf,rf_clf],meta_classifier=xgb)
    sclf.fit(X_train,y_train)
    y_pred_train = sclf.predict(X_train)
    y_pred = sclf.predict(X_test)
    
    print('*' * 30, 'Scores on the training set')

    accuracy = accuracy_score(y_train, y_pred_train)
    precision = precision_score(y_train, y_pred_train)
    f1 = f1_score(y_train, y_pred_train)
    recall = recall_score(y_train, y_pred_train)
    auc = roc_auc_score(y_train, y_pred_train)
    model_name = 'stacked model (train)'

    print('{} accuracy: {:.2f}'.format(model_name, accuracy))
    print('{} precision: {:.2f}'.format(model_name, precision))
    print('{} F1 score: {:.2f}'.format(model_name, f1))
    print('{} recall: {:.2f}'.format(model_name, recall))
    print('{} AUC: {:.2f}'.format(model_name, auc))

    print('*' * 30, 'Scores on the test set')

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    model_name = 'stacked model (test)'

    print('{} accuracy: {:.2f}'.format(model_name, accuracy))
    print('{} precision: {:.2f}'.format(model_name, precision))
    print('{} F1 score: {:.2f}'.format(model_name, f1))
    print('{} recall: {:.2f}'.format(model_name, recall))
    print('{} AUC: {:.2f}'.format(model_name, auc))
Example #44
def test_get_params():
    clf1 = KNeighborsClassifier(n_neighbors=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    lr = LogisticRegression()
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=lr)

    got = sorted(list({s.split('__')[0] for s in sclf.get_params().keys()}))
    expect = ['average_probas',
              'classifiers',
              'gaussiannb',
              'kneighborsclassifier',
              'meta-logisticregression',
              'meta_classifier',
              'randomforestclassifier',
              'store_train_meta_features',
              'use_features_in_secondary',
              'use_probas',
              'verbose']
    assert got == expect, got
Example #45
def test_get_params():
    np.random.seed(123)
    clf1 = KNeighborsClassifier(n_neighbors=1)
    clf2 = RandomForestClassifier(n_estimators=10)
    clf3 = GaussianNB()
    lr = LogisticRegression(solver='liblinear',
                            multi_class='ovr')
    sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                              meta_classifier=lr)

    got = sorted(list({s.split('__')[0] for s in sclf.get_params().keys()}))
    expect = ['average_probas',
              'classifiers',
              'drop_last_proba',
              'gaussiannb',
              'kneighborsclassifier',
              'meta_classifier',
              'randomforestclassifier',
              'store_train_meta_features',
              'use_clones',
              'use_features_in_secondary',
              'use_probas',
              'verbose']
    assert got == expect, got
Example #46
    def model_test(self, model, best_params):
        print('Model Test')
        print('Start: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        lr = self.model_init(model)
        clf1 = self.model_init('KNN')
        clf2 = self.model_init('RFC')
        clf3 = self.model_init('GNB')
        sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                  meta_classifier=lr)
        sclf.set_params(**best_params)

        train_data = self.train.values.copy()
        train_label = self.train_label['label'].values.copy()

        sclf.fit(train_data, train_label)

        # Report the 30 most influential features for the chosen model.
        # (The original referenced an undefined `clf` in the LR branch and
        # duplicated the RFC/XGB branches verbatim.)
        if model.upper() == 'LR':
            coef = sclf.coef_.reshape(sclf.coef_.shape[1])
            ind = coef.argsort()
            att = self.train.columns[ind[-30:]].tolist()
            print(att)
        elif model.upper() in ('RFC', 'XGB'):
            imp = sclf.feature_importances_
            print(imp)
            ind = imp.argsort()
            att = self.train.columns[ind[-30:]].tolist()
            print(att)

        test_data = self.test.values.copy()
        test_label = self.test_label['label'].values.copy()
        test_label = test_label.reshape(test_label.shape[0])

        res_proba = sclf.predict_proba(test_data)
        res_auc = roc_auc_score(test_label, res_proba[:, 1])

        print('Model: {0} ; Test: {1}'.format(model, res_auc))
        print('End: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        return res_auc
Example #47
def test_sample_weight():
    # Make sure that:
    #    prediction with weight
    # != prediction with no weight
    # == prediction with weight ones
    random.seed(87)
    w = np.array([random.random() for _ in range(len(y))])

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta)
    prob1 = sclf.fit(X, y, sample_weight=w).predict_proba(X)

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta)
    prob2 = sclf.fit(X, y, sample_weight=None).predict_proba(X)

    maxdiff = np.max(np.abs(prob1 - prob2))
    assert maxdiff > 1e-3, "max diff is %.4f" % maxdiff

    np.random.seed(123)
    meta = LogisticRegression(solver='liblinear',
                              multi_class='ovr')
    clf1 = RandomForestClassifier(n_estimators=10)
    clf2 = GaussianNB()
    sclf = StackingClassifier(classifiers=[clf1, clf2],
                              meta_classifier=meta)
    prob3 = sclf.fit(X, y, sample_weight=np.ones(len(y))).predict_proba(X)

    maxdiff = np.max(np.abs(prob2 - prob3))
    assert maxdiff < 1e-3, "max diff is %.4f" % maxdiff
Example #48
def test_StackingClassifier_drop_last_proba():
    np.random.seed(123)
    lr1 = LogisticRegression(solver='liblinear',
                             multi_class='ovr')
    sclf1 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=False,
                               meta_classifier=lr1)

    sclf1.fit(X, y)
    r1 = sclf1.predict_meta_features(X[:2])
    assert r1.shape == (2, 6)  # 2 classifiers x 3 class probabilities

    sclf2 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=True,
                               meta_classifier=lr1)

    sclf2.fit(X, y)
    r2 = sclf2.predict_meta_features(X[:2])
    assert r2.shape == (2, 4), r2.shape  # last probability column dropped per classifier

    sclf3 = StackingClassifier(classifiers=[lr1, lr1],
                               use_probas=True,
                               drop_last_proba=True,
                               meta_classifier=lr1)

    sclf3.fit(X[0:100], y[0:100])  # only 2 classes
    r3 = sclf3.predict_meta_features(X[:2])
    assert r3.shape == (2, 2), r3.shape  # 2 classes -> 1 column per classifier after the drop
Example #49
def predictor_ev():
    print "Building Neural Net classifiers for devices with events"
    n_input = X_train_ev.shape[1]
    n_train = X_train_ev.shape[0]
    
    from keras.models import Sequential
    from keras.layers import Dense, Activation
    from keras.layers.core import Dropout
    from keras.layers.advanced_activations import PReLU
    from keras.regularizers import l2
    from keras.optimizers import Adadelta
    from keras.optimizers import SGD
    from keras.wrappers.scikit_learn import KerasClassifier
    from keras.callbacks import ModelCheckpoint
    
    def create_model(n_hidden_layers=1, nodes=[50], reg=1.0, dropouts=[.5], acts=['relu']):
        n_in = n_input    
        model = Sequential()
        for i in range(n_hidden_layers):
            n_out = nodes[i]
            dropout = dropouts[i]
            act = acts[i]
            model.add(Dense(output_dim=n_out, input_dim=n_in, W_regularizer=l2(reg)))
            model.add(Activation(act))
            model.add(Dropout(dropout))
            n_in = n_out
        model.add(Dense(output_dim=12, W_regularizer=l2(reg)))
        model.add(Activation("softmax"))
        # Compile model
        adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=1e-08)
        sgd = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer=adadelta, metrics=['accuracy'])
        return model
    
    class KerasClassifier2(KerasClassifier):
            
        def __init__(self, build_fn, fn_args, random_state=0, nb_epoch=10, batch_size=500, verbose=2):
            self.random_state = random_state
            self.nb_epoch = nb_epoch
            self.batch_size = batch_size
            self.verbose = verbose
            super(KerasClassifier2, self).__init__(build_fn, **fn_args)
            self.classes_= np.arange(12)
            self.n_classes_ = 12
            self.model = build_fn(**fn_args)
            
        def fit(self, X, y, sample_weight=None):
            return super(KerasClassifier2, self).fit(X, indicator(y),
                             verbose = self.verbose, sample_weight=sample_weight,
                             validation_data=(X_cv_ev, indicator(y_cv_ev)),
                             nb_epoch=self.nb_epoch, batch_size=self.batch_size)
    
    
        def predict_proba(self, X):
            return super(KerasClassifier2, self).predict_proba(X, batch_size=500, verbose=0)
            
        def predict(self, X):
            return super(KerasClassifier2, self).predict_proba(X, batch_size=500, verbose=0)            
    
    nn1_args = {'n_hidden_layers': 2, 'nodes': [600, 400], 'reg': 1.8,
                'dropouts': [.3, .4], 'acts': ['relu', 'relu']}
    nn2_args = {'n_hidden_layers': 3, 'nodes': [300, 100, 50], 'reg': 2.0,
                'dropouts': [.2, .4, .5], 'acts': ['relu', 'relu', 'relu']}
    nn3_args = {'n_hidden_layers': 4, 'nodes': [1001, 511, 245, 99], 'reg': 2.0,
                'dropouts': [.2, .3, .2, .3], 'acts': ['relu', 'relu', 'relu', 'relu']}
    nn4_args = {'n_hidden_layers': 1, 'nodes': [500], 'reg': 1.2,
                'dropouts': [.25], 'acts': ['relu']}
    nn5_args = {'n_hidden_layers': 5, 'nodes': [1343, 1012, 757, 539, 117],
                'reg': 2.5, 'dropouts': [.2, .3, .4, .4, .4],
                'acts': ['relu', 'relu', 'relu', 'relu', 'relu']}
    
    clfNN1 = KerasClassifier2(create_model, nn1_args, random_state=5, nb_epoch=5)
    clfNN2 = KerasClassifier2(create_model, nn2_args, random_state=23, nb_epoch=11)
    clfNN3 = KerasClassifier2(create_model, nn3_args, random_state=710, nb_epoch=6)
    clfNN4 = KerasClassifier2(create_model, nn4_args, random_state=5072, nb_epoch=6)
    clfNN5 = KerasClassifier2(create_model, nn5_args, random_state=2016, nb_epoch=12)
    
    print "Building XGBoost classifiers for devices with events"
    xgb_params = {
    "objective": "multi:softprob",
    "num_class": 12,
    "booster": "gblinear",
    "max_depth": 6,
    "eval_metric": "mlogloss",
    "eta": 0.07,
    "silent": 1,
    "alpha": 3.5,
    }
    
    class XGBClassifier2(xgb.XGBClassifier):
    
        def __init__(self, max_depth=xgb_params['max_depth'],
                     objective='multi:softprob', missing=None, 
                     learning_rate=xgb_params['eta'], n_estimators=40, subsample=1,
                     reg_alpha=xgb_params['alpha'], seed=2016, booster='gblinear'):
            super(XGBClassifier2, self).__init__(max_depth=max_depth, seed=seed,
                        objective=objective, missing=missing,
                        learning_rate=learning_rate, n_estimators=n_estimators,
                        subsample=subsample, reg_alpha=reg_alpha)
            self.booster = xgb_params['booster']
            
        def fit(self, X, y):
            super(XGBClassifier2, self).fit(X.tocsc(), y, eval_metric='mlogloss',
                                            eval_set=[(X_cv_ev.tocsc(), y_cv_ev)])
    
    gbm1 = XGBClassifier2(seed=0, booster='gblinear', n_estimators=28)
    gbm2 = XGBClassifier2(seed=6, booster='gblinear', n_estimators=28)
    gbm3 = XGBClassifier2(seed=151, booster='gbtree', n_estimators=28)
    gbm4 = XGBClassifier2(seed=1047, booster='gbtree', n_estimators=28)
    gbm5 = XGBClassifier2(seed=22, booster='dart', n_estimators=28)
    
    print "Building Logistic Regression classifier for devices with events"
    clfLR = LogisticRegression(C=.02, random_state=2016, multi_class='multinomial', solver='newton-cg')
    
    # Combine results of classifiers
    print("Stacking classifiers for devices with events")
    clf_ls = [gbm1,gbm2,gbm3,gbm4,gbm5,clfNN1,clfNN2,clfNN3,clfNN4,clfNN5,clfLR]
    meta = LogisticRegression()
    stack = StackingClassifier(clf_ls, meta, use_probas=True, verbose=1)
    
    stack.fit(X_train_ev, y_train_ev)
    print(log_loss(y_cv_ev, stack.predict_proba(X_cv_ev)))
    y_pred_ev = stack.predict_proba(X_test_ev)
    #y_pre = (pred_prob_nn+y_pre)/2.0
    return y_pred_ev
Example #50
#KNN
#clfKNN = KNeighborsClassifier(n_neighbors=5)
#clfKNN.fit(X_train_noev, y_train_noev)
#print log_loss(y_cv_noev, clfKNN.predict_proba(X_cv_noev))
#
##NB
#clfNB = MultinomialNB(alpha=1.0)
#clfNB.fit(X_train_noev, y_train_noev)
#print log_loss(y_cv_noev, clfNB.predict_proba(X_cv_noev))

#Combine results of classifiers
print "Stacking classifiers for devices with no events"
clf_ls = [gbm1,gbm2,gbm3,gbm4,gbm5,clfNN1,clfNN2,clfNN3,clfNN4,clfNN5,clfLR]
meta = LogisticRegression()
stack = StackingClassifier(clf_ls, meta, use_probas=True, verbose=1)

stack.fit(X_train_noev, y_train_noev)
print(log_loss(y_cv_noev, stack.predict_proba(X_cv_noev)))
y_pred_noev = stack.predict_proba(X_test_noev)
    #y_pre = (pred_prob_nn+y_pre)/2.0
#    return y_pred_noev

y_pred_ev = predictor_ev()
#y_pred_noev = predictor_noev()

# Write results
# np.hstack with two positional arrays was a bug (it takes a single tuple);
# the two device groups' prediction matrices are stacked row-wise here.
result = pd.DataFrame(np.vstack((y_pred_ev, y_pred_noev)), columns=le.classes_)
result["device_id"] = test_dev
result = result.set_index("device_id")
result.to_csv('stacking_1.gz', index=True,
              compression='gzip')  # line truncated in the source; gzip assumed from the .gz name