Example #1
    def model_cross_validation(self, model, best_params):

        print('Model Cross Validation')
        print('Start: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        # Stack KNN, random-forest and Gaussian naive Bayes base learners
        # under the chosen meta-classifier.
        lr = self.model_init(model)
        clf1 = self.model_init('KNN')
        clf2 = self.model_init('RFC')
        clf3 = self.model_init('GNB')
        sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                  meta_classifier=lr)
        sclf.set_params(**best_params)

        train_data = self.train.values.copy()
        # Ensure the labels are a flat 1-D array.
        train_label = self.train_label['label'].values.copy().ravel()

        # 5-fold cross-validated ROC AUC on the training data.
        scores = cross_val_score(sclf, train_data, train_label,
                                 cv=5, scoring='roc_auc', n_jobs=3)

        print(sclf)
        print(scores)
        print(np.mean(scores))
        print('Model: {0} ; Train: {1}'.format(model, np.mean(scores)))
        print('End: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        return np.mean(scores)
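# A minimal sketch of the imports this snippet (and Example #2 below) relies
# on, plus a hypothetical model_init helper on the same class; the mapping
# below is an assumption for illustration, not the original implementation:
import datetime
import numpy as np
from mlxtend.classifier import StackingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

    def model_init(self, model):
        # Map the short model codes used above onto fresh estimator instances.
        return {
            'KNN': KNeighborsClassifier(),
            'RFC': RandomForestClassifier(),
            'GNB': GaussianNB(),
            'LR': LogisticRegression(),
        }[model.upper()]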
Example #2
    def model_test(self, model, best_params):

        print('Model Test')
        print('Start: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        lr = self.model_init(model)
        clf1 = self.model_init('KNN')
        clf2 = self.model_init('RFC')
        clf3 = self.model_init('GNB')
        sclf = StackingClassifier(classifiers=[clf1, clf2, clf3],
                                  meta_classifier=lr)
        sclf.set_params(**best_params)

        train_data = self.train.values.copy()
        train_label = self.train_label['label'].values.copy()

        sclf.fit(train_data, train_label)

        # Report the 30 most influential features. As written, this assumes
        # the fitted estimator exposes coef_ / feature_importances_; see the
        # note after this example.
        if model.upper() == 'LR':
            coef = sclf.coef_.reshape(sclf.coef_.shape[1])
            ind = coef.argsort()
            att = self.train.columns[ind[-30:]].tolist()
            print(att)
        elif model.upper() in ('RFC', 'XGB'):
            imp = sclf.feature_importances_
            print(imp)
            ind = imp.argsort()
            att = self.train.columns[ind[-30:]].tolist()
            print(att)

        test_data = self.test.values.copy()
        # Ensure the labels are a flat 1-D array.
        test_label = self.test_label['label'].values.copy().ravel()

        # Score the held-out test set by ROC AUC on the positive-class probability.
        res_proba = sclf.predict_proba(test_data)
        res_auc = roc_auc_score(test_label, res_proba[:, 1])

        print('Model: {0} ; Test: {1}'.format(model, res_auc))
        print('End: ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

        return res_auc
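# Note: mlxtend's StackingClassifier does not itself forward coef_ or
# feature_importances_ from its members; after fit() the trained base
# learners are stored in sclf.clfs_ and the trained meta-classifier in
# sclf.meta_clf_. A minimal sketch of reading the meta-level weights,
# assuming the default use_probas=False and a logistic-regression
# meta-classifier (sclf, train_data and train_label as in the example above):
sclf.fit(train_data, train_label)
for base, w in zip(sclf.clfs_, sclf.meta_clf_.coef_.ravel()):
    # One meta-coefficient per base learner's prediction column.
    print('{0}: {1:.4f}'.format(type(base).__name__, w))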
# train mean of score: 0.6473931873941305
# train std of score: 0.001041262887225388
# test mean of score: 0.6063831053298916
# test std of score: 0.003201456042199307

#%%
print('===============XGBoost regression with decision tree===============')
# Stacking overfits easily, so we reduce the model complexity here
sclf = StackingClassifier(classifiers=[xgbr],
                          meta_classifier=DecisionTreeClassifier(
                              min_samples_leaf=500,
                              random_state=model_random_state))
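# mlxtend namespaces nested estimator parameters as '<stepname>__<param>'
# (the step name is the lowercased class name), so the tuned XGB parameters
# are re-keyed with the 'xgbregressor__' prefix before set_params.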
sclf_updated_dict = {'xgbregressor__' + k: v for k, v in updated_dict.items()}
sclf_updated_dict['xgbregressor__subsample'] = .4
sclf_updated_dict['xgbregressor__min_child_weight'] = 100
sclf.set_params(**sclf_updated_dict)
sclf_scores = evaluation.cv_scores(sclf,
                                   X_train,
                                   y_train,
                                   cv=cv,
                                   scoring=quadratic_weighted_kappa_round,
                                   return_estimator=True)

# train mean of score: 0.6434094278182996
# train std of score: 0.0021768938733617974
# test mean of score: 0.6069677840155301
# test std of score: 0.007928572700424638

#%%
print('===============XGBoost classification with rounding===============')
xgbc = XGBClassifier(random_state=model_random_state,