return P # In[114]: models = get_models() P = train_predict(models,X_train_sc,X_test_sc,y_train_sc,y_test_sc) # In[115]: from mlens.visualization import corrmat corrmat(P.corr(), inflate=False) # Errors are significantly correlated, which is to be expected for models that perform well, since it's typically the outliers that are hard to get right. In fact, if we look at error correlations on a class prediction basis things look a bit more promising: # In[116]: corrmat(P.apply(lambda predic: 1*(predic >= 0.5) - y_test_sc).corr(), inflate=False) # # 6.3 Stacking # In[117]:
models = get_models() P = train_predict(models) score_models(P, ytest) ''' 各分类器auc值: knn : 0.779 naive bayes : 0.803 gbm : 0.878 logistic : 0.857 random forest : 0.844 svm : 0.850 mlp-nn : 0.851 ''' # 绘制各分类器产生数据的相关性 corrmat(P.corr(), inflate=False) plt.savefig('correlation_matrix.png') # 查看集成后的得分 print("Ensemble ROC-AUC score: %.3f" % roc_auc_score(ytest, P.mean(axis=1))) # 绘制各分类器产生的ROC曲线 def plot_roc_curve(ytest, P_base_learners, P_ensemble, labels, ens_label, name): """Plot the roc curve for base learners and ensemble.""" plt.figure(figsize=(10, 8)) plt.plot([0, 1], [0, 1], 'k--') cm = [ plt.cm.rainbow(i)
##############################################################################
# **Correlation matrix plot**
#
# The :class:`corrmat` function plots the lower triangle of
# a correlation matrix and is adapted from the `Seaborn`_ correlation matrix.

from mlens.visualization import corrmat

# Generate some different predictions to correlate: one column of in-sample
# predictions per value of the regularization parameter ``C``.
params = [0.1, 0.3, 1.0, 3.0, 10, 30]

# Size the buffer from the data and the parameter list instead of hard-coding
# (150, 6), so the example keeps working if ``X`` or ``params`` changes.
preds = np.zeros((np.shape(X)[0], len(params)))
for i, c in enumerate(params):
    preds[:, i] = LogisticRegression(C=c).fit(X, y).predict(X)

corr = DataFrame(preds, columns=['C=%.1f' % c for c in params]).corr()
corrmat(corr)
plt.show()

##############################################################################
# **Clustered correlation heatmap plot**
#
# The :class:`clustered_corrmap` function is similar to :class:`corrmat`,
# but differs in two respects. First, and most importantly, it uses a user
# supplied clustering estimator to cluster the correlation matrix on similar
# features, which can often help visualize whether there are blocks of highly
# correlated features. Secondly, it plots the full matrix (as opposed to the
# lower triangle).

from mlens.visualization import clustered_corrmap
from sklearn.cluster import KMeans

# Name the features f_1 ... f_k, with k taken from the width of ``X`` rather
# than assumed to be 4.
Z = DataFrame(X, columns=['f_%i' % i for i in range(1, np.shape(X)[1] + 1)])
def score_models(P, y):
    """Print the ROC-AUC score of each prediction column of ``P`` against ``y``.

    Every column of ``P`` is scored with ``metrics.roc_auc_score`` and
    reported on its own line, framed by a start and an end message.
    """
    print("Scoring models.")
    for name in P.columns:
        column = P.loc[:, name]
        auc = metrics.roc_auc_score(y, column)
        print("%-26s: %.3f" % (name, auc))
    print("Done.\n")


# Fit the models, collect their predictions and report the per-model scores.
models = get_models()
P = train_predict(models)
score_models(P, test_y)

from mlens.visualization import corrmat

# Draw the correlation matrix of the model predictions on a large canvas,
# show it, then write the same figure to disk.
f, ax = plt.subplots(figsize=(25, 25))
corrmat(P.corr(), inflate=False, ax=ax)
plt.show()
f.savefig('G:/Cardiac/ModelCorrmat.jpg')

from sklearn.metrics import roc_curve


def plot_roc_curve(test_y, P_base_learners, P_ensemble, labels, ens_label):
    """Plot the roc curve for base learners and ensemble."""
    plt.figure(figsize=(10, 8))
    # Dashed black diagonal from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'k--')

    # One rainbow colour per column of P_base_learners, plus one extra.
    n_colours = P_base_learners.shape[1] + 1
    cm = [plt.cm.rainbow(i) for i in np.linspace(0, 1.0, n_colours)]
# In[ ]:

# Prediction table: one row per test sample, one column per base learner,
# zero-initialised and filled in as each learner is fitted.
P = pd.DataFrame(
    np.zeros((xtest.shape[0], len(base_learners))),
    columns=[name for name, _ in base_learners],
)

for est_name, est in base_learners:
    est.fit(xtrain, ytrain)
    p = est.predict(xtest)
    P.loc[:, est_name] = p

    # Report the mean absolute error of this learner on the test set.
    mae = mean_absolute_error(ytest, p)
    print("%3s : %.4f" % (est_name, mae))

# So they all score relatively close. However, they seem to capture different
# aspects of the feature space, as shown by the low correlation of their
# predictions:

# In[ ]:

ax = corrmat(P.corr())
show()

# They are in fact not particularly correlated in their scoring (except the
# linear models), and hence an ensemble may be able to outperform any single
# model by learning to combine their respective strengths.

# ## 2. Comparing base learners
#
# To facilitate base learner comparison, ML-Ensemble implements a randomized
# grid search class that allows specification of several estimators (and
# preprocessing pipelines) in one grid search.

# In[ ]:

# Put their parameter dictionaries in a dictionary with the
# estimator names as keys
axes[1].plot(recall, precision, label="stacked_ensembe")

# Decorate the two panels: axes[0] is titled as the ROC curve, axes[1] as the
# precision-recall curve.
axes[0].legend(loc="lower right")
axes[0].set_xlabel("FPR")
axes[0].set_ylabel("TPR")
axes[0].set_title("ROC curve")
axes[1].legend()
axes[1].set_xlabel("recall")
axes[1].set_ylabel("precision")
axes[1].set_title("PR curve")
plt.tight_layout()
plt.show()

from mlens.visualization import corrmat

# Correlation matrix of the three meta-feature columns.
probs_df = pd.DataFrame(meta_features, columns=["xgb", "svm", "rf"])
corrmat(probs_df.corr(), inflate=True)

# Transform the test set once and reuse the result, so probabilities and
# class predictions are computed from the same features.
X_test_meta = first_stack.transform(X_test)
second_stack_probs = second_stack.predict_proba(X_test_meta)
second_stack_preds = second_stack.predict(X_test_meta)
conf_mat = confusion_matrix(y_test, second_stack_preds)

# plt.matshow opens a brand-new figure by default, which would leave the
# figure created here empty and ignore its size; fignum=0 makes it draw on
# the current figure instead.
plt.figure(figsize=(16, 8))
plt.matshow(conf_mat, cmap=plt.cm.Reds, alpha=0.2, fignum=0)

# Annotate every cell with its count; the loop bounds follow the matrix shape
# so this also works for more than two classes.
for i in range(conf_mat.shape[0]):
    for j in range(conf_mat.shape[1]):
        plt.text(x=j, y=i, s=conf_mat[i, j], ha="center", va="center")

plt.title("Confusion matrix", y=1.1, fontdict={"fontsize": 20})
plt.xlabel("Predicted", fontdict={"fontsize": 14})
plt.ylabel("Actual", fontdict={"fontsize": 14})
plt.show()
def PlotHeatMapTriangle(data_set):
    """Draw the correlation heat map of ``data_set`` with mlens' ``corrmat``.

    The correlation matrix is taken from ``data_set.corr()`` and handed to
    ``corrmat`` with ``inflate=False``. Nothing is returned.
    """
    # Imported here so mlens is only needed when this plot is requested.
    from mlens.visualization import corrmat

    correlations = data_set.corr()
    corrmat(correlations, inflate=False)
print(acc_results)

# ## prediction correlation

# In[46]:

# One column of predicted classes per base model.
pred_df = pd.DataFrame(pred_class_base)
pred_df.columns = ["cnn", "lstm", "bi_lstm", "cnn_lstm", "cnn_bi_lstm"]

# In[47]:

# pred_df.head()

# In[48]:

# Draw the correlation matrix without showing it, then save it to disk.
corrmat(pred_df.corr(), inflate=False, show=False)
plt.savefig('results/corr_matrix_base_xg.png', bbox_inches='tight')
# corrmat(pred_df.corr(), inflate=False)

# ## average

# In[49]:

# Mean of the base-model probabilities along axis 2.
avg_pred_prob = pred_prob_base.mean(axis=2)

# In[50]:

# Index of the highest averaged probability along axis 1, as integers.
avg_pred_class = np.argmax(avg_pred_prob, axis=1).astype(int)

# In[51]:
def score_models(P, y): """Score model in prediction DF""" print("Scoring models.") for m in P.columns: score = roc_auc_score(y, P.loc[:, m]) print("%-26s: %.3f" % (m, score)) print("Done.\n") models = get_models() P = train_predict(models) score_models(P, ytest) # You need ML-Ensemble for this figure: you can install it with: pip install mlens from mlens.visualization import corrmat corrmat(P.corr(), inflate=False) plt.show() corrmat(P.apply(lambda pred: 1*(pred >= 0.5) - ytest.values).corr(), inflate=False) plt.show() print("Ensemble ROC-AUC score: %.3f" % roc_auc_score(ytest, P.mean(axis=1))) from sklearn.metrics import roc_curve def plot_roc_curve(ytest, P_base_learners, P_ensemble, labels, ens_label): """Plot the roc curve for base learners and ensemble.""" plt.figure(figsize=(10, 8)) plt.plot([0, 1], [0, 1], 'k--') cm = [plt.cm.rainbow(i)