예제 #1
0
    return P


# In[114]:


# get_models and train_predict are defined outside this excerpt; the result P
# is used below as a DataFrame (it is asked for .corr() and .apply()).
models = get_models()
P = train_predict(models,X_train_sc,X_test_sc,y_train_sc,y_test_sc)


# In[115]:


from mlens.visualization import corrmat

# Plot the pairwise correlation between the columns of P.
corrmat(P.corr(), inflate=False)


# Errors are significantly correlated, which is to be expected for models that perform well, since it's typically the outliers that are hard to get right. In fact, if we look at error correlations on a class prediction basis things look a bit more promising:

# In[116]:


# Threshold every column of P at 0.5, subtract y_test_sc, and plot the
# correlation of the resulting per-column differences.
# NOTE(review): if y_test_sc is a pandas Series whose index differs from P's,
# the subtraction would align on index labels rather than position — confirm.
corrmat(P.apply(lambda predic: 1*(predic >= 0.5) - y_test_sc).corr(), inflate=False)


# # 6.3 Stacking

# In[117]:

예제 #2
0
# get_models, train_predict and score_models are defined outside this excerpt;
# P is used below as a DataFrame of per-classifier predictions.
models = get_models()
P = train_predict(models)
score_models(P, ytest)
# The string below is a no-op expression recording the AUC printed per
# classifier by an earlier run.
'''
各分类器auc值:
knn                       : 0.779
naive bayes               : 0.803
gbm                       : 0.878
logistic                  : 0.857
random forest             : 0.844
svm                       : 0.850
mlp-nn                    : 0.851
'''

# Plot the correlation between the predictions of the individual classifiers.
# show=False stops corrmat from displaying the figure itself: once a figure
# has been shown, plt.savefig may operate on a fresh, empty current figure and
# write a blank image. Save first, then display.
corrmat(P.corr(), inflate=False, show=False)
plt.savefig('correlation_matrix.png')
plt.show()

# Score of the ensemble formed by averaging the classifiers' predictions.
print("Ensemble ROC-AUC score: %.3f" % roc_auc_score(ytest, P.mean(axis=1)))


# Plot the ROC curves produced by the individual classifiers
def plot_roc_curve(ytest, P_base_learners, P_ensemble, labels, ens_label,
                   name):
    """Plot the roc curve for base learners and ensemble."""
    plt.figure(figsize=(10, 8))
    plt.plot([0, 1], [0, 1], 'k--')

    cm = [
        plt.cm.rainbow(i)
##############################################################################
# **Correlation matrix plot**
#
# The :class:`corrmat` function plots the lower triangle of
# a correlation matrix and is adapted from the `Seaborn`_ correlation matrix.

from mlens.visualization import corrmat
# Generate some different predictions to correlate: one column of predictions
# on X per value of the C parameter listed in params.
params = [0.1, 0.3, 1.0, 3.0, 10, 30]
# One row per sample, one column per entry in params.
# NOTE(review): the 150 rows are hard-coded — assumes X has 150 samples; confirm.
preds = np.zeros((150, 6))
for i, c in enumerate(params):
    # Fit on (X, y) and predict on the same X.
    preds[:, i] = LogisticRegression(C=c).fit(X, y).predict(X)

# Name the columns after their C value, then plot their correlation matrix.
corr = DataFrame(preds, columns=['C=%.1f' % i for i in params]).corr()
corrmat(corr)
plt.show()

##############################################################################
# **Clustered correlation heatmap plot**
#
# The :class:`clustered_corrmap` function is similar to :class:`corrmat`,
# but differs in two respects. First, and most importantly, it uses a user
# supplied clustering estimator to cluster the correlation matrix on similar
# features, which can often help visualize whether there are blocks of highly
# correlated features. Secondly, it plots the full matrix (as opposed to the
# lower triangle).

from mlens.visualization import clustered_corrmap
from sklearn.cluster import KMeans
# Wrap X in a DataFrame whose four columns are named f_1 .. f_4.
Z = DataFrame(X, columns=['f_%i' % i for i in range(1, 5)])
예제 #4
0
def score_models(P, y):
    """Print the ROC-AUC of every prediction column in ``P`` against ``y``.

    Each column of ``P`` is scored with ``metrics.roc_auc_score`` and reported
    on its own line, framed by a start and an end message. Nothing is returned.
    """
    print("Scoring models.")
    for name in P.columns:
        auc = metrics.roc_auc_score(y, P.loc[:, name])
        # Left-align the column label in a 26-character field.
        print(f"{name!s:<26}: {auc:.3f}")
    print("Done.\n")


# get_models and train_predict are defined outside this excerpt; score_models
# prints one ROC-AUC line per column of P.
models = get_models()
P = train_predict(models)
score_models(P, test_y)

from mlens.visualization import corrmat
# Draw the prediction-correlation matrix on an explicitly created axis so the
# figure handle f is available for saving afterwards.
f, ax = plt.subplots(figsize=(25, 25))
corrmat(P.corr(), inflate=False, ax=ax)
plt.show()
# NOTE(review): the figure is written to disk only after it has been shown;
# saving before plt.show() is the more robust order — confirm the saved image
# is not blank on the backend in use.
f.savefig('G:/Cardiac/ModelCorrmat.jpg')

from sklearn.metrics import roc_curve


def plot_roc_curve(test_y, P_base_learners, P_ensemble, labels, ens_label):
    """Plot the roc curve for base learners and ensemble."""
    plt.figure(figsize=(10, 8))
    plt.plot([0, 1], [0, 1], 'k--')

    cm = [
        plt.cm.rainbow(i)
        for i in np.linspace(0, 1.0, P_base_learners.shape[1] + 1)
    ]
예제 #5
0
# In[ ]:

# Pre-allocate one row per row of xtest and one column per base learner, then
# label the columns with the learner names (first element of each pair).
P = np.zeros((xtest.shape[0], len(base_learners)))
P = pd.DataFrame(P, columns=[e for e, _ in base_learners])

# Fit each learner on the training split, store its predictions for xtest in
# its own column of P, and print its mean absolute error against ytest.
for est_name, est in base_learners:
    est.fit(xtrain, ytrain)
    p = est.predict(xtest)
    P.loc[:, est_name] = p
    print("%3s : %.4f" % (est_name, mean_absolute_error(ytest, p)))

# So they all score relatively close. However, they seem to capture different aspects of the feature space, as shown by the low correlation of their predictions:

# In[ ]:

# Plot the correlation matrix of the learners' predictions.
ax = corrmat(P.corr())
show()

# They are in fact not particularly correlated in their scoring (except the linear models), and hence
# an ensemble may be able to outperform any single model by learning to combine their respective strengths.

# ## 2. Comparing base learners
#
# To facilitate base learner comparison, ML-Ensemble implements a randomized grid search
# class that allows specification of several estimators (and preprocessing pipelines) in
# one grid search.

# In[ ]:

# Put their parameter dictionaries in a dictionary with the
# estimator names as keys
예제 #6
0
# Add the precision-recall curve to the second panel.
# NOTE(review): the label "stacked_ensembe" looks like a typo for
# "stacked_ensemble"; it is a runtime string and is left untouched here.
axes[1].plot(recall, precision, label="stacked_ensembe")
# Legends, axis labels and titles: axes[0] is the ROC panel, axes[1] the
# precision-recall panel.
axes[0].legend(loc="lower right")
axes[0].set_xlabel("FPR")
axes[0].set_ylabel("TPR")
axes[0].set_title("ROC curve")
axes[1].legend()
axes[1].set_xlabel("recall")
axes[1].set_ylabel("precision")
axes[1].set_title("PR curve")
plt.tight_layout()
plt.show()

from mlens.visualization import corrmat

# Correlation between the three columns of meta_features, labelled
# xgb / svm / rf.
probs_df = pd.DataFrame(meta_features, columns=["xgb", "svm", "rf"])
corrmat(probs_df.corr(), inflate=True)

# Run X_test through first_stack.transform and feed the result to second_stack
# for both probability and class predictions, then build the confusion matrix
# of the class predictions against y_test.
second_stack_probs = second_stack.predict_proba(first_stack.transform(X_test))
second_stack_preds = second_stack.predict(first_stack.transform(X_test))
conf_mat = confusion_matrix(y_test, second_stack_preds)

# plt.matshow opens a brand-new figure unless fignum is given, which would
# leave the 16x8 figure created here empty and ignore its size. fignum=0 makes
# matshow draw into the current axes of this figure instead.
plt.figure(figsize=(16, 8))
plt.matshow(conf_mat, cmap=plt.cm.Reds, alpha=0.2, fignum=0)
# Write each cell's count at its centre; iterate over the matrix's own shape
# so the annotation is not limited to a 2x2 matrix.
for i in range(conf_mat.shape[0]):
    for j in range(conf_mat.shape[1]):
        plt.text(x=j, y=i, s=conf_mat[i, j], ha="center", va="center")
plt.title("Confusion matrix", y=1.1, fontdict={"fontsize": 20})
plt.xlabel("Predicted", fontdict={"fontsize": 14})
plt.ylabel("Actual", fontdict={"fontsize": 14})
plt.show()
예제 #7
0
def PlotHeatMapTriangle(data_set):
    """Draw a triangular correlation heat map of ``data_set`` via mlens.

    The correlation matrix is taken from ``data_set.corr()`` and handed to
    ``mlens.visualization.corrmat`` with ``inflate=False``. Nothing is returned.
    """
    # Imported lazily so mlens is only required when the plot is requested.
    from mlens.visualization import corrmat as plot_corr

    correlations = data_set.corr()
    plot_corr(correlations, inflate=False)
예제 #8
0
print(acc_results)

# ## prediction correlation

# In[46]:

# Put the base models' class predictions in a DataFrame, one named column per
# model.
pred_df = pd.DataFrame(pred_class_base)
pred_df.columns = ["cnn", "lstm", "bi_lstm", "cnn_lstm", "cnn_bi_lstm"]

# In[47]:

# pred_df.head()

# In[48]:

# show=False keeps corrmat from displaying the figure so that plt.savefig
# below writes the plot that was just drawn.
corrmat(pred_df.corr(), inflate=False, show=False)
plt.savefig('results/corr_matrix_base_xg.png', bbox_inches='tight')
# corrmat(pred_df.corr(), inflate=False)

# ## average

# In[49]:

# Average pred_prob_base over its third axis.
# NOTE(review): presumably axis 2 indexes the base models — confirm.
avg_pred_prob = pred_prob_base.mean(axis=2)

# In[50]:

# Pick the index of the largest averaged value along axis 1 and store it as an
# integer array.
# NOTE(review): presumably axis 1 indexes the classes — confirm.
avg_pred_class = np.argmax(avg_pred_prob, axis=1)
avg_pred_class = avg_pred_class.astype(int)

# In[51]:
예제 #9
0
def score_models(P, y):
    """Report the ROC-AUC of each column of ``P`` measured against ``y``.

    One line is printed per column using ``roc_auc_score``, preceded by a
    start message and followed by an end message. Nothing is returned.
    """
    # Label left-aligned in 26 characters, score with three decimals.
    row = "{!s:<26}: {:.3f}"

    print("Scoring models.")
    for column in P.columns:
        print(row.format(column, roc_auc_score(y, P.loc[:, column])))
    print("Done.\n")

# get_models and train_predict are defined outside this excerpt; score_models
# prints one ROC-AUC line per column of P.
models = get_models()
P = train_predict(models)
score_models(P, ytest)

# You need ML-Ensemble for this figure: you can install it with: pip install mlens
from mlens.visualization import corrmat

# Correlation between the columns of P.
corrmat(P.corr(), inflate=False)
plt.show()

# Correlation between the per-column differences obtained by thresholding each
# column at 0.5 and subtracting the labels; ytest.values is used so the
# subtraction is positional rather than index-aligned.
corrmat(P.apply(lambda pred: 1*(pred >= 0.5) - ytest.values).corr(), inflate=False)
plt.show()

# Score of the ensemble formed by averaging the columns of P row-wise.
print("Ensemble ROC-AUC score: %.3f" % roc_auc_score(ytest, P.mean(axis=1)))

from sklearn.metrics import roc_curve

def plot_roc_curve(ytest, P_base_learners, P_ensemble, labels, ens_label):
    """Plot the roc curve for base learners and ensemble."""
    plt.figure(figsize=(10, 8))
    plt.plot([0, 1], [0, 1], 'k--')

    cm = [plt.cm.rainbow(i)