def test_calibration_curve():
    """Check calibration_curve function"""
    y_true = np.array([0, 0, 0, 1, 1, 1])
    y_pred = np.array([0., 0.1, 0.2, 0.8, 0.9, 1.])
    prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2)
    prob_true_unnormalized, prob_pred_unnormalized = \
        calibration_curve(y_true, y_pred * 2, n_bins=2, normalize=True)
    assert_equal(len(prob_true), len(prob_pred))
    assert_equal(len(prob_true), 2)
    assert_almost_equal(prob_true, [0, 1])
    assert_almost_equal(prob_pred, [0.1, 0.9])
    assert_almost_equal(prob_true, prob_true_unnormalized)
    assert_almost_equal(prob_pred, prob_pred_unnormalized)

    # probabilities outside [0, 1] should not be accepted when normalize
    # is set to False
    assert_raises(ValueError, calibration_curve, [1.1], [-0.1],
                  normalize=False)

    # test that quantiles work as expected
    y_true2 = np.array([0, 0, 0, 0, 1, 1])
    y_pred2 = np.array([0., 0.1, 0.2, 0.5, 0.9, 1.])
    prob_true_quantile, prob_pred_quantile = calibration_curve(
        y_true2, y_pred2, n_bins=2, strategy='quantile')

    assert len(prob_true_quantile) == len(prob_pred_quantile)
    assert len(prob_true_quantile) == 2
    assert_almost_equal(prob_true_quantile, [0, 2 / 3])
    assert_almost_equal(prob_pred_quantile, [0.1, 0.8])

    # Check that error is raised when invalid strategy is selected
    assert_raises(ValueError, calibration_curve, y_true2, y_pred2,
                  strategy='percentile')
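# --- Added usage sketch (not part of the original test) ---
# Minimal standalone call to sklearn.calibration.calibration_curve using the
# same fixture as the test above, shown only to make the expected outputs concrete.
import numpy as np
from sklearn.calibration import calibration_curve

y_true = np.array([0, 0, 0, 1, 1, 1])
y_prob = np.array([0.0, 0.1, 0.2, 0.8, 0.9, 1.0])
prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=2)
print(prob_true)   # fraction of positives per bin -> [0. 1.]
print(prob_pred)   # mean predicted probability per bin -> [0.1 0.9]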
def plot_probability_calibration_curves(self):
    """Compute true and predicted probabilities for a calibration plot.

    fraction_of_positives - The true probability in each bin (fraction of positives).
    mean_predicted_value  - The mean predicted probability in each bin.
    """
    fig = plt.figure()
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0), rowspan=2)

    clf_score = brier_score_loss(self.y_true, self.y_pred, pos_label=1)
    fraction_of_positives, mean_predicted_value = calibration_curve(
        self.y_true, self.y_pred, n_bins=50)

    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             color="#660066", alpha=0.6,
             label="%s (%1.3f)" % (self.description, clf_score))
    ax2.hist(self.y_pred, range=(0, 1), bins=50, color="#660066",
             alpha=0.6, label="%s (%1.3f)" % (self.description, clf_score),
             histtype="step", lw=2)

    # Axis labels and legends are set after plotting so the labelled artists
    # actually appear in the legends.
    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve) ' + self.description)

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.yscale('log')
    return
def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration. """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Calibrated with ROC convex hull calibration
    # NOTE: 'rocch' is not a built-in method of scikit-learn's
    # CalibratedClassifierCV (only 'sigmoid' and 'isotonic' are);
    # this assumes a patched/custom implementation.
    rocch = CalibratedClassifierCV(est, cv=2, method='rocch')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'),
                      (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid'),
                      (rocch, name + ' + ROCConvexHull')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("%s:" % name)
        print("\tBrier: %1.4f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tAuc: %1.4f\n" % roc_auc_score(y_test, prob_pos))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.4f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
def plot_calibration_curve(est, name, fig_index):
    '''
    Plot calibration curve for est w/o and with calibration.
    '''
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1.0, solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
    for clf, name in [
        (lr, 'Logistic'),
        (est, name),
        (isotonic, name + ' + Isotonic'),
        (sigmoid, name + ' + Sigmoid')
    ]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, 'predict_proba'):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print('%s:' % name)
        print('\tBrier: %1.3f' % (clf_score))
        print('\tPrecision: %1.3f' % precision_score(y_test, y_pred))
        print('\tRecall: %1.3f' % recall_score(y_test, y_pred))
        print('\tF1: %1.3f\n' % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, 's-',
                 label='%s (%1.3f)' % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype='step', lw=2)

    ax1.set_ylabel('Fraction of positives')
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc='lower right')
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel('Mean predicted value')
    ax2.set_ylabel('Count')
    ax2.legend(loc='upper center', ncol=2)

    plt.tight_layout()
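# --- Added usage sketch (assumption, not part of the original snippet) ---
# plot_calibration_curve above relies on module-level X_train, X_test, y_train,
# y_test and y, exactly as in the scikit-learn calibration example it mirrors.
# The two classifiers below are illustrative choices, not the author's.
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC

plot_calibration_curve(GaussianNB(), "Naive Bayes", fig_index=1)        # has predict_proba
plot_calibration_curve(LinearSVC(max_iter=10000), "SVC", fig_index=2)   # decision_function only
plt.show()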
def plot_calibration_curve_cv(X, y, est, name, bins=10, n_folds=8, n_jobs=1,
                              fig_index=1):
    """Plot calibration curve for est w/o and with calibration. """
    # NOTE: sklearn.cross_validation is the legacy (pre-0.20) module; recent
    # scikit-learn versions expose this functionality in sklearn.model_selection.
    from sklearn import metrics, cross_validation
    from model_selection import cross_val_predict_proba  # project-local helper

    # Calibrated with isotonic calibration
    cv = 2
    isotonic = CalibratedClassifierCV(est, cv=cv, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=cv, method='sigmoid')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        y_true = y
        scoring = 'roc_auc'
        cv1 = cross_validation.StratifiedKFold(y, n_folds)
        y_proba, scores = cross_val_predict_proba(
            clf, X, y, scoring=scoring, cv=cv1, n_jobs=n_jobs, verbose=0,
            fit_params=None, pre_dispatch='2*n_jobs')
        y_pred = np.array(y_proba > 0.5, dtype=int)

        clf_score = brier_score_loss(y_true, y_proba, pos_label=y_true.max())
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_true, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_true, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_true, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_true, y_proba, n_bins=bins)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(y_proba, range=(0, 1), bins=bins, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
def plot_calibration_curve(y_true, pred_proba, n_bins=10):
    # use the n_bins argument rather than a hard-coded bin count
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_true, pred_proba, normalize=False, n_bins=n_bins)
    plt.plot([0, 1], [0, 1], 'k:', label='Perfectly Calibrated')
    plt.plot(mean_predicted_value, fraction_of_positives, "s-", label="Actual")
    plt.ylabel("Fraction of positives")
    plt.title('Calibration plots (reliability curve)')
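# --- Added usage sketch (assumption: `clf` is any fitted binary classifier and
# X_test / y_test are a held-out split; none of these names appear in the original) ---
plot_calibration_curve(y_test, clf.predict_proba(X_test)[:, 1], n_bins=10)
plt.legend(loc='lower right')
plt.show()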
def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration. """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method="isotonic")

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method="sigmoid")

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1.0, solver="lbfgs")

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, "Logistic"),
                      (est, name),
                      (isotonic, name + " + Isotonic"),
                      (sigmoid, name + " + Sigmoid")]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_val)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_val)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_val)
            prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = log_loss(y_val, prob_pos)
        print("%s:" % name)
        print("\tLog Loss: %1.3f" % (clf_score))

        fraction_of_positives, mean_predicted_value = calibration_curve(
            y_val, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots (reliability curve)")

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
def plot_recall_precision_curve(y, y_prediction, bins=50, outputfile=None):
    print(y)
    precision, recall, thresholds = metrics.precision_recall_curve(y, y_prediction)
    thresholds = np.append(thresholds, 1)

    fraction_of_positives, mean_predicted_value = calibration.calibration_curve(
        y, y_prediction, n_bins=bins)

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(13, 5))

    delta = 1 / bins
    bins = np.linspace(0 - delta / 2, 1 + delta / 2, bins)
    ax1.hist(y_prediction[y == 1], bins=bins, histtype='step', label='signal')
    ax1.hist(y_prediction[y == 0], bins=bins, histtype='step', label='background')
    ax1.set_xlim(-delta, 1 + delta)
    ax1.legend(loc='upper center')
    ax1.set_xlabel('Probabilities')

    ax2.plot(thresholds, recall, label='Recall', linestyle='-')
    ax2.plot(thresholds, precision, label='Precision', linestyle='-')
    ax2.set_ylim(0, 1.05)
    ax2.set_xlim(-0.05, 1.05)
    ax2.legend(loc='lower center')
    ax2.set_xlabel('Confidence Threshold')

    ax3.plot(mean_predicted_value, fraction_of_positives)
    ax3.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax3.set_xlabel('Mean Predicted Value')
    ax3.set_ylabel('Fraction of positives')

    fig.tight_layout(pad=0)
    if outputfile:
        fig.savefig(outputfile)
    else:
        plt.show()
def evaluate_sigmoid_match(self, X_test, y_test, A, B):
    from sklearn.calibration import calibration_curve
    import matplotlib.pyplot as plt
    from sklearn.metrics import (brier_score_loss, precision_score,
                                 recall_score, f1_score)

    # Platt-style sigmoid mapping of the raw scores in X_test with parameters A, B
    prob_pos = 1. / (1. + (np.exp(A * X_test + B)))

    clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_test.max())
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test, prob_pos, n_bins=10)
    print("SVC_sigmoid:")
    print("\tBrier: %1.3f" % (clf_score))

    fig = plt.figure(2, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             label="%s (%1.3f)" % ("SVC_sigmoid", clf_score))
    ax2.hist(prob_pos, range=(0, 1), bins=10, label="SVC_sigmoid",
             histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
    plt.show()
def calibration_model_compare(X_train, y_train, X_test, y_test):
    # positive label used for the Brier score (avoids shadowing the builtin max)
    pos_label = max(y_train.max(), y_test.max())

    svc_linear = SVC(kernel='linear')
    svc_non_linear = SVC(kernel='rbf')
    gnb = GaussianNB()
    lr = LogisticRegression()  # C=1., solver='lbfgs'
    dtree_gini = DecisionTreeClassifier()
    dtree_entropy = DecisionTreeClassifier(criterion="entropy")
    rf_50 = RandomForestClassifier(n_estimators=50)
    rf_100 = RandomForestClassifier(n_estimators=100)
    knn_2 = KNeighborsClassifier(n_neighbors=2)
    knn_6 = KNeighborsClassifier(n_neighbors=6)

    pyplot.figure(figsize=(9, 9))
    ax1 = pyplot.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = pyplot.subplot2grid((3, 1), (2, 0))
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    for clf, name in [(svc_linear, 'svc_linear'),
                      (svc_non_linear, 'svc_non_linear'),
                      (gnb, 'Naive Bayes'),
                      (lr, 'Logistic Regression'),
                      (dtree_gini, 'dtree_gini'),
                      (dtree_entropy, 'dtree_entropy'),
                      (rf_50, 'Random Forest-50'),
                      (rf_100, 'Random Forest-100'),
                      (knn_2, 'KNN_2'),
                      (knn_6, 'KNN_6')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=pos_label)
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = calibration_curve(
            y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s" % (name, ))
        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    pyplot.tight_layout()
trainMarkedWords = naiveBayes.setOfWordsListToVecTor(
    vocabularyList, [mailWords[i] for i in train_index])
print("trainMarkedWords finished")
testMarkedWords = naiveBayes.setOfWordsListToVecTor(
    vocabularyList, [mailWords[i] for i in test_index])

# # change it to array
# trainMarkedWords = np.array(trainMarkedWords)
# print("data to matrix finished")

clf = GaussianNB()
clf.fit(trainMarkedWords, [classLables[i] for i in train_index])
prob_pos = clf.predict_proba(testMarkedWords)[:, 1]

fraction_of_positives, mean_predicted_value = \
    calibration_curve([classLables[i] for i in test_index], prob_pos, n_bins=10)

ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
         label="%s" % ("GNB", ))
ax2.hist(prob_pos, range=(0, 1), bins=10, label="GNB",
         histtype="step", lw=2)

# predicted = clf.predict(testMarkedWords)
# # Compare predicted values with ground truth (accuracy)
def plot_calibration(model, partition):
    r"""Display scikit-learn calibration plots.

    Parameters
    ----------
    model : alphapy.Model
        The model object with plotting specifications.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    None : None

    References
    ----------
    Code excerpts from authors:

    * Alexandre Gramfort <*****@*****.**>
    * Jan Hendrik Metzen <*****@*****.**>

    http://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_curve.html#sphx-glr-auto-examples-calibration-plot-calibration-curve-py

    """
    logger.info("Generating Calibration Plot")

    # For classification only
    if model.specs['model_type'] != ModelType.classification:
        logger.info('Calibration plot is for classification only')
        return None

    # Get X, Y for correct partition
    X, y = get_partition_data(model, partition)

    plt.style.use('classic')
    plt.figure(figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly Calibrated")
    for algo in model.algolist:
        logger.info("Calibration for Algorithm: %s", algo)
        clf = model.estimators[algo]
        if hasattr(clf, "predict_proba"):
            prob_pos = model.probas[(algo, partition)]
        else:  # use decision function
            prob_pos = clf.decision_function(X)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y, prob_pos, n_bins=10)
        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s" % (algo, ))
        ax2.hist(prob_pos, range=(0, 1), bins=10, label=algo,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of Positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration Plots [Reliability Curve]')

    ax2.set_xlabel("Mean Predicted Value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plot_dir = get_plot_directory(model)
    pstring = datasets[partition]
    write_plot('matplotlib', plt, 'calibration', pstring, plot_dir)
def run_cross_validation_linear(_df, _classifier, _features_columns, _id, _target, _prob, _n_iter=5, _test_size=.3, _random_state=0, _normalized=True): # cross validation type can be changed here ss = ShuffleSplit(len(_df[_id].unique()), n_iter=_n_iter, test_size=_test_size, random_state=_random_state) results_cv_targeting = pd.DataFrame([], columns=[_id, _target, 'fold', _prob]) mean_tpr = 0.0 mean_fpr = np.linspace(0, 1, 100) mean_precision = 0.0 mean_recall = np.linspace(0, 1, 100) mean_lift = 0.0 mean_lift_decay = 0.0 nb_calls_cv = pd.DataFrame([], columns=[ 'nb_contacts', 'total_population', 'total_pos_targets', 'nb_pos_targets', 'pos_rate', 'Percentage_of_pos_targets_found', 'Percentage_of_Population', 'Lift' ]) feature_importances = pd.DataFrame( [], columns=['feature', 'importance', 'fold']) fig = plt.figure(figsize=(6, 12)) fig.subplots_adjust(bottom=-0.5, left=-0.5, top=0.5, right=1.5) print('modeling started') plt.gcf().clear() colors = ['#d7191c', '#fdae61', '#ffffbf', '#abdda4', '#2b83ba'] plt.rcParams["font.family"] = "monospace" for i, (train_index, valid_index) in enumerate(ss): customer_id = _df[_id].unique().copy() shuffled_customer_id = np.array( sorted(customer_id, key=lambda k: random.random())) train_customer_id = shuffled_customer_id[train_index] valid_customer_id = shuffled_customer_id[valid_index] train = _df.loc[_df[_id].isin(train_customer_id), np.concatenate([_features_columns, [_target]], axis=0 )].copy().reset_index(drop=True) valid = _df.loc[_df[_id].isin(valid_customer_id), np.concatenate([_features_columns, [_target]], axis=0 )].copy().reset_index(drop=True) temp = valid[[_id, _target]].copy() temp['fold'] = i # modeling# train_X = train.drop([_id, _target], axis=1) valid_X = valid.drop([_id, _target], axis=1) if _normalized: scaler = StandardScaler().fit(train_X) train_X_scaled = scaler.transform(train_X) valid_X_scaled = scaler.transform(valid_X) train_X = pd.DataFrame(train_X_scaled, index=train_X.index, columns=train_X.columns) valid_X = pd.DataFrame(valid_X_scaled, index=valid_X.index, columns=valid_X.columns) train_Y = np.array(train[_target].astype(np.uint8)) valid_Y = np.array(valid[_target].astype(np.uint8)) probas_ = _classifier.fit(train_X, train_Y).predict_proba(valid_X) probabilities = pd.DataFrame(data=probas_[:, 1], index=valid_X.index, columns=[_prob]) temp = temp.join(probabilities, how='left') results_cv_targeting = results_cv_targeting.append(temp) ############################################################################### # Plot probability distribution plt.subplot(3, 3, 1) plt.hist(probas_[:, 1], range=(0, 1), bins=100, label="fold %d" % (i), color=colors[i], alpha=0.5) #histtype="step", ############################################################################### # plot proba distribution for both class target_probs = pd.DataFrame(valid_Y, columns=['target']) target_probs['probs'] = probas_[:, 1] plt.subplot(3, 3, 2) plt.hist(target_probs[target_probs['target'] == 1]['probs'], range=(0, 1), bins=100, label="fold %d class 1" % (i), color='#abdda4', alpha=0.5) plt.hist(target_probs[target_probs['target'] == 0]['probs'], range=(0, 1), bins=100, label="fold %d class 0" % (i), color='#d53e4f', alpha=0.5) ############################################################################### # Plot calibration plots fraction_of_positives, mean_predicted_value = calibration_curve( valid_Y, probas_[:, 1], n_bins=20) plt.subplot(3, 3, 3) plt.plot(mean_predicted_value, fraction_of_positives, "P-", label="fold %d" % (i), lw=1, color=colors[i]) 
############################################################################### # plot evals_result # plt.subplot(3, 3, 4) # plt.plot(range(len(evals_result)), evals_result, label='Fold %d' %(i), lw=1, color=colors[i]) ############################################################################### # Compute ROC curve and area the curve fpr, tpr, thresholds = roc_curve(valid_Y, probas_[:, 1]) mean_tpr += interp(mean_fpr, fpr, tpr) mean_tpr[0] = 0.0 roc_auc = auc(fpr, tpr) plt.subplot(3, 3, 4) plt.plot(fpr, tpr, label='Fold %d: %0.2f' % (i, roc_auc), lw=1, color=colors[i]) ############################################################################### # Compute Precision-Recall curve and area the curve precision, recall, thresholds = precision_recall_curve( valid_Y, probas_[:, 1]) mean_precision += interp(mean_recall, recall[::-1], precision[::-1]) pr_auc = auc(recall, precision) plt.subplot(3, 3, 5) plt.plot(recall, precision, label='Fold %d: %0.2f' % (i, pr_auc), lw=1, color=colors[i]) ############################################################################### # calculate lift related information cust_rank = temp[[_target, _prob]].copy() cust_rank = cust_rank.sort_values( by=_prob, ascending=False).reset_index(drop=True) cust_rank['rank'] = cust_rank.index + 1 cust_rank['num_pos_target'] = np.cumsum(cust_rank[_target]) pos_rate = temp[_target].mean() lift_cums = [] lift_decays = [] for q in range(10, 110, 10): small_q = (q - 10) / 100.0 big_q = q / 100.0 if q == 100: lift_cum = cust_rank[_target].mean() / pos_rate lift_decay = cust_rank[int(small_q * cust_rank.shape[0] ):][_target].mean() / pos_rate else: lift_cum = cust_rank[:int(big_q * cust_rank.shape[0] )][_target].mean() / pos_rate lift_decay = cust_rank[int(small_q * cust_rank.shape[0]):int( big_q * cust_rank.shape[0])][_target].mean() / pos_rate lift_cums.append(lift_cum) lift_decays.append(lift_decay) print('shuffle: %i, AUC: %f, lift at 10 percent: %f' % (i, roc_auc, lift_cums[0])) mean_lift += np.array(lift_cums) mean_lift_decay += np.array(lift_decays) ############################################################################### # calculate number of calls nb_calls = cust_rank.copy() nb_calls['nb_contacts_100'] = nb_calls.loc[nb_calls.num_pos_target == 100, 'rank'].min() nb_calls['nb_contacts_200'] = nb_calls.loc[nb_calls.num_pos_target == 200, 'rank'].min() nb_calls['nb_contacts_500'] = nb_calls.loc[nb_calls.num_pos_target == 500, 'rank'].min() nb_calls['nb_contacts_1000'] = nb_calls.loc[nb_calls.num_pos_target == 1000, 'rank'].min() nb_calls['nb_contacts_2000'] = nb_calls.loc[nb_calls.num_pos_target == 2000, 'rank'].min() #nb_calls['nb_contacts_3000'] = nb_calls.loc[nb_calls.num_pos_target==3000,'rank'].min() nb_calls['nb_contacts_all'] = nb_calls.loc[ nb_calls.num_pos_target == nb_calls.num_pos_target.max(), 'rank'].min() nb_calls = nb_calls[[ 'nb_contacts_100', 'nb_contacts_200', 'nb_contacts_500', 'nb_contacts_1000', 'nb_contacts_2000', 'nb_contacts_all' ]].min() #'nb_contacts_3000', nb_calls = pd.DataFrame(nb_calls, columns=['nb_contacts']) nb_calls['total_population'] = cust_rank.shape[0] nb_calls['total_pos_targets'] = cust_rank[_target].sum() nb_calls['nb_pos_targets'] = [ 100, 200, 500, 1000, 2000, cust_rank[_target].sum() ] #3000, nb_calls['pos_rate'] = nb_calls.nb_pos_targets / nb_calls.nb_contacts nb_calls[ 'Percentage_of_pos_targets_found'] = nb_calls.nb_pos_targets / nb_calls.total_pos_targets nb_calls[ 'Percentage_of_Population'] = nb_calls.nb_contacts / nb_calls.total_population nb_calls[ 'Lift'] = 
nb_calls.Percentage_of_pos_targets_found / nb_calls.Percentage_of_Population nb_calls_cv = nb_calls_cv.append(nb_calls) ############################################################################### feature_importances_data = [] features = train_X.columns for feature_name, feature_importance in zip(features, _classifier.coef_.ravel()): feature_importances_data.append({ 'feature': feature_name, 'importance': feature_importance }) temp = pd.DataFrame(feature_importances_data) temp['fold'] = i feature_importances = feature_importances.append(temp) for feature in nb_calls_cv.columns.values: nb_calls_cv[feature] = pd.to_numeric(nb_calls_cv[feature], errors='coerce') nb_calls_cv = nb_calls_cv.reset_index().groupby( 'index').mean().sort_values(by='nb_pos_targets') results_cv_targeting = results_cv_targeting.reset_index(drop=True) feature_importances = feature_importances.groupby( 'feature')['importance'].agg([np.mean, np.std]) feature_importances = feature_importances.sort_values(by='mean') feature_importances = feature_importances.reset_index() # plot probas for probas plt.subplot(3, 3, 1) plt.ylabel('proba', fontsize=10) plt.title('predicted probas', fontsize=12, fontweight="bold") #plt.legend(loc="lower right") # plot probas for both classes plt.subplot(3, 3, 2) plt.ylabel('proba', fontsize=10) plt.title('predicted probas for different classes', fontsize=12, fontweight="bold") #plt.legend(loc="lower right") # plot the perfectly calibrated curve plt.subplot(3, 3, 3) plt.plot([0, 1], [0, 1], "k--", label="Perfectly calibrated", lw=1, color='grey') plt.ylabel("Fraction of positives", fontsize=10) plt.xlabel("Mean predicted value", fontsize=10) plt.ylim([-0.05, 1.05]) #plt.legend(loc="lower right") plt.title('Calibration plots (reliability curve)', fontsize=12, fontweight="bold") # plot evals_result # plt.subplot(3, 3, 4) # plt.xlabel('n_estimators', fontsize=10) # plt.ylabel('roc_auc', fontsize=10) # plt.title('ROC through n_estimators', fontsize=12, fontweight="bold") #plt.legend(loc="lower right") # plot the averaged ROC curve plt.subplot(3, 3, 4) mean_tpr /= len(ss) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) plt.plot(mean_fpr, mean_tpr, 'k--', label='Mean ROC: %0.2f' % mean_auc, lw=1, color='grey') plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6)) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate', fontsize=10) plt.ylabel('True Positive Rate', fontsize=10) plt.title('ROC', fontsize=12, fontweight="bold") plt.legend(loc="lower right") # plot averaged PR curve plt.subplot(3, 3, 5) mean_precision /= len(ss) mean_pr_auc = auc(mean_recall, mean_precision) plt.plot(mean_recall, mean_precision, 'k--', label='Mean PR: %0.2f' % mean_pr_auc, lw=1, color='grey') plt.xlabel('Recall', fontsize=10) plt.ylabel('Precision', fontsize=10) plt.title('Precision-recall', fontsize=12, fontweight="bold") plt.legend(loc="lower right") def autolabel(rects, ax, mark): """ Attach a text label above each bar displaying its height """ for rect in rects: height = rect.get_height() if mark == 'int': ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, '%d' % int(height), ha='center', va='bottom', fontsize=10) else: ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height, '%s' % str(round(height, 2)), ha='center', va='bottom', fontsize=10) # plot lift cumulative ax1 = plt.subplot(3, 3, 6) mean_lift /= len(ss) rects1 = plt.bar(range(10), mean_lift, color='#abdda4') plt.axhline(y=1, color='grey', linestyle='--', lw=1) plt.xticks(range(10), ['0-%d' % (num) for num in 
range(10, 110, 10)], rotation='vertical') plt.xlabel('Rank percentage interval', fontsize=10) plt.ylabel('lift', fontsize=10) plt.title('Lift cumulative plot', fontsize=12, fontweight="bold") plt.ylim([ax1.get_ylim()[0], ax1.get_ylim()[1] * 1.2]) # plot lift decay ax2 = plt.subplot(3, 3, 7) mean_lift_decay /= len(ss) rects2 = plt.bar(range(10), mean_lift_decay, color='#fdae61') plt.axhline(y=1, color='grey', linestyle='--', lw=1) plt.xticks(range(10), ['%d-%d' % (num - 10, num) for num in range(10, 110, 10)], rotation='vertical') plt.xlabel('Rank percentage interval', fontsize=10) plt.ylabel('lift', fontsize=10) plt.title('Lift decay plot', fontsize=12, fontweight="bold") plt.ylim([ax2.get_ylim()[0], ax2.get_ylim()[1] * 1.2]) # plot number of calls ax3 = plt.subplot(3, 3, 8) rects3 = plt.bar(range(5), nb_calls_cv['nb_contacts'].values[:-1], color='#6baed6') plt.xticks(range(5), [100, 200, 500, 1000, 2000], rotation='vertical') #, 3000 plt.xlabel('Number of target get', fontsize=10) plt.ylabel('Number of contacts', fontsize=10) plt.title('Number of calls', fontsize=12, fontweight="bold") plt.ylim([ax3.get_ylim()[0], ax3.get_ylim()[1] * 1.2]) autolabel(rects1, ax1, 'float') autolabel(rects2, ax2, 'float') autolabel(rects3, ax3, 'int') fig.subplots_adjust(hspace=.25, wspace=0.25) plt.show() plt.gcf().clear() return results_cv_targeting, feature_importances, nb_calls_cv
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [(lr, 'Logistic'),
                  (gnb, 'Naive Bayes'),
                  (svc, 'Support Vector Classification'),
                  (rfc, 'Random Forest')]:
    clf.fit(X_train1, y_train1)
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test1)[:, 1]
    else:  # use decision function
        prob_pos = clf.decision_function(X_test1)
        prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test1, prob_pos, n_bins=10)

    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             label="%s" % (name, ))

    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
             histtype="step", lw=2)

ax1.set_ylabel("Fraction of positives")
plt.plot(np.arange(0, epochs), H.history["acc"], label="Train Accuracy")
plt.plot(np.arange(0, epochs), H.history["val_acc"], label="Validation Accuracy")
plt.plot(np.arange(0, epochs), H.history['brier_score'], label='Brier Score')
plt.plot(np.arange(0, epochs), H.history['val_brier_score'],
         label='Validation Brier Score')
plt.title("Model 1: Loss, Accuracy, and Brier Score on MNIST Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy/Brier Score")
plt.legend(loc="center right")
plt.savefig("lossAccBrierPlot1.png")

# Plot 2: Calibration Plot
nn_y1, nn_x1 = calibration_curve(y_test1, y_pred1, n_bins=11)
fig, ax = plt.subplots()
plt.plot(nn_x1, nn_y1, marker='o', linewidth=1, label='Model 1 NN')
line = mlines.Line2D([0, 1], [0, 1], color='black')
transform = ax.transAxes
line.set_transform(transform)
ax.add_line(line)
fig.suptitle('MNIST Data Calibration with Model 1')
ax.set_xlabel('Predicted Probability')
ax.set_ylabel('True Probability per Bin')
plt.legend(loc='upper left')
plt.savefig("calibrationPlot1.png")

# Plot 3: 2-class Density Plot
plt.figure()
for i in [0, 1]:
def compute_calibration_summary(
        eval_dict: Dict[str, pd.DataFrame],
        label_col: str = 'label',
        score_col: str = 'score',
        n_bins: int = 15,
        strategy: str = 'quantile',
        round_digits: int = 4,
        show: bool = True,
        save_plot_path: Optional[str] = None) -> pd.DataFrame:
    """
    Plots the calibration curve and computes the summary statistics for the model.

    Parameters
    ----------
    eval_dict : dict
        We can evaluate multiple calibration model's performance in one go.
        The key is the model name used to distinguish different calibration
        model, the value is the dataframe that stores the binary true targets
        and the predicted score for the positive class.

    label_col : str
        Column name for the dataframe in ``eval_dict`` that stores the binary
        true targets.

    score_col : str
        Column name for the dataframe in ``eval_dict`` that stores the
        predicted score.

    n_bins : int, default 15
        Number of bins to discretize the calibration curve plot and calibration
        error statistics. A bigger number requires more data, but will be
        closer to the true calibration error.

    strategy : {'uniform', 'quantile'}, default 'quantile'
        Strategy used to define the boundary of the bins.

        - uniform: The bins have identical widths.
        - quantile: The bins have the same number of samples and depend
          on the predicted score.

    round_digits : default 4
        Round the evaluation metric.

    show : bool, default True
        Whether to show the plots on the console or jupyter notebook.

    save_plot_path : str, default None
        Path where we'll store the calibration plot. None means it will not
        save the plot.

    Returns
    -------
    df_metrics : pd.DataFrame
        Corresponding metrics for all the input dataframe.
    """
    fig, (ax1, ax2) = plt.subplots(2)

    # estimator_metrics stores list of dict, e.g.
    # [{'auc': 0.776, 'name': 'xgb'}]
    estimator_metrics = []
    for name, df_eval in eval_dict.items():
        prob_true, prob_pred = calibration_curve(
            df_eval[label_col], df_eval[score_col],
            n_bins=n_bins, strategy=strategy)

        calibration_error = compute_calibration_error(
            df_eval[label_col], df_eval[score_col], n_bins, round_digits)
        metrics_dict = compute_binary_score(
            df_eval[label_col], df_eval[score_col], round_digits)
        metrics_dict['calibration_error'] = calibration_error
        metrics_dict['name'] = name
        estimator_metrics.append(metrics_dict)

        ax1.plot(prob_pred, prob_true, 's-', label=name)
        ax2.hist(df_eval[score_col], range=(0, 1), bins=n_bins,
                 label=name, histtype='step', lw=2)

    ax1.plot([0, 1], [0, 1], 'k:', label='perfect')
    ax1.set_xlabel('Fraction of positives (Predicted)')
    ax1.set_ylabel('Fraction of positives (Actual)')
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc='upper left', ncol=2)
    ax1.set_title('Calibration Plots (Reliability Curve)')

    ax2.set_xlabel('Predicted scores')
    ax2.set_ylabel('Count')
    ax2.set_title('Histogram of Predicted Scores')
    ax2.legend(loc='upper right', ncol=2)

    plt.tight_layout()
    if show:
        plt.show()

    if save_plot_path is not None:
        save_dir = os.path.dirname(save_plot_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        fig.savefig(save_plot_path, dpi=300, bbox_inches='tight')

    plt.close(fig)
    df_metrics = pd.DataFrame(estimator_metrics)
    return df_metrics
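# --- Added usage sketch (assumption, not part of the original function) ---
# df_xgb and df_lr are hypothetical hold-out prediction frames, each with a
# binary 'label' column and a positive-class 'score' column, as described in
# the docstring above. compute_calibration_error and compute_binary_score are
# helpers assumed to live alongside this function.
eval_dict = {'xgb': df_xgb, 'logistic': df_lr}
df_metrics = compute_calibration_summary(
    eval_dict,
    label_col='label',
    score_col='score',
    n_bins=15,
    strategy='quantile',
    save_plot_path='plots/calibration.png')
print(df_metrics)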
def plot_calibration_curve(y_true, probas_list, clf_names=None, n_bins=10,
                           title='Calibration plots (Reliability Curves)',
                           ax=None, figsize=None, cmap='nipy_spectral',
                           title_fontsize="large", text_fontsize="medium"):
    """Plots calibration curves for a set of classifier probability estimates.

    Plotting the calibration curves of a classifier is useful for determining
    whether or not you can interpret their predicted probabilities directly as
    a confidence level. For instance, a well-calibrated binary classifier
    should classify the samples such that for samples to which it gave a score
    of 0.8, around 80% should actually be from the positive class.

    This function currently only works for binary classification.

    Args:
        y_true (array-like, shape (n_samples)):
            Ground truth (correct) target values.

        probas_list (list of array-like, shape (n_samples, 2) or (n_samples,)):
            A list containing the outputs of binary classifiers'
            :func:`predict_proba` method or :func:`decision_function` method.

        clf_names (list of str, optional): A list of strings, where each string
            refers to the name of the classifier that produced the
            corresponding probability estimates in `probas_list`. If ``None``,
            the names "Classifier 1", "Classifier 2", etc. will be used.

        n_bins (int, optional): Number of bins. A bigger number requires more
            data.

        title (string, optional): Title of the generated plot. Defaults to
            "Calibration plots (Reliability Curves)"

        ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to
            plot the curve. If None, the plot is drawn on a new set of axes.

        figsize (2-tuple, optional): Tuple denoting figure size of the plot
            e.g. (6, 6). Defaults to ``None``.

        cmap (string or :class:`matplotlib.colors.Colormap` instance, optional):
            Colormap used for plotting the projection. View Matplotlib Colormap
            documentation for available options.
            https://matplotlib.org/users/colormaps.html

        title_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "large".

        text_fontsize (string or int, optional): Matplotlib-style fontsizes.
            Use e.g. "small", "medium", "large" or integer-values. Defaults to
            "medium".

    Returns:
        :class:`matplotlib.axes.Axes`: The axes on which the plot was drawn.

    Example:
        >>> import scikitplot as skplt
        >>> rf = RandomForestClassifier()
        >>> lr = LogisticRegression()
        >>> nb = GaussianNB()
        >>> svm = LinearSVC()
        >>> rf_probas = rf.fit(X_train, y_train).predict_proba(X_test)
        >>> lr_probas = lr.fit(X_train, y_train).predict_proba(X_test)
        >>> nb_probas = nb.fit(X_train, y_train).predict_proba(X_test)
        >>> svm_scores = svm.fit(X_train, y_train).decision_function(X_test)
        >>> probas_list = [rf_probas, lr_probas, nb_probas, svm_scores]
        >>> clf_names = ['Random Forest', 'Logistic Regression',
        ...              'Gaussian Naive Bayes', 'Support Vector Machine']
        >>> skplt.metrics.plot_calibration_curve(y_test,
        ...                                      probas_list,
        ...                                      clf_names)
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490>
        >>> plt.show()

        .. image:: _static/examples/plot_calibration_curve.png
           :align: center
           :alt: Calibration Curves
    """
    y_true = np.asarray(y_true)
    if not isinstance(probas_list, list):
        raise ValueError('`probas_list` does not contain a list.')

    classes = np.unique(y_true)
    if len(classes) > 2:
        raise ValueError('plot_calibration_curve only '
                         'works for binary classification')

    if clf_names is None:
        clf_names = ['Classifier {}'.format(x + 1)
                     for x in range(len(probas_list))]

    if len(clf_names) != len(probas_list):
        raise ValueError('Length {} of `clf_names` does not match length {} of'
                         ' `probas_list`'.format(len(clf_names),
                                                 len(probas_list)))

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)

    ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

    for i, probas in enumerate(probas_list):
        probas = np.asarray(probas)
        if probas.ndim > 2:
            raise ValueError('Index {} in probas_list has invalid '
                             'shape {}'.format(i, probas.shape))
        if probas.ndim == 2:
            probas = probas[:, 1]

        if probas.shape != y_true.shape:
            raise ValueError('Index {} in probas_list has invalid '
                             'shape {}'.format(i, probas.shape))

        probas = (probas - probas.min()) / (probas.max() - probas.min())

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_true, probas, n_bins=n_bins)

        color = plt.cm.get_cmap(cmap)(float(i) / len(probas_list))

        ax.plot(mean_predicted_value, fraction_of_positives, 's-',
                label=clf_names[i], color=color)

    ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlabel('Mean predicted value', fontsize=text_fontsize)
    ax.set_ylabel('Fraction of positives', fontsize=text_fontsize)
    ax.set_ylim([-0.05, 1.05])
    ax.legend(loc='lower right')

    return ax
def plot_calibration_curve(y_true, probs, n_bins=10, ax=None,
                           bins_by_percentile=True,
                           hist=_calibration_curve_hist, normalize=False,
                           color='b', show_metrics=False, plot_lowess=False,
                           sz=18, as_subplot=False, **hist_params):
    """
    Plot a calibration curve

    Some taken from Andreas Muller's course:
    http://www.cs.columbia.edu/~amueller/comsw4995s18/

    Parameters
    ----------
    y_true : True labels
    probs: probabilites for the positive class
    n_bins: number of bins for calibration curve
    ax: axis
    bins_by_percentile: True will generate curve bins by percentiles instead of values
    hist: function with inputs (ytest, ypred, ax, **hist_params),
        use None to avoid plotting histogram
    normalize: True will normalize predicted values into interval [0, 1]
    color: graph color
    show_metrics: True will add a panel with some metrics.
        Unusable with abnormal figure sizes.
    plot_lowess: calculates a moving average and plots smooth curve by lowess method
    sz: Fontsize for plots
    as_subplot: plots calibration curve and under it a histogram as subplots

    Returns
    -------
    curve: axes of calibration curve
    brier_score: brier score result
    """
    y_true, probs = possibly_values(y_true), possibly_values(probs)

    if (ax is None) & (as_subplot == False):
        fig, ax = plt.subplots(1, 1, figsize=(6, 5))

    if normalize:  # Normalize predicted values into interval [0, 1]
        probs = (probs - probs.min()) / (probs.max() - probs.min())

    if bins_by_percentile:
        prob_true, prob_pred = _calibration_curve_percentiles(
            y_true, probs, n_bins=n_bins, normalize=False)
        x_lbl = 'Predicted probability'
    else:
        prob_true, prob_pred = calibration_curve(
            y_true, probs, n_bins=n_bins, normalize=False)
        x_lbl = 'Predicted probability (#bins=' + str(n_bins) + ')'

    if as_subplot:
        # plot calibration curve and hist as subplots
        fig, axes = plt.subplots(2, 1, figsize=(6, 7), sharex=False,
                                 gridspec_kw={'height_ratios': [4, 1]})
        ax = axes[0]
        # plot hist
        ax_hist = axes[1]
        _calibration_curve_hist(y_true, probs, ax=ax, ax_hist=ax_hist,
                                as_subplot=True, sz=sz, y_lim_multiplier=1)
        hist = None
        fig.subplots_adjust(hspace=0.04)

    if hist is not None:
        hist(y_true, probs, ax=ax, **hist_params)

    ax.plot([0, 1], [0, 1], ':', c='k', label='Ideal')

    if plot_lowess:
        # win size is the size of bins
        win_size = len(probs) // n_bins
        # sort by probs
        sorted_inds = np.argsort(probs)
        # obtain moving averages
        mean_x = moving_average(probs[sorted_inds], win_size)
        mean_y = moving_average(y_true[sorted_inds], win_size)
        # smoothen with lowess
        ax.plot(mean_x, lowess(mean_y, mean_x, frac=1 / 4)[:, 1],
                color=color, alpha=0.4, lw=3, label='Non-parametric')

    ax.plot(prob_pred, prob_true, ls='', marker="d", markersize=8,
            color=color, label='Grouped patients')

    ax.set_xlabel(x_lbl, fontsize=sz)
    ax.set_ylabel('Fraction of positive samples', fontsize=sz)
    ax.set_title('Calibration curve', fontsize=sz + 2)

    # Visuals
    ax.grid()
    ax.legend(fontsize=sz // 1.1)
    ax.set_xticks(np.arange(0, 1.01, 0.1))
    if as_subplot:
        ax.set_xlabel('')
        xticklabels = ['' for i in np.arange(0, 101, 10)]
    else:
        xticklabels = [str(i) + '%' for i in np.arange(0, 101, 10)]
    ax.set_xticklabels(xticklabels, fontdict={'fontsize': sz // 1.4})
    yticklabels = np.round(np.arange(0, 1.01, 0.1), 1)
    ax.set_yticks(yticklabels)
    ax.set_yticklabels(yticklabels, fontdict={'fontsize': sz // 1.4})
    # ax.set_aspect('equal');
    ax.set_xlim(0, 1.0)
    ax.set_ylim(0, 1.0)

    brier_score = brier_score_loss(y_true, probs)

    if show_metrics:
        # intercept: difference between mean predicted probability and
        # observed event rate (computed from `probs`, the positive-class scores)
        intercept = np.mean(probs) - np.mean(y_true)
        fpr, tpr, roc_thresholds = roc_curve(y_true, probs)
        roc_auc = auc(fpr, tpr)
        label = "\nintercept {:.3f}".format(intercept) \
                + "\nC (ROC) {:.3f}".format(roc_auc) \
                + "\nBrier {:.3f}".format(brier_score)
        ax.text(x=0.05, y=0.68, s=label, fontsize=11,
                bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 10})

    return (ax, brier_score)
TrF = Train.drop(["11"], axis=1) TsL = TsL.values.ravel() # rewrites into correct format TrL = TrL.values.ravel() for clf, name in [(lr, 'Logistic'), (gbc, 'Gradient Boosting Classifier')]: clf.fit(TrF, TrL) # train with the classifier # Predict with the classifier if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(TsF)[:, 1] else: # use decision function prob_pos = clf.decision_function(TsF) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) fraction_of_positives, mean_predicted_value = \ calibration_curve(TsL, prob_pos, n_bins=10) # compute the test accuracy and brier score accuracy for the classifier y_pred = clf.predict(TsF) clf_score = brier_score_loss(TsL, prob_pos, pos_label=TsL.max()) print("%s:" % name) print("\tAccuracy:", metrics.accuracy_score(TsL, y_pred)) print("\tBrier(p): %1.3f" % (clf_score)) #print("\tPrecision: %1.3f" % precision_score(TsL, y_pred)) #print("\tRecall: %1.3f" % recall_score(TsL, y_pred)) #print("\tF1: %1.3f\n" % f1_score(TsL, y_pred)) if name == 'Gradient Boosting Classifier': # cumulate the accuracy and brier for the gradient boosting classifier for cross-validation method Acc += metrics.accuracy_score(TsL, y_pred) brier += clf_score else:
# generate 2 class dataset
X, y = make_classification(n_samples=1000, n_classes=2, weights=[1, 1],
                           random_state=1)
# split into train/test sets
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.5,
                                                random_state=2)
# uncalibrated predictions
yhat_uncalibrated = uncalibrated(trainX, testX, trainy)
# calibrated predictions
yhat_calibrated = calibrated(trainX, testX, trainy)
# reliability diagrams
fop_uncalibrated, mpv_uncalibrated = calibration_curve(testy, yhat_uncalibrated,
                                                       n_bins=10, normalize=True)
fop_calibrated, mpv_calibrated = calibration_curve(testy, yhat_calibrated,
                                                   n_bins=10)
# plot perfectly calibrated
pyplot.plot([0, 1], [0, 1], linestyle='--', color='black')
# plot model reliabilities
pyplot.plot(mpv_uncalibrated, fop_uncalibrated, marker='.')
pyplot.plot(mpv_calibrated, fop_calibrated, marker='.')
pyplot.savefig('caliberation_performance.png')
pyplot.show()
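# --- Added sketch of the helpers used above (an assumption, not the original code) ---
# uncalibrated() and calibrated() are defined elsewhere; a common pattern for this
# kind of reliability-diagram demo is an SVM's raw decision scores versus the same
# SVM wrapped in CalibratedClassifierCV.
from sklearn.svm import SVC
from sklearn.calibration import CalibratedClassifierCV

def uncalibrated(trainX, testX, trainy):
    # raw decision scores; min-max scaled later via normalize=True in calibration_curve
    model = SVC()
    model.fit(trainX, trainy)
    return model.decision_function(testX)

def calibrated(trainX, testX, trainy):
    # the same SVM with sigmoid (Platt) calibration, returning probabilities
    model = CalibratedClassifierCV(SVC(), method='sigmoid', cv=5)
    model.fit(trainX, trainy)
    return model.predict_proba(testX)[:, 1]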
def plot_all_performances(y, yhat, target_type="CLASS", ylim=None, w=18, h=12, pdf=None): # y=df_test["target"]; yhat=yhat_test; ylim = None; w=12; h=8 fig, ax = plt.subplots(2, 3) if target_type == "CLASS": # Roc curve ax_act = ax[0, 0] fpr, tpr, cutoff = roc_curve(y, yhat[:, 1]) roc_auc = roc_auc_score(y, yhat[:, 1]) sns.lineplot(fpr, tpr, ax=ax_act, palette=sns.xkcd_palette(["red"])) props = { 'xlabel': r"fpr: P($\^y$=1|$y$=0)", 'ylabel': r"tpr: P($\^y$=1|$y$=1)", 'title': "ROC (AUC = {0:.2f})".format(roc_auc) } ax_act.set(**props) # Confusion matrix ax_act = ax[0, 1] df_conf = pd.DataFrame( confusion_matrix(y, np.where(yhat[:, 1] > 0.5, 1, 0))) acc = accuracy_score(y, np.where(yhat[:, 1] > 0.5, 1, 0)) sns.heatmap(df_conf, annot=True, fmt=".5g", cmap="Greys", ax=ax_act) props = { 'xlabel': "Predicted label", 'ylabel': "True label", 'title': "Confusion Matrix (Acc ={0: .2f})".format(acc) } ax_act.set(**props) # Distribution plot ax_act = ax[0, 2] sns.distplot(yhat[:, 1][y == 1], color="red", label="1", bins=20, ax=ax_act) sns.distplot(yhat[:, 1][y == 0], color="blue", label="0", bins=20, ax=ax_act) props = { 'xlabel': r"Predictions ($\^y$)", 'ylabel': "Density", 'title': "Distribution of Predictions", 'xlim': (0, 1) } ax_act.set(**props) ax_act.legend(title="Target", loc="best") # Calibration ax_act = ax[1, 0] true, predicted = calibration_curve(y, yhat[:, 1], n_bins=10) sns.lineplot(predicted, true, ax=ax_act, marker="o") props = { 'xlabel': r"$\bar{\^y}$ in $\^y$-bin", 'ylabel': r"$\bar{y}$ in $\^y$-bin", 'title': "Calibration" } ax_act.set(**props) # Precision Recall ax_act = ax[1, 1] prec, rec, cutoff = precision_recall_curve(y, yhat[:, 1]) prec_rec_auc = average_precision_score(y, yhat[:, 1]) sns.lineplot(rec, prec, ax=ax_act, palette=sns.xkcd_palette(["red"])) props = { 'xlabel': r"recall=tpr: P($\^y$=1|$y$=1)", 'ylabel': r"precision: P($y$=1|$\^y$=1)", 'title': "Precision Recall Curve (AUC = {0:.2f})".format(prec_rec_auc) } ax_act.set(**props) for thres in np.arange(0.1, 1, 0.1): i_thres = np.argmax(cutoff > thres) ax_act.annotate("{0: .1f}".format(thres), (rec[i_thres], prec[i_thres]), fontsize=10) # Precision ax_act = ax[1, 2] pct_tested = np.array([]) for thres in cutoff: pct_tested = np.append(pct_tested, [np.sum(yhat[:, 1] >= thres) / len(yhat)]) sns.lineplot(pct_tested, prec[:-1], ax=ax_act, palette=sns.xkcd_palette(["red"])) props = { 'xlabel': "% Samples Tested", 'ylabel': r"precision: P($y$=1|$\^y$=1)", 'title': "Precision Curve" } ax_act.set(**props) for thres in np.arange(0.1, 1, 0.1): i_thres = np.argmax(cutoff > thres) ax_act.annotate("{0: .1f}".format(thres), (pct_tested[i_thres], prec[i_thres]), fontsize=10) if target_type == "REGR": def plot_scatter(x, y, xlabel="x", ylabel="y", title=None, ylim=None, ax_act=None): if ylim is not None: ax_act.set_ylim(ylim) tmp_scale = (ylim[1] - ylim[0]) / (np.max(y) - np.min(y)) else: tmp_scale = 1 tmp_cmap = colors.LinearSegmentedColormap.from_list( "wh_bl_yl_rd", [(1, 1, 1, 0), "blue", "yellow", "red"]) p = ax_act.hexbin(x, y, gridsize=(int(50 * tmp_scale), 50), cmap=tmp_cmap) plt.colorbar(p, ax=ax_act) sns.regplot(x, y, lowess=True, scatter=False, color="black", ax=ax_act) ax_act.set_title(title) ax_act.set_ylabel(ylabel) ax_act.set_xlabel(xlabel) ax_act.set_facecolor('white') # ax_act.grid(False) ylim = ax_act.get_ylim() xlim = ax_act.get_xlim() # Inner Histogram on y ax_act.set_xlim(xlim[0] - 0.3 * (xlim[1] - xlim[0])) inset_ax = ax_act.inset_axes([0, 0, 0.2, 1]) inset_ax.set_axis_off() 
ax_act.get_shared_y_axes().join(ax_act, inset_ax) sns.distplot(y, color="grey", vertical=True, ax=inset_ax) # Inner-inner Boxplot on y xlim_inner = inset_ax.get_xlim() inset_ax.set_xlim(xlim_inner[0] - 0.3 * (xlim_inner[1] - xlim_inner[0])) inset_inset_ax = inset_ax.inset_axes([0, 0, 0.2, 1]) inset_inset_ax.set_axis_off() inset_ax.get_shared_y_axes().join(inset_ax, inset_inset_ax) sns.boxplot(y, palette=["grey"], orient="v", ax=inset_inset_ax) # Inner Histogram on x ax_act.set_ylim(ylim[0] - 0.3 * (ylim[1] - ylim[0])) inset_ax = ax_act.inset_axes([0, 0, 1, 0.2]) inset_ax.set_axis_off() ax_act.get_shared_x_axes().join(ax_act, inset_ax) sns.distplot(x, color="grey", ax=inset_ax) # Inner-inner Boxplot on x ylim_inner = inset_ax.get_ylim() inset_ax.set_ylim(ylim_inner[0] - 0.3 * (ylim_inner[1] - ylim_inner[0])) inset_inset_ax = inset_ax.inset_axes([0, 0, 1, 0.2]) inset_inset_ax.set_axis_off() inset_ax.get_shared_x_axes().join(inset_ax, inset_inset_ax) sns.boxplot(x, palette=["grey"], ax=inset_inset_ax) ax_act.set_xlim(xlim[0] - 0.3 * (xlim[1] - xlim[0])) # need to set again # Scatter plots plot_scatter(yhat, y, xlabel=r"$\^y$", ylabel="y", title=r"Observed vs. Fitted ($\rho_{Spearman}$ = " + str(spearman_loss_func(y, yhat).round(3)) + ")", ylim=ylim, ax_act=ax[0, 0]) plot_scatter(yhat, y - yhat, xlabel=r"$\^y$", ylabel=r"y-$\^y$", title="Residuals vs. Fitted", ylim=ylim, ax_act=ax[1, 0]) plot_scatter(yhat, abs(y - yhat), xlabel=r"$\^y$", ylabel=r"|y-$\^y$|", title="Absolute Residuals vs. Fitted", ylim=ylim, ax_act=ax[1, 1]) plot_scatter(yhat, abs(y - yhat) / abs(y), xlabel=r"$\^y$", ylabel=r"|y-$\^y$|/|y|", title="Relative Residuals vs. Fitted", ylim=ylim, ax_act=ax[1, 2]) # Calibration ax_act = ax[0, 1] df_calib = pd.DataFrame({"y": y, "yhat": yhat})\ .assign(bin=lambda x: pd.qcut(x["yhat"], 10, duplicates="drop").astype("str"))\ .groupby(["bin"], as_index=False).agg("mean")\ .sort_values("yhat") sns.lineplot("yhat", "y", data=df_calib, ax=ax_act, marker="o") props = { 'xlabel': r"$\bar{\^y}$ in $\^y$-bin", 'ylabel': r"$\bar{y}$ in $\^y$-bin", 'title': "Calibration" } ax_act.set(**props) # Distribution ax_act = ax[0, 2] sns.distplot(y, color="blue", label="y", ax=ax_act) sns.distplot(yhat, color="red", label=r"$\^y$", ax=ax_act) ax_act.set_ylabel("density") ax_act.set_xlabel("") ax_act.set_title("Distribution") ylim = ax_act.get_ylim() ax_act.set_ylim(ylim[0] - 0.3 * (ylim[1] - ylim[0])) inset_ax = ax_act.inset_axes([0, 0, 1, 0.2]) inset_ax.set_axis_off() ax_act.get_shared_x_axes().join(ax_act, inset_ax) df_distr = pd.concat([ pd.DataFrame({ "type": "y", "values": y }), pd.DataFrame({ "type": "yhat", "values": yhat }) ]) sns.boxplot( x=df_distr["values"], y=df_distr["type"].astype("category"), # order=df[feature_act].value_counts().index.values[::-1], palette=["blue", "red"], ax=inset_ax) ax_act.legend(title="", loc="best") # Adapt figure fig.set_size_inches(w=w, h=h) fig.tight_layout() if pdf is not None: fig.savefig(pdf) # plt.close(fig) plt.show()
def fraction_of_positives_runs(month_day, runs, horizons, bounds_dict, N_bins, base_folder='/a2/uaren/travis', ): bins = np.arange(N_bins) multi_column = [np.repeat(horizons, bins.size), np.tile(bins, len(horizons))] multi_column = list(zip(*multi_column)) multi_column = pd.MultiIndex.from_tuples( multi_column, names=['horizon', 'bin']) for this_month_day in month_day: print(this_month_day) year = 2014 month = this_month_day[0] day = this_month_day[1] truth = os.path.join( base_folder, f'data/{year:04}/{month:02}/{day:02}/data.nc') truth = xr.open_dataset(truth) truth = truth['ci'] truth = letkf_io.add_crop_attributes(truth) truth = return_error_domain(truth) truth = truth.load() full_index = truth.time.to_pandas().index for run in runs: print(run) full_day = letkf_io.return_day( year, month, day, run, base_folder) full_day = letkf_io.add_crop_attributes(full_day) full_day = return_error_domain(full_day) full_day = full_day['ci'] full_day = full_day.load() for bound_name, bounds in bounds_dict.items(): print(bound_name) if bounds[0] == 0: truth_bounded = (truth < bounds[1]).astype('float') full_day_bounded = (full_day < bounds[1]).astype('float') elif bounds[1] == 1: truth_bounded = (truth >= bounds[0]).astype('float') full_day_bounded = (full_day >= bounds[0]).astype('float') else: truth_bounded = np.logical_and( truth >= bounds[0], truth < bounds[1]).astype('float') full_day_bounded = np.logical_and( full_day >= bounds[0], full_day < bounds[1]).astype('float') brier_score = pd.DataFrame( index=full_index, columns=horizons) fraction_of_positives = pd.DataFrame( index=full_index, columns=multi_column) mean_predicted_prob = fraction_of_positives.copy() forecast_hist = fraction_of_positives.copy() truth_hist = pd.DataFrame( index=full_index, columns=bins) for tt in range(truth_bounded.shape[0]): hist, temp = np.histogram( truth_bounded.values[tt], bins=N_bins, range=(0, 1)) truth_hist.iloc[tt] = hist for horizon in horizons: this_full_day = return_horizon(full_day_bounded, horizon) these_error_times = np.intersect1d( full_index, this_full_day.time.to_pandas().index) this_full_day = this_full_day.sel(time=these_error_times) this_full_day = this_full_day.mean(dim='ensemble_number') # account for boundary cases this_full_day = (this_full_day - 1e-8).clip(0, 1) this_truth = truth_bounded.sel(time=these_error_times) this_brier_score = ps.brier_score( this_truth.values.ravel(), this_full_day.values.ravel()) this_brier_score = this_brier_score.reshape( this_truth.shape).mean(axis=(1, 2)) this_brier_score = pd.Series(this_brier_score, index=these_error_times) brier_score[horizon] = this_brier_score this_fraction_of_positives = np.ones( [this_truth.shape[0], N_bins]) * np.nan this_mean_predicted_prob = np.ones( [this_truth.shape[0], N_bins]) * np.nan this_forecast_hist = this_fraction_of_positives.copy() for tt in range(this_truth.shape[0]): this_forecast_hist[tt], temp = np.histogram( this_full_day.values[tt], bins=N_bins, range=(0, 1)) fop, mpp = calibration.calibration_curve( this_truth.values[tt].ravel(), this_full_day.values[tt].ravel(), n_bins=N_bins) if fop.size < N_bins: correct_bins = np.floor(mpp*N_bins).astype('int') indexes = np.setdiff1d(bins, correct_bins) indexes -= np.arange(indexes.size) fop = np.insert(fop, indexes, 0) mpp = np.insert(mpp, indexes, 0) this_fraction_of_positives[tt] = fop this_mean_predicted_prob[tt] = mpp this_forcast_hist = pd.DataFrame( this_forecast_hist, index=these_error_times, columns=bins) forecast_hist[horizon] = this_forcast_hist this_fraction_of_positives 
= pd.DataFrame( this_fraction_of_positives, index=these_error_times, columns=bins) fraction_of_positives[horizon] = this_fraction_of_positives this_mean_predicted_prob = pd.DataFrame( this_mean_predicted_prob, index=these_error_times, columns=bins) mean_predicted_prob[horizon] = this_mean_predicted_prob file_path = os.path.join( base_folder, 'results', f'{year:04}', f'{month:02}', f'{day:02}', run) file_path = letkf_io.find_latest_run(file_path) this_folder = (bound_name + '_' + str(bounds[0]).replace('.', 'p') + '_' + str(bounds[1]).replace('.', 'p')) file_path = os.path.join( file_path, this_folder) if not os.path.exists(file_path): os.mkdir(file_path) this_file_path = os.path.join(file_path, 'brier_score.h5') brier_score.to_hdf(this_file_path, 'brier_score') this_file_path = os.path.join(file_path, 'truth_hist.h5') truth_hist.to_hdf(this_file_path, 'truth_hist') this_file_path = os.path.join(file_path, 'forecast_hist.h5') forecast_hist.to_hdf(this_file_path, 'forecast_hist') this_file_path = os.path.join(file_path, 'fraction_of_positives.h5') fraction_of_positives.to_hdf(this_file_path, 'fraction_of_positives') this_file_path = os.path.join(file_path, 'mean_predicted_prob.h5') mean_predicted_prob.to_hdf(this_file_path, 'mean_predicted_prob')
# y, y_pred, prob_pos = get_model_outcomes(model_type, model_lab, website_path, results_path)
plt.close()
fig_index = 1
fig = plt.figure(fig_index, figsize=(10, 10))
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((3, 1), (2, 0))
ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
name = "Partners"
fraction_of_positives, mean_predicted_value = \
    calibration_curve(summary_match[outcome], summary_match['AverageProbability'],
                      n_bins=10, strategy=strategy)
model_score = brier_score_loss(summary_match[outcome], summary_match['AverageProbability'],
                               pos_label=1)
ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
         label="%s (%1.3f)" % (name, model_score))
ax2.hist(summary_match['AverageProbability'], range=(0, 1), bins=10, label=name,
         histtype="step", lw=2)
def plot_calibration_curve(est, name, fig_index): ''' Plot calibration curve for est w/o and with calibration. ''' # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1.0, solver='lbfgs') fig = plt.figure(fig_index, figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated') for clf, name in [(lr, 'Logistic'), (est, name), (isotonic, name + ' + Isotonic'), (sigmoid, name + ' + Sigmoid')]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, 'predict_proba'): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max()) print('%s:' % name) print('\tBrier: %1.3f' % (clf_score)) print('\tPrecision: %1.3f' % precision_score(y_test, y_pred)) print('\tRecall: %1.3f' % recall_score(y_test, y_pred)) print('\tF1: %1.3f\n' % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins = 10) ax1.plot(mean_predicted_value, fraction_of_positives, 's-', label='%s (%1.3f)' % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype='step', lw=2) ax1.set_ylabel('Fraction of positives') ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc='lower right') ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel('Mean predicted value') ax2.set_ylabel('Count') ax2.legend(loc='upper center', ncol=2) plt.tight_layout()
ha='right', va='top', transform=ax.transAxes, fontweight='bold') # calibration ax = fig.add_subplot(gs[1, 1]) ax.plot([0, 1], [0, 1], c='k', ls='--') #levels = np.arange(K-1) levels = [4] for i in levels: y2 = [(ys[bti] > i).astype(int) for bti in range(Nbt + 1)] yp2 = [yp_probs[bti][:, i + 1:].sum(axis=1) for bti in range(Nbt + 1)] obss_preds = [ calibration_curve(y2[bti], yp2[bti], n_bins=10, strategy='quantile') for bti in range(Nbt + 1) ] obss = [x[0] for x in obss_preds] preds = [x[1] for x in obss_preds] #pred, obss = bootstrap_curves(preds[0], preds, obss)#, bounds=[0,1]) obs = obss[0] pred = preds[0] cslopes = [linregress(x[1], x[0])[0] for x in obss_preds] cslope, intercept, _, _, _ = linregress(pred, obs) if Nbt > 0: cslope_lb, cslope_ub = np.percentile(cslopes[1:], (2.5, 97.5)) obs_lb, obs_ub = np.percentile(obss[1:], (2.5, 97.5), axis=0) else: cslope_lb = np.nan cslope_ub = np.nan
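# Minimal sketch of the calibration-slope computation used above, detached from
# the bootstrap plumbing; the helper name and n_bins default are illustrative.
# A slope near 1 and an intercept near 0 indicate good calibration.
from scipy.stats import linregress
from sklearn.calibration import calibration_curve

def calibration_slope(y_true, y_prob, n_bins=10):
    obs, pred = calibration_curve(y_true, y_prob, n_bins=n_bins, strategy='quantile')
    slope, intercept, *_ = linregress(pred, obs)
    return slope, intercept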
def report(clf, x_train, y_train, x_test, y_test, sample_weight=None, refit=False, importance_plot=False, confusion_labels=None, feature_labels=None, verbose=True): """ Trains the passed classifier if not already trained and reports various metrics of the trained classifier """ dump = dict() # Train Predictions and Accuracy train_predictions = clf.predict(x_train) train_acc = accuracy_score(y_train, train_predictions) ## Testing start = timeit.default_timer() test_predictions = clf.predict(x_test) test_acc = accuracy_score(y_test, test_predictions) y_probs = clf.predict_proba(x_test)[:, 1] roc_auc = roc_auc_score(y_test, y_probs) train_avg_cost = calc_custom_cost_score(y_train.values, train_predictions, list(x_train['item_price'])) test_avg_cost = calc_custom_cost_score(y_test.values, test_predictions, list(x_test['item_price'])) ## Model Memory model_mem = round(model_memory_size(clf) / 1024, 2) print(clf) print("\n=============================> TRAIN-TEST DETAILS <======================================") ## Metrics print(f"Train Size: {x_train.shape[0]} samples") print(f" Test Size: {x_test.shape[0]} samples") print("---------------------------------------------") print("Train Accuracy: ", train_acc) print(" Test Accuracy: ", test_acc) print("---------------------------------------------") print("Train Average Cost: ", train_avg_cost) print(" Test Average Cost: ", test_avg_cost) print("---------------------------------------------") print(" Area Under ROC (test): ", roc_auc) print("---------------------------------------------") print(f"Model Memory Size: {model_mem} kB") print("\n=============================> CLASSIFICATION REPORT <===================================") ## Classification Report clf_rep = classification_report(y_test, test_predictions, output_dict=True) print(classification_report(y_test, test_predictions, target_names=confusion_labels)) cost_matrix = calc_custom_cost_score(y_test.values, test_predictions, list(x_test['item_price']), matrix = True) # Calculate calibration using calibration_curve function prob_true, prob_pred = calibration_curve(y_test, y_probs, n_bins = 20) # Calculate Bayes optimal threshold threshold_bayes = (cost_matrix[1][0] # C(b,G) /(cost_matrix[1][0] # C(b,G) +cost_matrix[0][1])).round(5) # C(g,B) #Find optimal cutoff based on random cutoff values possible_cutoffs = np.arange(0.0, 1.0, 0.001) costs = {} for cutoff in possible_cutoffs: pred = np.where(y_probs >= cutoff, 1, 0) costs[cutoff] = (calc_custom_cost_score(y_test.values, pred, list(x_test['item_price']))) threshold_empiric = min(costs, key=costs.get) # Compare Thresholds pred_default = np.where(y_probs >= 0.5, 1, 0) # 0.5 is the default cut-off, equivalant to y_pred from above pred_bayes= np.where(y_probs >= threshold_bayes, 1, 0) # Using the cut-off defined by the cost-minimal threshold function pred_empiric = np.where(y_probs >= threshold_empiric, 1 , 0)# Empric cut-off err_cost_default = test_avg_cost err_cost_cost_bayes = calc_custom_cost_score(y_test.values,pred_bayes, list(x_test['item_price'])) err_cost_empiric = calc_custom_cost_score(y_test.values,pred_empiric, list(x_test['item_price'])) accurracy_default = accuracy_score(y_test, pred_default) accuracy_bayes = accuracy_score(y_test, pred_bayes) accuracy_empiric = accuracy_score(y_test, pred_empiric) # save best cutoff cutoffs = {0.5 : err_cost_default, threshold_bayes : err_cost_cost_bayes, threshold_empiric : err_cost_empiric } best_cutoff = min(cutoffs, key=cutoffs.get) best_err_cost = cutoffs[best_cutoff] # Compare Cutoffs 
table_data = [ ['', 'Default Cutoff', ' cost-minimal Bayes cutoff', 'Empric minimal cutoff'], ['Test Cutoff Threshold', 0.5, threshold_bayes, threshold_empiric], ['Test Error Cost', err_cost_default, err_cost_cost_bayes, err_cost_empiric], ['Test Accuracy', accurracy_default, accuracy_bayes, accuracy_empiric] ] print(tabulate(table_data, headers = 'firstrow')) if verbose: print("\n================================> COST-SENSITIVE EVALUTATION <=====================================") # Calibration curve plt.rcParams["figure.figsize"] = (12,6) # Plot results plt.plot(prob_pred, prob_true, marker = '.', label = clf.__class__.__name__) plt.title(f'Calibration Plot for {clf.__class__.__name__} model') plt.ylabel("True Probability per Bin") plt.xlabel("Predicted Probability") plt.plot([0, 1], [0, 1], linestyle = '--', label = 'Ideally Calibrated'); plt.legend(); plt.show() print("------------------------------------------------------------------------------------------") print("\n================================> CONFUSION MATRICES <=====================================") #Compare default error cost and accuracy to bayes error cost and accuracy cmat_default = metrics.confusion_matrix(y_test, pred_default) cmat_bayes = metrics.confusion_matrix(y_test, pred_bayes) cmat_empiric = metrics.confusion_matrix(y_test, pred_empiric) plt.rcParams["figure.figsize"] = (12,4) display(confusion_plot([cmat_default, cmat_bayes, cmat_empiric], nrows=1, ncols=3, labels=confusion_labels, titles=['Default', 'Bayes', 'Empiric'] )) print("\n================================> CALIBRATION CURVE (RELIABILITY PLOT) <=====================================") # Calibration curve (reliability plot) # Calculate all FPRs and TPRs for the LogitCV model fpr, tpr, _ = metrics.roc_curve(y_test, y_probs, pos_label=1) # Calculate TPR and FPR for both cutoffs tpr_best_cutoff, fpr_best_cutoff = tpr_fpr_calc(best_cutoff, y_probs, y_test) tpr_default, fpr_default = tpr_fpr_calc(0.5, y_probs, y_test) # Plot ROC curve and mark cutoffs on the curve plt.rcParams["figure.figsize"] = (12,6) plt.plot(fpr, tpr, label = "ROC curve") plt.plot(fpr_default, tpr_default, marker="x", markersize=20, label ="0.5 cutoff") plt.plot(fpr_best_cutoff, tpr_best_cutoff, marker="x", markersize=20, label =f"Optimal cutoff") plt.xlabel("1-Specificity") plt.ylabel("Sensitivity") plt.title('Calibration curve (reliability plot)') plt.legend(); plt.show() print("\n=======================================> FEATURE IMPORTANCE AND ROC <=========================================") ## Variable importance plot fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10)) roc_axes = axes[0, 0] pr_axes = axes[0, 1] importances = None if importance_plot: if not feature_labels: raise RuntimeError("'feature_labels' argument not passed " "when 'importance_plot' is True") try: importances = pd.Series(clf.feature_importances_, index=feature_labels) \ .sort_values(ascending=False) except AttributeError: try: importances = pd.Series(clf.coef_.ravel(), index=feature_labels) \ .sort_values(ascending=False) except AttributeError: pass if importances is not None: # Modifying grid grid_spec = axes[0, 0].get_gridspec() for ax in axes[:, 0]: ax.remove() # remove first column axes large_axs = fig.add_subplot(grid_spec[0:, 0]) # Plot importance curve feature_importance_plot(importances=importances.values, feature_labels=importances.index, ax=large_axs) large_axs.axvline(x=0) # Axis for ROC and PR curve roc_axes = axes[0, 1] pr_axes = axes[1, 1] else: # remove second row axes for ax in axes[1, :]: 
ax.remove() else: # remove second row axes for ax in axes[1, :]: ax.remove() ## ROC and Precision-Recall curves clf_name = clf.__class__.__name__ roc_plot(y_test, y_probs, clf_name, ax=roc_axes) precision_recall_plot(y_test, y_probs, clf_name, ax=pr_axes) fig.subplots_adjust(wspace=5) fig.tight_layout() display(fig) ## Dump to report_dict dump = dict(clf=clf, accuracy=[train_acc, test_acc], train_predictions=train_predictions, test_predictions=test_predictions, test_probs=y_probs, report=clf_rep, roc_auc=roc_auc, model_memory=model_mem, opt_cutoff = best_cutoff, total_cost = best_err_cost) return dump
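# Hedged sketch of the empirical-cutoff search performed in report() above,
# written against fixed per-error costs so it is self-contained; cost_fp and
# cost_fn are illustrative stand-ins for calc_custom_cost_score and the item
# prices used in the original.
import numpy as np

def best_cost_cutoff(y_true, y_probs, cost_fp=1.0, cost_fn=5.0):
    costs = {}
    for cutoff in np.arange(0.0, 1.0, 0.001):
        pred = np.where(y_probs >= cutoff, 1, 0)
        fp = np.sum((pred == 1) & (y_true == 0))
        fn = np.sum((pred == 0) & (y_true == 1))
        costs[cutoff] = (cost_fp * fp + cost_fn * fn) / len(y_true)
    best = min(costs, key=costs.get)
    return best, costs[best]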
def plot_calibration_curve(est, X, y, name, fig_index=0): """Plot calibration curve for est w/o and with calibration. """ # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1.0) fig = plt.figure(fig_index, figsize=(17, 12)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [ (lr, 'Logistic'), (est, name), (isotonic, name + ' + Isotonic'), (sigmoid, name + ' + Sigmoid'), ]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max()) print("%s:" % name) print("\tBrier: %1.3f" % (clf_score)) print("\tPrecision: %1.3f" % precision_score(y_test, y_pred)) print("\tRecall: %1.3f" % recall_score(y_test, y_pred)) print("\tF1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = calibration_curve( y_test, prob_pos, n_bins=10) ax1.plot( mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score), ) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout()
def plot_calibration_curve(y, prob, ax, n_bins=10):
    # n_bins must be passed by keyword; positionally it would be
    # mis-read as a different argument of calibration_curve
    prob_true, prob_pred = calibration_curve(y, prob, n_bins=n_bins)
    ax.scatter(prob_pred, prob_true)
# COMPUTE THE OPTIMAL THRESHOLD FOR PREDICTING THE POSITIVE CLASS (1)
# CHECK ALL THRESHOLDS FROM 0 TO 1, INCREASING BY 0.001 EACH ITERATION
thresholds = np.arange(0, 1, 0.001)
# F-score for each threshold
rezultati = [
    f1_score(y_test, to_labels(model_vjerojatnosti, t)) for t in thresholds
]
# PRINT THE BEST THRESHOLD
ix = np.argmax(rezultati)
print('Threshold=%.3f, F-Score=%.5f' % (thresholds[ix], rezultati[ix]))
# plot the precision-recall vs. threshold curve
plot_precision_recall_vs_threshold(precision, recall, threshold)
# plot the calibration curve
fop, mpv = calibration_curve(y_test, model_vjerojatnosti)
# plot a perfectly calibrated model
plt.figure()
plt.plot([0, 1], [0, 1], linestyle='--')
# plot the current model
plt.plot(mpv, fop, 'm', marker='.')
plt.title('Uncalibrated')
plt.show()
# Save the model after training
filename = 'Model_CV_' + str(brojac) + '.pkl'
direktorij = './Modeli/'
putanja = os.path.join(direktorij, filename)
pickle.dump(model, open(putanja, 'wb'))
print("Model saved !!")
coef = LogRes.coef_[0]
print(coef)
# histogram
treat_plt = plt.hist(prop_scores[t == 1], fc=(0, 0, 1, 0.5), bins=20, label='Treated')
cont_plt = plt.hist(prop_scores[t == 0], fc=(1, 0, 0, 0.5), bins=20, label='Control')
plt.legend()
plt.xlabel('propensity score')
plt.ylabel('number of counties')
plt.show()
# calibration
prob_true, prob_pred = calibration_curve(t, prop_scores, n_bins=15)
# calibration_curve drops empty bins, so index by the returned length
epochs = [ind for ind in range(len(prob_true))]
"""plt.plot(epochs, prob_true, '-b', label="true_prob")
plt.plot(epochs, prob_pred, '-r', label="predicted_prob")
plt.legend(loc="lower right")
plt.show()"""
# mean squared gap between the reliability-curve points
brier = np.sum((np.array(prob_true) - np.array(prob_pred))**2) / len(prob_true)
print(brier)
# choose prop scores only between 0.06 and 0.885
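# Note: the quantity above is a binned gap between the reliability-curve points,
# not the standard Brier score, which averages (probability - outcome)^2 over
# individual samples; scikit-learn's brier_score_loss gives the per-sample
# version directly (t and prop_scores as defined above).
from sklearn.metrics import brier_score_loss

brier_per_sample = brier_score_loss(t, prop_scores, pos_label=1)
print(brier_per_sample)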
def plot_calibration_curve(clf=None, X=None, y=None, clf_name='Classifier'): """ Plots how well calibrated the predicted probabilities of a classifier are and how to calibrate an uncalibrated classifier. Compares estimated predicted probabilities by a baseline logistic regression model, the model passed as an argument, and by both its isotonic calibration and sigmoid calibrations. The closer the calibration curves are to a diagonal the better. A sine wave like curve represents an overfitted classifier, while a cosine wave like curve represents an underfitted classifier. By training isotonic and sigmoid calibrations of the model and comparing their curves we can figure out whether the model is over or underfitting and if so which calibration (sigmoid or isotonic) might help fix this. For more details, see https://scikit-learn.org/stable/auto_examples/calibration/plot_calibration_curve.html. Should only be called with a fitted classifer (otherwise an error is thrown). Please note this function fits variations of the model on the training set when called. Arguments: model (clf): Takes in a fitted classifier. X (arr): Training set features. y (arr): Training set labels. model_name (str): Model name. Defaults to 'Classifier' Returns: Nothing. To see plots, go to your W&B run page then expand the 'media' tab under 'auto visualizations'. Example: wandb.sklearn.plot_calibration_curve(clf, X, y, 'RandomForestClassifier') """ if (test_missing(clf=clf, X=X, y=y) and test_types(clf=clf, X=X, y=y) and test_fitted(clf)): y = np.asarray(y) # Create dataset of classification task with many redundant and few # informative features X, y = datasets.make_classification(n_samples=100000, n_features=20, n_informative=2, n_redundant=10, random_state=42) X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.99, random_state=42) # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1.) 
model_dict = [] # color frac_positives_dict = [] # y axis mean_pred_value_dict = [] # x axis hist_dict = [] # barchart y edge_dict = [] # barchart x # Add curve for perfectly calibrated model # format: model, fraction_of_positives, mean_predicted_value model_dict.append('Perfectly calibrated') frac_positives_dict.append(0) mean_pred_value_dict.append(0) hist_dict.append(0) edge_dict.append(0) model_dict.append('Perfectly calibrated') hist_dict.append(0) edge_dict.append(0) frac_positives_dict.append(1) mean_pred_value_dict.append(1) X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.98, random_state=42) # Add curve for LogisticRegression baseline and other models for clf, name in [(lr, 'Logistic'), (clf, clf_name), (isotonic, clf_name + ' + Isotonic'), (sigmoid, clf_name + ' + Sigmoid')]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max()) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) hist, edges = np.histogram( prob_pos, bins=10, density=False) # format: model, fraction_of_positives, mean_predicted_value for i in range(len(fraction_of_positives)): hist_dict.append(hist[i]) edge_dict.append(edges[i]) model_dict.append(name) frac_positives_dict.append(round_3(fraction_of_positives[i])) mean_pred_value_dict.append(round_3(mean_predicted_value[i])) if i >= (chart_limit-2): wandb.termwarn("wandb uses only the first %d datapoints to create the plots."% wandb.Table.MAX_ROWS) break def calibration_curves(model_dict, frac_positives_dict, mean_pred_value_dict, hist_dict, edge_dict): return wandb.visualize( 'wandb/calibration/v1', wandb.Table( columns=['model', 'fraction_of_positives', 'mean_predicted_value', 'hist_dict', 'edge_dict'], data=[ [model_dict[i], frac_positives_dict[i], mean_pred_value_dict[i], hist_dict[i], edge_dict[i]] for i in range(len(model_dict)) ] )) wandb.log({'calibration_curve': calibration_curves(model_dict, frac_positives_dict, mean_pred_value_dict, hist_dict, edge_dict)})
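# A minimal sketch, outside the wandb plumbing, of the comparison the docstring
# above describes: fit isotonic and sigmoid calibrations of a classifier and
# compare their Brier scores and reliability curves. The dataset and the
# RandomForest base model here are illustrative assumptions.
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=5000, n_informative=5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

base = RandomForestClassifier(random_state=0)
for label, model in [('uncalibrated', base),
                     ('isotonic', CalibratedClassifierCV(base, cv=2, method='isotonic')),
                     ('sigmoid', CalibratedClassifierCV(base, cv=2, method='sigmoid'))]:
    model.fit(X_train, y_train)
    prob_pos = model.predict_proba(X_test)[:, 1]
    frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=10)  # reliability points
    print(label, brier_score_loss(y_test, prob_pos, pos_label=1))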
# Only supports binary calibrations: each class is relabelled one-vs-rest
ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for i in range(0, 4):
    prob_pos = prob_pos_all[:, i]
    trueY = np.copy(Ytest)
    if i == 0:
        trueY[trueY > 0] = 2
        trueY[trueY == 0] = 1
        trueY[trueY != 1] = 0
    else:
        trueY[trueY != i] = 0
        trueY[trueY == i] = 1
    fraction_of_positives, mean_predicted_value = \
        calibration_curve(trueY, prob_pos, n_bins=25)
    ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label=i)
    ax2.hist(prob_pos, range=(0, 1), bins=25, histtype="step", lw=2)
ax1.set_ylabel("Fraction of positives")
# ax1.set_ylim([-0.05, mean_predicted_value[-1] + .1])
# ax1.set_xlim([-.05, mean_predicted_value[-1] + .1])
# ax2.set_xlim([-.05, mean_predicted_value[-1] + .1])
ax1.legend(loc="lower right")
ax1.set_title('Calibration plots (reliability curve)')
ax2.set_xlabel("Mean predicted value")
ax2.set_ylabel("Count")
def draw_calibration_curve(prob_list, label_list):
    # calibration_curve expects (y_true, y_prob)
    prob_true, prob_pred = calibration_curve(label_list, prob_list)
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.plot(prob_pred, prob_true, marker='.')
    plt.show()
index=False)) # In[13]: (confidence_score_df.tail(10).sort_values("cal", ascending=False).drop( ["candidate_id", "curated_ctd"], axis=1).round(3).to_csv("output/top_ten_high_confidence_scores.tsv", sep="\t", index=False)) # In[14]: from sklearn.calibration import calibration_curve cnn_y, cnn_x = calibration_curve(confidence_score_df.curated_ctd, confidence_score_df.uncal, n_bins=10) all_cnn_y, all_cnn_x = calibration_curve(confidence_score_df.curated_ctd, confidence_score_df.cal, n_bins=10) calibration_df = pd.DataFrame.from_records( list( map( lambda x: { "predicted": x[0], "actual": x[1], "model_calibration": 'before' }, zip(cnn_x, cnn_y))) + list( map( lambda x: {
def _show_calibration_curve(estimators, X, y, name): from sklearn.calibration import calibration_curve from sklearn.metrics import brier_score_loss import matplotlib.pyplot as plt import seaborn as sns # sns.set_context("paper", font_scale=1.5) plt.figure(figsize=(7, 10)) ax1 = plt.subplot2grid((2, 1), (0, 0), rowspan=1) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") clf = estimators[0] y_predict_prob = clf.predict_proba(X) prob_pos = y_predict_prob[:, 1] # compute calibration curve fraction_of_positives, mean_predicted_value = calibration_curve( y, prob_pos, n_bins=100, strategy="quantile" ) clf_score = np.round(brier_score_loss(y, prob_pos, pos_label=np.array(y).max()), 2) print(clf_score) print(fraction_of_positives, mean_predicted_value) # frac_pred_vals = [] # mean_pred_values = np.linspace(0, 1.0, 200) # brier_scores = [] # for i, clf in enumerate(estimators): # y_predict_prob = clf.predict_proba(X) # prob_pos = y_predict_prob[:, 1] # # compute calibration curve # fraction_of_positives, mean_predicted_value = calibration_curve( # y, prob_pos, n_bins=10, strategy="quantile" # ) # # clf_score = np.round( # brier_score_loss(y, prob_pos, pos_label=np.array(y).max()), 2 # ) # # # create a linear interpolation of the calibration # interp_frac_positives = np.interp( # mean_pred_values, mean_predicted_value, fraction_of_positives # ) # interp_frac_positives[0] = 0.0 # # # store curves + scores # brier_scores.append(clf_score) # frac_pred_vals.append(interp_frac_positives) # # mean_frac_pred_values = np.mean(frac_pred_vals, axis=0) # ax1.plot( # mean_pred_values, # mean_frac_pred_values, # "s-", # label=rf"{name.capitalize()} ({np.round(np.mean(brier_scores),2)} $\pm$ {np.round(np.std(brier_scores), 2)}", # ) # # # get upper and lower bound for tpr # std_fpv = np.std(frac_pred_vals, axis=0) # tprs_upper = np.minimum(mean_frac_pred_values + std_fpv, 1) # tprs_lower = np.maximum(mean_frac_pred_values - std_fpv, 0) # ax1.fill_between( # mean_pred_values, # tprs_lower, # tprs_upper, # color="grey", # alpha=0.2, # # label=r"$\pm$ 1 std. dev.", # ) # actually do the plot ax1.plot( mean_predicted_value, fraction_of_positives, "s-", label=f"{name.capitalize()} ({clf_score})", ) # set ax1.plot() ax1.set( ylabel="Fraction of Success Outcomes (y label of 1)", xlabel="Mean predicted confidence statistic", ylim=[-0.05, 1.05], title="Calibration plots (reliability curve)", ) ax1.legend(loc="lower right") return ax1
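# Sketch of the averaging idea in the commented-out block above: calibration
# curves from different estimators return points at different mean-predicted
# values, so each curve is interpolated onto a common grid with np.interp
# before averaging. The helper name and grid size are illustrative.
import numpy as np
from sklearn.calibration import calibration_curve

def mean_calibration_curve(y, prob_pos_list, n_bins=10):
    grid = np.linspace(0.0, 1.0, 200)
    interpolated = []
    for prob_pos in prob_pos_list:
        frac_pos, mean_pred = calibration_curve(y, prob_pos, n_bins=n_bins,
                                                strategy="quantile")
        # np.interp expects increasing x; calibration_curve returns bins in order
        interpolated.append(np.interp(grid, mean_pred, frac_pos))
    curves = np.vstack(interpolated)
    return grid, curves.mean(axis=0), curves.std(axis=0)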
from sklearn import datasets from sklearn.naive_bayes import GaussianNB from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier from sklearn.svm import LinearSVC from sklearn.calibration import calibration_curve probs = np.loadtxt('out_logit_cl_full.txt') y_test = np.loadtxt('real_logit_full.txt') ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") frac_of_pos, mean_pred_value = calibration_curve(y_test, probs, n_bins=10) ax1.plot(mean_pred_value, frac_of_pos, "s-", label='calibration') ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title(f'Calibration plot ()') ax2.hist(probs, range=(0, 1), bins=10, label='calibration', histtype="step", lw=2) ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count")
kaggle_data_clean_pred = kaggle_data["test"].drop(["ID"], axis=1).values
dtest2 = xgb.DMatrix(data=kaggle_data_clean_pred)
preds2 = pred2.predict(dtest2)
upload_no_pca = pd.DataFrame({"ID": kaggle_data["test"]["ID"].values, "TARGET": preds2})
upload_no_pca.to_csv("upload_no_pca.csv", index=False)
upload_pca = pd.DataFrame({"ID": kaggle_data["test"]["ID"].values, "TARGET": preds})
upload_pca.to_csv("upload_pca.csv", index=False)
clf = linear_model.LogisticRegression(penalty="l1", class_weight=None)
clf.fit(kaggle_data["train"].drop(["ID", "TARGET"], axis=1).values,
        kaggle_data["train"]['TARGET'].values)
train_preds = clf.predict_proba(kaggle_data["train"].drop(["ID", "TARGET"], axis=1).values)
# keep the reliability-curve points instead of discarding the return value
prob_true, prob_pred = calibration_curve(kaggle_data["train"]['TARGET'].values,
                                         train_preds[:, 1], n_bins=10)
print(metrics.roc_auc_score(kaggle_data["train"]['TARGET'].values, train_preds[:, 1]))
test_probs = clf.predict_proba(kaggle_data["test"].drop(["ID"], axis=1).values)
test_probs = pd.DataFrame(test_probs)
test_final = pd.DataFrame({"ID": kaggle_data["test"]["ID"].values,
                           "TARGET": test_probs.iloc[:, 1].values})
test_final.to_csv("test_final.csv", index=False)
def plot_calibration_curve_from_data(X, y, est, name, fig_index): """Plot calibration curve for est w/o and with calibration. """ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=7) # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=2, method="isotonic") # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=2, method="sigmoid") # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1.0, solver="lbfgs") fig = plt.figure(fig_index, figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [ (lr, "Logistic Regression"), (est, name), (isotonic, name + " + Isotonic"), (sigmoid, name + " + Sigmoid"), ]: clf.fit(X_train, y_train) # clf.fit(X_train[:,:10], X_train[:, 10]) y_pred = clf.predict(X_test) # y_pred = clf.predict(X_test[:,:10]) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1] # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1]*weights[1] # prob_pos = np_average( 1 - clf.predict_proba(X_test[:,:10]), axis=1, weights=weights ) else: # use decision function prob_pos = clf.decision_function(X_test) # prob_pos = clf.decision_function(X_test[:,:10])[:, 1] # prob_pos = clf.decision_function(X_test[:,:10])[:, 1]*weights[1] # prob_pos = np_average( 1 - clf.decision_function(X_test[:,:10]), axis=1, weights=weights ) prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max()) print("* %s:" % name) OP_append("* %s:" % name) print(" * Brier: %1.3f" % (clf_score)) OP_append(" * Brier: %1.3f" % (clf_score)) print(" * Precision: %1.3f" % precision_score(y_test, y_pred)) OP_append(" * Precision: %1.3f" % precision_score(y_test, y_pred)) print(" * Recall: %1.3f" % recall_score(y_test, y_pred)) OP_append(" * Recall: %1.3f" % recall_score(y_test, y_pred)) print(" * F1: %1.3f\n" % f1_score(y_test, y_pred)) OP_append(" * F1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title("Calibration plots (reliability curve)") ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout() fig.savefig("NF/%s.png" % name, dpi=fig.dpi)
range=[0, 1], histtype='step', color='r', linewidth=2, label='1')
plt.hist(x[y == 0], 50, range=[0, 1], histtype='step', color='b', linewidth=2, label='0')
plt.title('patient: {0}'.format(nsubject))
plt.grid()
fraction_of_positives, mean_predicted_value = calibration_curve(y, x, n_bins=10)
xtmp = np.arange(0, 1, 0.01)
plt.figure(100 + nsubject)
plt.plot(xtmp, xtmp, 'k--')
plt.plot(mean_predicted_value, fraction_of_positives, color='b', marker='x')
plt.grid()
plt.legend()
y_hat_x = x if nsubject == 1 else np.hstack((y_hat_x, x))
y_all_p = y if nsubject == 1 else np.hstack((y_all_p, y))
Y[ind == nsubject, r] = x.ravel()
def eval_model_v2( context, xtest, ytest, model, pcurve_bins: int = 10, pcurve_names: List[str] = ["my classifier"], plots_artifact_path: str = "", pred_params: dict = {}, cmap="Blues", is_xgb=False, ): """generate predictions and validation stats pred_params are non-default, scikit-learn api prediction-function parameters. For example, a tree-type of model may have a tree depth limit for its prediction function. :param xtest: features array type Union(DataItem, DataFrame, numpy array) :param ytest: ground-truth labels Union(DataItem, DataFrame, Series, numpy array, List) :param model: estimated model :param pcurve_bins: (10) subdivide [0,1] interval into n bins, x-axis :param pcurve_names: label for each calibration curve :param pred_params: (None) dict of predict function parameters :param cmap: ('Blues') matplotlib color map :param is_xgb """ if hasattr(model, "get_xgb_params"): is_xgb = True def df_blob(df): return bytes(df.to_csv(index=False), encoding="utf-8") if isinstance(ytest, np.ndarray): unique_labels = np.unique(ytest) elif isinstance(ytest, list): unique_labels = set(ytest) else: try: ytest = ytest.values unique_labels = np.unique(ytest) except Exception as exc: raise Exception(f"unrecognized data type for ytest {exc}") n_classes = len(unique_labels) is_multiclass = True if n_classes > 2 else False # INIT DICT...OR SOME OTHER COLLECTOR THAT CAN BE ACCESSED plots_path = plots_artifact_path or context.artifact_subpath("plots") extra_data = {} ypred = model.predict(xtest, **pred_params) if isinstance(ypred.flat[0], np.floating): accuracy = mean_absolute_error(ytest, ypred) else: accuracy = float(metrics.accuracy_score(ytest, ypred)) context.log_results({ "accuracy": accuracy, "test-error": np.sum(ytest != ypred) / ytest.shape[0] }) # PROBABILITIES if hasattr(model, "predict_proba"): yprob = model.predict_proba(xtest, **pred_params) if not is_multiclass: fraction_of_positives, mean_predicted_value = calibration_curve( ytest, yprob[:, -1], n_bins=pcurve_bins, strategy="uniform") cmd = plot_calibration_curve(ytest, [yprob], pcurve_names) calibration = context.log_artifact( PlotArtifact( "probability-calibration", body=cmd.get_figure(), title="probability calibration plot", ), artifact_path=plots_path, db_key=False, ) extra_data["probability calibration"] = calibration # CONFUSION MATRIX if is_classifier(model): cm = sklearn_confusion_matrix(ytest, ypred, normalize="all") df = pd.DataFrame(data=cm) extra_data["confusion matrix table.csv"] = df_blob(df) cmd = metrics.plot_confusion_matrix( model, xtest, ytest, normalize="all", values_format=".2g", cmap=plt.get_cmap(cmap), ) confusion = context.log_artifact( PlotArtifact( "confusion-matrix", body=cmd.figure_, title="Confusion Matrix - Normalized Plot", ), artifact_path=plots_path, db_key=False, ) extra_data["confusion matrix"] = confusion # LEARNING CURVES if hasattr(model, "evals_result") and is_xgb is False: results = model.evals_result() train_set = list(results.items())[0] valid_set = list(results.items())[1] learning_curves_df = None if is_multiclass: if hasattr(train_set[1], "merror"): learning_curves_df = pd.DataFrame({ "train_error": train_set[1]["merror"], "valid_error": valid_set[1]["merror"], }) else: if hasattr(train_set[1], "error"): learning_curves_df = pd.DataFrame({ "train_error": train_set[1]["error"], "valid_error": valid_set[1]["error"], }) if learning_curves_df: extra_data["learning curve table.csv"] = df_blob( learning_curves_df) _, ax = plt.subplots() plt.xlabel("# training examples") plt.ylabel("error rate") 
plt.title("learning curve - error") ax.plot(learning_curves_df["train_error"], label="train") ax.plot(learning_curves_df["valid_error"], label="valid") learning = context.log_artifact( PlotArtifact("learning-curve", body=plt.gcf(), title="Learning Curve - error"), artifact_path=plots_path, db_key=False, ) extra_data["learning curve"] = learning # FEATURE IMPORTANCES if hasattr(model, "feature_importances_"): (fi_plot, fi_tbl) = feature_importances(model, xtest.columns) extra_data["feature importances"] = context.log_artifact( fi_plot, db_key=False, artifact_path=plots_path) extra_data["feature importances table.csv"] = df_blob(fi_tbl) # AUC - ROC - PR CURVES if is_multiclass and is_classifier(model): lb = LabelBinarizer() ytest_b = lb.fit_transform(ytest) extra_data["precision_recall_multi"] = context.log_artifact( precision_recall_multi(ytest_b, yprob, unique_labels), artifact_path=plots_path, db_key=False, ) extra_data["roc_multi"] = context.log_artifact( roc_multi(ytest_b, yprob, unique_labels), artifact_path=plots_path, db_key=False, ) # AUC multiclass aucmicro = metrics.roc_auc_score(ytest_b, yprob, multi_class="ovo", average="micro") aucweighted = metrics.roc_auc_score(ytest_b, yprob, multi_class="ovo", average="weighted") context.log_results({ "auc-micro": aucmicro, "auc-weighted": aucweighted }) # others (todo - macro, micro...) f1 = metrics.f1_score(ytest, ypred, average="macro") ps = metrics.precision_score(ytest, ypred, average="macro") rs = metrics.recall_score(ytest, ypred, average="macro") context.log_results({ "f1-score": f1, "precision_score": ps, "recall_score": rs }) elif is_classifier(model): yprob_pos = yprob[:, 1] extra_data["precision_recall_bin"] = context.log_artifact( precision_recall_bin(model, xtest, ytest, yprob_pos), artifact_path=plots_path, db_key=False, ) extra_data["roc_bin"] = context.log_artifact( roc_bin(ytest, yprob_pos, clear=True), artifact_path=plots_path, db_key=False, ) rocauc = metrics.roc_auc_score(ytest, yprob_pos) brier_score = metrics.brier_score_loss(ytest, yprob_pos, pos_label=ytest.max()) f1 = metrics.f1_score(ytest, ypred) ps = metrics.precision_score(ytest, ypred) rs = metrics.recall_score(ytest, ypred) context.log_results({ "rocauc": rocauc, "brier_score": brier_score, "f1-score": f1, "precision_score": ps, "recall_score": rs, }) elif is_regressor(model): r_squared = r2_score(ytest, ypred) rmse = mean_squared_error(ytest, ypred, squared=False) mse = mean_squared_error(ytest, ypred, squared=True) mae = mean_absolute_error(ytest, ypred) context.log_results({ "R2": r_squared, "root_mean_squared_error": rmse, "mean_squared_error": mse, "mean_absolute_error": mae, }) # return all model metrics and plots return extra_data
def main(): x = np.load(os.getcwd() + '/data/x.npy') y = np.load(os.getcwd() + '/data/y.npy') skf = StratifiedKFold(n_splits=10) coefs = [] y_pred = [] y_true = [] accuracy = [] precision = [] sensitivity = [] specificity = [] roc_auc = [] prc_auc = [] balanced_acc = [] for train_index, test_index in skf.split(x, y): y_train, y_test = y[train_index], y[test_index] imputer = SimpleImputer() scaler = StandardScaler() x_train = scaler.fit_transform(imputer.fit_transform(x[train_index])) x_test = scaler.transform(imputer.transform(x[test_index])) lgr = LogisticRegression(class_weight='balanced').fit(x_train, y_train) coefs.append(lgr.coef_) y_pred.append(lgr.predict_proba(x_test)) y_true.append(y_test) for fold in range(len(y_pred)): tn, fp, fn, tp = confusion_matrix(y_true[fold], np.round(y_pred[fold][:, 1])).ravel() accuracy.append((tp + tn) / (tp + tn + fp + fn)) precision.append(tp / (tp + fp)) sensitivity.append(tp / (tp + fn)) specificity.append(tn / (tn + fp)) roc_auc.append( roc_auc_score(y_true[fold], np.round(y_pred[fold][:, 1]))) prc_auc.append( average_precision_score(y_true[fold], np.round(y_pred[fold][:, 1]))) balanced_acc.append( balanced_accuracy_score(y_true[fold], np.round(y_pred[fold][:, 1]))) mean, confidence_interval = mean_confidence_interval(accuracy) print('Accuracy Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(precision) print('Precision Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(sensitivity) print('Sensitivity Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(specificity) print('Specificity Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(roc_auc) print('ROC_AUC Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(prc_auc) print('PRC_AUC Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) mean, confidence_interval = mean_confidence_interval(balanced_acc) print( 'Balanced Accuracy Mean and confidence interval: {:4f}, {:4f}'.format( mean, confidence_interval)) fpr, tpr, thresholds = roc_curve(np.hstack(y_true), np.vstack(y_pred)[:, 1]) plt.plot(fpr, tpr) plt.title('ROC Curve') plt.xlabel('FPR') plt.ylabel('TPR') plt.show() precision, recall, thresholds = precision_recall_curve( np.hstack(y_true), np.vstack(y_pred)[:, 1]) plt.plot(precision, recall) plt.title('PRC Curve') plt.xlabel('Precision') plt.ylabel('Recall') plt.show() prob_true, prob_pred = calibration_curve(np.hstack(y_true), np.vstack(y_pred)[:, 1]) plt.plot(prob_true, prob_pred) plt.title('Calibration Curve') plt.xlabel('Prob True') plt.ylabel('Prob Pred') plt.show()
plt.xlabel("Flase Positive Rate", fontsize=15) plt.yticks(np.arange(0.0, 1.1, step=0.1)) plt.ylabel("True Positive Rate", fontsize=15) plt.title('ROC Curve Analysis', fontweight='bold', fontsize=15) plt.legend(prop={'size': 13}, loc='lower right') plt.show() # %% #*lets check how bad the classifiers are predicting probs # reliability diagram fop, mpv = calibration_curve(y_test, model_predict_test['AdaBoostClassifier_pr'], n_bins=10) # plot perfectly calibrated plt.plot([0, 1], [0, 1], linestyle='--') # plot model reliability plt.plot(mpv, fop, marker='.') fop_extra, mpv_extra = calibration_curve( y_test, model_predict_test['ExtraTreesClassifier_pr'], n_bins=10) # plot model reliability plt.plot(mpv_extra, fop_extra, marker='.') fop_rf, mpv_rf = calibration_curve( y_test, model_predict_test['RandomForestClassifier_pr'], n_bins=10) # plot model reliability plt.plot(mpv_rf, fop_rf, marker='.')
ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (gnb, 'Naive Bayes'), (svc, 'Support Vector Classification'), (rfc, 'Random Forest')]: clf.fit(X_train, y_train) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s" % (name, )) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2)
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (gnb, 'Naive Bayes'), (svc, 'Support Vector Classification'), (rfc, 'Random Forest')]: clf.fit(X_train, y_train) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s" % (name, )) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05])
def multi_plot(x):
    print("Evaluation and Visualization")
    colors = "bgrcmykw"
    fig1 = plt.figure()
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0), rowspan=2)
    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.set_title('Calibration plots (reliability curve) ')
    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    for idx, i in enumerate(x):
        y_true = i[0]
        y_pred = i[1]
        description = i[2]
        # Calibration curve
        fraction_of_positives, mean_predicted_value = calibration_curve(y_true, y_pred, n_bins=50)
        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 color=colors[idx], alpha=0.6, label=description)
        ax2.hist(y_pred, range=(0, 1), bins=50, color=colors[idx], linewidth=2.0,
                 alpha=0.6, label=description, histtype="step", lw=2)
    plt.yscale('log')
    ax1.legend(loc="upper center", fancybox=True)
    # ax2.legend(loc="upper center", ncol=2, fancybox=True)
    plt.show()

    fig3 = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    for idx, i in enumerate(x):
        y_true = i[0]
        y_pred = i[1]
        description = i[2]
        # Compute precision-recall pairs for different probability thresholds
        precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
        _auc1 = average_precision_score(y_true, y_pred)
        # Precision-Recall curve
        plt.plot(recall, precision, color=colors[idx], alpha=0.6,
                 label=description + ' Precision-Recall AUC = %0.4f' % _auc1)
    plt.legend(loc=2, fancybox=True)
    plt.show()

    fig4 = plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    for idx, i in enumerate(x):
        y_true = i[0]
        y_pred = i[1]
        description = i[2]
        # Compute Receiver operating characteristic (ROC)
        fpr, tpr, thresholds = roc_curve(y_true, y_pred)
        # Compute Area Under the Curve (AUC) from prediction scores
        _auc2 = roc_auc_score(y_true, y_pred)
        plt.plot(fpr, tpr, color=colors[idx], alpha=0.6,
                 label=description + ' ROC curve (area = %0.4f)' % _auc2)
    plt.legend(loc="lower right", fancybox=True)
    plt.show()
    return
def plot_calibration_curve(n_bins, shots, ax):
    y_true = [shot.result for shot in shots]
    y_prob = [shot.pred for shot in shots]
    # pass n_bins by keyword so it is not mis-read as another argument
    prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins)
    ax.scatter(prob_pred, prob_true)
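# Example usage of the helper above, assuming `shots` is an iterable of objects
# with a 0/1 `result` attribute and a predicted probability `pred`:
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], linestyle='--')  # reference line for perfect calibration
plot_calibration_curve(10, shots, ax)
plt.show()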