def check_calibration(method):
    # Adapted from sklearn/tests/test_calibration.py
    # Authors: Alexandre Gramfort
    # License: BSD 3 clause
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train = X[:n_samples], y[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
    # Note that this fit overwrites the fit on the entire training set
    pc_clf.fit(X_train, y_train)
    prob_pos_pc_clf = pc_clf.predict_proba(X_test)[:, 1]

    # Check that brier score has improved after calibration
    assert_greater(brier_score_loss(y_test, prob_pos_clf),
                   brier_score_loss(y_test, prob_pos_pc_clf))

    # Check invariance against relabeling [0, 1] -> [1, 2]
    pc_clf.fit(X_train, y_train + 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [-1, 1]
    pc_clf.fit(X_train, 2 * y_train - 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [1, 0]
    pc_clf.fit(X_train, (y_train + 1) % 2)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    if method == "sigmoid":
        assert_array_almost_equal(prob_pos_pc_clf,
                                  1 - prob_pos_pc_clf_relabeled)
    else:
        # Isotonic calibration is not invariant against relabeling
        # but should improve in both cases
        assert_greater(brier_score_loss(y_test, prob_pos_clf),
                       brier_score_loss((y_test + 1) % 2,
                                        prob_pos_pc_clf_relabeled))
def test_brier_score_loss():
    """Check brier_score_loss function"""
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)

    assert_almost_equal(brier_score_loss(y_true, y_true), 0.0)
    assert_almost_equal(brier_score_loss(y_true, y_pred), true_score)
    assert_almost_equal(brier_score_loss(1. + y_true, y_pred), true_score)
    assert_almost_equal(brier_score_loss(2 * y_true - 1, y_pred), true_score)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
    assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
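# A minimal sketch (the names below are illustrative, not from the source)
# showing that brier_score_loss matches the mean squared difference between
# binary labels and predicted probabilities, which is what the test above
# asserts via linalg.norm:
import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0, 1, 1])
y_prob = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95])
manual = np.mean((y_true - y_prob) ** 2)  # the Brier score by hand
assert np.isclose(brier_score_loss(y_true, y_prob), manual)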
def process(self):
    # read the data
    data = pd.read_csv(self.parameters['ex'], index_col=0)
    self.y_score = data[['pre_below', 'pre_normal', 'pre_above']]
    self.y_true = data[['obs_below', 'obs_normal', 'obs_above']]
    # plotting
    fpr = dict()      # False Positive Rate
    tpr = dict()      # True Positive Rate
    roc_auc = dict()  # ROC Area Under Curve
    bs = dict()       # Brier Score Loss
    # clear the current figure
    plt.clf()
    idx = self.parameters['index']
    fpr[idx], tpr[idx], _ = metrics.roc_curve(self.y_true.iloc[:, idx],
                                              self.y_score.iloc[:, idx])
    roc_auc[idx] = metrics.roc_auc_score(self.y_true.iloc[:, idx],
                                         self.y_score.iloc[:, idx])
    bs[idx] = metrics.brier_score_loss(self.y_true.iloc[:, idx],
                                       self.y_score.iloc[:, idx])
    if self.args.verbose:
        print("====False Positive Rate(fpr) And True Positive Rate(tpr) Pair====")
        for i, val in enumerate(fpr[idx]):
            print(i, val, tpr[idx][i])
    plt.plot(fpr[idx], tpr[idx],
             label='Num:%d,AUC: %0.2f,BS: %0.2f'
                   % (self.y_true.shape[0], roc_auc[idx], bs[idx]))
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(self.args.title[0] if self.args.title
              else 'Receiver Operating Characteristic(ROC)')
    plt.legend(loc="lower right")
    print('Saving image to {}'.format(self.parameters['name']))
    plt.savefig(self.parameters['name'])
    print('Completely finished.')
def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration."""
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Calibrated with ROC convex hull calibration
    rocch = CalibratedClassifierCV(est, cv=2, method='rocch')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'),
                      (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid'),
                      (rocch, name + ' + ROCConvexHull')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("%s:" % name)
        print("\tBrier: %1.4f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tAuc: %1.4f\n" % roc_auc_score(y_test, prob_pos))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.4f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
def calibration_curve_plotter(y_test, prob_pos, n_bins=10):
    brier = brier_score_loss(y_test, prob_pos, pos_label=1)

    fig = plt.figure(0, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    df = pd.DataFrame({"true": y_test})
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    binids = np.digitize(prob_pos, bins) - 1
    df["Bin center"] = bins[binids] + 0.5 / n_bins
    df[""] = "Model calibration: (%1.5f)" % brier

    o = bins + 0.5 / n_bins
    df2 = pd.DataFrame({"true": o, "Bin center": o})
    df2[""] = "Perfect calibration"

    df = pd.concat([df, df2])

    sns.pointplot(x="Bin center", y="true", data=df, order=o, hue="", ax=ax1)

    ax2.hist(prob_pos, range=(0, 1), bins=10, label="Model",
             histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    # ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots")

    ax2.set_xlabel("Predicted Probability")
    ax2.set_ylabel("Count")

    plt.tight_layout()
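# Hypothetical usage of calibration_curve_plotter above; the classifier and
# dataset are placeholders, and the plotter's own module-level imports
# (pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt) are
# assumed to be present.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = LogisticRegression().fit(X_tr, y_tr)
calibration_curve_plotter(y_te, clf.predict_proba(X_te)[:, 1], n_bins=10)
plt.show()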
def plot_probability_calibration_curves(self):
    """ Compute true and predicted probabilities for a calibration plot

    fraction_of_positives - The true probability in each bin
        (fraction of positives).
    mean_predicted_value - The mean predicted probability in each bin.
    """
    fig = plt.figure()
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0), rowspan=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.set_title('Calibration plots (reliability curve) ' + self.description)

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")

    clf_score = brier_score_loss(self.y_true, self.y_pred, pos_label=1)
    fraction_of_positives, mean_predicted_value = calibration_curve(
        self.y_true, self.y_pred, n_bins=50)

    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             color="#660066", alpha=0.6,
             label="%s (%1.3f)" % (self.description, clf_score))
    ax2.hist(self.y_pred, range=(0, 1), bins=50, color="#660066",
             linewidth=2.0, alpha=0.6,
             label="%s (%1.3f)" % (self.description, clf_score),
             histtype="step", lw=2)
    # create the legends after the labeled artists exist, otherwise they
    # come up empty
    ax1.legend(loc="lower right")
    ax2.legend(loc="upper center", ncol=2)
    plt.yscale('log')
    return
def plot_calibration_curve(est, name, fig_index):
    '''
    Plot calibration curve for est w/o and with calibration.
    '''
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1.0, solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
    for clf, name in [
            (lr, 'Logistic'),
            (est, name),
            (isotonic, name + ' + Isotonic'),
            (sigmoid, name + ' + Sigmoid')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, 'predict_proba'):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print('%s:' % name)
        print('\tBrier: %1.3f' % (clf_score))
        print('\tPrecision: %1.3f' % precision_score(y_test, y_pred))
        print('\tRecall: %1.3f' % recall_score(y_test, y_pred))
        print('\tF1: %1.3f\n' % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, 's-',
                 label='%s (%1.3f)' % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype='step', lw=2)

    ax1.set_ylabel('Fraction of positives')
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc='lower right')
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel('Mean predicted value')
    ax2.set_ylabel('Count')
    ax2.legend(loc='upper center', ncol=2)

    plt.tight_layout()
def brier(ytrue, yprob, num_classes):
    rv = 0.
    for i in range(num_classes):
        # one-vs-rest indicator for class i
        ind = np.where(ytrue == i)[0]
        tmp = np.zeros(ytrue.size)
        tmp[ind] += 1
        # score the class-i probability column against the class-i indicator
        rv += brier_score_loss(tmp, yprob[:, i])
    rv /= num_classes
    return rv
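# A small usage sketch for the one-vs-rest brier() helper above; the labels
# and probabilities are synthetic, for illustration only.
import numpy as np
from sklearn.metrics import brier_score_loss

rng = np.random.RandomState(0)
ytrue = rng.randint(0, 3, size=100)          # labels in {0, 1, 2}
yprob = rng.dirichlet(np.ones(3), size=100)  # each row sums to 1
print(brier(ytrue, yprob, num_classes=3))    # mean per-class Brier score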
def calibrate_proba_fitted_models(iDf, iFeatures, iModelsDict):
    iCalibratedModelsDict = {}
    for model_name in iModelsDict.keys():
        target = model_name.replace('_gbr', '').replace('_rf', '')
        proba_cal_sig = CalibratedClassifierCV(iModelsDict[model_name],
                                               method='sigmoid', cv='prefit')
        proba_cal_iso = CalibratedClassifierCV(iModelsDict[model_name],
                                               method='isotonic', cv='prefit')
        proba_cal_sig.fit(iDf.loc[:, iFeatures.values],
                          iDf.loc[:, target].values)
        proba_cal_iso.fit(iDf.loc[:, iFeatures.values],
                          iDf.loc[:, target].values)
        brier_sig = brier_score_loss(
            iDf.loc[:, target].values,
            proba_cal_sig.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        brier_iso = brier_score_loss(
            iDf.loc[:, target].values,
            proba_cal_iso.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        # keep whichever calibration gives the lower Brier score
        if brier_sig <= brier_iso:
            iCalibratedModelsDict[model_name] = proba_cal_sig.calibrated_classifiers_
        else:
            iCalibratedModelsDict[model_name] = proba_cal_iso.calibrated_classifiers_
    return iCalibratedModelsDict
def plot_calibration_curve_cv(X, y, est, name, bins=10, n_folds=8, n_jobs=1,
                              fig_index=1):
    """Plot calibration curve for est w/o and with calibration."""
    from sklearn import metrics, cross_validation
    from model_selection import cross_val_predict_proba

    # Calibrated with isotonic calibration
    cv = 2
    isotonic = CalibratedClassifierCV(est, cv=cv, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=cv, method='sigmoid')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        y_true = y
        scoring = 'roc_auc'
        cv1 = cross_validation.StratifiedKFold(y, n_folds)
        y_proba, scores = cross_val_predict_proba(
            clf, X, y, scoring=scoring, cv=cv1, n_jobs=n_jobs, verbose=0,
            fit_params=None, pre_dispatch='2*n_jobs')
        y_pred = np.array(y_proba > 0.5, dtype=int)
        clf_score = brier_score_loss(y_true, y_proba, pos_label=y_true.max())
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_true, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_true, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_true, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_true, y_proba, n_bins=bins)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(y_proba, range=(0, 1), bins=bins, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob, axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
def print_stats():
    print(metrics.classification_report(y_true, y_pred,
                                        target_names=target_names))
    print("roc_auc_score: {:1.4f} | LogLoss: {:1.3f} | Brier score loss:"
          " {:1.3f}".format(metrics.roc_auc_score(y_true, y_proba),
                            metrics.log_loss(y_true, y_proba),
                            metrics.brier_score_loss(y_true, y_proba)))
    if hasattr(model, 'threshold') and model.threshold:
        precision, sensitivity, specificity = \
            precision_sensitivity_specificity(y_true, y_proba,
                                              threshold=model.threshold)
        print("sensitivity(recall): {:1.2f} and specificity: {:1.2f}"
              " with threshold={:1.2f}".format(
                  sensitivity, specificity, model.threshold))
def get_error(est_track, true_track):
    """Compute position and orientation errors between two tracks."""
    if est_track.ndim > 1:
        true_track = true_track.reshape((true_track.shape[0], 1))

    error = np.recarray(shape=est_track.shape,
                        dtype=[('position', float),
                               ('orientation', float),
                               ('orientation_weighted', float)])

    # Position error
    pos_err = (true_track.x - est_track.x)**2 + (true_track.y - est_track.y)**2
    error.position = np.sqrt(pos_err)

    # Orientation error
    error.orientation = anglediff(true_track.angle, est_track.angle,
                                  units='deg')
    error.orientation_weighted = anglediff(true_track.angle,
                                           est_track.angle_w, units='deg')

    descr = {}
    bix = np.logical_not(np.isnan(error.orientation))
    descr['orientation_median'] = np.median(np.abs(error.orientation[bix]))
    descr['orientation_mean'] = np.mean(np.abs(error.orientation[bix]))
    bix = np.logical_not(np.isnan(error.orientation_weighted))
    descr['orientation_weighted_median'] = \
        np.nanmedian(np.abs(error.orientation_weighted[bix]))
    descr['orientation_weighted_mean'] = \
        np.nanmean(np.abs(error.orientation_weighted[bix]))

    # no angle
    true_no_angle = np.isnan(true_track.angle)
    est_no_angle = np.isnan(est_track.angle)
    agree = np.logical_and(true_no_angle, est_no_angle)
    disagree = np.logical_xor(true_no_angle, est_no_angle)
    both = np.logical_or(true_no_angle, est_no_angle)
    descr['no_angle_auc'] = roc_auc_score(true_no_angle, est_no_angle)
    descr['no_angle_mcc'] = matthews_corrcoef(true_no_angle, est_no_angle)
    descr['no_angle_brier'] = brier_score_loss(true_no_angle, est_no_angle)
    descr['no_angle_acc'] = agree.sum() / both.sum()
    descr['no_angle_p_per_frame'] = disagree.sum() / disagree.shape[0]

    descr['position_median'] = np.median(error.position)
    descr['position_mean'] = np.mean(error.position)
    #print('True frequency of angle-does-not-apply:',
    #      true_no_angle.sum()/true_no_angle.shape[0])
    #print('Estimated frequency of angle-does-not-apply:',
    #      est_no_angle.sum()/est_no_angle.shape[0])

    return error, descr
def process(self):
    """ process """
    # directory check
    files = glob.glob(os.path.join(self.parameters['csv_dir'], '*.csv'))
    if not files:
        print('No .csv file found in {}.'.format(self.parameters['csv_dir']))
        exit(-1)
    self.auc = np.zeros([self.lats, self.lons])
    self.bs = np.zeros([self.lats, self.lons])
    self.sum = np.zeros([self.lats, self.lons])
    # loop to reshape the data
    for lat in np.arange(self.lats):
        for lon in np.arange(self.lons):
            if self.args.verbose:
                print('Now Calculating Grid({},{})......'.format(lat, lon))
            y_true = list()
            y_score = list()
            for path in files:
                row = pd.read_csv(path, index_col=0).query(
                    'latitude=={} and longitude=={}'.format(lat, lon))
                if row.empty:
                    continue
                y_true.append(row.iloc[0]['obs_' + self.nclass[self.parameters['index']]])
                y_score.append(row.iloc[0]['pre_' + self.nclass[self.parameters['index']]])
            # validate y_true; if it is empty or all zeros, skip the
            # remaining computation
            if not y_true:
                print('Warning: y_true is empty in Grid({},{}).'.format(lat, lon))
                continue
            if all(i == 0 for i in y_true):
                print('Warning: Grid({},{}) y_true has only one class(0 or 1)'.format(lat, lon))
                continue
            # compute auc and bs
            self.auc[lat, lon] = metrics.roc_auc_score(y_true, y_score)
            self.bs[lat, lon] = metrics.brier_score_loss(y_true, y_score)
            self.sum[lat, lon] = len(y_true)
            print(self.auc[lat, lon], self.bs[lat, lon])
            del y_true
            del y_score
    # save results
    np.save(self.parameters['name'] + '_auc', self.auc)
    np.save(self.parameters['name'] + '_bs', self.bs)
    np.save(self.parameters['name'] + '_sum', self.sum)
def train_model_rfc_calibrated_cv(features, labels, hold_out=False,
                                  train_sz=0.9):
    features_train, features_test = [], []
    labels_train, labels_test = [], []

    if hold_out:
        # First, set aside some of the training set for calibration.
        # Use stratified shuffle split so that class ratios are maintained
        # after the split.
        splitter = StratifiedShuffleSplit(labels, n_iter=1,
                                          train_size=train_sz,
                                          random_state=30)
        # Length is 1 in this case since we have a single fold for splitting
        print(len(splitter))
        for train_idx, test_idx in splitter:
            features_train, features_test = features[train_idx], features[test_idx]
            labels_train, labels_test = labels[train_idx], labels[test_idx]
    else:
        features_train = features
        labels_train = labels

    print("features_train shape: ", features_train.shape)
    print("labels_train shape: ", labels_train.shape)
    if hold_out:
        print("features_test shape: ", features_test.shape)
        print("labels_test shape: ", labels_test.shape)

    print("Parameters selected based on prior grid search ...")
    #clf = rfc(random_state=30, n_jobs=4, criterion='entropy', max_depth=7,
    #          min_samples_leaf=2, min_samples_split=5, n_estimators=50)
    #clf = rfc(random_state=30, n_jobs=4, criterion='gini', max_depth=8,
    #          min_samples_leaf=5, min_samples_split=2, n_estimators=120)
    #clf = rfc(random_state=30, n_jobs=4, criterion='gini',
    #          class_weight='auto', max_depth=5, min_samples_leaf=5,
    #          min_samples_split=2, n_estimators=100)
    clf = rfc(random_state=30, n_jobs=4, criterion='entropy',
              class_weight='auto', max_depth=5, min_samples_leaf=5,
              min_samples_split=2, n_estimators=60)

    # Perform calibration.
    # Use 'sigmoid' because sklearn cautions against using 'isotonic' for
    # fewer than 1000 calibration samples as it can result in overfitting.
    # 05/22 - Looks like isotonic does better than sigmoid for both Brier
    # score and roc_auc_score.
    # Using 30-40% holdout actually improves ROC AUC for holdout score from
    # 0.88 to 0.925 with CV=5.
    print("Performing Calibration now ...")
    # sigmoid = CalibratedClassifierCV(clf, cv=5, method='sigmoid')
    sigmoid = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    sigmoid.fit(features_train, labels_train)

    if hold_out:
        # Calculate Brier score loss
        y_probs = sigmoid.predict_proba(features_test)[:, 1]
        clf_score = brier_score_loss(labels_test, y_probs)
        print("Brier score: ", clf_score)
        auc_score = estimate_roc_auc(sigmoid, features_test, labels_test)

    return sigmoid
def get_model_results(model, training_data, test_data):
    """
    Find the best hyper parameters for model given the training and test data

    Parameters
    -----
    model: machine learning model such as Logistic Regression,
        MultiLayer Perceptron
    training_data: list containing X,y training data
    test_data: list containing X,y test data

    Returns
    ------
    y_proba, y_pred, y_test, accuracy, auc, brier_loss
    """
    # choose model
    if model == "LR":
        model = LogisticRegression()
    elif model == "TF":
        model = learn.TensorFlowDNNClassifier(hidden_units=[150, 40],
                                              n_classes=2, steps=1000,
                                              batch_size=25,
                                              learning_rate=0.0002,
                                              optimizer="Adam")
    # fit model
    start = time()
    X_train, y_train = training_data
    X_test, y_test = test_data
    model.fit(X_train, y_train)

    # accuracy
    y_pred = model.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # auc
    y_proba = model.predict_proba(X_test)
    auc = metrics.roc_auc_score(y_test, (y_proba[:, 1] - y_proba[:, 0]))
    print('Accuracy: {0:f}'.format(accuracy))
    print('AUC: {0:f}'.format(auc))

    # brier loss
    brier_loss = metrics.brier_score_loss(y_test, y_proba[:, 1], pos_label=1)
    print('Model computation duration (secs):', time() - start)
    return (y_proba, y_pred, y_test, accuracy, auc, brier_loss)
def get_stat(self, X_test, y_test):
    """Print list of scores for the current classifier"""
    y_pred = self.predict(X_test)
    if hasattr(self.clf, "predict_proba"):
        prob_pos = self.clf.predict_proba(X_test)[:, 1]
    else:  # use decision function
        prob_pos = self.clf.decision_function(X_test)
        prob_pos = (prob_pos - prob_pos.min()) / \
                   (prob_pos.max() - prob_pos.min())
    clf_score = brier_score_loss(y_test, prob_pos)
    print("%s:" % self.method)
    print("\tBrier: %1.3f" % (clf_score))
    print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
    print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
    print("\tF1: %1.3f" % f1_score(y_test, y_pred))
    print("\tROC AUC score: %1.3f\n" % roc_auc_score(y_test, prob_pos))
def ProcessAndFit(input):
    '''
    For testing the brier score loss associated with a calibration model
    given features tt (tt = totest)
    This is meant to be run in parallel, hence the "input"
    '''
    y, X, tt = input[0]
    X = X[:, tt]
    X_train, X_test, y_train, y_test = train_test_split(dummyize(X), y,
                                                        test_size=0.5)
    lr = LogisticRegression()
    lr.fit(X_train, y_train)
    y_prob = lr.predict_proba(X_test)[:, 1]
    return brier_score_loss(y_test, y_prob)
def evaluate(estimator, dev_X, dev_y):
    print('evaluating on development set', flush=True)
    guess_dev = estimator.predict(dev_X)

    score_roc_auc_dev = roc_auc_score(dev_y, guess_dev)
    print('{:.4f} -- roc auc'.format(score_roc_auc_dev))

    score_brier_loss_dev = brier_score_loss(dev_y, guess_dev)
    print('{:.4f} -- brier loss'.format(score_brier_loss_dev))

    score_log_loss_dev = log_loss(dev_y, estimator.predict_proba(dev_X))
    print('{:.4f} -- log loss'.format(score_log_loss_dev))

    guess_dev_negative_one = guess_dev.copy().astype('int8')
    guess_dev_negative_one[guess_dev_negative_one == 0] = -1
    '''
    decision_function not implemented
    # score_hinge_loss_dev = hinge_loss(dev_y, estimator.decision_function(dev_X))
    '''
    score_hinge_loss_dev = hinge_loss(dev_y, guess_dev_negative_one)
    print('{:.4f} -- hinge loss'.format(score_hinge_loss_dev))

    score_matthews_corrcoef_dev = matthews_corrcoef(dev_y,
                                                    guess_dev_negative_one)
    print('{:.4f} -- matthews_corrcoef'.format(score_matthews_corrcoef_dev))
    print(flush=True)

    return score_roc_auc_dev, score_brier_loss_dev, \
        score_log_loss_dev, score_hinge_loss_dev, score_matthews_corrcoef_dev
def calibration_inner_loop(clf, X, y, train, test, n_bins, n_power,
                           bins_used, minsamples):
    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]
    clf.fit(X_train, y_train)
    if hasattr(clf, "predict_proba"):
        y_proba = clf.predict_proba(X_test)[:, 1]
    elif hasattr(clf, "decision_function"):  # use decision function
        prob_pos = clf.decision_function(X_test)
        y_proba = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    else:
        raise RuntimeError("clf without predict_proba or decision_function")
    fraction_of_positives, mean_predicted_value, bins_used, n_bins = \
        calibration_curve_nan(y_test, y_proba, n_bins=n_bins,
                              n_power=n_power, bins=bins_used,
                              minsamples=minsamples)
    # print(fraction_of_positives.shape, mean_predicted_value.shape)
    return (np.array(list(fraction_of_positives) + list(mean_predicted_value)),
            brier_score_loss(y_test, y_proba, pos_label=y_test.max()),
            metrics.roc_auc_score(y_test, y_proba),
            bins_used,
            n_bins)
def evaluate_sigmoid_match(self, X_test, y_test, A, B):
    from sklearn.calibration import calibration_curve
    import matplotlib.pyplot as plt
    from sklearn.metrics import (brier_score_loss, precision_score,
                                 recall_score, f1_score)
    prob_pos = 1. / (1. + (np.exp(A * X_test + B)))
    clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_test.max())
    fraction_of_positives, mean_predicted_value = \
        calibration_curve(y_test, prob_pos, n_bins=10)
    print("SVC_sigmoid:")
    print("\tBrier: %1.3f" % (clf_score))

    fig = plt.figure(2, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))
    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             label="%s (%1.3f)" % ("SVC_sigmoid", clf_score))
    ax2.hist(prob_pos, range=(0, 1), bins=10, label="SVC_sigmoid",
             histtype="step", lw=2)
    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots (reliability curve)')
    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)
    plt.tight_layout()
    plt.show()
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert_greater(brier_score_loss(y_test, prob_pos_clf),
                           brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss((y_test + 1) % 2,
                                                prob_pos_pc_clf_relabeled))

    # Check failure cases:
    # only "isotonic" and "sigmoid" should be accepted as methods
    clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
    assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

    # base-estimators should provide either decision_function or
    # predict_proba (most regressors, for instance, should fail)
    clf_base_regressor = \
        CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
    assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
prob_pos_clf = clf.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with isotonic calibration
clf_isotonic = CalibratedClassifierCV(clf, cv=2, method="isotonic")
clf_isotonic.fit(X_train, y_train, sw_train)
prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with sigmoid calibration
clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method="sigmoid")
clf_sigmoid.fit(X_train, y_train, sw_train)
prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

print("Brier scores: (the smaller the better)")

clf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)
print("No calibration: %1.3f" % clf_score)

clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sw_test)
print("With isotonic calibration: %1.3f" % clf_isotonic_score)

clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test)
print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)

###############################################################################
# Plot the data and the predicted probabilities
plt.figure()
y_unique = np.unique(y)
colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
for this_y, color in zip(y_unique, colors):
    this_X = X_train[y_train == this_y]
def calibration_comparison(base_estimator, n_samples, weights=None,
                           n_bins=10, detail=False):
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42, weights=weights)
    base_estimator_dict = {
        "MultinomialNB": MultinomialNB(),
        "GaussianNB": GaussianNB(),
        "SVC": LinearSVC()
    }
    if (base_estimator == "MultinomialNB"):
        X -= X.min()

    # Train data: train binary model.
    X_train, y_train = X[:n_samples], y[:n_samples]
    print("Positive Rate: {x}".format(x=y_train.mean()))
    # calibration data.
    X_calib, y_calib = X[n_samples:2 * n_samples], y[n_samples:2 * n_samples]
    # test data.
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # train the base estimator
    clf = base_estimator_dict[base_estimator].fit(X_train, y_train)
    if (base_estimator == "SVC"):
        # y_calib_score: training input for the calibration model.
        y_calib_score = clf.decision_function(X_calib)
        y_calib_score = (y_calib_score - y_calib_score.min()) /\
            (y_calib_score.max() - y_calib_score.min())
        # y_test_score: evaluation input for the calibration model.
        y_test_score = clf.decision_function(X_test)
        y_test_score = (y_test_score - y_test_score.min()) /\
            (y_test_score.max() - y_test_score.min())
    else:
        # y_calib_score: training input for the calibration model.
        y_calib_score = clf.predict_proba(X_calib)
        y_calib_score = np.array([score[1] for score in y_calib_score])
        # y_test_score: evaluation input for the calibration model.
        y_test_score = clf.predict_proba(X_test)
        y_test_score = np.array([score[1] for score in y_test_score])

    calibrate_model_dict = {
        "mimic": _MimicCalibration(threshold_pos=5, record_history=False),
        "isotonic": IsotonicRegression(y_min=0.0, y_max=1.0,
                                       out_of_bounds='clip'),
        # "platt": LogisticRegression()
    }
    result = {}
    result[base_estimator] = {}
    for cal_name, cal_object in calibrate_model_dict.items():
        print(cal_name)
        cal_object.fit(copy(y_calib_score), copy(y_calib))
        if cal_name in ["mimic", "isotonic"]:
            y_output_score = cal_object.predict(copy(y_test_score))
        else:
            raise ValueError("Please specify probability prediction function.")
        frac_pos, predicted_value = calibration_curve(y_test, y_output_score,
                                                      n_bins=n_bins)
        b_score = brier_score_loss(y_test, y_output_score, pos_label=1)
        # precision = precision_score(y_test, y_output_score)
        # recall = recall_score(y_test, y_output_score)
        # f1 = f1_score(y_test, y_output_score)
        result[base_estimator][cal_name] = {
            "calibration_curve": [frac_pos, predicted_value],
            # "eval_score": [b_score, precision, recall, f1]
            "eval_score": [b_score]
        }
        if (detail):
            result[base_estimator][cal_name]["detail"] = {
                "y_test": y_test,
                "y_test_calibrate_score": y_output_score
            }
    return result
def best_N_experts(X_trainval, y_trainval, X_test, y_test, Nreplicates=10,
                   type='Brier_weighted', average='median'):
    from sklearn.model_selection import train_test_split
    from scipy import stats

    n_experts = X_trainval.shape[1]
    # convert to Away-based binary labels
    y_trainval_bin = (y_trainval == 1).astype(int)

    # Set up the grid search
    coeff_grid = np.arange(1, n_experts, 5)
    Ntest = len(coeff_grid)
    TestScores = np.ones(Ntest)
    Nopt = np.zeros(Nreplicates).astype(int)

    for rep in range(Nreplicates):
        for tst in range(Ntest):
            N = coeff_grid[tst]
            # shuffle the data and split into training and validation sets
            X_train, X_val, y_train, y_val = train_test_split(
                X_trainval, y_trainval_bin, test_size=0.3, shuffle=True)
            n_train = X_train.shape[0]

            # Determine the Brier scores of all predictors (in the given
            # order) and also the number of predictions from the training
            # data. Any prediction at 0.5 is counted as a no-prediction.
            Brier_Scores = np.ones(n_experts)
            weights = np.zeros(n_experts)
            for i in range(n_experts):
                Brier_Scores[i] = brier_score_loss(y_train, X_train[:, i],
                                                   pos_label=1)
                weights[i] = sum(X_train[:, i] != 0.5) / n_train
            Brier_Scores_weighted = 1 - (1 - Brier_Scores) * weights

            # choose the type of experts
            if type == 'Brier':
                # rank experts on pure Brier Score
                Scores = Brier_Scores
            elif type == 'Brier_weighted':
                # rank experts on weighted Brier Score
                Scores = Brier_Scores_weighted

            # Determine the top experts
            sorted_expert_indices = np.argsort(Scores)
            # keep only the top N experts
            sorted_expert_indices = sorted_expert_indices[:N]

            # now test on the validation set
            predictions = X_val[:, sorted_expert_indices]
            # Average the experts
            if average == 'weighted':
                y_prob = np.average(predictions, axis=1,
                                    weights=weights[sorted_expert_indices])
            elif average == 'median':
                y_prob = np.median(predictions, axis=1)
            else:  # simple unweighted averaging
                y_prob = np.mean(predictions, axis=1)

            # calculate the Brier score on the validation data
            TestScores[tst] = brier_score_loss(y_val, y_prob, pos_label=1)

        # Take the parameter with the minimum Brier score
        tst_index = np.argmin(TestScores)
        Nopt[rep] = coeff_grid[tst_index]

    # Get the mode as the most optimal value
    Nopt = stats.mode(Nopt)[0][0]

    # Now evaluate on the test set. The expert indices of both train_val and
    # test sets should be identical.
    # Determine the top experts from the full train_val set. Any prediction
    # at 0.5 is counted as a no-prediction.
    Brier_Scores = np.ones(n_experts)
    weights = np.zeros(n_experts)
    n_train = X_trainval.shape[0]
    for i in range(n_experts):
        Brier_Scores[i] = brier_score_loss(y_trainval_bin, X_trainval[:, i],
                                           pos_label=1)
        weights[i] = sum(X_trainval[:, i] != 0.5) / n_train
    Brier_Scores_weighted = 1 - (1 - Brier_Scores) * weights

    # choose the type of experts
    if type == 'Brier':
        Scores = Brier_Scores
    elif type == 'Brier_weighted':
        Scores = Brier_Scores_weighted

    sorted_expert_indices = np.argsort(Scores)
    # keep only the top Nopt experts
    sorted_expert_indices = sorted_expert_indices[:Nopt]

    # now test on the test set
    predictions = X_test[:, sorted_expert_indices]
    # Average the experts
    if average == 'weighted':
        y_prob = np.average(predictions, axis=1,
                            weights=weights[sorted_expert_indices])
    elif average == 'median':
        y_prob = np.median(predictions, axis=1)
    else:  # simple unweighted averaging
        y_prob = np.mean(predictions, axis=1)

    return brier_score_loss(y_test, y_prob, pos_label=1), Nopt
obs = obs[flag_nonan]
fcst = fcst[flag_nonan]
L = np.sum(flag_nonan)
o_bar_ = np.mean(obs)
o_bar[d] = o_bar_

for n in range(N_boost):
    ind_bagging = np.random.choice(L, size=L, replace=True)
    obs_ = obs[ind_bagging]
    fcst_ = fcst[ind_bagging]
    prob_true_, prob_pred_ = reliability_diagram(obs_, fcst_, hist_bins)
    brier_ = brier_score_loss(obs_, fcst_)
    prob_true[d, :, n] = prob_true_
    prob_pred[d, :, n] = prob_pred_
    brier[d, n] = brier_

hist_bins_ = np.mean(prob_pred[d, ...], axis=1)
use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0]))
use[d, :] = use_

tuple_save = (brier, prob_true, prob_pred, use, o_bar)
label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar']
du.save_hdf5(tuple_save, label_save, save_dir,
             '{}_Calib_loc{}.hdf'.format(prefix_out, r))
def brier_skill_score(target_values, forecast_probabilities):
    climo = np.mean((target_values - np.mean(target_values)) ** 2)
    return 1.0 - brier_score_loss(target_values, forecast_probabilities) / climo
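# Illustrative call to brier_skill_score above (the arrays are made up):
# a score above 0 means the forecast beats the climatological base rate,
# and 1.0 would be a perfect forecast.
import numpy as np

target = np.array([0, 0, 1, 0, 1, 1, 0, 0])
forecast = np.array([0.1, 0.2, 0.7, 0.3, 0.8, 0.6, 0.2, 0.1])
print(brier_skill_score(target, forecast))  # approx. 0.74 for these values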
def online_eval(model, dataloader, txtlog, submit_path, uncertaintys_path,
                save_segmentation, save_uncertainty):
    txtlog.write("Dice_mean fg|bg|hausdorff_dist|ravd|ece|nll|sklearn_brier\n")
    my_evaluation = Evaluation()
    start_time = time.time()
    with torch.no_grad():
        dice_new_list = []
        data_dict_list = []
        hausdorff_dist_list = []
        ravd_list = []
        shape_list = []
        testset_list_pre = []
        testset_list_gt = []
        nll_list = []
        brier_list = []
        brier_sklearn_list = []
        ece_list = []
        for data_val in dataloader:
            images_val, targets_val, subject, slice, images_origin = data_val
            model.eval()
            images_val = images_val.to(device)
            targets_val = targets_val.to(device)
            outputs = model(images_val, test_config.lamda_sem)
            # final_out [i-1,i,i+1]
            outputs_val = outputs.final_out
            softmax = outputs.softmax_out
            # calculate predicted entropy as uncertainty
            softmax_1 = torch.unsqueeze(softmax[:, 1, ...], dim=1)
            softmax_2 = torch.unsqueeze(softmax[:, 3, ...], dim=1)
            softmax_3 = torch.unsqueeze(softmax[:, 5, ...], dim=1)
            softmax_fg = torch.cat((softmax_1, softmax_2, softmax_3), dim=1)
            softmax_fg_numpy = softmax_fg.data.cpu().numpy()
            softmax_fg_numpy = np.squeeze(softmax_fg_numpy, axis=0)
            mean_fg = np.mean(softmax_fg_numpy, axis=0)
            entropy = -mean_fg * np.log(mean_fg)
            # softmax outputs for uncertainty quantification
            softmax_final_out = softmax[:, 6:8, ...]
            softmax_final_out = np.squeeze(
                softmax_final_out.data.cpu().numpy(), axis=0)
            # process slice by slice
            outputs_val_1 = outputs_val[:, 0:2, ...]
            image_origin = images_origin.data.cpu().numpy()
            image_origin1 = np.squeeze(image_origin, axis=0)
            image_origin1 = image_origin1[:, :, 1]
            _, predicted_1 = torch.max(outputs_val_1.data, 1)
            # ----------Compute dice-----------
            predicted_val_1 = predicted_1.data.cpu().numpy()
            subject_val = subject.data.cpu().numpy()
            slice_val = slice.data.cpu().numpy()
            slice_val_1 = slice_val[0][1]
            targets_val = targets_val.data.cpu().numpy()
            targets_val_1 = targets_val[:, 1, ...]
            shape_list.append(predicted_val_1.shape)
            data_dict_list.append({
                "subject": subject_val[0],
                "slice": slice_val_1,
                "pre": np.squeeze(predicted_val_1, axis=0),
                "target": np.squeeze(targets_val_1, axis=0),
                "image": image_origin1,
                "uncertainty": entropy,
                "softmax_out": softmax_final_out
            })
        # measure the elapsed time of uncertainty quantification
        end_time = time.time()
        print("elapsed:{}".format(end_time - start_time))
        # group by subject with pandas
        pd_data = pd.DataFrame(data_dict_list)
        for subject, volume_data in pd_data.groupby("subject"):
            pre = volume_data["pre"]
            tar = volume_data["target"]
            slices = volume_data["slice"]
            image = volume_data["image"]
            uncertain = volume_data["uncertainty"]
            softmax_prob = volume_data["softmax_out"]
            pre_array = pre.values
            target_array = tar.values
            image_array = image.values
            uncertain_arr = uncertain.values
            slices_arr = slices.values
            softmax_prob_arr = softmax_prob.values
            pre_temp = np.zeros((len(pre_array), pre_array[0].shape[0],
                                 pre_array[0].shape[1]), dtype="int16")
            target_temp = np.zeros((len(pre_array), target_array[0].shape[0],
                                    target_array[0].shape[1]), dtype="int16")
            # dimensions: slices*class*width*height
            softmax_probs_temp = np.zeros(
                (len(pre_array), softmax_prob_arr[0].shape[0],
                 softmax_prob_arr[0].shape[1], softmax_prob_arr[0].shape[2]),
                dtype="float32")
            for i in range(len(pre_array)):
                pre_temp[i, :, :] = pre_array[i]
                target_temp[i, :, :] = target_array[i]
                softmax_probs_temp[i, :, :, :] = softmax_prob_arr[i]
                # save predictions, ground truth and images
                if save_segmentation:
                    image_slice = image_array[i]
                    # save image and segmentation
                    my_evaluation.save_contour_label(
                        image_slice.astype("int16"), target_array[i],
                        save_path=submit_path, color="red",
                        file_name=str(subject) + "_" + str(slices_arr[i]) + "label",
                        show_mask=True)
                    my_evaluation.save_contour_label(
                        image_slice.astype("int16"), pre_array[i],
                        save_path=submit_path, color="blue",
                        file_name=str(subject) + "_" + str(slices_arr[i]) + "pre",
                        show_mask=True)
                    orig_path = os.path.join(
                        submit_path,
                        str(subject) + "_" + str(slices_arr[i]) + '.png')
                    cv.imwrite(orig_path, image_slice.astype("uint8"))
                if save_uncertainty:
                    # Predicted error map
                    error = np.abs(pre_array[i] - target_array[i])
                    error_name = str(subject) + "_" + str(slices_arr[i]) + "error.png"
                    error_file_path = os.path.join(uncertaintys_path, error_name)
                    plt.figure()
                    plt.imshow(error, cmap=plt.cm.Reds, interpolation='nearest')
                    # Visualization of the uncertainty
                    file_name = str(subject) + "_" + str(slices_arr[i]) + ".png"
                    file_path = os.path.join(uncertaintys_path, file_name)
                    plt.colorbar()
                    plt.xticks([])
                    plt.yticks([])
                    plt.savefig(error_file_path)
                    plt.clf()
                    plt.cla()
                    plt.close()
                    plt.figure()
                    plt.imshow(uncertain_arr[i], cmap=plt.cm.rainbow,
                               interpolation='nearest')
                    plt.colorbar()
                    plt.xticks([])
                    plt.yticks([])
                    # plt.axes('off')
                    plt.savefig(file_path)
                    plt.clf()
                    plt.cla()
                    plt.close()
            dsc_list1 = []
            if 0 == np.count_nonzero(pre_temp):
                print("zero" + "_" + str(subject))
                continue
            # calculate the dice metric
            for i in range(0, test_config.num_classes):
                dsc_i = dice(pre_temp, target_temp, i)
                dsc_list1.append(dsc_i)
            # Calculate Hausdorff Distance and rAVD
            hausdorff_dist = hd(pre_temp, target_temp, [5, 0.42, 0.42])
            # we measure the absolute volume difference
            ravd = abs(rAVD(pre_temp, target_temp))
            # calculate the volume of ICH for GT and predictions
            volume_gt = calculate_volume(target_temp)
            volume_pre = calculate_volume(pre_temp)
            # Evaluate uncertainty quantification with nll, brier, ece
            softmax_probs_temp = softmax_probs_temp.transpose(1, 0, 2, 3)
            brier_score = brier(
                torch.from_numpy(softmax_probs_temp).float(),
                torch.from_numpy(target_temp).long())
            ece_subject_wise, _, _ = ece(softmax_probs_temp[1, :, :, :],
                                         target_temp, 10)
            # Test sklearn
            target_onehot_temp = one_hot(target_temp, 2)
            brier_sklearn = \
                brier_score_loss(target_onehot_temp[0, ...].flatten(),
                                 softmax_probs_temp[0, ...].flatten()) + \
                brier_score_loss(target_onehot_temp[1, ...].flatten(),
                                 softmax_probs_temp[1, ...].flatten())
            nll_score = nll(torch.from_numpy(softmax_probs_temp).float(),
                            torch.from_numpy(target_temp).long())
            print("nll_score:{} brier_score:{}".format(
                nll_score.data.numpy(), brier_score.data.numpy()))
            print("dice_bg:{} dice_fg:{} Hausdorff_dist:{} ravd:{}".format(
                dsc_list1[0], dsc_list1[1], hausdorff_dist, ravd))
            txtlog.write(
                "ID{:30} {:3f} {:3f} {:3f} {:3f} {:3f} {:3f} {:3f} {:3f} {:3f} \n"
                .format(subject, dsc_list1[0], dsc_list1[1], hausdorff_dist,
                        ravd, ece_subject_wise, nll_score, brier_sklearn,
                        volume_gt, volume_pre))
            dice_new_list.append(dsc_list1)
            hausdorff_dist_list.append(hausdorff_dist)
            ravd_list.append(ravd)
            brier_list.append(brier_score.data.numpy())
            nll_list.append(nll_score.data.numpy())
            brier_sklearn_list.append(brier_sklearn)
            ece_list.append(ece_subject_wise)
            # store all the test data
            testset_list_pre.append(softmax_probs_temp[1, :, :, :])
            testset_list_gt.append(target_temp)

        dice_array = np.array(dice_new_list)
        dice_mean = np.mean(dice_array, axis=0)
        haus_dist_arr = np.array(hausdorff_dist_list)
        hausdorff_dist_mean = np.mean(haus_dist_arr, axis=0)
        ravd_arr = np.array(ravd_list)
        ravd_mean = np.mean(ravd_arr, axis=0)
        # uncertainty quantification
        brier_array = np.mean(np.array(brier_list), axis=0)
        nll_array = np.mean(np.array(nll_list), axis=0)
        brier_sklearn_mean = np.mean(np.array(brier_sklearn_list), axis=0)
        ece_subject_mean = np.mean(np.array(ece_list), axis=0)
        stacked_pre = merge_samples(testset_list_pre)
        stacked_gt = merge_samples(testset_list_gt)
        print("pre:{} gt:{}".format(stacked_pre.shape, stacked_gt.shape))
        ece_score, confidence, accuracy = ece(stacked_pre, stacked_gt, 10)
        fraction_of_positives, mean_predicted_value = \
            calibration_curve(stacked_gt.flatten(), stacked_pre.flatten(),
                              n_bins=10)
        # Draw Reliability Diagram (binned version and curve version)
        x = np.linspace(0, 1. + 1e-8, 10)
        y3 = x
        plt.plot([0, 1], [0, 1], "k:")
        plt.bar(x, height=fraction_of_positives, color='b', width=-0.112,
                label='Outputs', linewidth=2, edgecolor=['black'] * len(x),
                align='edge')
        plt.bar(x, height=y3 - fraction_of_positives, color='g',
                bottom=fraction_of_positives, width=-0.112, label='Gap',
                linewidth=2, edgecolor=['black'] * len(x), align='edge')
        plt.xlim(0., 1.)
        plt.ylim(0., 1.)
        plt.xlabel("Confidence")
        plt.ylabel("Accuracy")
        # plt.title("Histogram plot")
        plt.legend(loc="upper left")
        plt.savefig('reliability_diagram_bined.png', dpi=400,
                    bbox_inches='tight')
        plt.figure(figsize=(5, 5))
        ax1 = plt.subplot2grid((1, 1), (0, 0), rowspan=2)
        ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="calibrated_sklearn")
        ax1.set_ylabel("Fraction of positives")
        ax1.set_ylim([-0.05, 1.05])
        ax1.legend(loc="upper left")
        ax1.set_title('Calibration plots (reliability curve)')
        plt.savefig('reliability_diagram_sklearn.png', dpi=400,
                    bbox_inches='tight')
        with h5py.File("reliability_se_net.h5", "w") as f:
            f['confidence'] = confidence
            f['accuracy'] = accuracy
        txtlog.write(
            "Dice_mean fg|bg|hausdorff_dist|ravd|ece|brier|nll|sklearn_brier|ece_sub_mean"
            ":{:3f} ||{:3f}||{:3f}||{:3f}||{:3f}||{:3f}||{:3f}||{:3f} ||{:3f}\n"
            .format(dice_mean[0], dice_mean[1], hausdorff_dist_mean,
                    ravd_mean, ece_score, brier_array, nll_array,
                    brier_sklearn_mean, ece_subject_mean))
        txtlog.write("Time Elapsed: {}".format(end_time - start_time))
    return dice_mean
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [(lr, 'Logistic'),
                  (gnb, 'Naive Bayes'),
                  (svc, 'Support Vector Classification'),
                  (rfc, 'Random Forest')]:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test)[:, 1]
    else:  # use decision function
        prob_pos = clf.decision_function(X_test)
        prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

    clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
    print("%s:" % name)
    print("\tBrier: %1.3f" % (clf_score))
    print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
    print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
    print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

    fraction_of_positives, mean_predicted_value = \
        calibration_curve(y_test, prob_pos, n_bins=10)

    ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
             label="%s" % (name, ))
bestModel = load_model('results/sampling/miTAR_CNN_BiRNN_b' + str(batch) +
                       '_lr' + str(lr) + '_dout' + str(dout) +
                       '_seed' + str(seed) + '.h5')
score = bestModel.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (score[1] * 100))
scores.append(score[1] * 100)

y_pred = bestModel.predict_proba(X_test)
posthr = 0.5
negthr = 0.5
rm = 0
oneacc, sen, spe, Fmeasure, PPV, NPV = evals(y_test, y_pred, posthr,
                                             negthr, rm)
brierScore = brier_score_loss(y_test, y_pred)
vals.append([oneacc, sen, spe, Fmeasure, PPV, NPV, brierScore])

if score[1] > acc:
    acc = score[1]
    paras = [seed]
    print("best so far, acc=", acc, " paras=", paras)
print("finish paras at: seed=", seed)

from statistics import mean
aveScore = mean(scores)
print("the average accuracy is: ", aveScore)

aveEvals = []
for i in range(7):
def common_get_brier(self, y_test, y_score):
    try:
        brier = brier_score_loss(y_test, y_score)
        return brier
    except Exception:
        # fall back to the worst possible binary Brier score on failure
        return 1.0
def _compute_score(model, X, y, scoring_metric=None, scoring_params=None):
    '''Helper function that maps metric string names to their function calls.

    Parameters
    ----------
    model : class inheriting sklearn.base.BaseEstimator
        The classifier whose hyperparams you need to optimize with grid
        search. The model must have model.fit(X,y) and model.predict(X)
        defined. Although it can work without it, it's best if you also
        define model.score(X,y) so you can decide the scoring function for
        deciding the best parameters. If you are using an sklearn model,
        everything will work out of the box. To use a model from a different
        library is no problem, but you need to wrap it in a class and
        inherit sklearn.base.BaseEstimator as seen in:
        https://github.com/cgnorthcutt/hyperopt

    X : np.array of shape (n, m)
        The training data.

    y : np.array of shape (n,) or (n, 1)
        Corresponding labels.

    scoring_metric : str
        See hypopt.GridSearch.fit() scoring parameter docstring
        for list of options.

    scoring_params : dict
        All other params you want passed to the scoring function.
        Params will be passed as scoring_func(**scoring_params).'''

    if scoring_params is None:
        scoring_params = {}

    if scoring_metric == 'accuracy':
        return metrics.accuracy_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'brier_score_loss':
        return metrics.brier_score_loss(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'average_precision':
        return metrics.average_precision_score(y, model.predict_proba(X)[:, 1],
                                               **scoring_params)
    elif scoring_metric == 'f1':
        return metrics.f1_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'f1_micro':
        return metrics.f1_score(y, model.predict(X), average='micro',
                                **scoring_params)
    elif scoring_metric == 'f1_macro':
        return metrics.f1_score(y, model.predict(X), average='macro',
                                **scoring_params)
    elif scoring_metric == 'f1_weighted':
        return metrics.f1_score(y, model.predict(X), average='weighted',
                                **scoring_params)
    elif scoring_metric == 'neg_log_loss':
        return -1. * metrics.log_loss(y, model.predict_proba(X),
                                      **scoring_params)
    elif scoring_metric == 'precision':
        return metrics.precision_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'recall':
        return metrics.recall_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'roc_auc':
        return metrics.roc_auc_score(y, model.predict_proba(X)[:, 1],
                                     **scoring_params)
    elif scoring_metric == 'explained_variance':
        return metrics.explained_variance_score(y, model.predict(X),
                                                **scoring_params)
    elif scoring_metric == 'neg_mean_absolute_error':
        return -1. * metrics.mean_absolute_error(y, model.predict(X),
                                                 **scoring_params)
    elif scoring_metric == 'neg_mean_squared_error':
        return -1. * metrics.mean_squared_error(y, model.predict(X),
                                                **scoring_params)
    elif scoring_metric == 'neg_mean_squared_log_error':
        return -1. * metrics.mean_squared_log_error(y, model.predict(X),
                                                    **scoring_params)
    elif scoring_metric == 'neg_median_absolute_error':
        return -1. * metrics.median_absolute_error(y, model.predict(X),
                                                   **scoring_params)
    elif scoring_metric == 'r2':
        return metrics.r2_score(y, model.predict(X), **scoring_params)
    else:
        raise ValueError(scoring_metric + ' is not a supported metric.')
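# Hedged usage sketch of _compute_score with its 'brier_score_loss' branch;
# the model and data below are placeholders, not from the source. Note that
# this branch scores hard model.predict(X) outputs rather than predicted
# probabilities, matching the dispatch above.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=1)
model = LogisticRegression().fit(X, y)
print(_compute_score(model, X, y, scoring_metric='brier_score_loss'))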
def metrics_sklearn(y_true=np.ndarray, y_pred=np.ndarray, y_pred_c=np.ndarray,
                    alpha=0.05, n_boot=5, blocksize=1, clim_prob=None,
                    threshold_pred='upper_clim'):
    '''
    threshold_pred options: 'clim', 'upper_clim', 'int or float'

    If 'clim' is passed, a positive prediction is forecasted for all values
    of y_pred above clim_prob.
    If 'upper_clim' is passed, from all values that are above the clim_prob,
    only the upper 75% of the predictions are used.
    '''
    # y_true, y_pred, y_pred_c = y_true_c, ts_logit_c, y_pred_c_c
    #%%
    y_true = np.array(y_true).squeeze()
    cont_pred = np.unique(y_pred).size > 5
    metrics_dict = {}

    if clim_prob is None:
        clim_prob = np.round((y_true[(y_true == 1)].size / y_true.size), 2)

    sorval = np.array(sorted(y_pred))
    # probability to percentile
    if threshold_pred == 'clim':
        # binary metrics calculated for clim prevalence
        quantile = 1 - y_pred[sorval > clim_prob].size / y_pred.size
        # old: quantile = 100 * clim_prob
    elif threshold_pred == 'upper_clim':
        # binary metrics calculated for top 75% of 'above clim prob'
        No_vals_above_clim = y_pred[sorval > clim_prob].size / y_pred.size
        upper_75 = 0.75 * No_vals_above_clim  # 0.75 * percentage above clim
        quantile = 1 - upper_75
        # old: bin_threshold = 100 * (1 - 0.75*clim_prob)
        # old: quantile = bin_threshold
    elif isinstance(threshold_pred, int) or isinstance(threshold_pred, float):
        if threshold_pred < 1:
            quantile = 1 - y_pred[sorval > threshold_pred].size / y_pred.size
        else:
            quantile = 1 - y_pred[sorval > threshold_pred / 100.].size / y_pred.size
    elif isinstance(threshold_pred, tuple):
        times = threshold_pred[0]
        quantile = 1 - (y_pred[sorval > times * clim_prob].size / y_pred.size)
    percentile_t = 100 * quantile

    y_pred_b = np.array(y_pred > np.percentile(y_pred, percentile_t),
                        dtype=int)
    out = get_metrics_bin(y_true, y_pred, t=percentile_t)
    (prec, recall, FPR, SP, Acc, f1, KSS_score, EDI) = out
    prec = metrics.precision_score(y_true, y_pred_b)
    acc = metrics.accuracy_score(y_true, y_pred_b)
    if cont_pred:
        AUC_score = metrics.roc_auc_score(y_true, y_pred)
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred_b)
        # P : Precision at threshold, R : Recall at threshold, PRthresholds
        P, R, PRthresholds = metrics.precision_recall_curve(y_true, y_pred)
        AUCPR_score = metrics.average_precision_score(y_true, y_pred)

        # convert y_pred to fake probabilities if spatcov is given
        if y_pred.max() > 1 or y_pred.min() < 0:
            y_pred = (y_pred + abs(y_pred.min())) / \
                     (y_pred.max() + abs(y_pred.min()))
        brier_score = metrics.brier_score_loss(y_true, y_pred)
        brier_score_clim = metrics.brier_score_loss(y_true, y_pred_c)

    old_index = range(0, len(y_pred), 1)
    n_bl = blocksize
    chunks = [old_index[n_bl * i:n_bl * (i + 1)]
              for i in range(int(len(old_index) / n_bl))]
    # divide subchunks to bootstrap on all cpus
    n_boot_sub = int(round((n_boot / max_cpu) + 0.4, 0))

    with ProcessPoolExecutor(max_workers=max_cpu) as pool:
        futures = []
        unique_seed = 42
        for i_cpu in range(max_cpu):
            unique_seed += 1  # ensure that no same shuffling is done
            futures.append(pool.submit(_bootstrap, y_true, y_pred,
                                       n_boot_sub, chunks, percentile_t,
                                       unique_seed))
        out = [future.result() for future in futures]

    boots_AUC = []
    boots_AUCPR = []
    boots_brier = []
    boots_prec = []
    boots_acc = []
    boots_KSS = []
    boots_EDI = []
    for i_cpu in range(max_cpu):
        _AUC, _AUCPR, _brier, _prec, _acc, _KSS, _EDI = out[i_cpu]
        boots_AUC.append(_AUC)
        boots_AUCPR.append(_AUCPR)
        boots_brier.append(_brier)
        boots_prec.append(_prec)
        boots_acc.append(_acc)
        boots_KSS.append(_KSS)
        boots_EDI.append(_EDI)
    # Computing the lower and upper bound of the 90% confidence interval.
    # You can change the bounds percentiles to 0.025 and 0.975 to get
    # a 95% confidence interval instead.
    def get_ci(boots, alpha=0.05):
        if len(np.array(boots).shape) == 2:
            boots = flatten(boots)
        sorted_scores = np.array(boots)
        sorted_scores.sort()
        ci_low = sorted_scores[int(alpha * len(sorted_scores))]
        ci_high = sorted_scores[int((1 - alpha) * len(sorted_scores))]
        return ci_low, ci_high, sorted_scores

    if np.array(boots_AUC).ravel().size != 0:
        if cont_pred:
            ci_low_AUC, ci_high_AUC, sorted_AUCs = get_ci(boots_AUC, alpha)
            ci_low_AUCPR, ci_high_AUCPR, sorted_AUCPRs = get_ci(boots_AUCPR,
                                                                alpha)
            ci_low_brier, ci_high_brier, sorted_briers = get_ci(boots_brier,
                                                                alpha)
        ci_low_KSS, ci_high_KSS, sorted_KSSs = get_ci(boots_KSS, alpha)
        ci_low_prec, ci_high_prec, sorted_precs = get_ci(boots_prec, alpha)
        ci_low_acc, ci_high_acc, sorted_accs = get_ci(boots_acc, alpha)
        ci_low_EDI, ci_high_EDI, sorted_EDIs = get_ci(boots_EDI, alpha)
    else:
        if cont_pred:
            ci_low_AUC, ci_high_AUC, sorted_AUCs = (AUC_score, AUC_score,
                                                    [AUC_score])
            ci_low_AUCPR, ci_high_AUCPR, sorted_AUCPRs = (AUCPR_score,
                                                          AUCPR_score,
                                                          [AUCPR_score])
            ci_low_brier, ci_high_brier, sorted_briers = (brier_score,
                                                          brier_score,
                                                          [brier_score])
        ci_low_KSS, ci_high_KSS, sorted_KSSs = (KSS_score, KSS_score,
                                                [KSS_score])
        ci_low_prec, ci_high_prec, sorted_precs = (prec, prec, [prec])
        ci_low_acc, ci_high_acc, sorted_accs = (acc, acc, [acc])
        ci_low_EDI, ci_high_EDI, sorted_EDIs = (EDI, EDI, [EDI])

    if cont_pred:
        metrics_dict['AUC'] = (AUC_score, ci_low_AUC, ci_high_AUC,
                               sorted_AUCs)
        metrics_dict['AUCPR'] = (AUCPR_score, ci_low_AUCPR, ci_high_AUCPR,
                                 sorted_AUCPRs)
        metrics_dict['brier'] = (brier_score, brier_score_clim, ci_low_brier,
                                 ci_high_brier, sorted_briers)
        metrics_dict['fpr_tpr_thres'] = fpr, tpr, thresholds
        metrics_dict['P_R_thres'] = P, R, PRthresholds
    metrics_dict['KSS'] = (KSS_score, ci_low_KSS, ci_high_KSS, sorted_KSSs)
    metrics_dict['prec'] = (prec, ci_low_prec, ci_high_prec, sorted_precs)
    metrics_dict['acc'] = (acc, ci_low_acc, ci_high_acc, sorted_accs)
    metrics_dict['EDI'] = EDI, ci_low_EDI, ci_high_EDI, sorted_EDIs
    # print("Confidence interval for the score: [{:0.3f} - {:0.3}]".format(
    #     confidence_lower, confidence_upper))
    #%%
    return metrics_dict
# (tail of a truncated LogisticRegression(...) call carried over from the
#  preceding snippet)
#  ... random_state=None, solver='warn', tol=0.0001, verbose=0,
#      warm_start=False)
clfLogisticRegression.fit(X_train, y_train)
y_pred_c = clfLogisticRegression.predict(X_test)
y_pred_proba_clg = clfLogisticRegression.predict_proba(X_test)[:, 1]

confmat_test_c = confusion_matrix(y_true=y_test, y_pred=y_pred_c)
print('confmat_test:\n', confmat_test_c)
print('the acc is:', accuracy_score(y_test, y_pred_c))
print('the classification_report:', classification_report(y_test, y_pred_c))
print('the auc of logistics is:', roc_auc_score(y_test, y_pred_proba_clg))
print('the brier score is', brier_score_loss(y_test, y_pred_proba_clg))
# confmat_test:
#  [[3125  184]
#   [  17   51]]
# the acc is: 0.9404797157240155
# the classification_report:  precision    recall  f1-score   support
#
#          0.0       0.99      0.94      0.97      3309
#          1.0       0.22      0.75      0.34        68
#
#     accuracy                           0.94      3377
#    macro avg       0.61      0.85      0.65      3377
# weighted avg       0.98      0.94      0.96      3377
#
# the auc of logistics is: 0.8805752582084511
n = len(methods)
# compare evaluation metrics across methods
for i in range(n):
    print('========' + str(methods[i]))
    cutoff = 0.5
    # cutoff = test[methods[i] + 'DefaultPred'].median()
    f1 = f1_score(test.wtbz,
                  pd.Series(test[methods[i] + 'DefaultPred'] > cutoff)
                  .apply(lambda x: 1 if x else 0))
    print('%.3f' % f1)
    precision = precision_score(test.wtbz,
                                pd.Series(test[methods[i] + 'DefaultPred'] > cutoff)
                                .apply(lambda x: 1 if x else 0))
    print('%.3f' % precision)
    # clip predictions above 1 before computing the ROC curve
    fpr, tpr, thresholds = roc_curve(test.wtbz,
                                     test[methods[i] + 'DefaultPred']
                                     .apply(lambda x: 1 if x > 1 else x))
    # KS statistic
    print('%.3f' % np.max(tpr - fpr))
    # clip predictions into [0, 1] before computing the Brier score
    bs = brier_score_loss(test.wtbz,
                          test[methods[i] + 'DefaultPred']
                          .apply(lambda x: x if x > 0 else 0)
                          .apply(lambda x: 1 if x > 1 else x))
    print('%.3f' % bs)
    ap = average_precision_score(test.wtbz, test[methods[i] + 'DefaultPred'])
    print('%.3f' % ap)
    auc = roc_auc_score(test.wtbz, test[methods[i] + 'DefaultPred'])
    print('%.3f' % auc)
    a = test.wtbz[test[methods[i] + 'DefaultPred'] < cutoff]
    b = test[methods[i] + 'DefaultPred'][test[methods[i] + 'DefaultPred'] < cutoff]
    try:
        auc = roc_auc_score(a, b)
    except ValueError:
        pass
def computeTestScore():
    # Read data from HDFStore file
    X1 = pd.read_hdf('trainingDataT1.h5', 'data')
    y1 = pd.read_hdf('trainingDataT1.h5', 'y')
    X2 = pd.read_hdf('trainingDataT2.h5', 'data')
    y2 = pd.read_hdf('trainingDataT2.h5', 'y')
    X3 = pd.read_hdf('trainingDataT3.h5', 'data')
    y3 = pd.read_hdf('trainingDataT3.h5', 'y')
    print('X and y read')
    X1.drop(X1.columns[[20, 21, 22]], axis=1, inplace=True)
    X1.columns = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                  17, 18, 19, 20, 21, 22, 23, 24]
    # pd.concat replaces the deprecated DataFrame.append
    X = pd.concat([X1, X2, X3])
    y = pd.concat([y1, y2, y3])
    print('X shape ', X.shape)
    # DataFrame.to_numpy() replaces the deprecated as_matrix()
    X = X.to_numpy()
    # astype(int) replaces map(int, y), which is a lazy iterator in Python 3
    y = np.array(y).ravel().astype(int)
    listX0 = []
    listX1 = []
    for i in range(X.shape[0]):
        if y[i] == 1:
            listX1.append(X[i][0])
        elif y[i] == 0:
            listX0.append(X[i][0])
    print('min = %d, max=%d' % (np.amin(listX0), np.amax(listX0)))
    print(np.median(listX0))
    print('min = %d, max=%d' % (np.amin(listX1), np.amax(listX1)))
    print(np.median(listX1))
    print('ratio : ', np.median(listX1) / np.median(listX0))
    '''
    print('min = %d, max=%d' % (np.amin(X), np.amax(X)))
    print(np.median(X))
    print('X and y processed')
    '''
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=42)
    clf = GradientBoostingClassifier(random_state=42)
    '''
    # Create GBT algorithm with xgboost library
    clf = XGBoostClassifier(
        objective='binary:logistic',
        booster='gbtree',
        eval_metric='auc',
        tree_method='exact',
        num_class=2,
        silent=1,
        seed=42,
    )
    parameters = {
        'eta': [0.01],             # [0.01, 0.015, 0.025, 0.05, 0.1],
        'gamma': [0.1],            # [0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
        'max_depth': [2],          # [3, 5, 7, 9, 12, 15, 17, 25],
        'min_child_weight': [1],   # [1, 3, 5, 7],
        'subsample': [0.4],        # [0.6, 0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [1.0], # [0.6, 0.7, 0.8, 0.9, 1.0],
        'lambda': [0.1],           # [0.05, 0.1, 1.0],
        'alpha': [0.01],           # [0, 0.1, 0.5, 1.0],
    }
    eval_size = 0.10
    kf = StratifiedKFold(y_train, round(1. / eval_size), shuffle=True,
                         random_state=42)
    scoring_fnc = make_scorer(roc_auc_score)
    clf = GridSearchCV(clf, parameters, scoring_fnc, cv=kf, n_jobs=-1)
    '''
    clf.fit(X_train, y_train)
    '''
    clf = clf.best_estimator_
    '''
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Model with isotonic calibration
    clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

    # Model with sigmoid calibration
    clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
    clf_sigmoid.fit(X_train, y_train)
    prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

    print("Brier scores: (the smaller the better)")
    clf_score = brier_score_loss(y_test, prob_pos_clf)
    print("No calibration: %1.3f" % clf_score)
    clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic)
    print("With isotonic calibration: %1.3f" % clf_isotonic_score)
    clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid)
    print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)

    print("AUC scores:")
    clf_auc_score = roc_auc_score(y_test, prob_pos_clf)
    print("No calibration: %1.3f" % clf_auc_score)
    clf_isotonic_auc_score = roc_auc_score(y_test, prob_pos_isotonic)
    print("With isotonic calibration: %1.3f" % clf_isotonic_auc_score)
    clf_sigmoid_auc_score = roc_auc_score(y_test, prob_pos_sigmoid)
    print("With sigmoid calibration: %1.3f" % clf_sigmoid_auc_score)
def brier_scorer(estimator, X, y):
    # scorer with the (estimator, X, y) signature expected by scikit-learn
    probabilities = estimator.predict_proba(X)
    return metrics.brier_score_loss(
        np.asarray(y, dtype=float),  # replaces map(), which is lazy in Python 3
        probabilities[:, 1],
    )
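# For scikit-learn versions that support it, make_scorer with needs_proba=True
# (newer releases spell this response_method='predict_proba') builds an
# equivalent scorer; greater_is_better=False negates the loss so that
# hyperparameter search still maximizes. Since version 0.22 the built-in
# string scoring='neg_brier_score' does the same thing.
from sklearn.metrics import make_scorer, brier_score_loss

neg_brier = make_scorer(brier_score_loss, greater_is_better=False,
                        needs_proba=True)
# e.g. GridSearchCV(estimator, param_grid, scoring=neg_brier)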
def getscores(X, pred_y, test_y, harm1test, harm2test, j, predtag, eblcattest):
    pred_y = pd.DataFrame(pred_y)
    harm1test = pd.DataFrame(harm1test)
    harm2test = pd.DataFrame(harm2test)
    test_y = pd.DataFrame(test_y)
    # td = [pred, harm1test, harm2test, truthtest]
    # td = pd.concat(td, axis=1)
    # td.columns = ['pred', 'harm1', 'harm2', 'truthtest']
    # td.sort_values(by='harm1', ascending=True)
    sumharm1 = sum(harm1test)
    sumharm2 = sum(harm2test)
    thresh_cent = np.arange(0.01, 1, 0.01)
    cent = 100 - np.arange(1, 100, 1)
    output = pd.DataFrame(np.full((100, 22), np.nan))

    for a in range(0, 99, 1):
        test_thresh = thresh_cent[a]
        test_cent = cent[a]
        # 'harm2' was undefined in the original and pharm_cent is never used,
        # so the line is kept only for reference:
        # pharm_cent = np.percentile(harm2test[harm2test > 0], test_cent)
        # ppred_cent = np.percentile((td.harm2[td.pred2 > 0]), test_cent)
        # rows_h1cent = harm1test.index.values[harm1test >= pharm_cent]
        # rows_h2cent = harm2test.index.values[harm2test >= pharm_cent]
        # rows_pcent = td['pred'].index.values[td['pred'] >= pharm_cent]
        pred3 = to_labels(pred_y, test_thresh)
        # tempdata_h = td.iloc[rows_hcent, :]
        # temph_return = getppv(pred3, test_y, harm1test, harm2test,
        #                       sumharm1, sumharm2)
        p3truth_yes = np.where(np.array(test_y) == 1)
        p3truth_no = np.where(np.array(test_y) == 0)
        p3screen_yes = np.where(np.array(pred3) == 1)
        p3screen_no = np.where(np.array(pred3) == 0)
        # row indices per eblcat category (1-4)
        eblcats = [np.where(np.array(eblcattest) == k) for k in (1, 2, 3, 4)]

        tp_temp_yes = np.where(p3screen_yes)  # unused in the original
        tp_capture_pos = np.intersect1d(p3truth_yes, p3screen_yes)
        tp_capture_neg = np.intersect1d(p3truth_no, p3screen_no)

        harm2capture_tpos = 0
        if (harm2test[pred3 == 1].sum() > 0).bool():
            harm2capture_tpos = harm2test[pred3 == 1].sum() / harm2test.sum()
        harm2capture_pos = 0
        if (harm2test.iloc[tp_capture_pos].sum() > 0).bool():
            harm2capture_pos = (harm2test.iloc[tp_capture_pos].sum()
                                / harm2test.sum())
        harm2capture_tneg = 0
        if (harm2test[pred3 == 0].sum() > 0).bool():
            # the original divided by harm1test.sum(); harm2test.sum() is
            # presumably the intended denominator, matching the other ratios
            harm2capture_tneg = harm2test[pred3 == 0].sum() / harm2test.sum()
        harm2capture_neg = 0
        if (harm2test.iloc[tp_capture_neg].sum() > 0).bool():
            harm2capture_neg = (harm2test.iloc[tp_capture_neg].sum()
                                / harm2test.sum())

        ptn, pfp, pfn, ptp = confusion_matrix(test_y, pred3).ravel()

        # confusion-matrix cells within each eblcat category; this loop
        # replaces four copy-pasted blocks in the original
        tp_ebl, fp_ebl, tn_ebl, fn_ebl = [], [], [], []
        for ebl in eblcats:
            tp_ebl.append(np.intersect1d(tp_capture_pos, ebl).shape[0])
            fp_ebl.append(np.intersect1d(
                np.intersect1d(p3truth_no, p3screen_yes), ebl).shape[0])
            tn_ebl.append(np.intersect1d(tp_capture_neg, ebl).shape[0])
            fn_ebl.append(np.intersect1d(
                np.intersect1d(p3truth_yes, p3screen_no), ebl).shape[0])

        pspec = ptn / (ptn + pfp) if ptn != 0 else 0
        psens = ptp / (ptp + pfn) if ptp != 0 else 0
        pppv = ptp / (ptp + pfp) if ptp != 0 else 0
        pnpv = ptn / (ptn + pfn) if ptn != 0 else 0
        oapr = pfp / ptp if ptp != 0 and pfp != 0 else 0
        # the original guarded OANR with ptp/pfp, but ptn is the denominator
        oanr = pfn / ptn if ptn != 0 else 0

        # PPV and NPV per eblcat category
        ppv_ebl = [tp / (tp + fp) if tp != 0 else 0
                   for tp, fp in zip(tp_ebl, fp_ebl)]
        npv_ebl = [tn / (tn + fn) if tn != 0 else 0
                   for tn, fn in zip(tn_ebl, fn_ebl)]

        # auc(x, y) expects the x-coordinate first; the original call
        # auc(tpr, fpr) had the arguments swapped
        fpr, tpr, _ = roc_curve(test_y, pred3)
        auc_score = auc(fpr, tpr)
        precision, recall, _ = precision_recall_curve(test_y, pred3)
        prc_score = auc(recall, precision)
        try:
            tempbrier = brier_score_loss(test_y, pred_y)
        except ValueError:  # the original used a bare except
            tempbrier = 0
        try:
            t1 = harm2capture_tpos[0]
        except (TypeError, KeyError, IndexError):
            t1 = 0
        try:
            t2 = harm2capture_pos[0]
        except (TypeError, KeyError, IndexError):
            t2 = 1
        try:
            t3 = 1 - harm2capture_tneg[0]
        except (TypeError, KeyError, IndexError):
            t3 = 1
        try:
            t4 = 1 - harm2capture_neg[0]
        except (TypeError, KeyError, IndexError):
            t4 = 0

        output.iloc[a, 0:9] = [test_thresh, psens, pspec, pppv, pnpv,
                               t1, t2, t3, t4]
        output.iloc[a, 9:13] = ppv_ebl
        output.iloc[a, 13:17] = npv_ebl
        output.iloc[a, 17:22] = [prc_score, auc_score, tempbrier, oapr, oanr]

    output.columns = [
        'Thresh', 'pSens', 'pSpec', 'pPPV', 'pNPV',
        'harmCaptureAllPos', 'harmCaptureTruePos',
        'harmCaptureAllNeg', 'harm2CaptureTrueNeg',
        'PPV_eblcat1', 'PPV_eblcat2', 'PPV_eblcat3', 'PPV_eblcat4',
        'NPV_eblcat1', 'NPV_eblcat2', 'NPV_eblcat3', 'NPV_eblcat4',
        'PRC', 'AUC', 'Brier', 'OAPR', 'OANR'
    ]
    # note: both branches of the original wrote to the same path
    if predtag == 'Yes':
        filename = 'Z:/2019/PPH/AnalysisMaster/ML/Data/Chromosomes/chr4list.xlsx'
    if predtag == 'No':
        filename = 'Z:/2019/PPH/AnalysisMaster/ML/Data/Chromosomes/chr4list.xlsx'
    output.to_excel(filename)
    return output
# (tail of a loop over h carried over from the preceding snippet)
    if h == 0:
        new_bools.append(1)
    else:
        new_bools.append(0)

from sklearn.metrics import brier_score_loss, average_precision_score, accuracy_score
import numpy

print(
    "\nAccuracy score as defined by\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score"
)
print(accuracy_score(hnr_booleans, binary_prediction))
print(
    "\nBrier score loss as defined by\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.brier_score_loss.html#sklearn.metrics.brier_score_loss"
)
print(brier_score_loss(new_bools, probability_list))
print(
    "\nArea under the PR-Curve\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html#sklearn.metrics.average_precision_score"
)
print(average_precision_score(new_bools, probability_list, average='micro'))

print("\nMean across the percentages")
print(numpy.mean(std_list))
print("\nSTD of the likelihood")
print(numpy.std(std_list))
print("\nVariance")
print(numpy.var(std_list))
import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0])
y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
y_prob = np.array([0.1, 0.9, 0.8, 0.3])

print(brier_score_loss(y_true, y_prob))
print(brier_score_loss(y_true, 1 - y_prob, pos_label=0))
# brier_score_loss(y_true_categorical, y_prob, pos_label="ham")
# brier_score_loss(y_true, np.array(y_prob) > 0.5)
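# The two commented calls above are the instructive edge cases: with string
# labels, pos_label must say which class y_prob refers to, and passing hard
# 0/1 predictions instead of probabilities reduces the score to a plain
# misclassification rate. A small sketch of the string-label case, reusing
# the arrays defined above:
# "ham" plays the role of the positive class, so y_prob is read as P(ham)
print(brier_score_loss(y_true_categorical, y_prob, pos_label="ham"))
# equivalent formulation against explicit 0/1 labels
print(brier_score_loss((y_true_categorical == "ham").astype(int), y_prob))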
# write classification report print(class_report, file=open(results_folder + "rf_uc_classification_report.txt", "w")) # define confusion matrix cm = confusion_matrix(y_test, y_pred_class) # run accuracy summary on confuxion matrix dx_summary = dx_accuracy(cm) print(dx_summary) # save summary metrics dx_summary.to_csv(results_folder + "rf_uc_dx_summary.csv") """ 2. Brier score """ brier_score = np.round(brier_score_loss(y_test, y_pred[:, 1]), 3) print('Ulcerative Colitis RF Clinical + Labs Features Benchmark', '\nBrier Score:', brier_score, file=open(results_folder + 'brier_score.txt', 'w')) """ 3. ROC """ # roc for prediction of y=1 (2nd part of 2d array) fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1]) # auc roc_auc = auc(fpr, tpr) # create roc_df roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
def squared_err(yPred, yTest):
    # the Brier score is the mean squared error of probability forecasts
    return brier_score_loss(yTest, yPred)
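# The name squared_err is apt: for 0/1 labels, brier_score_loss is exactly
# the mean squared error between outcomes and predicted probabilities, as a
# quick check with illustrative arrays confirms:
import numpy as np
from sklearn.metrics import brier_score_loss

y_test = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0.2, 0.7, 0.9, 0.4, 0.6])
assert np.isclose(brier_score_loss(y_test, y_pred),
                  np.mean((y_test - y_pred) ** 2))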
def GlobalBrier_optimiser(X_trainval, y_trainval, weights_trainval, X_test, y_test, curr_year=2020): from scipy.optimize import minimize, Bounds import functools from sklearn.model_selection import KFold, train_test_split #the residual cost function to minimise def forecast_error_func(x, arg1, arg2, arg3, arg4, arg5): Data = arg1 Outcome = arg2 weights = arg3 P_est = Data @ x #weighted Arithmetic mean #P_est = np.power(np.prod(np.power(Data , x),axis=1), 1/sum(x)) #weighted Geometric mean r = P_est - Outcome l1 = arg4 #regularisation coefficient l2 = arg5 #regularisation coefficient reg1 = l1 * np.sum(x * x) #L2 norm reg2 = l2 * np.sum(abs(x)) #L1 norm #Elastic NET # W=np.ones(len(Outcome)) #flat # W = np.exp((curr_year-weights)) #exponential # W = (curr_year-weights)**2 #squared # W= 2**(curr_year-weights) # power W = np.log(1 + curr_year - weights) #logarihmic return np.sum((r * r) * W) / len(Outcome) + reg1 + reg2 def constraint1(x): return np.sum(x) - 1 n_experts = X_trainval.shape[1] y_trainval_bin = (y_trainval == 1).astype( int) #convert to Away-based binary labels #constraints and bounds for the optimisation cons = {'type': 'eq', 'fun': constraint1} bnds = Bounds(0, 1) #Set up the grid search Ntests = 50 coeff_grid = np.linspace(0, 1, Ntests) Brier_Scores = np.ones(Ntests) for i in range(Ntests): coeff = coeff_grid[i] #shuffle the data and split it into train and validation sets X_train, X_val, y_train, y_val, w_train, w_val = train_test_split( X_trainval, y_trainval_bin, weights_trainval, test_size=0.3, shuffle=True, random_state=1) #setup the optimisation problem objective_fun = functools.partial(forecast_error_func, arg1=X_train, arg2=y_train, arg3=w_train, arg4=coeff, arg5=1 - coeff) #initial weights x0 = np.ones(n_experts) / n_experts out = minimize(objective_fun, x0, options={ 'disp': False, 'maxiter': 500 }, method='SLSQP', constraints=cons, bounds=bnds) x_opt = out.x #evaluate on validation data y_prob = np.zeros([len(X_val), 2]) Brier_Scores[i] = brier_score_loss(y_val, X_val @ x_opt, pos_label=1) #get the best coefficients that minimise the brier score best_coeffs = coeff_grid[np.argmin(Brier_Scores)] #fit on train&val and evaluate on test data objective_fun = functools.partial(forecast_error_func, arg1=X_trainval, arg2=y_trainval_bin, arg3=weights_trainval, arg4=best_coeffs, arg5=1 - best_coeffs) out = minimize(objective_fun, x0, options={ 'disp': False, 'maxiter': 500 }, method='SLSQP', constraints=cons, bounds=bnds) x_opt = out.x y_test_bin = (y_test == 1).astype( int) #convert to Away-based binary labels y_prob = np.zeros([len(X_test), 2]) y_prob[:, 0] = X_test @ x_opt y_prob[:, 1] = 1 - y_prob[:, 0] return brier_score_loss(y_test_bin, y_prob[:, 0], pos_label=1)
def plot_calibration_curve(est, name, fig_index, X_train, X_test, y_train, y_test, cv='prefit'): ''' Plot calibration curve for est w/o and with calibration. Inputs: est : the model name : the model name fig_index : which figure to plot it in cv : the cross-validation strategy Stock models will be fitted already and are applicable to 'prefit' Integer values are the number of folds e.g., # Plot calibration curve for Gaussian Naive Bayes plot_calibration_curve(GaussianNB(), "Naive Bayes", 1) # Plot calibration curve for Linear SVC plot_calibration_curve(LinearSVC(), "SVC", 2) ''' # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=cv, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=cv, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1., solver='lbfgs') # fig = plt.figure(fig_index, figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (est, name), (isotonic, name + ' + Isotonic'), (sigmoid, name + ' + Sigmoid')]: # if name == 'Logistic': # clf.fit(X_train, y_train) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=1) # clf_score = brier_score_loss(y_test, prob_pos) print("%s:" % name) print("\tBrier: %1.3f" % (clf_score)) print("\tPrecision: %1.3f" % precision_score(y_test, y_pred)) print("\tRecall: %1.3f" % recall_score(y_test, y_pred)) print("\tF1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout() plt.show()
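# A hypothetical usage sketch for the function above, for scikit-learn
# versions that still accept cv='prefit' (deprecated in 1.2): the base
# estimator must be fitted beforehand, and only the calibrators are trained
# inside CalibratedClassifierCV. The model and split names are placeholders.
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=200, random_state=0)
rf.fit(X_train, y_train)  # pre-fit, as required by cv='prefit'
plot_calibration_curve(rf, "Random Forest", 1,
                       X_train, X_test, y_train, y_test, cv='prefit')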
def cross_val_estimate(estimator, X, y, cv1=None, n_folds=8, n_jobs=1,
                       verbosity=1):
    """
    Estimate the estimator using cross-validation.

    - Calculate probabilities of the target (dplus) returned by the
      classifier using cross-validation: predict targets for the validation
      part after training the estimator on the training part inside the
      cross-validation cycle
    - Estimate scores of the classifier using roc_auc as a metric
    - Calculate LogLoss and Brier score loss (mean squared error) to estimate
      the quality of the predicted probabilities
    - Calculate sensitivity and specificity using the best threshold for
      their harmonic mean
    - Print a classification report using the best threshold for F1-score

    Parameters
    ----------
    estimator: BaseEstimator-like
        an estimator to estimate
    X: array, shape=(n_samples, n_features)
        the train data samples with values of their features
    y: array, shape=(n_samples,)
        the targets
    n_folds: int, optional (default=8)
        number of folds in the cross-validation
    n_jobs: int, optional (default=1)
        number of cores to use to speed up calculations
    verbosity: int, optional (default=1)
        level of verbosity

    Returns
    -------
    y_proba: array
        numpy array of predicted probabilities
    scores: array
        numpy array of cross-validated scores
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # model_selection provides StratifiedKFold with the n_splits API
    # (the custom cross_val_predict_proba helper is assumed to accept a
    # new-style CV splitter)
    from sklearn import metrics, model_selection
    from .model_selection import cross_val_predict_proba
    from .modsel import (
        estimate_scores, precision_sensitivity_specificity, best_threshold)

    y_true = y
    scoring = 'roc_auc'
    if cv1 is None:
        cv1 = model_selection.StratifiedKFold(n_splits=n_folds)
    y_proba, scores = cross_val_predict_proba(
        estimator, X, y, scoring=scoring, cv=cv1, n_jobs=n_jobs, verbose=0,
        fit_params=None, pre_dispatch='2*n_jobs')
    print("\nScores: ", " ".join(["{:.2f}".format(e) for e in scores]))
    scores_mean, me = estimate_scores(scores, scoring, sampling=False)

    best_thr1, best_thr2 = best_threshold(y_true, y_proba)
    precision, sensitivity, specificity = precision_sensitivity_specificity(
        y_true, y_proba, threshold=best_thr2)
    print()
    print(
        "LogLoss: {:1.3f} | Brier score loss: {:1.3f} | sensitivity(recall): "
        "{:1.2f} and specificity: {:1.2f} with threshold={:1.2f}".format(
            metrics.log_loss(y_true, y_proba),
            metrics.brier_score_loss(y_true, y_proba),
            sensitivity, specificity, best_thr2))

    target_names = ['class 0', 'class 1']
    print("Threshold={:1.2f}:".format(best_thr1))
    print(metrics.classification_report(
        y_true, np.asarray(y_proba > best_thr1, dtype=int),
        target_names=target_names))
    return y_proba, scores
# Gaussian Naive-Bayes with no calibration
clf = GaussianNB()
clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
prob_pos_clf = clf.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with isotonic calibration
clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
clf_isotonic.fit(X_train, y_train, sw_train)
prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with sigmoid calibration
clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
clf_sigmoid.fit(X_train, y_train, sw_train)
prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

print("Brier scores: (the smaller the better)")
clf_score = brier_score_loss(y_test, prob_pos_clf)
# roc_auc_score replaces the original auc(y_test, prob_pos, True) calls;
# metrics.auc expects curve coordinates, not labels and scores
clf_score_auc = roc_auc_score(y_test, prob_pos_clf)
print("No calibration: %1.3f, %1.3f" % (clf_score, clf_score_auc))
clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic)
clf_isotonic_score_auc = roc_auc_score(y_test, prob_pos_isotonic)
print("With isotonic calibration: %1.3f, %1.3f"
      % (clf_isotonic_score, clf_isotonic_score_auc))
clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid)
clf_sigmoid_score_auc = roc_auc_score(y_test, prob_pos_sigmoid)
print("With sigmoid calibration: %1.3f, %1.3f"
      % (clf_sigmoid_score, clf_sigmoid_score_auc))
def plot_calibration_curve(est, name, fig_index, y_test, X_test, y_train, X_train): """Plot calibration curve for est w/o and with calibration. """ # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1., solver='lbfgs') fig = plt.figure(fig_index, figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (est, name), (isotonic, name + ' + Isotonic'), (sigmoid, name + ' + Sigmoid')]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_pred.max()) # print("%s:" % name) # print("\tBrier: %1.3f" % (clf_score)) # print("\tPrecision: %1.3f" % precision_score(y_test, y_pred)) # print("\tRecall: %1.3f" % recall_score(y_test, y_pred)) # print("\tF1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout()
def RandomGridSearch(self,x_train,y_train,x_test,y_test,splits,path_results,m,itera,clf_g,name,tuned_parameters,opt,ite): """ This function looks for the best set o parameters for RFC method Input: X: training set Y: labels of training set splits: cross validation splits, used to make sure the parameters are stable Output: clf.best_params_: dictionary with the parameters, to use: param_svm['kernel'] """ start_rfc = time.time() #clf_grid = RandomizedSearchCV(clf_g, tuned_parameters, cv=splits,random_state=random_state, # scoring='%s' % opt[0],n_jobs=n_jobs) clf_grid = RandomizedSearchCV(clf_g, tuned_parameters, cv=splits,random_state=random_state, scoring='%s' % opt[0],n_jobs=n_jobs) clf_grid.fit(x_train, y_train) #print("Score",clf.best_score_) end_rfc = time.time() print("Time to process: ",end_rfc - start_rfc) with open(path_results+"parameters_"+name+".txt", "a") as file: for item in clf_grid.best_params_: file.write(" %s %s " %(item,clf_grid.best_params_[item] )) file.write("\n") #clf = clf_g(**clf_grid.best_params_,random_state=random_state) clf = clf_grid.best_estimator_ x_train_t, x_val, y_train_t, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=random_state) #clf_t = clf_g(**clf_grid.best_params_,random_state=random_state) clf_t = clf.fit(x_train_t,y_train_t) # import shap # #testing feature importance with xgb # x_train_t = pd.DataFrame(x_train,columns=new_cols) # #f = plt.figure(figsize=(25, 19)) # #xgboost.plot_importance(clf_t,importance_type="gain") # # explainer = shap.TreeExplainer(clf_t) # shap_values = explainer.shap_values(x_train_t) # shap.summary_plot(shap_values, x_train_t, plot_type="bar") # #end of test if name=="SVM": decisions = clf.decision_function(x_test) probas=\ (decisions-decisions.min())/(decisions.max()-decisions.min()) decisions_t = clf_t.decision_function(x_val) probas_val=\ (decisions_t-decisions_t.min())/(decisions_t.max()-decisions_t.min()) else: probas = clf.predict_proba(x_test)[:, 1] probas_val = clf_t.predict_proba(x_val)[:, 1] ts=np.linspace(0.1, 0.99, num=100) best_val=0 best_t=0 t_spec=0 found=False found_ppv=False for i in range(ts.shape[0]): p=probas_val>ts[i] #c_f1=f1_score(y_val, p) tn, fp, fn, tp = confusion_matrix(y_val, p).ravel() c_f1 = tp/(tp+fp) c_spec=tn/(tn+fp) #if c_f1>best_val: if c_f1>=0.95 and not found_ppv: best_val=c_f1 best_t=ts[i] found_ppv=True if c_spec>=0.95 and not found: t_spec=ts[i] found=True #print(c_spec) self.model = clf preds = clf.predict(x_test) m.clf_f1_score[ite,itera]=f1_score(y_test, preds) tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel() m.clf_sens[ite,itera]=tp/(tp+fn) m.clf_spec[ite,itera]=tn/(tn+fp) m.clf_ppv[ite,itera]=tp/(tp+fp) m.clf_npv[ite,itera]=tn/(tn+fn) m.f1_score_f1[ite,itera]=f1_score(y_test, probas>best_t) tn, fp, fn, tp = confusion_matrix(y_test, probas>best_t).ravel() m.sens_f1[ite,itera]=tp/(tp+fn) m.spec_f1[ite,itera]=tn/(tn+fp) m.clf_ppv_f1[ite,itera]=tp/(tp+fp) m.clf_npv_f1[ite,itera]=tn/(tn+fn) m.f1_score_spec[ite,itera] = f1_score(y_test, probas>t_spec) tn, fp, fn, tp = confusion_matrix(y_test, probas>t_spec).ravel() m.sens_spec[ite,itera] = tp/(tp+fn) m.spec_spec[ite,itera] = tn/(tn+fp) m.clf_ppv_spec[ite,itera] = tp/(tp+fp) m.clf_npv_spec[ite,itera] = tn/(tn+fn) m.probas = probas m.preds = preds m.clf_auc[ite,itera] = roc_auc_score(y_test,probas) m.clf_thresholds[ite,itera] = t_spec fpr_rf, tpr_rf, _ = roc_curve(y_test, probas) m.clf_brier[ite,itera] = brier_score_loss(y_test, probas) tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel() 
print(probas_val.shape,y_train.shape) save_prob = np.concatenate((probas.reshape(-1,1),y_test.reshape(-1,1)),axis = 1) save_prob_train = np.concatenate((probas_val.reshape(-1,1),y_val.reshape(-1,1)),axis = 1) #Feature importance weights = list() stds = list() names = list() import eli5 model = clf f = pd.DataFrame() f['name']=name from eli5.sklearn import PermutationImportance perm = PermutationImportance(model, random_state=1,scoring="roc_auc").fit(x_train,y_train) new_cols = np.load(r"\\amc.intra\users\L\laramos\home\Desktop\MrClean_Poor\HPC\organize_cols.npy") html = eli5.explain_weights(perm, feature_names = new_cols.tolist()) for imp in range(len(html.feature_importances.importances)): weights.append(html.feature_importances.importances[imp].weight) stds.append(html.feature_importances.importances[imp].std) names.append(html.feature_importances.importances[imp].feature) import_frame = pd.DataFrame(list(zip(names,weights,stds))) import_frame.columns = ['name','weight','std'] import_frame.to_excel(path_results+'features_'+name+'_'+str(itera)+'.xls') #np.save(path_results+"probabilities_"+name+"_"+str(itera)+".npy",probas) np.save(path_results+"probabilities_"+name+"_"+str(itera)+str(i)+".npy",save_prob) np.save(path_results+"probabilities_train"+name+"_"+str(itera)+str(i)+".npy",save_prob_train) #np.save(path_results+"feature_importance"+name+"_"+str(itera)+str(i)+".npy",clf.coef_) #joblib.dump(clf,path_results+'clf_'+name+str(itera)+str(i)) return(fpr_rf,tpr_rf,probas,clf)
def evaluatingModel(model, model_name, X, y, skv): print(model_name + " STARTS HERE\n\n") # Implement BoW model vectorizer = CountVectorizer(analyzer="word", ngram_range=(1, 1)) # Create Confusion Matrix Dictionary cm_dict = { "tp": 0, "fp": 0, "tn": 0, "fn": 0} # Array to store results accuracy_array = [] precision_array = [] fpr_array = [] auc_array = [] log_loss_array = [] brier_array = [] execution_time_array = [] for train_cv, test_cv in skv.split(X,y): # Seperate the training and testing fold # NOTE: y_test corresponds to y_true X_train, X_test = X[train_cv], X[test_cv] y_train, y_test = y[train_cv], y[test_cv] # Transform X_train and X_test using BoW X_train = vectorizer.fit_transform(X_train).toarray() X_test = vectorizer.transform(X_test).toarray() # Train the model model.fit(X_train , y_train) # Predict and calculate run-time # NOTE: result corresponds to y_pred start = time.time() result = model.predict(X_test) end = time.time() execution_time = end - start # Get the probability scores # Use Logistic Regression for LinearSVC case if model_name == 'SVM': lr = LogisticRegression() lr.fit(X_train, y_train) y_scores = lr.predict_proba(X_test) else: y_scores = model.predict_proba(X_test) # Get AUC score, Log Loss auc_score = roc_auc_score(y_test, y_scores[:, 1]) log_loss_score = log_loss(y_test, y_scores) brier_score = brier_score_loss(y_test, y_scores[:, 1]) # Confusion Matrix tn, fp, fn, tp = confusion_matrix(y_test, result).ravel() # Add the results to confusion matrix cm_dict["tn"] += tn cm_dict["fp"] += fp cm_dict["fn"] += fn cm_dict["tp"] += tp # Evaluation Metrics accuracy = accuracy_score(y_test , result) precision = tp/(tp+fp) fpr = fp/(fp + tn) # False Positive Rate # Append results accuracy_array.append(accuracy) precision_array.append(precision) fpr_array.append(fpr) auc_array.append(auc_score) log_loss_array.append(log_loss_score) brier_array.append(brier_score) execution_time_array.append(execution_time) # Get mean results mean_accuracy = np.mean(accuracy_array) mean_precision = np.mean(precision_array) mean_fpr = np.mean(fpr_array) mean_auc = np.mean(auc_array) mean_log_loss = np.mean(log_loss_array) mean_brier = np.mean(brier_array) mean_execution_time = np.mean(execution_time_array) # Get standard deviation (population) accuracy_std = np.std(accuracy_array) precision_std = np.std(precision_array) fpr_std = np.std(fpr_array) auc_std = np.std(auc_array) log_std = np.std(log_loss_array) brier_std = np.std(brier_array) run_std = np.std(mean_execution_time) # Display results print("MEAN ACCURACY: %0.3f (+/- %0.3f) \n" % (mean_accuracy, accuracy_std)) print("MEAN PRECISION: %0.3f (+/- %0.3f) \n" % (mean_precision, precision_std)) print("MEAN FALSE POSITIVE RATE: %0.3f (+/- %0.3f) \n" % (mean_fpr, fpr_std)) print("MEAN AUC SCORE: %0.3f (+/- %0.3f) \n" % (mean_auc, auc_std)) print("MEAN LOG LOSS SCORE: %0.3f (+/- %0.3f) \n" % (mean_log_loss, log_std)) print("MEAN BRIER SCORE LOSS: %0.3f (+/- %0.3f) \n" % (mean_brier, brier_std)) print("MEAN RUN TIME: %0.3f (+/- %0.3f) \n" % (mean_execution_time, run_std)) print("\n\n" + model_name + " STOPS HERE\n\n")
def baseline_resampling(data_path, bad_sample_num, good_sample_num,
                        reject_sample_num, random_state_for_each_epoch,
                        classifier, resampling_model):
    warnings.filterwarnings("ignore")
    raw_data_train = pd.read_csv(data_path, index_col='ID')
    data_bad = raw_data_train[raw_data_train['label'] == 1]
    # print(data_bad.shape)
    data_good = raw_data_train[(raw_data_train['label'] == 0)]
    data_reject = raw_data_train[raw_data_train['label'] == -1]
    data_bad_sampling = data_bad.sample(
        n=bad_sample_num, random_state=random_state_for_each_epoch)
    data_good_sampling = data_good.sample(
        n=good_sample_num, random_state=random_state_for_each_epoch)
    data_train = pd.concat([data_bad_sampling, data_good_sampling], axis=0)
    # print("All Data Size:" + str(data_train.shape))
    feature_name = list(data_train.columns.values)
    # print(feature_name)
    s = 0
    np.random.seed(s)
    sampler = np.random.permutation(len(data_train.values))
    data_train_randomized = data_train.take(sampler)
    # Series/DataFrame.to_numpy() replaces the deprecated as_matrix()
    y = data_train_randomized['label'].to_numpy()
    X = data_train_randomized.drop(['label'], axis=1).to_numpy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
                                                        random_state=123)

    # fit_resample replaces the old imbalanced-learn fit_sample API
    X_resampled, y_resampled = resampling_model.fit_resample(X_train, y_train)
    # borderline2 > borderline1
    # X_resampled, y_resampled = SMOTE(kind='borderline2', k_neighbors=5).fit_resample(X_train, y_train)
    # X_resampled, y_resampled = ADASYN(n_neighbors=50).fit_resample(X_train, y_train)
    # X_resampled, y_resampled = TomekLinks(ratio='auto', random_state=100).fit_resample(X_train, y_train)
    # X_resampled, y_resampled = SMOTEENN().fit_resample(X_train, y_train)

    '''Choose a classification model'''
    y_proba = classifier.fit(X_resampled, y_resampled).predict_proba(X_test)
    y_predict = classifier.fit(X_resampled, y_resampled).predict(X_test)
    # y_predict = y_proba[:, 1].copy()
    # y_predict[y_predict >= 0.9] = 1
    # y_predict[y_predict < 0.9] = 0

    '''AUC and ROC curve'''
    fpr, tpr, _ = roc_curve(y_test, y_proba[:, 1])
    auc_result = auc(fpr, tpr)
    # print("AUC Score:" + str(auc_result))
    '''Accuracy'''
    accuracy_result = accuracy_score(y_test, y_predict)
    '''Precision'''
    precision_result = precision_score(y_test, y_predict)
    # print("Precision Score:" + str(precision_result))
    '''Recall'''
    recall_result = recall_score(y_test, y_predict)
    # print("Recall Score:" + str(recall_result))
    '''F1'''
    f1_result = f1_score(y_test, y_predict)
    # print("F1 Score:" + str(f1_result))
    '''Log loss'''
    log_loss_result = log_loss(y_test, y_proba[:, 1])
    # print("logloss Score:" + str(log_loss_result))
    '''Cohen-Kappa'''
    cohen_kappa_result = cohen_kappa_score(y_test, y_predict)
    # print("Cohen-Kappa Score:" + str(cohen_kappa_result))
    '''Brier score'''
    brier_result = brier_score_loss(y_test, y_proba[:, 1])
    # print("brier Score:" + str(brier_result))
    '''K-S Value'''
    ks_result = max(tpr - fpr)

    '''plot roc'''
    # plt.figure()
    # lw = 2
    # plt.plot(fpr, tpr, color='darkorange', lw=lw,
    #          label='ROC curve (area = %0.4f)' % roc_auc)
    # plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    # plt.xlim([0.0, 1.0])
    # plt.ylim([0.0, 1.05])
    # plt.xlabel('False Positive Rate')
    # plt.ylabel('True Positive Rate')
    # plt.title('Receiver operating characteristic example')
    # plt.legend(loc="lower right")
    # plt.show()

    '''Classification Report'''
    # target_names = ['class 0', 'class 1', 'class 2']
    # print(classification_report(y_test, y_predict, target_names=target_names))

    '''Confusion Matrix'''
    # cnf_matrix = confusion_matrix(y_test, y_predict)
    # np.set_printoptions(precision=2)
    # # Plot non-normalized confusion matrix
    # plt.figure()
    # plot_confusion_matrix(cnf_matrix, classes=[0, 1],
    #                       title='Confusion matrix, without normalization')
    # # Plot normalized confusion matrix
    # plt.figure()
    # plot_confusion_matrix(cnf_matrix, classes=[0, 1], normalize=True,
    #                       title='Normalized confusion matrix')
    # plt.show()
    # print("Accuracy Score:" + str(accuracy_result) + " Precision Score:" +
    #       str(precision_result) + " Recall Score:" + str(recall_result) +
    #       " F1 Score:" + str(f1_result) + " logloss Score:" +
    #       str(log_loss_result) + " Cohen-Kappa Score:" +
    #       str(cohen_kappa_result) + " brier Score:" + str(brier_result) +
    #       " AUC Score:" + str(auc_result))

    return (accuracy_result, precision_result, recall_result, f1_result,
            log_loss_result, cohen_kappa_result, brier_result, ks_result,
            auc_result)
def score_probs(self, y_true, y_prob):
    return metrics.brier_score_loss(y_true, y_prob)
def test_calibration(): """Test calibration objects with isotonic and sigmoid""" n_samples = 100 X, y = make_classification(n_samples=2 * n_samples, n_features=6, random_state=42) sample_weight = np.random.RandomState(seed=42).uniform(size=y.size) X -= X.min() # MultinomialNB only allows positive X # split train and test X_train, y_train, sw_train = \ X[:n_samples], y[:n_samples], sample_weight[:n_samples] X_test, y_test = X[n_samples:], y[n_samples:] # Naive-Bayes clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train) prob_pos_clf = clf.predict_proba(X_test)[:, 1] pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1) assert_raises(ValueError, pc_clf.fit, X, y) # Naive Bayes with calibration for this_X_train, this_X_test in [(X_train, X_test), (sparse.csr_matrix(X_train), sparse.csr_matrix(X_test))]: for method in ['isotonic', 'sigmoid']: pc_clf = CalibratedClassifierCV(clf, method=method, cv=2) # Note that this fit overwrites the fit on the entire training # set pc_clf.fit(this_X_train, y_train, sample_weight=sw_train) prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] # Check that brier score has improved after calibration assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf)) # Check invariance against relabeling [0, 1] -> [1, 2] pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train) prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled) # Check invariance against relabeling [0, 1] -> [-1, 1] pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train) prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1] assert_array_almost_equal(prob_pos_pc_clf, prob_pos_pc_clf_relabeled) # Check invariance against relabeling [0, 1] -> [1, 0] pc_clf.fit(this_X_train, (y_train + 1) % 2, sample_weight=sw_train) prob_pos_pc_clf_relabeled = \ pc_clf.predict_proba(this_X_test)[:, 1] if method == "sigmoid": assert_array_almost_equal(prob_pos_pc_clf, 1 - prob_pos_pc_clf_relabeled) else: # Isotonic calibration is not invariant against relabeling # but should improve in both cases assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss((y_test + 1) % 2, prob_pos_pc_clf_relabeled)) # Check failure cases: # only "isotonic" and "sigmoid" should be accepted as methods clf_invalid_method = CalibratedClassifierCV(clf, method="foo") assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train) # base-estimators should provide either decision_function or # predict_proba (most regressors, for instance, should fail) clf_base_regressor = \ CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid") assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
def plot_calibration_curve_from_data(X, y, est, name, fig_index): """Plot calibration curve for est w/o and with calibration. """ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=7) # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=2, method="isotonic") # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=2, method="sigmoid") # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1.0, solver="lbfgs") fig = plt.figure(fig_index, figsize=(10, 10)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [ (lr, "Logistic Regression"), (est, name), (isotonic, name + " + Isotonic"), (sigmoid, name + " + Sigmoid"), ]: clf.fit(X_train, y_train) # clf.fit(X_train[:,:10], X_train[:, 10]) y_pred = clf.predict(X_test) # y_pred = clf.predict(X_test[:,:10]) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1] # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1]*weights[1] # prob_pos = np_average( 1 - clf.predict_proba(X_test[:,:10]), axis=1, weights=weights ) else: # use decision function prob_pos = clf.decision_function(X_test) # prob_pos = clf.decision_function(X_test[:,:10])[:, 1] # prob_pos = clf.decision_function(X_test[:,:10])[:, 1]*weights[1] # prob_pos = np_average( 1 - clf.decision_function(X_test[:,:10]), axis=1, weights=weights ) prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max()) print("* %s:" % name) OP_append("* %s:" % name) print(" * Brier: %1.3f" % (clf_score)) OP_append(" * Brier: %1.3f" % (clf_score)) print(" * Precision: %1.3f" % precision_score(y_test, y_pred)) OP_append(" * Precision: %1.3f" % precision_score(y_test, y_pred)) print(" * Recall: %1.3f" % recall_score(y_test, y_pred)) OP_append(" * Recall: %1.3f" % recall_score(y_test, y_pred)) print(" * F1: %1.3f\n" % f1_score(y_test, y_pred)) OP_append(" * F1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title("Calibration plots (reliability curve)") ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) plt.tight_layout() fig.savefig("NF/%s.png" % name, dpi=fig.dpi)
model.fit(X, y) else: status = 'No markers with ORs >= {}'.format(ORthreshold) else: markers = cell_markers[ctype] p1 = model.predict_proba(X)[:, 1] fpr, tpr, thresholds = metrics.roc_curve(y, p1) optimal_idx = np.argmax(tpr - fpr) optimal_threshold = thresholds[optimal_idx] optimal_pred = (p1 > optimal_threshold).astype(int) precision, recall, _ = metrics.precision_recall_curve(y, p1) auprc = metrics.auc(recall, precision) auroc = metrics.roc_auc_score(y, p1) ap = metrics.average_precision_score(y, p1) bs = metrics.brier_score_loss(y, p1) acc = metrics.accuracy_score(y, optimal_pred) # store results dt = pd.DataFrame( { 'ctype2pred': ctype, 'cluster': cluster, 'auroc': auroc, 'status': status, 'markers': [markers], 'ORs': np.exp(model.coef_).tolist(), 'ave_prec': ap, 'acc': acc, 'sensitivity': tpr[optimal_idx], 'specificity': 1 - fpr[optimal_idx]
test_y = x_trans(test_x)
test_x = train_x_scaler.transform(test_x)

elm = ELMClassifier(hidden_neurons, C=2E5)
elmae = ExtremeLearningMachine()
elmae.add_layer(ELMLayers.ELMAE(hidden_neurons, C=0))
elmae.add_layer(ELMLayers.ELMAE(hidden_neurons, C=0))
elmae.add_layer(ELMLayers.ELMRegression())
elmae.add_layer(classifier)

instances = [elm, elmae]
i = 1
for instance in instances:
    t0 = time()
    # fit and predict for each instance
    instance.fit(train_x, train_y)
    prediction, prob, _ = instance.predict(test_x)
    # report the forecast performance; note that elm's label binarizer is
    # used to one-hot test_y for both models
    print(instance.print_network_structure())
    print('Brier score: {0:f}'.format(
        brier_score_loss(elm.labels_bin(test_y)[:, 1], prob[:, 1])))
    print('Time elapsed: {0:f}'.format(time() - t0))
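# labels_bin above appears to one-hot the test labels so that column 1 lines
# up with prob[:, 1]. Under that assumption, scikit-learn's label_binarize
# can do the same job for a binary problem (test_y and prob are the arrays
# from the snippet above):
import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.metrics import brier_score_loss

classes = np.unique(test_y)
bin_col = label_binarize(test_y, classes=classes)  # shape (n, 1) when binary
test_y_bin = np.hstack([1 - bin_col, bin_col])     # two-column one-hot layout
print(brier_score_loss(test_y_bin[:, 1], prob[:, 1]))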
clf.fit(X_train, y_train) # GaussianNB itself does not support sample-weights prob_pos_clf = clf.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with isotonic calibration clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic') clf_isotonic.fit(X_train, y_train, sample_weight=sw_train) prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1] # Gaussian Naive-Bayes with sigmoid calibration clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid') clf_sigmoid.fit(X_train, y_train, sample_weight=sw_train) prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1] print("Brier score losses: (the smaller the better)") clf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test) print("No calibration: %1.3f" % clf_score) clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sample_weight=sw_test) print("With isotonic calibration: %1.3f" % clf_isotonic_score) clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sample_weight=sw_test) print("With sigmoid calibration: %1.3f" % clf_sigmoid_score) # ############################################################################# # Plot the data and the predicted probabilities plt.figure() y_unique = np.unique(y) colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
def advanced_scoring_classifiers(probabilities, actuals, name=None): # pandas Series don't play nice here. Make sure our actuals list is indeed a list actuals = list(actuals) print('Here is our brier-score-loss, which is the default value we optimized for while ' 'training, and is the value returned from .score() unless you requested a custom ' 'scoring metric') print('It is a measure of how close the PROBABILITY predictions are.') if name is not None: print(name) # Sometimes we will be given "flattened" probabilities (only the probability of our positive # label), while other times we might be given "nested" probabilities (probabilities of both # positive and negative, in a list, for each item). try: probabilities = [proba[1] for proba in probabilities] except: # TODO: Fix bare Except pass brier_score = brier_score_loss(actuals, probabilities) print(format(brier_score, '.4f')) print('\nHere is the trained estimator\'s overall accuracy (when it predicts a label, ' 'how frequently is that the correct label?) ') predicted_labels = [] for pred in probabilities: if pred >= 0.5: predicted_labels.append(1) else: predicted_labels.append(0) print(format(accuracy_score(y_true=actuals, y_pred=predicted_labels) * 100, '.1f') + '%') print('\nHere is a confusion matrix showing predictions vs. actuals by label:') # it would make sense to use sklearn's confusion_matrix here but it apparently has no labels # took this idea instead from: http://stats.stackexchange.com/a/109015 conf = pd.crosstab( pd.Series(actuals), pd.Series(predicted_labels), rownames=['v Actual v'], colnames=['Predicted >'], margins=True) print(conf) # I like knowing the per class accuracy to see if the model is mishandling imbalanced data. # For example, if it is predicting 100% of observations to one class just because it is the # majority. Wikipedia seems to call that Positive/negative predictive value print('\nHere is predictive value by class:') df = pd.concat( [pd.Series(actuals, name='actuals'), pd.Series(predicted_labels, name='predicted')], axis=1) targets = list(df.predicted.unique()) for i in range(0, len(targets)): tot_count = len(df[df.predicted == targets[i]]) true_count = len(df[(df.predicted == targets[i]) & (df.actuals == targets[i])]) print('Class: ', targets[i], '=', float(true_count) / tot_count) # qcut is super fickle. so, try to use 10 buckets first, then 5 if that fails, then nothing # try: bucket_results = pd.qcut(probabilities, q=10, duplicates='drop') # except: # # bucket_results = pd.qcut(probabilities, q=5, duplicates='drop') df_probabilities = pd.DataFrame(probabilities, columns=['Predicted Probability Of Bucket']) df_probabilities['Actual Probability of Bucket'] = actuals df_probabilities['Bucket Edges'] = bucket_results df_buckets = df_probabilities.groupby(df_probabilities['Bucket Edges']) try: print( tabulate( df_buckets.mean(), headers='keys', floatfmt='.4f', tablefmt='psql', showindex='always')) except TypeError: print(tabulate(df_buckets.mean(), headers='keys', floatfmt='.4f', tablefmt='psql')) print('\nHere is the accuracy of our trained estimator at each level of predicted ' 'probabilities ') print('For a verbose description of what this means, please visit the docs:') print('http://cash-ml.readthedocs.io/en/latest/analytics.html#interpreting-predicted' '-probability-buckets-for-classifiers ') print('\n\n') return brier_score
def brier_skill_score(y_values, forecast_probabilities):
    """Computes the Brier skill score"""
    climo = np.mean((y_values - np.mean(y_values)) ** 2)
    return 1.0 - brier_score_loss(y_values, forecast_probabilities) / climo
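# A quick sanity check of brier_skill_score (assuming numpy and
# brier_score_loss are imported as above): forecasting the base rate for
# every case scores exactly 0, i.e. no skill over climatology, while a
# perfect forecast scores 1.
import numpy as np

y = np.array([0, 0, 0, 1, 1])  # base rate 0.4
climatology = np.full(len(y), y.mean())
print(brier_skill_score(y, climatology))      # 0.0 by construction
print(brier_skill_score(y, y.astype(float)))  # 1.0 for a perfect forecast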
def plot_calibration_curve(est, name, X_train, y_train): """Generate a plot fo the calibration curve, for use in classification modeling diagnostics. Parameters ---------- est : object type that implements the "fit" and "predict" methods An object of that type which is cloned for each validation. name : string Name of the classifier, i.e. "Logistic Regression, SVC, etc". X_train : array-like, shape (n_samples, n_features) Training vector, where n_samples is the number of samples and n_features is the number of features. y_train : array-like, shape (n_samples) or (n_samples, n_features) Target relative to X for classification. """ X_test = X_train y_test = y_train """Plot calibration curve for est w/o and with calibration. """ # Calibrated with isotonic calibration isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic') # Calibrated with sigmoid calibration sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid') # Logistic regression with no calibration as baseline lr = LogisticRegression(C=1., solver='lbfgs') plt.figure(figsize=(8, 8)) ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2) ax2 = plt.subplot2grid((3, 1), (2, 0)) ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated") for clf, name in [(lr, 'Logistic'), (est, name), (isotonic, name + ' + Isotonic'), (sigmoid, name + ' + Sigmoid')]: clf.fit(X_train, y_train) y_pred = clf.predict(X_test) if hasattr(clf, "predict_proba"): prob_pos = clf.predict_proba(X_test)[:, 1] else: # use decision function prob_pos = clf.decision_function(X_test) prob_pos = \ (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_test.max()) print("%s:" % name) print("\tBrier: %1.3f" % (clf_score)) print("\tPrecision: %1.3f" % precision_score(y_test, y_pred)) print("\tRecall: %1.3f" % recall_score(y_test, y_pred)) print("\tF1: %1.3f\n" % f1_score(y_test, y_pred)) fraction_of_positives, mean_predicted_value = \ calibration_curve(y_test, prob_pos, n_bins=10) ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score)) ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2) ax1.set_ylabel("Fraction of positives") ax1.set_ylim([-0.05, 1.05]) ax1.legend(loc="lower right") ax1.set_title('Calibration plots (reliability curve)') ax2.set_xlabel("Mean predicted value") ax2.set_ylabel("Count") ax2.legend(loc="upper center", ncol=2) #plt.tight_layout() return plt
def evaluate_predictions(y_pred, y_probs, y_true, y_train_pred, y_train_probs, y_train, savedir): logging.info(f"Calculating accuracy metrics") precision, recall, _thresholds_pr = metrics.precision_recall_curve( y_true, y_probs) fpr, tpr, _thresholds_roc = metrics.roc_curve(y_true, y_probs) model_metrics = { 'training_accuracy': metrics.accuracy_score(y_train, y_train_pred), 'accuracy': metrics.accuracy_score(y_true, y_pred), 'training_f1_score': metrics.f1_score(y_train, y_train_pred), 'f1_score': metrics.f1_score(y_true, y_pred), 'precision': metrics.precision_score(y_true, y_pred), 'recall': metrics.recall_score(y_true, y_pred), 'cross_entropy': metrics.log_loss(y_true, y_pred), 'average_precision_score': metrics.average_precision_score(y_true, y_pred), 'pr_auc_score': metrics.auc(recall, precision), 'roc_auc_score': metrics.auc(fpr, tpr), 'brier_score_loss': metrics.brier_score_loss(y_true, y_probs), } longer_model_metrics = { 'confusion_matrix': metrics.confusion_matrix(y_true, y_pred).tolist(), 'binding_probs': stats.describe(y_probs)._asdict(), 'binding_probs_positive': stats.describe(y_probs[y_true == 1])._asdict(), 'binding_probs_negative': stats.describe(y_probs[y_true == 0])._asdict(), 'training_binding_probs': stats.describe(y_train_probs)._asdict(), 'training_binding_probs_positive': stats.describe(y_train_probs[y_train == 1])._asdict(), 'training_binding_probs_negative': stats.describe(y_train_probs[y_train == 0])._asdict(), } plot_filenames = { 'pred_probs': os.path.join(savedir, "pred_probs.png"), 'roc_curve': os.path.join(savedir, "roc_curve.png"), 'pr_curve': os.path.join(savedir, "pr_curve.png") } logging.info(f"Plotting predicted probability distribution") try: plt.clf() sns.distplot(y_probs[y_true == 1], label="Positives", color=sns.color_palette('colorblind')[2]) sns.distplot(y_probs[y_true == 0], label="Negatives", color=sns.color_palette('colorblind')[3]) except np.linalg.LinAlgError: # If all the predicted probabilities are the same, then we cannot calculate kde plt.clf() sns.distplot(y_probs[y_true == 1], label="Positives", kde=False, color=sns.color_palette('colorblind')[2]) sns.distplot(y_probs[y_true == 0], label="Negatives", kde=False, color=sns.color_palette('colorblind')[3]) plt.title("Prediction probabilities by class") plt.legend() plt.savefig(plot_filenames['pred_probs']) logging.info(f"Plotting ROC curve") plt.clf() plt.plot(fpr, tpr) plt.title("ROC curve") plt.xlabel("False positive rate") plt.ylabel("True positive rate") plt.legend() plt.savefig(plot_filenames['roc_curve']) logging.info(f"Plotting precision recall curve") plt.clf() plt.plot(recall, precision) plt.title("Precision recall curve") plt.xlabel("Recall") plt.ylabel("Precision") plt.legend() plt.savefig(plot_filenames['pr_curve']) return model_metrics, longer_model_metrics, plot_filenames