Example #1
def check_calibration(method):
    # Adapted from sklearn/tests/test_calibration.py
    # Authors: Alexandre Gramfort
    # License: BSD 3 clause

    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train = X[:n_samples], y[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
    # Note that this fit overwrites the fit on the entire training set
    pc_clf.fit(X_train, y_train)
    prob_pos_pc_clf = pc_clf.predict_proba(X_test)[:, 1]

    # Check that brier score has improved after calibration
    assert_greater(brier_score_loss(y_test, prob_pos_clf),
                   brier_score_loss(y_test, prob_pos_pc_clf))

    # Check invariance against relabeling [0, 1] -> [1, 2]
    pc_clf.fit(X_train, y_train + 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf,
                              prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [-1, 1]
    pc_clf.fit(X_train, 2 * y_train - 1)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    assert_array_almost_equal(prob_pos_pc_clf,
                              prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [1, 0]
    pc_clf.fit(X_train, (y_train + 1) % 2)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    if method == "sigmoid":
        assert_array_almost_equal(prob_pos_pc_clf,
                                  1 - prob_pos_pc_clf_relabeled)
    else:
        # Isotonic calibration is not invariant against relabeling
        # but should improve in both cases
        assert_greater(brier_score_loss(y_test, prob_pos_clf),
                       brier_score_loss((y_test + 1) % 2,
                                        prob_pos_pc_clf_relabeled))
def test_brier_score_loss():
    """Check brier_score_loss function"""
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)

    assert_almost_equal(brier_score_loss(y_true, y_true), 0.0)
    assert_almost_equal(brier_score_loss(y_true, y_pred), true_score)
    assert_almost_equal(brier_score_loss(1. + y_true, y_pred),
                        true_score)
    assert_almost_equal(brier_score_loss(2 * y_true - 1, y_pred),
                        true_score)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred[1:])
    assert_raises(ValueError, brier_score_loss, y_true, y_pred + 1.)
    assert_raises(ValueError, brier_score_loss, y_true, y_pred - 1.)
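
A minimal sketch (not from the original example) confirming the equivalence exercised in test_brier_score_loss above: for binary 0/1 labels and predicted probabilities of the positive class, brier_score_loss is simply the mean squared difference between labels and probabilities.

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0, 1, 1])
y_prob = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95])

# Brier score = mean((y_true - y_prob) ** 2) for binary targets
manual = np.mean((y_true - y_prob) ** 2)
assert np.isclose(brier_score_loss(y_true, y_prob), manual)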
Example #3
    def process(self):
        # Read the data
        data = pd.read_csv(self.parameters['ex'], index_col=0)  # DataFrame.from_csv was removed from pandas
        self.y_score = data[['pre_below', 'pre_normal', 'pre_above']]
        self.y_true = data[['obs_below', 'obs_normal', 'obs_above']]
        # Plot
        fpr = dict()  # False Positive Rate
        tpr = dict()  # True Positive Rate
        roc_auc = dict() #ROC AREA UNDER CURVE
        bs = dict() #Brier Score Loss
        # clear the current figure
        plt.clf()

        fpr[self.parameters['index']], tpr[self.parameters['index']], _ = metrics.roc_curve(self.y_true.iloc[:, self.parameters['index']], self.y_score.iloc[:, self.parameters['index']])
        roc_auc[self.parameters['index']] = metrics.roc_auc_score(self.y_true.iloc[:, self.parameters['index']], self.y_score.iloc[:, self.parameters['index']])
        bs[self.parameters['index']] = metrics.brier_score_loss(self.y_true.iloc[:, self.parameters['index']], self.y_score.iloc[:, self.parameters['index']])

        if self.args.verbose:
            print("====False Positive Ratio(fpr) And True Positive Ratio(tpr) Pair====")
            for idx,val in enumerate(fpr[self.parameters['index']]):
                print(idx,val,fpr[self.parameters['index']][idx])
        plt.plot(fpr[self.parameters['index']], tpr[self.parameters['index']],label='Num:%d,AUC: %0.2f,BS: %0.2f' \
                %(self.y_true.shape[0], roc_auc[self.parameters['index']],bs[self.parameters['index']]))
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.05])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(self.args.title[0] if self.args.title else 'Receiver Operating Characteristic(ROC)')
        plt.legend(loc="lower right")
        print('Saving image to {}'.format(self.parameters['name']))
        plt.savefig(self.parameters['name'])
        print('Completely Finished.')
def plot_calibration_curve(est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration. """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Calibrated with ROC convex hull calibration
    rocch = CalibratedClassifierCV(est, cv=2, method='rocch')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'),
                      (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid'),
                      (rocch, name + ' + ROCConvexHull')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("%s:" % name)
        print("\tBrier: %1.4f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tAuc: %1.4f\n" % roc_auc_score(y_test, prob_pos))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.4f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
Example #5
def calibration_curve_plotter(y_test, prob_pos, n_bins=10):

    brier = brier_score_loss(y_test, prob_pos, pos_label=1)

    fig = plt.figure(0, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    df = pd.DataFrame({"true": y_test})
    bins = np.linspace(0.0, 1.0, n_bins + 1)
    binids = np.digitize(prob_pos, bins) - 1
    df["Bin center"] = bins[binids] + 0.5 / n_bins
    df[""] = "Model calibration: (%1.5f)" % brier
    o = bins + 0.5 / n_bins

    df2 = pd.DataFrame({"true": o, "Bin center": o})
    df2[""] = "Perfect calibration"

    df = pd.concat([df, df2])

    sns.pointplot(x="Bin center", y="true", data=df, order=o, hue="", ax=ax1)

    ax2.hist(prob_pos, range=(0, 1), bins=10, label="Model", histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    # ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots")

    ax2.set_xlabel("Predicted Probability")
    ax2.set_ylabel("Count")

    plt.tight_layout()
Example #6
 def plot_probability_calibration_curves(self):
 
     """ Compute true and predicted probabilities for a calibration plot 
         fraction_of_positives - The true probability in each bin (fraction of positives).
         mean_predicted_value - The mean predicted probability in each bin.
     """
     
     fig = plt.figure()
     ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
     ax2 = plt.subplot2grid((3, 1), (2, 0), rowspan=2)
     
     ax1.set_ylabel("Fraction of positives")
     ax1.set_ylim([-0.05, 1.05])
     ax1.legend(loc="lower right")
     ax1.set_title('Calibration plots  (reliability curve) ' + self.description)
 
     ax2.set_xlabel("Mean predicted value")
     ax2.set_ylabel("Count")
     ax2.legend(loc="upper center", ncol=2)
     
     clf_score = brier_score_loss(self.y_true, self.y_pred, pos_label=1)
     
     
     fraction_of_positives, mean_predicted_value = calibration_curve(self.y_true, self.y_pred, n_bins=50)
     
     ax1.plot(mean_predicted_value, fraction_of_positives, "s-", color="#660066",  alpha = 0.6, label="%s (%1.3f)" % (self.description, clf_score))
     ax2.hist(self.y_pred, range=(0, 1), bins=50, color="#660066", linewidth=2.0 , alpha = 0.6, label="%s (%1.3f)" % (self.description, clf_score), histtype="step", lw=2)
     plt.yscale('log')
     return
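
The docstring above describes what calibration_curve returns; here is a minimal, self-contained sketch (with hypothetical, synthetically generated probabilities) showing the two arrays it produces.

import numpy as np
from sklearn.calibration import calibration_curve

rng = np.random.RandomState(0)
y_prob = rng.uniform(size=1000)      # hypothetical predicted probabilities
y_true = rng.binomial(1, y_prob)     # labels sampled so the probabilities are well calibrated

# fraction_of_positives: observed positive rate in each bin
# mean_predicted_value: average predicted probability in each bin
fraction_of_positives, mean_predicted_value = calibration_curve(y_true, y_prob, n_bins=10)
print(np.round(fraction_of_positives, 2))
print(np.round(mean_predicted_value, 2))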
def plot_calibration_curve(est, name, fig_index):
	'''
	Plot calibration curve for est w/o and with calibration.
	'''
	# Calibrated with isotonic calibration 
	isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

	# Calibrated with sigmoid calibration 
	sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

	# Logistic regression with no calibration as baseline 
	lr = LogisticRegression(C=1.0, solver='lbfgs')
	fig = plt.figure(fig_index, figsize=(10, 10))
	ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
	ax2 = plt.subplot2grid((3, 1), (2, 0))

	ax1.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
	for clf, name in [
		(lr, 'Logistic'),
		(est, name),
		(isotonic, name + ' + Isotonic'),
		(sigmoid, name + ' + Sigmoid')
	]:
		clf.fit(X_train, y_train)
		y_pred = clf.predict(X_test)
		if hasattr(clf, 'predict_proba'):
			prob_pos = clf.predict_proba(X_test)[:, 1]
		else:
			# use decision function 
			prob_pos = clf.decision_function(X_test)
			prob_pos = \
				(prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min()) 

		clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
		print('%s:' % name)
		print('\tBrier: %1.3f' % (clf_score))
		print('\tPrecision: %1.3f' % precision_score(y_test, y_pred))
		print('\tRecall: %1.3f' % recall_score(y_test, y_pred))
		print('\tF1: %1.3f\n' % f1_score(y_test, y_pred))

		fraction_of_positives, mean_predicted_value = \
			calibration_curve(y_test, prob_pos, n_bins = 10)

		ax1.plot(mean_predicted_value, fraction_of_positives, 's-',
			label='%s (%1.3f)' % (name, clf_score))

		ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
			histtype='step', lw=2)

	ax1.set_ylabel('Fraction of positives')
	ax1.set_ylim([-0.05, 1.05])
	ax1.legend(loc='lower right')
	ax1.set_title('Calibration plots (reliability curve)')

	ax2.set_xlabel('Mean predicted value')
	ax2.set_ylabel('Count')
	ax2.legend(loc='upper center', ncol=2)

	plt.tight_layout()
def brier(ytrue, yprob, num_classes):
    rv = 0.
    for i in range(num_classes):
        # one-vs-rest indicator for class i
        ind = np.where(ytrue == i)[0]
        tmp = np.zeros(ytrue.size)
        tmp[ind] += 1
        rv += brier_score_loss(tmp, yprob[:, i])
    rv /= num_classes
    return rv
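
A hypothetical usage sketch for the one-vs-rest multiclass Brier average defined above, assuming the brier helper (and its numpy/sklearn imports) is in scope; the data here is made up for illustration only.

import numpy as np

ytrue = np.array([0, 1, 2, 1, 0])                  # class indices
yprob = np.array([[0.7, 0.2, 0.1],                 # per-class predicted probabilities
                  [0.2, 0.6, 0.2],
                  [0.1, 0.3, 0.6],
                  [0.3, 0.5, 0.2],
                  [0.8, 0.1, 0.1]])

# average of the three one-vs-rest Brier scores
print(brier(ytrue, yprob, num_classes=3))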
Example #9
def calibrate_proba_fitted_models(iDf, iFeatures, iModelsDict):
    iCalibratedModelsDict = {}

    for model_name in iModelsDict.keys():
        target = model_name.replace('_gbr', '').replace('_rf', '')
        proba_cal_sig = CalibratedClassifierCV(iModelsDict[model_name], method='sigmoid', cv='prefit')
        proba_cal_iso = CalibratedClassifierCV(iModelsDict[model_name], method='isotonic', cv='prefit')
        proba_cal_sig.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        proba_cal_iso.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        brier_sig = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_sig.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        brier_iso = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_iso.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])

        if brier_sig <= brier_iso:
            iCalibratedModelsDict[model_name] = proba_cal_sig.calibrated_classifiers_
        else:
            iCalibratedModelsDict[model_name] = proba_cal_iso.calibrated_classifiers_
    return iCalibratedModelsDict
Example #10
def plot_calibration_curve_cv(X, y, est, name, bins=10, n_folds=8, n_jobs=1, fig_index=1):
    """Plot calibration curve for est w/o and with calibration. """
    from sklearn import metrics, cross_validation
    from model_selection import cross_val_predict_proba
    
    # Calibrated with isotonic calibration
    cv = 2
    isotonic = CalibratedClassifierCV(est, cv=cv, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=cv, method='sigmoid')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        
        y_true = y
        scoring = 'roc_auc'
        cv1 = cross_validation.StratifiedKFold(y,n_folds)
        y_proba, scores = cross_val_predict_proba(clf, X, y, scoring=scoring, 
            cv=cv1, n_jobs=n_jobs, verbose=0, fit_params=None, pre_dispatch='2*n_jobs')
        y_pred = np.array(y_proba>0.5,dtype=int)

        clf_score = brier_score_loss(y_true, y_proba, pos_label=y_true.max())
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_true, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_true, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_true, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_true, y_proba, n_bins=bins)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(y_proba, range=(0, 1), bins=bins, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
Example #11
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob, axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
Example #12
 def print_stats():
     print(metrics.classification_report(y_true, y_pred,
           target_names=target_names))
     print("roc_auc_score: {:1.4f} | LogLoss: {:1.3f} | Brier score loss:"
           " {:1.3f}".format(metrics.roc_auc_score(y_true, y_proba),
                             metrics.log_loss(y_true, y_proba),
                             metrics.brier_score_loss(y_true, y_proba)))
     if hasattr(model, 'threshold') and model.threshold:
         precision, sensitivity, specificity = \
             precision_sensitivity_specificity(y_true, y_proba,
                                               threshold=model.threshold)
         print("sensitivity(recall): {:1.2f} and specificity: {:1.2f}"
               " with threshold={:1.2f}".format(
                   sensitivity, specificity, model.threshold))
Example #13
def get_error(est_track, true_track):
    """
    """
    
    if est_track.ndim > 1:
        true_track = true_track.reshape((true_track.shape[0],1))
    
    error = np.recarray(shape=est_track.shape,
                        dtype=[('position', float),
                               ('orientation', float),
                               ('orientation_weighted', float)])
    
    # Position error
    pos_err = (true_track.x - est_track.x)**2 + (true_track.y - est_track.y)**2
    error.position = np.sqrt(pos_err)
    
    # Orientation error
    error.orientation = anglediff(true_track.angle, est_track.angle, units='deg')    
    error.orientation_weighted = anglediff(true_track.angle, est_track.angle_w, units='deg')
    
    descr = {}
    bix = np.logical_not(np.isnan(error.orientation))
    descr['orientation_median'] = np.median(np.abs(error.orientation[bix]))
    descr['orientation_mean'] = np.mean(np.abs(error.orientation[bix]))
    bix = np.logical_not(np.isnan(error.orientation_weighted))
    descr['orientation_weighted_median'] = np.nanmedian(np.abs(error.orientation_weighted[bix]))
    descr['orientation_weighted_mean'] = np.nanmean(np.abs(error.orientation_weighted[bix]))
    # no angle
    true_no_angle = np.isnan(true_track.angle)
    est_no_angle = np.isnan(est_track.angle)
    agree = np.logical_and(true_no_angle, est_no_angle)
    disagree = np.logical_xor(true_no_angle, est_no_angle)
    both = np.logical_or(true_no_angle, est_no_angle)
    #ipdb.set_trace()
    descr['no_angle_auc'] = roc_auc_score(true_no_angle, est_no_angle)
    descr['no_angle_mcc'] = matthews_corrcoef(true_no_angle, est_no_angle)
    descr['no_angle_brier'] = brier_score_loss(true_no_angle, est_no_angle)    
    descr['no_angle_acc'] = agree.sum()/both.sum()
    descr['no_angle_p_per_frame'] = disagree.sum()/disagree.shape[0]
    descr['position_median'] = np.median(error.position)
    descr['position_mean'] = np.mean(error.position)
    
    #print('True frequency of angle-does-not-apply:',
     #     true_no_angle.sum()/true_no_angle.shape[0])
    
    #print('Estimated frequency of angle-does-not-apply:',
     #     est_no_angle.sum()/est_no_angle.shape[0])    

    return error, descr
Example #14
    def process(self):
        """ process """
        ##directory check
        files = glob.glob(os.path.join(self.parameters['csv_dir'],'*.csv'))

        if not files:
            print('No .csv file found in {}.'.format(self.parameters['csv_dir']))
            exit(-1)

        self.auc = np.zeros([self.lats,self.lons])
        self.bs = np.zeros([self.lats,self.lons])
        self.sum = np.zeros([self.lats,self.lons])

        ##loop for reshape data
        for lat in np.arange(self.lats):
            for lon in np.arange(self.lons):
                if self.args.verbose:
                    print('Now Calculating Grid({},{})......'.format(lat,lon))
                y_true = list()
                y_score = list()
                for path in files:
                    row = pd.read_csv(path, index_col=0).query('latitude=={} and longitude=={}'.format(lat,lon))
                    if row.empty:
                        continue
                    y_true.append(row.iloc[0]['obs_'+self.nclass[self.parameters['index']]])
                    y_score.append(row.iloc[0]['pre_'+self.nclass[self.parameters['index']]])

                ## Validate y_true; if it is all zeros, skip the rest of the calculation
                if not y_true:
                    print('Warning: y_true is empty in Grid({},{}).'.format(lat,lon))
                    continue

                if all(i==0 for i in y_true):
                    print('Warning:Grid({},{}) y_true has only one class(0 or 1)'.format(lat,lon))
                    continue

                ## Compute AUC and BS
                self.auc[lat,lon] = metrics.roc_auc_score(y_true,y_score)
                self.bs[lat,lon] = metrics.brier_score_loss(y_true,y_score)
                self.sum[lat,lon] = len(y_true)
                print(self.auc[lat,lon],self.bs[lat,lon])
                del(y_true)
                del(y_score)

        ##save result
        np.save(self.parameters['name']+'_auc',self.auc)
        np.save(self.parameters['name']+'_bs',self.bs)
        np.save(self.parameters['name']+'_sum',self.sum)
def train_model_rfc_calibrated_cv (features, labels, hold_out = False, train_sz = 0.9) :
	features_train, features_test = [], []
	labels_train, labels_test = [], []
	if (hold_out == True) :
		# First, set aside some of the training set for calibration
		# Use stratified shuffle split so that class ratios are maintained after the split
		splitter = StratifiedShuffleSplit(labels, n_iter = 1, train_size = train_sz, random_state = 30)

		# Length is 1 in this case since we have a single fold for splitting
		print (len(splitter))

		for train_idx, test_idx in splitter:
			features_train, features_test = features[train_idx], features[test_idx]
			labels_train, labels_test = labels[train_idx], labels[test_idx]
	else :
		features_train = features
		labels_train = labels

	print ("features_train shape: ", features_train.shape)
	print ("labels_train shape: ", labels_train.shape)
	if (hold_out == True) :
		print ("features_test shape: ", features_test.shape)
		print ("labels_test shape: ", labels_test.shape)
		
	print ("Parameters selected based on prior grid Search ...")
	#clf = rfc(random_state = 30, n_jobs = 4, criterion = 'entropy', max_depth = 7, min_samples_leaf = 2, min_samples_split = 5, n_estimators = 50)
	#clf = rfc(random_state = 30, n_jobs = 4, criterion = 'gini', max_depth = 8, min_samples_leaf = 5, min_samples_split = 2, n_estimators = 120)
	# clf = rfc(random_state = 30, n_jobs = 4, criterion = 'gini', class_weight = 'auto', max_depth = 5, min_samples_leaf = 5, min_samples_split = 2, n_estimators = 100)
	clf = rfc(random_state = 30, n_jobs = 4, criterion = 'entropy', class_weight = 'auto', max_depth = 5, min_samples_leaf = 5, min_samples_split = 2, n_estimators = 60)

	# Perform calibration 
	# Use 'sigmoid' because sklearn cautions against using 'isotonic' for fewer than 1000 calibration samples, as it can result in overfitting
	# 05/22 - Looks like isotonic does better than sigmoid for both Brier score and roc_auc_score.
	# Using 30-40% holdout actually improves ROC AUC for holdout score from 0.88 to 0.925 with CV=5
	print ("Performing Calibration now ...")
	# sigmoid = CalibratedClassifierCV(clf, cv=5, method='sigmoid')
	sigmoid = CalibratedClassifierCV(clf, cv=5, method='isotonic')
	sigmoid.fit(features_train, labels_train)

	if (hold_out == True) :
		# Calculate Brier score loss
		y_probs = sigmoid.predict_proba(features_test)[:, 1]
		clf_score = brier_score_loss(labels_test, y_probs)
		print ("Brier score: ", clf_score)
		auc_score = estimate_roc_auc (sigmoid, features_test, labels_test)

	return sigmoid
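
The comments above weigh sigmoid against isotonic calibration; a minimal sketch of that comparison on a held-out set, using synthetic data as a stand-in for the real features and labels, might look like this.

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split

# hypothetical stand-in for the real features/labels
X, y = make_classification(n_samples=2000, n_features=20, random_state=30)
X_train, X_hold, y_train, y_hold = train_test_split(
    X, y, train_size=0.9, stratify=y, random_state=30)

base = RandomForestClassifier(n_estimators=60, max_depth=5, random_state=30)
for method in ('sigmoid', 'isotonic'):
    calibrated = CalibratedClassifierCV(base, cv=5, method=method)
    calibrated.fit(X_train, y_train)
    probs = calibrated.predict_proba(X_hold)[:, 1]
    # lower Brier score on the holdout set = better calibrated probabilities
    print(method, brier_score_loss(y_hold, probs))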
Example #16
def get_model_results(model, training_data, test_data):
    """
    Fit the chosen model on the training data and evaluate it on the test data
    
    
    Parameters
    -----
    model: machine learning model such as Logistic Regression, MultiLayer Perceptron
    
    training_data: list containing X,y training data
    
    test_data: list containing test X,y test data
    
    
    Returns
    ------
    y_proba, y_pred, y_test, accuracy, auc, brier_loss
    """
    
    # choose model
    if model == "LR":
        model = LogisticRegression()
    elif model == "TF":
        model = learn.TensorFlowDNNClassifier(hidden_units=[150, 40], n_classes=2, steps=1000, batch_size=25, learning_rate=0.0002, optimizer="Adam")
     
    # fit model
    start = time()
    X_train, y_train = training_data
    X_test, y_test = test_data
    model.fit(X_train, y_train)
    
    # accuracy
    y_pred = model.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, y_pred)

    # auc
    y_proba = model.predict_proba(X_test)
    auc = metrics.roc_auc_score(y_test, (y_proba[:,1] - y_proba[:,0]))
            
    print('Accuracy: {0:f}'.format(accuracy))
    print('AUC: {0:f}'.format(auc))
    
    # brier loss
    brier_loss = metrics.brier_score_loss(y_test, y_proba[:,1], pos_label=1)

    print('Model computation duration (secs):', time() - start)
    return (y_proba, y_pred, y_test, accuracy, auc, brier_loss)
Example #17
    def get_stat(self, X_test, y_test):
        """Print list of score for the current classifier"""
        y_pred = self.predict(X_test)
        if hasattr(self.clf, "predict_proba"):
            prob_pos = self.clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = self.clf.decision_function(X_test)
            prob_pos = (prob_pos - prob_pos.min()) / \
                (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos)
        print("%s:" % self.method)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f" % f1_score(y_test, y_pred))
        print("\tROC AUC score: %1.3f\n" % roc_auc_score(y_test, prob_pos))
Example #18
def ProcessAndFit(input):
    '''
    For testing the brier score loss associated with a calibration model given features tt (tt = totest)
    This is meant to be run in parallel, hence the "input"
    '''
    
    y, X, tt = input[0]

    X = X[:,tt]

    X_train, X_test, y_train, y_test = train_test_split(dummyize(X), y, test_size = 0.5)
    lr = LogisticRegression()
    lr.fit(X_train, y_train)

    y_prob = lr.predict_proba(X_test)[:,1]

    return brier_score_loss(y_test, y_prob)
def evaluate(estimator, dev_X, dev_y):
    print('evaluating on development set', flush=True)
    guess_dev = estimator.predict(dev_X)
    score_roc_auc_dev = roc_auc_score(dev_y, guess_dev)
    print('{:.4f} -- roc auc'.format(score_roc_auc_dev))
    score_brier_loss_dev = brier_score_loss(dev_y, guess_dev)
    print('{:.4f} -- brier loss'.format(score_brier_loss_dev))
    score_log_loss_dev = log_loss(dev_y, estimator.predict_proba(dev_X))
    print('{:.4f} -- log loss'.format(score_log_loss_dev))
    guess_dev_negative_one = guess_dev.copy().astype('int8')
    guess_dev_negative_one[guess_dev_negative_one == 0] = -1
    '''
    decision_fuction not implemented
    # score_hinge_loss_dev = hinge_loss(dev_y, estimator.decision_function(dev_X))
    '''
    score_hinge_loss_dev = hinge_loss(dev_y, guess_dev_negative_one)
    print('{:.4f} -- hinge loss'.format(score_hinge_loss_dev))
    score_matthews_corrcoef_dev = matthews_corrcoef(dev_y, guess_dev_negative_one)
    print('{:.4f} -- matthews_corrcoef'.format(score_matthews_corrcoef_dev))
    print(flush=True)

    return score_roc_auc_dev, score_brier_loss_dev,\
        score_log_loss_dev, score_hinge_loss_dev, score_matthews_corrcoef_dev
Example #20
def calibration_inner_loop(clf,X,y,train,test,n_bins,n_power,bins_used,minsamples):
    X_train, y_train  = X[train],y[train]
    X_test, y_test = X[test],y[test]
    
    clf.fit(X_train, y_train)
    if hasattr(clf, "predict_proba"):
        y_proba = clf.predict_proba(X_test)[:, 1]
    elif hasattr(clf, "decision_function"):  # use decision function
        prob_pos = clf.decision_function(X_test)
        y_proba = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    else:
        raise RuntimeError("clf without predict_proba or decision_function")

    fraction_of_positives, mean_predicted_value, bins_used, n_bins = \
        calibration_curve_nan(y_test, y_proba, n_bins=n_bins, n_power=n_power, 
            bins=bins_used, minsamples=minsamples)
    #print fraction_of_positives.shape, mean_predicted_value.shape
    return (\
        np.array(list(fraction_of_positives)+list(mean_predicted_value)),
        brier_score_loss(y_test, y_proba, pos_label=y_test.max()),
        metrics.roc_auc_score(y_test, y_proba),
        bins_used, n_bins
        )
Example #21
 def evaluate_sigmoid_match(self,X_test,y_test,A,B):
     from sklearn.calibration import calibration_curve
     import matplotlib.pyplot as plt
     from sklearn.metrics import (brier_score_loss, precision_score, recall_score,f1_score)
     prob_pos = 1. / (1. + (np.exp(A * X_test + B)))
     clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_test.max())
     fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10)
     print("SVC_sigmoid:")
     print("\tBrier: %1.3f" % (clf_score))
     fig = plt.figure(2, figsize=(10, 10))
     ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
     ax2 = plt.subplot2grid((3, 1), (2, 0))
     ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")  
     ax1.plot(mean_predicted_value, fraction_of_positives, "s-",label="%s (%1.3f)" % ("SVC_sigmoid", clf_score))
     ax2.hist(prob_pos, range=(0, 1), bins=10, label="SVC_sigmoid",histtype="step", lw=2)
     ax1.set_ylabel("Fraction of positives")
     ax1.set_ylim([-0.05, 1.05])
     ax1.legend(loc="lower right")
     ax1.set_title('Calibration plots  (reliability curve)')
     ax2.set_xlabel("Mean predicted value")
     ax2.set_ylabel("Count")
     ax2.legend(loc="upper center", ncol=2)
     plt.tight_layout()
     plt.show()
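
A sketch (an assumption, not part of the original class) of where the A and B used in evaluate_sigmoid_match could come from, assuming X_test holds 1-D decision-function scores: Platt scaling, i.e. a logistic fit on the scores, rewritten into the prob = 1 / (1 + exp(A*s + B)) convention used above. The helper name fit_platt_ab is hypothetical.

import numpy as np
from sklearn.linear_model import LogisticRegression

def fit_platt_ab(decision_scores, y):
    # hypothetical helper: logistic regression on the 1-D decision scores.
    # sklearn models P(y=1|s) = 1 / (1 + exp(-(w*s + b))); with the
    # prob = 1 / (1 + exp(A*s + B)) convention above, A = -w and B = -b.
    lr = LogisticRegression(C=1e6)  # large C ~ nearly unregularised Platt fit
    lr.fit(np.asarray(decision_scores).reshape(-1, 1), y)
    return -lr.coef_[0, 0], -lr.intercept_[0]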
Example #22
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert_greater(brier_score_loss(y_test, prob_pos_clf),
                           brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss((y_test + 1) % 2,
                                                prob_pos_pc_clf_relabeled))

        # Check failure cases:
        # only "isotonic" and "sigmoid" should be accepted as methods
        clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
        assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

        # base-estimators should provide either decision_function or
        # predict_proba (most regressors, for instance, should fail)
        clf_base_regressor = \
            CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
        assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
prob_pos_clf = clf.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with isotonic calibration
clf_isotonic = CalibratedClassifierCV(clf, cv=2, method="isotonic")
clf_isotonic.fit(X_train, y_train, sw_train)
prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with sigmoid calibration
clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method="sigmoid")
clf_sigmoid.fit(X_train, y_train, sw_train)
prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

print("Brier scores: (the smaller the better)")

clf_score = brier_score_loss(y_test, prob_pos_clf, sw_test)
print("No calibration: %1.3f" % clf_score)

clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic, sw_test)
print("With isotonic calibration: %1.3f" % clf_isotonic_score)

clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid, sw_test)
print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)

###############################################################################
# Plot the data and the predicted probabilities
plt.figure()
y_unique = np.unique(y)
colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
for this_y, color in zip(y_unique, colors):
    this_X = X_train[y_train == this_y]
Example #24
def calibration_comparison(base_estimator,
                           n_samples,
                           weights=None,
                           n_bins=10,
                           detail=False):

    X, y = make_classification(n_samples=3 * n_samples,
                               n_features=6,
                               random_state=42,
                               weights=weights)
    base_estimator_dict = {
        "MultinomialNB": MultinomialNB(),
        "GaussianNB": GaussianNB(),
        "SVC": LinearSVC()
    }

    if (base_estimator == "MultinomialNB"):
        X -= X.min()
    # Train data: train binary model.
    X_train, y_train = X[:n_samples], y[:n_samples]
    print("Positive Rate: {x}".format(x=y_train.mean()))
    # calibrate data.
    X_calib, y_calib = X[n_samples:2 * n_samples], y[n_samples:2 * n_samples]
    # test data.
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]
    # train the base estimator
    clf = base_estimator_dict[base_estimator].fit(X_train, y_train)

    if (base_estimator == "SVC"):
        # y_calib_score: training in the calibration model.
        y_calib_score = clf.decision_function(X_calib)
        y_calib_score = (y_calib_score - y_calib_score.min()) /\
                        (y_calib_score.max() - y_calib_score.min())
        # y_test_score: evaluation in the calibration model.
        y_test_score = clf.decision_function(X_test)
        y_test_score = (y_test_score - y_test_score.min()) /\
                       (y_test_score.max() - y_test_score.min())
    else:
        # y_calib_score: training in the calibration model.
        y_calib_score = clf.predict_proba(X_calib)
        y_calib_score = np.array([score[1] for score in y_calib_score])

        # y_test_score: evaluation in the calibration model.
        y_test_score = clf.predict_proba(X_test)
        y_test_score = np.array([score[1] for score in y_test_score])

    calibrate_model_dict = {
        "mimic": _MimicCalibration(threshold_pos=5, record_history=False),
        "isotonic": IsotonicRegression(y_min=0.0,
                                       y_max=1.0,
                                       out_of_bounds='clip'),
        # "platt": LogisticRegression()
    }

    result = {}
    result[base_estimator] = {}
    for cal_name, cal_object in calibrate_model_dict.items():
        # import pdb; pdb.set_trace()
        print(cal_name)
        cal_object.fit(copy(y_calib_score), copy(y_calib))
        if cal_name in ["mimic", "isotonic"]:
            y_output_score = cal_object.predict(copy(y_test_score))
        else:
            raise "Please specify probability prediction function."

        frac_pos, predicted_value = calibration_curve(y_test,
                                                      y_output_score,
                                                      n_bins=n_bins)
        b_score = brier_score_loss(y_test, y_output_score, pos_label=1)
        # precsion = precision_score(y_test, y_output_score)
        # recall = recall_score(y_test, y_output_score)
        # f1 = f1_score(y_test, y_output_score)

        result[base_estimator][cal_name] = {
            "calibration_curve": [frac_pos, predicted_value],
            # "eval_score" : [b_score, precsion, recall, f1]
            "eval_score": [b_score]
        }

        if (detail):
            result[base_estimator][cal_name]["detail"] = {
                "y_test": y_test,
                "y_test_calibrate_score": y_output_score
            }

    return result
def best_N_experts(X_trainval,
                   y_trainval,
                   X_test,
                   y_test,
                   Nreplicates=10,
                   type='Brier_weighted',
                   average='median'):

    from sklearn.model_selection import train_test_split
    from scipy import stats

    n_experts = X_trainval.shape[1]
    y_trainval_bin = (y_trainval == 1).astype(
        int)  #convert to Away-based binary labels

    #Setup the grid search
    coeff_grid = np.arange(1, n_experts, 5)
    Ntest = len(coeff_grid)
    TestScores = np.ones(Ntest)

    Nopt = np.zeros(Nreplicates).astype(int)
    for rep in range(Nreplicates):

        for tst in range(Ntest):

            N = coeff_grid[tst]

            #shuffle the data and split into training and validation sets
            X_train, X_val, y_train, y_val = train_test_split(X_trainval,
                                                              y_trainval_bin,
                                                              test_size=0.3,
                                                              shuffle=True)

            n_train = X_train.shape[0]

            #determine the Brier scores of all predictors (in the given order) and also the fraction of predictions
            #from the training data. Any prediction at 0.5 is counted as a no-prediction
            Brier_Scores = np.ones(n_experts)
            weights = np.zeros(n_experts)
            for i in range(n_experts):
                Brier_Scores[i] = brier_score_loss(y_train,
                                                   X_train[:, i],
                                                   pos_label=1)
                weights[i] = sum(X_train[:, i] != 0.5) / n_train

            Brier_Scores_weighted = 1 - (1 - Brier_Scores) * weights

            #choose the type of experts
            if type == 'Brier':
                #Get expert on pure Brier Score
                Scores = Brier_Scores
            elif type == 'Brier_weighted':
                #Get expert on weighted Brier Score
                Scores = Brier_Scores_weighted

            #Determine the number of top experts
            sorted_expert_indices = np.argsort(Scores)
            #keep only the top N experts
            sorted_expert_indices = sorted_expert_indices[:N]

            #now test on the validation set
            predictions = X_val[:, sorted_expert_indices]

            #Average the experts
            if average == 'weighted':
                y_prob = np.average(predictions,
                                    axis=1,
                                    weights=weights[sorted_expert_indices])
            elif average == 'median':
                y_prob = np.median(predictions, axis=1)
            else:  #simple unweighted averaging
                y_prob = np.mean(predictions, axis=1)

            #calculate the Brier score on the validation data
            TestScores[tst] = brier_score_loss(y_val, y_prob, pos_label=1)

        #Take the parameter with the minimum Brier score
        tst_index = np.argmin(TestScores)
        Nopt[rep] = coeff_grid[tst_index]

    #Get the mode as the most optimal value
    Nopt = stats.mode(Nopt)[0][0]

    #Now evaluate on the test set. The expert indices of both train_val and test sets should be identical

    #Determine the number of top experts from the full train_val set. Any prediction at 0.5 is counted as a no-prediction
    Brier_Scores = np.ones(n_experts)
    weights = np.zeros(n_experts)
    n_train = X_trainval.shape[0]
    for i in range(n_experts):
        Brier_Scores[i] = brier_score_loss(y_trainval_bin,
                                           X_trainval[:, i],
                                           pos_label=1)
        weights[i] = sum(X_trainval[:, i] != 0.5) / n_train

    Brier_Scores_weighted = 1 - (1 - Brier_Scores) * weights

    #choose the type of experts
    if type == 'Brier':
        #Get expert on pure Brier Score
        Scores = Brier_Scores
    elif type == 'Brier_weighted':
        #Get expert on weighted Brier Score
        Scores = Brier_Scores_weighted

    sorted_expert_indices = np.argsort(Scores)
    #keep only the top N experts
    sorted_expert_indices = sorted_expert_indices[:Nopt]

    #now test on the test set
    predictions = X_test[:, sorted_expert_indices]

    #Average the experts
    if average == 'weighted':
        y_prob = np.average(predictions,
                            axis=1,
                            weights=weights[sorted_expert_indices])
    elif average == 'median':
        y_prob = np.median(predictions, axis=1)
    else:  #simple unweighted averaging
        y_prob = np.mean(predictions, axis=1)

    return brier_score_loss(y_test, y_prob, pos_label=1), Nopt
        obs = obs[flag_nonan]
        fcst = fcst[flag_nonan]
        L = np.sum(flag_nonan)

        o_bar_ = np.mean(obs)

        o_bar[d] = o_bar_

        for n in range(N_boost):

            ind_bagging = np.random.choice(L, size=L, replace=True)
            obs_ = obs[ind_bagging]
            fcst_ = fcst[ind_bagging]

            prob_true_, prob_pred_ = reliability_diagram(
                obs_, fcst_, hist_bins)
            brier_ = brier_score_loss(obs_, fcst_)

            prob_true[d, :, n] = prob_true_
            prob_pred[d, :, n] = prob_pred_
            brier[d, n] = brier_

        hist_bins_ = np.mean(prob_pred[d, ...], axis=1)
        use_, _ = np.histogram(fcst, bins=np.array(list(hist_bins_) + [1.0]))
        use[d, :] = use_

    tuple_save = (brier, prob_true, prob_pred, use, o_bar)
    label_save = ['brier', 'pos_frac', 'pred_value', 'use', 'o_bar']
    du.save_hdf5(tuple_save, label_save, save_dir,
                 '{}_Calib_loc{}.hdf'.format(prefix_out, r))
Example #27
def brier_skill_score(target_values, forecast_probabilities):
    climo = np.mean((target_values - np.mean(target_values))**2)
    return 1.0 - brier_score_loss(target_values,
                                  forecast_probabilities) / climo
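
A short usage sketch (hypothetical data) for brier_skill_score above: forecasting the climatological base rate everywhere gives zero skill, while a sharper, mostly correct forecast scores above zero (1 would be perfect).

import numpy as np

rng = np.random.RandomState(0)
target = rng.binomial(1, 0.3, size=1000)          # hypothetical binary observations
climo = np.full(target.shape, target.mean())      # always forecast the base rate
sharp = np.where(target == 1, 0.9, 0.1)           # hypothetical well-resolved forecast

print(brier_skill_score(target, climo))   # 0.0: no skill over climatology
print(brier_skill_score(target, sharp))   # > 0: positive skill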
def online_eval(model, dataloader, txtlog, submit_path, uncertaintys_path,
                save_segmentation, save_uncertainty):
    txtlog.write("Dice_mean fg|bg|hausdorff_dist|ravd|ece|nll|sklearn_brier\n")
    my_evaluation = Evaluation()
    start_time = time.time()
    with torch.no_grad():
        dice_new_list = []
        data_dict_list = []
        hausdorff_dist_list = []
        ravd_list = []
        shape_list = []
        testset_list_pre = []
        testset_list_gt = []
        nll_list = []
        brier_list = []
        brier_sklearn_list = []
        ece_list = []
        for data_val in dataloader:
            images_val, targets_val, subject, slice, images_origin = data_val
            model.eval()
            images_val = images_val.to(device)
            targets_val = targets_val.to(device)
            outputs = model(images_val, test_config.lamda_sem)
            # final_out [i-1,i,i+1]
            outputs_val = outputs.final_out
            softmax = outputs.softmax_out
            # calculate predicted entropy as uncertainty
            softmax_1 = torch.unsqueeze(softmax[:, 1, ...], dim=1)
            softmax_2 = torch.unsqueeze(softmax[:, 3, ...], dim=1)
            softmax_3 = torch.unsqueeze(softmax[:, 5, ...], dim=1)
            softmax_fg = torch.cat((softmax_1, softmax_2, softmax_3), dim=1)
            softmax_fg_numpy = softmax_fg.data.cpu().numpy()
            softmax_fg_numpy = np.squeeze(softmax_fg_numpy, axis=0)
            mean_fg = np.mean(softmax_fg_numpy, axis=0)
            entropy = -mean_fg * np.log(mean_fg)

            # softmax outputs for uncertainty quantification
            softmax_final_out = softmax[:, 6:8, ...]
            softmax_final_out = np.squeeze(
                softmax_final_out.data.cpu().numpy(), axis=0)
            # Process slice by slice
            outputs_val_1 = outputs_val[:, 0:2, ...]

            image_origin = images_origin.data.cpu().numpy()
            image_origin1 = np.squeeze(image_origin, axis=0)
            image_origin1 = image_origin1[:, :, 1]

            _, predicted_1 = torch.max(outputs_val_1.data, 1)

            # ----------Compute dice-----------
            predicted_val_1 = predicted_1.data.cpu().numpy()
            subject_val = subject.data.cpu().numpy()
            slice_val = slice.data.cpu().numpy()
            slice_val_1 = slice_val[0][1]
            targets_val = targets_val.data.cpu().numpy()
            targets_val_1 = targets_val[:, 1, ...]

            shape_list.append(predicted_val_1.shape)
            data_dict_list.append({
                "subject": subject_val[0],
                "slice": slice_val_1,
                "pre": np.squeeze(predicted_val_1, axis=0),
                "target": np.squeeze(targets_val_1, axis=0),
                "image": image_origin1,
                "uncertainty": entropy,
                "softmax_out": softmax_final_out
            })

        # measure the elapsed time of the uncertainty quantification step
        end_time = time.time()
        print("elapsed:{}".format(end_time - start_time))
        # Group the results by subject with pandas
        pd_data = pd.DataFrame(data_dict_list)
        for subject, volume_data in pd_data.groupby("subject"):
            pre = volume_data["pre"]
            tar = volume_data["target"]
            slices = volume_data["slice"]
            image = volume_data["image"]
            uncertain = volume_data["uncertainty"]
            softmax_prob = volume_data["softmax_out"]

            pre_array = pre.values
            target_array = tar.values
            image_array = image.values
            uncertain_arr = uncertain.values
            slices_arr = slices.values
            softmax_prob_arr = softmax_prob.values

            pre_temp = np.zeros(
                (len(pre_array), pre_array[0].shape[0], pre_array[0].shape[1]),
                dtype="int16")
            target_temp = np.zeros((len(pre_array), target_array[0].shape[0],
                                    target_array[0].shape[1]),
                                   dtype="int16")
            # dimentions: slices*class*width*height
            softmax_probs_temp = np.zeros(
                (len(pre_array), softmax_prob_arr[0].shape[0],
                 softmax_prob_arr[0].shape[1], softmax_prob_arr[0].shape[2]),
                dtype="float32")
            for i in range(len(pre_array)):
                pre_temp[i, :, :] = pre_array[i]
                target_temp[i, :, :] = target_array[i]
                softmax_probs_temp[i, :, :, :] = softmax_prob_arr[i]
                # Save the prediction, ground truth (GT), and image
                if save_segmentation:
                    image_slice = image_array[i]
                    # save image and segmentation
                    my_evaluation.save_contour_label(
                        image_slice.astype("int16"),
                        target_array[i],
                        save_path=submit_path,
                        color="red",
                        file_name=str(subject) + "_" + str(slices_arr[i]) +
                        "label",
                        show_mask=True)
                    my_evaluation.save_contour_label(
                        image_slice.astype("int16"),
                        pre_array[i],
                        save_path=submit_path,
                        color="blue",
                        file_name=str(subject) + "_" + str(slices_arr[i]) +
                        "pre",
                        show_mask=True)

                    orig_path = os.path.join(
                        submit_path,
                        str(subject) + "_" + str(slices_arr[i]) + '.png')
                    cv.imwrite(orig_path, image_slice.astype("uint8"))
                if save_uncertainty:
                    # Predicted error map
                    error = np.abs(pre_array[i] - target_array[i])
                    error_name = str(subject) + "_" + str(
                        slices_arr[i]) + "error.png"
                    error_file_path = os.path.join(uncertaintys_path,
                                                   error_name)
                    plt.figure()
                    plt.imshow(error,
                               cmap=plt.cm.Reds,
                               interpolation='nearest')
                    # Visualization of the uncertainty
                    file_name = str(subject) + "_" + str(
                        slices_arr[i]) + ".png"
                    file_path = os.path.join(uncertaintys_path, file_name)
                    plt.colorbar()
                    plt.xticks([])
                    plt.yticks([])
                    plt.savefig(error_file_path)
                    plt.clf()
                    plt.cla()
                    plt.close()

                    plt.figure()
                    plt.imshow(uncertain_arr[i],
                               cmap=plt.cm.rainbow,
                               interpolation='nearest')
                    plt.colorbar()
                    plt.xticks([])
                    plt.yticks([])
                    # plt.axes('off')
                    plt.savefig(file_path)
                    plt.clf()
                    plt.cla()
                    plt.close()

            dsc_list1 = []
            if 0 == np.count_nonzero(pre_temp):
                print("zero" + "_" + str(subject))
                continue

            # calculate the dice metric
            for i in range(0, test_config.num_classes):
                dsc_i = dice(pre_temp, target_temp, i)
                dsc_list1.append(dsc_i)

            # Calculate Hausdorff Distance and rAVD
            hausdorff_dist = hd(pre_temp, target_temp, [5, 0.42, 0.42])
            # we measure the absolute volume difference
            ravd = abs(rAVD(pre_temp, target_temp))

            # calculate the volume of ICH for GT and predictions
            volume_gt = calculate_volume(target_temp)
            volume_pre = calculate_volume(pre_temp)

            # Evaluate uncertainty qualification with nll, brier, ece
            softmax_probs_temp = softmax_probs_temp.transpose(1, 0, 2, 3)
            brier_socre = brier(
                torch.from_numpy(softmax_probs_temp).float(),
                torch.from_numpy(target_temp).long())
            ece_subject_wise, _, _ = ece(softmax_probs_temp[1, :, :, :],
                                         target_temp, 10)
            # Cross-check the Brier score with sklearn's brier_score_loss
            target_onehot_temp = one_hot(target_temp, 2)

            brier_sklearn = brier_score_loss(target_onehot_temp[0, ...].flatten(), softmax_probs_temp[0, ...].flatten())+\
            brier_score_loss(target_onehot_temp[1,...].flatten(), softmax_probs_temp[1,...].flatten())

            nll_score = nll(
                torch.from_numpy(softmax_probs_temp).float(),
                torch.from_numpy(target_temp).long())
            print("nll_score:{}  brier_socre:{}".format(
                nll_score.data.numpy(), brier_socre.data.numpy()))
            print("dice_bg:{}  dice_fg:{}  Hausdorff_dist:{} ravd:{}".format(
                dsc_list1[0], dsc_list1[1], hausdorff_dist, ravd))
            txtlog.write(
                "ID{:30} {:3f}  {:3f} {:3f} {:3f} {:3f}   {:3f}  {:3f} {:3f} {:3f} \n"
                .format(subject, dsc_list1[0], dsc_list1[1], hausdorff_dist,
                        ravd, ece_subject_wise, nll_score, brier_sklearn,
                        volume_gt, volume_pre))
            dice_new_list.append(dsc_list1)
            hausdorff_dist_list.append(hausdorff_dist)
            ravd_list.append(ravd)

            brier_list.append(brier_socre.data.numpy())
            nll_list.append(nll_score.data.numpy())
            brier_sklearn_list.append(brier_sklearn)
            ece_list.append(ece_subject_wise)
            # store all the test data
            testset_list_pre.append(softmax_probs_temp[1, :, :, :])
            testset_list_gt.append(target_temp)

        dice_array = np.array(dice_new_list)
        dice_mean = np.mean(dice_array, axis=0)
        haus_dist_arr = np.array(hausdorff_dist_list)
        hausdorff_dist_mean = np.mean(haus_dist_arr, axis=0)
        ravd_arr = np.array(ravd_list)
        ravd_mean = np.mean(ravd_arr, axis=0)

        # uncertainty quantification
        brier_array = np.mean(np.array(brier_list), axis=0)
        nll_array = np.mean(np.array(nll_list), axis=0)
        brier_sklearn_mean = np.mean(np.array(brier_sklearn_list), axis=0)
        ece_subject_mean = np.mean(np.array(ece_list), axis=0)

        stacked_pre = merge_samples(testset_list_pre)
        stacked_gt = merge_samples(testset_list_gt)
        print("pre:{}  gt:{}".format(stacked_pre.shape, stacked_gt.shape))
        ece_score, confidence, accuracy = ece(stacked_pre, stacked_gt, 10)
        fraction_of_positives, mean_predicted_value = \
            calibration_curve(stacked_gt.flatten(), stacked_pre.flatten(), n_bins=10)

        # Draw Reliability Diagram (binned version and curve version)
        x = np.linspace(0, 1. + 1e-8, 10)
        y3 = x
        plt.plot([0, 1], [0, 1], "k:")
        plt.bar(x,
                height=fraction_of_positives,
                color='b',
                width=-0.112,
                label='Outputs',
                linewidth=2,
                edgecolor=['black'] * len(x),
                align='edge')
        plt.bar(x,
                height=y3 - fraction_of_positives,
                color='g',
                bottom=fraction_of_positives,
                width=-0.112,
                label='Gap',
                linewidth=2,
                edgecolor=['black'] * len(x),
                align='edge')
        plt.xlim(0., 1.)
        plt.ylim(0., 1.)
        plt.xlabel("Confidence")
        plt.ylabel("Accuracy")
        # plt.title("Histogram plot")
        plt.legend(loc="upper left")
        plt.savefig('reliability_diagram_bined.png',
                    dpi=400,
                    bbox_inches='tight')

        plt.figure(figsize=(5, 5))
        ax1 = plt.subplot2grid((1, 1), (0, 0), rowspan=2)
        ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
        ax1.plot(mean_predicted_value,
                 fraction_of_positives,
                 "s-",
                 label="calibrated_sklearn")
        ax1.set_ylabel("Fraction of positives")
        ax1.set_ylim([-0.05, 1.05])
        ax1.legend(loc="upper left")
        ax1.set_title('Calibration plots  (reliability curve)')
        plt.savefig('reliability_diagram_sklearn.png',
                    dpi=400,
                    bbox_inches='tight')

        with h5py.File("reliability_se_net.h5", "w") as f:
            f['confidence'] = confidence
            f['accuracy'] = accuracy
        txtlog.write("Dice_mean fg|bg|hausdorff_dist|ravd|ece|brier|nll|sklearn_brier|ece_sub_mean:"
                     "{:3f} ||{:3f}||{:3f}||{:3f}||{:3f}||{:3f}||{:3f}||{:3f} ||{:3f}\n".format(
                         dice_mean[0], dice_mean[1], hausdorff_dist_mean, ravd_mean, ece_score,
                         brier_array, nll_array, brier_sklearn_mean, ece_subject_mean))
        txtlog.write("Time Elapsed:  {}".format(end_time - start_time))
    return dice_mean
Example #29
0
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [(lr, 'Logistic'), (gnb, 'Naive Bayes'),
                  (svc, 'Support Vector Classification'),
                  (rfc, 'Random Forest')]:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test)[:, 1]
    else:  # use decision function
        prob_pos = clf.decision_function(X_test)
        prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() -
                                                  prob_pos.min())

    clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
    print("%s:" % name)
    print("\tBrier: %1.3f" % (clf_score))
    print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
    print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
    print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

    fraction_of_positives, mean_predicted_value = calibration_curve(y_test,
                                                                    prob_pos,
                                                                    n_bins=10)

    ax1.plot(mean_predicted_value,
             fraction_of_positives,
             "s-",
             label="%s" % (name, ))
    bestModel = load_model('results/sampling/miTAR_CNN_BiRNN_b' + str(batch) +
                           '_lr' + str(lr) + '_dout' + str(dout) + '_seed' +
                           str(seed) + '.h5')

    score = bestModel.evaluate(X_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (score[1] * 100))
    scores.append(score[1] * 100)

    y_pred = bestModel.predict_proba(X_test)
    posthr = 0.5
    negthr = 0.5
    rm = 0

    oneacc, sen, spe, Fmeasure, PPV, NPV = evals(y_test, y_pred, posthr,
                                                 negthr, rm)
    brierScore = brier_score_loss(y_test, y_pred)
    vals.append([oneacc, sen, spe, Fmeasure, PPV, NPV, brierScore])

    if score[1] > acc:
        acc = score[1]
        paras = [seed]
        print("best so far, acc=", acc, " paras=", paras)

    print("finish paras at: seed=", seed)

from statistics import mean
aveScore = mean(scores)
print("the average accuracy is: ", aveScore)

aveEvals = []
for i in range(7):
Example #31
0
 def common_get_brier(self, y_test, y_score):
     try:
         brier = brier_score_loss(y_test, y_score)
         return brier
     except:
         return 1.0
Example #32
0
def _compute_score(model, X, y, scoring_metric=None, scoring_params=None):
    '''Helper function that maps metric string names to their function calls.

    Parameters
    ----------
    model : class inheriting sklearn.base.BaseEstimator
        The classifier whose hyperparams you need to optimize with grid search.
        The model must have model.fit(X,y) and model.predict(X) defined. Although it can
        work without it, it's best if you also define model.score(X,y) so you can choose
        the scoring function used to select the best parameters. If you are using an
        sklearn model, everything works out of the box. Using a model from a different
        library is no problem, but you need to wrap it in a class that inherits from
        sklearn.base.BaseEstimator, as seen in:
        https://github.com/cgnorthcutt/hyperopt

    X : np.array of shape (n, m)
        The training data.

    y : np.array of shape (n,) or (n, 1)
        Corresponding labels.

    scoring_metric : str
        See hypopt.GridSearch.fit() scoring parameter docstring
        for list of options.

    scoring_params : dict
        All other params you want passed to the scoring function.
        Params will be passed as scoring_func(**scoring_params).'''

    if scoring_params is None:
        scoring_params = {}

    if scoring_metric == 'accuracy':
        return metrics.accuracy_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'brier_score_loss':
        return metrics.brier_score_loss(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'average_precision':
        return metrics.average_precision_score(y,
                                               model.predict_proba(X)[:, 1],
                                               **scoring_params)
    elif scoring_metric == 'f1':
        return metrics.f1_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'f1_micro':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='micro',
                                **scoring_params)
    elif scoring_metric == 'f1_macro':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='macro',
                                **scoring_params)
    elif scoring_metric == 'f1_weighted':
        return metrics.f1_score(y,
                                model.predict(X),
                                average='weighted',
                                **scoring_params)
    elif scoring_metric == 'neg_log_loss':
        return -1. * metrics.log_loss(y, model.predict_proba(X), **
                                      scoring_params)
    elif scoring_metric == 'precision':
        return metrics.precision_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'recall':
        return metrics.recall_score(y, model.predict(X), **scoring_params)
    elif scoring_metric == 'roc_auc':
        return metrics.roc_auc_score(y,
                                     model.predict_proba(X)[:, 1],
                                     **scoring_params)
    elif scoring_metric == 'explained_variance':
        return metrics.explained_variance_score(y, model.predict(X),
                                                **scoring_params)
    elif scoring_metric == 'neg_mean_absolute_error':
        return -1. * metrics.mean_absolute_error(y, model.predict(X), **
                                                 scoring_params)
    elif scoring_metric == 'neg_mean_squared_error':
        return -1. * metrics.mean_squared_error(y, model.predict(X), **
                                                scoring_params)
    elif scoring_metric == 'neg_mean_squared_log_error':
        return -1. * metrics.mean_squared_log_error(y, model.predict(X), **
                                                    scoring_params)
    elif scoring_metric == 'neg_median_absolute_error':
        return -1. * metrics.median_absolute_error(y, model.predict(X), **
                                                   scoring_params)
    elif scoring_metric == 'r2':
        return metrics.r2_score(y, model.predict(X), **scoring_params)
    else:
        raise ValueError(scoring_metric + ' is not a supported metric.')
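# A minimal usage sketch (not part of the original source); the demo names below
# are illustrative, and the sketch assumes the sklearn imports the helper relies on.
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

_X_demo, _y_demo = make_classification(n_samples=200, random_state=0)
_demo_model = LogisticRegression(max_iter=1000).fit(_X_demo, _y_demo)
print(_compute_score(_demo_model, _X_demo, _y_demo, scoring_metric='roc_auc'))
print(_compute_score(_demo_model, _X_demo, _y_demo, scoring_metric='brier_score_loss'))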
Example #33
0
def metrics_sklearn(y_true=np.ndarray,
                    y_pred=np.ndarray,
                    y_pred_c=np.ndarray,
                    alpha=0.05,
                    n_boot=5,
                    blocksize=1,
                    clim_prob=None,
                    threshold_pred='upper_clim'):
    '''
    threshold_pred  options: 'clim', 'upper_clim', int or float
                    if 'clim' is passed, a positive prediction is forecast for
                    every value of y_pred above clim_prob
                    if 'upper_clim' is passed, only the upper 75% of the values
                    above clim_prob are treated as positive predictions
    '''

    #    y_true, y_pred, y_pred_c = y_true_c, ts_logit_c, y_pred_c_c
    #%%

    y_true = np.array(y_true).squeeze()
    cont_pred = np.unique(y_pred).size > 5
    metrics_dict = {}

    if clim_prob is None:
        clim_prob = np.round((y_true[(y_true == 1)].size / y_true.size), 2)

    sorval = np.array(sorted(y_pred))
    # probability to percentile
    if threshold_pred == 'clim':
        # binary metrics calculated for clim prevalence
        quantile = 1 - y_pred[sorval > clim_prob].size / y_pred.size
        # old : quantile = 100 * clim_prob
    elif threshold_pred == 'upper_clim':
        # binary metrics calculated for top 75% of 'above clim prob'
        No_vals_above_clim = y_pred[sorval > clim_prob].size / y_pred.size
        upper_75 = 0.75 * No_vals_above_clim  # 0.75 * percentage values above clim
        quantile = 1 - upper_75
        # old: bin_threshold = 100 * (1 - 0.75*clim_prob)
        # old:  quantile = bin_threshold
    elif isinstance(threshold_pred, int) or isinstance(threshold_pred, float):
        if threshold_pred < 1:
            quantile = 1 - y_pred[sorval > threshold_pred].size / y_pred.size
        else:
            quantile = 1 - y_pred[sorval > threshold_pred /
                                  100.].size / y_pred.size
    elif isinstance(threshold_pred, tuple):
        times = threshold_pred[0]
        quantile = 1 - (y_pred[sorval > times * clim_prob].size / y_pred.size)
    percentile_t = 100 * quantile

    y_pred_b = np.array(y_pred > np.percentile(y_pred, percentile_t),
                        dtype=int)
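    # Worked example of the conversion above: if clim_prob = 0.2 and 40% of the
    # predictions exceed it, 'clim' gives quantile = 0.6 (percentile_t = 60), so
    # the 40% of values above clim_prob are labelled positive; 'upper_clim' gives
    # upper_75 = 0.3 and quantile = 0.7 (percentile_t = 70), so only the top 30%
    # (the upper 75% of the above-clim values) are labelled positive.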

    out = get_metrics_bin(y_true, y_pred, t=percentile_t)
    (prec, recall, FPR, SP, Acc, f1, KSS_score, EDI) = out
    prec = metrics.precision_score(y_true, y_pred_b)
    acc = metrics.accuracy_score(y_true, y_pred_b)

    if cont_pred:

        AUC_score = metrics.roc_auc_score(y_true, y_pred)
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred_b)
        # P : Precision at threshold, R : Recall at threshold, PRthresholds
        P, R, PRthresholds = metrics.precision_recall_curve(y_true, y_pred)
        AUCPR_score = metrics.average_precision_score(y_true, y_pred)

        # convert y_pred to fake probabilities if spatcov is given
        if y_pred.max() > 1 or y_pred.min() < 0:
            y_pred = (y_pred + abs(y_pred.min())) / (y_pred.max() +
                                                     abs(y_pred.min()))
        else:
            y_pred = y_pred

        brier_score = metrics.brier_score_loss(y_true, y_pred)
        brier_score_clim = metrics.brier_score_loss(y_true, y_pred_c)

    old_index = range(0, len(y_pred), 1)
    n_bl = blocksize
    chunks = [
        old_index[n_bl * i:n_bl * (i + 1)]
        for i in range(int(len(old_index) / n_bl))
    ]

    # divide the bootstrap subchunks over all cpus
    n_boot_sub = int(round((n_boot / max_cpu) + 0.4, 0))
    with ProcessPoolExecutor(max_workers=max_cpu) as pool:
        futures = []
        unique_seed = 42
        for i_cpu in range(max_cpu):
            unique_seed += 1  # ensure the same shuffling is not repeated
            futures.append(
                pool.submit(_bootstrap, y_true, y_pred, n_boot_sub, chunks,
                            percentile_t, unique_seed))
        out = [future.result() for future in futures]

    boots_AUC = []
    boots_AUCPR = []
    boots_brier = []
    boots_prec = []
    boots_acc = []
    boots_KSS = []
    boots_EDI = []
    for i_cpu in range(max_cpu):
        _AUC, _AUCPR, _brier, _prec, _acc, _KSS, _EDI = out[i_cpu]
        boots_AUC.append(_AUC)
        boots_AUCPR.append(_AUCPR)
        boots_brier.append(_brier)
        boots_prec.append(_prec)
        boots_acc.append(_acc)
        boots_KSS.append(_KSS)
        boots_EDI.append(_EDI)

    # Computing the lower and upper bound of the 90% confidence interval
    # You can change the bounds percentiles to 0.025 and 0.975 to get
    # a 95% confidence interval instead.
    def get_ci(boots, alpha=0.05):
        if len(np.array(boots).shape) == 2:
            boots = flatten(boots)
        sorted_scores = np.array(boots)
        sorted_scores.sort()
        ci_low = sorted_scores[int(alpha * len(sorted_scores))]
        ci_high = sorted_scores[int((1 - alpha) * len(sorted_scores))]
        return ci_low, ci_high, sorted_scores
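    # e.g. get_ci([0.6, 0.7, 0.8, 0.9], alpha=0.05) returns (0.6, 0.9, sorted scores);
    # with alpha = 0.05 the bounds approach the 5th and 95th percentiles, i.e. a 90%
    # interval, once enough bootstrap samples are available.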

    if np.array(boots_AUC).ravel().size != 0:
        if cont_pred:
            ci_low_AUC, ci_high_AUC, sorted_AUCs = get_ci(boots_AUC, alpha)

            ci_low_AUCPR, ci_high_AUCPR, sorted_AUCPRs = get_ci(
                boots_AUCPR, alpha)

            ci_low_brier, ci_high_brier, sorted_briers = get_ci(
                boots_brier, alpha)

        ci_low_KSS, ci_high_KSS, sorted_KSSs = get_ci(boots_KSS, alpha)

        ci_low_prec, ci_high_prec, sorted_precs = get_ci(boots_prec, alpha)

        ci_low_acc, ci_high_acc, sorted_accs = get_ci(boots_acc, alpha)

        ci_low_EDI, ci_high_EDI, sorted_EDIs = get_ci(boots_EDI, alpha)

    else:
        if cont_pred:
            ci_low_AUC, ci_high_AUC, sorted_AUCs = (AUC_score, AUC_score,
                                                    [AUC_score])

            ci_low_AUCPR, ci_high_AUCPR, sorted_AUCPRs = (AUCPR_score,
                                                          AUCPR_score,
                                                          [AUCPR_score])

            ci_low_brier, ci_high_brier, sorted_briers = (brier_score,
                                                          brier_score,
                                                          [brier_score])

        ci_low_KSS, ci_high_KSS, sorted_KSSs = (KSS_score, KSS_score,
                                                [KSS_score])

        ci_low_prec, ci_high_prec, sorted_precs = (prec, prec, [prec])

        ci_low_acc, ci_high_acc, sorted_accs = (acc, acc, [acc])

        ci_low_EDI, ci_high_EDI, sorted_EDIs = (EDI, EDI, [EDI])

    if cont_pred:
        metrics_dict['AUC'] = (AUC_score, ci_low_AUC, ci_high_AUC, sorted_AUCs)
        metrics_dict['AUCPR'] = (AUCPR_score, ci_low_AUCPR, ci_high_AUCPR,
                                 sorted_AUCPRs)
        metrics_dict['brier'] = (brier_score, brier_score_clim, ci_low_brier,
                                 ci_high_brier, sorted_briers)
        metrics_dict['fpr_tpr_thres'] = fpr, tpr, thresholds
        metrics_dict['P_R_thres'] = P, R, PRthresholds
    metrics_dict['KSS'] = (KSS_score, ci_low_KSS, ci_high_KSS, sorted_KSSs)
    metrics_dict['prec'] = (prec, ci_low_prec, ci_high_prec, sorted_precs)
    metrics_dict['acc'] = (acc, ci_low_acc, ci_high_acc, sorted_accs)
    metrics_dict['EDI'] = EDI, ci_low_EDI, ci_high_EDI, sorted_EDIs

    #    print("Confidence interval for the score: [{:0.3f} - {:0.3}]".format(
    #        confidence_lower, confidence_upper))
    #%%
    return metrics_dict
Example #34
0
                                           random_state=None,
                                           solver='warn',
                                           tol=0.0001,
                                           verbose=0,
                                           warm_start=False)

clfLogisticRegression.fit(X_train, y_train)
y_pred_c = clfLogisticRegression.predict(X_test)
y_pred_proba_clg = clfLogisticRegression.predict_proba(X_test)[:, 1]
confmat_test_c = confusion_matrix(y_true=y_test, y_pred=y_pred_c)

print('confmat_test:\n', confmat_test_c)
print('the acc is:', accuracy_score(y_test, y_pred_c))
print('the classification_report:', classification_report(y_test, y_pred_c))
print('the auc of logistics is:', roc_auc_score(y_test, y_pred_proba_clg))
print('the brier socre is', brier_score_loss(y_test, y_pred_proba_clg))

#confmat_test:
# [[3125  184]
# [  17   51]]
#the acc is: 0.9404797157240155
#the classification_report:               precision    recall  f1-score   support
#
#         0.0       0.99      0.94      0.97      3309
#         1.0       0.22      0.75      0.34        68
#
#    accuracy                           0.94      3377
#   macro avg       0.61      0.85      0.65      3377
#weighted avg       0.98      0.94      0.96      3377
#
#the auc of logistics is: 0.8805752582084511
n = len(methods)
# metrics for the comparison methods
for i in range(n):
    print('========' + str(methods[i]))
    cutoff = 0.5
    #cutoff = test[methods[i] + 'DefaultPred'].median()
    f1 = f1_score(test.wtbz, pd.Series(test[methods[i] + 'DefaultPred'] > cutoff).apply(lambda x: 1 if x else 0))
    print('%.3f' % f1)
    precision = precision_score(test.wtbz,
                                pd.Series(test[methods[i] + 'DefaultPred'] > cutoff).apply(lambda x: 1 if x else 0))
    print('%.3f' % precision)

    fpr, tpr, thresholds = roc_curve(test.wtbz, test[methods[i] + 'DefaultPred'].apply(lambda x:1 if x>1 else x))
    print('%.3f' % np.max(tpr - fpr))

    bs = brier_score_loss(test.wtbz,
                          test[methods[i] + 'DefaultPred'].apply(lambda x: x if x > 0 else 0).apply(lambda x:1 if x>1 else x))
    print('%.3f' % bs)

    ap = average_precision_score(test.wtbz, test[methods[i]+'DefaultPred'])
    print('%.3f' % ap)

    auc = roc_auc_score(test.wtbz, test[methods[i] + 'DefaultPred'])
    print('%.3f' % auc)

    a = test.wtbz[test[methods[i] + 'DefaultPred'] < cutoff]
    b = test[methods[i] + 'DefaultPred'][test[methods[i] + 'DefaultPred'] < cutoff]
    try:
        auc = roc_auc_score(a, b)
    except ValueError:
        pass
def computeTestScore():
    # Read data from HDFStore file
    X1 = pd.read_hdf('trainingDataT1.h5', 'data')
    y1 = pd.read_hdf('trainingDataT1.h5', 'y')

    X2 = pd.read_hdf('trainingDataT2.h5', 'data')
    y2 = pd.read_hdf('trainingDataT2.h5', 'y')

    X3 = pd.read_hdf('trainingDataT3.h5', 'data')
    y3 = pd.read_hdf('trainingDataT3.h5', 'y')

    print('X and y read')
    X1.drop(X1.columns[[20, 21, 22]], axis=1, inplace=True)
    X1.columns = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24
    ]

    X = X2.append(X3)
    X = X1.append(X)

    y = y2.append(y3)
    y = y1.append(y)

    print('X shape ', X.shape)
    X = X.as_matrix()
    y = np.array(y).ravel()
    y = list(map(int, y))

    listX0 = []
    listX1 = []

    for i in range(X.shape[0]):
        if y[i] == 1.0:
            listX1.append(X[i][0])
        elif y[i] == 0.0:
            listX0.append(X[i][0])

    print('min = %d, max=%d' % (np.amin(listX0), np.amax(listX0)))
    print(np.median(listX0))
    print('min = %d, max=%d' % (np.amin(listX1), np.amax(listX1)))
    print(np.median(listX1))
    print('ratio : ', np.median(listX1) / np.median(listX0))
    '''
    print 'min = %d, max=%d' % (np.amin(X), np.amax(X))
    print np.median(X)
    print 'X and y processed'
    '''
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    clf = GradientBoostingClassifier(random_state=42)
    '''
    # Create GBT algorithm with xgboost library
    clf = XGBoostClassifier(
        objective = 'binary:logistic',
        booster = 'gbtree',
        eval_metric = 'auc',
        tree_method = 'exact',
        num_class = 2,
        silent = 1,
        seed = 42,
        )
    
    parameters = {
        'eta': [0.01],#[0.01, 0.015, 0.025, 0.05, 0.1],
        'gamma': [0.1],#[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
        'max_depth': [2],#[3, 5, 7, 9, 12, 15, 17, 25],
        'min_child_weight': [1],#[1, 3, 5, 7],
        'subsample': [0.4],#[0.6, 0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [1.0],#[0.6, 0.7, 0.8, 0.9, 1.0],
        'lambda': [0.1],#[0.05, 0.1, 1.0],
        'alpha': [0.01],#[0, 0.1, 0.5, 1.0],
    }

    eval_size = 0.10
    kf = StratifiedKFold(y_train, round(1. / eval_size), shuffle=True, random_state=42)

    scoring_fnc = make_scorer(roc_auc_score)

    clf = GridSearchCV(clf, parameters, scoring_fnc, cv=kf, n_jobs=-1)
    '''
    clf.fit(X_train, y_train)
    '''
    clf = clf.best_estimator_
    '''
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Model with isotonic calibration
    clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

    # Model with sigmoid calibration
    clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
    clf_sigmoid.fit(X_train, y_train)
    prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

    print("Brier scores: (the smaller the better)")

    clf_score = brier_score_loss(y_test, prob_pos_clf)
    print("No calibration: %1.3f" % clf_score)

    clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic)
    print("With isotonic calibration: %1.3f" % clf_isotonic_score)

    clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid)
    print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)

    print("AUC scores:")

    clf_auc_score = roc_auc_score(y_test, prob_pos_clf)
    print("No calibration: %1.3f" % clf_auc_score)

    clf_isotonic_auc_score = roc_auc_score(y_test, prob_pos_isotonic)
    print("With isotonic calibration: %1.3f" % clf_isotonic_auc_score)

    clf_sigmoid_auc_score = roc_auc_score(y_test, prob_pos_sigmoid)
    print("With sigmoid calibration: %1.3f" % clf_sigmoid_auc_score)
Example #37
0
def brier_scorer(estimator, X, y):
    probabilities = estimator.predict_proba(X)
    return metrics.brier_score_loss(
        [float(d) for d in y],
        probabilities[:, 1],
    )
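# Usage sketch (not from the original source): a callable with the signature
# (estimator, X, y) can be passed directly as `scoring=` to sklearn's
# model-selection utilities. Since the Brier score is a loss (lower is better),
# it is negated for selection; the demo names below are illustrative.
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def _neg_brier(est, X, y):
    return -brier_scorer(est, X, y)

_Xb, _yb = make_classification(n_samples=200, random_state=0)
print(cross_val_score(LogisticRegression(max_iter=1000), _Xb, _yb,
                      cv=3, scoring=_neg_brier))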
Example #38
0
def getscores(X, pred_y, test_y, harm1test, harm2test, j, predtag, eblcattest):
    pred_y = pd.DataFrame(pred_y)
    harm1test = pd.DataFrame(harm1test)
    harm2test = pd.DataFrame(harm2test)
    test_y = pd.DataFrame(test_y)
    # td = [pred, harm1test, harm2test, truthtest]
    # td = pd.concat(td, axis=1)
    # td.columns = ['pred', 'harm1', 'harm2','truthtest']
    # td.sort_values(by='harm1', ascending=True)

    sumharm1 = sum(harm1test)
    sumharm2 = sum(harm2test)

    thresh_cent = np.arange(0.01, 1, 0.01)
    cent = 100 - np.arange(1, 100, 1)

    dummyarray = np.empty((100, 22))
    dummyarray[:] = np.nan
    output = pd.DataFrame(dummyarray)

    for a in range(0, 99, 1):

        test_thresh = thresh_cent[a]
        test_cent = cent[a]
        pharm_cent = np.percentile((harm2[harm2 > 0]), test_cent)
        # ppred_cent = np.percentile((td.harm2[td.pred2>0]), test_cent)

        # rows_h1cent = harm1test.index.values[harm1test >= pharm_cent]
        # rows_h2cent = harm2test.index.values[harm2test >= pharm_cent]
        # rows_pcent = td['pred'].index.values[td['pred'] >= pharm_cent]

        pred3 = to_labels(pred_y, test_thresh)
        # tempdata_h = td.iloc[rows_hcent, :]
        # temph_return = getppv(pred3, test_y, harm1test,harm2test, sumharm1, sumharm2)

        p3truth_yes = np.where(np.array(test_y) == 1)
        p3truth_no = np.where(np.array(test_y) == 0)
        p3screen_yes = np.where(np.array(pred3) == 1)
        p3screen_no = np.where(np.array(pred3) == 0)

        eblcat1 = np.where(np.array(eblcattest) == 1)
        eblcat2 = np.where(np.array(eblcattest) == 2)
        eblcat3 = np.where(np.array(eblcattest) == 3)
        eblcat4 = np.where(np.array(eblcattest) == 4)

        tp_temp_yes = np.where(p3screen_yes)
        tp_capture_pos = np.intersect1d(p3truth_yes, p3screen_yes)
        tp_capture_neg = np.intersect1d(p3truth_no, p3screen_no)

        harm2capture_tpos = 0
        if (harm2test[pred3 == 1].sum() > 0).bool():
            harm2capture_tpos = harm2test[pred3 == 1].sum() / harm2test.sum()

        harm2capture_pos = 0
        if (harm2test.iloc[tp_capture_pos].sum() > 0).bool():
            harm2capture_pos = harm2test.iloc[tp_capture_pos].sum(
            ) / harm2test.sum()

        harm2capture_tneg = 0
        if (harm2test[pred3 == 0].sum() > 0).bool():
            harm2capture_tneg = harm2test[pred3 == 0].sum() / harm1test.sum()

        harm2capture_neg = 0
        if (harm2test.iloc[tp_capture_neg].sum() > 0).bool():
            harm2capture_neg = harm2test.iloc[tp_capture_neg].sum(
            ) / harm2test.sum()

        ptn, pfp, pfn, ptp = confusion_matrix(test_y, pred3).ravel()

        # eblcat
        tp_eblcat1 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_yes)), eblcat1)).shape[0]
        fp_eblcat1 = (np.intersect1d(
            (np.intersect1d(p3truth_no, p3screen_yes)), eblcat1)).shape[0]
        tn_eblcat1 = (np.intersect1d((np.intersect1d(p3truth_no, p3screen_no)),
                                     eblcat1)).shape[0]
        fn_eblcat1 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_no)), eblcat1)).shape[0]
        # eblcat2
        tp_eblcat2 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_yes)), eblcat2)).shape[0]
        fp_eblcat2 = (np.intersect1d(
            (np.intersect1d(p3truth_no, p3screen_yes)), eblcat2)).shape[0]
        tn_eblcat2 = (np.intersect1d((np.intersect1d(p3truth_no, p3screen_no)),
                                     eblcat2)).shape[0]
        fn_eblcat2 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_no)), eblcat2)).shape[0]
        # eblcat 3
        tp_eblcat3 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_yes)), eblcat3)).shape[0]
        fp_eblcat3 = (np.intersect1d(
            (np.intersect1d(p3truth_no, p3screen_yes)), eblcat3)).shape[0]
        tn_eblcat3 = (np.intersect1d((np.intersect1d(p3truth_no, p3screen_no)),
                                     eblcat3)).shape[0]
        fn_eblcat3 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_no)), eblcat3)).shape[0]
        # eblcat 4
        tp_eblcat4 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_yes)), eblcat4)).shape[0]
        fp_eblcat4 = (np.intersect1d(
            (np.intersect1d(p3truth_no, p3screen_yes)), eblcat4)).shape[0]
        tn_eblcat4 = (np.intersect1d((np.intersect1d(p3truth_no, p3screen_no)),
                                     eblcat4)).shape[0]
        fn_eblcat4 = (np.intersect1d(
            (np.intersect1d(p3truth_yes, p3screen_no)), eblcat4)).shape[0]

        pspec = 0
        if ptn != 0:
            pspec = ptn / (ptn + pfp)

        psens = 0
        if ptp != 0:
            psens = ptp / (ptp + pfn)

        pppv = 0
        if ptp != 0:
            pppv = ptp / (ptp + pfp)

        pnpv = 0
        if ptn != 0:
            pnpv = ptn / (ptn + pfn)

        oapr = 0
        if ptp != 0 and pfp != 0:
            oapr = pfp / ptp

        oanr = 0
        if ptp != 0 and pfp != 0:
            oanr = pfn / ptn

        # ppv by eblcat
        pppv1 = 0
        if tp_eblcat1 != 0:
            pppv1 = tp_eblcat1 / (tp_eblcat1 + fp_eblcat1)

        pppv2 = 0
        if tp_eblcat2 != 0:
            pppv2 = tp_eblcat2 / (tp_eblcat2 + fp_eblcat2)

        pppv3 = 0
        if tp_eblcat3 != 0:
            pppv3 = tp_eblcat3 / (tp_eblcat3 + fp_eblcat3)

        pppv4 = 0
        if tp_eblcat4 != 0:
            pppv4 = tp_eblcat4 / (tp_eblcat4 + fp_eblcat4)

        # npv by eblcat
        pnpv1 = 0
        if tn_eblcat1 != 0:
            pnpv1 = tn_eblcat1 / (tn_eblcat1 + fn_eblcat1)

        pnpv2 = 0
        if tn_eblcat2 != 0:
            pnpv2 = tn_eblcat2 / (tn_eblcat2 + fn_eblcat2)

        pnpv3 = 0
        if tn_eblcat3 != 0:
            pnpv3 = tn_eblcat3 / (tn_eblcat3 + fn_eblcat3)

        pnpv4 = 0
        if tn_eblcat4 != 0:
            pnpv4 = tn_eblcat4 / (tn_eblcat4 + fn_eblcat4)

        fpr, tpr, _ = roc_curve(test_y, pred3)
        auc_score = auc(fpr, tpr)

        precision, recall, _ = precision_recall_curve(test_y, pred3)
        prc_score = auc(recall, precision)

        try:
            tempbrier = brier_score_loss(test_y, pred_y)
        except:
            tempbrier = 0

        try:
            t1 = harm2capture_tpos[0]
        except:
            t1 = 0
        try:
            t2 = harm2capture_pos[0]
        except:
            t2 = 1
        try:
            t3 = 1 - harm2capture_tneg[0]
        except:
            t3 = 1
        try:
            t4 = 1 - harm2capture_neg[0]
        except:
            t4 = 0

        output.iloc[a, 0] = test_thresh
        output.iloc[a, 1] = psens
        output.iloc[a, 2] = pspec
        output.iloc[a, 3] = pppv
        output.iloc[a, 4] = pnpv
        output.iloc[a, 5] = t1
        output.iloc[a, 6] = t2
        output.iloc[a, 7] = t3
        output.iloc[a, 8] = t4
        output.iloc[a, 9] = pppv1
        output.iloc[a, 10] = pppv2
        output.iloc[a, 11] = pppv3
        output.iloc[a, 12] = pppv4
        output.iloc[a, 13] = pnpv1
        output.iloc[a, 14] = pnpv2
        output.iloc[a, 15] = pnpv3
        output.iloc[a, 16] = pnpv4
        output.iloc[a, 17] = prc_score
        output.iloc[a, 18] = auc_score
        output.iloc[a, 19] = tempbrier
        output.iloc[a, 20] = oapr
        output.iloc[a, 21] = oanr

    output.columns = [
        'Thresh', 'pSens', 'pSpec', 'pPPV', 'pNPV', 'harmCaptureAllPos',
        'harmCaptureTruePos', 'harmCaptureAllNeg', 'harm2CaptureTrueNeg',
        'PPV_eblcat1', 'PPV_eblcat2', 'PPV_eblcat3', 'PPV_eblcat4',
        'NPV_eblcat1', 'NPV_eblcat2', 'NPV_eblcat3', 'NPV_eblcat4', 'PRC',
        'AUC', 'Brier', 'OAPR', 'OANR'
    ]

    if predtag == 'Yes':
        filename = 'Z:/2019/PPH/AnalysisMaster/ML/Data/Chromosomes/chr4list.xlsx'

    if predtag == 'No':
        filename = 'Z:/2019/PPH/AnalysisMaster/ML/Data/Chromosomes/chr4list.xlsx'
    output.to_excel(filename)

    return output
Example #39
0
    if h == 0:
        new_bools.append(1)
    else:
        new_bools.append(0)

from sklearn.metrics import brier_score_loss, average_precision_score, accuracy_score

print(
    "\nAccuracy score as defined by\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score"
)
print(accuracy_score(hnr_booleans, binary_prediction))
print(
    "\nBrier score loss as defined by\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.brier_score_loss.html#sklearn.metrics.brier_score_loss"
)
print(brier_score_loss(new_bools, probability_list))
print(
    "\nArea under the PR-Curver\n "
    "http://scikit-learn.org/stable/modules/generated/sklearn.metrics.average_precision_score.html#sklearn.metrics.average_precision_score"
)
print(average_precision_score(new_bools, probability_list, average='micro'))

import numpy
print("\nMean across the percentages")
print(numpy.mean(std_list))
print("\nSTD of the likelihood")
print(numpy.std(std_list))
print("\nVariance")
print(numpy.var(std_list))
Example #40
0
import numpy as np
from sklearn.metrics import brier_score_loss
y_true = np.array([0, 1, 1, 0])
y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
y_prob = np.array([0.1, 0.9, 0.8, 0.3])
print(brier_score_loss(y_true, y_prob))

print(brier_score_loss(y_true, 1 - y_prob, pos_label=0))
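# Both calls above print 0.0375: the mean squared difference between the binary
# labels and the predicted probability of the selected positive class.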

# brier_score_loss(y_true_categorical, y_prob,pos_label="ham")

# brier_score_loss(y_true, np.array(y_prob) > 0.5)
Example #41
0
# write classification report
print(class_report,
      file=open(results_folder + "rf_uc_classification_report.txt", "w"))

# define confusion matrix
cm = confusion_matrix(y_test, y_pred_class)

# run accuracy summary on confusion matrix
dx_summary = dx_accuracy(cm)
print(dx_summary)
# save summary metrics
dx_summary.to_csv(results_folder + "rf_uc_dx_summary.csv")
"""
2. Brier score 
"""
brier_score = np.round(brier_score_loss(y_test, y_pred[:, 1]), 3)

print('Ulcerative Colitis RF Clinical + Labs Features Benchmark',
      '\nBrier Score:',
      brier_score,
      file=open(results_folder + 'brier_score.txt', 'w'))
"""
3. ROC
"""
# roc for prediction of y=1 (2nd part of 2d array)
fpr, tpr, thresholds = roc_curve(y_test, y_pred[:, 1])
# auc
roc_auc = auc(fpr, tpr)

# create roc_df
roc_df = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'thresholds': thresholds})
Example #42
0
def squared_err(yPred, yTest):
    return brier_score_loss(yTest, yPred)
def GlobalBrier_optimiser(X_trainval,
                          y_trainval,
                          weights_trainval,
                          X_test,
                          y_test,
                          curr_year=2020):

    from scipy.optimize import minimize, Bounds
    import functools
    from sklearn.model_selection import KFold, train_test_split

    #the residual cost function to minimise
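    # (a time-weighted Brier score, sum_i W_i * (P_est_i - y_i)^2 / N, plus an
    # elastic-net style penalty arg4 * ||x||_2^2 + arg5 * ||x||_1 on the weights x)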
    def forecast_error_func(x, arg1, arg2, arg3, arg4, arg5):

        Data = arg1
        Outcome = arg2
        weights = arg3

        P_est = Data @ x  #weighted Arithmetic mean
        #P_est =  np.power(np.prod(np.power(Data , x),axis=1), 1/sum(x))   #weighted Geometric mean

        r = P_est - Outcome

        l1 = arg4  #regularisation coefficient
        l2 = arg5  #regularisation coefficient

        reg1 = l1 * np.sum(x * x)  #L2 norm
        reg2 = l2 * np.sum(abs(x))  #L1 norm

        #Elastic NET

        # W=np.ones(len(Outcome))  #flat
        # W = np.exp((curr_year-weights))   #exponential
        # W = (curr_year-weights)**2 #squared
        # W= 2**(curr_year-weights) # power
        W = np.log(1 + curr_year - weights)  #logarithmic
        return np.sum((r * r) * W) / len(Outcome) + reg1 + reg2

    def constraint1(x):
        return np.sum(x) - 1

    n_experts = X_trainval.shape[1]
    y_trainval_bin = (y_trainval == 1).astype(
        int)  #convert to Away-based binary labels

    #constraints and bounds for the optimisation
    cons = {'type': 'eq', 'fun': constraint1}
    bnds = Bounds(0, 1)

    #Set up the grid search
    Ntests = 50
    coeff_grid = np.linspace(0, 1, Ntests)
    Brier_Scores = np.ones(Ntests)

    for i in range(Ntests):

        coeff = coeff_grid[i]

        #shuffle the data and split it into train and validation sets
        X_train, X_val, y_train, y_val, w_train, w_val = train_test_split(
            X_trainval,
            y_trainval_bin,
            weights_trainval,
            test_size=0.3,
            shuffle=True,
            random_state=1)

        #setup the optimisation problem
        objective_fun = functools.partial(forecast_error_func,
                                          arg1=X_train,
                                          arg2=y_train,
                                          arg3=w_train,
                                          arg4=coeff,
                                          arg5=1 - coeff)

        #initial weights
        x0 = np.ones(n_experts) / n_experts

        out = minimize(objective_fun,
                       x0,
                       options={
                           'disp': False,
                           'maxiter': 500
                       },
                       method='SLSQP',
                       constraints=cons,
                       bounds=bnds)
        x_opt = out.x

        #evaluate on validation data
        y_prob = np.zeros([len(X_val), 2])
        Brier_Scores[i] = brier_score_loss(y_val, X_val @ x_opt, pos_label=1)

    #get the best coefficients that minimise the brier score
    best_coeffs = coeff_grid[np.argmin(Brier_Scores)]

    #fit on train&val and evaluate on test data
    objective_fun = functools.partial(forecast_error_func,
                                      arg1=X_trainval,
                                      arg2=y_trainval_bin,
                                      arg3=weights_trainval,
                                      arg4=best_coeffs,
                                      arg5=1 - best_coeffs)
    out = minimize(objective_fun,
                   x0,
                   options={
                       'disp': False,
                       'maxiter': 500
                   },
                   method='SLSQP',
                   constraints=cons,
                   bounds=bnds)
    x_opt = out.x

    y_test_bin = (y_test == 1).astype(
        int)  #convert to Away-based binary labels
    y_prob = np.zeros([len(X_test), 2])
    y_prob[:, 0] = X_test @ x_opt
    y_prob[:, 1] = 1 - y_prob[:, 0]
    return brier_score_loss(y_test_bin, y_prob[:, 0], pos_label=1)
def plot_calibration_curve(est,
                           name,
                           fig_index,
                           X_train,
                           X_test,
                           y_train,
                           y_test,
                           cv='prefit'):
    '''
    Plot calibration curve for est w/o and with calibration.

    Inputs:
        est : the model
        name : the model name
        fig_index : which figure to plot it in
        cv : the cross-validation strategy
             Stock models are already fitted and can use 'prefit'
             Integer values give the number of folds

    e.g.,
        # Plot calibration curve for Gaussian Naive Bayes
        plot_calibration_curve(GaussianNB(), "Naive Bayes", 1)

        # Plot calibration curve for Linear SVC
        plot_calibration_curve(LinearSVC(), "SVC", 2)

    '''
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=cv, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=cv, method='sigmoid')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    # fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'), (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        # if name == 'Logistic':
        #     clf.fit(X_train, y_train)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=1)
        # clf_score = brier_score_loss(y_test, prob_pos)
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value,
                 fraction_of_positives,
                 "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos,
                 range=(0, 1),
                 bins=10,
                 label=name,
                 histtype="step",
                 lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()

    plt.show()
Example #45
0
def cross_val_estimate(estimator, X, y, cv1=None, n_folds=8, n_jobs=1,
                       verbosity=1):
    """ Estimate the estimator using cross-validation.

    - Calculate probabilities of the target (dplus) returned by the classifier
        using cross-validation: predict targets for the validation part after
        training the estimator on the training part inside the cross-validation cycle
    - Estimate scores of the classifier using roc_auc as the metric
    - Calculate LogLoss and Brier score loss (mean squared error) to estimate the
        quality of the predicted probabilities
    - Calculate sensitivity and specificity using the threshold that maximizes their
        harmonic mean
    - Print a classification report using the best threshold for F1-score

    Parameters
    ----------
    estimator: BaseEstimator-like
        an estimator to estimate
    X: array, shape=(n_samples, n_features)
        the train data samples with values of their features
    y: array, shape=(n_samples,))
        the targets
    n_folds: int, optional (default=8)
        number of folds in the cross-validation
    n_jobs:int, optional (default=1)
        number of cores to use to speed up calculations
    verbosity: int, optional (default=1)
        level of verbosity

    Returns
    -------
    y_proba: array
        numpy array of predicted probabilities
    scores: array
        numpy array of cross-validated scores
    """
    from sklearn import (metrics, cross_validation)
    from .model_selection import cross_val_predict_proba
    from .modsel import (
        estimate_scores,
        precision_sensitivity_specificity, best_threshold)

    y_true = y
    scoring = 'roc_auc'
    if cv1 is None:
        cv1 = cross_validation.StratifiedKFold(y, n_folds)
    y_proba, scores = cross_val_predict_proba(
        estimator, X, y, scoring=scoring, cv=cv1, n_jobs=n_jobs, verbose=0,
        fit_params=None, pre_dispatch='2*n_jobs')

    print("\nScores: ", " ".join(["{:.2f}".format(e) for e in scores]))
    scores_mean, me = estimate_scores(scores, scoring, sampling=False)

    best_thr1, best_thr2 = best_threshold(y_true, y_proba)
    precision, sensitivity, specificity = precision_sensitivity_specificity(
        y_true, y_proba, threshold=best_thr2)
    print()
    print(
        "LogLoss: {:1.3f} | Brier score loss: {:1.3f} | sensitivity(recall): "
        "{:1.2f} and specificity: {:1.2f} with threshold={:1.2f}".format(
            metrics.log_loss(y_true, y_proba),
            metrics.brier_score_loss(y_true, y_proba),
            sensitivity,
            specificity,
            best_thr2)
    )

    target_names = ['class 0', 'class 1']

    print("Threshold={:1.2f}:".format(best_thr1))
    print(metrics.classification_report(
        y_true, np.asarray(y_proba > best_thr1, dtype=int),
        target_names=target_names))

    return y_proba, scores
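# A rough, self-contained equivalent of the probability step above using plain
# sklearn helpers (cross_val_predict on a synthetic dataset); this is a sketch,
# not the package-internal cross_val_predict_proba used by the original code.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.metrics import brier_score_loss, log_loss

_Xd, _yd = make_classification(n_samples=300, random_state=0)
_proba = cross_val_predict(LogisticRegression(max_iter=1000), _Xd, _yd,
                           cv=StratifiedKFold(n_splits=8),
                           method='predict_proba')[:, 1]
print("LogLoss: {:1.3f} | Brier score loss: {:1.3f}".format(
    log_loss(_yd, _proba), brier_score_loss(_yd, _proba)))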
# Gaussian Naive-Bayes with no calibration
clf = GaussianNB()
clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
prob_pos_clf = clf.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with isotonic calibration
clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
clf_isotonic.fit(X_train, y_train, sw_train)
prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with sigmoid calibration
clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
clf_sigmoid.fit(X_train, y_train, sw_train)
prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

print("Brier scores: (the smaller the better)")

clf_score = brier_score_loss(y_test, prob_pos_clf)
clf_score_auc = auc(y_test, prob_pos_clf, True)
print("No calibration: %1.3f, %1.3f" % (clf_score, clf_score_auc))

clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic)
clf_isotonic_score_auc = auc(y_test, prob_pos_isotonic, True)
print("With isotonic calibration: %1.3f, %1.3f" %
      (clf_isotonic_score, clf_isotonic_score_auc))

clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid)
clf_sigmoid_score_auc = auc(y_test, prob_pos_sigmoid, True)
print("With sigmoid calibration: %1.3f, %.3f" %
      (clf_sigmoid_score, clf_sigmoid_score_auc))
Example #47
0
def plot_calibration_curve(est, name, fig_index, y_test, X_test, y_train,
                           X_train):
    """Plot calibration curve for est w/o and with calibration. """
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'), (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_pred.max())
        # print("%s:" % name)
        # print("\tBrier: %1.3f" % (clf_score))
        # print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        # print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        # print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value,
                 fraction_of_positives,
                 "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos,
                 range=(0, 1),
                 bins=10,
                 label=name,
                 histtype="step",
                 lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
Example #48
0
    def RandomGridSearch(self,x_train,y_train,x_test,y_test,splits,path_results,m,itera,clf_g,name,tuned_parameters,opt,ite):
        """
        This function looks for the best set o parameters for RFC method
        Input: 
            X: training set
            Y: labels of training set
            splits: cross validation splits, used to make sure the parameters are stable
        Output:
            clf.best_params_: dictionary with the parameters, to use: param_svm['kernel']
        """    
        
        start_rfc = time.time()                  
        #clf_grid =  RandomizedSearchCV(clf_g, tuned_parameters, cv=splits,random_state=random_state,
        #                   scoring='%s' % opt[0],n_jobs=n_jobs)        
        clf_grid =  RandomizedSearchCV(clf_g, tuned_parameters, cv=splits,random_state=random_state,
                           scoring='%s' % opt[0],n_jobs=n_jobs)
                                          
        clf_grid.fit(x_train, y_train)
        #print("Score",clf.best_score_)
        end_rfc = time.time()
        
        print("Time to process: ",end_rfc - start_rfc)
        
        with open(path_results+"parameters_"+name+".txt", "a") as file:
            for item in clf_grid.best_params_:
              file.write(" %s %s " %(item,clf_grid.best_params_[item] ))
            file.write("\n")
            
        #clf = clf_g(**clf_grid.best_params_,random_state=random_state)
        clf = clf_grid.best_estimator_
        
        x_train_t, x_val, y_train_t, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=random_state)
 
                    
        #clf_t = clf_g(**clf_grid.best_params_,random_state=random_state)
        clf_t = clf.fit(x_train_t,y_train_t)
        
#        import shap
#        #testing feature importance with xgb
#        x_train_t = pd.DataFrame(x_train,columns=new_cols)
#        #f = plt.figure(figsize=(25, 19))
#        #xgboost.plot_importance(clf_t,importance_type="gain")
#       
#        explainer = shap.TreeExplainer(clf_t)
#        shap_values = explainer.shap_values(x_train_t)
#        shap.summary_plot(shap_values, x_train_t, plot_type="bar")
#        #end of test
                             
        if name=="SVM":
            decisions = clf.decision_function(x_test)
            probas=\
            (decisions-decisions.min())/(decisions.max()-decisions.min())
            
            decisions_t = clf_t.decision_function(x_val)
            probas_val=\
            (decisions_t-decisions_t.min())/(decisions_t.max()-decisions_t.min())
        else:
             probas = clf.predict_proba(x_test)[:, 1]
             probas_val = clf_t.predict_proba(x_val)[:, 1]
        
        ts=np.linspace(0.1, 0.99, num=100)
        best_val=0
        best_t=0
        t_spec=0
        found=False
        found_ppv=False
        for i in range(ts.shape[0]):
            p=probas_val>ts[i]
            #c_f1=f1_score(y_val, p)

            tn, fp, fn, tp = confusion_matrix(y_val, p).ravel()
            c_f1 = tp/(tp+fp)
            c_spec=tn/(tn+fp)
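            # note: despite its name, c_f1 here is the PPV (precision) and c_spec
            # is the specificity, both computed on the validation set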
            #if c_f1>best_val:
            if c_f1>=0.95 and not found_ppv:
                best_val=c_f1
                best_t=ts[i]
                found_ppv=True
            if c_spec>=0.95 and not found:
                t_spec=ts[i]
                found=True 
                #print(c_spec)
                
        self.model = clf        
        preds = clf.predict(x_test)        
        m.clf_f1_score[ite,itera]=f1_score(y_test, preds)
        tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()        
        m.clf_sens[ite,itera]=tp/(tp+fn)
        m.clf_spec[ite,itera]=tn/(tn+fp)
        m.clf_ppv[ite,itera]=tp/(tp+fp)
        m.clf_npv[ite,itera]=tn/(tn+fn)                        
        
        m.f1_score_f1[ite,itera]=f1_score(y_test, probas>best_t)        
        tn, fp, fn, tp = confusion_matrix(y_test, probas>best_t).ravel()        
        m.sens_f1[ite,itera]=tp/(tp+fn)
        m.spec_f1[ite,itera]=tn/(tn+fp)
        m.clf_ppv_f1[ite,itera]=tp/(tp+fp)
        m.clf_npv_f1[ite,itera]=tn/(tn+fn)
        
        m.f1_score_spec[ite,itera] = f1_score(y_test, probas>t_spec)
        tn, fp, fn, tp = confusion_matrix(y_test, probas>t_spec).ravel()        
        m.sens_spec[ite,itera] = tp/(tp+fn)
        m.spec_spec[ite,itera] = tn/(tn+fp)
        m.clf_ppv_spec[ite,itera] = tp/(tp+fp)
        m.clf_npv_spec[ite,itera] = tn/(tn+fn)
        m.probas = probas
        m.preds = preds
        m.clf_auc[ite,itera] = roc_auc_score(y_test,probas)
        m.clf_thresholds[ite,itera] = t_spec
        
               
        fpr_rf, tpr_rf, _ = roc_curve(y_test, probas)  
        
        m.clf_brier[ite,itera] = brier_score_loss(y_test, probas)   
                
        tn, fp, fn, tp = confusion_matrix(y_test, preds).ravel()
        print(probas_val.shape,y_train.shape)
        save_prob = np.concatenate((probas.reshape(-1,1),y_test.reshape(-1,1)),axis = 1)
        save_prob_train = np.concatenate((probas_val.reshape(-1,1),y_val.reshape(-1,1)),axis = 1)
        
        #Feature importance
        
        weights = list()
        stds = list()
        names = list()
        
        import eli5                                   
        model = clf                  
        
        f = pd.DataFrame()
        f['name']=name
        
        from eli5.sklearn import PermutationImportance
        perm = PermutationImportance(model, random_state=1,scoring="roc_auc").fit(x_train,y_train)
        new_cols = np.load(r"\\amc.intra\users\L\laramos\home\Desktop\MrClean_Poor\HPC\organize_cols.npy")
        html = eli5.explain_weights(perm, feature_names = new_cols.tolist())  
        for imp in range(len(html.feature_importances.importances)):
            weights.append(html.feature_importances.importances[imp].weight)
            stds.append(html.feature_importances.importances[imp].std)
            names.append(html.feature_importances.importances[imp].feature)
        
        import_frame = pd.DataFrame(list(zip(names,weights,stds)))
        import_frame.columns = ['name','weight','std']
        import_frame.to_excel(path_results+'features_'+name+'_'+str(itera)+'.xls')
        
        
      
        #np.save(path_results+"probabilities_"+name+"_"+str(itera)+".npy",probas)
        
        np.save(path_results+"probabilities_"+name+"_"+str(itera)+str(i)+".npy",save_prob)
        np.save(path_results+"probabilities_train"+name+"_"+str(itera)+str(i)+".npy",save_prob_train)

        #np.save(path_results+"feature_importance"+name+"_"+str(itera)+str(i)+".npy",clf.coef_)
        #joblib.dump(clf,path_results+'clf_'+name+str(itera)+str(i))
        return(fpr_rf,tpr_rf,probas,clf)
Example #49
0
def evaluatingModel(model, model_name, X, y, skv):

	print(model_name + " STARTS HERE\n\n")
	
	# Implement BoW model
	vectorizer = CountVectorizer(analyzer="word", ngram_range=(1, 1))

	# Create Confusion Matrix Dictionary
	cm_dict = { "tp": 0, "fp": 0, "tn": 0, "fn": 0}

	# Array to store results
	accuracy_array = []
	precision_array = []
	fpr_array = []
	auc_array = []
	log_loss_array = []
	brier_array = []
	execution_time_array = []

	for train_cv, test_cv in skv.split(X,y):

		# Separate the training and testing folds
		# NOTE: y_test corresponds to y_true
		X_train, X_test = X[train_cv], X[test_cv]
		y_train, y_test = y[train_cv], y[test_cv]

		# Transform X_train and X_test using BoW
		X_train = vectorizer.fit_transform(X_train).toarray()
		X_test = vectorizer.transform(X_test).toarray()

		# Train the model
		model.fit(X_train , y_train)

		# Predict and calculate run-time
		# NOTE: result corresponds to y_pred
		start = time.time()
		result = model.predict(X_test)
		end = time.time()

		execution_time = end - start

		# Get the probability scores
		# Use Logistic Regression for LinearSVC case
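		# (An alternative is wrapping LinearSVC in sklearn's CalibratedClassifierCV,
		# which exposes calibrated predict_proba outputs directly.)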
		if model_name == 'SVM':

			lr = LogisticRegression()
			lr.fit(X_train, y_train)

			y_scores = lr.predict_proba(X_test)

		else:

			y_scores = model.predict_proba(X_test)

		# Get AUC score, Log Loss
		auc_score = roc_auc_score(y_test, y_scores[:, 1])
		log_loss_score = log_loss(y_test, y_scores)
		brier_score = brier_score_loss(y_test, y_scores[:, 1])

		# Confusion Matrix
		tn, fp, fn, tp = confusion_matrix(y_test, result).ravel()

		# Add the results to confusion matrix
		cm_dict["tn"] += tn
		cm_dict["fp"] += fp 
		cm_dict["fn"] += fn 
		cm_dict["tp"] += tp

		# Evaluation Metrics
		accuracy = accuracy_score(y_test , result)
		precision = tp/(tp+fp)
		fpr = fp/(fp + tn) # False Positive Rate

		# Append results
		accuracy_array.append(accuracy)
		precision_array.append(precision)
		fpr_array.append(fpr)
		auc_array.append(auc_score)
		log_loss_array.append(log_loss_score)
		brier_array.append(brier_score)
		execution_time_array.append(execution_time)

	# Get mean results
	mean_accuracy = np.mean(accuracy_array)
	mean_precision = np.mean(precision_array)
	mean_fpr = np.mean(fpr_array)
	mean_auc = np.mean(auc_array)
	mean_log_loss = np.mean(log_loss_array)
	mean_brier = np.mean(brier_array)
	mean_execution_time = np.mean(execution_time_array)

	# Get standard deviation (population)
	accuracy_std = np.std(accuracy_array)
	precision_std = np.std(precision_array)
	fpr_std = np.std(fpr_array)
	auc_std = np.std(auc_array)
	log_std = np.std(log_loss_array)
	brier_std = np.std(brier_array)
	run_std = np.std(execution_time_array)

	# Display results
	print("MEAN ACCURACY: %0.3f (+/- %0.3f) \n" % (mean_accuracy, accuracy_std))
	print("MEAN PRECISION: %0.3f (+/- %0.3f) \n" % (mean_precision, precision_std))
	print("MEAN FALSE POSITIVE RATE: %0.3f (+/- %0.3f) \n" % (mean_fpr, fpr_std))
	print("MEAN AUC SCORE: %0.3f (+/- %0.3f) \n" % (mean_auc, auc_std))
	print("MEAN LOG LOSS SCORE: %0.3f (+/- %0.3f) \n" % (mean_log_loss, log_std))
	print("MEAN BRIER SCORE LOSS: %0.3f (+/- %0.3f) \n" % (mean_brier, brier_std))
	print("MEAN RUN TIME: %0.3f (+/- %0.3f) \n" % (mean_execution_time, run_std))

	print("\n\n" + model_name + " STOPS HERE\n\n")
def baseline_resampling(data_path, bad_sample_num, good_sample_num,
                        reject_sample_num, random_state_for_each_epoch,
                        classifier, resampling_model):

    warnings.filterwarnings("ignore")
    raw_data_train = pd.read_csv(data_path, index_col='ID')

    data_bad = raw_data_train[raw_data_train['label'] == 1]
    # print data_bad.shape
    data_good = raw_data_train[(raw_data_train['label'] == 0)]
    data_reject = raw_data_train[raw_data_train['label'] == -1]

    data_bad_sampling = data_bad.sample(
        n=bad_sample_num, random_state=random_state_for_each_epoch)
    data_good_sampling = data_good.sample(
        n=good_sample_num, random_state=random_state_for_each_epoch)

    data_train = pd.concat([data_bad_sampling, data_good_sampling], axis=0)
    # print("All Data Size:" + str(data_train.shape))

    feature_name = list(data_train.columns.values)
    # print(feature_name)

    s = 0
    np.random.seed(s)
    sampler = np.random.permutation(len(data_train.values))
    data_train_randomized = data_train.take(sampler)

    y = data_train_randomized['label'].values
    X = data_train_randomized.drop(['label'], axis=1).values

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.2,
                                                        random_state=123)

    X_resampled, y_resampled = resampling_model.fit_sample(X_train, y_train)
    # borderline2 > borderline1

    # X_resampled, y_resampled = SMOTE(kind='borderline2', k_neighbors=5).fit_sample(X_train, y_train)
    # X_resampled, y_resampled = ADASYN(n_neighbors=50).fit_sample(X_train, y_train)
    # X_resampled, y_resampled = TomekLinks(ratio='auto', random_state=100).fit_sample(X_train, y_train) #

    # X_resampled, y_resampled = SMOTEENN().fit_sample(X_train, y_train)
    '''Choose a classification model'''
    y_proba = classifier.fit(X_resampled, y_resampled).predict_proba(X_test)
    y_predict = classifier.fit(X_resampled, y_resampled).predict(X_test)

    # y_predict = y_proba[:, 1].copy()
    # y_predict[y_predict >= 0.9] = 1
    # y_predict[y_predict < 0.9] = 0
    '''AUC and ROC curve'''
    fpr, tpr, _ = roc_curve(y_test, y_proba[:, 1])
    auc_result = auc(fpr, tpr)
    # print("AUC Score:" + str(auc_result))
    '''Accuracy'''
    accuracy_result = accuracy_score(y_test, y_predict)
    '''Precision'''
    precision_result = precision_score(y_test, y_predict)
    # print("Precision Score:" + str(precision_result))
    '''Recall'''
    recall_result = recall_score(y_test, y_predict)
    # print("Recall Score:" + str(recall_result))
    '''F1'''
    f1_result = f1_score(y_test, y_predict)
    # print("F1 Score:" + str(f1_result))
    '''Log loss'''
    log_loss_result = log_loss(y_test, y_proba[:, 1])
    # print("logloss Score:" + str(log_loss_result))
    '''Cohen-Kappa'''
    cohen_kappa_result = cohen_kappa_score(y_test, y_predict)
    # print("Cohen-Kappa Score:" + str(cohen_kappa_result))
    '''brier score'''
    brier_result = brier_score_loss(y_test, y_proba[:, 1])
    # print("brier Score:" + str(brier_result))
    '''K-S Value'''
    ks_result = max(tpr - fpr)
    '''plot roc'''

    # plt.figure()
    # lw = 2
    # plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.4f)' % roc_auc)
    # plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    # plt.xlim([0.0, 1.0])
    # plt.ylim([0.0, 1.05])
    # plt.xlabel('False Positive Rate')
    # plt.ylabel('True Positive Rate')
    # plt.title('Receiver operating characteristic example')
    # plt.legend(loc="lower right")
    # plt.show()
    '''Classification Report'''
    # target_names = ['class 0', 'class 1', 'class 2']
    # print(classification_report(y_test, y_predict, target_names=target_names))
    '''Confusion Matrix'''
    # # Compute confusion matrix
    # cnf_matrix = confusion_matrix(y_test, y_predict)
    # np.set_printoptions(precision=2)
    #
    # # Plot non-normalized confusion matrix
    # plt.figure()
    # plot_confusion_matrix(cnf_matrix, classes=[0, 1], title='Confusion matrix, without normalization')
    #
    # # Plot normalized confusion matrix
    # plt.figure()
    # plot_confusion_matrix(cnf_matrix, classes=[0, 1], normalize=True, title='Normalized confusion matrix')
    #
    # plt.show()

    # print("Accuracy Score:" + str(accuracy_result) + " Precision Score:" + str(precision_result) + " Recall Score:" + str(recall_result) +
    #       " F1 Score:" + str(f1_result) + " logloss Score:" + str(log_loss_result) + " Cohen-Kappa Score:" + str(cohen_kappa_result) +
    #       " brier Score:" + str(brier_result) + " AUC Score:" + str(auc_result))

    return accuracy_result, precision_result, recall_result, f1_result, log_loss_result, cohen_kappa_result, brier_result, ks_result, auc_result
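A minimal, self-contained sketch of how the function above might be invoked; the fabricated CSV, sample sizes, classifier and SMOTE resampler are assumptions, and the sketch presumes an imbalanced-learn version that still exposes the fit_sample API used in the function body.

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE

# Fabricate a small CSV with the layout the function expects:
# an 'ID' index and a 'label' column with 1 = bad, 0 = good, -1 = reject
rng = np.random.RandomState(0)
n = 3000
demo = pd.DataFrame(rng.normal(size=(n, 5)), columns=['f0', 'f1', 'f2', 'f3', 'f4'])
demo['label'] = rng.choice([1, 0, -1], size=n, p=[0.15, 0.75, 0.10])
demo.index.name = 'ID'
demo.to_csv('demo_credit.csv')

scores = baseline_resampling(
    data_path='demo_credit.csv',
    bad_sample_num=300,
    good_sample_num=1500,
    reject_sample_num=0,                  # accepted but unused in the body shown above
    random_state_for_each_epoch=1,
    classifier=LogisticRegression(max_iter=1000),
    resampling_model=SMOTE(random_state=1))
names = ['accuracy', 'precision', 'recall', 'f1', 'log_loss', 'kappa', 'brier', 'ks', 'auc']
print(dict(zip(names, scores)))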
Example #51
0
File: m2m.py Project: dimagi/brain
    def score_probs(self, y_true, y_prob):
        return metrics.brier_score_loss(y_true, y_prob)
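For context, brier_score_loss is the mean squared difference between the predicted probabilities and the binary outcomes, so lower is better; a quick sanity check with toy numbers (not from the original project):

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0])
print(brier_score_loss(y_true, np.array([0.0, 1.0, 1.0, 0.0])))  # 0.0  -> perfect
print(brier_score_loss(y_true, np.array([0.5, 0.5, 0.5, 0.5])))  # 0.25 -> uninformative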
Example #52
0
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert (brier_score_loss(y_test, prob_pos_clf) >
                    brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert (brier_score_loss(y_test, prob_pos_clf) >
                        brier_score_loss((y_test + 1) % 2,
                                         prob_pos_pc_clf_relabeled))

        # Check failure cases:
        # only "isotonic" and "sigmoid" should be accepted as methods
        clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
        assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

        # base-estimators should provide either decision_function or
        # predict_proba (most regressors, for instance, should fail)
        clf_base_regressor = \
            CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
        assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
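Outside the test harness, the same calibration API can be exercised directly; a minimal sketch, assuming a GaussianNB base estimator and synthetic data (neither is from the original test):

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

X, y = make_classification(n_samples=400, n_features=6, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)

raw = GaussianNB().fit(X_tr, y_tr)
cal = CalibratedClassifierCV(GaussianNB(), method="sigmoid", cv=3).fit(X_tr, y_tr)

print(brier_score_loss(y_te, raw.predict_proba(X_te)[:, 1]))
print(brier_score_loss(y_te, cal.predict_proba(X_te)[:, 1]))  # usually the smaller of the two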
def plot_calibration_curve_from_data(X, y, est, name, fig_index):
    """Plot calibration curve for est w/o and with calibration. """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=7)

    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method="isotonic")

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method="sigmoid")

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1.0, solver="lbfgs")

    fig = plt.figure(fig_index, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [
        (lr, "Logistic Regression"),
        (est, name),
        (isotonic, name + " + Isotonic"),
        (sigmoid, name + " + Sigmoid"),
    ]:
        clf.fit(X_train, y_train)
        # clf.fit(X_train[:,:10], X_train[:, 10])
        y_pred = clf.predict(X_test)
        # y_pred = clf.predict(X_test[:,:10])
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
            # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1]
            # prob_pos = clf.predict_proba(X_test[:,:10])[:, 1]*weights[1]
            # prob_pos = np_average( 1 - clf.predict_proba(X_test[:,:10]), axis=1, weights=weights )
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            # prob_pos = clf.decision_function(X_test[:,:10])[:, 1]
            # prob_pos = clf.decision_function(X_test[:,:10])[:, 1]*weights[1]
            # prob_pos = np_average( 1 - clf.decision_function(X_test[:,:10]), axis=1, weights=weights )
        # Min-max rescale the scores to [0, 1]; note this rescaling is applied to
        # predict_proba outputs as well, not only to decision_function scores
        prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("* %s:" % name)
        OP_append("* %s:" % name)
        print(" * Brier: %1.3f" % (clf_score))
        OP_append(" * Brier: %1.3f" % (clf_score))
        print(" * Precision: %1.3f" % precision_score(y_test, y_pred))
        OP_append(" * Precision: %1.3f" % precision_score(y_test, y_pred))
        print(" * Recall: %1.3f" % recall_score(y_test, y_pred))
        OP_append(" * Recall: %1.3f" % recall_score(y_test, y_pred))
        print(" * F1: %1.3f\n" % f1_score(y_test, y_pred))
        OP_append(" * F1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title("Calibration plots  (reliability curve)")

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    plt.tight_layout()
    fig.savefig("NF/%s.png" % name, dpi=fig.dpi)
Example #54
0
                model.fit(X, y)
            else:
                status = 'No markers with ORs >= {}'.format(ORthreshold)
        else:
            markers = cell_markers[ctype]

        p1 = model.predict_proba(X)[:, 1]
        fpr, tpr, thresholds = metrics.roc_curve(y, p1)
        optimal_idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[optimal_idx]
        optimal_pred = (p1 > optimal_threshold).astype(int)
        precision, recall, _ = metrics.precision_recall_curve(y, p1)
        auprc = metrics.auc(recall, precision)
        auroc = metrics.roc_auc_score(y, p1)
        ap = metrics.average_precision_score(y, p1)
        bs = metrics.brier_score_loss(y, p1)
        acc = metrics.accuracy_score(y, optimal_pred)

        # store results
        dt = pd.DataFrame(
            {
                'ctype2pred': ctype,
                'cluster': cluster,
                'auroc': auroc,
                'status': status,
                'markers': [markers],
                'ORs': np.exp(model.coef_).tolist(),
                'ave_prec': ap,
                'acc': acc,
                'sensitivity': tpr[optimal_idx],
                'specificity': 1 - fpr[optimal_idx]
test_y = x_trans(test_x)
test_x = train_x_scaler.transform(test_x)


elm = ELMClassifier(hidden_neurons, C=2E5)

elmae = ExtremeLearningMachine()
elmae.add_layer(ELMLayers.ELMAE(hidden_neurons, C=0))
elmae.add_layer(ELMLayers.ELMAE(hidden_neurons, C=0))
elmae.add_layer(ELMLayers.ELMRegression())
elmae.add_layer(classifier)


instances = [
    elm,
    elmae]



i=1
for instance in instances:
    t0 = time()
    # fit and predict for each instance
    instance.fit(train_x, train_y)
    prediction, prob, _ = instance.predict(test_x)
    # calculate the forecast performance
    print(instance.print_network_structure())
    print('Brier score: {0:f}'.format(
        brier_score_loss(elm.labels_bin(test_y)[:,1], prob[:,1])))
    print('Time elapsed: {0:f}'.format(time()-t0))
clf.fit(X_train, y_train)  # GaussianNB itself does not support sample-weights
prob_pos_clf = clf.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with isotonic calibration
clf_isotonic = CalibratedClassifierCV(clf, cv=2, method='isotonic')
clf_isotonic.fit(X_train, y_train, sample_weight=sw_train)
prob_pos_isotonic = clf_isotonic.predict_proba(X_test)[:, 1]

# Gaussian Naive-Bayes with sigmoid calibration
clf_sigmoid = CalibratedClassifierCV(clf, cv=2, method='sigmoid')
clf_sigmoid.fit(X_train, y_train, sample_weight=sw_train)
prob_pos_sigmoid = clf_sigmoid.predict_proba(X_test)[:, 1]

print("Brier score losses: (the smaller the better)")

clf_score = brier_score_loss(y_test, prob_pos_clf, sample_weight=sw_test)
print("No calibration: %1.3f" % clf_score)

clf_isotonic_score = brier_score_loss(y_test, prob_pos_isotonic,
                                      sample_weight=sw_test)
print("With isotonic calibration: %1.3f" % clf_isotonic_score)

clf_sigmoid_score = brier_score_loss(y_test, prob_pos_sigmoid,
                                     sample_weight=sw_test)
print("With sigmoid calibration: %1.3f" % clf_sigmoid_score)

# #############################################################################
# Plot the data and the predicted probabilities
plt.figure()
y_unique = np.unique(y)
colors = cm.rainbow(np.linspace(0.0, 1.0, y_unique.size))
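brier_score_loss accepts sample_weight, so the weighted scores printed above are simply weighted means of the squared errors; a toy check with made-up numbers:

import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 1, 1, 0])
y_prob = np.array([0.2, 0.7, 0.6, 0.4])
w = np.array([1.0, 1.0, 2.0, 2.0])

manual = np.sum(w * (y_prob - y_true) ** 2) / np.sum(w)   # weighted mean squared error
print(np.isclose(brier_score_loss(y_true, y_prob, sample_weight=w), manual))  # True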
Example #57
0
def advanced_scoring_classifiers(probabilities, actuals, name=None):
    # pandas Series don't play nice here. Make sure our actuals list is indeed a list
    actuals = list(actuals)

    print('Here is our brier-score-loss, which is the default value we optimized for while '
          'training, and is the value returned from .score() unless you requested a custom '
          'scoring metric')
    print('It is a measure of how close the PROBABILITY predictions are.')
    if name is not None:
        print(name)

    # Sometimes we will be given "flattened" probabilities (only the probability of our positive
    # label), while other times we might be given "nested" probabilities (probabilities of both
    # positive and negative, in a list, for each item).
    try:
        probabilities = [proba[1] for proba in probabilities]
    except (IndexError, TypeError):
        # Probabilities were already flattened to the positive-class values
        pass

    brier_score = brier_score_loss(actuals, probabilities)
    print(format(brier_score, '.4f'))

    print('\nHere is the trained estimator\'s overall accuracy (when it predicts a label, '
          'how frequently is that the correct label?) ')
    predicted_labels = []
    for pred in probabilities:
        if pred >= 0.5:
            predicted_labels.append(1)
        else:
            predicted_labels.append(0)
    print(format(accuracy_score(y_true=actuals, y_pred=predicted_labels) * 100, '.1f') + '%')

    print('\nHere is a confusion matrix showing predictions vs. actuals by label:')
    # it would make sense to use sklearn's confusion_matrix here but it apparently has no labels
    # took this idea instead from: http://stats.stackexchange.com/a/109015
    conf = pd.crosstab(
        pd.Series(actuals),
        pd.Series(predicted_labels),
        rownames=['v Actual v'],
        colnames=['Predicted >'],
        margins=True)
    print(conf)

    # I like knowing the per class accuracy to see if the model is mishandling imbalanced data.
    # For example, if it is predicting 100% of observations to one class just because it is the
    # majority. Wikipedia seems to call that Positive/negative predictive value
    print('\nHere is predictive value by class:')
    df = pd.concat(
        [pd.Series(actuals, name='actuals'),
         pd.Series(predicted_labels, name='predicted')], axis=1)
    targets = list(df.predicted.unique())
    for i in range(0, len(targets)):
        tot_count = len(df[df.predicted == targets[i]])
        true_count = len(df[(df.predicted == targets[i]) & (df.actuals == targets[i])])
        print('Class: ', targets[i], '=', float(true_count) / tot_count)

    # qcut can be fickle with few distinct probabilities; duplicates='drop'
    # collapses repeated bucket edges instead of raising
    bucket_results = pd.qcut(probabilities, q=10, duplicates='drop')

    df_probabilities = pd.DataFrame(probabilities, columns=['Predicted Probability Of Bucket'])
    df_probabilities['Actual Probability of Bucket'] = actuals
    df_probabilities['Bucket Edges'] = bucket_results

    df_buckets = df_probabilities.groupby(df_probabilities['Bucket Edges'])
    try:
        print(
            tabulate(
                df_buckets.mean(),
                headers='keys',
                floatfmt='.4f',
                tablefmt='psql',
                showindex='always'))
    except TypeError:
        print(tabulate(df_buckets.mean(), headers='keys', floatfmt='.4f', tablefmt='psql'))
    print('\nHere is the accuracy of our trained estimator at each level of predicted '
          'probabilities ')
    print('For a verbose description of what this means, please visit the docs:')
    print('http://cash-ml.readthedocs.io/en/latest/analytics.html#interpreting-predicted'
          '-probability-buckets-for-classifiers ')


    print('\n\n')
    return brier_score
Example #58
0
def brier_skill_score(y_values, forecast_probabilities):
    """Computes the brier skill score"""
    climo = np.mean((y_values - np.mean(y_values))**2)
    return 1.0 - brier_score_loss(y_values, forecast_probabilities) / climo
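The reference forecast here is climatology, i.e. always predicting the event's base rate, so positive values mean the probabilities add skill over that baseline; a quick check with made-up numbers (imports shown only so the sketch runs standalone):

import numpy as np
from sklearn.metrics import brier_score_loss

y = np.array([0, 0, 0, 1, 1, 0, 0, 1, 0, 0])                     # 30% event frequency
sharp_probs = np.array([0.1, 0.2, 0.1, 0.8, 0.7, 0.2, 0.1, 0.9, 0.2, 0.1])
climo_probs = np.full_like(sharp_probs, y.mean())                 # always forecast the base rate

print(brier_skill_score(y, sharp_probs))   # > 0: beats climatology
print(brier_skill_score(y, climo_probs))   # 0.0: no skill over climatology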
Example #59
0
def plot_calibration_curve(est, name, X_train, y_train):
    """Generate a plot fo the calibration curve, for use in classification modeling diagnostics.

    Parameters
    ----------
    est : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    name : string
        Name of the classifier, e.g. "Logistic Regression", "SVC", etc.

    X_train : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y_train : array-like, shape (n_samples) or (n_samples, n_features)
        Target relative to X for classification.
    """
    # Plot calibration curve for est w/o and with calibration, evaluated on the
    # training data itself (this helper does not hold out a separate test set)
    X_test = X_train
    y_test = y_train
    # Calibrated with isotonic calibration
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')

    # Calibrated with sigmoid calibration
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    plt.figure(figsize=(8, 8))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for clf, name in [(lr, 'Logistic'),
                      (est, name),
                      (isotonic, name + ' + Isotonic'),
                      (sigmoid, name + ' + Sigmoid')]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        if hasattr(clf, "predict_proba"):
            prob_pos = clf.predict_proba(X_test)[:, 1]
        else:  # use decision function
            prob_pos = clf.decision_function(X_test)
            prob_pos = \
                (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())

        clf_score = brier_score_loss(y_test, prob_pos, pos_label=y_test.max())
        print("%s:" % name)
        print("\tBrier: %1.3f" % (clf_score))
        print("\tPrecision: %1.3f" % precision_score(y_test, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_test, y_pred))

        fraction_of_positives, mean_predicted_value = \
            calibration_curve(y_test, prob_pos, n_bins=10)

        ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
                 label="%s (%1.3f)" % (name, clf_score))

        ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,
                 histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend(loc="lower right")
    ax1.set_title('Calibration plots  (reliability curve)')

    ax2.set_xlabel("Mean predicted value")
    ax2.set_ylabel("Count")
    ax2.legend(loc="upper center", ncol=2)

    #plt.tight_layout()
    return plt
Example #60
0
def evaluate_predictions(y_pred, y_probs, y_true, y_train_pred, y_train_probs,
                         y_train, savedir):
    logging.info(f"Calculating accuracy metrics")
    precision, recall, _thresholds_pr = metrics.precision_recall_curve(
        y_true, y_probs)
    fpr, tpr, _thresholds_roc = metrics.roc_curve(y_true, y_probs)
    model_metrics = {
        'training_accuracy': metrics.accuracy_score(y_train, y_train_pred),
        'accuracy': metrics.accuracy_score(y_true, y_pred),
        'training_f1_score': metrics.f1_score(y_train, y_train_pred),
        'f1_score': metrics.f1_score(y_true, y_pred),
        'precision': metrics.precision_score(y_true, y_pred),
        'recall': metrics.recall_score(y_true, y_pred),
        # Probability-based metrics are computed from y_probs, not hard labels
        'cross_entropy': metrics.log_loss(y_true, y_probs),
        'average_precision_score':
        metrics.average_precision_score(y_true, y_probs),
        'pr_auc_score': metrics.auc(recall, precision),
        'roc_auc_score': metrics.auc(fpr, tpr),
        'brier_score_loss': metrics.brier_score_loss(y_true, y_probs),
    }

    longer_model_metrics = {
        'confusion_matrix':
        metrics.confusion_matrix(y_true, y_pred).tolist(),
        'binding_probs':
        stats.describe(y_probs)._asdict(),
        'binding_probs_positive':
        stats.describe(y_probs[y_true == 1])._asdict(),
        'binding_probs_negative':
        stats.describe(y_probs[y_true == 0])._asdict(),
        'training_binding_probs':
        stats.describe(y_train_probs)._asdict(),
        'training_binding_probs_positive':
        stats.describe(y_train_probs[y_train == 1])._asdict(),
        'training_binding_probs_negative':
        stats.describe(y_train_probs[y_train == 0])._asdict(),
    }

    plot_filenames = {
        'pred_probs': os.path.join(savedir, "pred_probs.png"),
        'roc_curve': os.path.join(savedir, "roc_curve.png"),
        'pr_curve': os.path.join(savedir, "pr_curve.png")
    }

    logging.info(f"Plotting predicted probability distribution")

    try:
        plt.clf()
        sns.distplot(y_probs[y_true == 1],
                     label="Positives",
                     color=sns.color_palette('colorblind')[2])
        sns.distplot(y_probs[y_true == 0],
                     label="Negatives",
                     color=sns.color_palette('colorblind')[3])
    except np.linalg.LinAlgError:
        # If all the predicted probabilities are the same, then we cannot calculate kde
        plt.clf()
        sns.distplot(y_probs[y_true == 1],
                     label="Positives",
                     kde=False,
                     color=sns.color_palette('colorblind')[2])
        sns.distplot(y_probs[y_true == 0],
                     label="Negatives",
                     kde=False,
                     color=sns.color_palette('colorblind')[3])
    plt.title("Prediction probabilities by class")
    plt.legend()
    plt.savefig(plot_filenames['pred_probs'])

    logging.info(f"Plotting ROC curve")
    plt.clf()
    plt.plot(fpr, tpr)
    plt.title("ROC curve")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    plt.savefig(plot_filenames['roc_curve'])

    logging.info(f"Plotting precision recall curve")
    plt.clf()
    plt.plot(recall, precision)
    plt.title("Precision recall curve")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.legend()
    plt.savefig(plot_filenames['pr_curve'])

    return model_metrics, longer_model_metrics, plot_filenames
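A hypothetical end-to-end driver for evaluate_predictions; the random-forest model, synthetic data and output directory are assumptions, not part of the original project.

import os
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=600, n_features=20, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)

savedir = "eval_output"
os.makedirs(savedir, exist_ok=True)
model_metrics, longer_model_metrics, plot_filenames = evaluate_predictions(
    y_pred=clf.predict(X_te),
    y_probs=clf.predict_proba(X_te)[:, 1],
    y_true=y_te,
    y_train_pred=clf.predict(X_tr),
    y_train_probs=clf.predict_proba(X_tr)[:, 1],
    y_train=y_tr,
    savedir=savedir)
print(model_metrics["brier_score_loss"])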