def calibration_isotonic_regression(model_name, model, prob_model,
                                    X_calibration, y_calibration, X_train):
    # 1. Trains the calibration regressor on the calibration data.
    # 2. Then takes the model's output probabilities on the test set and returns
    #    calibrated probabilities, used later to compute a calibrated std.
    # ref: https://arxiv.org/abs/1807.00263
    if model_name == 'Bayes_Ridge_model':
        y_hat_calibration, sem_hat_calibration = model.predict(X_calibration,
                                                               return_std=True)

    elif model_name == 'RF_model':
        y_hat_calibration = model.predict(X_calibration)
        sem_hat_calibration = np.sqrt(
            fci.random_forest_error(model, X_train, X_calibration))

    else:
        print('Error: not able to calculate variance!')
        # y_hat, sem = model.predict(X_calibration)

    prob_per_int_y_calibration, prob_y_calibration, prob_y_calibration_expected, prob = count_entries_per_interval(
        y_calibration, y_hat_calibration, sem_hat_calibration)
    prob_model_y_calibration = predict_prob(y_calibration, y_hat_calibration,
                                            sem_hat_calibration)

    # isotonic regression
    from sklearn.isotonic import IsotonicRegression as IR
    ir = IR(out_of_bounds='clip')
    ir.fit(prob_model_y_calibration, prob_y_calibration)

    prob_test_calibrated = ir.transform(prob_model)
    return prob_test_calibrated
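As context for the comments above (the approach of arXiv:1807.00263), here is a minimal, self-contained sketch of the recalibration step: isotonic regression is fitted on pairs of predicted CDF values and empirical frequencies from a calibration set, then used to map new predicted probabilities. All data and names below are synthetic and illustrative; the snippet's own helpers (count_entries_per_interval, predict_prob) are not reused.

# Illustrative sketch of isotonic recalibration for regression uncertainty;
# all data below is synthetic.
import numpy as np
from scipy.stats import norm
from sklearn.isotonic import IsotonicRegression

rng = np.random.default_rng(0)
y_cal = rng.normal(size=500)         # toy calibration targets
y_hat = np.zeros(500)                # toy predictive means
sem_hat = np.full(500, 1.5)          # toy predictive stds

# predicted CDF value of each observed target under the model's Gaussian
p_model = norm.cdf(y_cal, loc=y_hat, scale=sem_hat)
# empirical frequency: fraction of calibration targets whose predicted CDF
# value falls at or below each level
p_empirical = np.searchsorted(np.sort(p_model), p_model, side='right') / len(p_model)

ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(p_model, p_empirical)

# calibrated probabilities for a few new predicted CDF values
p_test_calibrated = ir.transform(np.array([0.1, 0.5, 0.9]))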
Example No. 2
    def __call__(self, valid_preacts, valid_labels):
        ir = IR()
        valid_preacts = valid_preacts.flatten()
        min_valid_preact = np.min(valid_preacts)
        max_valid_preact = np.max(valid_preacts)
        assert len(valid_preacts) == len(valid_labels)
        # sort by preactivation to be safe; unsorted input can give odd results
        sorted_valid_preacts, sorted_valid_labels = zip(
            *sorted(zip(valid_preacts, valid_labels), key=lambda x: x[0]))
        y = ir.fit_transform(sorted_valid_preacts, sorted_valid_labels)

        def calibration_func(preact):
            preact = np.minimum(preact, max_valid_preact)
            preact = np.maximum(preact, min_valid_preact)
            return ir.transform(preact.flatten())

        return calibration_func
def calibration_isotonic_regression(data_calibration,
                                    prob_model):  # calibration function

    y_true_calibration, y_hat_calibration, sem_hat_calibration = predict_w_DNN(
        data_calibration)

    prob_per_int_y_calibration, prob_y_calibration, prob_y_calibration_expected, prob = count_entries_per_interval(
        y_true_calibration, y_hat_calibration, sem_hat_calibration)
    prob_model_y_calibration = predict_prob(y_true_calibration,
                                            y_hat_calibration,
                                            sem_hat_calibration)

    # isotonic regression
    from sklearn.isotonic import IsotonicRegression as IR
    ir = IR(out_of_bounds='clip')
    ir.fit(prob_model_y_calibration, prob_y_calibration)

    prob_test_calibrated = ir.transform(prob_model)
    return prob_test_calibrated
Example No. 4
# imports implied by the original snippet
from sklearn.isotonic import IsotonicRegression as IR
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt


def calibrate_probabilities(prob_dict, instance_label_dict):
    labels = []
    probabilities = []
    print(len(prob_dict))
    print(len(instance_label_dict))
    for i in prob_dict:
        labels.append(instance_label_dict[i])
        probabilities.append(prob_dict[i])

    ir = IR(out_of_bounds='clip')
    ir.fit(probabilities, labels)  # fit isotonic regression to abstract-level precision scores and classes
    p_calibrated = ir.transform(probabilities)

    # reliability diagram: calibrated curve first, then the uncalibrated one
    fig, ax = plt.subplots()
    fraction_of_positives, mean_predicted_value = calibration_curve(labels, p_calibrated, n_bins=10)
    ax.plot(mean_predicted_value, fraction_of_positives)
    fraction_of_positives, mean_predicted_value = calibration_curve(labels, probabilities, n_bins=10)
    ax.plot(mean_predicted_value, fraction_of_positives)

    plt.savefig('calibration_curve_on_data.png')
    return ir
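A hypothetical usage sketch of calibrate_probabilities above; the instance ids, scores, and labels are made up purely for illustration, and the imports added to the snippet are assumed to be in scope.

# Hypothetical usage of calibrate_probabilities() with toy data
toy_probs = {'inst_%d' % i: p for i, p in
             enumerate([0.1, 0.4, 0.35, 0.8, 0.7, 0.95, 0.2, 0.6, 0.55, 0.9])}
toy_labels = {'inst_%d' % i: y for i, y in
              enumerate([0, 0, 1, 1, 0, 1, 0, 1, 1, 1])}

ir_fitted = calibrate_probabilities(toy_probs, toy_labels)
print(ir_fitted.transform([0.3, 0.5, 0.85]))  # calibrated versions of new scores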
Example No. 5
    def isotonic_calibration(self, xtrain, ytrain):
        ir = IR(out_of_bounds='clip')
        ir.fit(xtrain, ytrain)
        # print(ir)
        return ir
Example No. 6
    def __init__(self, c, device):
        self.c = c
        self.ir = IR(out_of_bounds='clip')
        self.device = device
def calculate_probability_distribution(tree, instances, index, cal_method=None):

    if cal_method is None:
        return tree.distribution_for_instance(instances.get_instance(index))

    elif cal_method == 'Platt':

        p_train = np.zeros(shape=(instances.num_instances, 1))
        y_train = np.zeros(shape=(instances.num_instances, 1))

        for i, instance in enumerate(instances):
            dist = tree.distribution_for_instance(instance)
            p_train[i] = [(dist[1] - 0.5) * 2.0]
            y_train[i] = [instance.get_value(instance.class_index)]

        # print("p_train ====>>>", p_train)
        # print("y_train ====>>>", y_train)

        dist = (tree.distribution_for_instance(instances.get_instance(index))[1] - 0.5) * 2.0
        tmp = np.zeros(shape=(1, 1))
        tmp[0] = [dist]

        print(np.sum(y_train))
        if np.sum(y_train) in [len(y_train), 0]:
            print("all one class")
            for ins in instances:
                print("ins ===> ", ins)
            return tree.distribution_for_instance(instances.get_instance(index))

        else:

            warnings.filterwarnings("ignore", category=FutureWarning)
            lr = LR(solver='lbfgs')
            lr.fit(p_train, np.ravel(y_train, order='C'))

            return lr.predict_proba(tmp.reshape(1, -1))[0]

    elif cal_method == 'Isotonic':

        p_train = np.zeros(shape=(instances.num_instances, 1))
        y_train = np.zeros(shape=(instances.num_instances, 1))

        for i, instance in enumerate(instances):
            dist = tree.distribution_for_instance(instance)
            p_train[i] = [dist[1]]
            y_train[i] = [instance.get_value(instance.class_index)]

        dist = tree.distribution_for_instance(instances.get_instance(index))[1]
        tmp = np.zeros(shape=(1, 1))
        tmp[0] = [dist]

        print(np.sum(y_train))
        if np.sum(y_train) in [len(y_train), 0]:
            print("all one class")
            for ins in instances:
                print("ins ===> ", ins)
            return tree.distribution_for_instance(instances.get_instance(index))

        else:

            ir = IR(out_of_bounds='clip')
            ir.fit(np.ravel(p_train, order='C'), np.ravel(y_train, order='C'))

            p = ir.transform(np.ravel(tmp, order='C'))[0]
            return [p, 1 - p]

    # elif cal_method == 'ProbabilityCalibrationTree':
    #     pass

    elif cal_method == 'ICP':

        pass

    elif cal_method == 'Venn1':
        calibrPts = []

        for i, instance in enumerate(instances):
            dist = tree.distribution_for_instance(instance)
            score = dist[0] if dist[1] < dist[0] else dist[1]
            calibrPts.append((score, instance.get_value(instance.class_index)))

        dist = tree.distribution_for_instance(instances.get_instance(index))
        score = dist[0] if dist[1] < dist[0] else dist[1]
        tmp = [score]

        p0, p1 = VennABERS.ScoresToMultiProbs(calibrPts, tmp)
        print("Venn-ABERS probabilities =========>>>  ", p0, " , ", p1)
        return [p0, p1]
Example No. 8
def calibrated(test_predictions,
               oof_predictions,
               flag_transform=sigmoid,
               type_transform=parse_classifier_probas):
    """
    Update test predictions w.r.t to calibration trained on OOF predictions
    :param test_predictions:
    :param oof_predictions:
    :return:
    """
    from sklearn.isotonic import IsotonicRegression as IR
    import matplotlib.pyplot as plt

    oof_predictions = oof_predictions.copy()
    oof_predictions[OUTPUT_PRED_MODIFICATION_TYPE] = oof_predictions[
        OUTPUT_PRED_MODIFICATION_TYPE].apply(type_transform)
    oof_predictions[OUTPUT_PRED_MODIFICATION_FLAG] = oof_predictions[
        OUTPUT_PRED_MODIFICATION_FLAG].apply(flag_transform)

    test_predictions = test_predictions.copy()
    test_predictions[OUTPUT_PRED_MODIFICATION_TYPE] = test_predictions[
        OUTPUT_PRED_MODIFICATION_TYPE].apply(type_transform)
    test_predictions[OUTPUT_PRED_MODIFICATION_FLAG] = test_predictions[
        OUTPUT_PRED_MODIFICATION_FLAG].apply(flag_transform)

    y_true = oof_predictions["true_modification_flag"].values.astype(int)
    # print("Target", np.bincount(oof_predictions["true_modification_type"].values.astype(int)))

    if True:
        y_pred_raw = oof_predictions[OUTPUT_PRED_MODIFICATION_FLAG].values
        b_auc_before = alaska_weighted_auc(y_true, y_pred_raw)

        ir_flag = IR(out_of_bounds="clip", y_min=0, y_max=1)
        y_pred_cal = ir_flag.fit_transform(y_pred_raw, y_true)
        b_auc_after = alaska_weighted_auc(y_true, y_pred_cal)

        if b_auc_after > b_auc_before:
            test_predictions[
                OUTPUT_PRED_MODIFICATION_FLAG] = ir_flag.transform(
                    test_predictions[OUTPUT_PRED_MODIFICATION_FLAG].values)
        else:
            # test_predictions[OUTPUT_PRED_MODIFICATION_FLAG] = ir_flag.transform(
            #     test_predictions[OUTPUT_PRED_MODIFICATION_FLAG].values
            # )

            warnings.warn(
                f"Failed to train IR flag {b_auc_before} {b_auc_after}")

            plt.figure()
            plt.hist(y_pred_raw,
                     alpha=0.5,
                     bins=100,
                     label=f"non-calibrated {b_auc_before}")
            plt.hist(y_pred_cal,
                     alpha=0.5,
                     bins=100,
                     label=f"calibrated {b_auc_after}")
            plt.yscale("log")
            plt.legend()
            plt.show()

    if True:
        ir_type = IR(out_of_bounds="clip", y_min=0, y_max=1)
        y_pred_raw = oof_predictions[OUTPUT_PRED_MODIFICATION_TYPE].values
        c_auc_before = alaska_weighted_auc(y_true, y_pred_raw)
        y_pred_cal = ir_type.fit_transform(y_pred_raw, y_true)
        c_auc_after = alaska_weighted_auc(y_true, y_pred_cal)
        if c_auc_after > c_auc_before:
            test_predictions[
                OUTPUT_PRED_MODIFICATION_TYPE] = ir_type.transform(
                    test_predictions[OUTPUT_PRED_MODIFICATION_TYPE].values)

            # plt.figure()
            # plt.hist(y_pred_raw, alpha=0.5, bins=100, label=f"non-calibrated {c_auc_before}")
            # plt.hist(y_pred_cal, alpha=0.5, bins=100, label=f"calibrated {c_auc_after}")
            # plt.yscale("log")
            # plt.legend()
            # plt.show()
        else:
            # test_predictions[OUTPUT_PRED_MODIFICATION_TYPE] = ir_type.transform(
            #     test_predictions[OUTPUT_PRED_MODIFICATION_TYPE].values
            # )

            warnings.warn(
                f"Failed to train IR on type {c_auc_before} {c_auc_after}")

            # plt.figure()
            # plt.hist(y_pred_raw, alpha=0.5, bins=100, label=f"non-calibrated {c_auc_before}")
            # plt.hist(y_pred_cal, alpha=0.5, bins=100, label=f"calibrated {c_auc_after}")
            # plt.yscale("log")
            # plt.legend()
            # plt.show()

    results = {
        "b_auc_before": b_auc_before,
        "b_auc_after": b_auc_after,
        "c_auc_before": c_auc_before,
        "c_auc_after": c_auc_after,
    }
    return test_predictions, results
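The docstring above describes a common pattern: fit isotonic regression on out-of-fold (OOF) predictions against the true labels, then apply the mapping to the test predictions only if the validation metric held up. Below is a minimal, self-contained sketch of that pattern on synthetic data; roc_auc_score stands in for the project-specific alaska_weighted_auc, and all names and data are illustrative.

# Minimal sketch of OOF-based isotonic calibration on synthetic data;
# roc_auc_score stands in for the project's alaska_weighted_auc metric.
import numpy as np
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(42)
y_oof = rng.integers(0, 2, size=5000)
# informative but miscalibrated scores for the OOF split and a test split
oof_scores = np.clip(0.3 * y_oof + 0.35 + 0.3 * rng.random(5000), 0.0, 1.0)
test_scores = np.clip(0.3 * rng.integers(0, 2, size=1000) + 0.35
                      + 0.3 * rng.random(1000), 0.0, 1.0)

ir = IsotonicRegression(out_of_bounds='clip', y_min=0, y_max=1)
auc_before = roc_auc_score(y_oof, oof_scores)
auc_after = roc_auc_score(y_oof, ir.fit_transform(oof_scores, y_oof))

# an isotonic (monotonic) mapping preserves the ranking, so AUC can only change
# through ties it introduces; keep the calibrated scores only if the metric held up
if auc_after >= auc_before:
    test_scores = ir.transform(test_scores)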
Example No. 9

trainResult = runffm('../data/calibration/ffmTrain-102662.ffm',
                     '../data/calibration/ffm-model-102662')

p_train_all = read_csv(trainResult)['prob']
oriTrain = read_csv('../data/train.csv')
sameTrain = oriTrain[oriTrain['clickTime'] >= 190000].reset_index()
print(len(sameTrain), len(p_train_all))
part_sameTrain = sameTrain[(sameTrain['clickTime'] >= 200000)
                           & (sameTrain['clickTime'] < 290000)]

p_train = p_train_all.loc[part_sameTrain.index]
y_train = part_sameTrain['label']

ir = IR()
ir.fit(p_train, y_train)

oriResult = read_csv(
    '../data/calibration/ffm_mergeAppUser_s17_preAction_190000_no_Dist_noNum_t150_k8_l2e-05_2017-06-05-20-58-00.csv'
)
p_test = oriResult['prob']
p_calibrated = ir.transform(
    p_test)  # map the test probabilities through the fitted isotonic model

oriResult['new_prob'] = Series(p_calibrated)
oriResult.to_csv('../data/calibration/calib_temp.csv', index=False)
oriResult['nozero_new_prob'] = oriResult.apply(
    lambda x: x['new_prob'] if x['new_prob'] > 0 else x['prob'],
    axis='columns')
Example No. 10
    def Iso(self):
        IReg = IR(y_min=None, y_max=None, increasing=True, out_of_bounds='nan')
        pass
def calibrate_probs(probabilities, classes):
    ir = IR(out_of_bounds='clip')
    ir.fit(probabilities, classes)  # fit isotonic regression to abstract-level precision scores and classes
    p_calibrated = ir.transform(probabilities)

    return p_calibrated
Example No. 12
    # convert to the terminology used in all python files so far (redundant from a code perspective)
    y_true = y_sample.flatten()
    y_hat = m  # predicted mean

    return y_true, y_hat, varma

def calibration_isotonic_regression(data_calibration, prob_model): # calibration function

    y_true_calibration, y_hat_calibration, sem_hat_calibration = predict_w_DNN(data_calibration)

    prob_per_int_y_calibration, prob_y_calibration, prob_y_calibration_expected, prob = count_entries_per_interval(y_true_calibration, y_hat_calibration, sem_hat_calibration)
    prob_model_y_calibration = predict_prob(y_true_calibration, y_hat_calibration, sem_hat_calibration)

    # isotonic regression
    from sklearn.isotonic import IsotonicRegression as IR
    ir = IR(out_of_bounds='clip')
    ir.fit(prob_model_y_calibration, prob_y_calibration)

    prob_test_calibrated = ir.transform(prob_model)
    return prob_test_calibrated


r2_vec = []
mape_vec = []
rmspe_vec = []
mse_vec = []
acc_zone_percentage_vec = []
beta_vec = []
rlh_vec = []
avg_calibration_vec = []
SH_vec = []
###

# train/test split (in half)

train_end = y.shape[0] // 2  # integer division so the result can be used as a slice index
test_start = train_end + 1

y_train = y[0:train_end]
y_test = y[test_start:]

p_train = p[0:train_end]
p_test = p[test_start:]

###

ir = IR(out_of_bounds='clip')  # out_of_bounds param needs scikit-learn >= 0.15
ir.fit(p_train, y_train)
p_calibrated = ir.transform(p_test)

p_calibrated[np.isnan(p_calibrated)] = 0

###

acc = accuracy_score(y_test, np.round(p_test))
acc_calibrated = accuracy_score(y_test, np.round(p_calibrated))

auc = AUC(y_test, p_test)
auc_calibrated = AUC(y_test, p_calibrated)

ll = log_loss(y_test, p_test)
ll_calibrated = log_loss(y_test, p_calibrated)