def run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance, condition=None):
    """ Run split conformal method

    Parameters
    ----------
    nc : class of nonconformist object
    X_train : numpy array, training features (n1Xp)
    y_train : numpy array, training labels (n1)
    X_test : numpy array, testing features (n2Xp)
    idx_train : numpy array, indices of proper training set examples
    idx_cal : numpy array, indices of calibration set examples
    significance : float, significance level (e.g. 0.1)
    condition : function, mapping feature vector to group id

    Returns
    -------
    y_lower : numpy array, estimated lower bound for the labels (n2)
    y_upper : numpy array, estimated upper bound for the labels (n2)

    """
    conformal = IcpRegressor(nc, condition=condition)

    # Train the underlying model on the proper training subset only
    conformal.fit(X_train[idx_train, :], y_train[idx_train])

    # Compute nonconformity scores on the held-out calibration subset
    conformal.calibrate(X_train[idx_cal, :], y_train[idx_cal])

    # Intervals at the requested significance level; column 0 is the lower
    # endpoint, column 1 the upper endpoint
    intervals = conformal.predict(X_test, significance=significance)
    return intervals[:, 0], intervals[:, 1]
def run_icp_sep(nc, X_train, y_train, X_test, idx_train, idx_cal, significance, condition):
    """ Run split conformal method, train a separate regressor for each group

    Parameters
    ----------
    nc : class of nonconformist object
    X_train : numpy array, training features (n1Xp)
    y_train : numpy array, training labels (n1)
    X_test : numpy array, testing features (n2Xp)
    idx_train : numpy array, indices of proper training set examples
    idx_cal : numpy array, indices of calibration set examples
    significance : float, significance level (e.g. 0.1)
    condition : function, mapping a feature vector to group id

    Returns
    -------
    y_lower : numpy array, estimated lower bound for the labels (n2)
    y_upper : numpy array, estimated upper bound for the labels (n2)

    """
    X_fit = X_train[idx_train, :]
    y_fit = y_train[idx_train]
    X_cal = X_train[idx_cal, :]
    y_cal = y_train[idx_cal]

    # Group id of every example in each split; labels are unknown at test
    # time, so None is passed in place of y
    groups_fit = np.array([
        condition((X_fit[i, :], y_fit[i])) for i in range(y_fit.size)
    ])
    groups_cal = np.array([
        condition((X_cal[i, :], y_cal[i])) for i in range(y_cal.size)
    ])
    groups_test = np.array(
        [condition((X_test[i, :], None)) for i in range(X_test.shape[0])])

    y_lower = np.zeros(X_test.shape[0])
    y_upper = np.zeros(X_test.shape[0])

    # Fit, calibrate and predict with one conformal regressor per group
    # observed in the proper training set; nc is indexed in group order
    for k, group in enumerate(np.unique(groups_fit)):
        icp = IcpRegressor(nc[k])

        fit_mask = groups_fit == group
        icp.fit(X_fit[fit_mask, :], y_fit[fit_mask])

        cal_mask = groups_cal == group
        icp.calibrate(X_cal[cal_mask, :], y_cal[cal_mask])

        test_mask = groups_test == group
        bounds = icp.predict(X_test[test_mask, :], significance=significance)
        y_lower[test_mask] = bounds[:, 0]
        y_upper[test_mask] = bounds[:, 1]

    return y_lower, y_upper
def run_experiment(cur_test_method, cur_dataset_name, cur_batch_size, cur_lr_loss, cur_lr_dis, cur_loss_steps, cur_dis_steps, cur_mu_val, cur_epochs, cur_model_type, cur_regression_type, cur_random_state, cur_second_scale, num_experiments):
    """Run a batch of fair-regression experiments and append results to CSV.

    For each of ``num_experiments`` repetitions this trains a fairness-aware
    regressor (selected by ``cur_test_method``), wraps it in a conformal
    predictor split by the first (protected) feature, evaluates per-group
    coverage/length and a fairness p-value, and appends one row to
    ./results/results.csv.

    Parameters
    ----------
    cur_test_method : str, one of "AdversarialDebiasing", "FairDummies",
        "HGR", "Baseline"
    cur_dataset_name : str, dataset identifier passed to get_dataset
    cur_batch_size : int, minibatch size for the learners
    cur_lr_loss : float, step size to minimize the prediction loss
    cur_lr_dis : float, step size used to fit the GAN's classifier
    cur_loss_steps : int, inner epochs to fit the loss
    cur_dis_steps : int, inner epochs to fit the GAN's classifier
    cur_mu_val : float, fairness penalty weight (lambda)
    cur_epochs : int, total number of training epochs
    cur_model_type : str, underlying model architecture identifier
    cur_regression_type : str, only "mreg" (MSE regression) is supported
    cur_random_state : int, base random seed
    cur_second_scale : float, second-moment scaling (FairDummies only)
    num_experiments : int, number of repetitions (seed is offset by i)
    """
    method = cur_test_method

    # Seed every RNG source so repetitions are reproducible
    seed = cur_random_state
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    dataset = cur_dataset_name
    batch_size = cur_batch_size

    # step size to minimize loss
    lr_loss = cur_lr_loss
    # step size used to fit GAN's classifier
    lr_dis = cur_lr_dis
    # inner epochs to fit loss
    loss_steps = cur_loss_steps
    # inner epochs to fit GAN's classifier
    dis_steps = cur_dis_steps
    # total number of epochs
    epochs = cur_epochs

    # utility loss; only mean-squared-error regression is implemented, any
    # other regression type aborts (bare raise is intentional fail-fast)
    if cur_regression_type == "mreg":
        cost_pred = torch.nn.MSELoss()
        out_shape = 1
    else:
        raise

    model_type = cur_model_type
    metric = "equalized_odds"

    print(dataset)
    print(method)
    sys.stdout.flush()

    # Per-repetition metrics, split by protected group 0/1
    avg_length_0 = np.zeros(num_experiments)
    avg_length_1 = np.zeros(num_experiments)
    avg_coverage_0 = np.zeros(num_experiments)
    avg_coverage_1 = np.zeros(num_experiments)
    avg_p_val = np.zeros(num_experiments)
    mse = np.zeros(num_experiments)

    for i in range(num_experiments):

        # Split into train and test (seed offset gives a fresh split per run)
        X, A, Y, X_cal, A_cal, Y_cal, X_test, A_test, Y_test = get_dataset.get_train_test_data(
            base_path, dataset, seed + i)
        in_shape = X.shape[1]

        print("n train = " + str(X.shape[0]) + " p = " + str(X.shape[1]))
        print("n calibration = " + str(X_cal.shape[0]))
        print("n test = " + str(X_test.shape[0]))
        sys.stdout.flush()

        # Each branch defines a RegAdapter class closing over the hyper-
        # parameters above; only the wrapped learner differs per method.
        if method == "AdversarialDebiasing":

            class RegAdapter(RegressorAdapter):
                def __init__(self, model=None, fit_params=None, params=None):
                    super(RegAdapter, self).__init__(model, fit_params)
                    # Instantiate model
                    self.learner = adv_debiasing.AdvDebiasingRegLearner(
                        lr=lr_loss,
                        N_CLF_EPOCHS=loss_steps,
                        N_ADV_EPOCHS=dis_steps,
                        N_EPOCH_COMBINED=epochs,
                        cost_pred=cost_pred,
                        in_shape=in_shape,
                        batch_size=batch_size,
                        model_type=model_type,
                        out_shape=out_shape,
                        lambda_vec=cur_mu_val)

                def fit(self, x, y):
                    self.learner.fit(x, y)

                def predict(self, x):
                    return self.learner.predict(x)

        elif method == 'FairDummies':

            class RegAdapter(RegressorAdapter):
                def __init__(self, model=None, fit_params=None, params=None):
                    super(RegAdapter, self).__init__(model, fit_params)
                    # Instantiate model
                    self.learner = fair_dummies_learning.EquiRegLearner(
                        lr=lr_loss,
                        pretrain_pred_epochs=0,
                        pretrain_dis_epochs=0,
                        epochs=epochs,
                        loss_steps=loss_steps,
                        dis_steps=dis_steps,
                        cost_pred=cost_pred,
                        in_shape=in_shape,
                        batch_size=batch_size,
                        model_type=model_type,
                        lambda_vec=cur_mu_val,
                        second_moment_scaling=cur_second_scale,
                        out_shape=out_shape)

                def fit(self, x, y):
                    self.learner.fit(x, y)

                def predict(self, x):
                    return self.learner.predict(x)

        elif method == 'HGR':

            class RegAdapter(RegressorAdapter):
                def __init__(self, model=None, fit_params=None, params=None):
                    super(RegAdapter, self).__init__(model, fit_params)
                    # Instantiate model
                    self.learner = continuous_fairness.HGR_Reg_Learner(
                        lr=lr_loss,
                        epochs=epochs,
                        mu=cur_mu_val,
                        cost_pred=cost_pred,
                        in_shape=in_shape,
                        out_shape=out_shape,
                        batch_size=batch_size,
                        model_type=model_type)

                def fit(self, x, y):
                    self.learner.fit(x, y)

                def predict(self, x):
                    return self.learner.predict(x)

        elif method == 'Baseline':

            # Baseline: EquiRegLearner with the fairness machinery disabled
            # (all adversarial steps zero, lambda 0) — plain pretrained model
            class RegAdapter(RegressorAdapter):
                def __init__(self, model=None, fit_params=None, params=None):
                    super(RegAdapter, self).__init__(model, fit_params)
                    # Instantiate model
                    self.learner = fair_dummies_learning.EquiRegLearner(
                        lr=lr_loss,
                        pretrain_pred_epochs=epochs,
                        pretrain_dis_epochs=0,
                        epochs=0,
                        loss_steps=0,
                        dis_steps=0,
                        cost_pred=cost_pred,
                        in_shape=in_shape,
                        batch_size=batch_size,
                        model_type=model_type,
                        lambda_vec=0,
                        second_moment_scaling=0,
                        out_shape=out_shape)

                def fit(self, x, y):
                    self.learner.fit(x, y)

                def predict(self, x):
                    return self.learner.predict(x)

        fairness_reg = RegAdapter(model=None)

        if cur_regression_type == "mreg":
            nc = RegressorNc(fairness_reg, AbsErrorErrFunc())
        else:
            raise

        # function that extracts the group identifier
        # NOTE(review): x[0][0] is the protected attribute A because it is
        # prepended as the first column of the input below
        def condition(x, y=None):
            return int(x[0][0] > 0)

        icp = IcpRegressor(nc, condition=condition)

        # Prepend the protected attribute A as the first input column
        input_data_train = np.concatenate((A[:, np.newaxis], X), 1)
        icp.fit(input_data_train, Y)

        input_data_cal = np.concatenate((A_cal[:, np.newaxis], X_cal), 1)
        icp.calibrate(input_data_cal, Y_cal)

        input_data_test = np.concatenate((A_test[:, np.newaxis], X_test), 1)
        Yhat_test = icp.predict(input_data_test, significance=0.1)

        # compute and print average coverage and average length
        coverage_sample, length_sample = compute_coverage_per_sample(
            Y_test, Yhat_test[:, 0], Yhat_test[:, 1], 0.1, method,
            input_data_test, condition)

        avg_coverage, avg_length = compute_coverage_len(
            Y_test, Yhat_test[:, 0], Yhat_test[:, 1])

        avg_length_0[i] = np.mean(length_sample[0])
        avg_coverage_0[i] = np.mean(coverage_sample[0])
        avg_length_1[i] = np.mean(length_sample[1])
        avg_coverage_1[i] = np.mean(coverage_sample[1])

        # Raw (non-conformal) point predictions, used for MSE and the
        # fair-dummies hypothesis test
        Yhat_out_cal = fairness_reg.learner.predict(input_data_cal)
        Yhat_out_test = fairness_reg.learner.predict(input_data_test)

        if out_shape == 1:
            mse[i] = np.mean((Yhat_out_test - Y_test)**2)
            # Trivial predictor baseline: always predict the test-set mean
            MSE_trivial = np.mean((np.mean(Y_test) - Y_test)**2)
            print("MSE = " + str(mse[i]) + "MSE Trivial = " + str(MSE_trivial))

        # p-value of the equalized-odds fairness test; low values reject
        # the hypothesis that predictions are fair
        p_val = utility_functions.fair_dummies_test_regression(
            Yhat_out_cal,
            A_cal,
            Y_cal,
            Yhat_out_test,
            A_test,
            Y_test,
            num_reps=1,
            num_p_val_rep=1000,
            reg_func_name="Net")
        avg_p_val[i] = p_val

        print("experiment = " + str(i + 1))
        # if out_shape==2:
        #     init_coverage, init_length = compute_coverage_len(Y_test, Yhat_out_test[:,0], Yhat_out_test[:,1])
        #     print("Init Coverage = " + str(init_coverage))
        #     print("Init Length = " + str(init_length))
        print("Coverage 0 = " + str(avg_coverage_0[i]))
        print("Coverage 1 = " + str(avg_coverage_1[i]))
        print("Length 0 = " + str(avg_length_0[i]))
        print("Length 1 = " + str(avg_length_1[i]))
        print("MSE = " + str(mse[i]))
        print("p_val = " + str(p_val))
        sys.stdout.flush()

        # Append this repetition's results to the shared CSV
        outdir = './results/'
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        out_name = outdir + 'results.csv'

        full_name = cur_test_method + "_" + cur_model_type + "_" + cur_regression_type

        df = pd.DataFrame({
            'method': [cur_test_method],
            'dataset': [cur_dataset_name],
            'batch_size': [cur_batch_size],
            'lr_loss': [cur_lr_loss],
            'lr_dis': [cur_lr_dis],
            'loss_steps': [cur_loss_steps],
            'dis_steps': [cur_dis_steps],
            'mu_val': [cur_mu_val],
            'epochs': [cur_epochs],
            'random_state': [seed + i],
            'model_type': [cur_model_type],
            'metric': [metric],
            'cur_second_scale': [cur_second_scale],
            'regression_type': [cur_regression_type],
            'avg_length': [avg_length],
            'avg_coverage': [avg_coverage],
            'avg_length_0': [avg_length_0[i]],
            'avg_length_1': [avg_length_1[i]],
            'mse': [mse[i]],
            'avg_coverage_0': [avg_coverage_0[i]],
            'avg_coverage_1': [avg_coverage_1[i]],
            'p_val': [p_val],
            'full_name': [full_name]
        })

        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)
        df.to_csv(out_name, index=False)

        print(full_name)
        print(
            "Num experiments %02d | Avg MSE = %.4f | Avg Length 0 = %.4f | Avg Length 1 = %.4f | Avg Coverage 0 = %.4f | Avg Coverage 1 = %.4f | Avg p_val = %.4f | min p_val = %.4f"
            % (i + 1, np.mean(mse[:i + 1]), np.mean(avg_length_0[:i + 1]),
               np.mean(avg_length_1[:i + 1]), np.mean(avg_coverage_0[:i + 1]),
               np.mean(avg_coverage_1[:i + 1]), np.mean(
                   avg_p_val[:i + 1]), np.min(avg_p_val[:i + 1])))
        print("======== Done =========")
        sys.stdout.flush()
def __updatePlot(self):
    """Refresh all diagnostic plots for the current sampling state.

    Draws, in order and only when the corresponding data/flag is set:
    the sampling over the ground truth, the average estimated design
    space, the hint space, and histograms of estimated distances for
    labeled and unlabeled samples. Finally forces a canvas redraw,
    optionally blocks for a button press, and clears the per-iteration
    distance buffers.
    """
    # Axes are consumed in order; plotIdx tracks the next free subplot
    plotIdx = 0

    # Plot sampling over ground truth
    if self.groundTruth is not None:
        self.ax[plotIdx].clear()
        self.ax[plotIdx].set_xlim([0, 1.05])
        self.ax[plotIdx].set_ylim([0, 1.05])
        self.ax[plotIdx].set_title("ATNE sampling")
        # All ground-truth points in light gray as background
        self.ax[plotIdx].plot(self.groundTruth[:, 0],
                              self.groundTruth[:, 1],
                              'x',
                              color="0.7",
                              markeredgewidth=1.8,
                              markersize=5)
        # Hint-eliminated points in red (only when no elimination weights)
        if len(self.hintEliminatedIndexes
               ) > 0 and self.elimWeights is None:
            self.ax[plotIdx].plot(
                self.groundTruth[self.hintEliminatedIndexes, 0],
                self.groundTruth[self.hintEliminatedIndexes, 1],
                'x',
                color="indianred",
                markeredgewidth=1.8,
                markersize=5)
        if self.elimWeights is None:
            # No weights: draw the relaxed set uniformly in green
            self.ax[plotIdx].plot(self.groundTruth[self.relaxedIndexes, 0],
                                  self.groundTruth[self.relaxedIndexes, 1],
                                  'x',
                                  color="g",
                                  markeredgewidth=1.8,
                                  markersize=5)
        else:
            #self.ax[plotIdx].plot(self.groundTruth[self.sampledIndexes,0], self.groundTruth[self.sampledIndexes,1], 'o', color="b", markeredgewidth=1.8, markersize=5, alpha=0.6)
            # Color each relaxed point on a green->red scale by its
            # normalized elimination weight
            # NOTE(review): elimWeights is assumed 3-D (point x rep x
            # objective) given the axis usage here — confirm against caller
            weights = np.sum(
                np.mean(self.elimWeights, axis=1) /
                np.max(np.mean(self.elimWeights, axis=1), axis=0),
                axis=1)
            # weights = np.mean(self.elimWeights[:,:,0], axis=1)
            alpha = 1 - (weights / np.max(weights)) / 2
            red = weights / np.max(weights)
            for i in range(self.groundTruth.shape[0]):
                if i not in self.relaxedIndexes:
                    continue
                if np.isnan(red[i]):
                    red[i] = 0
                self.ax[plotIdx].plot(self.groundTruth[i, 0],
                                      self.groundTruth[i, 1],
                                      'x',
                                      color=[red[i], 1 - red[i], 0],
                                      markeredgewidth=1.8,
                                      markersize=5)
        # Sampled (labeled) points in blue, drawn last so they stay on top
        self.ax[plotIdx].plot(self.groundTruth[self.sampledIndexes, 0],
                              self.groundTruth[self.sampledIndexes, 1],
                              'x',
                              color="b",
                              markeredgewidth=1.8,
                              markersize=5)
        plotIdx += 1

    # Plot predicted design space
    if self.predictions is not None and self.doPlotPredictions:
        self.ax[plotIdx].clear()
        labeledMask = np.in1d(self.predictionsIndexes, self.sampledIndexes)
        labeledMaskIdx = np.where(labeledMask)[0]
        cmap = self.plt.cm.get_cmap('hsv')
        shapes = ['x', '.', '+']

        # Plot type 1
        #self.ax[plotIdx].set_title("Estimated design spaces by each forest")
        #for f in range(self.predictions.shape[0]):
        #    self.ax[plotIdx].plot(self.predictions[f,:,0], self.predictions[f,:,1], 'x', markeredgewidth=1.8, markersize=5)
        #    #self.ax[plotIdx].plot(self.predictions[f,labeledMask,0], self.predictions[f,labeledMask,1], 'x', markeredgewidth=1.8, markersize=5)

        # Plot type 2
        #import matplotlib
        #for i,p in enumerate(labeledMaskIdx):
        #    color = cmap(i/len(labeledMaskIdx))
        #    predmean = self.predictions[:,p,:].mean(0)
        #    predmed = np.median(self.predictions[:,p,:], 0)
        #    predstd = self.predictions[:,p,:].std(0)
        #    # Plot type 2.1
        #    #self.ax[plotIdx].plot(self.predictions[:,p,0], self.predictions[:,p,1], shapes[i%len(shapes)], markeredgewidth=1.8, markersize=5, color=color)
        #    #self.ax[plotIdx].plot(predmean[0], predmean[1], shapes[0], markeredgewidth=1.8, markersize=5, color=color)
        #    #self.ax[plotIdx].plot(predmed[0], predmed[1], shapes[1], markeredgewidth=1.8, markersize=5, color=color)
        #    # Plot type 2.2
        #    circle = matplotlib.patches.Ellipse(predmean[[0,1]], predstd[0], predstd[1])
        #    self.ax[plotIdx].add_artist(circle)

        # Plot type 3 (Mean predictions)
        # self.ax[plotIdx].set_title("Average estimated P_relaxed")
        # pred_mean = self.predictions.mean(0)
        # self.ax[plotIdx].plot(pred_mean[:,0], pred_mean[:,1], 'x', markeredgewidth=1.8, markersize=5)

        # Plot type 4 (Mean predictions of the entire space)
        self.ax[plotIdx].set_title("Average estimated design space")
        if self.estimators is not None:
            # Re-predict the full knob space with every per-forest,
            # per-objective estimator, then scatter the forest-mean
            predictions = np.empty([
                self.predictions.shape[0],
                self.designs.getNumDesigns(), self.predictions.shape[2]
            ])
            for f in range(self.predictions.shape[0]):
                for o in range(self.predictions.shape[2]):
                    predictions[f, :, o] = self.estimators[f][o].predict(
                        self.allKnobs)
            pred_mean = predictions.mean(0)
            self.ax[plotIdx].scatter(pred_mean[:, 0],
                                     pred_mean[:, 1],
                                     marker='x',
                                     c=np.arange(pred_mean.shape[0]) /
                                     pred_mean.shape[0])

        # Some tests here, although I can't remember what I was testing exactly...
        # (dead experimental code, intentionally disabled)
        if False:
            from nonconformist.cp import IcpRegressor
            from nonconformist.nc import NcFactory
            from sklearn.ensemble import RandomForestRegressor

            model1 = RandomForestRegressor()
            nc1 = NcFactory.create_nc(model1)
            icp1 = IcpRegressor(nc1)
            model2 = RandomForestRegressor()
            nc2 = NcFactory.create_nc(model2)
            icp2 = IcpRegressor(nc2)

            n = self.sampledIndexes.size
            idx = np.random.permutation(n)
            idx_train, idx_cal = idx[:int(0.8 * n)], idx[int(0.8 * n):]

            icp1.fit(
                self.allKnobs[self.sampledIndexes][idx_train, :],
                self.groundTruth[self.sampledIndexes, 0][idx_train])
            icp2.fit(
                self.allKnobs[self.sampledIndexes][idx_train, :],
                self.groundTruth[self.sampledIndexes, 1][idx_train])
            icp1.calibrate(
                self.allKnobs[self.sampledIndexes][idx_cal, :],
                self.groundTruth[self.sampledIndexes, 0][idx_cal])
            icp2.calibrate(
                self.allKnobs[self.sampledIndexes][idx_cal, :],
                self.groundTruth[self.sampledIndexes, 1][idx_cal])

            prediction1 = icp1.predict(self.allKnobs, significance=0.05)
            prediction2 = icp2.predict(self.allKnobs, significance=0.05)
            print(prediction1)
            self.ax[plotIdx].errorbar(pred_mean[:, 0],
                                      pred_mean[:, 1],
                                      xerr=prediction1,
                                      yerr=prediction2,
                                      linestyle="None")

        # Keep this
        #self.ax[plotIdx].set_xlim(left=0, right=2)
        #self.ax[plotIdx].set_ylim(bottom=0, top=2)
        plotIdx += 1

    # Plot hint space if available
    if self.doPlotHintSpace and self.hintSpace is not None:
        self.ax[plotIdx].clear()
        self.ax[plotIdx].set_xlim([0, 1.05])
        self.ax[plotIdx].set_ylim([0, 1.05])
        self.ax[plotIdx].set_title("Hint space")
        self.ax[plotIdx].plot(self.hintSpace[:, 0],
                              self.hintSpace[:, 1],
                              'x',
                              markeredgewidth=1.8,
                              markersize=5)
        plotIdx += 1

    # Plot distances for labeled samples
    if self.selectedDistances and self.doPlotDistances:
        self.ax[plotIdx].clear()
        self.ax[plotIdx].set_title(
            "Estimated distances for labeled samples")
        for d in self.selectedDistances:
            self.ax[plotIdx].hist(d.flatten(), 50, alpha=0.65)
            #self.ax[plotIdx].hist(d.mean(0), 50, alpha=0.65)
        #for d in self.gtDistances:
        #    self.ax[plotIdx].hist(d.flatten(), 50, alpha=0.65)
        plotIdx += 1

    # Plot distances for unlabeled samples
    if self.predictedDistances and self.doPlotDistances:
        self.ax[plotIdx].clear()
        self.ax[plotIdx].set_title(
            "Estimated distances for unlabeled samples (within P_relaxed)")
        predictedDistances = np.array(self.predictedDistances)
        for d in range(predictedDistances.shape[2]):
            self.ax[plotIdx].hist(predictedDistances[:, :, d].flatten(),
                                  50,
                                  alpha=0.65)
        plotIdx += 1

    self.plt.show()
    # pause() may raise with some non-interactive backends; best-effort only
    try:
        self.plt.pause(0.00001)
    except:
        pass
    self.fig.canvas.draw()
    if self.blocking:
        self.fig.waitforbuttonpress()

    # Reset the per-iteration distance buffers after rendering
    self.selectedDistances = []
    self.gtDistances = []
    self.predictedDistances = []
class Adapt_to_CP():
    r"""Class to adapt models so they produce predictions with confidence intervals

    This class transforms sklearn, lightgbm, xgboost and catboost models so
    they can perform conformal prediction. In order to initiate this class
    the `model` must be given as an input. In addition to the `model`, a
    boolean variable `sklearn_model` must be given, indicating if the model
    belongs to scikit learn (True) or not (False).
    """

    def __init__(self, model, sklearn_model: bool):
        r"""__init__ method

        This method is used to adapt the input `model` so it can be used for
        creating confidence intervals with conformal prediction.

        Parameters
        ----------
        model:
            Model we want to use as the underlying model to generate
            predictions and the confidence interval. This model can only be
            a scikit learn model, LGBMRegressor, LGBMClassifier,
            XGBRegressor, XGBClassifier, CatBoostRegressor or
            CatBoostClassifier.
        sklearn_model: bool
            This variable indicates if the model belongs to scikit learn or
            not.

        Returns
        -------
        cp: obj: Adapt_to_CP
            The class of the adapted model.

        Examples
        --------
        >>> model = lightgbm.LGBMRegressor()
        >>> cp = Adapt_to_CP(model)
        """
        self.model = model
        if sklearn_model:
            # Native sklearn estimators work directly with NcFactory
            if is_classifier(model):
                self.icp = IcpClassifier(NcFactory.create_nc(model))
            elif is_regressor(model):
                self.icp = IcpRegressor(NcFactory.create_nc(model))
        else:
            # Non-sklearn models are wrapped so nonconformist can drive them
            model_adapter = NonConformistAdapter(model)
            if is_classifier(model):
                self.icp = IcpClassifier(ClassifierNc(model_adapter))
            elif is_regressor(model):
                self.icp = IcpRegressor(RegressorNc(model_adapter))
            elif model.__class__.__name__ == "Booster":
                # Raw lightgbm Booster exposes no sklearn tags; treat as regressor
                self.icp = IcpRegressor(RegressorNc(model_adapter))

    def fit(self, x_train: np.ndarray, y_train: np.ndarray):
        r"""Method used to fit the underlying model

        In order to create the prediction and the confidence interval the
        underlying model must be fitted first. This function trains the
        model using the data features `x_train` and the target features
        `y_train`.

        Parameters
        ----------
        x_train: numpy.ndarray
            Array of data features the model will be trained with.
        y_train: numpy.ndarray
            Array of target features the model is trained to predict.

        Returns
        -------
        None
        """
        self.icp.fit(x_train, y_train)

    def calibrate(self, x_cal: np.ndarray, y_cal: np.ndarray):
        r"""Method used to calculate the nonconformity scores

        To create the confidence intervals we need a set of nonconformity
        scores, and, in classification cases, their probability
        distribution. This method uses the fitted underlying model and
        calculates the scores, thus calibrating the inductive conformal
        predictor.

        Parameters
        ----------
        x_cal: numpy.ndarray
            Array of data features used for calibrating the inductive
            conformal predictor.
        y_cal: numpy.ndarray
            Array of target features used for calibrating the inductive
            conformal predictor

        Returns
        -------
        None

        Notes
        -----
        It's very important to make sure the calibration data is different
        from the training data.
        """
        self.icp.calibrate(x_cal, y_cal)

    def predict(self, x_test: np.ndarray, confidence: float):
        r"""Method that returns the prediction and the confidence interval

        This method returns the interval with a confidence level of
        `confidence` and the target predictions for `x_test`. The
        information returned for classification is different from the one
        returned for regression. In classification cases the tuple returned
        has two elements: a numpy.ndarray with a matrix of boolean values
        and a numpy.ndarray that contains the class predictions. On the
        other hand, in regression cases the tuple returned has 3 elements:
        a numpy.ndarray with the lower bound values, a numpy.ndarray with
        the predicted target values and a numpy.ndarray with the upper
        bound values.

        Parameters
        ----------
        x_test: numpy.ndarray
            Array of data features used to predict the target values and
            the confidence interval
        confidence: float
            Float between 0 and 1 that represent the percentage of
            observations we want to be inside the predicted interval.

        Returns
        -------
        prediction: Tuple[numpy.ndarray, numpy.ndarray] or Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
            Tuple containing the confidence interval and the target
            prediction

        Notes
        -----
        The `x_test` data must have the same features as the data used for
        training and calibration, and they must be in the same order. The
        level of confidence has to be a fraction between 0 and 1.
        """
        sig = 1 - confidence
        if is_classifier(self.model):
            return self.icp.predict(x_test, significance=sig), self.model.predict(x_test)
        elif is_regressor(self.model) or isinstance(self.model, lgbm.basic.Booster):
            # Compute the interval once instead of calling predict per bound
            interval = self.icp.predict(x_test, significance=sig)
            return interval[:, 0], self.model.predict(x_test), interval[:, 1]

    def calibrate_and_predict(self, x_cal: np.ndarray, y_cal: np.ndarray,
                              x_test: np.ndarray, confidence: float):
        r"""Method used for calibrating the conformal predictor and predicting target values and the confidence interval

        This method is equal to running the calibrate and the predict
        methods consecutively. Accordingly, the inputs are the calibration
        data `x_cal` and targets `y_cal`, the test data `x_test` and the
        confidence level `confidence` desired for the predicted interval.
        The tuple returned contains the predicted values and the confidence
        interval.

        Parameters
        ----------
        x_cal: numpy.ndarray
            Array of data features used for calibrating the inductive
            conformal predictor.
        y_cal: numpy.ndarray
            Array of target features used for calibrating the inductive
            conformal predictor
        x_test: numpy.ndarray
            Array of data features used to predict the target values and
            the confidence interval
        confidence: float
            Float between 0 and 1 that represent the percentage of
            observations we want to be inside the predicted interval.

        Returns
        -------
        prediction: Tuple[numpy.ndarray, numpy.ndarray] or Tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]
            Tuple containing the confidence interval and the target
            prediction

        Notes
        -----
        Both the calibration and the test data must have the same features
        (scale and order) as the data used for training the underlying
        model. The level of confidence has to be a decimal between 0 and 1.
        """
        # Delegate instead of duplicating the calibrate/predict logic
        self.calibrate(x_cal, y_cal)
        return self.predict(x_test, confidence)
def evaluate(model_filepath, train_filepath, test_filepath, calibrate_filepath):
    """Evaluate model to estimate power.

    Predicts on the test set either directly (no calibration split) or via
    an inductive conformal regressor fitted on the train split and
    calibrated on the calibrate split, then saves predictions, plots and
    MSE/R2 metrics.

    Args:
        model_filepath (str): Path to model.
        train_filepath (str): Path to train set.
        test_filepath (str): Path to test set.
        calibrate_filepath (str): Path to calibrate set.
    """
    METRICS_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)

    # Load parameters
    params = yaml.safe_load(open("params.yaml"))["evaluate"]
    params_train = yaml.safe_load(open("params.yaml"))["train"]
    params_split = yaml.safe_load(open("params.yaml"))["split"]

    # Test arrays are stored as an .npz with "X" and "y" entries
    test = np.load(test_filepath)
    X_test = test["X"]
    y_test = test["y"]

    # pandas data frame to store predictions and ground truth.
    df_predictions = None

    y_pred = None

    if params_split["calibrate_split"] == 0:
        # No calibration split configured: plain point predictions
        model = models.load_model(model_filepath)
        y_pred = model.predict(X_test)
    else:
        trained_model = models.load_model(model_filepath)
        # mycustommodel = MyCustomModel(model_filepath)
        mycustommodel = MyCustomModel(trained_model)
        # NOTE(review): m is built for the commented-out normalizer below and
        # is currently unused — confirm before removing
        m = cnn(X_test.shape[-2],
                X_test.shape[-1],
                output_length=1,
                kernel_size=params_train["kernel_size"])

        nc = RegressorNc(
            mycustommodel,
            err_func=AbsErrorErrFunc(),  # non-conformity function
            # normalizer_model=KNeighborsRegressor(n_neighbors=15) # normalizer
            # normalizer=m
        )

        # nc = NcFactory.create_nc(mycustommodel,
        #         err_func=AbsErrorErrFunc(), # non-conformity function
        #         # normalizer_model=KNeighborsRegressor(n_neighbors=15) # normalizer
        #         normalizer_model=m
        # )

        model = IcpRegressor(nc)

        # Fit the normalizer.
        train = np.load(train_filepath)
        X_train = train["X"]
        y_train = train["y"]
        # nonconformist expects 1-D targets
        y_train = y_train.reshape((y_train.shape[0], ))
        model.fit(X_train, y_train)

        # Calibrate model.
        calibrate = np.load(calibrate_filepath)
        X_calibrate = calibrate["X"]
        y_calibrate = calibrate["y"]
        y_calibrate = y_calibrate.reshape((y_calibrate.shape[0], ))
        model.calibrate(X_calibrate, y_calibrate)

        print(f"Calibration: {X_calibrate.shape}")

        # Set conformal prediction error. This should be a parameter specified by the user.
        error = 0.05

        # Predictions will contain the intervals. We need to compute the middle
        # points to get the actual predictions y.
        predictions = model.predict(X_test, significance=error)

        # Compute middle points.
        y_pred = predictions[:, 0] + (predictions[:, 1] - predictions[:, 0]) / 2

        # Reshape to put it in the same format as without calibration set.
        y_pred = y_pred.reshape((y_pred.shape[0], 1))

        # Build data frame with predictions.
        my_results = list(
            zip(np.reshape(y_test, (y_test.shape[0], )),
                np.reshape(y_pred, (y_pred.shape[0], )), predictions[:, 0],
                predictions[:, 1]))

        df_predictions = pd.DataFrame(my_results,
                                      columns=[
                                          'ground_truth', 'predicted',
                                          'lower_bound', 'upper_bound'
                                      ])

        save_predictions(df_predictions)

        plot_intervals(df_predictions)

    # Point-prediction quality metrics (both branches)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("MSE: {}".format(mse))
    print("R2: {}".format(r2))

    plot_prediction(y_test, y_pred, inputs=X_test, info="(R2: {})".format(r2))
    plot_individual_predictions(y_test, y_pred)

    with open(METRICS_FILE_PATH, "w") as f:
        json.dump(dict(mse=mse, r2=r2), f)