def test_inference_mean_approximation(p_cal_binary, y_cal_binary):
    # GP calibration
    gpc = calm.GPCalibration(n_classes=2, logits=False, random_state=42)
    gpc.fit(p_cal_binary, y_cal_binary)

    # Inference: mean approximation
    p_gpc = gpc.predict_proba(p_cal_binary, mean_approx=True)

    # Check for NaNs in predictions
    assert not np.any(np.isnan(p_gpc)), "Calibrated probabilities of the mean approximation are NaN."
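
# A further sanity check (sketch, assuming predict_proba returns an
# (n_samples, n_classes) array as in the rest of this test suite): besides
# being NaN-free, the mean-approximation output should be a valid
# probability distribution. The test name is illustrative.
def test_inference_mean_approximation_is_distribution(p_cal_binary, y_cal_binary):
    gpc = calm.GPCalibration(n_classes=2, logits=False, random_state=42)
    gpc.fit(p_cal_binary, y_cal_binary)
    p_gpc = gpc.predict_proba(p_cal_binary, mean_approx=True)

    # Entries lie in [0, 1] and each row sums to one.
    assert np.all(p_gpc >= 0) and np.all(p_gpc <= 1), "Probabilities outside [0, 1]."
    assert np.allclose(p_gpc.sum(axis=1), 1, atol=1e-6), "Rows do not sum to one."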
cal_ind, test_ind = next(benchmark.cross_validator.split(Z, y))
Z_cal = Z[cal_ind, :]
y_cal = y[cal_ind]
Z_test = Z[test_ind, :]
y_test = y[test_ind]
hist_data = Z_cal.flatten()

# Calibrate
nocal = calm.NoCalibration(logits=use_logits)

ts = calm.TemperatureScaling()
ts.fit(Z_cal, y_cal)

gpc = calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                         logits=use_logits, verbose=True, random_state=random_state)
gpc.fit(Z_cal, y_cal)

# # Compute calibration error
# ECE_nocal = pycalib.scoring.expected_calibration_error(y_test, nocal.predict_proba(Z_test), n_bins=100)
# ECE_ts = pycalib.scoring.expected_calibration_error(y_test, ts.predict_proba(Z_test), n_bins=100)
# ECE_gpc = pycalib.scoring.expected_calibration_error(y_test, gpc.predict_proba(Z_test), n_bins=100)

# Plot reliability diagrams
if not os.path.exists(os.path.join(folder_path, "reliability_diagrams")):
    os.makedirs(os.path.join(folder_path, "reliability_diagrams"))

p_pred_nocal = nocal.predict_proba(Z_test)
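
# Sketch of a manual reliability diagram for the uncalibrated predictions,
# assuming matplotlib.pyplot is imported as plt (as in the plotting scripts
# elsewhere in this repo) and that labels are 0..n_classes-1. Confidence is
# the maximum predicted probability; the bin count (10) is illustrative.
conf = np.max(p_pred_nocal, axis=1)
correct = (np.argmax(p_pred_nocal, axis=1) == y_test).astype(float)
bin_edges = np.linspace(0, 1, 11)
bin_ids = np.clip(np.digitize(conf, bin_edges) - 1, 0, 9)
acc_bin = [correct[bin_ids == b].mean() for b in range(10) if np.any(bin_ids == b)]
conf_bin = [conf[bin_ids == b].mean() for b in range(10) if np.any(bin_ids == b)]

fig_rel, ax_rel = plt.subplots()
ax_rel.plot([0, 1], [0, 1], "k--", label="perfect calibration")
ax_rel.plot(conf_bin, acc_bin, "o-", label="uncalibrated")
ax_rel.set_xlabel("confidence")
ax_rel.set_ylabel("accuracy")
ax_rel.legend()
fig_rel.savefig(os.path.join(folder_path, "reliability_diagrams", "nocal_manual.pdf"))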
run_dir = os.path.join(file, "calibration")

# Classifiers
classifier_names = list(clf_dict.keys())

# Calibration models
with gpflow.defer_build():
    meanfunc = pycalib.gp_classes.ScalarMult()
    meanfunc.alpha.transform = gpflow.transforms.positive

cal_methods = {
    "Uncal": calm.NoCalibration(),
    "GPcalib": calm.GPCalibration(n_classes=2, maxiter=1000, n_inducing_points=10,
                                  logits=False, random_state=random_state),
    "GPcalib_lin": calm.GPCalibration(n_classes=2, maxiter=1000, mean_function=meanfunc,
                                      n_inducing_points=10, logits=False,
                                      random_state=random_state),
    "GPcalib_approx": calm.GPCalibration(n_classes=2, maxiter=1000, n_inducing_points=10,
                                         logits=False, random_state=random_state,
random_state = 1
clf_output_dir = os.path.join(file, output_folder)
run_dir = os.path.join(file, "calibration")
n_classes = 100
train_size = 1000
test_size = 9000

# Calibration methods for logits
with gpflow.defer_build():
    meanfunc = pycalib.gp_classes.ScalarMult()
    meanfunc.alpha.transform = gpflow.transforms.positive

cal_methods_logits = {
    "Uncal": calm.NoCalibration(logits=True),
    "GPcalib_lin": calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                                      mean_function=meanfunc, logits=True, verbose=False,
                                      random_state=random_state),
    "GPcalib": calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                                  logits=True, random_state=random_state),
    "GPcalib_approx": calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                                         logits=True, random_state=random_state,
                                         inf_mean_approx=True),
    "Temp": calm.TemperatureScaling()
}

# Create benchmark object
cifar_benchmark = pycalib.benchmark.CIFARData(run_dir=run_dir, clf_output_dir=clf_output_dir,
                                              classifier_names=clf_names,
                                              cal_methods=list(cal_methods_logits.values()),
                                              cal_method_names=list(cal_methods_logits.keys()),
                                              use_logits=True, n_splits=10,
                                              test_size=test_size, train_size=train_size,
                                              random_state=random_state)
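
# Sketch: score one calibration method on a single CV fold of the benchmark
# data. The data_gen() and cross_validator calls mirror their usage in the
# other benchmark scripts of this repo, and the scoring call follows the
# pycalib.scoring.expected_calibration_error examples used elsewhere; both
# are assumptions here, not part of this script.
for Z, y, info_dict in cifar_benchmark.data_gen():
    cal_ind, test_ind = next(cifar_benchmark.cross_validator.split(Z, y))
    gpc = cal_methods_logits["GPcalib"]
    gpc.fit(Z[cal_ind, :], y[cal_ind])
    ece = pycalib.scoring.expected_calibration_error(
        y[test_ind], gpc.predict_proba(Z[test_ind, :]), n_bins=100)
    print("{}: ECE = {:.4f}".format(info_dict, ece))
    break  # one data set suffices for this illustration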
###############################
# Benchmark
###############################

# Initialization
run_dir = os.path.join(file, "calibration")
clf_output_dir = os.path.join(file, output_folder)

# Classifiers
classifier_names = list(clf_dict.keys())

# Calibration methods
cal_methods = {
    "Uncal": calm.NoCalibration(),
    "GPcalib": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10,
                                  logits=False, random_state=random_state),
    "GPcalib_approx": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10,
                                         logits=False, random_state=random_state,
                                         inf_mean_approx=True),
    "Platt": calm.PlattScaling(random_state=random_state),
    "Isotonic": calm.IsotonicRegression(),
    "Beta": calm.BetaCalibration(),
    "BBQ": calm.BayesianBinningQuantiles(),
    "Temp": calm.TemperatureScaling()
}

# Create benchmark object
mnist_benchmark = pycalib.benchmark.MNISTData(run_dir=run_dir, clf_output_dir=clf_output_dir,
                                              classifier_names=classifier_names,
                                              cal_methods=list(cal_methods.values()),
                                              cal_method_names=list(cal_methods.keys()),
                                              n_splits=10, test_size=9000,
metrics_test["err"].append(1 - metr.accuracy_score( y_true=y_test, y_pred=y_pred_test, normalize=True)) metrics_test["iter"].append(mlp.n_iter_) metrics_train["ECE"].append( meas.expected_calibration_error(y=y_train, p_pred=p_pred_train, n_classes=10)) metrics_train["NLL"].append( metr.log_loss(y_true=y_train, y_pred=p_pred_train)) metrics_train["err"].append(1 - metr.accuracy_score( y_true=y_train, y_pred=y_pred_train, normalize=True)) metrics_train["iter"].append(mlp.n_iter_) # Calibration gpc = calm.GPCalibration(n_classes=len(np.unique(y)), random_state=random_state) gpc.fit(mlp.predict_proba(X_cal), y_cal) gpc.plot_latent(filename=os.path.join(dir_path, "gpc_latent"), z=np.linspace(start=10**-3, stop=1, num=1000)) p_calib = gpc.predict_proba(p_pred_test) ece_calib = meas.expected_calibration_error(y=y_test, p_pred=p_calib) acc_calib = meas.accuracy(y=y_test, p_pred=p_calib) nll_calib = metr.log_loss(y_true=y_test, y_pred=p_calib) # Save data to file json.dump(metrics_train, open(os.path.join(dir_path, "metrics_train.txt"), 'w')) json.dump(metrics_test, open(os.path.join(dir_path, "metrics_test.txt"), 'w')) json.dump( {
# Benchmark
###############################

# Seed and directories
random_state = 1
clf_output_dir = os.path.join(file, "clf_output")
run_dir = os.path.join(file, "calibration")

# Calibration methods for logits
cal_methods_logits = {
    "Uncal": calm.NoCalibration(logits=True),
    "GPcalib": calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                                  logits=True, random_state=random_state),
    "GPcalib_approx": calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                                         logits=True, random_state=random_state,
                                         inf_mean_approx=True),
    "Temp": calm.TemperatureScaling()
}

# Create benchmark object
imnet_benchmark = pycalib.benchmark.ImageNetData(
def plot_feature_space_level_set(seed, dir_out='pycalib/out/synthetic_data/'):
    import sklearn.datasets
    from sklearn.neural_network import MLPClassifier
    import matplotlib.colors

    # Setup
    train_size = 1000
    cal_size = 100
    noise = .25
    contour_levels = 10

    # Generate 2D classification dataset
    np.random.seed(seed)
    X, y = sklearn.datasets.make_circles(n_samples=train_size, noise=noise)

    # Train classifier
    clf = MLPClassifier(hidden_layer_sizes=[10, 10], alpha=1, max_iter=200)
    clf.fit(X, y)

    # Scatter plot, dots colored by class value
    df = pd.DataFrame(dict(x=X[:, 0], y=X[:, 1], label=y))
    markers = {0: 'x', 1: '.'}
    fig, ax = texfig.subplots(width=8, ratio=.3, nrows=1, ncols=3, sharex=True, sharey=True)
    # grouped = df.groupby('label')
    # for key, group in grouped:
    #     group.plot(ax=ax[0], kind='scatter', x='x', y='y', label=key, marker=markers[key],
    #                color='gray', alpha=.75)

    # Put the result into a color plot
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    if hasattr(clf, "decision_function"):
        Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    else:
        p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        Z = p_pred[:, 1]
    Z0 = Z.reshape(xx.shape)

    # Colormap, kept distinct from the calibration-models alias `cm` used below
    cmap = plt.cm.RdBu_r
    cmap_bright = matplotlib.colors.ListedColormap(['#FF0000', '#0000FF'])
    cont0 = ax[0].contourf(xx, yy, Z0, cmap=cmap, alpha=.8, levels=contour_levels, vmin=0, vmax=1)
    ax[0].set_title("Classification Uncertainty")

    # Calibrate
    X_cal, y_cal = sklearn.datasets.make_circles(n_samples=cal_size, noise=noise)
    p_cal = clf.predict_proba(X_cal)
    clf_cal = cm.GPCalibration(SVGP=True)
    clf_cal.fit(p_cal, y_cal)

    # Calibrated contour plot
    Z1 = clf_cal.predict_proba(p_pred)[:, 1].reshape(xx.shape)
    cont1 = ax[1].contourf(xx, yy, Z1, cmap=cmap, alpha=.8, levels=contour_levels, vmin=0, vmax=1)
    ax[1].set_title("Calibrated Uncertainty")

    # Difference plot
    cm_diff = plt.cm.viridis_r  # colormap
    cont2 = ax[2].contourf(xx, yy, Z1 - Z0, cmap=cm_diff, alpha=.8)
    ax[2].set_title("Uncertainty Difference")

    # Color bar
    # fig.subplots_adjust(right=0.8)
    # cbar_ax = fig.add_axes([.96, 0.15, 0.05, 0.7])
    # cbar = fig.colorbar(cont1, cax=cbar_ax)

    # Contour labels
    # ax[0].clabel(cont0, inline=1, fontsize=8)
    # ax[1].clabel(cont1, inline=1, fontsize=8)

    texfig.savefig(dir_out + '/plots/' + 'level_sets')
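
# Example usage (sketch; the seed and output directory are illustrative, and
# dir_out must already contain a 'plots/' subdirectory for texfig.savefig):
# plot_feature_space_level_set(seed=0, dir_out='pycalib/out/synthetic_data/')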
    cm.NoCalibration(),
    "Platt_scaling": cm.PlattScaling(),
    "Isotonic_Regression": cm.IsotonicRegression(),
    "Beta_Calibration": cm.BetaCalibration(params='abm'),
    "Histogram_Binning": cm.HistogramBinning(mode='equal_freq'),
    "Bayesian_Binning_into_Quantiles": cm.BayesianBinningQuantiles(),
    "Temperature_Scaling": cm.TemperatureScaling(verbose=False),
    "GP_calibration": cm.GPCalibration(n_classes=n_classes, maxiter=300, n_inducing_points=100)
}

# Evaluate calibration methods
sb = bm.SyntheticBeta(run_dir=dir_out,
                      cal_methods=list(cal_methods_dict.values()),
                      cal_method_names=list(cal_methods_dict.keys()),
                      beta_params=beta_params,
                      miscal_functions=list(miscal_function_names.values()),
                      miscal_function_names=list(miscal_function_names.keys()),
                      size=size_list,
                      marginal_probs=marginal_probs,
                      n_splits=10, test_size=0.9,
# Synthetic data
def miscal(z):
    return z ** 3

n_classes = 4
Z, y, info_dict = bm.SyntheticBeta.sample_miscal_data(alpha=2, beta=.75,
                                                      miscal_func=miscal,
                                                      miscal_func_name="power",
                                                      size=100,
                                                      marginal_probs=np.ones(n_classes) / n_classes,
                                                      random_state=0)

# Calibrate
gpc = calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10, verbose=True)
gpc.fit(Z, y)

# Plot
file = "/home/j/Documents/research/projects/nonparametric_calibration/" + \
       "pycalib/figures/gpcalib_illustration/latent_process"
gpc.plot_latent(z=np.linspace(start=.001, stop=1, num=1000), filename=file,
                plot_classes=True, ratio=0.5, gridspec_kw={'height_ratios': [3, 2]})
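
# Quick in-sample check (sketch): compare calibration error before and after
# fitting. This scores the calibration data itself, so it only illustrates
# the API; it assumes pycalib.scoring is imported, with the same
# expected_calibration_error signature used in the benchmark scripts.
ece_before = pycalib.scoring.expected_calibration_error(y, Z, n_bins=100)
ece_after = pycalib.scoring.expected_calibration_error(y, gpc.predict_proba(Z), n_bins=100)
print("ECE before: {:.4f}, after: {:.4f}".format(ece_before, ece_after))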
    cal_method_names=[],
    use_logits=use_logits,
    n_splits=1, test_size=10000, train_size=1000,
    random_state=random_state)

with gpflow.defer_build():
    meanfunc = pycalib.gp_classes.ScalarMult()
    meanfunc.alpha.transform = gpflow.transforms.positive

cal_methods = {
    "GPcalib": calm.GPCalibration(n_classes=n_classes, maxiter=300, n_inducing_points=10,
                                  mean_function=meanfunc, logits=use_logits, verbose=False,
                                  random_state=random_state),
    "Temp": calm.TemperatureScaling()
}

# Iterate through data sets, calibrate and plot latent functions
for Z, y, info_dict in imagenet_benchmark_data.data_gen():
    # Train, test split
    for i, (cal_index, test_index) in enumerate(imagenet_benchmark_data.cross_validator.split(Z)):
        print("CV iteration: {}".format(i + 1))
        Z_cal = Z[cal_index, :]
                random_state=random_state, verbose=0)

# Active learning
al_exp = al.ActiveLearningExperiment(classifier=mf,
                                     X_train=X_train, y_train=y_train,
                                     X_test=X_test, y_test=y_test,
                                     query_criterion=al.query_norm_entropy,
                                     uncertainty_thresh=0.25,
                                     pretrain_size=500,
                                     calibration_method=calib.GPCalibration(n_classes=n_classes,
                                                                            maxiter=1000,
                                                                            n_inducing_points=10,
                                                                            verbose=False,
                                                                            random_state=random_state),
                                     # calibration_method=calib.TemperatureScaling(),
                                     calib_size=250,
                                     calib_points=[500, 2000, 3500],
                                     batch_size=250)

# result_df = al_exp.run(n_cv=10, random_state=random_state)

# Save to file
results_dir = "/home/j/Documents/research/projects/nonparametric_calibration/pycalib/figures/active_learning/"
# al_exp.save_result(file=results_dir)
al_exp.result_df = al_exp.load_result(file=os.path.join(results_dir, "active_learning_results.csv"))
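
# Quick inspection of the loaded results (sketch; assumes load_result returns
# a pandas DataFrame, as the attribute name result_df and the CSV file
# suggest — the exact columns depend on ActiveLearningExperiment):
print(al_exp.result_df.head())
print(al_exp.result_df.columns.tolist())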