# Imports needed by the tests below; `calm` aliases pycalib's calibration methods module
import numpy as np

import pycalib.calibration_methods as calm


def test_temperature_positive(p_cal_binary, y_cal_binary):
    # Fit temperature scaling
    ts = calm.TemperatureScaling()
    ts.fit(p_cal_binary, y_cal_binary)

    # The fitted temperature must be positive
    assert ts.T > 0, "Temperature is not positive."
def test_constant_accuracy(p_cal_binary, y_cal_binary):
    # Compute accuracy
    acc = np.mean(np.equal(np.argmax(p_cal_binary, axis=1), y_cal_binary))

    # Temperature Scaling
    ts = calm.TemperatureScaling()
    ts.fit(p_cal_binary, y_cal_binary)

    # Test constant accuracy on calibration set
    p_ts = ts.predict_proba(p_cal_binary)
    acc_ts = np.mean(np.equal(np.argmax(p_ts, axis=1), y_cal_binary))
    assert acc == acc_ts, "Accuracy of calibrated probabilities does not match accuracy of calibration set."
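# Hedged sketch (an assumption, not the project's actual conftest): pytest
# fixtures that would supply the binary calibration data the two tests above
# expect, i.e. an (n, 2) array of class probabilities and matching 0/1 labels.
# The shapes and seeds are illustrative choices only.
import numpy as np
import pytest


@pytest.fixture
def p_cal_binary():
    rng = np.random.RandomState(0)
    p = rng.rand(100, 2)
    return p / p.sum(axis=1, keepdims=True)  # normalize rows to valid probabilities


@pytest.fixture
def y_cal_binary():
    rng = np.random.RandomState(1)
    return rng.randint(0, 2, size=100)  # binary labels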
# Iterate through data sets, calibrate and plot latent functions
for (i_clf, (Z, y, info_dict)) in enumerate(benchmark.data_gen()):

    # Train, test split
    cal_ind, test_ind = next(benchmark.cross_validator.split(Z, y))
    Z_cal = Z[cal_ind, :]
    y_cal = y[cal_ind]
    Z_test = Z[test_ind, :]
    y_test = y[test_ind]
    hist_data = Z_cal.flatten()

    # Calibrate
    nocal = calm.NoCalibration(logits=use_logits)

    ts = calm.TemperatureScaling()
    ts.fit(Z_cal, y_cal)

    gpc = calm.GPCalibration(n_classes=n_classes, maxiter=1000, n_inducing_points=10,
                             logits=use_logits, verbose=True, random_state=random_state)
    gpc.fit(Z_cal, y_cal)

    # # Compute calibration error
    # ECE_nocal = pycalib.scoring.expected_calibration_error(y_test, nocal.predict_proba(Z_test), n_bins=100)
    # ECE_ts = pycalib.scoring.expected_calibration_error(y_test, ts.predict_proba(Z_test), n_bins=100)
    # ECE_gpc = pycalib.scoring.expected_calibration_error(y_test, gpc.predict_proba(Z_test), n_bins=100)
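# Hedged sketch (not part of pycalib): the commented-out block above relies on
# pycalib.scoring.expected_calibration_error. For reference, a minimal
# plain-numpy version of the same quantity; the function name and the
# equal-width binning convention are our assumptions.
import numpy as np


def expected_calibration_error_sketch(y_true, p_pred, n_bins=100):
    conf = np.max(p_pred, axis=1)                  # confidence of the predicted class
    correct = (np.argmax(p_pred, axis=1) == y_true).astype(float)
    bin_edges = np.linspace(0.0, 1.0, n_bins + 1)
    bin_idx = np.digitize(conf, bin_edges[1:-1])   # bin index for each sample
    ece = 0.0
    for b in range(n_bins):
        mask = bin_idx == b
        if np.any(mask):
            # weight each bin's |accuracy - confidence| gap by its share of samples
            ece += mask.mean() * np.abs(correct[mask].mean() - conf[mask].mean())
    return ece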
with gpflow.defer_build():
    meanfunc = pycalib.gp_classes.ScalarMult()
    meanfunc.alpha.transform = gpflow.transforms.positive

cal_methods = {
    "Uncal": calm.NoCalibration(),
    "GPcalib": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10,
                                  logits=False, random_state=random_state),
    "GPcalib_lin": calm.GPCalibration(n_classes=10, maxiter=1000, mean_function=meanfunc,
                                      n_inducing_points=10, logits=False, random_state=random_state),
    "GPcalib_approx": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10,
                                         logits=False, random_state=random_state,
                                         inf_mean_approx=True),
    "Platt": calm.PlattScaling(random_state=random_state),
    "Isotonic": calm.IsotonicRegression(),
    "Beta": calm.BetaCalibration(),
    "BBQ": calm.BayesianBinningQuantiles(),
    "Temp": calm.TemperatureScaling()
}

# Create benchmark object
mnist_benchmark = pycalib.benchmark.MNISTData(run_dir=run_dir,
                                              clf_output_dir=clf_output_dir,
                                              classifier_names=classifier_names,
                                              cal_methods=list(cal_methods.values()),
                                              cal_method_names=list(cal_methods.keys()),
                                              n_splits=10, test_size=9000, train_size=1000,
                                              random_state=random_state)

# Run
mnist_benchmark.run(n_jobs=1)
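# Hedged sketch (an assumption, not pycalib's actual implementation): the
# pycalib.gp_classes.ScalarMult used above is a latent-GP mean function of the
# form m(x) = alpha * x with a trainable scalar alpha. Under the GPflow 1.x
# API that `defer_build` implies, such a mean function could look like this.
import gpflow


class ScalarMultSketch(gpflow.mean_functions.MeanFunction):
    """Mean function m(x) = alpha * x with a trainable scalar alpha (sketch)."""

    def __init__(self):
        gpflow.mean_functions.MeanFunction.__init__(self)
        self.alpha = gpflow.Param(1.0)  # transform can be set afterwards, as above

    @gpflow.params_as_tensors
    def __call__(self, X):
        return self.alpha * X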
# Define calibration methods
cal_methods_dict = {
    "No_Calibration": cm.NoCalibration(),
    "Platt_scaling": cm.PlattScaling(),
    "Isotonic_Regression": cm.IsotonicRegression(),
    "Beta_Calibration": cm.BetaCalibration(params='abm'),
    "Histogram_Binning": cm.HistogramBinning(mode='equal_freq'),
    "Bayesian_Binning_into_Quantiles": cm.BayesianBinningQuantiles(),
    "Temperature_Scaling": cm.TemperatureScaling(verbose=False),
    "GP_calibration": cm.GPCalibration(n_classes=n_classes, maxiter=300, n_inducing_points=100)
}

# Evaluate calibration methods
sb = bm.SyntheticBeta(run_dir=dir_out,
                      cal_methods=list(cal_methods_dict.values()),
                      cal_method_names=list(cal_methods_dict.keys()),
                      beta_params=beta_params,
                      miscal_functions=list(miscal_function_names.values()),
                      miscal_function_names=list(miscal_function_names.keys()),
                      size=size_list,