def test_temperature_positive(p_cal_binary, y_cal_binary):
    """Fitting temperature scaling on binary calibration data must yield T > 0."""
    scaler = calm.TemperatureScaling()
    scaler.fit(p_cal_binary, y_cal_binary)

    # The learned temperature parameter must be strictly positive.
    assert scaler.T > 0, "Temperature is not positive."
def test_constant_accuracy(p_cal_binary, y_cal_binary):
    """Temperature scaling must not change top-1 accuracy on the calibration set."""
    # Accuracy of the raw (uncalibrated) scores.
    raw_labels = np.argmax(p_cal_binary, axis=1)
    acc_before = np.mean(np.equal(raw_labels, y_cal_binary))

    # Fit temperature scaling on the same data.
    scaler = calm.TemperatureScaling()
    scaler.fit(p_cal_binary, y_cal_binary)

    # Accuracy after calibration; argmax is invariant under temperature scaling.
    calibrated = scaler.predict_proba(p_cal_binary)
    acc_after = np.mean(np.equal(np.argmax(calibrated, axis=1), y_cal_binary))
    assert acc_before == acc_after, "Accuracy of calibrated probabilities does not match accuracy of calibration set."
Beispiel #3
0
            # Iterate through data sets, calibrate and plot latent functions.
            # NOTE(review): fragment — the enclosing function and the definitions of
            # `benchmark`, `use_logits`, `n_classes`, `random_state` are outside this view.
            for (i_clf, (Z, y, info_dict)) in enumerate(benchmark.data_gen()):

                # Take the first cross-validation split as the calibration/test partition.
                cal_ind, test_ind = next(benchmark.cross_validator.split(Z, y))
                Z_cal = Z[cal_ind, :]
                y_cal = y[cal_ind]
                Z_test = Z[test_ind, :]
                y_test = y[test_ind]
                # Flattened calibration scores — presumably for a histogram plot; confirm downstream use.
                hist_data = Z_cal.flatten()

                # Baseline: no calibration (identity), honoring the logits flag.
                nocal = calm.NoCalibration(logits=use_logits)

                # Temperature scaling fitted on the calibration fold.
                ts = calm.TemperatureScaling()
                ts.fit(Z_cal, y_cal)

                # GP-based calibration with sparse inducing-point approximation.
                gpc = calm.GPCalibration(n_classes=n_classes,
                                         maxiter=1000,
                                         n_inducing_points=10,
                                         logits=use_logits,
                                         verbose=True,
                                         random_state=random_state)
                gpc.fit(Z_cal, y_cal)

                # # Compute calibration error
                # ECE_nocal = pycalib.scoring.expected_calibration_error(y_test, nocal.predict_proba(Z_test), n_bins=100)
                # ECE_ts = pycalib.scoring.expected_calibration_error(y_test, ts.predict_proba(Z_test), n_bins=100)
                # ECE_gpc = pycalib.scoring.expected_calibration_error(y_test, gpc.predict_proba(Z_test), n_bins=100)
Beispiel #4
0
    # NOTE(review): fragment — the enclosing function and the definitions of
    # `random_state`, `run_dir`, `clf_output_dir`, `classifier_names` are outside this view.
    # Build a scalar-multiplication mean function with a positivity constraint on alpha;
    # defer_build delays gpflow graph construction until the transform is set.
    with gpflow.defer_build():
        meanfunc = pycalib.gp_classes.ScalarMult()
        meanfunc.alpha.transform = gpflow.transforms.positive
    # Calibration methods to benchmark, keyed by display name.
    cal_methods = {
        "Uncal": calm.NoCalibration(),
        "GPcalib": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10, logits=False,
                                      random_state=random_state),
        # Same GP calibration but with the constrained linear (scalar-mult) mean function.
        "GPcalib_lin": calm.GPCalibration(n_classes=10, maxiter=1000, mean_function=meanfunc,
                                          n_inducing_points=10, logits=False,
                                          random_state=random_state),
        # GP calibration using the mean-approximation inference variant.
        "GPcalib_approx": calm.GPCalibration(n_classes=10, maxiter=1000, n_inducing_points=10,
                                             logits=False, random_state=random_state, inf_mean_approx=True),
        "Platt": calm.PlattScaling(random_state=random_state),
        "Isotonic": calm.IsotonicRegression(),
        "Beta": calm.BetaCalibration(),
        "BBQ": calm.BayesianBinningQuantiles(),
        "Temp": calm.TemperatureScaling()
    }


    # Create benchmark object over the MNIST classifier outputs.
    mnist_benchmark = pycalib.benchmark.MNISTData(run_dir=run_dir, clf_output_dir=clf_output_dir,
                                                  classifier_names=classifier_names,
                                                  cal_methods=list(cal_methods.values()),
                                                  cal_method_names=list(cal_methods.keys()),
                                                  n_splits=10, test_size=9000,
                                                  train_size=1000, random_state=random_state)

    # Run the benchmark sequentially (single worker).
    mnist_benchmark.run(n_jobs=1)
Beispiel #5
0
    # Define calibration methods to evaluate, keyed by display name.
    # NOTE(review): fragment — the enclosing function and the definitions of
    # `n_classes`, `dir_out`, `beta_params`, `miscal_function_names`, `size_list`
    # are outside this view.
    cal_methods_dict = {
        "No_Calibration":
        cm.NoCalibration(),
        "Platt_scaling":
        cm.PlattScaling(),
        "Isotonic_Regression":
        cm.IsotonicRegression(),
        "Beta_Calibration":
        cm.BetaCalibration(params='abm'),
        "Histogram_Binning":
        cm.HistogramBinning(mode='equal_freq'),
        "Bayesian_Binning_into_Quantiles":
        cm.BayesianBinningQuantiles(),
        "Temperature_Scaling":
        cm.TemperatureScaling(verbose=False),
        "GP_calibration":
        cm.GPCalibration(n_classes=n_classes,
                         maxiter=300,
                         n_inducing_points=100)
    }

    # Evaluate calibration methods on synthetic Beta-distributed scores.
    # NOTE(review): this call continues past the end of the visible chunk.
    sb = bm.SyntheticBeta(
        run_dir=dir_out,
        cal_methods=list(cal_methods_dict.values()),
        cal_method_names=list(cal_methods_dict.keys()),
        beta_params=beta_params,
        miscal_functions=list(miscal_function_names.values()),
        miscal_function_names=list(miscal_function_names.keys()),
        size=size_list,