Example #1
# assumed imports: calibration models from the netcal package;
# single_example is provided by the adjacent ../utils.py
from netcal.binning import HistogramBinning, IsotonicRegression, BBQ, ENIR
from netcal.scaling import LogisticCalibration, TemperatureScaling, BetaCalibration

def example_calibration(datafile: str) -> int:
    """
    Example of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to a data file that contains two NumPy arrays with keys 'ground_truth' and 'predictions'.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    bins = 10

    # set to None instead to skip rendering the reliability diagrams
    diagram = 'diagram'

    # validation split used to divide the data into build and test sets
    validation_split = 0.7

    # if True, a pickle file is written for each calibration model built
    save_models = False

    histogram = HistogramBinning(bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration()
    temperature = TemperatureScaling()
    betacal = BetaCalibration()

    models = [("Histogram Binning", histogram), ("Isotonic Regression", iso),
              ("BBQ", bbq), ("ENIR", enir),
              ("Logistic Calibration", lr_calibration),
              ("Temperature Scaling", temperature),
              ("Beta Calibration", betacal)]

    # see ../utils.py for calibration and its measurement
    success = single_example(models=models,
                             datafile=datafile,
                             bins=bins,
                             diagram=diagram,
                             validation_split=validation_split,
                             save_models=save_models)

    return success
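
A minimal sketch of building a compatible data file, assuming the file is a NumPy .npz archive holding the two keys named in the docstring (the file name and shapes are illustrative):

import numpy as np

# hypothetical data: 1000 samples with binary labels and predicted confidences
rng = np.random.default_rng(0)
ground_truth = rng.integers(0, 2, size=1000)    # labels in {0, 1}
predictions = rng.uniform(0.0, 1.0, size=1000)  # confidences in [0, 1]

# write the archive under the keys expected by example_calibration
np.savez("example_data.npz", ground_truth=ground_truth, predictions=predictions)

example_calibration("example_data.npz")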
Example #2
# assumed imports: calibration models from the netcal package;
# cross_validation_5_2 is provided by the adjacent ../utils.py
from netcal.binning import HistogramBinning, IsotonicRegression, BBQ, ENIR
from netcal.scaling import LogisticCalibration, TemperatureScaling, BetaCalibration

def cross_validation(datafile: str) -> int:
    """
    5x2 cross validation of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to a data file that contains two NumPy arrays with keys 'ground_truth' and 'predictions'.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    bins = 10

    # if True, a pickle file is written for each calibration model built
    save_models = False

    histogram = HistogramBinning(bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration()
    temperature = TemperatureScaling()
    betacal = BetaCalibration()

    models = [("Histogram Binning", histogram), ("Isotonic Regression", iso),
              ("BBQ", bbq), ("ENIR", enir),
              ("Logistic Calibration", lr_calibration),
              ("Temperature Scaling", temperature),
              ("Beta Calibration", betacal)]

    # invoke cross validation function from ../utils.py
    # see ../utils.py for calibration and its measurement
    success = cross_validation_5_2(models=models,
                                   datafile=datafile,
                                   bins=bins,
                                   save_models=save_models)

    return success
Example #3
# assumed: the calibration classes are imported from netcal.binning / netcal.scaling
def get_calibrator_fn(self, cfg):
    if cfg.METHOD.NAME == "HistogramBinning":
        bins = cfg.METHOD.HIST.N_BINS
        if bins == -1:
            # -1 means: fall back to the bin count used for testing
            bins = cfg.TEST.N_BINS
        return HistogramBinning(bins, detection=True)
    elif cfg.METHOD.NAME == "IsotonicRegression":
        return IsotonicRegression(detection=True)
    elif cfg.METHOD.NAME == "BBQ":
        return BBQ(score_function=cfg.METHOD.BBQ.SCORE_FN, detection=True)
    elif cfg.METHOD.NAME == "ENIR":
        return ENIR(score_function=cfg.METHOD.ENIR.SCORE_FN, detection=True)
    elif cfg.METHOD.NAME == "LogisticCalibration":
        return LogisticCalibration(detection=True)
    elif cfg.METHOD.NAME == "BetaCalibration":
        return BetaCalibration(detection=True)
    elif cfg.METHOD.NAME == "TemperatureScaling":
        return TemperatureScaling(detection=True)
    else:
        raise NotImplementedError(f"Unknown calibration method: {cfg.METHOD.NAME}")
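
A sketch of the config object this dispatch expects; the real cfg is presumably a yacs-style CfgNode, and SimpleNamespace here is just a self-contained stand-in mirroring the attribute accesses above:

from types import SimpleNamespace

# hypothetical config: select histogram binning and fall back to the test bin count
cfg = SimpleNamespace(
    METHOD=SimpleNamespace(
        NAME="HistogramBinning",
        HIST=SimpleNamespace(N_BINS=-1),  # -1 falls back to cfg.TEST.N_BINS
    ),
    TEST=SimpleNamespace(N_BINS=15),
)
# get_calibrator_fn(cfg) would then return HistogramBinning(15, detection=True)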
Example #4
def calibrate(Xtrain,
              prob_train,
              Ytrain,
              Xtest=None,
              prob_test=None,
              method='platt',
              **kwargs):
    """
        A calibration method that takes the predicted probabilities and the positive cases and re-calibrates the probabilities.

        Parameters
        ----------
        Xtrain : array, shape (n_samples_train, n_features)
            Training inputs; only used by the 'KRR' and 'EWF' methods.

        prob_train : array, shape (n_samples_train,)
            Probabilities of the positive class used to fit the calibration model.

        Ytrain : array, shape (n_samples_train,)
            True targets for the training set.

        Xtest : array, shape (n_samples_test, n_features), optional
            Test inputs; only used by the 'KRR' and 'EWF' methods. Defaults to Xtrain.

        prob_test : array, shape (n_samples_test,), optional
            Probabilities of the positive class to be calibrated (test set). If None, the training
            probabilities are re-calibrated.

        method : string, one of 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBQ', 'ENIR', 'KRR', 'EWF'
            The method to use for calibration. 'platt' corresponds to Platt scaling
            (i.e. a logistic regression model) and 'isotonic' is a non-parametric approach.
            It is not advised to use isotonic calibration with too few calibration samples (<<1000)
            since it tends to overfit.

        Returns
        -------
        p_calibrated : array, shape (n_samples_test,)
            The calibrated probabilities for the test set.


        References
        ----------
        Küppers et al., "Multivariate Confidence Calibration for Object Detection." CVPR Workshops, 2020.

        de Leeuw, Hornik, and Mair: "Isotone Optimization in R: Pool-Adjacent-Violators Algorithm (PAVA)
        and Active Set Methods." Journal of Statistical Software, 2009.

        Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht: "Obtaining well calibrated probabilities
        using Bayesian binning." Twenty-Ninth AAAI Conference on Artificial Intelligence, 2015.

        Kull, Meelis, Telmo Silva Filho, and Peter Flach: "Beta calibration: a well-founded and easily implemented
        improvement on logistic calibration for binary classifiers." Artificial Intelligence and Statistics,
        PMLR 54:623-631, 2017.

        Zadrozny, Bianca and Elkan, Charles: "Obtaining calibrated probability estimates from decision
        trees and naive bayesian classifiers." In ICML, pp. 609–616, 2001.

        Zadrozny, Bianca and Elkan, Charles: "Transforming classifier scores into accurate
        multiclass probability estimates." In KDD, pp. 694–699, 2002.

        Ryan J Tibshirani, Holger Hoefling, and Robert Tibshirani: "Nearly-isotonic regression."
        Technometrics, 53(1):54–61, 2011.

        Naeini, Mahdi Pakdaman, and Gregory F. Cooper: "Binary classifier calibration using an ensemble of near
        isotonic regression models." 2016 IEEE 16th International Conference on Data Mining (ICDM). IEEE, 2016.

        Chuan Guo, Geoff Pleiss, Yu Sun and Kilian Q. Weinberger: "On Calibration of Modern Neural Networks."
        Proceedings of the 34th International Conference on Machine Learning, 2017.

        Pereyra, G., Tucker, G., Chorowski, J., Kaiser, L. and Hinton, G.: “Regularizing neural networks by
        penalizing confident output distributions.” CoRR, 2017.

        Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P.,
        Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M. and Duchesnay, E.:
        "Scikit-learn: Machine Learning in Python." In Journal of Machine Learning Research, volume 12 pp 2825-2830,
        2011.

        Platt, John: "Probabilistic outputs for support vector machines and comparisons to regularized likelihood
        methods." Advances in large margin classifiers, 10(3): 61–74, 1999.

        Neumann, Lukas, Andrew Zisserman, and Andrea Vedaldi: "Relaxed Softmax: Efficient Confidence Auto-Calibration
        for Safe Pedestrian Detection." Conference on Neural Information Processing Systems (NIPS) Workshop MLITS, 2018.

        Nilotpal Chakravarti: "Isotonic Median Regression: A Linear Programming Approach." Mathematics of
        Operations Research, 14(2):303-308, 1989.
    """

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.isotonic import IsotonicRegression
    from netcal.scaling import TemperatureScaling, BetaCalibration
    from netcal.binning import HistogramBinning, BBQ, ENIR

    # calibrate the test probabilities if given, otherwise re-calibrate the training set
    if prob_test is None:
        probs = prob_train[:, np.newaxis]
    else:
        probs = prob_test[:, np.newaxis]

    # fall back to the training inputs if no test inputs are given
    if Xtest is None:
        Xtest = Xtrain

    if method == 'platt':
        model = LogisticRegression()
        model.fit(prob_train[:, np.newaxis],
                  Ytrain)  # LR needs X to be 2-dimensional
        p_calibrated = model.predict_proba(probs)[:, 1]

    elif method == 'isotonic':
        model = IsotonicRegression(out_of_bounds='clip')
        model.fit(prob_train, Ytrain)  # isotonic regression accepts 1-dimensional inputs
        p_calibrated = model.transform(probs.flatten())

    elif method == 'temperature_scaling':
        model = TemperatureScaling()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'beta':
        model = BetaCalibration()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'HB':
        model = HistogramBinning()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'BBQ':
        model = BBQ()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'ENIR':
        model = ENIR()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'KRR':
        # KRR_calibration is assumed to be defined elsewhere in this module
        model = KRR_calibration()
        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
        p_calibrated = model.predict(Xtest, probs, mode='prob')

    elif method == 'EWF':
        # EWF_calibration is assumed to be defined elsewhere in this module
        model = EWF_calibration()
        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
        p_calibrated = model.predict(Xtest, probs, mode='prob')

    else:
        raise ValueError("Method %s is not defined." % method)

    # replace NaNs produced by the calibration model with zero confidence
    p_calibrated[np.isnan(p_calibrated)] = 0

    # clip the calibrated probabilities to the valid range [0, 1]
    p_calibrated[p_calibrated > 1.0] = 1.0
    p_calibrated[p_calibrated < 0.0] = 0.0

    return p_calibrated
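
A usage sketch with synthetic data (all names and shapes here are illustrative); note that Xtrain is only consumed by the 'KRR' and 'EWF' branches but is always part of the signature:

import numpy as np

rng = np.random.default_rng(0)
Xtrain = rng.normal(size=(500, 4))
Ytrain = rng.integers(0, 2, size=500)
# confidences loosely correlated with the labels
prob_train = np.clip(Ytrain * 0.6 + rng.uniform(0.0, 0.4, size=500), 0.0, 1.0)
prob_test = rng.uniform(0.0, 1.0, size=200)

p_platt = calibrate(Xtrain, prob_train, Ytrain, prob_test=prob_test, method='platt')
p_iso = calibrate(Xtrain, prob_train, Ytrain, prob_test=prob_test, method='isotonic')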
Example #5
# assumed imports: calibration models from the netcal package;
# cross_validation_5_2 is provided by the adjacent ../utils.py
from typing import Union

from netcal.binning import HistogramBinning, IsotonicRegression, BBQ, ENIR
from netcal.scaling import LogisticCalibration, TemperatureScaling, BetaCalibration

def cross_validation(datafile: str,
                     use_cuda: Union[bool, str] = False,
                     domain: str = ".") -> int:
    """
    5x2 cross validation of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to a data file that contains two NumPy arrays with keys 'ground_truth' and 'predictions'.
    use_cuda : bool or str
        If True or a CUDA device string (e.g. 'cuda:0'), fit the scaling methods on the GPU.
    domain : str
        Working domain passed through to the cross validation; 'examination-mcmc' selects MCMC
        sampling and 'examination-variational' selects variational inference, otherwise MLE is used.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    # kwargs for the uncertainty-aware modes (MCMC / variational inference);
    # these can also be safely passed in MLE mode
    uncertainty_kwargs = {
        'mcmc_chains': 1,
        'mcmc_samples': 300,
        'mcmc_warmup_steps': 50,
        'vi_samples': 300,
        'vi_epochs': 3000
    }

    # bin counts for histogram binning and for the calibration measures, respectively
    hist_bins = 20
    bins = 15

    if domain == 'examination-mcmc':
        method = 'mcmc'
    elif domain == 'examination-variational':
        method = 'variational'
    else:
        method = 'mle'

    # if True, a pickle file is written for each calibration model built
    save_models = True

    histogram = HistogramBinning(hist_bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration(detection=False,
                                         method=method,
                                         use_cuda=use_cuda,
                                         **uncertainty_kwargs)
    temperature = TemperatureScaling(detection=False,
                                     method=method,
                                     use_cuda=use_cuda,
                                     **uncertainty_kwargs)
    betacal = BetaCalibration(detection=False,
                              method=method,
                              use_cuda=use_cuda,
                              **uncertainty_kwargs)

    models = [("hist", histogram), ("iso", iso), ("bbq", bbq), ("enir", enir),
              ("lr", lr_calibration), ("temperature", temperature),
              ("beta", betacal)]

    # invoke cross validation function from ../utils.py
    # see ../utils.py for calibration and its measurement
    success = cross_validation_5_2(models=models,
                                   datafile=datafile,
                                   bins=bins,
                                   save_models=save_models,
                                   domain=domain)

    return success
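
A hedged invocation sketch; the domain strings follow the branches above, and the file path is illustrative:

# MLE (default), MCMC sampling, and variational inference, respectively
cross_validation("records.npz", use_cuda=False, domain=".")
cross_validation("records.npz", use_cuda=False, domain="examination-mcmc")
cross_validation("records.npz", use_cuda=False, domain="examination-variational")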