Example #1
def example_calibration(datafile: str) -> int:
    """
    Example of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to datafile which contains two NumPy arrays with keys 'ground_truth' and 'predictions'.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    bins = 10

    # diagram = None
    diagram = 'diagram'

    # define validation split for test data
    validation_split = 0.7

    # if True, a Pickle-Object will be written out for each calibration model built
    save_models = False

    histogram = HistogramBinning(bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration()
    temperature = TemperatureScaling()
    betacal = BetaCalibration()

    models = [("Histogram Binning", histogram), ("Isotonic Regression", iso),
              ("BBQ", bbq), ("ENIR", enir),
              ("Logistic Calibration", lr_calibration),
              ("Temperature Scaling", temperature),
              ("Beta Calibration", betacal)]

    # see ../utils.py for calibration and its measurement
    success = single_example(models=models,
                             datafile=datafile,
                             bins=bins,
                             diagram=diagram,
                             validation_split=validation_split,
                             save_models=save_models)

    return success
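A minimal usage sketch for the example above. The datafile layout follows the docstring; the file path and the random arrays are placeholders for illustration only.

import numpy as np

# build a toy datafile with the two keys expected by example_calibration()
ground_truth = np.random.randint(0, 2, size=1000)        # binary labels
predictions = np.random.uniform(0.0, 1.0, size=1000)     # uncalibrated confidences
np.savez("records.npz", ground_truth=ground_truth, predictions=predictions)

# returns 0 on success, -1 otherwise
success = example_calibration("records.npz")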
Example #2
def cross_validation(datafile: str) -> int:
    """
    5x2 cross validation of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to datafile which contains two NumPy arrays with keys 'ground_truth' and 'predictions'.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    bins = 10

    # if True, a Pickle-Object will be written out for each calibration model built
    save_models = False

    histogram = HistogramBinning(bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration()
    temperature = TemperatureScaling()
    betacal = BetaCalibration()

    models = [("Histogram Binning", histogram), ("Isotonic Regression", iso),
              ("BBQ", bbq), ("ENIR", enir),
              ("Logistic Calibration", lr_calibration),
              ("Temperature Scaling", temperature),
              ("Beta Calibration", betacal)]

    # invoke cross validation function from ../utils.py
    # see ../utils.py for calibration and its measurement
    success = cross_validation_5_2(models=models,
                                   datafile=datafile,
                                   bins=bins,
                                   save_models=save_models)

    return success
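For reference, the "5x2" scheme behind cross_validation_5_2 repeats a stratified 2-fold split five times. The sketch below illustrates that generic scheme with scikit-learn's StratifiedKFold; it is not necessarily how the helper in ../utils.py implements it.

import numpy as np
from sklearn.model_selection import StratifiedKFold

def five_by_two_splits(ground_truth: np.ndarray):
    # five repetitions of a shuffled, stratified 2-fold split -> ten (train, test) index pairs
    for repetition in range(5):
        skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=repetition)
        dummy_features = np.zeros((len(ground_truth), 1))
        for train_idx, test_idx in skf.split(dummy_features, ground_truth):
            yield train_idx, test_idx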
def get_calibrator_fn(self, cfg):
    if cfg.METHOD.NAME == "HistogramBinning":
        bins = cfg.METHOD.HIST.N_BINS
        if bins == -1:
            bins = cfg.TEST.N_BINS
        return HistogramBinning(bins, detection=True)
    elif cfg.METHOD.NAME == "IsotonicRegression":
        return IsotonicRegression(detection=True)
    elif cfg.METHOD.NAME == "BBQ":
        return BBQ(score_function=cfg.METHOD.BBQ.SCORE_FN, detection=True)
    elif cfg.METHOD.NAME == "ENIR":
        return ENIR(score_function=cfg.METHOD.ENIR.SCORE_FN,
                    detection=True)
    elif cfg.METHOD.NAME == "LogisticCalibration":
        return LogisticCalibration(detection=True)
    elif cfg.METHOD.NAME == "BetaCalibration":
        return BetaCalibration(detection=True)
    elif cfg.METHOD.NAME == "TemperatureScaling":
        return TemperatureScaling(detection=True)
    else:
        raise NotImplementedError
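The method above only relies on the attributes it reads from cfg. Below is a minimal sketch of such a config, built with types.SimpleNamespace purely for illustration; the actual project presumably uses a structured config object (e.g. yacs), which is an assumption here.

from types import SimpleNamespace

# nested namespaces mimicking the attributes accessed in get_calibrator_fn()
cfg = SimpleNamespace(
    METHOD=SimpleNamespace(
        NAME="HistogramBinning",
        HIST=SimpleNamespace(N_BINS=-1),       # -1 falls back to cfg.TEST.N_BINS
        BBQ=SimpleNamespace(SCORE_FN="BIC"),   # only read when NAME == "BBQ"
        ENIR=SimpleNamespace(SCORE_FN="BIC"),  # only read when NAME == "ENIR"
    ),
    TEST=SimpleNamespace(N_BINS=15),
)

# owner.get_calibrator_fn(cfg)  # 'owner' is whatever object defines the method above
# -> HistogramBinning(15, detection=True)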
def calibrate(Xtrain,
              prob_train,
              Ytrain,
              Xtest=None,
              prob_test=None,
              method='platt',
              **kwargs):
    """
        A calibration method that takes the predicted probabilties and positive cases and recalibrate the probabilities.

        Parameters
        ----------
        y_true : array, shape (n_samples_train,)
            True targets for the training set.

        y_prob_train : array, shape (n_samples_train,)
            Probabilities of the positive class to train a calibration model.

        y_prob_test : array, shape (n_samples_test,)
            Probabilities of the positive class to be calibrated (test set). If None it re-calibrate the training set.

        method: string, 'platt', 'isotonic', 'temperature_scaling', 'beta', 'HB', 'BBG', 'ENIR'
            The method to use for calibration. Can be ‘sigmoid’ which corresponds to Platt’s method
            (i.e. a logistic regression model) or ‘isotonic’ which is a non-parametric approach.
            It is not advised to use isotonic calibration with too few calibration samples (<<1000) since it tends to overfit.

        Returns
        -------
        p_calibrated : array, shape (n_bins,)
            The calibrated error for test set.


        References
        ----------
        Küppers et al., "Multivariate Confidence Calibration for Object Detection." CVPR Workshops, 2020.

        Leeuw, Hornik, Mair: "Isotone Optimization in R: Pool-Adjacent-Violators Algorithm (PAVA) and Active
        Set Methods." Journal of Statistical Software, 2009.

        Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht, "Obtaining well calibrated probabilities
        using bayesian binning." Twenty-Ninth AAAI Conference on Artificial Intelligence, 2015.

        Kull, Meelis, Telmo Silva Filho, and Peter Flach: "Beta calibration: a well-founded and easily implemented
        improvement on logistic calibration for binary classifiers." Artificial Intelligence and Statistics,
        PMLR 54:623-631, 2017.

        Zadrozny, Bianca and Elkan, Charles: "Obtaining calibrated probability estimates from decision
        trees and naive bayesian classifiers." In ICML, pp. 609–616, 2001.

        Zadrozny, Bianca and Elkan, Charles: "Transforming classifier scores into accurate
        multiclass probability estimates." In KDD, pp. 694–699, 2002.

        Ryan J Tibshirani, Holger Hoefling, and Robert Tibshirani: "Nearly-isotonic regression."
        Technometrics, 53(1):54–61, 2011.

        Naeini, Mahdi Pakdaman, and Gregory F. Cooper: "Binary classifier calibration using an ensemble of near
        isotonic regression models." 2016 IEEE 16th International Conference on Data Mining (ICDM). IEEE, 2016.

        Chuan Guo, Geoff Pleiss, Yu Sun and Kilian Q. Weinberger: "On Calibration of Modern Neural Networks."
        Proceedings of the 34th International Conference on Machine Learning, 2017.

        Pereyra, G., Tucker, G., Chorowski, J., Kaiser, L. and Hinton, G.: “Regularizing neural networks by
        penalizing confident output distributions.” CoRR, 2017.

        Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P.,
        Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M. and Duchesnay, E.:
        "Scikit-learn: Machine Learning in Python." In Journal of Machine Learning Research, volume 12 pp 2825-2830,
        2011.

        Platt, John: "Probabilistic outputs for support vector machines and comparisons to regularized likelihood
        methods." Advances in large margin classifiers, 10(3): 61–74, 1999.

        Neumann, Lukas, Andrew Zisserman, and Andrea Vedaldi: "Relaxed Softmax: Efficient Confidence Auto-Calibration
        for Safe Pedestrian Detection." Conference on Neural Information Processing Systems (NIPS) Workshop MLITS, 2018.

        Nilotpal Chakravarti, Isotonic Median Regression: A Linear Programming Approach, Mathematics of Operations
        Research Vol. 14, No. 2 (May, 1989), pp. 303-308.
    """

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from sklearn.isotonic import IsotonicRegression
    from netcal.scaling import TemperatureScaling, BetaCalibration
    from netcal.binning import HistogramBinning, BBQ, ENIR

    if prob_test is None:
        probs = prob_train[:, np.newaxis]
    else:
        probs = prob_test[:, np.newaxis]

    if Xtest is None:
        Xtest = Xtrain

    if method == 'platt':
        model = LogisticRegression()
        model.fit(prob_train[:, np.newaxis],
                  Ytrain)  # LR needs X to be 2-dimensional
        p_calibrated = model.predict_proba(probs)[:, 1]

    elif method == 'isotonic':
        model = IsotonicRegression(out_of_bounds='clip')
        model.fit(prob_train, Ytrain)  # isotonic regression takes 1-dimensional scores
        p_calibrated = model.transform(probs.flatten())

    elif method == 'temperature_scaling':
        model = TemperatureScaling()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'beta':
        model = BetaCalibration()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'HB':
        model = HistogramBinning()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'BBQ':
        model = BBQ()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'ENIR':
        model = ENIR()
        model.fit(prob_train, Ytrain)
        p_calibrated = model.transform(probs)

    elif method == 'KRR':
        # KRR_calibration (like EWF_calibration below) is assumed to be defined elsewhere in this module
        model = KRR_calibration()
        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
        p_calibrated = model.predict(Xtest, probs, mode='prob')

    elif method == 'EWF':
        model = EWF_calibration()
        model.fit(Xtrain, prob_train, Ytrain, **kwargs)
        p_calibrated = model.predict(Xtest, probs, mode='prob')

    else:
        raise ValueError("Method %s is not defined." % method)

    # replace NaN outputs by a confidence of zero
    p_calibrated[np.isnan(p_calibrated)] = 0

    # clip the calibrated probabilities to the valid range [0, 1]
    p_calibrated[p_calibrated > 1.0] = 1.0
    p_calibrated[p_calibrated < 0.0] = 0.0

    return p_calibrated
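A short usage sketch for the calibrate() helper above on synthetic data; all arrays and values are placeholders.

import numpy as np

rng = np.random.RandomState(0)
n = 2000
Xtrain = rng.rand(n, 4)                           # features, only used by 'KRR'/'EWF'
prob_train = rng.rand(n)                          # uncalibrated confidences
Ytrain = (rng.rand(n) < prob_train).astype(int)   # binary ground truth

# recalibrate the training confidences with Platt scaling
p_platt = calibrate(Xtrain, prob_train, Ytrain, method='platt')

# calibrate a separate test set with isotonic regression
prob_test = rng.rand(500)
p_iso = calibrate(Xtrain, prob_train, Ytrain, prob_test=prob_test, method='isotonic')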
def calibration(samples: dict,
                bins: Union[tuple, list],
                save_models: bool = False,
                random_state: int = None) -> dict:
    """
    Do example calibration on the artificial dataset created by the module "CreateDataset".

    Parameters
    ----------
    samples : dict
        Dictionary with samples generated by module "CreateDataset".
    bins : iterable
        Number of bins in each direction.
    save_models : bool, optional, default: False
        If True, save all models on disk.
    random_state : int, optional, default: None
        Random state as seed for train/test split

    Returns
    -------
    dict
        Dictionary with split test data and calibrated confidence estimates.
    """

    # extract values from dict
    matched = samples['matched']
    confidences = samples['confidences']
    cx = samples['cx']
    cy = samples['cy']

    # create the save directory if models should be stored on disk
    if save_models:
        os.makedirs("models", exist_ok=True)

    # stratified random split
    conf_train, conf_test, cx_train, cx_test, cy_train, cy_test, matched_train, matched_test = \
        train_test_split(confidences, cx, cy, matched,
                         train_size=0.7,
                         shuffle=True,
                         stratify=matched,
                         random_state=random_state)

    # calibration results are stored in this dict
    results = {
        'confidence': conf_test,
        'matched': matched_test,
        'cx': cx_test,
        'cy': cy_test
    }

    # -----------------------------------------
    # 0D methods with confidence only
    hist = HistogramBinning(bins=bins[0], detection=True)
    betacal = BetaCalibration(detection=True)
    lr_calibration = LogisticCalibration(temperature_only=False,
                                         detection=True)

    methods0d = [("hist", hist), ("betacal", betacal),
                 ("lr_calibration", lr_calibration)]

    # iterate over 0D models, build calibration mapping and perform calibration
    for name, method in methods0d:

        method.fit(conf_train, matched_train)
        results[name] = method.transform(conf_test)

        if save_models:
            method.save_model("models/%s.pkl" % name)

    # -----------------------------------------
    # 2D methods with confidence and x/y position

    hist2d = HistogramBinning(bins=bins, detection=True)
    betacal2d = BetaCalibration(detection=True)
    betacal_dependent2d = BetaCalibrationDependent(momentum=True,
                                                   detection=True)
    lr_calibration2d = LogisticCalibration(temperature_only=False,
                                           detection=True)
    lr_calibration_dependent2d = LogisticCalibrationDependent(detection=True)

    methods2d = [("hist2d", hist2d), ("betacal2d", betacal2d),
                 ("betacal_dependent2d", betacal_dependent2d),
                 ("lr_calibration2d", lr_calibration2d),
                 ("lr_calibration_dependent2d", lr_calibration_dependent2d)]

    # iterate over 2D models, build calibration mapping and perform calibration
    conf_train_2d = np.stack((conf_train, cx_train, cy_train), axis=1)
    conf_test_2d = np.stack((conf_test, cx_test, cy_test), axis=1)

    # iterate over 2D models, build calibration mapping and perform calibration
    for name, method in methods2d:

        method.fit(conf_train_2d, matched_train)
        results[name] = method.transform(conf_test_2d)

        if save_models:
            method.save_model("models/%s.pkl" % name)

    return results
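A sketch of how the calibration() routine above could be invoked. The samples dict is filled with random placeholder data here instead of the output of the "CreateDataset" module.

import numpy as np

rng = np.random.RandomState(42)
n = 5000
samples = {
    'confidences': rng.uniform(0.3, 1.0, size=n),   # predicted confidences
    'cx': rng.uniform(0.0, 1.0, size=n),            # relative x position of each box
    'cy': rng.uniform(0.0, 1.0, size=n),            # relative y position of each box
    'matched': rng.randint(0, 2, size=n),           # 1 if the detection matched the ground truth
}

# bins per dimension: (confidence, cx, cy)
results = calibration(samples, bins=(15, 5, 5), save_models=False, random_state=0)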
Example #6
def calibrate(frames: List[Dict], dataset: str, network: str,
              subset: List[str], ious: List[float], train_ids: List):
    """
    Perform calibration of the given frames (as list of dicts) for a dedicated dataset with dedicated train_ids.
    The trained models are stored at "calibration/<network>/models/".

    Parameters
    ----------
    frames : List[Dict]
        List of dictionaries holding the input data for each image frame.
    dataset : str
        String of the used dataset (see detectron2 registered datasets).
    network : str
        String describing the base neural network.
    subset : List[str]
        List with additional features used for calibration. Options are:
        - 'cx'
        - 'cy'
        - 'w'
        - 'h'
    ious : List[float]
        List with IoU scores used for evaluation.
    train_ids : List
        List of data frame ids used for calibration training.
    """

    meta = MetadataCatalog.get(dataset)
    model_dir = os.path.join("calibration", network, "models")
    os.makedirs(model_dir, exist_ok=True)

    # reverse mapping of category ids to network class ids (e.g. for COCO dataset)
    if hasattr(meta, "thing_dataset_id_to_contiguous_id"):
        reverse_dictionary = {
            v: k
            for k, v in meta.thing_dataset_id_to_contiguous_id.items()
        }
    else:
        reverse_dictionary = None

    # iterate over classes and perform class-wise calibration
    for i, classname in enumerate(meta.thing_classes):
        category_id = reverse_dictionary[
            i] if reverse_dictionary is not None else i
        features, matched, _ = get_features(frames, category_id, subset, ious,
                                            train_ids)

        if features.size == 0:
            print("No samples for category %s found" % classname)
            continue

        # different binning schemes for different feature dimensions
        if features.shape[1] == 1:
            bins = 15
        elif features.shape[1] == 3:
            bins = 5
        elif features.shape[1] == 5:
            bins = 3
        else:
            raise ValueError("Unknown dimension: %d" % features.shape[1])

        # iterate over IoUs and perform class-wise calibration for each IoU separately
        print("Training: category %d: %d samples" %
              (category_id, features.shape[0]))
        for iou, m in zip(ious, matched):

            # initialize calibration methods
            histogram = HistogramBinning(bins=bins, detection=True)
            lr = LogisticCalibration(detection=True)
            lr_dependent = LogisticCalibrationDependent()
            betacal = BetaCalibration(detection=True)
            betacal_dependent = BetaCalibrationDependent(momentum_epochs=500)

            # if only negative (or positive) examples are given, calibration is not applicable
            unique = np.unique(m)
            print("Different labels:", unique)
            if len(unique) != 2:
                print(
                    "Calibration failed for cls %d as only one class is present"
                    % i)
                continue

            # fit and save calibration models
            print("Fit and save histogram binning")
            histogram.fit(features, m)
            histogram.save_model(
                os.path.join(
                    model_dir, "histogram_%s_iou%.2f_cls-%02d.pkl" %
                    (''.join(subset), iou, i)))

            print("Fit independent logistic calibration")
            lr.fit(features, m)
            lr.save_model(
                os.path.join(
                    model_dir,
                    "lr_%s_iou%.2f_cls-%02d.pkl" % (''.join(subset), iou, i)))

            print("Fit dependent logistic calibration")
            lr_dependent.fit(features, m)
            lr_dependent.save_model(
                os.path.join(
                    model_dir, "lr_dependent_%s_iou%.2f_cls-%02d.pkl" %
                    (''.join(subset), iou, i)))

            print("Fit independent beta calibration")
            betacal.fit(features, m)
            betacal.save_model(
                os.path.join(
                    model_dir, "betacal_%s_iou%.2f_cls-%02d.pkl" %
                    (''.join(subset), iou, i)))

            print("Fit dependent beta calibration")
            betacal_dependent.fit(features, m)
            betacal_dependent.save_model(
                os.path.join(
                    model_dir, "betacal_dependent_%s_iou%.2f_cls-%02d.pkl" %
                    (''.join(subset), iou, i)))
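A hedged invocation sketch for the detection calibration routine above. The dataset and network names are assumptions, and the frame dicts and train ids are placeholders that would normally come from running the detector and matching its detections against the ground truth (in the structure expected by get_features, which is not shown here).

frames = [...]       # per-image dicts in the structure expected by get_features()
train_ids = [...]    # image ids reserved for fitting the calibration models

calibrate(frames=frames,
          dataset="coco_2017_val",            # any dataset registered with detectron2
          network="faster_rcnn_R_50_FPN",     # only used to build the model output path
          subset=["cx", "cy"],                # two extra box features -> 3 feature dims -> 5 bins each
          ious=[0.5, 0.75],
          train_ids=train_ids)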
def eval_method(iteration: int, method_short: str, num_combinations: int,
                train_combinations: list, train_matched: np.ndarray,
                test_combinations: list, bins_combination: list) -> dict:
    """
    Eval one single method. For multiprocessing it is necessary to create the instance for the calibration
    method within the new process.

    Parameters
    ----------
    iteration : int
        Index of the current iteration, used when writing the model files.
    method_short : str
        Short identifier used to create the right calibration method within the new process on multiprocessing.
    num_combinations : int
        Total number of different calibration combinations.
    train_combinations : list
        List with all prebuilt combinations used for calibration.
    train_matched : np.ndarray
        Prebuilt ground truth annotations used for each calibration combination.
    test_combinations : list
        List with all prebuilt combinations used for testing.
    bins_combination : list
        List with binning schemes for all combinations.

    Returns
    -------
    dict
        Calibration data for each combination on the current method.
    """

    print("Method %s" % method_short)

    # initialize method based on the identifier
    if method_short == "betacal":
        method = BetaCalibration(detection=True)
    elif method_short == "hist":
        method = HistogramBinning(detection=True)
    elif method_short == "lr":
        method = LogisticCalibration(detection=True)
    elif method_short == "lr_dependent":
        method = LogisticCalibrationDependent(detection=True)
    elif method_short == "betacal_dependent":
        method = BetaCalibrationDependent(momentum=True, detection=True)
    else:
        raise AttributeError("Unknown short description")

    # collect calibrated data of each combination
    calibrated_data = {}
    for j in range(num_combinations):
        print("Combination %d method %s" % (j, method_short))
        train_combination, val_combination = train_combinations[
            j], test_combinations[j]

        # set Histogram binning w.r.t. current combination
        if isinstance(method, HistogramBinning):
            method = HistogramBinning(bins=bins_combination[j], detection=True)

        # fit and save model ("network" is presumably defined at module level)
        method.fit(train_combination, train_matched)
        method.save_model("models/%s_%s_%d_%02d.pkl" %
                          (network, method_short, j, iteration))

        # perform calibration and save into dict
        calibrated = method.transform(val_combination)
        calibrated_data["%s_c%d" % (method_short, j)] = calibrated
        print("Finished combination %d method %s" % (j, method_short))

    return calibrated_data
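Because eval_method() builds its calibration model inside the call, it can be dispatched per method via a multiprocessing pool. A rough sketch, assuming the combination lists (train_combinations, train_matched, test_combinations, bins_combination) and the iteration index have already been prepared, and noting that the function also relies on a module-level "network" variable for the model file names.

from functools import partial
from multiprocessing import Pool

method_shorts = ["betacal", "hist", "lr", "lr_dependent", "betacal_dependent"]

worker = partial(eval_method, iteration,
                 num_combinations=len(train_combinations),
                 train_combinations=train_combinations,
                 train_matched=train_matched,
                 test_combinations=test_combinations,
                 bins_combination=bins_combination)

with Pool(processes=len(method_shorts)) as pool:
    # one worker per calibration method; each returns a dict of calibrated scores
    calibrated_per_method = pool.map(worker, method_shorts)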
def cross_validation(datafile: str,
                     use_cuda: Union[bool, str] = False,
                     domain: str = ".") -> int:
    """
    5x2 cross validation of several calibration methods.

    Parameters
    ----------
    datafile : str
        Path to datafile which contains two NumPy arrays with keys 'ground_truth' and 'predictions'.
    use_cuda : bool or str, optional, default: False
        If True (or a CUDA device string), run the scaling methods on the GPU.
    domain : str, optional, default: "."
        Examination mode: 'examination-mcmc' selects MCMC sampling and 'examination-variational' selects
        variational inference for the scaling methods; anything else falls back to a plain MLE fit.
        The value is also passed through to cross_validation_5_2.

    Returns
    -------
    int
        0 on success, -1 otherwise.
    """

    # kwargs for the uncertainty modes (MCMC/variational); these can also safely be passed in MLE mode
    uncertainty_kwargs = {
        'mcmc_chains': 1,
        'mcmc_samples': 300,
        'mcmc_warmup_steps': 50,
        'vi_samples': 300,
        'vi_epochs': 3000
    }

    hist_bins = 20
    bins = 15

    if domain == 'examination-mcmc':
        method = 'mcmc'
    elif domain == 'examination-variational':
        method = 'variational'
    else:
        method = 'mle'

    # if True, a Pickle-Object will be written out for each calibration model built
    save_models = True

    histogram = HistogramBinning(hist_bins)
    iso = IsotonicRegression()
    bbq = BBQ()
    enir = ENIR()
    lr_calibration = LogisticCalibration(detection=False,
                                         method=method,
                                         use_cuda=use_cuda,
                                         **uncertainty_kwargs)
    temperature = TemperatureScaling(detection=False,
                                     method=method,
                                     use_cuda=use_cuda,
                                     **uncertainty_kwargs)
    betacal = BetaCalibration(detection=False,
                              method=method,
                              use_cuda=use_cuda,
                              **uncertainty_kwargs)

    models = [("hist", histogram), ("iso", iso), ("bbq", bbq), ("enir", enir),
              ("lr", lr_calibration), ("temperature", temperature),
              ("beta", betacal)]

    # invoke cross validation function from ../utils.py
    # see ../utils.py for calibration and its measurement
    success = cross_validation_5_2(models=models,
                                   datafile=datafile,
                                   bins=bins,
                                   save_models=save_models,
                                   domain=domain)

    return success
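A brief invocation sketch; the datafile path is a placeholder. The default domain "." runs a plain MLE fit, while "examination-mcmc" and "examination-variational" activate the corresponding uncertainty modes shown above.

# MLE fit (default) vs. MCMC-based uncertainty for the scaling methods
success_mle = cross_validation("records/example.npz", use_cuda=False)
success_mcmc = cross_validation("records/example.npz", use_cuda=False, domain="examination-mcmc")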