Example #1
def histPlot(predictions, truth, classes = None, label = "Model", newFigure = None, splitPosNeg = False, kde = False):
    """
        Computes the histograms of a binary predictions
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
            classes {Dict "+":int, "-":int} -- Classes to consider to plot {Default None ie {+":1, "-":0}}
        
        Keyword Arguments:
            label {str} -- Legend to plot (default: {"Model"})
            newFigure {str} -- Display on a given figure (default: {None} - Create new figure)
            splitPosNeg {bool} -- Split between positive and negative (default: {False})
            kde {bool} -- Computes the kde of the histogram (default: {False})
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)
    bins = np.linspace(0, 1, 20)

    if newFigure is not None:
        plt.figure(newFigure)
    else:
        plt.xlabel('Predicted Probability')
        plt.ylabel('Frequency')
        plt.title('Histogram Probabilities')

    if splitPosNeg:
        sns.distplot(predictions[truth == 1], label=label + " Positive", kde = kde, bins = bins)
        sns.distplot(predictions[truth == 0], label=label + " Negative", kde = kde, bins = bins)
    else:
        sns.distplot(predictions, label=label, kde = kde, bins = bins)
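
These snippets rely on helpers from the same project (selection, flatten, aucCompute) and on module-level imports that are not shown. As a rough, self-contained illustration of the same idea on made-up data, the sketch below draws the class-split probability histograms with sns.histplot (the maintained replacement for the now-deprecated sns.distplot used above); all names and values are hypothetical.

# Self-contained sketch (synthetic data); mirrors the splitPosNeg branch of histPlot
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
truth = rng.integers(0, 2, size=1000)                                   # binary ground truth
predictions = np.clip(truth * 0.3 + rng.normal(0.4, 0.2, 1000), 0, 1)   # fake scores in [0, 1]

bins = np.linspace(0, 1, 20)
sns.histplot(predictions[truth == 1], bins=bins, label="Model Positive")
sns.histplot(predictions[truth == 0], bins=bins, label="Model Negative")
plt.xlabel('Predicted Probability')
plt.ylabel('Frequency')
plt.title('Histogram Probabilities')
plt.legend()
plt.show()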
Example #2
def computeEvolutionRoc(temporalListLabels, predictions, classes = None, percentage = 0.001):
    """
        Plots the evolution of the auc 
        
        Arguments:
            temporalListLabels {List of (time, labels)*} -- Ground truth labels
            predictions {Dict / List of labels} -- Predicitons (same format than labels in temporalListLabels)
            classes {Dict} -- Classes to consider to plot (key: Name to display, Value: label)
            percentage {float} -- Evaluate the TPR and TNR at this given value of FNR and FPR
    """
    aucs = {}
    for time, labels in temporalListLabels:
        pred_time, labels_time = selection(predictions, labels, classes)
        pred_time, labels_time = flatten(pred_time, labels_time)
        fpr, tpr, _ = roc_curve(labels_time, pred_time)
        fnr, tnr = (1 - tpr)[::-1], (1 - fpr)[::-1]  # reversed so that fnr is increasing for np.interp
        auc_time = auc(fpr, tpr)
        # 95% confidence half-widths for the TPR and TNR
        wilson_tpr = 1.96 * np.sqrt(tpr * (1 - tpr)/len(predictions))
        wilson_tnr = 1.96 * np.sqrt(tnr * (1 - tnr)/len(predictions))

        aucs[time] = {
                        "auc": auc_time, 
                        "lower": auc(fpr, tpr - wilson_tpr), 
                        "upper": auc(fpr, tpr + wilson_tpr), 

                        "tpr": np.interp(percentage, fpr, tpr),
                        "tpr_wilson" : np.interp(percentage, fpr, wilson_tpr),

                        "tnr": np.interp(percentage, fnr, tnr),
                        "tnr_wilson" : np.interp(percentage, fnr, wilson_tnr),
                     }
                     
    return pd.DataFrame.from_dict(aucs, orient = "index")
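
A minimal, self-contained sketch of the core computation on synthetic data: the TPR at a fixed FPR is read off the ROC curve with np.interp and paired with a 1.96 * sqrt(p * (1 - p) / n) half-width, as in the snippet above. Data and variable names are made up.

# Sketch: AUC plus TPR at a fixed FPR with a normal-approximation half-width
import numpy as np
from sklearn.metrics import roc_curve, auc

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=500)
scores = np.clip(labels * 0.4 + rng.normal(0.3, 0.25, 500), 0, 1)

fpr, tpr, _ = roc_curve(labels, scores)
percentage = 0.001                                  # target FPR
tpr_at_fpr = np.interp(percentage, fpr, tpr)        # fpr is increasing, as np.interp requires
half_width = 1.96 * np.sqrt(tpr_at_fpr * (1 - tpr_at_fpr) / len(scores))
print(auc(fpr, tpr), tpr_at_fpr, half_width)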
Example #3
def rocPlot(predictions, truth, classes = None, label = "Model", newFigure = None, reverse = False, percentage = None):
    """
        Computes the roc with confidence bounds for the given model
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
            classes {Dict "+":int, "-":int} -- Classes to consider to plot {Default None ie {+":1, "-":0}}
        
        Keyword Arguments:
            label {str} -- Legend to plot (default: {"Model"})
            newFigure {str} -- Display on a given figure (default: {None} - Create new figure)
            reverse {bool} -- Plot the reverse ROC useful for analyzing TNR (default: {False})
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)
    global_fpr, global_tpr, _ = roc_curve(truth, predictions)
    if reverse:
        x, y = 1 - global_tpr, 1 - global_fpr # FNR, TNR
        x, y = x[::-1], y[::-1]
        minx = 1. / np.sum(truth == 1)
        if percentage is None:
            percentage = minx
        str_print = "TNR @{:.2f}% FNR : {:.2f}".format(percentage*100, np.interp(percentage, x, y))
    else:
        x, y = global_fpr, global_tpr
        minx = 1. / np.sum(truth == 0)
        if percentage is None:
            percentage = minx
        str_print = "TPR @{:.2f}% FPR : {:.2f}".format(percentage*100, np.interp(percentage, x, y))

    if newFigure is not None:
        plt.figure(newFigure)
    else:
        plt.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), 'k--', label="Random")
        if reverse:
            plt.xlabel('False negative rate')
            plt.ylabel('True negative rate')
            plt.title('Reverse ROC curve')
        else:
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            
    # Resample the curve on a regular grid and compute a 95% confidence half-width
    newx = np.linspace(minx, 1, 1000)
    y = np.interp(newx, x, y)
    wilson = 1.96 * np.sqrt(y * (1 - y)/len(predictions))
    print(str_print + " +/- {:.2f}".format(np.interp(0.01, newx, wilson)))
    upper = np.minimum(y + wilson, 1)
    lower = np.maximum(y - wilson, 0)
    plRoc = plt.plot(newx, y, label=label + " ({:.2f} +/- {:.2f})".format(aucCompute(predictions, truth, classes), (auc(newx, upper) - auc(newx, lower))/2.), ls = '--' if "train" in label.lower() else '-')
    plt.fill_between(newx, lower, upper, color=plRoc[0].get_color(), alpha=.2)
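
The confidence band is the part worth seeing in isolation: the ROC curve is resampled on a regular grid, a 95% half-width is computed point-wise, and the band is shaded with fill_between. A self-contained sketch on synthetic data (all names hypothetical):

# Sketch: ROC curve with a shaded 95% band, as in rocPlot above
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

rng = np.random.default_rng(1)
truth = rng.integers(0, 2, size=800)
predictions = np.clip(truth * 0.35 + rng.normal(0.35, 0.25, 800), 0, 1)

fpr, tpr, _ = roc_curve(truth, predictions)
newx = np.linspace(1. / np.sum(truth == 0), 1, 1000)   # start at the smallest achievable FPR
y = np.interp(newx, fpr, tpr)
wilson = 1.96 * np.sqrt(y * (1 - y) / len(predictions))
upper, lower = np.minimum(y + wilson, 1), np.maximum(y - wilson, 0)

pl = plt.plot(newx, y, label="Model ({:.2f})".format(auc(fpr, tpr)))
plt.fill_between(newx, lower, upper, color=pl[0].get_color(), alpha=.2)
plt.plot([0, 1], [0, 1], 'k--', label="Random")
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend()
plt.show()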
Example #4
def calibrationPlot(predictions,
                    truth,
                    classes=None,
                    label="Model",
                    newFigure=None,
                    n_bins=5):
    """
        Computes the roc with confidence bounds for the given model
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
            classes {Dict "+":int, "-":int} -- Classes to consider to plot {Default None ie {+":1, "-":0}}
        
        Keyword Arguments:
            label {str} -- Legend to plot (default: {"Model"})
            newFigure {str} -- Display on a given figure (default: {None} - Create new figure)
            n_bins {int} -- Numbre of bins for the calibration (default: {5})
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)
    # Rescale the predictions to [0, 1] before computing the calibration curve
    predictions = ((predictions - predictions.min()) /
                   (predictions.max() - predictions.min())).flatten()
    fraction_of_positives, mean_predicted_value = calibration_curve(
        truth, predictions, n_bins=n_bins)
    # Scatter point sizes proportional to the population of each non-empty bin
    bins = np.linspace(0., 1. + 1e-8, n_bins + 1)
    binids = np.digitize(predictions, bins) - 1
    bin_sums = np.bincount(binids, minlength=len(bins))
    bin_sums = bin_sums[bin_sums != 0] * 500 / np.sum(bin_sums)

    if newFigure is not None:
        plt.figure(newFigure)
    else:
        plt.xlabel('Mean Predicted Value')
        plt.ylabel('Fraction Positive')
        plt.title('Calibration')

    p = plt.plot(mean_predicted_value,
                 fraction_of_positives,
                 alpha=0.5,
                 ls=':')
    plt.scatter(mean_predicted_value,
                fraction_of_positives,
                s=bin_sums,
                label=label +
                " ({:.2f})".format(brier_score_loss(truth, predictions)),
                color=p[0].get_color(),
                alpha=0.5)
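
A self-contained sketch of the same calibration plot on synthetic data: calibration_curve gives the curve, brier_score_loss the legend score, and the bin populations set the scatter sizes. Everything below is made up for illustration.

# Sketch: calibration curve with point sizes proportional to bin population
import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss

rng = np.random.default_rng(2)
truth = rng.integers(0, 2, size=1000)
predictions = np.clip(truth * 0.3 + rng.normal(0.4, 0.2, 1000), 0, 1)

n_bins = 5
frac_pos, mean_pred = calibration_curve(truth, predictions, n_bins=n_bins)
bins = np.linspace(0., 1. + 1e-8, n_bins + 1)
bin_sums = np.bincount(np.digitize(predictions, bins) - 1, minlength=len(bins))
sizes = bin_sums[bin_sums != 0] * 500 / np.sum(bin_sums)

p = plt.plot(mean_pred, frac_pos, ls=':', alpha=0.5)
plt.scatter(mean_pred, frac_pos, s=sizes, color=p[0].get_color(), alpha=0.5,
            label="Model ({:.2f})".format(brier_score_loss(truth, predictions)))
plt.plot([0, 1], [0, 1], 'k--')          # perfectly calibrated reference
plt.xlabel('Mean Predicted Value')
plt.ylabel('Fraction Positive')
plt.legend()
plt.show()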
Example #5
def averagePrecisionRecallCompute(predictions, truth, classes=None):
    """
        Computes AUC of the given predictions
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
        
        Keyword Arguments:
            classes {Dict "+":int, "-":int} -- Classes to consider to plot {Default None ie {+":1, "-":0}}
    
        Returns:
            float -- Estimation by pooling of auc
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)
    return average_precision_score(truth, predictions)
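
For reference, a tiny stand-alone example of the pooled metric (toy values, for illustration only):

# Toy example of average_precision_score, the metric pooled above
from sklearn.metrics import average_precision_score

truth = [0, 0, 1, 1]
predictions = [0.1, 0.4, 0.35, 0.8]
print(average_precision_score(truth, predictions))  # ~0.83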
Example #6
def confusionPlot(predictions, truth, classes, percentage = True):
    """
        Computes the confusion matrix of the given model
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
            classes {Dict "+":int, "-":int} -- Classes to consider to plot
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)

    classes_list = np.array(list(classes.keys()))
    confusion = confusion_matrix(truth, predictions, labels=[classes[c] for c in classes_list])
    notNull = confusion.sum(axis = 0) != 0  # keep only columns (predicted classes) that actually occur

    if percentage:
        confusion = confusion / confusion.sum(axis = 1, keepdims = True)

    sns.heatmap(confusion[:, notNull], xticklabels = classes_list[notNull], yticklabels = classes_list, annot = True, vmin = 0, vmax = 1 if percentage else None)
    plt.xlabel("Predicted")
    plt.ylabel("Ground truth")
Example #7
def precisionRecallPlot(predictions,
                        truth,
                        classes=None,
                        label="Model",
                        newFigure=None,
                        reverse=False,
                        percentage=None):
    """
        Computes the roc with confidence bounds for the given model
        
        Arguments:
            predictions {Dict / List} -- Label predictions
            truth {Dict / List} -- Ground truth
            classes {Dict "+":int, "-":int} -- Classes to consider to plot {Default None ie {+":1, "-":0}}
        
        Keyword Arguments:
            label {str} -- Legend to plot (default: {"Model"})
            newFigure {str} -- Display on a given figure (default: {None} - Create new figure)
            reverse {bool} -- Plot the reverse ROC useful for analyzing TNR (default: {False})
    """
    predictions, truth = selection(predictions, truth, classes)
    predictions, truth = flatten(predictions, truth)
    precision, recall, _ = precision_recall_curve(truth, predictions)

    if newFigure is not None:
        plt.figure(newFigure)
    else:
        plt.xlabel('Precision')
        plt.ylabel('Recall')
        plt.title('Precision Recall curve')

    plt.plot(precision,
             recall,
             label=label + " ({:.2f})".format(
                 averagePrecisionRecallCompute(predictions, truth, classes)),
             ls='--' if "train" in label.lower() else '-')
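
A self-contained sketch of the precision-recall plot on synthetic data; note that it puts recall on the x-axis (the more common convention), whereas the function above plots the transposed view. Names and data are made up.

# Sketch: precision-recall curve with the average precision in the legend
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, average_precision_score

rng = np.random.default_rng(3)
truth = rng.integers(0, 2, size=600)
predictions = np.clip(truth * 0.3 + rng.normal(0.4, 0.2, 600), 0, 1)

precision, recall, _ = precision_recall_curve(truth, predictions)
plt.plot(recall, precision,
         label="Model ({:.2f})".format(average_precision_score(truth, predictions)))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision Recall curve')
plt.legend()
plt.show()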
def cross_validation(model,
                     data,
                     labels,
                     folds,
                     classes=None,
                     transform=None,
                     proba=True,
                     nested_cv=False):
    """
        Computes the cross valdiation on the data
        Given the folds indicated in folds
        
        Arguments:
            model {model} -- Model to cross validate
            data {Dict} -- Data to split 
            labels {Dict} -- Labels of the data (keys have to match)
            folds {Dict: (key : fold, values: key of data and labels)} -- Folds in order to split the data
            transform {Transform Object} -- Compute the transformation on train and apply on train and test

        Returns:
            Predictions by the model on cross validated data (Dict with same keys than data)
    """
    predictions, labels_res = {}, labels.copy()
    for k in folds:
        data_fold, labels_fold = selection(
            {d: data[d].copy()
             for d in data if d not in folds[k]},
            {d: labels[d].copy()
             for d in data if d not in folds[k]}, classes)
        data_test = {d: data[d] for d in folds[k]}

        if transform is not None:
            data_fold = transform.fit_transform_dict(data_fold, labels_fold)
            data_test = transform.transform_dict(data_test)

            # Because Normalization can impact labeling
            data_fold = {
                d: data_fold[d]
                for d in data_fold if len(data_fold[d]) > 0
            }
            labels_fold = {
                d: labels[d][data_fold[d].index]
                for d in data_fold if len(data_fold[d]) > 0
            }

            data_test = {
                d: data_test[d]
                for d in data_test if len(data_test[d]) > 0
            }
            labels_res.update({
                d: labels[d][data_test[d].index]
                for d in folds[k] if len(data_test[d]) > 0
            })

        if nested_cv:
            groups = [i for i in data_fold for _ in range(len(data_fold[i]))]
            model.fit_dict(data_fold, labels_fold, groups=groups)
        else:
            model.fit_dict(data_fold, labels_fold)

        if proba:
            predictions.update(model.predict_proba_dict(data_test))
        else:
            predictions.update(model.predict_dict(data_test))

    return predictions, labels_res
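
The fit_dict / predict_proba_dict / transform_dict calls above belong to the surrounding project. As a rough sketch of the same leave-folds-out loop with a plain scikit-learn estimator and no transform step (data layout and names are hypothetical):

# Sketch of the leave-folds-out loop with a plain scikit-learn model
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(4)
data = {k: rng.normal(size=(20, 3)) for k in "abcd"}            # one array per key (e.g. per series)
labels = {k: rng.integers(0, 2, size=20) for k in "abcd"}
folds = {0: ["a", "b"], 1: ["c", "d"]}                          # keys held out in each fold

predictions = {}
for k in folds:
    # Train on every key that is not held out in this fold
    train_keys = [d for d in data if d not in folds[k]]
    X_train = np.concatenate([data[d] for d in train_keys])
    y_train = np.concatenate([labels[d] for d in train_keys])

    model = LogisticRegression().fit(X_train, y_train)
    # Predict probabilities key by key, as predict_proba_dict does in the snippet above
    predictions.update({d: model.predict_proba(data[d])[:, 1] for d in folds[k]})

print({k: v.shape for k, v in predictions.items()})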