Example 1
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# _binary_clf_curve is a private scikit-learn helper whose module moved in 0.22
try:
    from sklearn.metrics._ranking import _binary_clf_curve
except ImportError:  # scikit-learn < 0.22
    from sklearn.metrics.ranking import _binary_clf_curve


def plotIncrementalProfit(Ytest, Yprobs, ax=None, xlabel=True, ylabel=True):
    """Plots incremental profit obtained by the model as a function of probability threshold"""

    if ax is None:
        ax = plt.gca()

    fps, tps, thresh = _binary_clf_curve(Ytest, Yprobs)
    # Revenue of 9.85 per true positive, cost of 0.15 per false positive,
    # normalized by the maximum attainable profit so the curve peaks at <= 1
    profit = (9.85 * tps - 0.15 * fps) / (sum(Ytest) * 9.85)
    try:
        plt.style.use('seaborn-whitegrid')
    except OSError:  # style renamed to 'seaborn-v0_8-whitegrid' in matplotlib >= 3.6
        plt.style.use('seaborn-v0_8-whitegrid')
    step_kwargs = {'step': 'post'}
    ax.fill_between(thresh, profit, alpha=0.4, color='skyblue', **step_kwargs)
    ix = np.nanargmax(profit)
    bestThr = thresh[ix]
    ax.scatter(thresh[ix], profit[ix], marker='o', color='red', label='Best')
    ax.set_xlabel('Threshold', fontsize=18)
    ax.set_ylabel('Incremental profit (percentage of max)', fontsize=18)
    ax.set_ylim([0, 1])
    ax.set_xlim([0, 1])
    ax.xaxis.set_tick_params(labelsize=16)
    ax.yaxis.set_tick_params(labelsize=16)
    ax.legend(fontsize=14)
    if not ylabel:
        ax.set_ylabel('')
    if not xlabel:
        ax.set_xlabel('')
    ax.set_aspect('equal')
    profit_df = pd.DataFrame({
        'profit': profit[:-1],
        'threshold': thresh[:-1]
    }).transpose()

    return profit_df, bestThr
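
A minimal usage sketch; the synthetic labels and scores below are illustrative assumptions, not part of the original:

import numpy as np

rng = np.random.default_rng(0)
Ytest = rng.integers(0, 2, size=200)                          # binary ground truth
Yprobs = np.clip(Ytest * 0.6 + rng.random(200) * 0.5, 0, 1)   # noisy scores
profit_df, best_thr = plotIncrementalProfit(Ytest, Yprobs)
print('best threshold:', best_thr)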
Example 2
import math

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import PrecisionRecallDisplay, average_precision_score
from sklearn.metrics import _ranking


def prec_rec(y_true, y_pred, method, alpha=100, plot=False):
    """
    Calculates the weighted precision metric at recall levels 0.1, 0.5 and 0.9,
    as proposed in: The Deepfake Detection Challenge (DFDC) Preview Dataset
    (https://arxiv.org/abs/1910.08854)

    Parts adapted from sklearn.metrics precision_recall_curve by Christopher Otto.

    alpha = 100 as suggested in the paper.
    """
    fps, tps, thresholds = _ranking._binary_clf_curve(y_true,
                                                      y_pred,
                                                      pos_label=None,
                                                      sample_weight=None)

    weighted_precision = tps / (tps + alpha * fps)
    weighted_precision[np.isnan(weighted_precision)] = 0
    # take log of weighted precision similar to The Deepfake Detection Challenge (DFDC) Preview Dataset (https://arxiv.org/abs/1910.08854)
    weighted_precision = [
        math.log(entry) if entry > 0 else 0 for entry in weighted_precision
    ]
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    prec, rec, thresh = (np.r_[weighted_precision[sl], 1],
                         np.r_[recall[sl], 0],
                         thresholds[sl])

    # first precision entry for recall level at 0.9
    threshold_index_point_nine = len([entry
                                      for entry in rec if entry >= 0.9]) - 1
    weighted_precision_at_point_nine_rec = prec[threshold_index_point_nine]
    # first precision entry for recall level at 0.5
    threshold_index_point_five = len([entry
                                      for entry in rec if entry >= 0.5]) - 1
    weighted_precision_at_point_five_rec = prec[threshold_index_point_five]
    # first precision entry for recall level at 0.1
    threshold_index_point_one = len([entry
                                     for entry in rec if entry >= 0.1]) - 1
    weighted_precision_at_point_one_rec = prec[threshold_index_point_one]

    if plot:
        average_precision = average_precision_score(y_true, y_pred)
        viz = PrecisionRecallDisplay(
            precision=prec,
            recall=rec,
            average_precision=average_precision,
            estimator_name=f"{method}")
        disp = viz.plot(ax=None, name=f"Method: {method}")
        disp.ax_.set_title('Weighted Precision-Recall curve')
        # PrecisionRecallDisplay puts recall on the x-axis and precision on the y-axis
        plt.xlabel('Recall')
        plt.ylabel('Weighted Precision (Cost)')
        plt.savefig('w_prec_recall_curve.png')
        plt.show()

    return weighted_precision_at_point_one_rec, weighted_precision_at_point_five_rec, weighted_precision_at_point_nine_rec
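
A hedged usage sketch with synthetic scores (the data and the method label are assumptions):

import numpy as np

rng = np.random.default_rng(1)
y_true = rng.integers(0, 2, size=500)
y_pred = np.clip(y_true * 0.5 + rng.random(500) * 0.6, 0, 1)
wp_01, wp_05, wp_09 = prec_rec(y_true, y_pred, method='demo', alpha=100, plot=False)
print(wp_01, wp_05, wp_09)   # log-weighted precision at recall 0.1, 0.5, 0.9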
Example 3
import numpy as np
try:
    from sklearn.metrics._ranking import _binary_clf_curve
except ImportError:  # scikit-learn < 0.22
    from sklearn.metrics.ranking import _binary_clf_curve


def specificity_recall_calculator(y_true, probas_pred, pos_label=None,
                                  sample_weight=None):
    """Compute specificity-recall pairs for different probability thresholds,
    mirroring the structure of sklearn.metrics.precision_recall_curve."""
    fps, tps, thresholds = _binary_clf_curve(y_true, probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    specificity = (fps[-1] - fps) / fps[-1]
    specificity[np.isnan(specificity)] = 0
    recall = tps / tps[-1]

    # stop when full recall attained
    # and reverse the outputs so recall is decreasing
    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return np.r_[specificity[sl], 1], np.r_[recall[sl], 0], thresholds[sl]
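
A short usage sketch (synthetic data assumed):

import numpy as np

rng = np.random.default_rng(2)
y_true = rng.integers(0, 2, size=300)
scores = np.clip(y_true * 0.4 + rng.random(300) * 0.7, 0, 1)
spec, rec, thr = specificity_recall_calculator(y_true, scores)
# spec and rec end with the appended (specificity=1, recall=0) point, so
# thr has one fewer entry, as in sklearn.metrics.precision_recall_curve
print(spec[-1], rec[-1], len(thr) == len(spec) - 1)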
Example 4
    def find(self, clf, x_train, y_train):
        proba = clf.predict_proba(x_train)

        assert isinstance(proba, np.ndarray), \
            "classifier should return numpy array"
        assert proba.shape == (x_train.shape[0], 2), \
            "classifier should return (%d,%d)-shaped array, not %s" % (
                x_train.shape[0], 2, str(proba.shape))

        fps, tps, thresholds = _binary_clf_curve(y_train, proba[:, 1])
        precision = tps / (tps + fps)

        # thresholds are sorted in decreasing order, so scanning from the end
        # finds the lowest threshold that still meets the precision target
        for k in reversed(range(len(precision))):
            if precision[k] >= self.min_precision:
                return thresholds[k]
        # no threshold reaches min_precision: 2.0 is a sentinel above any probability
        return 2.0
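
A standalone sketch of the same threshold search; the classifier, data, and the 0.9 target are assumptions (in the original, the target comes from self.min_precision on the owning object):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
try:
    from sklearn.metrics._ranking import _binary_clf_curve
except ImportError:  # scikit-learn < 0.22
    from sklearn.metrics.ranking import _binary_clf_curve

x_train, y_train = make_classification(n_samples=200, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(x_train, y_train)
fps, tps, thresholds = _binary_clf_curve(y_train, clf.predict_proba(x_train)[:, 1])
precision = tps / (tps + fps)
# vectorized equivalent of the reversed scan above
candidates = thresholds[precision >= 0.9]
threshold = candidates[-1] if len(candidates) else 2.0
print(threshold)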
Example 5

import numpy as np
try:
    from sklearn.metrics._ranking import _binary_clf_curve
except ImportError:  # scikit-learn < 0.22
    from sklearn.metrics.ranking import _binary_clf_curve


def precision_curves(y_true,
                     probas_pred,
                     *,
                     pos_label=None,
                     sample_weight=None):
    """
    Minor adaption of corresponding scikit-learn function
    """
    fps, tps, thresholds = _binary_clf_curve(y_true,
                                             probas_pred,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    precision = tps / (tps + fps)
    precision[np.isnan(precision)] = 0
    recall = tps / tps[-1]
    specificity = 1 - fps / fps[-1]

    last_ind = tps.searchsorted(tps[-1])
    sl = slice(last_ind, None, -1)
    return (np.r_[precision[sl], 1],
            np.r_[recall[sl], 0],
            np.r_[specificity[sl], 1])
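
A usage sketch (synthetic data assumed); the three returned arrays share one index, with the appended endpoint at (precision=1, recall=0, specificity=1):

import numpy as np

rng = np.random.default_rng(3)
y_true = rng.integers(0, 2, size=400)
scores = np.clip(y_true * 0.5 + rng.random(400) * 0.6, 0, 1)
prec, rec, spec = precision_curves(y_true, scores)
print(prec[-1], rec[-1], spec[-1])   # 1.0 0.0 1.0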
Example 6
def _truncated_roc(y_df, bg_idx=-1, fp_cutoff=None):
    """
    Computes truncated ROC info
    """
    import sklearn
    import numpy as np
    try:
        from sklearn.metrics._ranking import _binary_clf_curve
    except ImportError:
        from sklearn.metrics.ranking import _binary_clf_curve
    y_true = (y_df['true'] == y_df['pred'])
    y_score = y_df['score']
    sample_weight = y_df['weight']

    # y_true[y_true == -1] = 0

    # < TRUNCATED PART >
    # GET ROC CURVES AT A PARTICULAR FALSE POSITIVE COUNT CUTOFF
    # This will let different runs be more comparable
    realpos_total = sample_weight[(y_df['txs'] >= 0)].sum()

    fp_count, tp_count, count_thresholds = _binary_clf_curve(
        y_true, y_score, pos_label=1, sample_weight=sample_weight)

    if len(count_thresholds) > 0 and count_thresholds[-1] == 0:
        # Chop off the last entry where it will jump
        count_thresholds = count_thresholds[:-1]
        tp_count = tp_count[:-1]
        fp_count = fp_count[:-1]

    # Cutoff the curves at a comparable point
    if fp_cutoff is None:
        fp_cutoff = np.inf
    idxs = np.where(fp_count > fp_cutoff)[0]
    if len(idxs) == 0:
        idx = len(fp_count)
    else:
        idx = idxs[0]
    trunc_fp_count = fp_count[:idx]
    trunc_tp_count = tp_count[:idx]
    trunc_thresholds = count_thresholds[:idx]

    # if the cutoff was not reached, horizontally extend the curve.
    # This will hurt the scores (i.e. we may be biased against small
    # scenes), but this will ensure that big scenes are comparable
    if len(fp_count) == 0:
        trunc_fp_count = np.array([fp_cutoff])
        trunc_tp_count = np.array([0])
        trunc_thresholds = np.array([0])
        # THIS WILL CAUSE AUC TO RAISE AN ERROR IF IT GETS HIT
    elif fp_count[-1] < fp_cutoff and np.isfinite(fp_cutoff):
        trunc_fp_count = np.hstack([trunc_fp_count, [fp_cutoff]])
        trunc_tp_count = np.hstack([trunc_tp_count, [trunc_tp_count[-1]]])
        trunc_thresholds = np.hstack([trunc_thresholds, [0]])

    falsepos_total = trunc_fp_count[-1]  # is this right?

    trunc_tpr = trunc_tp_count / realpos_total
    trunc_fpr = trunc_fp_count / falsepos_total
    trunc_auc = sklearn.metrics.auc(trunc_fpr, trunc_tpr)
    # < /TRUNCATED PART >
    roc_info = {
        'fp_cutoff': fp_cutoff,
        'realpos_total': realpos_total,
        'tpr': trunc_tpr,
        'fpr': trunc_fpr,
        'fp_count': trunc_fp_count,
        'tp_count': trunc_tp_count,
        'thresholds': trunc_thresholds,
        'auc': trunc_auc,
    }
    return roc_info
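
A toy invocation; the frame below and its column semantics are inferred from the function body and are only an assumption ('txs' >= 0 is taken to mark assigned truths):

import pandas as pd

y_df = pd.DataFrame({
    'true':   [1, 1, 0, 1, 0, 0],
    'pred':   [1, 0, 0, 1, 1, 0],
    'score':  [0.9, 0.2, 0.4, 0.8, 0.7, 0.1],
    'weight': [1.0] * 6,
    'txs':    [0, 1, 2, 3, -1, -1],  # hypothetical: >= 0 marks assigned truths
})
info = _truncated_roc(y_df, fp_cutoff=2)
print(info['auc'], info['fp_count'])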
Example 7
    def roc(self, fp_cutoff=None, stabalize_thresh=7, stabalize_pad=7):
        """
        Example:
            >>> self = BinaryConfusionVectors.demo(n=0)
            >>> print('roc = {}'.format(ub.repr2(self.roc())))
            >>> self = BinaryConfusionVectors.demo(n=1, p_true=0.5, p_error=0.5)
            >>> print('roc = {}'.format(ub.repr2(self.roc())))
            >>> self = BinaryConfusionVectors.demo(n=3, p_true=0.5, p_error=0.5)
            >>> print('roc = {}'.format(ub.repr2(self.roc())))
        """
        import sklearn
        import sklearn.metrics  # NOQA
        try:
            from sklearn.metrics._ranking import _binary_clf_curve
        except ImportError:
            from sklearn.metrics.ranking import _binary_clf_curve

        data = self.data
        y_true = data['is_true'].astype(np.uint8)
        y_score = data['pred_score']
        sample_weight = data._data.get('weight', None)

        npad = 0
        if len(self) > 0:
            if len(self) <= stabalize_thresh:
                # add dummy data to stabilize the computation
                if sample_weight is None:
                    sample_weight = np.ones(len(self))
                npad = stabalize_pad
                y_true, y_score, sample_weight = _stabalilze_data(
                    y_true, y_score, sample_weight, npad=npad)

        if sample_weight is None:
            weight = 1
            nsupport = len(y_true) - bool(npad)
        else:
            weight = sample_weight
            nsupport = sample_weight.sum() - bool(npad)

        # y_true[y_true == -1] = 0

        # < TRUNCATED PART >
        # GET ROC CURVES AT A PARTICULAR FALSE POSITIVE COUNT CUTOFF
        # This will let different runs be more comparable

        # Get the total weight (typically number of) positive and negative
        # examples of this class
        realpos_total = (y_true * weight).sum()
        realneg_total = ((1 - y_true) * weight).sum()

        if len(self) == 0:
            fp_count = np.array([np.nan])
            tp_count = np.array([np.nan])
            count_thresholds = np.array([np.nan])
        else:
            fp_count, tp_count, count_thresholds = _binary_clf_curve(
                y_true, y_score, pos_label=1, sample_weight=sample_weight)

        if len(count_thresholds) > 0 and count_thresholds[-1] == 0:
            # Chop off the last entry where it will jump
            count_thresholds = count_thresholds[:-1]
            tp_count = tp_count[:-1]
            fp_count = fp_count[:-1]

        # Cutoff the curves at a comparable point
        if fp_cutoff is None:
            fp_cutoff = np.inf
        elif isinstance(fp_cutoff, str):
            if fp_cutoff == 'num_true':
                fp_cutoff = int(np.ceil(realpos_total))
            else:
                raise KeyError(fp_cutoff)

        idxs = np.where(fp_count > fp_cutoff)[0]
        if len(idxs) == 0:
            idx = len(fp_count)
        else:
            idx = idxs[0]
        trunc_fp_count = fp_count[:idx]
        trunc_tp_count = tp_count[:idx]
        trunc_thresholds = count_thresholds[:idx]

        # if the cutoff was not reached, horizontally extend the curve.
        # This will hurt the scores (i.e. we may be biased against small
        # scenes), but this will ensure that big scenes are comparable
        if len(fp_count) == 0:
            trunc_fp_count = np.array([fp_cutoff])
            trunc_tp_count = np.array([0])
            trunc_thresholds = np.array([0])
            # THIS WILL CAUSE AUC TO RAISE AN ERROR IF IT GETS HIT
        elif fp_count[-1] < fp_cutoff and np.isfinite(fp_cutoff):
            trunc_fp_count = np.hstack([trunc_fp_count, [fp_cutoff]])
            trunc_tp_count = np.hstack([trunc_tp_count, [trunc_tp_count[-1]]])
            trunc_thresholds = np.hstack([trunc_thresholds, [0]])

        falsepos_total = trunc_fp_count[-1]

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='invalid .* true_divide')
            trunc_tpr = trunc_tp_count / realpos_total
            trunc_fpr = trunc_fp_count / falsepos_total
            try:
                trunc_auc = sklearn.metrics.auc(trunc_fpr, trunc_tpr)
            except ValueError:
                # At least 2 points are needed to compute area under curve, but x.shape = 1
                trunc_auc = np.nan
        # < /TRUNCATED PART >
        roc_info = {
            'fp_cutoff': fp_cutoff,
            'realpos_total': realpos_total,
            'realneg_total': realneg_total,
            'nsupport': nsupport,
            'tpr': trunc_tpr,
            'fpr': trunc_fpr,
            'fp_count': trunc_fp_count,
            'tp_count': trunc_tp_count,
            'thresholds': trunc_thresholds,
            'auc': trunc_auc,
        }
        if self.cx is not None:
            roc_info.update({
                'cx': self.cx,
                'node': self.classes[self.cx],
            })
        return ROC_Result(roc_info)
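
The doctests above use tiny vectors; a hypothetical larger run with the 'num_true' cutoff (BinaryConfusionVectors, ub, and _stabalilze_data are assumed to come from the surrounding module):

self = BinaryConfusionVectors.demo(n=100, p_true=0.7, p_error=0.2)
print('roc = {}'.format(ub.repr2(self.roc(fp_cutoff='num_true'))))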
Example 8
    def precision_recall(self,
                         stabalize_thresh=7,
                         stabalize_pad=7,
                         method='sklearn'):
        """
        Example:
            >>> self = BinaryConfusionVectors.demo(n=11)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))
            >>> self = BinaryConfusionVectors.demo(n=7)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))
            >>> self = BinaryConfusionVectors.demo(n=5)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))
            >>> self = BinaryConfusionVectors.demo(n=3)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))
            >>> self = BinaryConfusionVectors.demo(n=2)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))
            >>> self = BinaryConfusionVectors.demo(n=1)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))

            >>> self = BinaryConfusionVectors.demo(n=0)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))

            >>> self = BinaryConfusionVectors.demo(n=1, p_true=0.5, p_error=0.5)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))


            >>> self = BinaryConfusionVectors.demo(n=3, p_true=0.5, p_error=0.5)
            >>> print('precision_recall = {}'.format(ub.repr2(self.precision_recall())))

        """
        import sklearn
        import sklearn.metrics  # NOQA
        try:
            from sklearn.metrics._ranking import _binary_clf_curve
        except ImportError:
            from sklearn.metrics.ranking import _binary_clf_curve

        data = self.data
        y_true = data['is_true'].astype(np.uint8)
        y_score = data['pred_score']
        sample_weight = data._data.get('weight', None)

        npad = 0
        if len(self) == 0:
            ap = np.nan
            prec = [np.nan]
            rec = [np.nan]
            fps = [np.nan]
            fns = [np.nan]
            tps = [np.nan]
            thresholds = [np.nan]

            realpos_total = 0
            realneg_total = 0
            nsupport = 0
        else:
            if len(self) <= stabalize_thresh:
                # add dummy data to stabilize the computation
                if sample_weight is None:
                    sample_weight = np.ones(len(self))
                npad = stabalize_pad
                y_true, y_score, sample_weight = _stabalilze_data(
                    y_true, y_score, sample_weight, npad=npad)

            # Get the total weight (typically number of) positive and negative
            # examples of this class
            if sample_weight is None:
                weight = 1
                nsupport = len(y_true) - bool(npad)
            else:
                weight = sample_weight
                nsupport = sample_weight.sum() - bool(npad)

            realpos_total = (y_true * weight).sum()
            realneg_total = ((1 - y_true) * weight).sum()
            """
            Notes:
                Apparently, consistent scoring is really hard to get right.

                For detection problems scoring via
                confusion_vectors+sklearn produces noticably different
                results than the VOC method. There are a few reasons for
                this.  The VOC method stops counting true positives after
                all assigned predicted boxes have been counted. It simply
                remembers the amount of original true positives to
                normalize the true positive reate. On the other hand,
                confusion vectors maintains a list of these unassigned true
                boxes and gives them a predicted index of -1 and a score of
                zero. This means that this function sees them as having a
                y_true of 1 and a y_score of 0, which allows the
                scikit-learn fps and tps counts to effectively get up to
                100% recall when the threshold is zero. The VOC method
                simply ignores these and handles them implicitly. The
                problem is that if you remove these from the scikit-learn
                inputs, it wont see the correct number of positives and it
                will incorrectly normalize the recall.  In summary:

                    VOC:
                        * remembers realpos_total
                        * doesn't count unassigned truths as TP when the
                        threshold is zero.

                    CV+SKL:
                        * counts unassigned truths as TP with score=0.
                        * Always ensure tpr=1, ppv=0 and ppv=1, tpr=0 cases
                        exist.
            """
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        message='invalid .* true_divide')

                if method.startswith('voc'):
                    # VOC-style scoring ignores unassigned truths (score of 0);
                    # the sample weights must be filtered consistently
                    keep = y_score > 0
                    sample_weight_ = (sample_weight[keep]
                                      if sample_weight is not None else None)
                    fps, tps, _thresholds = _binary_clf_curve(
                        y_true[keep],
                        y_score[keep],
                        pos_label=1.0,
                        sample_weight=sample_weight_)
                elif method == 'sklearn':
                    fps, tps, _thresholds = _binary_clf_curve(
                        y_true,
                        y_score,
                        pos_label=1.0,
                        sample_weight=sample_weight)
                else:
                    raise KeyError(method)

                # Slight tweak to sklearn.metrics.precision_recall_curve
                fns = realpos_total - tps

                precision = tps / (tps + fps)
                precision[np.isnan(precision)] = 0
                recall = tps / realpos_total

                # stop when full recall attained
                # and reverse the outputs so recall is decreasing
                last_ind = tps.searchsorted(tps[-1])
                sl = slice(last_ind, None, -1)
                prec, rec, thresholds = (np.r_[precision[sl], 1],
                                         np.r_[recall[sl], 0],
                                         _thresholds[sl])

                if method.startswith('voc'):
                    from netharn.metrics.voc_metrics import _voc_ave_precision
                    ap = _voc_ave_precision(rec[::-1],
                                            prec[::-1],
                                            method=method)
                elif method == 'sklearn':
                    ap = sklearn.metrics.average_precision_score(
                        y_score=y_score,
                        y_true=y_true,
                        sample_weight=sample_weight)

        prs_info = {
            'ap': ap,
            'ppv': prec,  # (positive predictive value) == (precision)
            'tpr': rec,  # (true positive rate) == (recall)
            'fp_count': fps,
            'tp_count': tps,
            'fn_count': fns,
            'thresholds': thresholds,
            'nsupport': nsupport,
            'realpos_total': realpos_total,
            'realneg_total': realneg_total,
        }
        if self.cx is not None:
            prs_info.update({
                'cx': self.cx,
                'node': self.classes[self.cx],
            })
        return PR_Result(prs_info)
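
A hypothetical comparison of the two scoring methods on the same vectors (same assumptions about the surrounding module as above; the VOC path additionally needs netharn.metrics.voc_metrics, and the 'voc2012' method name is itself an assumption):

self = BinaryConfusionVectors.demo(n=100, p_true=0.7, p_error=0.2)
print('sklearn: {}'.format(ub.repr2(self.precision_recall(method='sklearn'))))
# print('voc: {}'.format(ub.repr2(self.precision_recall(method='voc2012'))))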
Example 9
    def _binary_clf_curves(self, stabalize_thresh=7, stabalize_pad=7):
        """
        Code common to ROC, PR, and threshold measures

        TODO: refactor ROC and PR curves to use this code, perhaps even
        memoizing it.
        """
        try:
            from sklearn.metrics._ranking import _binary_clf_curve
        except ImportError:
            from sklearn.metrics.ranking import _binary_clf_curve
        data = self.data
        y_true = data['is_true'].astype(np.uint8)
        y_score = data['pred_score']
        sample_weight = data._data.get('weight', None)

        npad = 0
        if len(self) == 0:
            fps = [np.nan]
            fns = [np.nan]
            tps = [np.nan]
            thresholds = [np.nan]

            realpos_total = 0
            realneg_total = 0
            nsupport = 0
        else:
            if len(self) <= stabalize_thresh:
                # add dummy data to stabilize the computation
                if sample_weight is None:
                    sample_weight = np.ones(len(self))
                npad = stabalize_pad
                y_true, y_score, sample_weight = _stabalilze_data(
                    y_true, y_score, sample_weight, npad=npad)

            # Get the total weight (typically number of) positive and negative
            # examples of this class
            if sample_weight is None:
                weight = 1
                nsupport = len(y_true) - bool(npad)
            else:
                weight = sample_weight
                nsupport = sample_weight.sum() - bool(npad)

            realpos_total = (y_true * weight).sum()
            realneg_total = ((1 - y_true) * weight).sum()

            fps, tps, thresholds = _binary_clf_curve(
                y_true, y_score, pos_label=1.0, sample_weight=sample_weight)

            # Adjust weighted totals to be robust to floating point errors
            if np.isclose(realneg_total, fps[-1]):
                realneg_total = max(realneg_total, fps[-1])
            if np.isclose(realpos_total, tps[-1]):
                realpos_total = max(realpos_total, tps[-1])

        tns = realneg_total - fps
        fns = realpos_total - tps

        info = {
            'fp_count': fps,
            'tp_count': tps,
            'tn_count': tns,
            'fn_count': fns,
            'thresholds': thresholds,
            'realpos_total': realpos_total,
            'realneg_total': realneg_total,
            'nsupport': nsupport,
        }
        if self.cx is not None:
            info.update({
                'cx': self.cx,
                'node': self.classes[self.cx],
            })
        return info
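
A quick sanity check of the returned counts (same module assumptions as above). Because tn and fn are derived from the weighted totals, tp + fp + tn + fn equals realpos_total + realneg_total at every threshold:

self = BinaryConfusionVectors.demo(n=50)
info = self._binary_clf_curves()
total = info['tp_count'] + info['fp_count'] + info['tn_count'] + info['fn_count']
assert np.allclose(total, info['realpos_total'] + info['realneg_total'])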
Example 10
import warnings

import numpy as np
from sklearn.exceptions import UndefinedMetricWarning
try:
    from sklearn.metrics._ranking import _binary_clf_curve
except ImportError:  # scikit-learn < 0.22
    from sklearn.metrics.ranking import _binary_clf_curve


def roc_curve(y_true,
              y_score,
              pos_label=None,
              sample_weight=None,
              drop_intermediate=True):
    """Compute Receiver operating characteristic (ROC)

    Note: this implementation is restricted to the binary classification task.

    Read more in the :ref:`User Guide <roc_metrics>`.

    Parameters
    ----------

    y_true : array, shape = [n_samples]
        True binary labels. If labels are not either {-1, 1} or {0, 1}, then
        pos_label should be explicitly given.

    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or non-thresholded measure of decisions
        (as returned by "decision_function" on some classifiers).

    pos_label : int or str, default=None
        The label of the positive class.
        When ``pos_label=None``, if y_true is in {-1, 1} or {0, 1},
        ``pos_label`` is set to 1, otherwise an error will be raised.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    drop_intermediate : boolean, optional (default=True)
        Whether to drop some suboptimal thresholds which would not appear
        on a plotted ROC curve. This is useful in order to create lighter
        ROC curves.

        .. versionadded:: 0.17
           parameter *drop_intermediate*.

    Returns
    -------
    fpr : array, shape = [>2]
        Increasing false positive rates such that element i is the false
        positive rate of predictions with score >= thresholds[i].

    tpr : array, shape = [>2]
        Increasing true positive rates such that element i is the true
        positive rate of predictions with score >= thresholds[i].

    thresholds : array, shape = [n_thresholds]
        Decreasing thresholds on the decision function used to compute
        fpr and tpr. `thresholds[0]` represents no instances being predicted
        and is arbitrarily set to `max(y_score) + 1`.

    See also
    --------
    roc_auc_score : Compute the area under the ROC curve

    Notes
    -----
    Since the thresholds are sorted from low to high values, they
    are reversed upon returning them to ensure they correspond to both ``fpr``
    and ``tpr``, which are sorted in reversed order during their calculation.

    References
    ----------
    .. [1] `Wikipedia entry for the Receiver operating characteristic
            <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_

    .. [2] Fawcett T. An introduction to ROC analysis. Pattern Recognition
           Letters, 2006, 27(8):861-874.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn import metrics
    >>> y = np.array([1, 1, 2, 2])
    >>> scores = np.array([0.1, 0.4, 0.35, 0.8])
    >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)
    >>> fpr
    array([0. , 0. , 0.5, 0.5, 1. ])
    >>> tpr
    array([0. , 0.5, 0.5, 1. , 1. ])
    >>> thresholds
    array([1.8 , 0.8 , 0.4 , 0.35, 0.1 ])

    """
    fps, tps, thresholds = _binary_clf_curve(y_true,
                                             y_score,
                                             pos_label=pos_label,
                                             sample_weight=sample_weight)

    # Attempt to drop thresholds corresponding to points in between and
    # collinear with other points. These are always suboptimal and do not
    # appear on a plotted ROC curve (and thus do not affect the AUC).
    # Here np.diff(_, 2) is used as a "second derivative" to tell if there
    # is a corner at the point. Both fps and tps must be tested to handle
    # thresholds with multiple data points (which are combined in
    # _binary_clf_curve). This keeps all cases where the point should be kept,
    # but does not drop more complicated cases like fps = [1, 3, 7],
    # tps = [1, 2, 4]; there is no harm in keeping too many thresholds.
    if drop_intermediate and len(fps) > 2:
        optimal_idxs = np.where(
            np.r_[True,
                  np.logical_or(np.diff(fps, 2), np.diff(tps, 2)), True])[0]
        fps = fps[optimal_idxs]
        tps = tps[optimal_idxs]
        thresholds = thresholds[optimal_idxs]

    # Add an extra threshold position
    # to make sure that the curve starts at (0, 0)
    tps = np.r_[0, tps]
    fps = np.r_[0, fps]
    # thresholds[0] is max(y_score), so adding 1 matches the documented
    # `max(y_score) + 1` and the doctest above
    thresholds = np.r_[thresholds[0] + 1, thresholds]

    if fps[-1] <= 0:
        warnings.warn(
            "No negative samples in y_true, "
            "false positive value should be meaningless",
            UndefinedMetricWarning)
        fpr = np.repeat(np.nan, fps.shape)
    else:
        fpr = fps / fps[-1]

    if tps[-1] <= 0:
        warnings.warn(
            "No positive samples in y_true, "
            "true positive value should be meaningless",
            UndefinedMetricWarning)
        tpr = np.repeat(np.nan, tps.shape)
    else:
        tpr = tps / tps[-1]

    return fpr, tpr, thresholds
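
A small check of the drop_intermediate behavior (the toy labels and scores are assumptions): with perfectly separable scores, the interior collinear points are removed while the endpoints and the corner survive.

import numpy as np

y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
scores = np.array([0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9])
fpr_full, tpr_full, thr_full = roc_curve(y, scores, drop_intermediate=False)
fpr_min, tpr_min, thr_min = roc_curve(y, scores)
# collinear interior thresholds are dropped; the plotted curve and AUC are unchanged
print(len(thr_full), len(thr_min))   # 9 4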