Ejemplo n.º 1
0
def precision_recall_curve(y_true, y_pred_proba):
    """
    Compute the data points of a precision-recall curve for a binary classifier.

    Both inputs are first converted to a Woodwork structure and then back to
    series before being handed to scikit-learn's precision-recall routine.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True binary labels.
        y_pred_proba (ww.DataColumn, pd.Series or np.ndarray): Predictions from a binary classifier, before thresholding has been applied. Note this should be the predicted probability for the "true" label.

    Returns:
        dict: Dictionary containing metrics used to generate a precision-recall plot, with the following keys:

                  * `precision`: Precision values.
                  * `recall`: Recall values.
                  * `thresholds`: Threshold values used to produce the precision and recall.
                  * `auc_score`: The area under the precision-recall curve (precision integrated over recall).
    """
    # Normalize both inputs through Woodwork so every supported input type
    # follows the same path.
    ww_labels = _convert_to_woodwork_structure(y_true)
    ww_scores = _convert_to_woodwork_structure(y_pred_proba)
    labels = _convert_woodwork_types_wrapper(ww_labels.to_series())
    scores = _convert_woodwork_types_wrapper(ww_scores.to_series())

    precision, recall, thresholds = sklearn_precision_recall_curve(labels, scores)

    curve_data = {
        'precision': precision,
        'recall': recall,
        'thresholds': thresholds,
    }
    # x-axis is recall, y-axis is precision.
    curve_data['auc_score'] = sklearn_auc(recall, precision)
    return curve_data
Ejemplo n.º 2
0
def test_precision_recall_curve():
    """Compare precision_recall_curve with scikit-learn on a fixed four-sample input."""
    labels = np.array([0, 0, 1, 1])
    scores = np.array([0.1, 0.4, 0.35, 0.8])

    # Reference values from scikit-learn.
    sk_precision, sk_recall, sk_thresholds = sklearn_precision_recall_curve(
        labels, scores)

    # Implementation under test.
    precision, recall, thresholds = precision_recall_curve(labels, scores)

    comparisons = (
        (precision, sk_precision),
        (recall, sk_recall),
        (thresholds, sk_thresholds),
    )
    for ours, reference in comparisons:
        assert array_equal(ours, reference)
Ejemplo n.º 3
0
def test_precision_recall_curve_random(n_samples, dtype):
    """Compare precision_recall_curve with scikit-learn on randomly generated inputs.

    Labels come from ``rng.randint(0, 2, n_samples)`` and scores from
    ``rng.randint(0, 1000, n_samples)``, each cast to ``dtype``.
    """

    def _sample_labels(rng):
        return rng.randint(0, 2, n_samples).astype(dtype)

    def _sample_scores(rng):
        return rng.randint(0, 1000, n_samples).astype(dtype)

    # Only the first element returned by the generator helper is used.
    labels, _, _, _ = generate_random_labels(_sample_labels)
    scores, _, _, _ = generate_random_labels(_sample_scores)

    # Reference values from scikit-learn.
    sk_precision, sk_recall, sk_thresholds = sklearn_precision_recall_curve(
        labels, scores)

    # Implementation under test.
    precision, recall, thresholds = precision_recall_curve(labels, scores)

    comparisons = (
        (precision, sk_precision),
        (recall, sk_recall),
        (thresholds, sk_thresholds),
    )
    for ours, reference in comparisons:
        assert array_equal(ours, reference)