def precision_recall_curve(y_true, y_pred_proba):
    """Compute the data representing a precision-recall curve.

    Given true labels and a binary classifier's predicted probabilities,
    compute precision and recall at each threshold, along with the area
    under the resulting precision-recall curve.

    Arguments:
        y_true (ww.DataColumn, pd.Series or np.ndarray): True binary labels.
        y_pred_proba (ww.DataColumn, pd.Series or np.ndarray): Predictions from a binary classifier,
            before thresholding has been applied. Note this should be the predicted probability
            for the "true" label.

    Returns:
        dict: Dictionary containing metrics used to generate a precision-recall plot, with the following keys:

            * `precision`: Precision values.
            * `recall`: Recall values.
            * `thresholds`: Threshold values used to produce the precision and recall.
            * `auc_score`: The area under the precision-recall curve.
    """
    # Normalize both inputs through woodwork, then convert them back to
    # series before handing them to sklearn.
    y_true = _convert_to_woodwork_structure(y_true)
    y_pred_proba = _convert_to_woodwork_structure(y_pred_proba)
    y_true = _convert_woodwork_types_wrapper(y_true.to_series())
    y_pred_proba = _convert_woodwork_types_wrapper(y_pred_proba.to_series())

    precision, recall, thresholds = sklearn_precision_recall_curve(
        y_true, y_pred_proba)

    # x = recall, y = precision: this is the area under the precision-recall
    # curve, not the ROC AUC.
    auc_score = sklearn_auc(recall, precision)

    return {
        'precision': precision,
        'recall': recall,
        'thresholds': thresholds,
        'auc_score': auc_score,
    }
def test_precision_recall_curve():
    """Check precision_recall_curve against sklearn on a small fixed example."""
    y_true = np.array([0, 0, 1, 1])
    y_score = np.array([0.1, 0.4, 0.35, 0.8])

    precision_using_sk, recall_using_sk, thresholds_using_sk = \
        sklearn_precision_recall_curve(y_true, y_score)

    # precision_recall_curve returns a dict with four keys ('precision',
    # 'recall', 'thresholds', 'auc_score'); unpacking it into three names
    # would raise ValueError, so read each array by key instead.
    curve = precision_recall_curve(y_true, y_score)

    assert array_equal(curve['precision'], precision_using_sk)
    assert array_equal(curve['recall'], recall_using_sk)
    assert array_equal(curve['thresholds'], thresholds_using_sk)
def test_precision_recall_curve_random(n_samples, dtype):
    """Check precision_recall_curve against sklearn on random labels and scores.

    Arguments:
        n_samples: Number of labels/scores to generate.
        dtype: dtype the generated labels and scores are cast to.

    NOTE(review): n_samples and dtype are presumably supplied by pytest
    parametrization or fixtures that are not visible here - confirm.
    """
    # generate_random_labels returns four values; only the first is used.
    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))
    y_score, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    precision_using_sk, recall_using_sk, thresholds_using_sk = \
        sklearn_precision_recall_curve(y_true, y_score)

    # precision_recall_curve returns a dict with four keys ('precision',
    # 'recall', 'thresholds', 'auc_score'); unpacking it into three names
    # would raise ValueError, so read each array by key instead.
    curve = precision_recall_curve(y_true, y_score)

    assert array_equal(curve['precision'], precision_using_sk)
    assert array_equal(curve['recall'], recall_using_sk)
    assert array_equal(curve['thresholds'], thresholds_using_sk)