import math
from pathlib import Path
from typing import Optional

import pandas as pd
from sklearn.metrics import auc, precision_recall_curve, recall_score, roc_auc_score, roc_curve

# The remaining names used below (LoggingColumns, MetricsDict, Results, LabelsAndPredictions,
# ReportedScalarMetrics, ReportedMetrics, binary_classification_accuracy,
# read_csv_and_filter_prediction_target, get_results) come from the surrounding project
# and are assumed to be importable from its modules.


def get_correct_and_misclassified_examples(val_metrics_csv: Path,
                                           test_metrics_csv: Path,
                                           prediction_target: str = "Default") -> Results:
    """
    Given the paths to the metrics files for the validation and test sets, get a list of true positives,
    false positives, false negatives and true negatives.
    The threshold for classification is obtained by looking at the validation file, and applied to the test set
    to get label predictions.
    The validation and test csvs must have at least the following columns (defined in the LoggingColumns enum):
    LoggingColumns.Hue, LoggingColumns.Patient, LoggingColumns.Label, LoggingColumns.ModelOutput.
    """
    df_val = read_csv_and_filter_prediction_target(val_metrics_csv, prediction_target)

    fpr, tpr, thresholds = roc_curve(df_val[LoggingColumns.Label.value], df_val[LoggingColumns.ModelOutput.value])
    optimal_idx = MetricsDict.get_optimal_idx(fpr=fpr, tpr=tpr)
    optimal_threshold = thresholds[optimal_idx]

    df_test = read_csv_and_filter_prediction_target(test_metrics_csv, prediction_target)

    df_test["predicted"] = df_test.apply(lambda x: int(x[LoggingColumns.ModelOutput.value] >= optimal_threshold),
                                         axis=1)

    true_positives = df_test[(df_test["predicted"] == 1) & (df_test[LoggingColumns.Label.value] == 1)]
    false_positives = df_test[(df_test["predicted"] == 1) & (df_test[LoggingColumns.Label.value] == 0)]
    false_negatives = df_test[(df_test["predicted"] == 0) & (df_test[LoggingColumns.Label.value] == 1)]
    true_negatives = df_test[(df_test["predicted"] == 0) & (df_test[LoggingColumns.Label.value] == 0)]

    return Results(true_positives=true_positives,
                   true_negatives=true_negatives,
                   false_positives=false_positives,
                   false_negatives=false_negatives)
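
# A minimal, self-contained sketch of the thresholding step used above, assuming that
# MetricsDict.get_optimal_idx picks the ROC operating point that maximises Youden's J statistic
# (tpr - fpr). The labels and model outputs here are made up for illustration only.
def _demo_optimal_threshold() -> None:
    import numpy as np
    from sklearn.metrics import roc_curve

    labels = np.array([0, 0, 0, 1, 1, 1, 1, 0])
    model_outputs = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.9, 0.55, 0.6])

    fpr, tpr, thresholds = roc_curve(labels, model_outputs)
    optimal_idx = int(np.argmax(tpr - fpr))  # Youden's J; assumed equivalent to get_optimal_idx
    optimal_threshold = thresholds[optimal_idx]

    predicted = (model_outputs >= optimal_threshold).astype(int)
    print(f"Optimal threshold: {optimal_threshold:.2f}, predictions: {predicted}")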
def get_metric(predictions_to_set_optimal_threshold: LabelsAndPredictions,
               predictions_to_compute_metrics: LabelsAndPredictions,
               metric: ReportedScalarMetrics,
               optimal_threshold: Optional[float] = None) -> float:
    """
    Given LabelsAndPredictions objects for the validation and test sets, return the specified metric.

    :param predictions_to_set_optimal_threshold: This set of ground truth labels and model predictions is used to
        determine the optimal threshold for classification.
    :param predictions_to_compute_metrics: The set of labels and model outputs to calculate metrics for.
    :param metric: The name of the metric to calculate.
    :param optimal_threshold: If provided, use this threshold instead of calculating an optimal threshold.
    """
    if optimal_threshold is None:
        fpr, tpr, thresholds = roc_curve(predictions_to_set_optimal_threshold.labels,
                                         predictions_to_set_optimal_threshold.model_outputs)
        optimal_idx = MetricsDict.get_optimal_idx(fpr=fpr, tpr=tpr)
        optimal_threshold = thresholds[optimal_idx]

    assert optimal_threshold is not None  # for mypy: the threshold has been computed above if it was not provided

    if metric is ReportedScalarMetrics.OptimalThreshold:
        return optimal_threshold

    only_one_class_present = len(set(predictions_to_compute_metrics.labels)) < 2

    if metric is ReportedScalarMetrics.AUC_ROC:
        return math.nan if only_one_class_present else roc_auc_score(predictions_to_compute_metrics.labels,
                                                                     predictions_to_compute_metrics.model_outputs)
    elif metric is ReportedScalarMetrics.AUC_PR:
        if only_one_class_present:
            return math.nan
        precision, recall, _ = precision_recall_curve(predictions_to_compute_metrics.labels,
                                                      predictions_to_compute_metrics.model_outputs)
        return auc(recall, precision)
    elif metric is ReportedScalarMetrics.AccuracyAtOptimalThreshold:
        return binary_classification_accuracy(model_output=predictions_to_compute_metrics.model_outputs,
                                              label=predictions_to_compute_metrics.labels,
                                              threshold=optimal_threshold)
    elif metric is ReportedScalarMetrics.AccuracyAtThreshold05:
        return binary_classification_accuracy(model_output=predictions_to_compute_metrics.model_outputs,
                                              label=predictions_to_compute_metrics.labels,
                                              threshold=0.5)
    elif metric is ReportedScalarMetrics.Specificity:
        return recall_score(predictions_to_compute_metrics.labels,
                            predictions_to_compute_metrics.model_outputs >= optimal_threshold,
                            pos_label=0)
    elif metric is ReportedScalarMetrics.Sensitivity:
        return recall_score(predictions_to_compute_metrics.labels,
                            predictions_to_compute_metrics.model_outputs >= optimal_threshold)
    else:
        raise ValueError("Unknown metric")
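
# A short, self-contained illustration of the sensitivity/specificity branches above:
# recall_score on the positive class gives sensitivity, while recall_score with pos_label=0
# treats the negative class as "positive" and therefore yields specificity. Inputs are made up.
def _demo_sensitivity_specificity() -> None:
    import numpy as np
    from sklearn.metrics import recall_score

    labels = np.array([1, 1, 1, 0, 0, 0, 0, 1])
    model_outputs = np.array([0.9, 0.8, 0.3, 0.2, 0.6, 0.1, 0.4, 0.7])
    threshold = 0.5

    hard_predictions = model_outputs >= threshold
    sensitivity = recall_score(labels, hard_predictions)               # TP / (TP + FN)
    specificity = recall_score(labels, hard_predictions, pos_label=0)  # TN / (TN + FP)
    print(f"Sensitivity: {sensitivity:.2f}, Specificity: {specificity:.2f}")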
def get_correct_and_misclassified_examples(val_metrics_csv: Path, test_metrics_csv: Path) -> Results:
    """
    Given the paths to the metrics files for the validation and test sets, get a list of true positives,
    false positives, false negatives and true negatives.
    The threshold for classification is obtained by looking at the validation file, and applied to the test set
    to get label predictions.
    """
    df_val = pd.read_csv(val_metrics_csv)

    if not df_val[LoggingColumns.Patient.value].is_unique:
        raise ValueError(f"Subject IDs should be unique, but found duplicate entries "
                         f"in column {LoggingColumns.Patient.value} in the csv file.")

    fpr, tpr, thresholds = roc_curve(df_val[LoggingColumns.Label.value], df_val[LoggingColumns.ModelOutput.value])
    optimal_idx = MetricsDict.get_optimal_idx(fpr=fpr, tpr=tpr)
    optimal_threshold = thresholds[optimal_idx]

    df_test = pd.read_csv(test_metrics_csv)

    if not df_test[LoggingColumns.Patient.value].is_unique:
        raise ValueError(f"Subject IDs should be unique, but found duplicate entries "
                         f"in column {LoggingColumns.Patient.value} in the csv file.")

    df_test["predicted"] = df_test.apply(lambda x: int(x[LoggingColumns.ModelOutput.value] >= optimal_threshold),
                                         axis=1)

    true_positives = df_test[(df_test["predicted"] == 1) & (df_test[LoggingColumns.Label.value] == 1)]
    false_positives = df_test[(df_test["predicted"] == 1) & (df_test[LoggingColumns.Label.value] == 0)]
    false_negatives = df_test[(df_test["predicted"] == 0) & (df_test[LoggingColumns.Label.value] == 1)]
    true_negatives = df_test[(df_test["predicted"] == 0) & (df_test[LoggingColumns.Label.value] == 0)]

    return Results(true_positives=true_positives,
                   true_negatives=true_negatives,
                   false_positives=false_positives,
                   false_negatives=false_negatives)
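
# An equivalent, vectorised form of the thresholding and confusion-matrix split used above,
# written as a standalone sketch. The plain column names ("label", "model_output", "subject") and
# the threshold value are illustrative only; the functions above use the LoggingColumns enum values.
def _demo_confusion_split() -> None:
    import pandas as pd

    df = pd.DataFrame({"subject": [1, 2, 3, 4],
                       "label": [1, 0, 1, 0],
                       "model_output": [0.9, 0.7, 0.2, 0.1]})
    threshold = 0.5

    # Vectorised comparison instead of a row-wise apply; the resulting column is identical.
    df["predicted"] = (df["model_output"] >= threshold).astype(int)

    true_positives = df[(df["predicted"] == 1) & (df["label"] == 1)]
    false_positives = df[(df["predicted"] == 1) & (df["label"] == 0)]
    false_negatives = df[(df["predicted"] == 0) & (df["label"] == 1)]
    true_negatives = df[(df["predicted"] == 0) & (df["label"] == 0)]
    print(len(true_positives), len(false_positives), len(false_negatives), len(true_negatives))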
def get_metric(val_metrics_csv: Path, test_metrics_csv: Path, metric: ReportedMetrics) -> float:
    """
    Given csv files with the model outputs and ground truth labels for the validation and test sets,
    return the specified metric computed on the test set, using a classification threshold chosen
    on the validation set.
    """
    results_val = get_results(val_metrics_csv)
    fpr, tpr, thresholds = roc_curve(results_val.labels, results_val.model_outputs)
    optimal_idx = MetricsDict.get_optimal_idx(fpr=fpr, tpr=tpr)
    optimal_threshold = thresholds[optimal_idx]

    if metric is ReportedMetrics.OptimalThreshold:
        return optimal_threshold

    results_test = get_results(test_metrics_csv)

    only_one_class_present = len(set(results_test.labels)) < 2

    if metric is ReportedMetrics.AUC_ROC:
        return math.nan if only_one_class_present else roc_auc_score(results_test.labels, results_test.model_outputs)
    elif metric is ReportedMetrics.AUC_PR:
        if only_one_class_present:
            return math.nan
        precision, recall, _ = precision_recall_curve(results_test.labels, results_test.model_outputs)
        return auc(recall, precision)
    elif metric is ReportedMetrics.Accuracy:
        return binary_classification_accuracy(model_output=results_test.model_outputs,
                                              label=results_test.labels,
                                              threshold=optimal_threshold)
    elif metric is ReportedMetrics.FalsePositiveRate:
        tnr = recall_score(results_test.labels, results_test.model_outputs >= optimal_threshold, pos_label=0)
        return 1 - tnr
    elif metric is ReportedMetrics.FalseNegativeRate:
        return 1 - recall_score(results_test.labels, results_test.model_outputs >= optimal_threshold)
    else:
        raise ValueError("Unknown metric")
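
# A standalone sketch of the AUC-PR branch above: build the precision-recall curve and integrate it
# with the trapezoidal rule via sklearn's auc. Note that this differs slightly from
# sklearn.metrics.average_precision_score, which uses a step-wise (non-interpolated) sum.
# The labels and model outputs are made up for illustration.
def _demo_auc_pr() -> None:
    import numpy as np
    from sklearn.metrics import auc, precision_recall_curve

    labels = np.array([0, 0, 1, 1, 1, 0])
    model_outputs = np.array([0.1, 0.4, 0.35, 0.8, 0.75, 0.2])

    precision, recall, _ = precision_recall_curve(labels, model_outputs)
    print(f"AUC-PR (trapezoidal): {auc(recall, precision):.3f}")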