def compute_metrics(truth_file_stream, prediction_file_stream) -> Dict[str, Dict[str, float]]:
    truth_probabilities = parse_csv(truth_file_stream, CATEGORIES)
    prediction_probabilities = parse_csv(prediction_file_stream, CATEGORIES)

    exclude_rows(truth_probabilities, EXCLUDE_LABELS)
    exclude_rows(prediction_probabilities, EXCLUDE_LABELS)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: Dict[str, Dict[str, float]] = {}
    for category in CATEGORIES:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(truth_category_probabilities, prediction_category_probabilities),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities, prediction_category_probabilities, 0.80
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities, prediction_category_probabilities
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in CATEGORIES]))
        for metric in per_category_metrics
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities
        )
    }

    return scores
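# A minimal usage sketch for compute_metrics above (not part of the original module);
# 'truth.csv' and 'predictions.csv' are hypothetical file names, assumed to be
# image-indexed CSVs with one probability column per entry in CATEGORIES.
with open('truth.csv') as truth_stream, open('predictions.csv') as prediction_stream:
    scores = compute_metrics(truth_stream, prediction_stream)

for group, group_scores in scores.items():
    # Keys are each category in CATEGORIES plus 'macro_average' and 'aggregate'.
    print(group, group_scores)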
@classmethod
def from_stream(cls, truth_file_stream: TextIO, prediction_file_stream: TextIO):
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    score = cls(truth_probabilities, prediction_probabilities, truth_weights)
    return score
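# Hypothetical sketch of parse_truth_csv, inferred only from how its outputs are used
# in from_stream above (probabilities indexed by image, plus per-image weight columns);
# the actual implementation, index name, and weight column names may differ.
from typing import TextIO, Tuple

import pandas as pd


def parse_truth_csv(csv_file_stream: TextIO) -> Tuple[pd.DataFrame, pd.DataFrame]:
    table = pd.read_csv(csv_file_stream, header=0, index_col='image')
    # 'score_weight' and 'validation_weight' as CSV column names are assumptions;
    # missing weights default to 1.0 (i.e. every image counts fully).
    weight_columns = ['score_weight', 'validation_weight']
    truth_weights = table.reindex(columns=weight_columns).fillna(1.0)
    truth_probabilities = table.drop(columns=weight_columns, errors='ignore')
    return truth_probabilities, truth_weights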
def test_validate_rows_missing_images(categories):
    # Identical image sets in truth and prediction should validate without raising.
    truth_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000124'],
        columns=categories,
    )
    prediction_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000124'],
        columns=categories,
    )
    load_csv.validate_rows(truth_probabilities, prediction_probabilities)

    # A single missing prediction row should be reported by image ID.
    truth_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000124'],
        columns=categories,
    )
    prediction_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123'],
        columns=categories,
    )
    with pytest.raises(ScoreException) as exc_info:
        load_csv.validate_rows(truth_probabilities, prediction_probabilities)
    assert "Missing images in CSV: ['ISIC_0000124']." == str(exc_info.value)

    # A prediction row with an unrelated ID means both truth images are missing.
    truth_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000124'],
        columns=categories,
    )
    prediction_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000120'],
        columns=categories,
    )
    with pytest.raises(ScoreException) as exc_info:
        load_csv.validate_rows(truth_probabilities, prediction_probabilities)
    assert "Missing images in CSV: ['ISIC_0000123', 'ISIC_0000124']." == str(exc_info.value)

    # A partially overlapping prediction set reports only the truly missing image.
    truth_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000124'],
        columns=categories,
    )
    prediction_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
        index=['ISIC_0000123', 'ISIC_0000125'],
        columns=categories,
    )
    with pytest.raises(ScoreException) as exc_info:
        load_csv.validate_rows(truth_probabilities, prediction_probabilities)
    assert "Missing images in CSV: ['ISIC_0000124']." == str(exc_info.value)
def test_validate_rows_extra_images(categories):
    truth_probabilities = pd.DataFrame(
        [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]], index=['ISIC_0000123'], columns=categories
    )
    prediction_probabilities = pd.DataFrame(
        [
            [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
        ],
        index=['ISIC_0000123', 'ISIC_0000126', 'ISIC_0000127'],
        columns=categories,
    )
    with pytest.raises(ScoreException) as exc_info:
        load_csv.validate_rows(truth_probabilities, prediction_probabilities)
    assert "Extra images in CSV: ['ISIC_0000126', 'ISIC_0000127']." == str(exc_info.value)
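# Sketch of a validate_rows implementation consistent with the error messages asserted
# in the two tests above; the real load_csv.validate_rows may differ in its details.
import pandas as pd


def validate_rows(
    truth_probabilities: pd.DataFrame, prediction_probabilities: pd.DataFrame
) -> None:
    truth_images = truth_probabilities.index
    prediction_images = prediction_probabilities.index

    # Truth images absent from the prediction CSV.
    missing_images = truth_images.difference(prediction_images)
    if not missing_images.empty:
        raise ScoreException(f'Missing images in CSV: {missing_images.tolist()}.')

    # Prediction images that do not correspond to any truth image.
    extra_images = prediction_images.difference(truth_images)
    if not extra_images.empty:
        raise ScoreException(f'Extra images in CSV: {extra_images.tolist()}.')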
def compute_metrics(truth_file_stream, prediction_file_stream) -> ScoresType:
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: ScoresType = {}
    for category in categories:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            weights=truth_weights.score_weight.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
                0.80,
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'roc': metrics.roc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in categories]))
        for metric in per_category_metrics
        if metric != 'roc'
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities, truth_weights.score_weight
        )
    }
    scores['overall'] = scores['aggregate']['balanced_accuracy']
    scores['validation'] = metrics.balanced_multiclass_accuracy(
        truth_probabilities, prediction_probabilities, truth_weights.validation_weight
    )

    return scores
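# Hypothetical sketch of create_binary_confusion_matrix with per-sample weights,
# matching only the keyword arguments used above; the project's actual implementation
# (and the container type it returns) may differ.
import numpy as np
import pandas as pd


def create_binary_confusion_matrix(
    truth_binary_values: np.ndarray,
    prediction_binary_values: np.ndarray,
    weights: np.ndarray = None,
    name: str = None,
) -> pd.Series:
    if weights is None:
        # With unit weights, weighted sums degenerate to plain sample counts.
        weights = np.ones(truth_binary_values.shape[0])
    # Each cell accumulates the total weight of samples with that truth/prediction pair,
    # so downstream metrics (accuracy, sensitivity, dice, ...) become weighted versions.
    return pd.Series(
        {
            'TP': weights[truth_binary_values & prediction_binary_values].sum(),
            'TN': weights[~truth_binary_values & ~prediction_binary_values].sum(),
            'FP': weights[~truth_binary_values & prediction_binary_values].sum(),
            'FN': weights[truth_binary_values & ~prediction_binary_values].sum(),
        },
        name=name,
    )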