def test_parse_csv_invalid_unicode(categories):
    prediction_file_stream = io.TextIOWrapper(io.BytesIO(b'\xef'))

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert 'Could not parse CSV: could not decode file as UTF-8.' == str(exc_info.value)
def test_parse_csv_missing_index(categories):
    prediction_file_stream = io.StringIO(
        'MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        '1.0,0.0,0.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert 'Missing column in CSV: "image".' == str(exc_info.value)
def test_parse_csv_missing_columns(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,BCC,AKIEC,BKL,DF\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "Missing columns in CSV: ['NV', 'VASC']." == str(exc_info.value)
def test_parse_csv_misnamed_columns(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,FOO,BCC,AKIEC,BKL,BAZ,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "Missing columns in CSV: ['DF', 'NV']." == str(exc_info.value)
def test_parse_csv_extra_columns(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,FOO,NV,BCC,AKIEC,BKL,DF,BAZ,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "Extra columns in CSV: ['BAZ', 'FOO']." == str(exc_info.value)
def test_parse_csv_empty(categories):
    # Provide just enough content to evade the newline check, while still raising an
    # EmptyDataError
    prediction_file_stream = io.StringIO('\n\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert 'Could not parse CSV: "No columns to parse from file".' == str(exc_info.value)
def compute_metrics(truth_file_stream, prediction_file_stream) -> Dict[str, Dict[str, float]]:
    truth_probabilities = parse_csv(truth_file_stream, CATEGORIES)
    prediction_probabilities = parse_csv(prediction_file_stream, CATEGORIES)

    exclude_rows(truth_probabilities, EXCLUDE_LABELS)
    exclude_rows(prediction_probabilities, EXCLUDE_LABELS)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: Dict[str, Dict[str, float]] = {}
    for category in CATEGORIES:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        # Binarize probabilities at a 0.5 threshold
        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(truth_category_probabilities, prediction_category_probabilities),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities, prediction_category_probabilities, 0.80
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities, prediction_category_probabilities
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in CATEGORIES]))
        for metric in per_category_metrics
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities
        )
    }

    return scores
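# A minimal usage sketch for compute_metrics above; the file names and the
# '_example_score_files' helper are illustrative assumptions, not part of the module.
# Both arguments are plain text streams, so ordinary file handles work:
def _example_score_files() -> None:
    with open('truth.csv') as truth_file, open('predictions.csv') as prediction_file:
        scores = compute_metrics(truth_file, prediction_file)
    # Per-category metrics are keyed by category name, then metric name
    print(scores['MEL']['auc'])
    # 'macro_average' holds the unweighted mean of each metric across categories
    print(scores['macro_average']['auc'])
    # 'aggregate' holds multi-category metrics
    print(scores['aggregate']['balanced_accuracy'])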
def test_parse_csv_non_float_columns(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        "ISIC_0000124,0.0,1.0,0.0,0.0,0.0,0.0,'BAD'\n"
        'ISIC_0000125,0.0,0.0,True,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "CSV contains non-floating-point value(s) in columns: ['BCC', 'VASC']." == str(
        exc_info.value)
def test_parse_csv_missing_values(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        # ISIC_0000124 has an empty 'NV' field; ISIC_0000125 is missing a field entirely
        'ISIC_0000124,0.0,,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000125,0.0,0.0,1.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "Missing value(s) in CSV for images: ['ISIC_0000124', 'ISIC_0000125']." == str(
        exc_info.value)
def test_parse_csv_no_newlines(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n')
    # Simulate many long floats; note that io.StringIO positions the stream at the start of
    # the buffer, so these writes overwrite the header (and its newline), leaving one long
    # unterminated line
    for i in range(10000):
        prediction_file_stream.write(f'{i:030f},')
    prediction_file_stream.seek(0)

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert 'No newlines detected in CSV.' == str(exc_info.value)
def test_parse_csv_out_of_range_values(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,100.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000124,0.0,1.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000125,0.0,0.0,-1.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert ('Values in CSV are outside the interval [0.0, 1.0] for images: '
            "['ISIC_0000123', 'ISIC_0000125']." == str(exc_info.value))
def test_parse_csv_duplicate_images(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000123,0.0,1.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000124,0.0,0.0,1.0,0.0,0.0,0.0,0.0\n'
        # 'ISIC_0000124.jpg' should collide with 'ISIC_0000124' once file extensions are stripped
        'ISIC_0000124.jpg,0.0,0.0,1.0,0.0,0.0,0.0,0.0\n')

    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert "Duplicate image rows detected in CSV: ['ISIC_0000123', 'ISIC_0000124']." == str(
        exc_info.value)
def test_parse_csv_mismatched_headers(categories):
    prediction_file_stream = io.StringIO(
        'image\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000124,0.0,1.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000125,0.0,0.0,1.0,0.0,0.0,0.0,0.0\n')

    # Too few header columns causes Pandas to raise an IndexError when reading
    with pytest.raises(ScoreException) as exc_info:
        load_csv.parse_csv(prediction_file_stream, categories)

    assert 'Could not parse CSV: inconsistent number of header columns.' == str(exc_info.value)
def test_parse_csv_invalid_type_index(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        '5,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n')

    prediction_probabilities = load_csv.parse_csv(prediction_file_stream, categories)

    # Apparently numeric 'image' fields should be coerced to str (NumPy 'O' dtype)
    assert prediction_probabilities.index.is_object()
def test_parse_csv_reordered_columns(categories):
    prediction_file_stream = io.StringIO(
        'NV,BCC,BKL,DF,AKIEC,MEL,VASC,image\n'
        '0.0,0.0,0.0,0.0,0.0,1.0,0.0,ISIC_0000123\n')

    prediction_probabilities = load_csv.parse_csv(prediction_file_stream, categories)

    assert prediction_probabilities.equals(
        pd.DataFrame(
            [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
            index=['ISIC_0000123'],
            columns=categories,
        ))
def from_stream(cls, truth_file_stream: TextIO, prediction_file_stream: TextIO):
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    score = cls(truth_probabilities, prediction_probabilities, truth_weights)
    return score
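# A minimal sketch of driving from_stream; 'ClassificationScore' is a stand-in name for
# the class that owns this classmethod, and the file names are assumptions for illustration:
def _example_from_stream():
    with open('truth.csv') as truth_file, open('predictions.csv') as prediction_file:
        return ClassificationScore.from_stream(truth_file, prediction_file)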
def test_parse_csv_trailing_delimiters(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0,\n'
        'ISIC_0000124,0.0,1.0,0.0,0.0,0.0,0.0,0.0,\n')

    # If all data rows have trailing delimiters, 'pd.read_csv' can misinterpret the data
    # without 'index_col=False'
    prediction_probabilities = load_csv.parse_csv(prediction_file_stream, categories)

    assert prediction_probabilities.equals(
        pd.DataFrame(
            [[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
             [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
            index=['ISIC_0000123', 'ISIC_0000124'],
            columns=categories,
        ))
def test_parse_csv(categories):
    prediction_file_stream = io.StringIO(
        'image,MEL,NV,BCC,AKIEC,BKL,DF,VASC\n'
        'ISIC_0000123,1.0,0.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000124.jpg,0.0,1.0,0.0,0.0,0.0,0.0,0.0\n'
        'ISIC_0000125.JPG,0.0,0.0,1.0,0.0,0.0,0.0,0.0\n')

    prediction_probabilities = load_csv.parse_csv(prediction_file_stream, categories)

    assert prediction_probabilities.equals(
        pd.DataFrame(
            [
                [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
            ],
            index=['ISIC_0000123', 'ISIC_0000124', 'ISIC_0000125'],
            columns=categories,
        ))
def compute_metrics(truth_file_stream, prediction_file_stream) -> ScoresType:
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: ScoresType = {}
    for category in categories:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        # Binarize probabilities at a 0.5 threshold
        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            weights=truth_weights.score_weight.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
                0.80,
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'roc': metrics.roc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
        }

    # Compute averages for all scalar per-category metrics; 'roc' is excluded
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in categories]))
        for metric in per_category_metrics
        if metric != 'roc'
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities, truth_weights.score_weight
        )
    }
    scores['overall'] = scores['aggregate']['balanced_accuracy']
    scores['validation'] = metrics.balanced_multiclass_accuracy(
        truth_probabilities, prediction_probabilities, truth_weights.validation_weight
    )

    return scores
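# A usage sketch for the weighted variant above; the file names and the
# '_example_weighted_scoring' helper are assumptions for illustration. Beyond the
# per-category and 'macro_average' entries, this version adds three top-level keys:
# 'aggregate', 'overall' (mirroring the aggregate balanced accuracy), and
# 'validation' (balanced accuracy under the validation weights):
def _example_weighted_scoring() -> None:
    with open('truth_with_weights.csv') as truth_file, \
            open('predictions.csv') as prediction_file:
        scores = compute_metrics(truth_file, prediction_file)
    print(scores['overall'])
    print(scores['validation'])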