def test_binary_accuracy_reference(truth_binary_values, prediction_binary_values):
    cm = create_binary_confusion_matrix(truth_binary_values, prediction_binary_values)

    value = metrics.binary_accuracy(cm)
    reference_value = sklearn.metrics.accuracy_score(truth_binary_values, prediction_binary_values)

    assert value == pytest.approx(reference_value)
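The snippets on this page omit their module-level imports. A minimal common header, assuming the layout implied by the calls (pandas, numpy, pytest, scikit-learn, a local metrics module, and the create_binary_confusion_matrix helper; the exact module paths are assumptions, and project-specific helpers such as iter_image_pairs, parse_csv, CATEGORIES, and the type aliases are not shown), might look like this:

# Hypothetical common imports for these snippets; the helper's module path is an assumption.
import pathlib
from typing import Dict, Iterable, KeysView

import numpy as np
import pandas as pd
import pytest
import sklearn.metrics

import metrics  # assumed local module providing binary_accuracy, binary_jaccard, ...
from confusion import create_binary_confusion_matrix  # assumed location of the helper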
def score(truth_path: pathlib.Path, prediction_path: pathlib.Path) -> Dict[str, Dict[str, float]]:
    confusion_matrics = pd.DataFrame(
        [
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=(image_pair.image_id, image_pair.attribute_id),
            )
            for image_pair in iter_image_pairs(truth_path, prediction_path)
        ]
    )

    return {
        'macro_average': {
            'threshold_jaccard': confusion_matrics.apply(
                metrics.binary_threshold_jaccard, threshold=0.65, axis='columns'
            ).mean(),
            'jaccard': confusion_matrics.apply(metrics.binary_jaccard, axis='columns').mean(),
            'dice': confusion_matrics.apply(metrics.binary_dice, axis='columns').mean(),
            'accuracy': confusion_matrics.apply(metrics.binary_accuracy, axis='columns').mean(),
            'sensitivity': confusion_matrics.apply(
                metrics.binary_sensitivity, axis='columns'
            ).mean(),
            'specificity': confusion_matrics.apply(
                metrics.binary_specificity, axis='columns'
            ).mean(),
        }
    }
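All of these examples treat the value returned by create_binary_confusion_matrix as a labelled pd.Series holding the four confusion-matrix cells. A minimal sketch under that assumption (the project's actual implementation may differ, for example in cell names or weighting details):

def create_binary_confusion_matrix(truth_binary_values, prediction_binary_values,
                                   weights=None, name=None) -> pd.Series:
    # Illustrative sketch only: flatten inputs so 2D masks and 1D vectors are handled alike.
    truth = np.ravel(truth_binary_values)
    prediction = np.ravel(prediction_binary_values)
    weights = np.ones_like(truth, dtype=float) if weights is None else np.ravel(weights)

    # Each cell is an (optionally weighted) count; the Series name lets callers
    # assemble many matrices into a DataFrame, as the score() functions here do.
    return pd.Series(
        {
            'TP': float(np.sum(weights * (truth & prediction))),
            'TN': float(np.sum(weights * (~truth & ~prediction))),
            'FP': float(np.sum(weights * (~truth & prediction))),
            'FN': float(np.sum(weights * (truth & ~prediction))),
        },
        name=name,
    )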
Example #3
def test_binary_threshold_jaccard(truth_binary_image, prediction_binary_image,
                                  correct_value):
    cm = create_binary_confusion_matrix(truth_binary_image,
                                        prediction_binary_image)

    value = metrics.binary_threshold_jaccard(cm)

    assert value == correct_value
Example #4
def test_binary_accuracy(truth_binary_image, prediction_binary_image,
                         correct_value):
    cm = create_binary_confusion_matrix(truth_binary_image,
                                        prediction_binary_image)

    value = metrics.binary_accuracy(cm)

    assert value == correct_value
def test_jaccard_dice_equality(truth_binary_values, prediction_binary_values):
    # Some mathematical equalities which will always hold
    cm = create_binary_confusion_matrix(truth_binary_values, prediction_binary_values)

    jaccard = metrics.binary_jaccard(cm)
    dice = metrics.binary_dice(cm)

    assert dice == pytest.approx((2 * jaccard) / (1.0 + jaccard))
    assert jaccard == pytest.approx(dice / (2.0 - dice))
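Both identities follow from the definitions jaccard = TP / (TP + FP + FN) and dice = 2 * TP / (2 * TP + FP + FN). A quick numeric check with arbitrary counts (the test name here is hypothetical):

def test_jaccard_dice_identities_numeric():
    # Numeric check of the Jaccard/Dice identities with arbitrary example counts.
    tp, fp, fn = 30.0, 5.0, 10.0
    jaccard = tp / (tp + fp + fn)        # 30 / 45 = 0.666...
    dice = 2 * tp / (2 * tp + fp + fn)   # 60 / 75 = 0.8

    assert dice == pytest.approx((2 * jaccard) / (1.0 + jaccard))
    assert jaccard == pytest.approx(dice / (2.0 - dice))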
def compute_metrics(truth_file_stream, prediction_file_stream) -> Dict[str, Dict[str, float]]:
    truth_probabilities = parse_csv(truth_file_stream, CATEGORIES)
    prediction_probabilities = parse_csv(prediction_file_stream, CATEGORIES)

    exclude_rows(truth_probabilities, EXCLUDE_LABELS)
    exclude_rows(prediction_probabilities, EXCLUDE_LABELS)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: Dict[str, Dict[str, float]] = {}
    for category in CATEGORIES:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy': metrics.binary_accuracy(category_cm),
            'sensitivity': metrics.binary_sensitivity(category_cm),
            'specificity': metrics.binary_specificity(category_cm),
            'dice': metrics.binary_dice(category_cm),
            'ppv': metrics.binary_ppv(category_cm),
            'npv': metrics.binary_npv(category_cm),
            'auc': metrics.auc(truth_category_probabilities, prediction_category_probabilities),
            'auc_sens_80': metrics.auc_above_sensitivity(
                truth_category_probabilities, prediction_category_probabilities, 0.80
            ),
            'ap': metrics.average_precision(
                truth_category_probabilities, prediction_category_probabilities
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric: float(np.mean([scores[category][metric] for category in CATEGORIES]))
        for metric in per_category_metrics
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy': metrics.balanced_multiclass_accuracy(
            truth_probabilities, prediction_probabilities
        )
    }

    return scores
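A usage sketch for compute_metrics, assuming the truth and prediction CSVs follow the layout expected by parse_csv (the file names below are placeholders):

# Hypothetical driver; 'truth.csv' and 'prediction.csv' are placeholder file names.
if __name__ == '__main__':
    with open('truth.csv') as truth_file, open('prediction.csv') as prediction_file:
        all_scores = compute_metrics(truth_file, prediction_file)

    for group, group_scores in all_scores.items():
        print(group, group_scores)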
Example #7
    def _category_score(
        truth_category_probabilities: pd.Series,
        prediction_category_probabilities: pd.Series,
        truth_weights: pd.DataFrame,
        category: str,
    ) -> pd.Series:
        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            weights=truth_weights.score_weight.to_numpy(),
            name=category,
        )

        return pd.Series(
            {
                'accuracy': metrics.binary_accuracy(category_cm),
                'sensitivity': metrics.binary_sensitivity(category_cm),
                'specificity': metrics.binary_specificity(category_cm),
                'dice': metrics.binary_dice(category_cm),
                'ppv': metrics.binary_ppv(category_cm),
                'npv': metrics.binary_npv(category_cm),
                'auc': metrics.auc(
                    truth_category_probabilities,
                    prediction_category_probabilities,
                    truth_weights.score_weight,
                ),
                'auc_sens_80': metrics.auc_above_sensitivity(
                    truth_category_probabilities,
                    prediction_category_probabilities,
                    truth_weights.score_weight,
                    0.80,
                ),
                'ap': metrics.average_precision(
                    truth_category_probabilities,
                    prediction_category_probabilities,
                    truth_weights.score_weight,
                ),
            },
            index=[
                'accuracy',
                'sensitivity',
                'specificity',
                'dice',
                'ppv',
                'npv',
                'auc',
                'auc_sens_80',
                'ap',
            ],
            name=category,
        )
def test_binary_jaccard_reference(truth_binary_values, prediction_binary_values):
    cm = create_binary_confusion_matrix(truth_binary_values, prediction_binary_values)

    value = metrics.binary_jaccard(cm)
    # sklearn has a very idiosyncratic implementation of jaccard_similarity_score; unless the input
    # arrays are wrapped in an additional dimension, the result is actually the accuracy score
    # see: https://github.com/scikit-learn/scikit-learn/issues/3037
    reference_value = sklearn.metrics.jaccard_similarity_score(
        np.expand_dims(truth_binary_values, axis=0),
        np.expand_dims(prediction_binary_values, axis=0),
    )

    assert value == pytest.approx(reference_value)
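jaccard_similarity_score was deprecated and later removed from scikit-learn; in newer releases, sklearn.metrics.jaccard_score computes the set-wise Jaccard index directly, so an equivalent reference value could be obtained without the expand_dims workaround:

# With newer scikit-learn releases, jaccard_score yields the set-wise Jaccard
# index directly, without the expand_dims workaround used above.
reference_value = sklearn.metrics.jaccard_score(truth_binary_values, prediction_binary_values)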
    def __init__(self, image_pairs: Iterable[ImagePair]) -> None:
        # TODO: Add weighting
        confusion_matrics = pd.DataFrame([
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=image_pair.image_id,
            ) for image_pair in image_pairs
        ])

        per_image = pd.DataFrame(
            {
                'accuracy':
                confusion_matrics.apply(metrics.binary_accuracy,
                                        axis='columns'),
                'sensitivity':
                confusion_matrics.apply(metrics.binary_sensitivity,
                                        axis='columns'),
                'specificity':
                confusion_matrics.apply(metrics.binary_specificity,
                                        axis='columns'),
                'jaccard':
                confusion_matrics.apply(metrics.binary_jaccard,
                                        axis='columns'),
                'threshold_jaccard':
                confusion_matrics.apply(metrics.binary_threshold_jaccard,
                                        threshold=0.65,
                                        axis='columns'),
                'dice':
                confusion_matrics.apply(metrics.binary_dice, axis='columns'),
            },
            columns=[
                'accuracy',
                'sensitivity',
                'specificity',
                'jaccard',
                'threshold_jaccard',
                'dice',
            ],
        )

        self.macro_average = per_image.mean(axis='index').rename('macro_average')

        self.overall = self.macro_average.at['threshold_jaccard']
        self.validation = self.macro_average.at['threshold_jaccard']
Example #10
def score(truth_path: pathlib.Path, prediction_path: pathlib.Path) -> ScoresType:
    confusion_matrics = pd.DataFrame(
        [
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=(image_pair.attribute_id, image_pair.image_id),
            )
            for image_pair in iter_image_pairs(truth_path, prediction_path)
        ]
    )
    confusion_matrics = confusion_matrics.reindex(
        index=pd.MultiIndex.from_tuples(confusion_matrics.index, names=('attribute_id', 'image_id'))
    )

    # Normalize all values, since image sizes vary
    normalized_confusion_matrics = confusion_matrics.apply(
        normalize_confusion_matrix, axis='columns'
    )

    scores: ScoresType = {}
    for attribute in sorted(confusion_matrics.index.unique('attribute_id')):
        attribute_confusion_matrics = normalized_confusion_matrics.loc(axis=0)[attribute, :]
        sum_attribute_confusion_matrics = attribute_confusion_matrics.sum(axis='index')

        scores[attribute] = {
            'jaccard': metrics.binary_jaccard(sum_attribute_confusion_matrics),
            'dice': metrics.binary_dice(sum_attribute_confusion_matrics),
        }

    sum_confusion_matrix = normalized_confusion_matrics.sum(axis='index')
    scores['micro_average'] = {
        'jaccard': metrics.binary_jaccard(sum_confusion_matrix),
        'dice': metrics.binary_dice(sum_confusion_matrix),
    }

    scores['overall'] = scores['micro_average']['jaccard']

    return scores
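normalize_confusion_matrix is not shown on this page; given the comment about varying image sizes, a plausible sketch normalizes each image's confusion matrix to sum to 1 (an assumption, not the project's confirmed implementation):

def normalize_confusion_matrix(cm: pd.Series) -> pd.Series:
    # Assumed behavior: scale TP/TN/FP/FN so they sum to 1 per image, preventing
    # larger images from dominating the summed (micro-average) confusion matrix.
    return cm / cm.sum()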
def score(truth_path: pathlib.Path, prediction_path: pathlib.Path) -> Dict[str, Dict[str, float]]:
    confusion_matrics = pd.DataFrame(
        [
            create_binary_confusion_matrix(
                truth_binary_values=image_pair.truth_image > 128,
                prediction_binary_values=image_pair.prediction_image > 128,
                name=(image_pair.attribute_id, image_pair.image_id),
            )
            for image_pair in iter_image_pairs(truth_path, prediction_path)
        ]
    )
    confusion_matrics = confusion_matrics.reindex(
        index=pd.MultiIndex.from_tuples(confusion_matrics.index, names=('attribute_id', 'image_id'))
    )

    # Normalize all values, since image sizes vary
    normalized_confusion_matrics = confusion_matrics.apply(
        normalize_confusion_matrix, axis='columns'
    )

    scores: Dict[str, Dict[str, float]] = {}
    for attribute in sorted(confusion_matrics.index.unique('attribute_id')):
        attribute_confusion_matrics = normalized_confusion_matrics.loc(axis=0)[attribute, :]
        sum_attribute_confusion_matrics = attribute_confusion_matrics.sum(axis='index')

        scores[attribute] = {
            'jaccard': metrics.binary_jaccard(sum_attribute_confusion_matrics),
            'dice': metrics.binary_dice(sum_attribute_confusion_matrics),
        }

    sum_confusion_matrix = normalized_confusion_matrics.sum(axis='index')
    scores['micro_average'] = {
        'jaccard': metrics.binary_jaccard(sum_confusion_matrix),
        'dice': metrics.binary_dice(sum_confusion_matrix),
    }

    return scores
Example #12
def score(truth_path: pathlib.Path,
          prediction_path: pathlib.Path) -> ScoresType:
    confusion_matrics = pd.DataFrame([
        create_binary_confusion_matrix(
            truth_binary_values=image_pair.truth_image > 128,
            prediction_binary_values=image_pair.prediction_image > 128,
            name=(image_pair.image_id, image_pair.attribute_id),
        ) for image_pair in iter_image_pairs(truth_path, prediction_path)
    ])

    scores = {
        'macro_average': {
            'threshold_jaccard':
            confusion_matrics.apply(metrics.binary_threshold_jaccard,
                                    threshold=0.65,
                                    axis='columns').mean(),
            'jaccard':
            confusion_matrics.apply(metrics.binary_jaccard,
                                    axis='columns').mean(),
            'dice':
            confusion_matrics.apply(metrics.binary_dice,
                                    axis='columns').mean(),
            'accuracy':
            confusion_matrics.apply(metrics.binary_accuracy,
                                    axis='columns').mean(),
            'sensitivity':
            confusion_matrics.apply(metrics.binary_sensitivity,
                                    axis='columns').mean(),
            'specificity':
            confusion_matrics.apply(metrics.binary_specificity,
                                    axis='columns').mean(),
        }
    }
    scores['overall'] = scores['macro_average']['threshold_jaccard']

    return scores
Example #13
def real_cm(real_truth_binary_values,
            real_prediction_binary_values) -> pd.Series:
    return create_binary_confusion_matrix(real_truth_binary_values,
                                          real_prediction_binary_values)
def test_binary_threshold_jaccard(truth_binary_image, prediction_binary_image, correct_value):
    cm = create_binary_confusion_matrix(truth_binary_image, prediction_binary_image)

    value = metrics.binary_threshold_jaccard(cm)

    assert value == correct_value
Example #15
def cm(truth_binary_values, prediction_binary_values):
    return create_binary_confusion_matrix(truth_binary_values,
                                          prediction_binary_values)
def test_binary_accuracy(truth_binary_image, prediction_binary_image, correct_value):
    cm = create_binary_confusion_matrix(truth_binary_image, prediction_binary_image)

    value = metrics.binary_accuracy(cm)

    assert value == correct_value
Example #17
def compute_metrics(truth_file_stream, prediction_file_stream) -> ScoresType:
    truth_probabilities, truth_weights = parse_truth_csv(truth_file_stream)
    categories = truth_probabilities.columns
    prediction_probabilities = parse_csv(prediction_file_stream, categories)

    validate_rows(truth_probabilities, prediction_probabilities)

    sort_rows(truth_probabilities)
    sort_rows(prediction_probabilities)

    scores: ScoresType = {}
    for category in categories:
        truth_category_probabilities: pd.Series = truth_probabilities[category]
        prediction_category_probabilities: pd.Series = prediction_probabilities[
            category]

        truth_binary_values: pd.Series = truth_category_probabilities.gt(0.5)
        prediction_binary_values: pd.Series = prediction_category_probabilities.gt(
            0.5)

        category_cm = create_binary_confusion_matrix(
            truth_binary_values=truth_binary_values.to_numpy(),
            prediction_binary_values=prediction_binary_values.to_numpy(),
            weights=truth_weights.score_weight.to_numpy(),
            name=category,
        )

        scores[category] = {
            'accuracy':
            metrics.binary_accuracy(category_cm),
            'sensitivity':
            metrics.binary_sensitivity(category_cm),
            'specificity':
            metrics.binary_specificity(category_cm),
            'dice':
            metrics.binary_dice(category_cm),
            'ppv':
            metrics.binary_ppv(category_cm),
            'npv':
            metrics.binary_npv(category_cm),
            'auc':
            metrics.auc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'auc_sens_80':
            metrics.auc_above_sensitivity(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
                0.80,
            ),
            'ap':
            metrics.average_precision(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
            'roc':
            metrics.roc(
                truth_category_probabilities,
                prediction_category_probabilities,
                truth_weights.score_weight,
            ),
        }

    # Compute averages for all per-category metrics
    per_category_metrics: KeysView[str] = next(iter(scores.values())).keys()
    scores['macro_average'] = {
        metric:
        float(np.mean([scores[category][metric] for category in categories]))
        for metric in per_category_metrics if metric != 'roc'
    }

    # Compute multi-category aggregate metrics
    scores['aggregate'] = {
        'balanced_accuracy':
        metrics.balanced_multiclass_accuracy(truth_probabilities,
                                             prediction_probabilities,
                                             truth_weights.score_weight)
    }

    scores['overall'] = scores['aggregate']['balanced_accuracy']
    scores['validation'] = metrics.balanced_multiclass_accuracy(
        truth_probabilities, prediction_probabilities,
        truth_weights.validation_weight)

    return scores