Example #1
 def __init__(self,
              average: Union[Average, str] = Average.MACRO,
              output_transform: Callable = lambda x: x) -> None:
     metric_fn = ROCAUCMetric(average=Average(average))
     super().__init__(metric_fn=metric_fn,
                      output_transform=output_transform,
                      save_details=False)
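The `Average(average)` call in this and the following snippets normalizes a plain string into an enum member. Below is a minimal self-contained sketch of that pattern; the `Average` class here is only a stand-in for MONAI's real enum (e.g. `monai.utils.Average`):

from enum import Enum

class Average(Enum):  # stand-in for MONAI's Average enum
    MACRO = "macro"
    WEIGHTED = "weighted"
    MICRO = "micro"
    NONE = "none"

assert Average("macro") is Average.MACRO        # strings are normalized to members
assert Average(Average.MICRO) is Average.MICRO  # members pass through unchanged
# Average("bogus") raises ValueError, so typos fail at construction time.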
Example #2
 def _compute_fn(pred, label):
     return compute_roc_auc(
         y_pred=pred,
         y=label,
         to_onehot_y=to_onehot_y,
         softmax=softmax,
         other_act=other_act,
         average=Average(average),
     )
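The nested `_compute_fn` above freezes the keyword arguments of `compute_roc_auc` so the caller only ever supplies `(pred, label)`. A self-contained sketch of the same pattern using `functools.partial` and a hypothetical toy metric:

from functools import partial

def toy_metric(y_pred, y, average="macro"):
    # hypothetical stand-in for compute_roc_auc: fraction of exact matches
    return sum(int(p == t) for p, t in zip(y_pred, y)) / len(y)

_compute_fn = partial(toy_metric, average="macro")
print(_compute_fn([1, 0, 1], [1, 1, 1]))  # 0.666...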
Example #3
 def __init__(
     self,
     to_onehot_y: bool = False,
     softmax: bool = False,
     average: Union[Average, str] = Average.MACRO,
     output_transform: Callable = lambda x: x,
     device: Optional[Union[str, torch.device]] = None,
 ) -> None:
     super().__init__(output_transform, device=device)
     self.to_onehot_y = to_onehot_y
     self.softmax = softmax
     self.average: Average = Average(average)
Example #4
 def __init__(
     self,
     to_onehot_y: bool = False,
     softmax: bool = False,
     other_act: Optional[Callable] = None,
     average: Union[Average, str] = Average.MACRO,
     output_transform: Callable = lambda x: x,
 ) -> None:
     super().__init__(output_transform)
     self.to_onehot_y = to_onehot_y
     self.softmax = softmax
     self.other_act = other_act
     self.average: Average = Average(average)
Example #5
 def __init__(
     self,
     to_onehot_y: bool = False,
     activation: Optional[Union[str, Callable]] = None,
     bin_mode: Optional[str] = "threshold",
     bin_threshold: Union[float, Sequence[float]] = 0.5,
     metric_name: str = "hit_rate",
     average: Union[Average, str] = Average.MACRO,
     zero_division: int = 0,
     output_transform: Callable = lambda x: x,
     device: Optional[torch.device] = None,
 ) -> None:
     super().__init__(output_transform, device=device)
     self.to_onehot_y = to_onehot_y
     self.activation = activation
     self.bin_mode = bin_mode
     self.bin_threshold = bin_threshold
     self.metric_name = metric_name
     self.average: Average = Average(average)
     self.zero_division = zero_division
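The `bin_mode`/`bin_threshold` pair presumably controls how continuous activations are binarized before confusion-matrix elements are counted; the helper that does this is not shown. A hedged sketch of what the ``"threshold"`` mode likely amounts to:

import torch

y_pred = torch.tensor([[0.2, 0.7], [0.9, 0.4]])
binarized = (y_pred >= 0.5).long()  # values >= bin_threshold become 1, else 0
print(binarized)  # tensor([[0, 1], [1, 0]])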
Example #6
def do_calculate_metric(
    confusion_ele_list: List[np.ndarray],
    metric_name: str,
    average: Union[Average, str] = "none",
    zero_division: int = 0,
):
    """
    Args:
        confusion_ele_list: the returned result of function ``cal_confusion_matrix_elements``.
        metric_name: the simplified metric name from function ``check_metric_name_and_unify``.
        average: type of averaging performed if not binary classification.
            Defaults to ``"macro"``.
            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
                This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average
                weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
                indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.
        zero_division: the value to return when there is a zero division, for example, when all
            predictions and labels are negative. Defaults to 0.
    """
    ele_list: List[Union[np.ndarray, int, float]]
    metric = metric_name
    div_0 = zero_division
    # pre-process average
    average = Average(average)
    bin_flag: bool
    if len(confusion_ele_list[0].shape) == 0:
        bin_flag = True
        average = Average.NONE  # for binary tasks, other average methods are meaningless.
        ele_list = [int(e) for e in confusion_ele_list]
    else:
        bin_flag = False
        if average == Average.MICRO:
            ele_list = [int(e.sum()) for e in confusion_ele_list]
        else:
            ele_list = confusion_ele_list
    tp, tn, fp, fn, p, n = ele_list
    # calculate
    numerator: Union[np.ndarray, int, float]
    denominator: Union[np.ndarray, int, float]
    if metric == "tpr":
        numerator, denominator = tp, p
    elif metric == "tnr":
        numerator, denominator = tn, n
    elif metric == "ppv":
        numerator, denominator = tp, (tp + fp)
    elif metric == "npv":
        numerator, denominator = tn, (tn + fn)
    elif metric == "fnr":
        numerator, denominator = fn, p
    elif metric == "fpr":
        numerator, denominator = fp, n
    elif metric == "fdr":
        numerator, denominator = fp, (fp + tp)
    elif metric == "for":
        numerator, denominator = fn, (fn + tn)
    elif metric == "pt":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator = np.sqrt(tpr * (1 - tnr)) + tnr - 1
        denominator = tpr + tnr - 1
    elif metric == "ts":
        numerator, denominator = tp, (tp + fn + fp)
    elif metric == "acc":
        numerator, denominator = (tp + tn), (p + n)
    elif metric == "ba":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator, denominator = (tpr + tnr), 2
    elif metric == "f1":
        numerator, denominator = tp * 2, (tp * 2 + fn + fp)
    elif metric == "mcc":
        numerator = tp * tn - fp * fn
        denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    elif metric == "fm":
        tpr = handle_zero_divide(tp, p, div_0)
        ppv = handle_zero_divide(tp, (tp + fp), div_0)
        numerator = np.sqrt(ppv * tpr)
        denominator = 1
    elif metric == "bm":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator = tpr + tnr - 1
        denominator = 1
    elif metric == "mk":
        ppv = handle_zero_divide(tp, (tp + fp), div_0)
        npv = handle_zero_divide(tn, (tn + fn), div_0)
        numerator = ppv + npv - 1
        denominator = 1
    else:
        raise NotImplementedError(f"metric '{metric}' is not implemented.")
    result = handle_zero_divide(numerator, denominator, div_0)

    if average == Average.MICRO or average == Average.NONE:
        return result

    weights: Optional[np.ndarray]
    if average == Average.MACRO:
        weights = None
    elif average == Average.WEIGHTED:
        weights = p
    result = np.average(result, weights=weights)
    return result
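The final averaging step can be reproduced in isolation. A minimal numpy sketch with illustrative values (not output of the function above):

import numpy as np

per_class = np.array([0.9, 0.5, 0.7])  # e.g. per-class TPR scores
p = np.array([100, 10, 50])            # positives (support) per class

print(per_class)                         # "none": per-class scores as-is
print(np.average(per_class))             # "macro": unweighted mean -> 0.7
print(np.average(per_class, weights=p))  # "weighted": support-weighted -> 0.8125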
Example #7
def compute_roc_auc(
    y_pred: torch.Tensor,
    y: torch.Tensor,
    to_onehot_y: bool = False,
    softmax: bool = False,
    other_act: Optional[Callable] = None,
    average: Union[Average, str] = Average.MACRO,
):
    """Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC). Referring to:
    `sklearn.metrics.roc_auc_score <https://scikit-learn.org/stable/modules/generated/
    sklearn.metrics.roc_auc_score.html#sklearn.metrics.roc_auc_score>`_.

    Args:
        y_pred: input data to compute, typically classification model output.
            It must be in one-hot format with batch as the first dim, example shape: [16] or [16, 2].
        y: ground truth to compute the ROC AUC metric; the first dim is batch.
            Example shape: [16, 1] will be converted into [16, 2] (where `2` is inferred from `y_pred`).
        to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
        softmax: whether to add softmax function to `y_pred` before computation. Defaults to False.
        other_act: callable function to replace `softmax` as the activation layer if needed. Defaults to ``None``.
            For example: `other_act = lambda x: torch.log_softmax(x, dim=1)`.
        average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
            Type of averaging performed if not binary classification.
            Defaults to ``"macro"``.

            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
                This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average,
                weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
                indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.

    Raises:
        ValueError: When ``y_pred`` dimension is not one of [1, 2].
        ValueError: When ``y`` dimension is not one of [1, 2].
        ValueError: When ``softmax=True`` and ``other_act is not None``. Incompatible values.
        TypeError: When ``other_act`` is not an ``Optional[Callable]``.
        ValueError: When ``average`` is not one of ["macro", "weighted", "micro", "none"].

    Note:
        ROCAUC expects y to be composed of 0's and 1's. `y_pred` must be either probability estimates or confidence values.

    """
    y_pred_ndim = y_pred.ndimension()
    y_ndim = y.ndimension()
    if y_pred_ndim not in (1, 2):
        raise ValueError(
            "Predictions should be of shape (batch_size, n_classes) or (batch_size, )."
        )
    if y_ndim not in (1, 2):
        raise ValueError(
            "Targets should be of shape (batch_size, n_classes) or (batch_size, )."
        )
    if y_pred_ndim == 2 and y_pred.shape[1] == 1:
        y_pred = y_pred.squeeze(dim=-1)
        y_pred_ndim = 1
    if y_ndim == 2 and y.shape[1] == 1:
        y = y.squeeze(dim=-1)

    if y_pred_ndim == 1:
        if to_onehot_y:
            warnings.warn(
                "y_pred has only one channel, to_onehot_y=True ignored.")
        if softmax:
            warnings.warn("y_pred has only one channel, softmax=True ignored.")
        return _calculate(y, y_pred)
    else:
        n_classes = y_pred.shape[1]
        if to_onehot_y:
            y = one_hot(y, n_classes)
        if softmax and other_act is not None:
            raise ValueError(
                "Incompatible values: softmax=True and other_act is not None.")
        if softmax:
            y_pred = y_pred.float().softmax(dim=1)
        if other_act is not None:
            if not callable(other_act):
                raise TypeError(
                    f"other_act must be None or callable but is {type(other_act).__name__}."
                )
            y_pred = other_act(y_pred)

        if y.shape != y_pred.shape:
            raise AssertionError("data shapes of y_pred and y do not match.")

        average = Average(average)
        if average == Average.MICRO:
            return _calculate(y.flatten(), y_pred.flatten())
        else:
            y, y_pred = y.transpose(0, 1), y_pred.transpose(0, 1)
            auc_values = [
                _calculate(y_, y_pred_) for y_, y_pred_ in zip(y, y_pred)
            ]
            if average == Average.NONE:
                return auc_values
            if average == Average.MACRO:
                return np.mean(auc_values)
            if average == Average.WEIGHTED:
                weights = [sum(y_) for y_ in y]
                return np.average(auc_values, weights=weights)
            raise ValueError(
                f'Unsupported average: {average}, available options are ["macro", "weighted", "micro", "none"].'
            )
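Since the docstring cites `sklearn.metrics.roc_auc_score`, the averaging modes can be cross-checked against it directly. A hedged sketch (assumes scikit-learn is installed; the values are illustrative only):

import numpy as np
from sklearn.metrics import roc_auc_score

y = np.array([[1, 0], [0, 1], [1, 0], [0, 1]])                       # one-hot targets
y_pred = np.array([[0.8, 0.2], [0.3, 0.7], [0.6, 0.4], [0.4, 0.6]])  # per-class scores

print(roc_auc_score(y, y_pred, average="macro"))  # unweighted mean over classes
print(roc_auc_score(y, y_pred, average="micro"))  # flatten, then one global AUC
print(roc_auc_score(y, y_pred, average=None))     # per-class AUC scores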
Example #8
 def _compute_fn(pred, label):
     return compute_roc_auc(
         y_pred=pred,
         y=label,
         average=Average(average),
     )
Example #9
def compute_roc_auc(
    y_pred: torch.Tensor,
    y: torch.Tensor,
    average: Union[Average, str] = Average.MACRO,
):
    """Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC). Referring to:
    `sklearn.metrics.roc_auc_score <https://scikit-learn.org/stable/modules/generated/
    sklearn.metrics.roc_auc_score.html#sklearn.metrics.roc_auc_score>`_.

    Args:
        y_pred: input data to compute, typically classification model output.
            It must be in one-hot format with batch as the first dim, example shape: [16] or [16, 2].
        y: ground truth to compute the ROC AUC metric; the first dim is batch.
            Example shape: [16, 1] will be converted into [16, 2] (where `2` is inferred from `y_pred`).
        average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
            Type of averaging performed if not binary classification.
            Defaults to ``"macro"``.

            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
                This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average,
                weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
                indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.

    Raises:
        ValueError: When ``y_pred`` dimension is not one of [1, 2].
        ValueError: When ``y`` dimension is not one of [1, 2].
        ValueError: When ``average`` is not one of ["macro", "weighted", "micro", "none"].

    Note:
        ROCAUC expects y to be composed of 0's and 1's. `y_pred` must be either probability estimates or confidence values.

    """
    y_pred_ndim = y_pred.ndimension()
    y_ndim = y.ndimension()
    if y_pred_ndim not in (1, 2):
        raise ValueError(
            "Predictions should be of shape (batch_size, n_classes) or (batch_size, )."
        )
    if y_ndim not in (1, 2):
        raise ValueError(
            "Targets should be of shape (batch_size, n_classes) or (batch_size, )."
        )
    if y_pred_ndim == 2 and y_pred.shape[1] == 1:
        y_pred = y_pred.squeeze(dim=-1)
        y_pred_ndim = 1
    if y_ndim == 2 and y.shape[1] == 1:
        y = y.squeeze(dim=-1)

    if y_pred_ndim == 1:
        return _calculate(y_pred, y)

    if y.shape != y_pred.shape:
        raise AssertionError("data shapes of y_pred and y do not match.")

    average = Average(average)
    if average == Average.MICRO:
        return _calculate(y_pred.flatten(), y.flatten())
    y, y_pred = y.transpose(0, 1), y_pred.transpose(0, 1)
    auc_values = [_calculate(y_pred_, y_) for y_pred_, y_ in zip(y_pred, y)]
    if average == Average.NONE:
        return auc_values
    if average == Average.MACRO:
        return np.mean(auc_values)
    if average == Average.WEIGHTED:
        weights = [sum(y_) for y_ in y]
        return np.average(auc_values, weights=weights)
    raise ValueError(
        f'Unsupported average: {average}, available options are ["macro", "weighted", "micro", "none"].'
    )
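`_calculate` itself is not shown in any of these snippets. A plausible, hedged stand-in computes binary AUC via the rank (Mann-Whitney U) formulation; note it ignores tied scores, which a production version would handle with average ranks:

import numpy as np

def auc_rank(y_pred, y):
    # AUC = (sum of positive ranks - n_pos*(n_pos+1)/2) / (n_pos * n_neg)
    order = np.argsort(y_pred)
    ranks = np.empty(len(y_pred), dtype=float)
    ranks[order] = np.arange(1, len(y_pred) + 1)  # 1-based ranks, ascending score
    n_pos = int(y.sum())
    n_neg = len(y) - n_pos
    return (ranks[y == 1].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)

print(auc_rank(np.array([0.1, 0.4, 0.35, 0.8]), np.array([0, 0, 1, 1])))  # 0.75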