def __init__(self, average: Union[Average, str] = Average.MACRO, output_transform: Callable = lambda x: x) -> None:
    metric_fn = ROCAUCMetric(average=Average(average))
    super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=False)
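# A minimal, self-contained sketch of the string-or-enum pattern used by the ``average``
# argument above: ``Average(average)`` normalizes either an enum member or its string value
# to the enum member, and raises ``ValueError`` for anything else. ``AverageDemo`` below is a
# stand-in defined only for illustration; the real ``Average`` enum is imported from MONAI's
# utilities and is not redefined here.
from enum import Enum


class AverageDemo(Enum):
    MACRO = "macro"
    WEIGHTED = "weighted"
    MICRO = "micro"
    NONE = "none"


assert AverageDemo("macro") is AverageDemo.MACRO            # string value -> enum member
assert AverageDemo(AverageDemo.MICRO) is AverageDemo.MICRO  # enum member passes through unchanged
# AverageDemo("mean") would raise ValueError: 'mean' is not a valid AverageDemo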
def _compute_fn(pred, label):
    return compute_roc_auc(
        y_pred=pred,
        y=label,
        to_onehot_y=to_onehot_y,
        softmax=softmax,
        other_act=other_act,
        average=Average(average),
    )
def __init__(
    self,
    to_onehot_y: bool = False,
    softmax: bool = False,
    average: Union[Average, str] = Average.MACRO,
    output_transform: Callable = lambda x: x,
    device: Optional[Union[str, torch.device]] = None,
) -> None:
    super().__init__(output_transform, device=device)
    self.to_onehot_y = to_onehot_y
    self.softmax = softmax
    self.average: Average = Average(average)
def __init__(
    self,
    to_onehot_y: bool = False,
    softmax: bool = False,
    other_act: Optional[Callable] = None,
    average: Union[Average, str] = Average.MACRO,
    output_transform: Callable = lambda x: x,
) -> None:
    super().__init__(output_transform)
    self.to_onehot_y = to_onehot_y
    self.softmax = softmax
    self.other_act = other_act
    self.average: Average = Average(average)
def __init__(
    self,
    to_onehot_y: bool = False,
    activation: Optional[Union[str, Callable]] = None,
    bin_mode: Optional[str] = "threshold",
    bin_threshold: Union[float, Sequence[float]] = 0.5,
    metric_name: str = "hit_rate",
    average: Union[Average, str] = Average.MACRO,
    zero_division: int = 0,
    output_transform: Callable = lambda x: x,
    device: Optional[torch.device] = None,
) -> None:
    super().__init__(output_transform, device=device)
    self.to_onehot_y = to_onehot_y
    self.activation = activation
    self.bin_mode = bin_mode
    self.bin_threshold = bin_threshold
    self.metric_name = metric_name
    self.average: Average = Average(average)
    self.zero_division = zero_division
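# A minimal sketch of what ``bin_mode="threshold"`` / ``bin_threshold`` presumably control:
# binarizing probability-like predictions before confusion-matrix elements are counted.
# The handler's actual binarization code is not shown above, so the behavior illustrated
# here (including per-class thresholds) is an assumption, not the implementation.
import torch

probs = torch.tensor([[0.2, 0.8],
                      [0.7, 0.3],
                      [0.6, 0.4]])

# a single scalar threshold applied to every class
binarized = (probs >= 0.5).long()  # tensor([[0, 1], [1, 0], [1, 0]])

# a per-class threshold sequence, matching ``Union[float, Sequence[float]]``
per_class_thresholds = torch.tensor([0.5, 0.3])
binarized_per_class = (probs >= per_class_thresholds).long()  # broadcast over the class dim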
def do_calculate_metric(
    confusion_ele_list: List[np.ndarray],
    metric_name: str,
    average: Union[Average, str] = "none",
    zero_division: int = 0,
):
    """
    Args:
        confusion_ele_list: the returned result of function ``cal_confusion_matrix_elements``.
        metric_name: the simplified metric name from function ``check_metric_name_and_unify``.
        average: type of averaging performed if not binary classification. Defaults to ``"none"``.

            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
              This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average,
              weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
              indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.

        zero_division: the value to return when there is a zero division, for example, when all
            predictions and labels are negative. Defaults to 0.
    """
    ele_list: List[Union[np.ndarray, int, float]]
    metric = metric_name
    div_0 = zero_division

    # pre-process average
    average = Average(average)
    bin_flag: bool
    if len(confusion_ele_list[0].shape) == 0:
        bin_flag = True
        average = Average.NONE  # for binary tasks, other average methods are meaningless.
        ele_list = [int(l) for l in confusion_ele_list]
    else:
        bin_flag = False
        if average == Average.MICRO:
            ele_list = [int(l.sum()) for l in confusion_ele_list]
        else:
            ele_list = confusion_ele_list
    tp, tn, fp, fn, p, n = ele_list

    # calculate
    numerator: Union[np.ndarray, int, float]
    denominator: Union[np.ndarray, int, float]
    if metric == "tpr":
        numerator, denominator = tp, p
    elif metric == "tnr":
        numerator, denominator = tn, n
    elif metric == "ppv":
        numerator, denominator = tp, (tp + fp)
    elif metric == "npv":
        numerator, denominator = tn, (tn + fn)
    elif metric == "fnr":
        numerator, denominator = fn, p
    elif metric == "fpr":
        numerator, denominator = fp, n
    elif metric == "fdr":
        numerator, denominator = fp, (fp + tp)
    elif metric == "for":
        numerator, denominator = fn, (fn + tn)
    elif metric == "pt":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator = np.sqrt(tpr * (1 - tnr)) + tnr - 1
        denominator = tpr + tnr - 1
    elif metric == "ts":
        numerator, denominator = tp, (tp + fn + fp)
    elif metric == "acc":
        # accuracy is (tp + tn) / (p + n)
        numerator, denominator = (tp + tn), (p + n)
    elif metric == "ba":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator, denominator = (tpr + tnr), 2
    elif metric == "f1":
        numerator, denominator = tp * 2, (tp * 2 + fn + fp)
    elif metric == "mcc":
        numerator = tp * tn - fp * fn
        denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    elif metric == "fm":
        tpr = handle_zero_divide(tp, p, div_0)
        ppv = handle_zero_divide(tp, (tp + fp), div_0)
        numerator = np.sqrt(ppv * tpr)
        denominator = 1
    elif metric == "bm":
        tpr = handle_zero_divide(tp, p, div_0)
        tnr = handle_zero_divide(tn, n, div_0)
        numerator = tpr + tnr - 1
        denominator = 1
    elif metric == "mk":
        ppv = handle_zero_divide(tp, (tp + fp), div_0)
        npv = handle_zero_divide(tn, (tn + fn), div_0)
        numerator = ppv + npv - 1
        denominator = 1
    else:
        raise NotImplementedError("the metric is not implemented.")

    result = handle_zero_divide(numerator, denominator, div_0)
    if average == Average.MICRO or average == Average.NONE:
        return result

    weights: Optional[np.ndarray]
    if average == Average.MACRO:
        weights = None
    elif average == Average.WEIGHTED:
        weights = p
    result = np.average(result, weights=weights)
    return result
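# An illustrative call to ``do_calculate_metric`` above, assuming the element list is ordered
# ``[tp, tn, fp, fn, p, n]`` exactly as it is unpacked inside the function (per-class counts
# for a 3-class problem). The numbers are made up.
import numpy as np

tp = np.array([8.0, 5.0, 3.0])
tn = np.array([10.0, 12.0, 15.0])
fp = np.array([2.0, 1.0, 1.0])
fn = np.array([0.0, 2.0, 1.0])
p = tp + fn  # condition positives per class: [8, 7, 4]
n = tn + fp  # condition negatives per class: [12, 13, 16]

# per-class sensitivity tp / p, i.e. roughly [1.0, 0.71, 0.75]
per_class_tpr = do_calculate_metric([tp, tn, fp, fn, p, n], "tpr", average="none")

# unweighted mean of the per-class values
macro_tpr = do_calculate_metric([tp, tn, fp, fn, p, n], "tpr", average="macro")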
def compute_roc_auc(
    y_pred: torch.Tensor,
    y: torch.Tensor,
    to_onehot_y: bool = False,
    softmax: bool = False,
    other_act: Optional[Callable] = None,
    average: Union[Average, str] = Average.MACRO,
):
    """Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC).

    Referring to: `sklearn.metrics.roc_auc_score
    <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html#sklearn.metrics.roc_auc_score>`_.

    Args:
        y_pred: input data to compute, typical classification model output.
            it must be One-Hot format and first dim is batch, example shape: [16] or [16, 2].
        y: ground truth to compute ROC AUC metric, the first dim is batch.
            example shape: [16, 1] will be converted into [16, 2] (where `2` is inferred from `y_pred`).
        to_onehot_y: whether to convert `y` into the one-hot format. Defaults to False.
        softmax: whether to add softmax function to `y_pred` before computation. Defaults to False.
        other_act: callable function to replace `softmax` as activation layer if needed. Defaults to ``None``.
            for example: `other_act = lambda x: torch.log_softmax(x, dim=-1)`.
        average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
            Type of averaging performed if not binary classification. Defaults to ``"macro"``.

            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
              This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average,
              weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
              indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.

    Raises:
        ValueError: When ``y_pred`` dimension is not one of [1, 2].
        ValueError: When ``y`` dimension is not one of [1, 2].
        ValueError: When ``softmax=True`` and ``other_act is not None``. Incompatible values.
        TypeError: When ``other_act`` is not an ``Optional[Callable]``.
        ValueError: When ``average`` is not one of ["macro", "weighted", "micro", "none"].

    Note:
        ROCAUC expects y to be comprised of 0's and 1's.
        `y_pred` must be either prob. estimates or confidence values.

    """
    y_pred_ndim = y_pred.ndimension()
    y_ndim = y.ndimension()
    if y_pred_ndim not in (1, 2):
        raise ValueError("Predictions should be of shape (batch_size, n_classes) or (batch_size, ).")
    if y_ndim not in (1, 2):
        raise ValueError("Targets should be of shape (batch_size, n_classes) or (batch_size, ).")
    if y_pred_ndim == 2 and y_pred.shape[1] == 1:
        y_pred = y_pred.squeeze(dim=-1)
        y_pred_ndim = 1
    if y_ndim == 2 and y.shape[1] == 1:
        y = y.squeeze(dim=-1)

    if y_pred_ndim == 1:
        # single-channel predictions: binary task, activation and one-hot conversion do not apply
        if to_onehot_y:
            warnings.warn("y_pred has only one channel, to_onehot_y=True ignored.")
        if softmax:
            warnings.warn("y_pred has only one channel, softmax=True ignored.")
        return _calculate(y, y_pred)
    else:
        n_classes = y_pred.shape[1]
        if to_onehot_y:
            y = one_hot(y, n_classes)
        if softmax and other_act is not None:
            raise ValueError("Incompatible values: softmax=True and other_act is not None.")
        if softmax:
            y_pred = y_pred.float().softmax(dim=1)
        if other_act is not None:
            if not callable(other_act):
                raise TypeError(f"other_act must be None or callable but is {type(other_act).__name__}.")
            y_pred = other_act(y_pred)

    assert y.shape == y_pred.shape, "data shapes of y_pred and y do not match."

    average = Average(average)
    if average == Average.MICRO:
        return _calculate(y.flatten(), y_pred.flatten())
    else:
        # per-class AUC: iterate over the class dimension
        y, y_pred = y.transpose(0, 1), y_pred.transpose(0, 1)
        auc_values = [_calculate(y_, y_pred_) for y_, y_pred_ in zip(y, y_pred)]
        if average == Average.NONE:
            return auc_values
        if average == Average.MACRO:
            return np.mean(auc_values)
        if average == Average.WEIGHTED:
            weights = [sum(y_) for y_ in y]
            return np.average(auc_values, weights=weights)
        raise ValueError(
            f'Unsupported average: {average}, available options are ["macro", "weighted", "micro", "none"].'
        )
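# An illustrative call to the legacy ``compute_roc_auc`` above: raw logits plus integer class
# labels, letting the function apply softmax and one-hot conversion itself. The tensors are
# made-up toy data; since the classes are perfectly separated, the macro AUC comes out as 1.0.
import torch

logits = torch.tensor([[2.0, 0.5],
                       [0.1, 1.5],
                       [1.2, 0.3],
                       [0.2, 2.2]])
labels = torch.tensor([[0], [1], [0], [1]])  # shape [4, 1], squeezed internally

auc = compute_roc_auc(
    y_pred=logits,
    y=labels,
    to_onehot_y=True,  # expand labels to [4, 2] using y_pred's class count
    softmax=True,      # turn logits into probabilities before scoring
    average="macro",
)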
def _compute_fn(pred, label):
    return compute_roc_auc(
        y_pred=pred,
        y=label,
        average=Average(average),
    )
def compute_roc_auc(
    y_pred: torch.Tensor,
    y: torch.Tensor,
    average: Union[Average, str] = Average.MACRO,
):
    """Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC).

    Referring to: `sklearn.metrics.roc_auc_score
    <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html#sklearn.metrics.roc_auc_score>`_.

    Args:
        y_pred: input data to compute, typical classification model output.
            it must be One-Hot format and first dim is batch, example shape: [16] or [16, 2].
        y: ground truth to compute ROC AUC metric, the first dim is batch.
            it should already match `y_pred`, e.g. shape [16] or [16, 2]; a trailing singleton
            channel such as [16, 1] is squeezed to [16].
        average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
            Type of averaging performed if not binary classification. Defaults to ``"macro"``.

            - ``"macro"``: calculate metrics for each label, and find their unweighted mean.
              This does not take label imbalance into account.
            - ``"weighted"``: calculate metrics for each label, and find their average,
              weighted by support (the number of true instances for each label).
            - ``"micro"``: calculate metrics globally by considering each element of the label
              indicator matrix as a label.
            - ``"none"``: the scores for each class are returned.

    Raises:
        ValueError: When ``y_pred`` dimension is not one of [1, 2].
        ValueError: When ``y`` dimension is not one of [1, 2].
        ValueError: When ``average`` is not one of ["macro", "weighted", "micro", "none"].

    Note:
        ROCAUC expects y to be comprised of 0's and 1's.
        `y_pred` must be either prob. estimates or confidence values.

    """
    y_pred_ndim = y_pred.ndimension()
    y_ndim = y.ndimension()
    if y_pred_ndim not in (1, 2):
        raise ValueError("Predictions should be of shape (batch_size, n_classes) or (batch_size, ).")
    if y_ndim not in (1, 2):
        raise ValueError("Targets should be of shape (batch_size, n_classes) or (batch_size, ).")
    if y_pred_ndim == 2 and y_pred.shape[1] == 1:
        y_pred = y_pred.squeeze(dim=-1)
        y_pred_ndim = 1
    if y_ndim == 2 and y.shape[1] == 1:
        y = y.squeeze(dim=-1)

    if y_pred_ndim == 1:
        return _calculate(y_pred, y)

    if y.shape != y_pred.shape:
        raise AssertionError("data shapes of y_pred and y do not match.")

    average = Average(average)
    if average == Average.MICRO:
        return _calculate(y_pred.flatten(), y.flatten())
    # per-class AUC: iterate over the class dimension
    y, y_pred = y.transpose(0, 1), y_pred.transpose(0, 1)
    auc_values = [_calculate(y_pred_, y_) for y_pred_, y_ in zip(y_pred, y)]
    if average == Average.NONE:
        return auc_values
    if average == Average.MACRO:
        return np.mean(auc_values)
    if average == Average.WEIGHTED:
        weights = [sum(y_) for y_ in y]
        return np.average(auc_values, weights=weights)
    raise ValueError(
        f'Unsupported average: {average}, available options are ["macro", "weighted", "micro", "none"].'
    )
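# An illustrative call to the refactored ``compute_roc_auc`` above. Activation and one-hot
# conversion are no longer performed inside the function, so the caller prepares probabilities
# and 0/1 targets first (here with ``torch.softmax`` and ``torch.nn.functional.one_hot``).
# Toy data only.
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5],
                       [0.1, 1.5],
                       [1.2, 0.3],
                       [0.2, 2.2]])
labels = torch.tensor([0, 1, 0, 1])

y_pred = torch.softmax(logits, dim=1)         # shape [4, 2], probabilities
y = F.one_hot(labels, num_classes=2).float()  # shape [4, 2], 0/1 targets

macro_auc = compute_roc_auc(y_pred, y, average="macro")     # single scalar
per_class_auc = compute_roc_auc(y_pred, y, average="none")  # one value per class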