def binary_classification_accuracy(self, hue: str = MetricsDict.DEFAULT_HUE_KEY) -> float:
    """
    Computes the binary classification accuracy from the predictions and labels
    that this object returns for the given hue.

    :param hue: The hue that is passed on to `get_predictions` and `get_labels`.
    :return: The result of the module-level `binary_classification_accuracy` function.
    """
    # Inside a method body this name resolves to the module-level function, not to this method.
    predictions = self.get_predictions(hue=hue)
    targets = self.get_labels(hue=hue)
    return binary_classification_accuracy(model_output=predictions, label=targets)
def get_accuracy_at05(self, hue: str = DEFAULT_HUE_KEY) -> float:
    """
    Returns the binary classification accuracy at threshold 0.5.

    No explicit threshold is passed on, so this relies on the default threshold
    of `binary_classification_accuracy`.

    :param hue: The hue that is passed on to `get_predictions` and `get_labels`.
    :return: The accuracy computed from the predictions and labels of the given hue.
    """
    predicted = self.get_predictions(hue=hue)
    expected = self.get_labels(hue=hue)
    return binary_classification_accuracy(model_output=predicted, label=expected)
def get_metrics_at_optimal_cutoff(self, hue: str = DEFAULT_HUE_KEY) -> Tuple:
    """
    Computes the ROC curve for the labels and predictions of the given hue, and picks the
    optimal cut-off: the probability threshold at the index that `MetricsDict.get_optimal_idx`
    selects from the false positive and true positive rates. Then reports the false positive
    rate, the false negative rate and the accuracy at that threshold.

    :param hue: The hue that is passed on to `get_labels` and `get_predictions`.
    :returns: Tuple(optimal_threshold, false positive rate, false negative rate, accuracy)
    """
    false_pos_rates, true_pos_rates, cutoffs = roc_curve(self.get_labels(hue=hue),
                                                         self.get_predictions(hue=hue))
    best_idx = MetricsDict.get_optimal_idx(fpr=false_pos_rates, tpr=true_pos_rates)
    best_cutoff = float(cutoffs[best_idx])
    # The false negative rate is the complement of the true positive rate.
    fpr_at_best = false_pos_rates[best_idx]
    fnr_at_best = 1 - true_pos_rates[best_idx]
    accuracy_at_best = binary_classification_accuracy(model_output=self.get_predictions(hue=hue),
                                                      label=self.get_labels(hue=hue),
                                                      threshold=best_cutoff)
    return best_cutoff, fpr_at_best, fnr_at_best, accuracy_at_best
def compute_scalar_metrics(metrics_dict: ScalarMetricsDict,
                           subject_ids: Sequence[str],
                           model_output: torch.Tensor,
                           labels: torch.Tensor,
                           loss_type: ScalarLoss = ScalarLoss.BinaryCrossEntropyWithLogits) -> None:
    """
    Computes scalar metrics from real-valued model output and a label tensor, one model output
    channel at a time, and stores them in the given `metrics_dict`.

    Channels are taken along dimension 1 of `model_output` and `labels`. Each channel is paired
    with one hue of `metrics_dict`; the default hue is only included when the dictionary has no
    other hues. Channels for which `get_masked_model_outputs_and_labels` returns None are skipped.

    For `ScalarLoss.MeanSquaredError`, the metrics are mean squared error, mean absolute error and
    explained variance (R2 score). For any other loss type, the metrics are binary cross entropy
    and accuracy at threshold 0.5; in that case the model output is assumed to be in the range
    between 0 and 1, and the labels are expected to contain class indices 0 and 1 only.

    :param metrics_dict: An object that holds all metrics. It will be updated in-place.
    :param subject_ids: Subject ids for the model output and labels.
    :param model_output: A tensor containing model outputs.
    :param labels: A tensor containing class labels.
    :param loss_type: The type of loss that the model uses. It selects which set of metrics
        (regression or binary classification) is computed.
    :raises ValueError: If there are fewer hues than model output channels.
    """
    num_channels = model_output.shape[1]
    only_default_hue = len(metrics_dict.hues_without_default) == 0
    hue_names = metrics_dict.get_hue_names(include_default=only_default_hue)
    num_hues = len(hue_names)
    if num_hues < num_channels:
        raise ValueError("Hues must be provided for each model output channel, found "
                         f"{num_channels} channels but only {num_hues} hues")

    for channel, hue in enumerate(hue_names):
        # Mask the model outputs and labels of this channel if required.
        masked = get_masked_model_outputs_and_labels(
            model_output[:, channel, ...], labels[:, channel, ...], subject_ids)
        # Metrics are computed on valid masked tensors only.
        if masked is None:
            continue

        channel_output = masked.model_outputs.data
        # Labels must have the same datatype as the model outputs, necessary when running with AMP.
        channel_labels = masked.labels.data.to(dtype=channel_output.dtype)
        channel_subjects = masked.subject_ids

        if loss_type == ScalarLoss.MeanSquaredError:
            metrics = {
                MetricType.MEAN_SQUARED_ERROR: F.mse_loss(channel_output, channel_labels,
                                                          reduction='mean').item(),
                MetricType.MEAN_ABSOLUTE_ERROR: mean_absolute_error(channel_output, channel_labels),
                MetricType.EXPLAINED_VAR: r2_score(channel_output, channel_labels)
            }
        else:
            metrics = {
                MetricType.CROSS_ENTROPY: F.binary_cross_entropy(channel_output, channel_labels,
                                                                 reduction='mean').item(),
                MetricType.ACCURACY_AT_THRESHOLD_05: binary_classification_accuracy(channel_output,
                                                                                    channel_labels)
            }

        for metric_type, metric_value in metrics.items():
            if key_is_r2 := (metric_type == MetricType.EXPLAINED_VAR):
                # For a batch size 1, R2 score can be nan. Nans need to be ignored
                # when averaging, in case the last batch is of size 1.
                metrics_dict.add_metric(metric_type, metric_value,
                                        skip_nan_when_averaging=True, hue=hue)
            else:
                metrics_dict.add_metric(metric_type, metric_value, hue=hue)

        assert channel_subjects is not None
        metrics_dict.add_predictions(channel_subjects,
                                     channel_output.detach().cpu().numpy(),
                                     channel_labels.cpu().numpy(),
                                     hue=hue)