def get_metric(network, loader, weights, device, metric):
    """Evaluate `network` on `loader` and return the requested metric."""
    if weights is not None:
        # Only uniform sample weights are supported.
        raise NotImplementedError("Non-uniform weights not supported")
    network.eval()
    ys = []
    ps = []
    sigmoid = nn.Sigmoid()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            ys.extend(y)
            ps.extend(sigmoid(network.predict(x)))
    ps = torch.stack(ps).to(device)
    ys = torch.stack(ys).to(device)
    if metric == 'micro_f1':
        result = f1_score(ps, ys, num_classes=None, class_reduction='micro')
    elif metric == 'macro_f1':
        result = f1_score(ps, ys, num_classes=None, class_reduction='macro')
    elif metric == 'auroc':
        # AUROC is computed independently for each output dimension.
        result = []
        for d in range(ps.size(1)):
            result.append(auroc(ps[:, d], ys[:, d]))
    else:
        raise ValueError(f"Unknown metric: {metric}")
    network.train()
    return result
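# A minimal call-site sketch for get_metric above. `model` and `eval_loader`
# are hypothetical names, and the network is assumed to expose a predict()
# method returning per-class logits for a multi-label problem.
def example_get_metric_usage(model, eval_loader):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    macro_f1 = get_metric(model, eval_loader, weights=None, device=device, metric='macro_f1')
    per_class_auroc = get_metric(model, eval_loader, weights=None, device=device, metric='auroc')
    return macro_f1, per_class_auroc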
def test_f1_score(pred, target, exp_score):
    # Class-index predictions and their one-hot encoding should yield the
    # same per-class F1 scores.
    score = f1_score(torch.tensor(pred), torch.tensor(target), reduction='none')
    assert torch.allclose(score, torch.tensor(exp_score))

    score = f1_score(to_onehot(torch.tensor(pred)), torch.tensor(target), reduction='none')
    assert torch.allclose(score, torch.tensor(exp_score))
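# A hedged usage sketch with hypothetical sample values: pred = [0, 1, 1] vs
# target = [0, 1, 0] gives class 0 precision 1 and recall 1/2, and class 1
# precision 1/2 and recall 1, so both per-class F1 scores are 2/3. In the
# real suite such triples would typically be supplied via
# @pytest.mark.parametrize.
def example_f1_score_case():
    test_f1_score(pred=[0, 1, 1], target=[0, 1, 0], exp_score=[2 / 3, 2 / 3])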
def test_tbd_remove_in_v1_2_0_metrics():
    from pytorch_lightning.metrics.classification import Fbeta
    from pytorch_lightning.metrics.functional.classification import f1_score, fbeta_score

    with pytest.deprecated_call(match='will be removed in v1.2'):
        Fbeta(2)

    with pytest.deprecated_call(match='will be removed in v1.2'):
        fbeta_score(torch.tensor([0, 1, 2, 3]), torch.tensor([0, 1, 2, 1]), 0.2)

    with pytest.deprecated_call(match='will be removed in v1.2'):
        f1_score(torch.tensor([0, 1, 0, 1]), torch.tensor([0, 1, 0, 0]))
def training_epoch_end(self, train_outputs):
    """Log all the values at the end of the epoch."""
    outputs = [x['result'] for x in train_outputs]
    avg_class_loss = torch.stack([x['class_loss'] for x in outputs]).mean()
    avg_mask_loss = torch.stack([x['mask_loss'] for x in outputs]).mean()
    avg_loss = torch.stack([x['total_loss'] for x in outputs]).mean()

    all_predictions = torch.stack([x['predictions'] for x in outputs]).flatten()
    all_targets = torch.stack([x['targets'] for x in outputs]).flatten()
    class_accuracy = accuracy(all_predictions, all_targets, num_classes=2)
    class_f1 = f1_score(all_predictions, all_targets, num_classes=2)

    self.log('train_class_loss', avg_class_loss, sync_dist=True)
    self.log('train_mask_loss', avg_mask_loss, sync_dist=True)
    self.log('train_loss', avg_loss, sync_dist=True)
    self.log('train_accuracy', class_accuracy, prog_bar=True, sync_dist=True)
    self.log('train_f1', class_f1, sync_dist=True)
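# Note on the aggregation above: torch.stack requires every step to emit
# tensors of identical shape, so a shorter final batch would raise a size
# mismatch. A hedged alternative (an assumption, not the author's code) is to
# concatenate instead, which tolerates ragged batch sizes:
#
#     all_predictions = torch.cat([x['predictions'] for x in outputs])
#     all_targets = torch.cat([x['targets'] for x in outputs])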
def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """
    Actual metric computation.

    Args:
        pred: predicted labels
        target: ground-truth labels

    Return:
        torch.Tensor: classification score
    """
    return f1_score(pred=pred, target=target,
                    num_classes=self.num_classes,
                    reduction=self.reduction)
def test_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> None:
    """
    Runs one test step. This usually consists of the forward function followed
    by the loss function.

    :param batch: The output of your dataloader.
    :param batch_nb: Integer displaying which batch this is

    Logs the batch-level metrics and appends the batch confusion matrix to
    ``self.test_conf_matrices``; nothing is returned.
    """
    inputs, targets = batch
    model_out = self.forward(**inputs)
    loss_val = self.loss(model_out, targets)

    # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
    if self.trainer.use_dp or self.trainer.use_ddp2:
        loss_val = loss_val.unsqueeze(0)
    self.log('test_loss', loss_val)

    y_hat = model_out['logits']
    labels_hat = torch.argmax(y_hat, dim=1)
    y = targets['labels']

    f1 = metrics.f1_score(labels_hat, y, class_reduction='weighted')
    prec = metrics.precision(labels_hat, y, class_reduction='weighted')
    recall = metrics.recall(labels_hat, y, class_reduction='weighted')
    acc = metrics.accuracy(labels_hat, y, class_reduction='weighted')
    # auroc = metrics.multiclass_auroc(labels_hat, y)

    self.log('test_batch_prec', prec)
    self.log('test_batch_f1', f1)
    self.log('test_batch_recall', recall)
    self.log('test_batch_weighted_acc', acc)
    # self.log('test_batch_auc_roc', auroc)

    from pytorch_lightning.metrics.functional import confusion_matrix
    # TODO CHANGE THIS
    # return (labels_hat, y)
    cm = confusion_matrix(preds=labels_hat, target=y, normalize=None, num_classes=50)
    # cm = confusion_matrix(preds=labels_hat, target=y, normalize=False, num_classes=len(y.unique()))
    self.test_conf_matrices.append(cm)
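# The matrices collected in `self.test_conf_matrices` are never consumed in
# the snippet above. A minimal companion sketch, assuming a `test_epoch_end`
# hook on the same LightningModule (the summation strategy and the
# `final_conf_matrix` attribute are assumptions, not part of the original):
def test_epoch_end(self, outputs):
    # Element-wise sum of the per-batch matrices gives the epoch-level
    # confusion matrix.
    self.final_conf_matrix = torch.stack(self.test_conf_matrices).sum(dim=0)
    self.test_conf_matrices.clear()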
def validation_epoch_end(self, outputs):
    """Log all the values at the end of the epoch."""
    avg_class_loss = torch.stack([x['class_loss'] for x in outputs]).mean()
    avg_mask_loss = torch.stack([x['mask_loss'] for x in outputs]).mean()
    avg_loss = torch.stack([x['total_loss'] for x in outputs]).mean()

    all_predictions = torch.stack([x['predictions'] for x in outputs]).flatten()
    all_targets = torch.stack([x['targets'] for x in outputs]).flatten()
    class_accuracy = accuracy(all_predictions, all_targets, num_classes=2)
    class_f1 = f1_score(all_predictions, all_targets, num_classes=2)

    self.log('valid_class_loss', avg_class_loss)
    self.log('valid_mask_loss', avg_mask_loss)
    self.log('valid_loss', avg_loss)
    self.log('valid_accuracy', class_accuracy, prog_bar=True)
    self.log('valid_f1', class_f1)
def validation_epoch_end(self, outputs):
    logits = torch.cat([o["logits"] for o in outputs], dim=0)
    labels = torch.cat([o["labels"] for o in outputs], dim=0)
    self.log(
        "val_f1",
        f1_score(torch.argmax(logits, dim=1), labels, class_reduction="macro").detach().cpu().item(),
        prog_bar=True,
    )
    self.log(
        "val_acc",
        accuracy(torch.argmax(logits, dim=1), labels).detach().cpu().item(),
        prog_bar=True,
    )
    self.log(
        "val_loss",
        self.loss(logits, labels).detach().cpu().item(),
        prog_bar=True,
    )
def validation_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> None:
    """
    Similar to the training step but with the model in eval mode.

    Metrics are logged directly via ``self.log``; nothing is returned.
    """
    inputs, targets = batch
    model_out = self.forward(**inputs)
    loss_val = self.loss(model_out, targets)

    y = targets["labels"]
    y_hat = model_out["logits"]

    # accuracy computed manually from the argmax of the logits
    labels_hat = torch.argmax(y_hat, dim=1)
    val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
    val_acc = torch.tensor(val_acc)

    if self.on_gpu:
        val_acc = val_acc.cuda(loss_val.device.index)

    # in DP mode (default) make sure if result is scalar, there's another dim in the beginning
    if self.trainer.use_dp or self.trainer.use_ddp2:
        loss_val = loss_val.unsqueeze(0)
        val_acc = val_acc.unsqueeze(0)

    self.log('val_loss', loss_val)

    f1 = metrics.f1_score(labels_hat, y, class_reduction='weighted')
    prec = metrics.precision(labels_hat, y, class_reduction='weighted')
    recall = metrics.recall(labels_hat, y, class_reduction='weighted')
    acc = metrics.accuracy(labels_hat, y, class_reduction='weighted')
    # auroc = metrics.multiclass_auroc(y_hat, y)

    self.log('val_prec', prec)
    self.log('val_f1', f1)
    self.log('val_recall', recall)
    self.log('val_acc_weighted', acc)