Example #1
    def _calculate_step_metrics(self, logits, y):
        # prepare the metrics
        loss = self._loss_function(logits[1], y)
        # loss = F.cross_entropy(logits[1], y)
        preds = torch.argmax(logits[1], dim=1)
        num_correct = torch.eq(preds.view(-1), y.view(-1)).sum()
        acc = accuracy(preds, y)
        f1_score = f1(preds, y, num_classes=2, average='weighted')
        fb05_score = fbeta(preds,
                           y,
                           num_classes=2,
                           average='weighted',
                           beta=0.5)
        fb2_score = fbeta(preds, y, num_classes=2, average='weighted', beta=2)
        cm = confusion_matrix(preds, y, num_classes=2)
        prec = precision(preds, y, num_classes=2, class_reduction='weighted')
        rec = recall(preds, y, num_classes=2, class_reduction='weighted')
        # au_roc = auroc(preds, y, pos_label=1)

        return {
            'loss': loss,
            'acc': acc,
            'f1_score': f1_score,
            'f05_score': fb05_score,
            'f2_score': fb2_score,
            'precision': prec,
            'recall': rec,
            # 'auroc': au_roc,
            'confusion_matrix': cm,
            'num_correct': num_correct
        }
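The excerpts on this page omit their imports. Below is a minimal, self-contained sketch of the same functional-metric calls; it assumes the pre-v1.5 pytorch_lightning.metrics.functional API, so treat the import path and argument names as assumptions that depend on the installed Lightning version:

import torch
# Assumed pre-v1.5 import path; the same metrics later moved to torchmetrics.
from pytorch_lightning.metrics.functional import (
    accuracy, f1, fbeta, precision, recall, confusion_matrix)

# Dummy binary-classification batch: 4 samples, 2 classes.
logits = torch.tensor([[0.2, 0.8], [0.9, 0.1], [0.4, 0.6], [0.7, 0.3]])
y = torch.tensor([1, 0, 1, 1])
preds = torch.argmax(logits, dim=1)

acc = accuracy(preds, y)
f1_score = f1(preds, y, num_classes=2, average='weighted')
fb05_score = fbeta(preds, y, num_classes=2, average='weighted', beta=0.5)
prec = precision(preds, y, num_classes=2, class_reduction='weighted')
rec = recall(preds, y, num_classes=2, class_reduction='weighted')
cm = confusion_matrix(preds, y, num_classes=2)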
Example #2
    def test_step(self, batch, batch_idx):
        # Note: this test step assumes the whole test set is passed as a single batch.
        # Use the LearningDataSet class from the simple_dataset module to achieve that.

        x, y = batch
        logits = torch.squeeze(self(x))
        if self.goal == 'binary':
            preds = self.probability_fn(logits)
            acc = accuracy(preds, y)
            f1_score = f1(preds, y, 1)
            auc_score = roc_auc_score(y, preds)
            metrics_dict = {'acc': acc.item(), 'f1_score': f1_score.item(), 'auc_score': auc_score}
        elif self.goal == 'multi_class':
            preds = self.probability_fn(logits)
            acc = accuracy(preds, y)
            metrics_dict = {'acc': acc.item()}
        else:
            r2 = r2score(logits, y)
            rho = stats.spearmanr(logits, y)[0]
            metrics_dict = {'r2': r2.item(), 'correlation': rho}

        self.test_predictions = logits
        self.log_dict(metrics_dict)
Example #3
    def compute_metrics(self, pred, target):
        metrics = dict()
        metrics['accuracy'] = accuracy(pred, target, num_classes=self.num_classes)
        metrics['precision'] = precision(pred, target, num_classes=self.num_classes)
        metrics['recall'] = recall(pred, target, num_classes=self.num_classes)
        metrics['f1'] = f1(pred, target, num_classes=self.num_classes)

        return metrics
Example #4
 def _epoch_end(self, stage, steps):
     y_true = torch.cat([x["y_true"] for x in steps]).reshape(-1,1)
     y_pred = torch.cat([x["y_pred"] for x in steps]).reshape(-1,1)
     return {
         f"{stage}_acc": metrics.accuracy(y_pred, y_true),
         f"{stage}_f1": metrics.f1(y_pred, y_true, num_classes=self.num_classes),
         f"{stage}_recall": metrics.recall(y_pred, y_true),
         f"{stage}_precision": metrics.precision(y_pred, y_true),
     }
Example #5
def test_v1_5_metric_classif_mix():
    ConfusionMatrix.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        ConfusionMatrix(num_classes=1)

    FBeta.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        FBeta(num_classes=1)

    F1.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        F1(num_classes=1)

    HammingDistance.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        HammingDistance()

    StatScores.__init__._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        StatScores()

    target = torch.tensor([1, 1, 0, 0])
    preds = torch.tensor([0, 1, 0, 0])
    confusion_matrix._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        assert torch.equal(
            confusion_matrix(preds, target, num_classes=2).float(),
            torch.tensor([[2.0, 0.0], [1.0, 1.0]]))

    target = torch.tensor([0, 1, 2, 0, 1, 2])
    preds = torch.tensor([0, 2, 1, 0, 0, 1])
    fbeta._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        assert torch.allclose(fbeta(preds, target, num_classes=3, beta=0.5),
                              torch.tensor(0.3333),
                              atol=1e-4)

    f1._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        assert torch.allclose(f1(preds, target, num_classes=3),
                              torch.tensor(0.3333),
                              atol=1e-4)

    target = torch.tensor([[0, 1], [1, 1]])
    preds = torch.tensor([[0, 1], [0, 1]])
    hamming_distance._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        assert hamming_distance(preds, target) == torch.tensor(0.25)

    preds = torch.tensor([1, 0, 2, 1])
    target = torch.tensor([1, 1, 2, 0])
    stat_scores._warned = False
    with pytest.deprecated_call(match="It will be removed in v1.5.0"):
        assert torch.equal(stat_scores(preds, target, reduce="micro"),
                           torch.tensor([2, 2, 6, 2, 4]))
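Example #5 exercises the deprecation warnings emitted before the pytorch_lightning.metrics classification API was removed in v1.5.0 in favor of the standalone torchmetrics package. A rough migration sketch follows; it assumes the task-based torchmetrics API (roughly torchmetrics >= 0.11), so argument names may differ in other releases:

import torch
from torchmetrics.functional import accuracy, confusion_matrix, f1_score

target = torch.tensor([0, 1, 2, 0, 1, 2])
preds = torch.tensor([0, 2, 1, 0, 0, 1])

# The task-based API selects the metric family explicitly instead of
# inferring it from the input shapes.
acc = accuracy(preds, target, task="multiclass", num_classes=3)
f1 = f1_score(preds, target, task="multiclass", num_classes=3)
cm = confusion_matrix(preds, target, task="multiclass", num_classes=3)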
Example #6
 def validation_step(self, batch: Tuple[Tensor, Tensor],
                     batch_idx: int) -> Tensor:
     images, label = batch
     y_hat = self(images)
     loss = F.cross_entropy(y_hat, label)
     acc = accuracy(y_hat, label, num_classes=NUM_CLASSES)
     f1_score = f1(y_hat,
                   label,
                   num_classes=NUM_CLASSES,
                   average="weighted")
     metrics = {"val_acc": acc, "val_loss": loss, "val_f1": f1_score}
     self.log_dict(metrics, prog_bar=True, on_step=False, sync_dist=True)
     return metrics
Example #7
 def training_step(self, batch: Tuple[Tensor, Tensor],
                   batch_idx: int) -> Tensor:
     images, label = batch
     y_hat = self(images)
     loss = F.cross_entropy(y_hat, label, weight=self.weight)
     acc = accuracy(y_hat, label, num_classes=NUM_CLASSES)
     f1_score = f1(y_hat,
                   label,
                   num_classes=NUM_CLASSES,
                   average="weighted")
     metrics = {"train_acc": acc, "train_loss": loss, "train_f1": f1_score}
     self.log_dict(metrics, prog_bar=True, on_step=True)
     return loss
Example #8
    def validation_step(self, batch: tuple, batch_nb: int, *args,
                        **kwargs) -> dict:
        """ Similar to the training step but with the model in eval mode.

        Returns:
            - dictionary passed to the validation_end function.
        """
        inputs, targets = batch
        model_out = self.forward(**inputs)
        loss_val = self.loss(model_out, targets)

        y = targets["labels"]
        y_hat = model_out["logits"]

        # acc
        labels_hat = torch.argmax(y_hat, dim=1)
        val_acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
        val_acc = torch.tensor(val_acc)

        if self.on_gpu:
            val_acc = val_acc.cuda(loss_val.device.index)

        # in DP mode (default), if the result is a scalar, make sure it has an extra leading dim
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)
            val_acc = val_acc.unsqueeze(0)

        self.log('val_loss', loss_val)

        f1 = metrics.f1(labels_hat, y, average='weighted', num_classes=3)
        prec = metrics.precision(labels_hat,
                                 y,
                                 class_reduction='weighted',
                                 num_classes=3)
        recall = metrics.recall(labels_hat,
                                y,
                                class_reduction='weighted',
                                num_classes=3)
        acc = metrics.accuracy(labels_hat,
                               y,
                               class_reduction='weighted',
                               num_classes=3)

        self.log('val_prec', prec)
        self.log('val_f1', f1)
        self.log('val_recall', recall)
        self.log('val_acc_weighted', acc)
Example #9
    def test_step(self, batch: tuple, batch_nb: int, *args, **kwargs) -> dict:
        """ 
        Runs one training step. This usually consists in the forward function followed
            by the loss function.
        
        :param batch: The output of your dataloader. 
        :param batch_nb: Integer displaying which batch this is

        Returns:
            - dictionary containing the loss and the metrics to be added to the lightning logger.
        """
        inputs, targets = batch
        model_out = self.forward(**inputs)
        loss_val = self.loss(model_out, targets)

        # in DP mode (default), if the result is a scalar, make sure it has an extra leading dim
        if self.trainer.use_dp or self.trainer.use_ddp2:
            loss_val = loss_val.unsqueeze(0)

        self.log('test_loss', loss_val)

        y_hat = model_out['logits']
        labels_hat = torch.argmax(y_hat, dim=1)
        y = targets['labels']

        f1 = metrics.f1(labels_hat, y, average='weighted', num_classes=3)
        prec = metrics.precision(labels_hat,
                                 y,
                                 class_reduction='weighted',
                                 num_classes=3)
        recall = metrics.recall(labels_hat,
                                y,
                                class_reduction='weighted',
                                num_classes=3)
        acc = metrics.accuracy(labels_hat,
                               y,
                               class_reduction='weighted',
                               num_classes=3)

        self.confusion_matrix.update(labels_hat, y)
        self.log('test_batch_prec', prec)
        self.log('test_batch_f1', f1)
        self.log('test_batch_recall', recall)
        self.log('test_batch_weighted_acc', acc)
Example #10
def f1_score_dist(preds, target):
    return metrics.f1((preds < MIN_EPS_HOLDER.MIN_EPS / 2).int(),
                      (target == 0.0).int(),
                      num_classes=2)[1]
Example #11
    def test_epoch_end(self, outputs):

        if self.incorrect_type != 'boundary':
            #####  Confusion Matrix  #####
            conf_mtx = confusion_matrix(
                torch.cat([b['preds'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                normalize=False,
                num_classes=5)

            #####  Normalized Confusion Matrix  #####
            conf_mtx_normalized = confusion_matrix(
                torch.cat([b['preds'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                normalize=True,
                num_classes=5)

            #####  Weighted Confusion Matrix  #####
            conf_mtx_weighted = conf_mtx.clone()
            for c, w in enumerate(self.weights):
                conf_mtx_weighted[c, :] *= w

            #####  ACCURACY  #####
            accuracy = torch.diag(conf_mtx).sum() / conf_mtx.sum()
            accuracy_weighted = torch.diag(
                conf_mtx_weighted).sum() / conf_mtx_weighted.sum()

            #####  AUC_SCORE  #####
            roc_results = multiclass_roc(
                torch.cat([b['logits'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                num_classes=5)
            AUROC_str = ''
            AUROC_list = {}
            for cls, roc_cls in enumerate(roc_results):
                fpr, tpr, threshold = roc_cls
                self.logger.experiment.add_scalar(f'val_AUC[{cls}]',
                                                  auc(fpr, tpr),
                                                  self.current_epoch)
                AUROC_str += '\tAUC_SCORE[CLS %d]: \t%.4f\n' % (cls,
                                                                auc(fpr, tpr))
                AUROC_list['AUC_SCORE[CLS %d]' % cls] = auc(fpr, tpr)

            #####  F1  #####
            f1_score = f1(torch.cat([b['preds'] for b in outputs]),
                          torch.cat([b['labels'] for b in outputs]),
                          num_classes=5)

            #####  Average Precision  #####
            # TO DO

            #####  PRINT RESULTS  #####
            print('=' * 100)
            print(
                f'[MODEL NAME]: {self.model_name} \t [INCORRECT TYPE]: {self.incorrect_type}'
            )
            print('RESULTS:')
            print('\tAccuracy: \t\t%.4f' % accuracy)
            print('\tWeighted Accuracy: \t%.4f' % accuracy_weighted)
            print('\tF1 Score: \t\t%.4f' % f1_score)
            print(AUROC_str)

            self.metrics_result[self.incorrect_type][self.model_name] = {
                'Accuracy': round(float(accuracy), 4),
                'Weighted Accuracy': round(float(accuracy_weighted), 4),
                'F1_score': round(float(f1_score), 4)
            }
            for key, val in AUROC_list.items():
                self.metrics_result[self.incorrect_type][
                    self.model_name].update({key: round(float(val), 4)})
            print('Confusion Matrix')
            fig, ax = plt.subplots(figsize=(4, 4))
            sn.heatmap(conf_mtx.cpu(),
                       annot=True,
                       cbar=False,
                       annot_kws={"size": 15},
                       fmt='g',
                       cmap='mako')
            plt.show()
            fig, ax = plt.subplots(figsize=(4, 4))
            sn.heatmap(conf_mtx_normalized.cpu(),
                       annot=True,
                       cbar=False,
                       annot_kws={"size": 12},
                       fmt='.2f',
                       cmap='mako')
            plt.show()
            print('=' * 100)

        else:
            tol_correct = 0
            tol_samples = 0
            tol_drop = 0
            for batch in outputs:
                preds = batch['preds']
                labels = batch['labels']
                slope_id = batch['doc_ids']
                ##### Change lizhong's code ####
                for idx, slop_idx in enumerate(slope_id):
                    agree_by_user = bool(
                        slope_df[slope_df['slope_id'] == slop_idx.item()]
                        ['sentiment_correct'].values[0])
                    possible_classes = slope_df[
                        slope_df['slope_id'] ==
                        slop_idx.item()]['label_from_score'].values[0]

                    pred_class = preds[idx]
                    # difference between pred and true label
                    diff = torch.abs(pred_class - possible_classes)

                    # if correct label
                    if agree_by_user:  # True
                        if diff == 0:
                            # correct prediction
                            tol_correct += 1
                            tol_samples += 1
                        elif diff == 1:
                            # discard
                            tol_drop += 1
                        else:
                            # wrong prediction
                            tol_samples += 1
                    # if incorrect label
                    else:  # False
                        if diff == 0:
                            # wrong
                            tol_samples += 1
                        elif diff == 1:
                            # discard
                            tol_drop += 1
                        else:
                            # Correct
                            tol_correct += 1
                            tol_samples += 1

            boundary_accuracy = round(tol_correct / tol_samples, 4)
            self.metrics_result[self.incorrect_type][self.model_name] = {}
            self.metrics_result[self.incorrect_type][
                self.model_name]['boundary_acc'] = boundary_accuracy
            self.metrics_result[self.incorrect_type][
                self.model_name]['total_drop_sample'] = tol_drop
            print('=' * 100)
            print(
                f'[MODEL NAME]: {self.model_name} \t [INCORRECT TYPE]: {self.incorrect_type}'
            )
            print('\tBoundary Accuracy: \t\t%.4f' % boundary_accuracy)
            print('\tDrop Total Samples: \t\t%d' % tol_drop)
Example #12
def test_f1_score(pred, target, exp_score):
    score = f1(torch.tensor(pred),
               torch.tensor(target),
               num_classes=1,
               average='none')
    assert torch.allclose(score, torch.tensor(exp_score))
Example #13
 def get_f1_score(self):
     f1_score = f1(self.preds, self.targets, num_classes=2, average='none')
     return f1_score
Example #14
def f1_score_v2(all_preds, target):
    preds, _, _ = all_preds
    return metrics.f1((preds > 0.5).int(), target.int(), num_classes=2)
Example #15
def f1_score(preds, target):
    return metrics.f1((preds > 0.5).int(), target.int(), num_classes=2)
Example #16
def f1_score(preds, target):
    return metrics.f1((preds[:, :, 0] > 0.5).int(), (target[:, :, 0]).int(),
                      num_classes=2)
Example #17
 def calculate_metrics(self, y, y_hat):
     loss = F.cross_entropy(y_hat, y)
     y_pred = y_hat.argmax(dim=1)
     acc = classification.accuracy(y_pred, y)
     f1_score = f1(y_pred, y, self.num_classes)
     return {"loss": loss, "acc": acc, "f1": f1_score}