Example 1
import pytest
import torch


def test_v1_4_0_deprecated_metrics():
    from pytorch_lightning.metrics.functional.classification import stat_scores_multiple_classes
    with pytest.deprecated_call(match='will be removed in v1.4'):
        stat_scores_multiple_classes(pred=torch.tensor([0, 1]),
                                     target=torch.tensor([0, 1]))

    from pytorch_lightning.metrics.functional.classification import iou
    with pytest.deprecated_call(match='will be removed in v1.4'):
        iou(torch.randint(0, 2, (10, 3, 3)), torch.randint(0, 2, (10, 3, 3)))

    from pytorch_lightning.metrics.functional.classification import recall
    with pytest.deprecated_call(match='will be removed in v1.4'):
        recall(torch.randint(0, 2, (10, 3, 3)),
               torch.randint(0, 2, (10, 3, 3)))

    from pytorch_lightning.metrics.functional.classification import precision
    with pytest.deprecated_call(match='will be removed in v1.4'):
        precision(torch.randint(0, 2, (10, 3, 3)),
                  torch.randint(0, 2, (10, 3, 3)))

    from pytorch_lightning.metrics.functional.classification import precision_recall
    with pytest.deprecated_call(match='will be removed in v1.4'):
        precision_recall(torch.randint(0, 2, (10, 3, 3)),
                         torch.randint(0, 2, (10, 3, 3)))

    # Testing deprecation of class_reduction arg in the *new* precision
    from pytorch_lightning.metrics.functional import precision
    with pytest.deprecated_call(match='will be removed in v1.4'):
        precision(torch.randint(0, 2, (10, )),
                  torch.randint(0, 2, (10, )),
                  class_reduction='micro')

    # Testing deprecation of class_reduction arg in the *new* recall
    from pytorch_lightning.metrics.functional import recall
    with pytest.deprecated_call(match='will be removed in v1.4'):
        recall(torch.randint(0, 2, (10, )),
               torch.randint(0, 2, (10, )),
               class_reduction='micro')

    from pytorch_lightning.metrics.functional.classification import auc
    with pytest.deprecated_call(match='will be removed in v1.4'):
        auc(torch.rand(10, ).sort().values, torch.rand(10, ))

    from pytorch_lightning.metrics.functional.classification import auroc
    with pytest.deprecated_call(match='will be removed in v1.4'):
        auroc(torch.rand(10, ), torch.randint(0, 2, (10, )))

    from pytorch_lightning.metrics.functional.classification import multiclass_auroc
    with pytest.deprecated_call(match='will be removed in v1.4'):
        multiclass_auroc(torch.rand(20, 5).softmax(dim=-1),
                         torch.randint(0, 5, (20, )),
                         num_classes=5)

    from pytorch_lightning.metrics.functional.classification import auc_decorator
    with pytest.deprecated_call(match='will be removed in v1.4'):
        auc_decorator()

    from pytorch_lightning.metrics.functional.classification import multiclass_auc_decorator
    with pytest.deprecated_call(match='will be removed in v1.4'):
        multiclass_auc_decorator()
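The pattern used throughout this test is pytest's `deprecated_call` context manager, which fails unless the wrapped call emits a `DeprecationWarning` whose message matches the `match` pattern. A minimal, self-contained sketch of the mechanism (the `old_metric` function is made up for illustration):

import warnings

import pytest


def old_metric():
    # Stand-in for a deprecated metric function
    warnings.warn("old_metric will be removed in v1.4", DeprecationWarning)
    return 0.0


def test_old_metric_warns():
    # Passes only if the call emits a matching DeprecationWarning
    with pytest.deprecated_call(match='will be removed in v1.4'):
        old_metric()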
Example 2
    def validation_epoch_end(self, outputs):
        """
        After going through the entire validation set, we compute the final ROC curve accumulated
        over all predictions and target masks, then compute the AUROC.
        """
        if self.hparams.auroc:
            fpr, tpr, thresholds = self.roc.compute()
            fpr, idx = torch.sort(fpr, descending=False)
            tpr, thresholds = tpr[idx], thresholds[idx]
            auroc = auc(fpr, tpr)
            self.log('auroc', auroc)
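This hook assumes a `self.roc` metric that has accumulated predictions and target masks over the epoch. A minimal sketch of how that state could be wired up, assuming the class-based `ROC` metric from the same metrics package and a binary segmentation model (the class, attribute names and `hparams.auroc` flag are illustrative, not taken from the source):

import torch
import pytorch_lightning as pl
from pytorch_lightning.metrics.classification import ROC  # assumed import


class SegmentationModule(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        self.save_hyperparameters(hparams)
        self.net = torch.nn.Conv2d(3, 1, kernel_size=1)  # placeholder segmentation head
        # Accumulates (prediction, target) pairs across validation batches
        self.roc = ROC()

    def forward(self, x):
        return self.net(x)

    def validation_step(self, batch, batch_idx):
        x, mask = batch
        probs = torch.sigmoid(self(x))
        if self.hparams.auroc:
            # update() stores flattened scores and binary targets;
            # compute() in validation_epoch_end then builds the full ROC curve
            self.roc.update(probs.flatten(), mask.int().flatten())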
Example 3
import pytest
import torch

# `auc` import omitted in the original snippet; it comes from the metrics functional API under test


def test_reorder_remove_in_v1_1():
    with pytest.deprecated_call(
            match='The `reorder` parameter to `auc` has been deprecated'):
        _ = auc(torch.tensor([0, 1, 2, 3]),
                torch.tensor([0, 1, 2, 2]),
                reorder=True)
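The non-deprecated call expects the x-values to already be monotonic, so the fix is to sort them (and reindex y) before calling `auc`. A minimal sketch, assuming the same classification `auc` import as in Example 1:

import torch
from pytorch_lightning.metrics.functional.classification import auc  # assumed import

x = torch.tensor([0., 1., 2., 3.])
y = torch.tensor([0., 1., 2., 2.])

# Sort x up front instead of passing the deprecated `reorder=True`
x_sorted, idx = torch.sort(x)
area = auc(x_sorted, y[idx])  # trapezoidal area: 0.5 + 1.5 + 2.0 = 4.0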
Example 4
    def test_epoch_end(self, outputs):

        if self.incorrect_type != 'boundary':
            #####  Confusion Matrix  #####
            conf_mtx = confusion_matrix(
                torch.cat([b['preds'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                normalize=False,
                num_classes=5)

            #####  Normalized Confusion Matrix  #####
            conf_mtx_normalized = confusion_matrix(
                torch.cat([b['preds'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                normalize=True,
                num_classes=5)

            #####  Weighted Confusion Matrix  #####
            conf_mtx_weighted = conf_mtx.clone()
            for c, w in enumerate(self.weights):
                conf_mtx_weighted[c, :] *= w

            #####  ACCURACY  #####
            accuracy = torch.diag(conf_mtx).sum() / conf_mtx.sum()
            accuracy_weighted = torch.diag(
                conf_mtx_weighted).sum() / conf_mtx_weighted.sum()

            #####  AUC_SCORE  #####
            roc_results = multiclass_roc(
                torch.cat([b['logits'] for b in outputs]),
                torch.cat([b['labels'] for b in outputs]),
                num_classes=5)
            AUROC_str = ''
            AUROC_list = {}
            for cls, roc_cls in enumerate(roc_results):
                fpr, tpr, threshold = roc_cls
                self.logger.experiment.add_scalar(f'val_AUC[{cls}]',
                                                  auc(fpr, tpr),
                                                  self.current_epoch)
                AUROC_str += '\tAUC_SCORE[CLS %d]: \t%.4f\n' % (cls,
                                                                auc(fpr, tpr))
                AUROC_list['AUC_SCORE[CLS %d]' % cls] = auc(fpr, tpr)

            #####  F1  #####
            f1_score = f1(torch.cat([b['preds'] for b in outputs]),
                          torch.cat([b['labels'] for b in outputs]),
                          num_classes=5)

            #####  Average Precision  #####
            # TO DO

            #####  PRINT RESULTS  #####
            print('=' * 100)
            print(
                f'[MODEL NAME]: {self.model_name} \t [INCORRECT TYPE]: {self.incorrect_type}'
            )
            print('RESULTS:')
            print('\tAccuracy: \t\t%.4f' % accuracy)
            print('\tWeighted Accuracy: \t%.4f' % accuracy_weighted)
            print('\tF1 Score: \t\t%.4f' % f1_score)
            print(AUROC_str)

            self.metrics_result[self.incorrect_type][self.model_name] = {
                'Accuracy': round(float(accuracy), 4),
                'Weighted Accuracy': round(float(accuracy_weighted), 4),
                'F1_score': round(float(f1_score), 4)
            }
            for key, val in AUROC_list.items():
                self.metrics_result[self.incorrect_type][
                    self.model_name].update({key: round(float(val), 4)})
            print('Confusion Matrix')
            fig, ax = plt.subplots(figsize=(4, 4))
            sn.heatmap(conf_mtx.cpu(),
                       annot=True,
                       cbar=False,
                       annot_kws={"size": 15},
                       fmt='g',
                       cmap='mako')
            plt.show()
            fig, ax = plt.subplots(figsize=(4, 4))
            sn.heatmap(conf_mtx_normalized.cpu(),
                       annot=True,
                       cbar=False,
                       annot_kws={"size": 12},
                       fmt='.2f',
                       cmap='mako')
            plt.show()
            print('=' * 100)

        else:
            tol_correct = 0
            tol_samples = 0
            tol_drop = 0
            for batch in outputs:
                preds = batch['preds']
                labels = batch['labels']
                slope_id = batch['doc_ids']
                ##### Change lizhong's code ####
                for idx, slop_idx in enumerate(slope_id):
                    agree_by_user = bool(
                        slope_df[slope_df['slope_id'] == slop_idx.item()]
                        ['sentiment_correct'].values[0])
                    possible_classes = slope_df[
                        slope_df['slope_id'] ==
                        slop_idx.item()]['label_from_score'].values[0]

                    pred_class = preds[idx]
                    # difference between pred and true label
                    diff = torch.abs(pred_class - possible_classes)

                    # if correct label
                    if agree_by_user:  # True
                        if diff == 0:
                            # correct prediction
                            tol_correct += 1
                            tol_samples += 1
                        elif diff == 1:
                            # discard
                            tol_drop += 1
                        else:
                            # wrong prediction
                            tol_samples += 1
                    # if incorrect label
                    else:  # False
                        if diff == 0:
                            # wrong
                            tol_samples += 1
                        elif diff == 1:
                            # discard
                            tol_drop += 1
                        else:
                            # Correct
                            tol_correct += 1
                            tol_samples += 1

            boundary_accuracy = round(tol_correct / tol_samples, 4)
            self.metrics_result[self.incorrect_type][self.model_name] = {}
            self.metrics_result[self.incorrect_type][
                self.model_name]['boundary_acc'] = boundary_accuracy
            self.metrics_result[self.incorrect_type][
                self.model_name]['total_drop_sample'] = tol_drop
            print('=' * 100)
            print(
                f'[MODEL NAME]: {self.model_name} \t [INCORRECT TYPE]: {self.incorrect_type}'
            )
            print('\tBoundary Accuracy: \t\t%.4f' % boundary_accuracy)
            print('\tTotal Dropped Samples: \t%d' % tol_drop)
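The boundary branch implements a tolerance rule: a prediction exactly one class away from the reference label is dropped rather than scored, and the user-agreement flag inverts what counts as correct. Restated as a small standalone helper (illustrative only, not part of the original module):

def boundary_outcome(pred_class: int, possible_class: int, agree_by_user: bool) -> str:
    """Classify a single prediction under the boundary-tolerance rule used above."""
    diff = abs(pred_class - possible_class)
    if diff == 1:
        return 'drop'  # within one class of the boundary: excluded from accuracy
    if agree_by_user:
        return 'correct' if diff == 0 else 'wrong'
    return 'wrong' if diff == 0 else 'correct'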
Example 5
def test_auc(x, y, expected):
    # Test Area Under Curve (AUC) computation
    assert auc(torch.tensor(x), torch.tensor(y)) == expected
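The fixture values for `x`, `y` and `expected` are not shown in the snippet; one way the test could be parametrized (the cases below are illustrative trapezoidal-rule values, not the original ones):

import pytest
import torch
from pytorch_lightning.metrics.functional.classification import auc  # assumed import


@pytest.mark.parametrize(['x', 'y', 'expected'], [
    ([0, 1], [0, 1], 0.5),
    ([0, 1], [1, 1], 1.0),
    ([0, 1, 2, 3], [0, 1, 2, 2], 4.0),
])
def test_auc(x, y, expected):
    # Test Area Under Curve (AUC) computation
    assert auc(torch.tensor(x), torch.tensor(y)) == expected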
Example 6
    def compute(self) -> torch.Tensor:
        preds, targets = self._get_preds_and_targets()
        if torch.unique(targets).numel() == 1:
            return torch.tensor(np.nan)
        prec, recall, _ = precision_recall_curve(preds, targets)
        return auc(recall, prec)
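This `compute` returns the area under the precision-recall curve (AUPRC); the NaN guard covers the degenerate case where only one class is present in the targets. The same quantity can be computed directly with the functional API, for example (imports assumed):

import torch
from pytorch_lightning.metrics.functional import precision_recall_curve, auc  # assumed imports

preds = torch.tensor([0.10, 0.40, 0.35, 0.80])
target = torch.tensor([0, 0, 1, 1])

prec, recall, _ = precision_recall_curve(preds, target)
auprc = auc(recall, prec)  # area under the precision-recall curve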
Example 7
    def test_epoch_end(self, outputs):
        """
        After going through the entire test set, we compute the final ROC curve accumulated
        over all predictions and target masks, then compute the AUROC.
        """
        if self.hparams.auroc:
            # Compute ROC, then compute AUROC and log the value for the whole test set
            fpr, tpr, thresholds = self.roc.compute()
            fpr, idx = torch.sort(fpr, descending=False)
            tpr, thresholds = tpr[idx], thresholds[idx]
            auroc = auc(fpr, tpr)
            self.log('auroc_test', auroc)

            # Divide thresholds from ROC into 100 equally separated thresholds
            step_size = max(1, len(thresholds) // 100)
            thresholds = thresholds[::step_size]

            # Find the best threshold based on the best IoU
            best_iou = 0
            best_threshold = -1
            # For each threshold, compute the IoU over the whole test set
            test_dataloader = self.trainer.datamodule.test_dataloader()[1]
            for i, threshold in enumerate(thresholds):
                ious = []
                for batch_idx, (x, y) in enumerate(test_dataloader):
                    x, y = x.to(self.device), y.to(self.device)
                    x_rec, M, colormaps = self.forward(x)
                    bloc_map = self.gen_bloc_map(M, threshold)
                    iou_score = iou(bloc_map, y)
                    ious.append(iou_score.detach().cpu().item())

                avg_iou = np.mean(ious)
                if avg_iou > best_iou:
                    best_iou = avg_iou
                    best_threshold = threshold

                self.trainer.logger.experiment.add_scalar('avg_iou', avg_iou, i)
                self.trainer.logger.experiment.add_scalar('threshold', threshold, i)
            
            # Log best iou and threshold
            self.log('best_iou', best_iou)
            self.log('best_threshold', best_threshold)

            # Now, using best threshold, generate the binary localization maps for 
            # all images in the test set and log/save them
            for batch_idx, (x, y) in enumerate(test_dataloader):
                x, y = x.to(self.device), y.to(self.device)
                x_rec, M, colormaps = self.forward(x)
                bloc_map = self.gen_bloc_map(M, best_threshold)

                # Save the binary localization maps
                bloc_map = bloc_map.detach().cpu()
                bloc_map_grid = make_grid(bloc_map).float()
                save_image(bloc_map_grid, f'{self.trainer.logger.log_dir}/batch{batch_idx}-blocmaps.png')
                self.trainer.logger.experiment.add_image('blocmaps', bloc_map_grid.numpy(), batch_idx)

                # Save the input images
                x = x.detach().cpu()
                x = self.trainer.datamodule.unnormalize_batch(x)
                x_grid = make_grid(x).float()
                save_image(x_grid, f'{self.trainer.logger.log_dir}/batch{batch_idx}-input.png')
                self.trainer.logger.experiment.add_image('input', x_grid.numpy(), batch_idx)

                # Save the target masks
                y = y.detach().cpu()
                y_grid = make_grid(y).float()
                save_image(y_grid, f'{self.trainer.logger.log_dir}/batch{batch_idx}-targets.png')
                self.trainer.logger.experiment.add_image('targets', y_grid.numpy(), batch_idx)
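`gen_bloc_map` is not shown in the snippet; a plausible minimal version, assuming it simply binarizes the activation map `M` at the given threshold (an assumption, not the author's implementation):

import torch


def gen_bloc_map(M: torch.Tensor, threshold: float) -> torch.Tensor:
    # Hypothetical helper: binary localization map from an activation/attention map
    return (M > threshold).float()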