Esempio n. 1
0
 def eval(self, confidences, epoch_loss, epoch):
     """Score one epoch's confidences against the stored labels.

     The labels are read from ``self.learners_data.set_data[1]``. Thresholds
     are obtained from ``calculate_optimal_thresholds_one_by_one`` (250
     slices) and then used to compute F1, precision and recall, which are
     printed together with ``epoch`` and ``epoch_loss``.

     Returns:
         A ``(f1, thresholds)`` tuple.
     """
     targets = self.learners_data.set_data[1]
     thresholds = calculate_optimal_thresholds_one_by_one(
         targets, confidences, slices=250)

     # Both inputs are converted to float32 arrays before the stats call.
     stats = multilabel_stats(
         np.array(targets, dtype=np.float32),
         np.array(confidences, dtype=np.float32),
         threshold=thresholds)
     f1, precision, recall = f1_score(*reduce_stats(*stats))

     report = 'epoch:{}, loss: {:.4f} F1: {:.4f}, precision: {:.4f}, recall: {:.4f}'
     print(report.format(epoch, epoch_loss, f1, precision, recall))
     return f1, thresholds
Esempio n. 2
0
    def infer(self):
        """Run ensemble inference by thresholding the combined confidences.

        ``self._calculate_thresholds()`` is invoked first; the confidences
        from ``self.set_data`` are then merged with ``self._combine`` and
        compared against ``self.thresholds``.

        For the ``'test'`` set a Kaggle submission file is written and no
        scores are computed. For any other set the F1 / precision / recall
        tuple is computed and printed.

        Returns:
            ``(img_ids, labels, preds, confidences, global_scores)`` where
            ``confidences`` are the original (un-fused) confidences and
            ``global_scores`` is ``None`` for the test set.
        """
        self._calculate_thresholds()
        img_ids, labels, confidences, _ = self.set_data
        fused_confidences = self._combine(confidences)

        preds = vector_to_index_list(fused_confidences > self.thresholds)

        if self.set_type != 'test':
            stats = multilabel_stats(labels, fused_confidences,
                                     self.thresholds)
            global_scores = f1_score(*reduce_stats(*stats))
            message = "Ensemble results for {}. F1: {:.4}, precision: {:.4}, recall: {:.4}"
            print(message.format(self.set_type, *global_scores))
            return img_ids, labels, preds, confidences, global_scores

        # Test set: there is nothing to score, only the submission is written.
        class_names = load_annotations()['train']['classes']
        save_kaggle_submision("ensemble_kaggle_submision.csv", img_ids, preds,
                              class_names)
        return img_ids, labels, preds, confidences, None
Esempio n. 3
0
    def infer(self):
        """Run ensemble inference by majority vote across the models.

        Each model's confidences are compared against that model's own
        thresholds (taken from ``self.thresholds_data``). A label is
        predicted when more than half of the models vote for it.

        For the ``'test'`` set a Kaggle submission file is written and no
        scores are computed. For any other set the F1 / precision / recall
        tuple is computed from the voted predictions and printed.

        Returns:
            ``(img_ids, labels, preds, confidences, global_scores)`` with
            ``global_scores`` set to ``None`` for the test set.
        """
        img_ids, labels, confidences, _ = self.set_data
        thresholds = self.thresholds_data[3]
        num_models = confidences.shape[0]
        assert num_models % 2 == 1, "Number of models for this modality must be odd"

        # confidences: M x N x L, thresholds: M x L
        votes = (confidences > thresholds[:, np.newaxis, :]).sum(axis=0)
        vec_preds = votes > num_models // 2
        preds = vector_to_index_list(vec_preds)

        if self.set_type != 'test':
            stats = multilabel_stats_from_pred(labels, vec_preds)
            global_scores = f1_score(*reduce_stats(*stats))
            message = "Ensemble results for {}. F1: {:.4}, precision: {:.4}, recall: {:.4}"
            print(message.format(self.set_type, *global_scores))
            return img_ids, labels, preds, confidences, global_scores

        # Test set: there is nothing to score, only the submission is written.
        class_names = load_annotations()['train']['classes']
        save_kaggle_submision("ensemble_kaggle_submision.csv", img_ids, preds,
                              class_names)
        return img_ids, labels, preds, confidences, None
Esempio n. 4
0
def infer_runner(img_set_folder,
                 model_file,
                 samples_limit=None,
                 tta=False,
                 batch_size=64,
                 write=True):
    """Run inference for one model over one image set and persist the results.

    The set type is the last path component of ``img_set_folder``. For the
    ``'validation'`` and ``'train'`` sets the thresholds are re-optimised
    from the inference output, and the predictions and global scores are
    recomputed with them.

    Args:
        img_set_folder: Folder of the image set, using ``/`` separators. A
            trailing slash is tolerated.
        model_file: Path of the model file. The model type is derived from
            it and result files are written next to it.
        samples_limit: Optional cap on the number of samples to run.
            ``None`` means the whole set.
        tta: Forwarded to the data loader; also selects the ``tta`` /
            ``no_tta`` suffix of the result file names.
        batch_size: Batch size forwarded to the data loader.
        write: When false nothing is written to disk.

    Side effects (only when ``write`` is true):
        * validation/train: thresholds are saved with ``np.save`` to
          ``thresholds`` in the working directory and to
          ``model_file + ".thresholds"`` (``np.save`` appends ``.npy``).
        * full-set runs: ``inference_<set>_<tta>.th`` and
          ``performance_<set>_<tta>.txt`` are written next to the model.
        * test: ``kaggle_submision.csv`` is written.
    """
    # Strip a trailing separator so "data/validation/" still yields
    # "validation" instead of an empty set type.
    set_type = img_set_folder.rstrip("/").split("/")[-1]
    model_type = model_type_from_model_file(model_file)
    image_dataset, dataloader = get_data_loader(img_set_folder,
                                                model_type,
                                                set_type,
                                                batch_size=batch_size,
                                                tta=tta,
                                                use_test_transforms=True)

    class_names = image_dataset.classes

    print("Is CUDA available?: {}".format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = get_model(model_type, len(class_names), model_file=model_file)
    model = model.to(device)

    image_ids, labels, preds, confidences, global_scores, _ = infer(
        model,
        dataloader,
        device,
        samples_limit=samples_limit,
        threshold=model.thresholds)

    # Labelled sets: recompute the thresholds from this run and refresh the
    # predictions and global scores accordingly.
    if set_type in ('validation', 'train'):
        print("Calculating thresholds on the fly.")
        model.thresholds = calculate_optimal_thresholds_one_by_one(
            labels, confidences, slices=250, old_thresholds=model.thresholds)
        confidences_arr = np.array(confidences)
        preds = vector_to_index_list(confidences_arr > model.thresholds)
        global_scores = f1_score(*reduce_stats(*multilabel_stats(
            np.array(labels), confidences_arr, model.thresholds)))
        if write:
            np.save("thresholds", model.thresholds)
            np.save(model_file + ".thresholds", model.thresholds)

    # Results are only saved for (near) full-set inference so that they can
    # be used for ensembling. `samples_limit is None` means "no limit" and
    # must be tested before any numeric comparison: `None > 25000` raises
    # TypeError on Python 3.
    if set_type in ('validation', 'test'):
        is_full_set = samples_limit is None
    elif set_type == 'train':
        is_full_set = samples_limit is None or samples_limit > 25000
    else:
        is_full_set = False

    if write and is_full_set:
        base_path = os.path.dirname(model_file)
        tta_tag = "tta" if tta else "no_tta"
        results_file = os.path.join(
            base_path, "inference_{}_{}.th".format(set_type, tta_tag))
        torch.save(
            {
                "image_ids": image_ids,
                "thresholds": model.thresholds,
                "labels": labels,
                "confidences": confidences,
                "f1": global_scores[0]
            }, results_file)
        performance_file = os.path.join(
            base_path, "performance_{}_{}.txt".format(set_type, tta_tag))
        with open(performance_file, "w") as f:
            f.write("{:.4}\n".format(global_scores[0]))

    if write and set_type == 'test':
        save_kaggle_submision("kaggle_submision.csv", image_ids, preds,
                              image_dataset.classes)
Esempio n. 5
0
def infer(model, dataloader, device, threshold=0.5, samples_limit=None):
    """Run ``model`` over ``dataloader`` and collect predictions and scores.

    Each batch yields ``(inputs, labels, img_ids)``. A 5-d ``inputs`` tensor
    is treated as several crops per sample: the crops are flattened into the
    batch dimension for the forward pass and the sigmoid confidences are
    averaged per sample afterwards.

    Args:
        model: Module called on the inputs; put in eval mode for each batch.
        dataloader: Iterable of batches exposing ``__len__`` and
            ``batch_size``.
        device: Device the inputs and labels are moved to.
        threshold: Scalar or ``np.ndarray``. An array is converted once to a
            float32 tensor on ``device``.
        samples_limit: When truthy, iteration stops once at least this many
            samples have been processed.

    Returns:
        ``(image_ids, labels, preds, confidences, global_scores,
        per_class_scores)``. The first four are per-sample lists and the
        scores come from ``f1_score`` over the accumulated stats.
    """
    collected_ids = []
    collected_labels = []
    collected_preds = []
    collected_confidences = []
    accumulated_stats = (0., 0., 0.)
    seen = 0

    # Only used to size the progress bar; the last batch may be smaller.
    total = len(dataloader) * dataloader.batch_size
    if samples_limit:
        total = min(samples_limit, total)

    with tqdm(total=total) as progress_bar:
        for inputs, labels, img_ids in dataloader:
            n_in_batch = inputs.size()[0]
            seen += n_in_batch
            model.eval()  # Set model to evaluate mode

            collected_ids.extend(img_ids.data.numpy())
            collected_labels.extend(labels.data.numpy())

            labels = labels.to(device)
            inputs = inputs.to(device)

            # 5d tensor: several crops to be evaluated for the same sample.
            multicrop = len(inputs.size()) == 5
            if multicrop:
                bs, ncrops, c, h, w = inputs.size()
                inputs = inputs.view(-1, c, h, w)

            with torch.set_grad_enabled(False):
                confidences = torch.sigmoid(model(inputs))
                if multicrop:
                    confidences = confidences.view(bs, ncrops, -1).mean(1)
                collected_confidences.extend(confidences.cpu().numpy())

                # After the first batch the threshold is already a tensor,
                # so this conversion happens at most once.
                if isinstance(threshold, np.ndarray):
                    as_float32 = threshold.astype(np.float32)
                    threshold = torch.from_numpy(as_float32).to(device)

                over_threshold = torch.ge(confidences, threshold)
                vec_preds = over_threshold.type(
                    confidences.type()).cpu().numpy()
                collected_preds.extend(vector_to_index_list(vec_preds))

                # statistics
                batch_stats = multilabel_stats(labels,
                                               confidences,
                                               threshold=threshold)
                accumulated_stats = tuple(
                    np.add(total_so_far, current)
                    for total_so_far, current in zip(accumulated_stats,
                                                     batch_stats))

            progress_bar.update(n_in_batch)
            if samples_limit and seen >= samples_limit:
                break

    per_class_scores = f1_score(*accumulated_stats)
    global_scores = f1_score(*reduce_stats(*accumulated_stats))

    return (collected_ids, collected_labels, collected_preds,
            collected_confidences, global_scores, per_class_scores)
Esempio n. 6
0
    def train_model(self, num_epochs=25):
        """Run the train / validation loop for ``num_epochs`` epochs.

        Every epoch runs a 'train' phase and, when ``self.val_dataloader`` is
        truthy, a 'validation' phase. After each phase F1, precision and
        recall are computed from that phase's labels and confidences with the
        current thresholds, printed, and logged to ``self.tensorboard``.

        During validation the thresholds are recomputed with
        ``calculate_optimal_thresholds_one_by_one`` and stored on
        ``self.thresholds``. Whenever the validation F1 beats the best seen
        so far, the model ``state_dict`` and the thresholds are saved under
        ``self.running_dir``.

        Args:
            num_epochs: Number of epochs to run.

        Returns:
            None. Results are exposed through the saved files and
            ``self.thresholds``.
        """
        board = self.tensorboard
        model = self.model
        scheduler = self.scheduler
        # NOTE(review): `criterion` and `optimizer` are bound here but never
        # referenced again in this method.
        criterion = self.criterion
        optimizer = self.optimizer
        dataloaders = {
            'validation': self.val_dataloader,
            'train': self.train_dataloader
        }
        since = time.time()

        best_f1 = 0.0
        # Reset here and not modified in this method; presumably advanced by
        # `_train_epoch` -- TODO confirm.
        self.global_step = 0
        thresholds = self.thresholds

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            phases = ['train']
            if self.val_dataloader:
                phases += ['validation']
            for phase in phases:
                phase_start = time.time()
                if phase == 'train':
                    # The scheduler is stepped at the start of the train
                    # phase, before the epoch's training runs.
                    scheduler.step()
                    board.add_scalars("epoch/optimizer",
                                      {'lr': scheduler.get_lr()[0]},
                                      self.global_step + 1)

                    loss, labels, confidences = self._train_epoch()

                else:
                    # No `threshold` is passed, so `infer` uses its default;
                    # only labels and confidences are used from the result.
                    image_ids, labels, preds, confidences, global_scores, per_class_scores = \
                      infer(model, dataloaders[phase], self.device, samples_limit=self.validation_samples_limit)

                    # Previous thresholds are only handed over when they are
                    # already an ndarray; otherwise None is passed.
                    thresholds = calculate_optimal_thresholds_one_by_one(
                        labels,
                        confidences,
                        slices=250,
                        old_thresholds=(thresholds if isinstance(
                            thresholds, np.ndarray) else None))
                    self.thresholds = thresholds

                # Phase metrics with the current thresholds. In the train
                # phase these are the thresholds from the last validation
                # phase, or the initial `self.thresholds`.
                f1, precision, recall = \
                  f1_score(*reduce_stats(
                    *multilabel_stats(np.array(labels, dtype=np.float32), np.array(confidences, dtype=np.float32),
                                      threshold=thresholds)))

                # NOTE(review): `loss` is only assigned in the train phase, so
                # the 'validation' line prints the training loss of the same
                # epoch.
                print(
                    '{} loss: {:.4f} F1: {:.4f}, precision: {:.4f}, recall: {:.4f}'
                    .format(phase, loss, f1, precision, recall))

                # Saving best model.
                if phase == 'validation' and f1 > best_f1:
                    best_f1 = f1
                    model_path = os.path.join(self.running_dir,
                                              "model_best.pth.tar")
                    thresholds_path = model_path + ".thresholds"
                    print("Saving model with F1 {} to '{}'".format(
                        best_f1, model_path))
                    torch.save(model.state_dict(), model_path)
                    print("Saving thresholds to '{}'".format(thresholds_path))
                    np.save(thresholds_path, thresholds)

                # The loss is only logged for the train phase; F1, precision
                # and recall are logged for both phases.
                if phase == 'train':
                    board.add_scalars("epoch/loss", {'train': loss},
                                      self.global_step)
                board.add_scalars("epoch/f1", {phase: f1}, self.global_step)
                board.add_scalars("epoch/precision", {phase: precision},
                                  self.global_step)
                board.add_scalars("epoch/recall", {phase: recall},
                                  self.global_step)

                phase_elapsed = time.time() - phase_start
                print("{} phase took {:.0f}s".format(phase, phase_elapsed))

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        # NOTE(review): '{:4f}' is a minimum width of 4 with default
        # precision; '{:.4f}' was probably intended -- confirm before changing.
        print('Best val f1: {:4f}'.format(best_f1))

        # The best weights are not reloaded into `model` here; they are only
        # available from the saved "model_best.pth.tar".
        # load best model weights
        # model.load_state_dict(best_model_wts)
        return  # model
Esempio n. 7
0
 def eval_global(thresholds):
   """Return the first ``f1_score`` value (F1) over the reduced stats.

   ``thresholds`` is a NumPy array; it is cast to float32 and wrapped as a
   tensor before being compared against the enclosing scope's ``labels_t``
   and ``confidences_t``.
   """
   thresholds_t = torch.from_numpy(thresholds.astype(np.float32))
   stats = multilabel_stats(labels_t, confidences_t, thresholds_t)
   scores = f1_score(*reduce_stats(*stats))
   return scores[0]
Esempio n. 8
0
 def eval(thresholds):
   """Return the first ``f1_score`` value (F1) from the un-reduced stats.

   ``thresholds`` is a NumPy array wrapped as a tensor without any dtype
   cast. The stats for the enclosing scope's ``labels_t`` and
   ``confidences_t`` are passed straight to ``f1_score``.
   """
   thresholds_t = torch.from_numpy(thresholds)
   stats = multilabel_stats(labels_t, confidences_t, thresholds_t)
   scores = f1_score(*stats)
   return scores[0]