Example no. 1
    def evaluate(self,
                 dloader,
                 len_dataset,
                 split="test",
                 verbose=False,
                 **args):
        self.model.eval()
        metrics = Metrics(self.metrics, len_dataset, self.num_classes)
        loss = 0

        # Evaluation loop
        loop = tqdm(dloader, disable=not verbose)
        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)

            with torch.no_grad():
                output = self.model(data)
                if self.task == "classification":
                    loss += self.criterion(output, target)
                elif self.task == "segmentation":
                    loss += self.criterion(output, target.squeeze(dim=1))
                # Update metrics
                pred = output[0].argmax(dim=1, keepdim=True)
                confidence = torch.sigmoid(output[1])
                metrics.update(pred, target, confidence)

        scores = metrics.get_scores(split=split)
        losses = {"loss_confid": loss}
        return losses, scores

    def train(self, epoch):
        self.model.train()
        self.disable_bn()
        if self.config_args["model"].get("uncertainty", None):
            self.disable_dropout()
        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, confid_loss = 0, 0
        len_steps, len_data = 0, 0

        # Training loop
        loop = tqdm(self.train_loader, desc="Iteration")

        for step, batch in enumerate(loop):
            batch = tuple(t.to(self.device) for t in batch)
            idx_ids, input_ids, input_mask, segment_ids, label_ids = batch

            output = self.model(input_ids,
                                segment_ids,
                                input_mask,
                                labels=None)

            print('output', output[0], output[1], torch.sigmoid(output[1]))

            current_loss = self.criterion(output, label_ids)
            """ loss """
            n_gpu = torch.cuda.device_count()
            if n_gpu > 1:
                current_loss = current_loss.mean()
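            # Scale the loss so that gradients accumulated over several
            # mini-batches are equivalent to one larger-batch update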
            if self.config_args['training']['gradient_accumulation_steps'] > 1:
                current_loss = current_loss / self.config_args['training'][
                    'gradient_accumulation_steps']

            current_loss.backward()
            loss += current_loss

            len_steps += len(input_ids)
            len_data = len_steps

            # Update metrics
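            # output[0] is the classification output; output[1] is the
            # confidence head, squashed to [0, 1] with a sigmoid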
            pred = output[0].argmax(dim=1, keepdim=True)

            confidence = torch.sigmoid(output[1])
            metrics.update(idx_ids, pred, label_ids, confidence)

            pred_detach = pred.detach()
            label_detach = label_ids.detach()
            confidence_detach = confidence.detach()
            idx_detach = idx_ids.detach()

            print('pred', pred_detach.cpu())
            print('label', label_detach.cpu())
            print('idx', idx_detach.cpu())
            print('confidence', confidence_detach.cpu())

            if (step + 1) % self.config_args['training'][
                    'gradient_accumulation_steps'] == 0:
                print('optimizer step', step + 1)
                self.optimizer.step()
                self.optimizer.zero_grad()

            # Update the progress bar with the running loss and accuracy
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_confid": f"{(loss / len_data):05.3e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "lr": {
                "value": self.optimizer.param_groups[0]["lr"],
                "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
            },
            "train/loss_confid": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        val_losses, scores_val = self.evaluate(self.val_loader,
                                               self.prod_val_len,
                                               split="val")
        logs_dict["val/loss_confid"] = {
            "value":
            val_losses["loss_confid"].item() / self.nsamples_val,
            "string":
            f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
        }
        for sv in scores_val:
            logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test = self.evaluate(self.test_loader,
                                                 self.prod_test_len,
                                                 split="test")
        logs_dict["test/loss_confid"] = {
            "value":
            test_losses["loss_confid"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)

        # Scheduler step
        if self.scheduler:
            self.scheduler.step()

    def evaluate(self,
                 dloader,
                 len_dataset,
                 split="test",
                 verbose=False,
                 **args):
        self.model.eval()  # use with torch.no_grad()
        metrics = Metrics(self.metrics, len_dataset, self.num_classes)
        loss = 0

        # Evaluation loop
        loop = tqdm(dloader, desc="Iteration", disable=not verbose)

        for step, batch in enumerate(loop):
            batch = tuple(t.to(self.device) for t in batch)
            idx_ids, input_ids, input_mask, segment_ids, label_ids = batch

            with torch.no_grad():
                output = self.model(input_ids,
                                    segment_ids,
                                    input_mask,
                                    labels=None)
                if self.task == "classification":
                    current_loss = self.criterion(output, label_ids)  # ok
                    loss += current_loss

                # Update metrics
                pred = F.softmax(output[0], dim=1).argmax(dim=1, keepdim=True)

                confidence = torch.sigmoid(output[1])

                metrics.update(idx_ids, pred, label_ids, confidence)

                pred_detach = pred.detach()
                label_detach = label_ids.detach()
                confidence_detach = confidence.detach()
                idx_detach = idx_ids.detach()

                print('pred', pred_detach.cpu())
                print('label', label_detach.cpu())
                print('idx', idx_detach.cpu())
                print('confidence', confidence_detach.cpu())

        print('----------------------------------------------------')
        pred_list = []
        target_list = []
        confidence_list = []

        for i, p, t, c in zip(metrics.new_idx, metrics.new_pred,
                              metrics.new_taget, metrics.new_conf):
            print('idx,pred,target,confidence', i, p[0], t, c[0])
            pred_list.append(p[0])
            target_list.append(t)
            confidence_list.append(c[0])

        print('----------------------------------------------------')
        report = classifiction_metric(
            np.array(pred_list), np.array(target_list),
            np.array(self.config_args['data']['label_list']))
        print(report)
        print('----------------------------------------------------')

        scores = metrics.get_scores(split=split)
        losses = {"loss_confid": loss}
        return losses, scores
Example no. 4
    def train(self, epoch):
        self.model.train()
        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, len_steps, len_data = 0, 0, 0

        # Training loop
        loop = tqdm(self.train_loader)
        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            if self.task == "classification":
                current_loss = self.criterion(output, target)
            elif self.task == "segmentation":
                current_loss = self.criterion(output, target.squeeze(dim=1))
            current_loss.backward()
            loss += current_loss
            self.optimizer.step()
            if self.task == "classification":
                len_steps += len(data)
                len_data = len_steps
            elif self.task == "segmentation":
                len_steps += len(data) * np.prod(data.shape[-2:])
                len_data += len(data)

            # Update metrics
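            # MCP confidence: the maximum softmax probability over classes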
            confidence, pred = F.softmax(output, dim=1).max(dim=1,
                                                            keepdim=True)
            metrics.update(pred, target, confidence)

            # Update the average loss
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_nll": f"{(loss / len_data):05.4e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "lr": {
                "value": self.optimizer.param_groups[0]["lr"],
                "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
            },
            "train/loss_nll": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        if self.val_loader is not None:
            val_losses, scores_val = self.evaluate(self.val_loader,
                                                   self.prod_val_len,
                                                   split="val")
            logs_dict["val/loss_nll"] = {
                "value":
                val_losses["loss_nll"].item() / self.nsamples_val,
                "string":
                f"{(val_losses['loss_nll'].item() / self.nsamples_val):05.4e}",
            }
            for sv in scores_val:
                logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test = self.evaluate(self.test_loader,
                                                 self.prod_test_len,
                                                 split="test")
        logs_dict["test/loss_nll"] = {
            "value":
            test_losses["loss_nll"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_nll'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)

        # Scheduler step
        if self.scheduler:
            self.scheduler.step()
Example no. 5
    def evaluate(self,
                 dloader,
                 len_dataset,
                 split="test",
                 mode="mcp",
                 samples=50,
                 verbose=False):
        self.model.eval()
        metrics = Metrics(self.metrics, len_dataset, self.num_classes)
        loss = 0

        # Special case of mc-dropout
        if mode == "mc_dropout":
            self.model.keep_dropout_in_test()
            LOGGER.info(f"Sampling {samples} times")

        # Evaluation loop
        loop = tqdm(dloader, disable=not verbose)
        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)

            with torch.no_grad():
                if mode == "mcp":
                    output = self.model(data)
                    if self.task == "classification":
                        loss += self.criterion(output, target)
                    elif self.task == "segmentation":
                        loss += self.criterion(output, target.squeeze(dim=1))
                    confidence, pred = F.softmax(output,
                                                 dim=1).max(dim=1,
                                                            keepdim=True)

                elif mode == "tcp":
                    output = self.model(data)
                    if self.task == "classification":
                        loss += self.criterion(output, target)
                    elif self.task == "segmentation":
                        loss += self.criterion(output, target.squeeze(dim=1))
                    probs = F.softmax(output, dim=1)
                    pred = probs.max(dim=1, keepdim=True)[1]
                    labels_hot = misc.one_hot_embedding(
                        target, self.num_classes).to(self.device)
                    # Segmentation special case
                    if self.task == "segmentation":
                        labels_hot = labels_hot.squeeze(1).permute(0, 3, 1, 2)
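                    # TCP confidence: the softmax probability assigned to the
                    # ground-truth class (selected via the one-hot mask)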
                    confidence, _ = (labels_hot * probs).max(dim=1,
                                                             keepdim=True)

                elif mode == "mc_dropout":
                    if self.task == "classification":
                        outputs = torch.zeros(samples, data.shape[0],
                                              self.num_classes).to(self.device)
                    elif self.task == "segmentation":
                        outputs = torch.zeros(
                            samples,
                            data.shape[0],
                            self.num_classes,
                            data.shape[2],
                            data.shape[3],
                        ).to(self.device)
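                    # Average `samples` stochastic forward passes with dropout
                    # kept active at test time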
                    for i in range(samples):
                        outputs[i] = self.model(data)
                    output = outputs.mean(0)
                    if self.task == "classification":
                        loss += self.criterion(output, target)
                    elif self.task == "segmentation":
                        loss += self.criterion(output, target.squeeze(dim=1))
                    probs = F.softmax(output, dim=1)
                    confidence = (probs * torch.log(probs + 1e-9)).sum(
                        dim=1)  # negative entropy (higher = more confident)
                    pred = probs.max(dim=1, keepdim=True)[1]

                metrics.update(pred, target, confidence)

        scores = metrics.get_scores(split=split)
        losses = {"loss_nll": loss}
        return losses, scores
Example no. 6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path",
                        "-c",
                        type=str,
                        default=None,
                        help="Path for config yaml")
    parser.add_argument("--epoch",
                        "-e",
                        type=int,
                        default=None,
                        help="Epoch to analyse")
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        default="mcp",
        choices=MODE_TYPE,
        help="Type of confidence testing",
    )
    parser.add_argument("--samples",
                        "-s",
                        type=int,
                        default=50,
                        help="Samples in case of MCDropout")
    parser.add_argument("--no-cuda",
                        action="store_true",
                        default=False,
                        help="disables CUDA training")
    args = parser.parse_args()

    config_args = load_yaml(args.config_path)

    # Overwrite for release
    config_args["training"]["output_folder"] = Path(args.config_path).parent

    config_args["training"]["metrics"] = [
        "accuracy", "auc", "ap_success", "ap_errors", "fpr_at_95tpr", "aurc"
    ]

    # Special case of MC Dropout
    if args.mode == "mc_dropout":
        config_args["training"]["mc_dropout"] = True

    # Device configuration
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # setting random seed
    setup_seed(42)

    # Load dataset
    LOGGER.info(f"Loading dataset {config_args['data']['dataset']}")
    dloader = get_loader(config_args)

    # Make loaders
    dloader.make_loaders()

    # Set learner
    LOGGER.warning(f"Learning type: {config_args['training']['learner']}")

    config = BertConfig(
        os.path.join(config_args['data']['bert_model_dir'],
                     'bert_config.json'))  # ++

    learner = get_learner(config, config_args, dloader.train_loader,
                          dloader.val_loader, dloader.test_loader, -1,
                          device)  # ++

    # Initialize and load model
    ckpt_path = config_args["training"][
        "output_folder"] / f"model_bert_{args.epoch:03d}.ckpt"  # ++

    checkpoint = torch.load(ckpt_path)
    learner.model.load_state_dict(checkpoint, strict=False)

    # Get scores
    LOGGER.info(f"Inference mode: {args.mode}")

    if args.mode != "trust_score":
        _, scores_test = learner.evaluate(
            learner.test_loader,
            learner.prod_test_len,
            split="test",
            mode=args.mode,
            samples=args.samples,
            verbose=True,
        )

    # Special case TrustScore
    else:
        # Create feature extractor model
        config_args["model"][
            "name"] = config_args["model"]["name"] + "_extractor"
        print(config_args["model"]["name"])

        features_extractor = get_model(config, config_args).from_pretrained(
            config_args['data']['bert_model_dir'],
            config_args).to(device)  # ++

        features_extractor.load_state_dict(learner.model.state_dict(),
                                           strict=False)

        LOGGER.info(f"Using extractor {config_args['model']['name']}")

        # Get features for KDTree
        LOGGER.info("Get features for KDTree")
        features_extractor.eval()
        metrics = Metrics(learner.metrics, learner.prod_test_len,
                          config_args["data"]["num_classes"])
        train_features, train_target = [], []
        with torch.no_grad():
            loop = tqdm(learner.train_loader, desc="Iteration")
            for step, batch in enumerate(loop):
                batch = tuple(t.to(device) for t in batch)
                _, input_ids, input_mask, segment_ids, label_ids = batch
                output = features_extractor(input_ids,
                                            segment_ids,
                                            input_mask,
                                            labels=None)
                output = output.view(output.size(0), -1)
                train_features.append(output)
                train_target.append(label_ids)

        train_features = torch.cat(train_features).detach().cpu().numpy()
        train_target = torch.cat(train_target).detach().cpu().numpy()

        LOGGER.info("Create KDTree")
        trust_model = trust_scores.TrustScore(
            num_workers=max(config_args["data"]["num_classes"], 2))
        trust_model.fit(train_features, train_target)

        LOGGER.info("Execute on test set")
        test_features, test_pred = [], []
        learner.model.eval()
        with torch.no_grad():
            loop = tqdm(learner.test_loader, desc="Iteration")
            for step, batch in enumerate(loop):
                batch = tuple(t.to(device) for t in batch)
                idx_ids, input_ids, input_mask, segment_ids, label_ids = batch  # ++
                output, pooled_output = learner.model(input_ids,
                                                      segment_ids,
                                                      input_mask,
                                                      labels=None)

                confidence, pred = output.max(dim=1, keepdim=True)
                features = features_extractor(input_ids,
                                              segment_ids,
                                              input_mask,
                                              labels=None)

                features = features.view(features.size(0), -1)

                test_features.append(features)
                test_pred.append(pred)
                metrics.update(idx_ids, pred, label_ids, confidence)

        test_features = torch.cat(test_features).detach().to("cpu").numpy()
        test_pred = torch.cat(test_pred).squeeze().detach().to("cpu").numpy()
        proba_pred = trust_model.get_score(test_features, test_pred)
        # Use the trust scores as the confidence measure for the final metrics
        metrics.proba_pred = proba_pred

        print('test_features', test_features)
        print('idx_ids', idx_ids)
        print('labels', label_ids.detach().cpu())
        print('test_pred', test_pred)
        print('trust_score', proba_pred)

        print('----------------------------------------------------')
        pred_list = []
        target_list = []
        confidence_list = []
        proba_pred_list = []

        for i, p, t, c in zip(metrics.new_idx, metrics.new_pred,
                              metrics.new_taget, metrics.new_conf):
            print('idx,pred,target,confidence', i, p[0], t, c[0])
            pred_list.append(p[0])
            target_list.append(t)
            confidence_list.append(c[0])

        print('----------------------------------------------------')
        report = classifiction_metric(
            np.array(pred_list), np.array(target_list),
            np.array(config_args['data']['label_list']))
        print(report)
        print('----------------------------------------------------')
        scores_test = metrics.get_scores(split="test")

    LOGGER.info("Results")
    print("----------------------------------------------------------------")

    for st in scores_test:
        print(st)
        print(scores_test[st])
        print(
            "----------------------------------------------------------------")
Example no. 7
    def evaluate(self,
                 dloader,
                 len_dataset,
                 split="test",
                 mode="mcp",
                 samples=50,
                 verbose=False):
        self.model.eval()
        metrics = Metrics(self.metrics, len_dataset, self.num_classes)
        loss = 0

        # Special case of mc-dropout
        if mode == "mc_dropout":
            self.model.keep_dropout_in_test()
            LOGGER.info(f"Sampling {samples} times")

        # Evaluation loop
        loop = tqdm(dloader, desc="Iteration", disable=not verbose)
        for step, batch in enumerate(loop):
            batch = tuple(t.to(self.device) for t in batch)
            idx_ids, input_ids, input_mask, segment_ids, label_ids = batch
            print(label_ids)

            with torch.no_grad():
                if mode == "mcp":
                    print(True)
                    output, pooled_output = self.model(input_ids,
                                                       segment_ids,
                                                       input_mask,
                                                       labels=None)

                    current_loss = self.criterion(output.view(-1, 2),
                                                  label_ids.view(-1))
                    loss += current_loss

                    confidence, pred = F.softmax(output,
                                                 dim=1).max(dim=1,
                                                            keepdim=True)

                    print(confidence)
                    print(pred)

                elif mode == "tcp":
                    output, pooled_output = self.model(input_ids,
                                                       segment_ids,
                                                       input_mask,
                                                       labels=None)

                    current_loss = self.criterion(output.view(-1, 2),
                                                  label_ids.view(-1))
                    loss += current_loss

                    probs = F.softmax(output, dim=1)

                    pred = probs.max(dim=1, keepdim=True)[1]

                    labels_hot = misc.one_hot_embedding(
                        label_ids, self.num_classes).to(self.device)

                    confidence, _ = (labels_hot * probs).max(dim=1,
                                                             keepdim=True)

                elif mode == "mc_dropout":
                    print('---------------input_ids.shape---------------')
                    print(input_ids.shape)
                    outputs = torch.zeros(samples, input_ids.shape[0],
                                          self.num_classes).to(self.device)

                    for i in range(samples):
                        outputs[i], _ = self.model(input_ids,
                                                   segment_ids,
                                                   input_mask,
                                                   labels=None)
                    output = outputs.mean(0)

                    loss += self.criterion(output.view(-1, 2),
                                           label_ids.view(-1))

                    probs = F.softmax(output, dim=1)
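                    # Negative entropy of the averaged predictive distribution
                    # (higher value = more confident)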
                    confidence = (probs * torch.log(probs + 1e-9)).sum(dim=1)
                    pred = probs.max(dim=1, keepdim=True)[1]

                metrics.update(idx_ids, pred, label_ids, confidence)
                pred_detach = pred.detach()
                label_detach = label_ids.detach()
                confidence_detach = confidence.detach()
                idx_detach = idx_ids.detach()
                print('pred', pred_detach.cpu())
                print('label', label_detach.cpu())
                print('idx', idx_detach.cpu())
                print('confidence', confidence_detach.cpu())

        print('----------------------------------------------------')
        pred_list = []
        target_list = []
        confidence_list = []

        for i, p, t, c in zip(metrics.new_idx, metrics.new_pred,
                              metrics.new_taget, metrics.new_conf):
            print('idx,pred,target,confidence', i, p[0], t, c[0])
            pred_list.append(p[0])
            target_list.append(t)
            confidence_list.append(c[0])

        print('----------------------------------------------------')
        report = classifiction_metric(
            np.array(pred_list), np.array(target_list),
            np.array(self.config_args['data']['label_list']))
        print(report)
        print('----------------------------------------------------')

        scores = metrics.get_scores(split=split)
        losses = {"loss_nll": loss}
        return losses, scores
Example no. 8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path",
                        "-c",
                        type=str,
                        default=None,
                        help="Path for config yaml")
    parser.add_argument("--epoch",
                        "-e",
                        type=int,
                        default=None,
                        help="Epoch to analyse")
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        default="normal",
        choices=MODE_TYPE,
        help="Type of confidence testing",
    )
    parser.add_argument("--samples",
                        "-s",
                        type=int,
                        default=50,
                        help="Samples in case of MCDropout")
    parser.add_argument("--no-cuda",
                        action="store_true",
                        default=False,
                        help="disables CUDA training")
    args = parser.parse_args()

    config_args = load_yaml(args.config_path)

    # Overwrite for release
    config_args["training"]["output_folder"] = Path(args.config_path).parent

    config_args["training"]["metrics"] = [
        "accuracy",
        "auc",
        "ap_success",
        "ap_errors",
        "fpr_at_95tpr",
    ]
    if config_args["training"]["task"] == "segmentation":
        config_args["training"]["metrics"].append("mean_iou")

    # Special case of MC Dropout
    if args.mode == "mc_dropout":
        config_args["training"]["mc_dropout"] = True

    # Device configuration
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # Load dataset
    LOGGER.info(f"Loading dataset {config_args['data']['dataset']}")
    dloader = get_loader(config_args)

    # Make loaders
    dloader.make_loaders()

    # Set learner
    LOGGER.warning(f"Learning type: {config_args['training']['learner']}")
    learner = get_learner(config_args, dloader.train_loader,
                          dloader.val_loader, dloader.test_loader, -1, device)

    # Initialize and load model
    ckpt_path = config_args["training"][
        "output_folder"] / f"model_epoch_{args.epoch:03d}.ckpt"
    checkpoint = torch.load(ckpt_path)
    learner.model.load_state_dict(checkpoint["model_state_dict"])

    # Get scores
    LOGGER.info(f"Inference mode: {args.mode}")

    if args.mode != "trust_score":
        _, scores_test, confidence_data = learner.evaluate(
            learner.test_loader,
            learner.prod_test_len,
            split="test",
            mode=args.mode,
            samples=args.samples,
            verbose=True,
        )
        acc_pred, conf_pred = confidence_data
        # write_file('./results_ver1/%s_confidnet_score_epoch_%i.txt' % (config_args['data']['dataset'], args.epoch), conf_pred)
        # write_file('./results_ver1/%s_confidnet_accurate_epoch_%i.txt' % (config_args['data']['dataset'], args.epoch), acc_pred)

        write_file(
            './results/%s_confidnet_score_epoch_%i.txt' %
            (config_args['data']['dataset'], args.epoch), conf_pred)
        write_file(
            './results/%s_confidnet_accurate_epoch_%i.txt' %
            (config_args['data']['dataset'], args.epoch), acc_pred)

    # Special case TrustScore
    else:
        # For segmentation, subsample the outputs, otherwise computing TrustScore takes too long
        if config_args["training"]["task"] == "segmentation":
            learner.prod_test_len = MAX_NUMBER_TRUSTSCORE_SEG * np.ceil(
                learner.nsamples_test / config_args["training"]["batch_size"])

        # Create feature extractor model
        config_args["model"][
            "name"] = config_args["model"]["name"] + "_extractor"
        features_extractor = get_model(config_args, device).to(device)
        features_extractor.load_state_dict(learner.model.state_dict(),
                                           strict=False)
        LOGGER.info(f"Using extractor {config_args['model']['name']}")
        features_extractor.print_summary(input_size=tuple(
            [shape_i for shape_i in learner.train_loader.dataset[0][0].shape]))

        # Get features for KDTree
        LOGGER.info("Get features for KDTree")
        features_extractor.eval()
        metrics = Metrics(learner.metrics, learner.prod_test_len,
                          config_args["data"]["num_classes"])
        train_features, train_target = [], []
        with torch.no_grad():
            loop = tqdm(learner.train_loader)
            for j, (data, target) in enumerate(loop):
                data, target = data.to(device), target.to(device)
                output = features_extractor(data)
                if config_args["training"]["task"] == "segmentation":
                    # Select only a fraction of outputs for segmentation trustscore
                    output = (output.permute(0, 2, 3, 1).contiguous().view(
                        output.size(0) * output.size(2) * output.size(3), -1))
                    target = (target.permute(0, 2, 3, 1).contiguous().view(
                        target.size(0) * target.size(2) * target.size(3), -1))
                    idx = torch.randperm(
                        output.size(0))[:MAX_NUMBER_TRUSTSCORE_SEG]
                    output = output[idx, :]
                    target = target[idx, :]
                else:
                    output = output.view(output.size(0), -1)
                train_features.append(output)
                train_target.append(target)
        train_features = torch.cat(train_features).detach().cpu().numpy()
        train_target = torch.cat(train_target).detach().cpu().numpy()

        LOGGER.info("Create KDTree")
        trust_model = trust_scores.TrustScore(
            num_workers=max(config_args["data"]["num_classes"], 20))
        trust_model.fit(train_features, train_target)

        LOGGER.info("Execute on test set")
        test_features, test_pred = [], []
        learner.model.eval()
        with torch.no_grad():
            loop = tqdm(learner.test_loader)
            for j, (data, target) in enumerate(loop):
                data, target = data.to(device), target.to(device)
                output = learner.model(data)
                confidence, pred = output.max(dim=1, keepdim=True)
                features = features_extractor(data)

                if config_args["training"]["task"] == "segmentation":
                    features = (features.permute(0, 2, 3, 1).contiguous().view(
                        features.size(0) * features.size(2) * features.size(3),
                        -1))
                    target = (target.permute(0, 2, 3, 1).contiguous().view(
                        target.size(0) * target.size(2) * target.size(3), -1))
                    pred = (pred.permute(0, 2, 3, 1).contiguous().view(
                        pred.size(0) * pred.size(2) * pred.size(3), -1))
                    confidence = (confidence.permute(
                        0, 2, 3, 1).contiguous().view(
                            confidence.size(0) * confidence.size(2) *
                            confidence.size(3), -1))
                    idx = torch.randperm(
                        features.size(0))[:MAX_NUMBER_TRUSTSCORE_SEG]
                    features = features[idx, :]
                    target = target[idx, :]
                    pred = pred[idx, :]
                    confidence = confidence[idx, :]
                else:
                    features = features.view(features.size(0), -1)

                test_features.append(features)
                test_pred.append(pred)
                metrics.update(pred, target, confidence)

        test_features = torch.cat(test_features).detach().to("cpu").numpy()
        test_pred = torch.cat(test_pred).squeeze().detach().to("cpu").numpy()
        proba_pred = trust_model.get_score(test_features, test_pred)
        metrics.proba_pred = proba_pred
        scores_test = metrics.get_scores(split="test")

    LOGGER.info("Results")
    print("----------------------------------------------------------------")
    for st in scores_test:
        print(st)
        print(scores_test[st])
        print(
            "----------------------------------------------------------------")
Example no. 9
    def train(self, epoch):
        self.model.train()

        # self.disable_bn()
        # if self.config_args["model"].get("uncertainty", None):
        #     self.disable_dropout()

        metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
        loss, confid_loss = 0, 0
        len_steps, len_data = 0, 0

        # Training loop
        loop = tqdm(self.train_loader)

        for batch_id, (data, target) in enumerate(loop):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)


            if self.task == "classification":
                current_loss = self.criterion(output, target)
            elif self.task == "segmentation":
                current_loss = self.criterion(output, target.squeeze(dim=1))
            current_loss.backward()
            loss += current_loss
            self.optimizer.step()
            if self.task == "classification":
                len_steps += len(data)
                len_data = len_steps
            elif self.task == "segmentation":
                len_steps += len(data) * np.prod(data.shape[-2:])
                len_data += len(data)

            # Update metrics
            pred = output[0].argmax(dim=1, keepdim=True)
            confidence = torch.sigmoid(output[1])
            metrics.update(pred, target, confidence)

            # Update the average loss
            loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
            loop.set_postfix(
                OrderedDict({
                    "loss_confid": f"{(loss / len_data):05.3e}",
                    "acc": f"{(metrics.accuracy / len_steps):05.2%}",
                }))
            loop.update()

        # Eval on epoch end
        scores = metrics.get_scores(split="train")
        logs_dict = OrderedDict({
            "epoch": {
                "value": epoch,
                "string": f"{epoch:03}"
            },
            "train/loss_confid": {
                "value": loss / len_data,
                "string": f"{(loss / len_data):05.4e}",
            },
        })
        for s in scores:
            logs_dict[s] = scores[s]

        # Val scores
        val_losses, scores_val, _ = self.evaluate(self.val_loader,
                                                  self.prod_val_len,
                                                  split="val")
        logs_dict["val/loss_confid"] = {
            "value":
            val_losses["loss_confid"].item() / self.nsamples_val,
            "string":
            f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
        }
        for sv in scores_val:
            logs_dict[sv] = scores_val[sv]

        # Test scores
        test_losses, scores_test, _ = self.evaluate(self.test_loader,
                                                    self.prod_test_len,
                                                    split="test")
        logs_dict["test/loss_confid"] = {
            "value":
            test_losses["loss_confid"].item() / self.nsamples_test,
            "string":
            f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
        }
        for st in scores_test:
            logs_dict[st] = scores_test[st]

        # Print metrics
        misc.print_dict(logs_dict)

        # Save the model checkpoint
        self.save_checkpoint(epoch)

        # CSV logging
        misc.csv_writter(path=self.output_folder / "logs.csv",
                         dic=OrderedDict(logs_dict))

        # Tensorboard logging
        self.save_tb(logs_dict)