def evaluate(self, dloader, len_dataset, split="test", verbose=False, **args): self.model.eval() metrics = Metrics(self.metrics, len_dataset, self.num_classes) loss = 0 # Evaluation loop loop = tqdm(dloader, disable=not verbose) for batch_id, (data, target) in enumerate(loop): data, target = data.to(self.device), target.to(self.device) with torch.no_grad(): output = self.model(data) if self.task == "classification": loss += self.criterion(output, target) elif self.task == "segmentation": loss += self.criterion(output, target.squeeze(dim=1)) # Update metrics pred = output[0].argmax(dim=1, keepdim=True) confidence = torch.sigmoid(output[1]) metrics.update(pred, target, confidence) scores = metrics.get_scores(split=split) losses = {"loss_confid": loss} return losses, scores
def train(self, epoch):
    self.model.train()
    self.disable_bn()
    if self.config_args["model"].get("uncertainty", None):
        self.disable_dropout()
    metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
    loss, confid_loss = 0, 0
    len_steps, len_data = 0, 0

    # Training loop
    loop = tqdm(self.train_loader, desc="Iteration")
    for step, batch in enumerate(loop):
        batch = tuple(t.to(self.device) for t in batch)
        idx_ids, input_ids, input_mask, segment_ids, label_ids = batch
        output = self.model(input_ids, segment_ids, input_mask, labels=None)
        print('output', output[0], output[1], torch.sigmoid(output[1]))
        current_loss = self.criterion(output, label_ids)

        # Loss scaling for multi-GPU and gradient accumulation
        n_gpu = torch.cuda.device_count()
        if n_gpu > 1:
            current_loss = current_loss.mean()
        if self.config_args['training']['gradient_accumulation_steps'] > 1:
            current_loss = current_loss / self.config_args['training'][
                'gradient_accumulation_steps']
        current_loss.backward()
        loss += current_loss
        len_steps += len(input_ids)
        len_data = len_steps

        # Update metrics
        pred = output[0].argmax(dim=1, keepdim=True)
        confidence = torch.sigmoid(output[1])
        metrics.update(idx_ids, pred, label_ids, confidence)
        pred_detach, label_detach, confidence_detach, idx_detach = (
            pred.detach(), label_ids.detach(), confidence.detach(), idx_ids.detach())
        print('pred', pred_detach.cpu())
        print('label', label_detach.cpu())
        print('idx', idx_detach.cpu())
        print('confidence', confidence_detach.cpu())

        # Optimizer step only every `gradient_accumulation_steps` mini-batches
        if (step + 1) % self.config_args['training'][
                'gradient_accumulation_steps'] == 0:
            print('optimizer step', step + 1)
            self.optimizer.step()
            self.optimizer.zero_grad()

        # Update the average loss
        loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
        loop.set_postfix(
            OrderedDict({
                "loss_confid": f"{(loss / len_data):05.3e}",
                "acc": f"{(metrics.accuracy / len_steps):05.2%}",
            }))
        loop.update()

    # Eval on epoch end
    scores = metrics.get_scores(split="train")
    logs_dict = OrderedDict({
        "epoch": {
            "value": epoch,
            "string": f"{epoch:03}"
        },
        "lr": {
            "value": self.optimizer.param_groups[0]["lr"],
            "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
        },
        "train/loss_confid": {
            "value": loss / len_data,
            "string": f"{(loss / len_data):05.4e}",
        },
    })
    for s in scores:
        logs_dict[s] = scores[s]

    # Val scores
    val_losses, scores_val = self.evaluate(self.val_loader,
                                           self.prod_val_len,
                                           split="val")
    logs_dict["val/loss_confid"] = {
        "value": val_losses["loss_confid"].item() / self.nsamples_val,
        "string": f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
    }
    for sv in scores_val:
        logs_dict[sv] = scores_val[sv]

    # Test scores
    test_losses, scores_test = self.evaluate(self.test_loader,
                                             self.prod_test_len,
                                             split="test")
    logs_dict["test/loss_confid"] = {
        "value": test_losses["loss_confid"].item() / self.nsamples_test,
        "string": f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
    }
    for st in scores_test:
        logs_dict[st] = scores_test[st]

    # Print metrics
    misc.print_dict(logs_dict)

    # Save the model checkpoint
    self.save_checkpoint(epoch)

    # CSV logging
    misc.csv_writter(path=self.output_folder / "logs.csv",
                     dic=OrderedDict(logs_dict))

    # Tensorboard logging
    self.save_tb(logs_dict)

    # Scheduler step
    if self.scheduler:
        self.scheduler.step()
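# A minimal sketch of the gradient-accumulation pattern used in train() above,
# isolated on a toy model. The names `toy_accumulation_demo`, `toy_model`, and
# the random data are illustrative assumptions, not part of this repository.
import torch
import torch.nn as nn


def toy_accumulation_demo(accumulation_steps=4, num_batches=8):
    toy_model = nn.Linear(10, 2)
    optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
    criterion = nn.CrossEntropyLoss()
    for step in range(num_batches):
        x = torch.randn(16, 10)
        y = torch.randint(0, 2, (16,))
        # Scale the loss so the accumulated gradient matches one large batch
        loss = criterion(toy_model(x), y) / accumulation_steps
        loss.backward()
        # Step and zero the optimizer only every `accumulation_steps` mini-batches
        if (step + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()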
def evaluate(self, dloader, len_dataset, split="test", verbose=False, **args): self.model.eval() # use with torch.no_grad() metrics = Metrics(self.metrics, len_dataset, self.num_classes) loss = 0 # Evaluation loop loop = tqdm(dloader, disable=not verbose) for step, batch in enumerate(tqdm(loop, desc="Iteration")): batch = tuple(t.to(self.device) for t in batch) idx_ids, input_ids, input_mask, segment_ids, label_ids = batch with torch.no_grad(): output = self.model(input_ids, segment_ids, input_mask, labels=None) if self.task == "classification": current_loss = self.criterion(output, label_ids) # ok loss += current_loss # Update metrics pred = F.softmax(output[0]).argmax(dim=1, keepdim=True) confidence = torch.sigmoid(output[1]) metrics.update(idx_ids, pred, label_ids, confidence) pred_detach, label_detach, confidence_detach, idx_detach = pred.detach( ), label_ids.detach(), confidence.detach(), idx_ids.detach() print('pred', pred_detach.cpu()) print('label', label_detach.cpu()) print('idx', idx_detach.cpu()) print('confidence', confidence_detach.cpu()) print('----------------------------------------------------') pred_list = [] target_list = [] confidence_list = [] for i, p, t, c in zip(metrics.new_idx, metrics.new_pred, metrics.new_taget, metrics.new_conf): print('idx,pred,target,confidence', i, p[0], t, c[0]) pred_list.append(p[0]) target_list.append(t) confidence_list.append(c[0]) print('----------------------------------------------------') report = classifiction_metric( np.array(pred_list), np.array(target_list), np.array(self.config_args['data']['label_list'])) print(report) print('----------------------------------------------------') scores = metrics.get_scores(split=split) losses = {"loss_confid": loss} return losses, scores
def train(self, epoch):
    self.model.train()
    metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
    loss, len_steps, len_data = 0, 0, 0

    # Training loop
    loop = tqdm(self.train_loader)
    for batch_id, (data, target) in enumerate(loop):
        data, target = data.to(self.device), target.to(self.device)
        self.optimizer.zero_grad()
        output = self.model(data)
        if self.task == "classification":
            current_loss = self.criterion(output, target)
        elif self.task == "segmentation":
            current_loss = self.criterion(output, target.squeeze(dim=1))
        current_loss.backward()
        loss += current_loss
        self.optimizer.step()
        if self.task == "classification":
            len_steps += len(data)
            len_data = len_steps
        elif self.task == "segmentation":
            len_steps += len(data) * np.prod(data.shape[-2:])
            len_data += len(data)

        # Update metrics
        confidence, pred = F.softmax(output, dim=1).max(dim=1, keepdim=True)
        metrics.update(pred, target, confidence)

        # Update the average loss
        loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
        loop.set_postfix(
            OrderedDict({
                "loss_nll": f"{(loss / len_data):05.4e}",
                "acc": f"{(metrics.accuracy / len_steps):05.2%}",
            }))
        loop.update()

    # Eval on epoch end
    scores = metrics.get_scores(split="train")
    logs_dict = OrderedDict({
        "epoch": {
            "value": epoch,
            "string": f"{epoch:03}"
        },
        "lr": {
            "value": self.optimizer.param_groups[0]["lr"],
            "string": f"{self.optimizer.param_groups[0]['lr']:05.1e}",
        },
        "train/loss_nll": {
            "value": loss / len_data,
            "string": f"{(loss / len_data):05.4e}",
        },
    })
    for s in scores:
        logs_dict[s] = scores[s]

    # Val scores
    if self.val_loader is not None:
        val_losses, scores_val = self.evaluate(self.val_loader,
                                               self.prod_val_len,
                                               split="val")
        logs_dict["val/loss_nll"] = {
            "value": val_losses["loss_nll"].item() / self.nsamples_val,
            "string": f"{(val_losses['loss_nll'].item() / self.nsamples_val):05.4e}",
        }
        for sv in scores_val:
            logs_dict[sv] = scores_val[sv]

    # Test scores
    test_losses, scores_test = self.evaluate(self.test_loader,
                                             self.prod_test_len,
                                             split="test")
    logs_dict["test/loss_nll"] = {
        "value": test_losses["loss_nll"].item() / self.nsamples_test,
        "string": f"{(test_losses['loss_nll'].item() / self.nsamples_test):05.4e}",
    }
    for st in scores_test:
        logs_dict[st] = scores_test[st]

    # Print metrics
    misc.print_dict(logs_dict)

    # Save the model checkpoint
    self.save_checkpoint(epoch)

    # CSV logging
    misc.csv_writter(path=self.output_folder / "logs.csv",
                     dic=OrderedDict(logs_dict))

    # Tensorboard logging
    self.save_tb(logs_dict)

    # Scheduler step
    if self.scheduler:
        self.scheduler.step()
def evaluate(self, dloader, len_dataset, split="test", mode="mcp", samples=50, verbose=False): self.model.eval() metrics = Metrics(self.metrics, len_dataset, self.num_classes) loss = 0 # Special case of mc-dropout if mode == "mc_dropout": self.model.keep_dropout_in_test() LOGGER.info(f"Sampling {samples} times") # Evaluation loop loop = tqdm(dloader, disable=not verbose) for batch_id, (data, target) in enumerate(loop): data, target = data.to(self.device), target.to(self.device) with torch.no_grad(): if mode == "mcp": output = self.model(data) if self.task == "classification": loss += self.criterion(output, target) elif self.task == "segmentation": loss += self.criterion(output, target.squeeze(dim=1)) confidence, pred = F.softmax(output, dim=1).max(dim=1, keepdim=True) elif mode == "tcp": output = self.model(data) if self.task == "classification": loss += self.criterion(output, target) elif self.task == "segmentation": loss += self.criterion(output, target.squeeze(dim=1)) probs = F.softmax(output, dim=1) pred = probs.max(dim=1, keepdim=True)[1] labels_hot = misc.one_hot_embedding( target, self.num_classes).to(self.device) # Segmentation special case if self.task == "segmentation": labels_hot = labels_hot.squeeze(1).permute(0, 3, 1, 2) confidence, _ = (labels_hot * probs).max(dim=1, keepdim=True) elif mode == "mc_dropout": if self.task == "classification": outputs = torch.zeros(samples, data.shape[0], self.num_classes).to(self.device) elif self.task == "segmentation": outputs = torch.zeros( samples, data.shape[0], self.num_classes, data.shape[2], data.shape[3], ).to(self.device) for i in range(samples): outputs[i] = self.model(data) output = outputs.mean(0) if self.task == "classification": loss += self.criterion(output, target) elif self.task == "segmentation": loss += self.criterion(output, target.squeeze(dim=1)) probs = F.softmax(output, dim=1) confidence = (probs * torch.log(probs + 1e-9)).sum( dim=1) # entropy pred = probs.max(dim=1, keepdim=True)[1] metrics.update(pred, target, confidence) scores = metrics.get_scores(split=split) losses = {"loss_nll": loss} return losses, scores
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path", "-c", type=str, default=None,
                        help="Path for config yaml")
    parser.add_argument("--epoch", "-e", type=int, default=None,
                        help="Epoch to analyse")
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        default="mcp",
        choices=MODE_TYPE,
        help="Type of confidence testing",
    )
    parser.add_argument("--samples", "-s", type=int, default=50,
                        help="Samples in case of MCDropout")
    parser.add_argument("--no-cuda", action="store_true", default=False,
                        help="disables CUDA training")
    args = parser.parse_args()

    config_args = load_yaml(args.config_path)

    # Overwrite for release
    config_args["training"]["output_folder"] = Path(args.config_path).parent
    config_args["training"]["metrics"] = [
        "accuracy", "auc", "ap_success", "ap_errors", "fpr_at_95tpr", "aurc"
    ]

    # Special case of MC Dropout
    if args.mode == "mc_dropout":
        config_args["training"]["mc_dropout"] = True

    # Device configuration
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # Setting random seed
    setup_seed(42)

    # Load dataset
    LOGGER.info(f"Loading dataset {config_args['data']['dataset']}")
    dloader = get_loader(config_args)

    # Make loaders
    dloader.make_loaders()

    # Set learner
    LOGGER.warning(f"Learning type: {config_args['training']['learner']}")
    config = BertConfig(
        os.path.join(config_args['data']['bert_model_dir'], 'bert_config.json'))  # ++
    learner = get_learner(config, config_args, dloader.train_loader,
                          dloader.val_loader, dloader.test_loader, -1, device)  # ++

    # Initialize and load model
    ckpt_path = config_args["training"][
        "output_folder"] / f"model_bert_{args.epoch:03d}.ckpt"  # ++
    checkpoint = torch.load(ckpt_path)
    learner.model.load_state_dict(checkpoint, strict=False)

    # Get scores
    LOGGER.info(f"Inference mode: {args.mode}")
    if args.mode != "trust_score":
        _, scores_test = learner.evaluate(
            learner.test_loader,
            learner.prod_test_len,
            split="test",
            mode=args.mode,
            samples=args.samples,
            verbose=True,
        )

    # Special case TrustScore
    else:
        # Create feature extractor model
        config_args["model"]["name"] = config_args["model"]["name"] + "_extractor"
        print(config_args["model"]["name"])
        features_extractor = get_model(config, config_args).from_pretrained(
            config_args['data']['bert_model_dir'], config_args).to(device)  # ++
        features_extractor.load_state_dict(learner.model.state_dict(), strict=False)
        LOGGER.info(f"Using extractor {config_args['model']['name']}")

        # Get features for KDTree
        LOGGER.info("Get features for KDTree")
        features_extractor.eval()
        metrics = Metrics(learner.metrics, learner.prod_test_len,
                          config_args["data"]["num_classes"])
        train_features, train_target = [], []
        with torch.no_grad():
            loop = tqdm(learner.train_loader, desc="Iteration")
            for step, batch in enumerate(loop):
                batch = tuple(t.to(device) for t in batch)
                _, input_ids, input_mask, segment_ids, label_ids = batch
                output = features_extractor(input_ids, segment_ids, input_mask,
                                            labels=None)
                output = output.view(output.size(0), -1)
                train_features.append(output)
                train_target.append(label_ids)
        train_features = torch.cat(train_features).detach().cpu().numpy()
        train_target = torch.cat(train_target).detach().cpu().numpy()

        LOGGER.info("Create KDTree")
        trust_model = trust_scores.TrustScore(
            num_workers=max(config_args["data"]["num_classes"], 2))
        trust_model.fit(train_features, train_target)

        LOGGER.info("Execute on test set")
        test_features, test_pred = [], []
        learner.model.eval()
        with torch.no_grad():
            loop = tqdm(learner.test_loader, desc="Iteration")
            for step, batch in enumerate(loop):
                batch = tuple(t.to(device) for t in batch)
                idx_ids, input_ids, input_mask, segment_ids, label_ids = batch  # ++
                output, pooled_output = learner.model(input_ids, segment_ids,
                                                      input_mask, labels=None)
                confidence, pred = output.max(dim=1, keepdim=True)
                features = features_extractor(input_ids, segment_ids, input_mask,
                                              labels=None)
                features = features.view(features.size(0), -1)
                test_features.append(features)
                test_pred.append(pred)
                metrics.update(idx_ids, pred, label_ids, confidence)

        test_features = torch.cat(test_features).detach().to("cpu").numpy()
        test_pred = torch.cat(test_pred).squeeze().detach().to("cpu").numpy()
        proba_pred = trust_model.get_score(test_features, test_pred)
        # metrics.update(idx_ids, pred, label_ids, confidence)  # duplicate of the in-loop update (would double-count the last batch)
        print('test_features', test_features)
        print('idx_ids', idx_ids)
        print('labels', label_ids.detach().cpu())
        print('test_pred', test_pred)
        print('trust_score', proba_pred)
        print('----------------------------------------------------')
        pred_list = []
        target_list = []
        confidence_list = []
        proba_pred_list = []
        for i, p, t, c in zip(metrics.new_idx, metrics.new_pred,
                              metrics.new_taget, metrics.new_conf):
            print('idx,pred,target,confidence', i, p[0], t, c[0])
            pred_list.append(p[0])
            target_list.append(t)
            confidence_list.append(c[0])
        print('----------------------------------------------------')
        report = classifiction_metric(
            np.array(pred_list), np.array(target_list),
            np.array(config_args['data']['label_list']))
        print(report)
        print('----------------------------------------------------')
        scores_test = metrics.get_scores(split="test")

    LOGGER.info("Results")
    print("----------------------------------------------------------------")
    for st in scores_test:
        print(st)
        print(scores_test[st])
        print("----------------------------------------------------------------")
def evaluate(self, dloader, len_dataset, split="test", mode="mcp", samples=50, verbose=False): self.model.eval() metrics = Metrics(self.metrics, len_dataset, self.num_classes) loss = 0 # Special case of mc-dropout if mode == "mc_dropout": self.model.keep_dropout_in_test() LOGGER.info(f"Sampling {samples} times") # Evaluation loop loop = tqdm(dloader, disable=not verbose) for step, batch in enumerate(tqdm(loop, desc="Iteration")): batch = tuple(t.to(self.device) for t in batch) idx_ids, input_ids, input_mask, segment_ids, label_ids = batch print(label_ids) with torch.no_grad(): if mode == "mcp": print(True) output, pooled_output = self.model(input_ids, segment_ids, input_mask, labels=None) current_loss = self.criterion(output.view(-1, 2), label_ids.view(-1)) loss += current_loss confidence, pred = F.softmax(output, dim=1).max(dim=1, keepdim=True) print(confidence) print(pred) elif mode == "tcp": output, pooled_output = self.model(input_ids, segment_ids, input_mask, labels=None) current_loss = self.criterion(output.view(-1, 2), label_ids.view(-1)) loss += current_loss probs = F.softmax(output, dim=1) pred = probs.max(dim=1, keepdim=True)[1] labels_hot = misc.one_hot_embedding( label_ids, self.num_classes).to(self.device) confidence, _ = (labels_hot * probs).max(dim=1, keepdim=True) elif mode == "mc_dropout": print('---------------input_ids.shape---------------') print(input_ids.shape) outputs = torch.zeros( samples, self.config_args['training']['batch_size'], self.num_classes).to(self.device) for i in range(samples): outputs[i], _ = self.model(input_ids, segment_ids, input_mask, labels=None) output = outputs.mean(0) loss += self.criterion(output.view(-1, 2), label_ids.view(-1)) probs = F.softmax(output, dim=1) confidence = (probs * torch.log(probs + 1e-9)).sum(dim=1) pred = probs.max(dim=1, keepdim=True)[1] metrics.update(idx_ids, pred, label_ids, confidence) pred_detach, label_detach, confidence_detach, idx_detach = pred.detach( ), label_ids.detach(), confidence.detach(), idx_ids.detach() print('pred', pred_detach.cpu()) print('label', label_detach.cpu()) print('idx', idx_detach.cpu()) print('confidence', confidence_detach.cpu()) print('----------------------------------------------------') pred_list = [] target_list = [] confidence_list = [] for i, p, t, c in zip(metrics.new_idx, metrics.new_pred, metrics.new_taget, metrics.new_conf): print('idx,pred,target,confidence', i, p[0], t, c[0]) pred_list.append(p[0]) target_list.append(t) confidence_list.append(c[0]) print('----------------------------------------------------') report = classifiction_metric( np.array(pred_list), np.array(target_list), np.array(self.config_args['data']['label_list'])) print(report) print('----------------------------------------------------') scores = metrics.get_scores(split=split) losses = {"loss_nll": loss} return losses, scores
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path", "-c", type=str, default=None,
                        help="Path for config yaml")
    parser.add_argument("--epoch", "-e", type=int, default=None,
                        help="Epoch to analyse")
    parser.add_argument(
        "--mode",
        "-m",
        type=str,
        default="normal",
        choices=MODE_TYPE,
        help="Type of confidence testing",
    )
    parser.add_argument("--samples", "-s", type=int, default=50,
                        help="Samples in case of MCDropout")
    parser.add_argument("--no-cuda", action="store_true", default=False,
                        help="disables CUDA training")
    args = parser.parse_args()

    config_args = load_yaml(args.config_path)

    # Overwrite for release
    config_args["training"]["output_folder"] = Path(args.config_path).parent
    config_args["training"]["metrics"] = [
        "accuracy", "auc", "ap_success", "ap_errors", "fpr_at_95tpr",
    ]
    if config_args["training"]["task"] == "segmentation":
        config_args["training"]["metrics"].append("mean_iou")

    # Special case of MC Dropout
    if args.mode == "mc_dropout":
        config_args["training"]["mc_dropout"] = True

    # Device configuration
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")

    # Load dataset
    LOGGER.info(f"Loading dataset {config_args['data']['dataset']}")
    dloader = get_loader(config_args)

    # Make loaders
    dloader.make_loaders()

    # Set learner
    LOGGER.warning(f"Learning type: {config_args['training']['learner']}")
    learner = get_learner(config_args, dloader.train_loader, dloader.val_loader,
                          dloader.test_loader, -1, device)

    # Initialize and load model
    ckpt_path = config_args["training"][
        "output_folder"] / f"model_epoch_{args.epoch:03d}.ckpt"
    checkpoint = torch.load(ckpt_path)
    learner.model.load_state_dict(checkpoint["model_state_dict"])

    # Get scores
    LOGGER.info(f"Inference mode: {args.mode}")
    if args.mode != "trust_score":
        _, scores_test, confidence_data = learner.evaluate(
            learner.test_loader,
            learner.prod_test_len,
            split="test",
            mode=args.mode,
            samples=args.samples,
            verbose=True,
        )
        acc_pred, conf_pred = confidence_data
        # write_file('./results_ver1/%s_confidnet_score_epoch_%i.txt' % (config_args['data']['dataset'], args.epoch), conf_pred)
        # write_file('./results_ver1/%s_confidnet_accurate_epoch_%i.txt' % (config_args['data']['dataset'], args.epoch), acc_pred)
        write_file(
            './results/%s_confidnet_score_epoch_%i.txt' %
            (config_args['data']['dataset'], args.epoch), conf_pred)
        write_file(
            './results/%s_confidnet_accurate_epoch_%i.txt' %
            (config_args['data']['dataset'], args.epoch), acc_pred)

    # Special case TrustScore
    else:
        # For segmentation, reduce the number of samples, otherwise computation takes too long
        if config_args["training"]["task"] == "segmentation":
            learner.prod_test_len = MAX_NUMBER_TRUSTSCORE_SEG * np.ceil(
                learner.nsamples_test / config_args["training"]["batch_size"])

        # Create feature extractor model
        config_args["model"]["name"] = config_args["model"]["name"] + "_extractor"
        features_extractor = get_model(config_args, device).to(device)
        features_extractor.load_state_dict(learner.model.state_dict(), strict=False)
        LOGGER.info(f"Using extractor {config_args['model']['name']}")
        features_extractor.print_summary(
            input_size=tuple(learner.train_loader.dataset[0][0].shape))

        # Get features for KDTree
        LOGGER.info("Get features for KDTree")
        features_extractor.eval()
        metrics = Metrics(learner.metrics, learner.prod_test_len,
                          config_args["data"]["num_classes"])
        train_features, train_target = [], []
        with torch.no_grad():
            loop = tqdm(learner.train_loader)
            for j, (data, target) in enumerate(loop):
                data, target = data.to(device), target.to(device)
                output = features_extractor(data)
                if config_args["training"]["task"] == "segmentation":
                    # Select only a fraction of outputs for segmentation trustscore
                    output = (output.permute(0, 2, 3, 1).contiguous().view(
                        output.size(0) * output.size(2) * output.size(3), -1))
                    target = (target.permute(0, 2, 3, 1).contiguous().view(
                        target.size(0) * target.size(2) * target.size(3), -1))
                    idx = torch.randperm(output.size(0))[:MAX_NUMBER_TRUSTSCORE_SEG]
                    output = output[idx, :]
                    target = target[idx, :]
                else:
                    output = output.view(output.size(0), -1)
                train_features.append(output)
                train_target.append(target)
        train_features = torch.cat(train_features).detach().cpu().numpy()
        train_target = torch.cat(train_target).detach().cpu().numpy()

        LOGGER.info("Create KDTree")
        trust_model = trust_scores.TrustScore(
            num_workers=max(config_args["data"]["num_classes"], 20))
        trust_model.fit(train_features, train_target)

        LOGGER.info("Execute on test set")
        test_features, test_pred = [], []
        learner.model.eval()
        with torch.no_grad():
            loop = tqdm(learner.test_loader)
            for j, (data, target) in enumerate(loop):
                data, target = data.to(device), target.to(device)
                output = learner.model(data)
                confidence, pred = output.max(dim=1, keepdim=True)
                features = features_extractor(data)
                if config_args["training"]["task"] == "segmentation":
                    features = (features.permute(0, 2, 3, 1).contiguous().view(
                        features.size(0) * features.size(2) * features.size(3), -1))
                    target = (target.permute(0, 2, 3, 1).contiguous().view(
                        target.size(0) * target.size(2) * target.size(3), -1))
                    pred = (pred.permute(0, 2, 3, 1).contiguous().view(
                        pred.size(0) * pred.size(2) * pred.size(3), -1))
                    confidence = (confidence.permute(0, 2, 3, 1).contiguous().view(
                        confidence.size(0) * confidence.size(2) * confidence.size(3), -1))
                    idx = torch.randperm(features.size(0))[:MAX_NUMBER_TRUSTSCORE_SEG]
                    features = features[idx, :]
                    target = target[idx, :]
                    pred = pred[idx, :]
                    confidence = confidence[idx, :]
                else:
                    features = features.view(features.size(0), -1)
                test_features.append(features)
                test_pred.append(pred)
                metrics.update(pred, target, confidence)

        test_features = torch.cat(test_features).detach().to("cpu").numpy()
        test_pred = torch.cat(test_pred).squeeze().detach().to("cpu").numpy()
        proba_pred = trust_model.get_score(test_features, test_pred)
        metrics.proba_pred = proba_pred
        scores_test = metrics.get_scores(split="test")

    LOGGER.info("Results")
    print("----------------------------------------------------------------")
    for st in scores_test:
        print(st)
        print(scores_test[st])
        print("----------------------------------------------------------------")
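# Hedged sketch of the TrustScore flow used above, with random features standing in
# for extractor outputs. The trust_scores.TrustScore calls mirror the ones in the
# script (fit on training features/labels, then score test predictions); the import
# path, toy arrays, and shapes are assumptions for illustration only.
import numpy as np
from confidnet.utils import trust_scores  # assumed import path

rng = np.random.default_rng(0)
train_features = rng.normal(size=(200, 32))   # flattened penultimate-layer features
train_target = rng.integers(0, 2, size=200)   # ground-truth labels
test_features = rng.normal(size=(50, 32))
test_pred = rng.integers(0, 2, size=50)       # classifier predictions on the test set

trust_model = trust_scores.TrustScore(num_workers=2)
trust_model.fit(train_features, train_target)
# Trust score: ratio of the distance to the nearest other-class neighborhood over the
# distance to the predicted-class neighborhood; higher means a more trustworthy prediction.
proba_pred = trust_model.get_score(test_features, test_pred)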
def train(self, epoch):
    self.model.train()
    # self.disable_bn()
    # if self.config_args["model"].get("uncertainty", None):
    #     self.disable_dropout()
    metrics = Metrics(self.metrics, self.prod_train_len, self.num_classes)
    loss, confid_loss = 0, 0
    len_steps, len_data = 0, 0

    # Training loop
    loop = tqdm(self.train_loader)
    for batch_id, (data, target) in enumerate(loop):
        data, target = data.to(self.device), target.to(self.device)
        self.optimizer.zero_grad()
        output = self.model(data)
        # import pdb; pdb.set_trace()
        # print(output[0]); exit()
        if self.task == "classification":
            current_loss = self.criterion(output, target)
        elif self.task == "segmentation":
            current_loss = self.criterion(output, target.squeeze(dim=1))
        current_loss.backward()
        loss += current_loss
        self.optimizer.step()
        if self.task == "classification":
            len_steps += len(data)
            len_data = len_steps
        elif self.task == "segmentation":
            len_steps += len(data) * np.prod(data.shape[-2:])
            len_data += len(data)

        # Update metrics
        pred = output[0].argmax(dim=1, keepdim=True)
        confidence = torch.sigmoid(output[1])
        metrics.update(pred, target, confidence)

        # Update the average loss
        loop.set_description(f"Epoch {epoch}/{self.nb_epochs}")
        loop.set_postfix(
            OrderedDict({
                "loss_confid": f"{(loss / len_data):05.3e}",
                "acc": f"{(metrics.accuracy / len_steps):05.2%}",
            }))
        loop.update()

    # Eval on epoch end
    scores = metrics.get_scores(split="train")
    logs_dict = OrderedDict({
        "epoch": {
            "value": epoch,
            "string": f"{epoch:03}"
        },
        "train/loss_confid": {
            "value": loss / len_data,
            "string": f"{(loss / len_data):05.4e}",
        },
    })
    for s in scores:
        logs_dict[s] = scores[s]

    # Val scores
    val_losses, scores_val, _ = self.evaluate(self.val_loader,
                                              self.prod_val_len,
                                              split="val")
    logs_dict["val/loss_confid"] = {
        "value": val_losses["loss_confid"].item() / self.nsamples_val,
        "string": f"{(val_losses['loss_confid'].item() / self.nsamples_val):05.4e}",
    }
    for sv in scores_val:
        logs_dict[sv] = scores_val[sv]

    # Test scores
    test_losses, scores_test, _ = self.evaluate(self.test_loader,
                                                self.prod_test_len,
                                                split="test")
    logs_dict["test/loss_confid"] = {
        "value": test_losses["loss_confid"].item() / self.nsamples_test,
        "string": f"{(test_losses['loss_confid'].item() / self.nsamples_test):05.4e}",
    }
    for st in scores_test:
        logs_dict[st] = scores_test[st]

    # Print metrics
    misc.print_dict(logs_dict)

    # Save the model checkpoint
    self.save_checkpoint(epoch)

    # CSV logging
    misc.csv_writter(path=self.output_folder / "logs.csv",
                     dic=OrderedDict(logs_dict))

    # Tensorboard logging
    self.save_tb(logs_dict)