def test_zero_div():
    """``Loss.compute`` must refuse to run before any example was seen.

    The raised ``NotComputableError`` is also checked for its message.
    """
    empty_metric = Loss(nll_loss)
    expected_message = (
        r"Loss must have at least one example before it can be computed"
    )
    with pytest.raises(NotComputableError, match=expected_message):
        empty_metric.compute()
def _test(metric_device):
    """Compare ``Loss(nn.NLLLoss)`` with the criterion applied to gathered data.

    Two hard-coded batches are checked, with a ``reset()`` in between.
    ``device`` and ``tol`` are not defined in this block; they are read from
    the enclosing scope.

    Args:
        metric_device: value forwarded as ``device=`` to the ``Loss`` metric.
    """
    criterion = nn.NLLLoss().to(device)
    metric = Loss(criterion, device=metric_device)

    # ---- first batch -------------------------------------------------------
    probabilities = torch.tensor(
        [[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]], device=device
    )
    y_pred = probabilities.log()
    y = torch.tensor([2, 2], device=device).long()
    metric.update((y_pred, y))

    local_count = metric._num_examples
    assert local_count == len(y)

    computed = metric.compute()
    # After compute() the example counter is expected to hold the total over
    # all participating processes.
    assert local_count * idist.get_world_size() == metric._num_examples

    gathered_pred = idist.all_gather(y_pred)
    gathered_y = idist.all_gather(y)
    expected = criterion(gathered_pred, gathered_y)
    assert_almost_equal(computed, expected.item())

    metric.reset()

    # ---- second batch ------------------------------------------------------
    probabilities = torch.tensor(
        [[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]], device=device
    )
    y_pred = probabilities.log()
    y = torch.tensor([2, 0, 2], device=device).long()
    metric.update((y_pred, y))

    local_count = metric._num_examples
    computed = metric.compute()
    assert local_count * idist.get_world_size() == metric._num_examples

    gathered_pred = idist.all_gather(y_pred)
    gathered_y = idist.all_gather(y)
    expected = criterion(gathered_pred, gathered_y)

    # A relative tolerance, when supplied by the enclosing scope, replaces the
    # default almost-equal comparison.
    if tol is not None:
        assert pytest.approx(computed, rel=tol) == expected.item()
    else:
        assert_almost_equal(computed, expected.item())
def _test(metric_device, y_test_1, y_test_2):
    """Compare ``Loss(nn.NLLLoss)`` with the criterion on gathered fixture data.

    ``device`` and ``tol`` are not defined in this block; they are read from
    the enclosing scope.

    Args:
        metric_device: value forwarded as ``device=`` to the ``Loss`` metric.
        y_test_1: 3-tuple whose first two items are predictions and targets
            (the third item is ignored here).
        y_test_2: same layout as ``y_test_1``; used after ``reset()``.
    """
    criterion = nn.NLLLoss().to(device)
    metric = Loss(criterion, device=metric_device)

    # ---- first fixture -----------------------------------------------------
    y_pred, y, _ = y_test_1
    metric.update((y_pred, y))

    local_count = metric._num_examples
    assert local_count == len(y)

    computed = metric.compute()
    assert local_count * idist.get_world_size() == metric._num_examples

    gathered_pred = idist.all_gather(y_pred)
    gathered_y = idist.all_gather(y)
    expected = criterion(gathered_pred, gathered_y)
    assert_almost_equal(computed, expected.item())

    metric.reset()

    # ---- second fixture ----------------------------------------------------
    y_pred, y, _ = y_test_2
    metric.update((y_pred, y))

    local_count = metric._num_examples
    computed = metric.compute()
    assert local_count * idist.get_world_size() == metric._num_examples

    gathered_pred = idist.all_gather(y_pred)
    gathered_y = idist.all_gather(y)
    expected = criterion(gathered_pred, gathered_y)

    if tol is not None:
        assert pytest.approx(computed, rel=tol) == expected.item()
    else:
        assert_almost_equal(computed, expected.item())
def test_reset():
    """After ``reset()`` the metric must behave as if it never saw any data."""
    metric = Loss(nll_loss)

    predictions, targets = y_test_3()
    metric.update((predictions, targets))
    metric.compute()  # computable while data is present

    metric.reset()
    with pytest.raises(NotComputableError):
        metric.compute()
def _test(y_test_1, y_test_2):
    """Feed two fixtures into one ``Loss(nn.NLLLoss())`` and check each result.

    Args:
        y_test_1: ``(y_pred, y, expected_loss)`` for the first update.
        y_test_2: ``(y_pred, y, expected_loss)`` for the second update; since
            the metric is not reset in between, its expected value is the
            running average.
    """
    metric = Loss(nn.NLLLoss())
    for y_pred, y, expected_loss in (y_test_1, y_test_2):
        metric.update((y_pred, y))
        assert_almost_equal(metric.compute(), expected_loss)
def test_reset():
    """After ``reset()`` the metric must behave as if it never saw any data."""
    metric = Loss(nll_loss)

    log_probs = torch.tensor([[0.1, 0.3, 0.6], [0.6, 0.2, 0.2]]).log()
    targets = torch.tensor([2, 0]).long()
    metric.update((log_probs, targets))
    metric.compute()  # computable while data is present

    metric.reset()
    with pytest.raises(NotComputableError):
        metric.compute()
def test_compute(criterion):
    """Check ``Loss(criterion)`` after one and after two accumulated batches.

    Args:
        criterion: loss callable handed to ``Loss``.
    """
    metric = Loss(criterion)

    y_pred, y, expected_loss = y_test_1()
    metric.update((y_pred, y))
    first_value = metric.compute()
    assert_almost_equal(first_value, expected_loss)

    # No reset in between: the second expectation is the running average.
    y_pred, y, expected_loss = y_test_2()
    metric.update((y_pred, y))
    averaged_value = metric.compute()
    assert_almost_equal(averaged_value, expected_loss)
def test_compute_on_criterion():
    """``Loss`` must accept an ``nn.NLLLoss`` module as its loss function."""
    metric = Loss(nn.NLLLoss())

    # First batch: two samples.
    first_probs = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]])
    first_targets = torch.tensor([2, 2]).long()
    metric.update((first_probs.log(), first_targets))
    assert_almost_equal(metric.compute(), 1.1512925625)

    # Second batch: three more samples, accumulated without a reset.
    second_probs = torch.tensor(
        [[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]]
    )
    second_targets = torch.tensor([2, 0, 2]).long()
    metric.update((second_probs.log(), second_targets))
    assert_almost_equal(metric.compute(), 1.1253643036)  # average
def test_compute():
    """Check ``Loss(nll_loss)`` after one and after two accumulated batches.

    Tensors are built with the ``torch.tensor`` factory rather than the legacy
    ``torch.Tensor`` / ``torch.LongTensor`` constructors; the resulting dtypes
    (float32 predictions, int64 targets) are unchanged.
    """
    loss = Loss(nll_loss)

    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
    y = torch.tensor([2, 2]).long()
    loss.update((y_pred, y))
    assert_almost_equal(loss.compute(), 1.1512925625)

    # No reset in between: the second expectation is the running average
    # over all five samples.
    y_pred = torch.tensor(
        [[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]]
    ).log()
    y = torch.tensor([2, 0, 2]).long()
    loss.update((y_pred, y))
    assert_almost_equal(loss.compute(), 1.1253643036)  # average
def test_kwargs_loss():
    """Extra keyword arguments in the update tuple must reach the loss function.

    The third element of the tuple passed to ``update`` is a dict of keyword
    arguments for ``nll_loss``. Non-zero class weights are used on purpose:
    with all-zero weights the weighted mean divides by a zero weight sum, so
    the expected value would not be well defined. The reference value is
    obtained by calling ``nll_loss`` directly with the same keyword arguments.
    """
    loss = Loss(nll_loss)

    y_pred = torch.tensor([[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]]).log()
    y = torch.tensor([2, 2]).long()
    kwargs = {"weight": torch.tensor([0.1, 0.1, 0.1])}

    loss.update((y_pred, y, kwargs))

    expected_value = nll_loss(y_pred, y, **kwargs)
    assert_almost_equal(loss.compute(), expected_value.item())
def _evaluate_one_epoch(self, loader, epoch):
    """Run one evaluation pass and report loss, IoU and sample predictions.

    The model is switched to eval mode, every batch of ``loader`` is fed
    through it with gradients disabled, and the resulting loss / mIoU are
    logged and written to ``self.writer``. The last mini-batch is rendered as
    an (image, prediction, target) grid.

    Args:
        loader (DataLoader): pytorch dataloader
        epoch (int): the current epoch number
    """
    logger.info(f"Epoch[{epoch}] evaluation started")
    self.model.eval()

    loss_metric = Loss(self._loss_fn)
    # TODO: Support metrics other than IoU, and support multiple metrics.
    iou_metric = EvaluationMetric.create(
        self.config.metric, num_classes=self.num_classes
    )

    with torch.no_grad():
        for image, target in loader:
            image = image.to(self.device)
            target = target.to(self.device)
            output = self.model(image)
            loss_metric.update((output, target))
            iou_metric.update((output["out"], target))

        loss = loss_metric.compute()
        iou = iou_metric.compute()

    # Some classes are not used in Cityscapes evaluation.
    # TODO: Move class masking logic to IoU metric class.
    cityscapes_classes = torchvision.datasets.Cityscapes.classes
    keep_mask = [not c.ignore_in_eval for c in cityscapes_classes]
    class_names = [c.name for c in cityscapes_classes]

    iou_info = {}
    for i, name in enumerate(class_names):
        if keep_mask[i]:
            iou_info[name] = f"{iou[i].item():.3f}"
    miou = iou[keep_mask].mean()

    logger.info(
        f"Epoch[{epoch}] evaluation completed. "
        f"Loss: {loss:.3f}, mIoU: {miou:.3f}\n"
        f"IoU per class: {iou_info}"
    )
    self.writer.add_scalar("validation/loss", loss, epoch)
    self.writer.add_scalar("validation/miou", miou, epoch)

    # Visualize segmentation images from the last mini-batch; `image`,
    # `output` and `target` still hold the values of the final loop iteration.
    inv_normalize = T.Normalize(mean=_INV_IMGNET_MEAN, std=_INV_IMGNET_STD)
    n_images = image.shape[0]
    image_grid = []
    for idx in range(n_images):
        restored = inv_normalize(image[idx, :]).permute(1, 2, 0).cpu().numpy()
        # max(0)[1] is the arg-max over the first dimension of one prediction.
        predicted = decode_segmap(
            output["out"][idx, :].max(0)[1].cpu().numpy()
        )
        expected = decode_segmap(target[idx, :].cpu().numpy())
        image_grid.append([restored, predicted, expected])

    fig = grid_plot(image_grid)
    self.writer.add_figure("validation/visualize", fig, epoch)

    loss_metric.reset()
    iou_metric.reset()
def test_kwargs_loss():
    """Extra keyword arguments in the update tuple must reach the loss function.

    The reference value is obtained by calling ``nll_loss`` directly with the
    same keyword arguments.
    """
    metric = Loss(nll_loss)
    y_pred, y, _ = y_test_1()

    loss_kwargs = {"weight": torch.tensor([0.1, 0.1, 0.1])}
    metric.update((y_pred, y, loss_kwargs))

    reference = nll_loss(y_pred, y, **loss_kwargs)
    assert_almost_equal(metric.compute(), reference)
def test_kwargs_loss():
    """Extra keyword arguments in the update tuple must reach the loss function.

    The third element of the tuple passed to ``update`` is a dict of keyword
    arguments for ``nll_loss``. Non-zero class weights are used on purpose:
    with all-zero weights the weighted mean divides by a zero weight sum, so
    the expected value would not be well defined. The reference value is
    obtained by calling ``nll_loss`` directly with the same keyword arguments.
    """
    loss = Loss(nll_loss)
    y_pred, y, _ = y_test_1()
    kwargs = {"weight": torch.tensor([0.1, 0.1, 0.1])}

    loss.update((y_pred, y, kwargs))

    expected_value = nll_loss(y_pred, y, **kwargs)
    assert_almost_equal(loss.compute(), expected_value.item())
def evaluate(self, data_loader, epoch=0):
    """Compute and record the mean validation loss over ``data_loader``.

    Every batch is passed through ``self.model``; the per-batch loss is logged
    at debug level and the mean over all batches is logged at info level and
    written to ``self.writer`` under ``"val/loss"``.

    Gradient tracking is disabled for the whole pass because nothing here
    calls ``backward``. The model's train/eval mode is NOT changed by this
    method.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches.
        epoch (int): step value attached to the scalar written to the writer.
    """
    # Local import: the file's import block is not visible from this block.
    import torch

    loss_metric = Loss(self.loss_func)
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            # Call the module itself rather than ``.forward`` so that any
            # registered hooks run as well.
            preds = self.model(inputs)
            loss = self.loss_func(preds, targets)
            loss_metric.update((preds, targets))
            logger.debug(f"validation: {i} has loss {loss.item()}")

    mean_epoch_loss = loss_metric.compute()
    logger.info(f"validation mean loss is {mean_epoch_loss}")
    self.writer.add_scalar("val/loss", mean_epoch_loss, epoch)
def _test_distrib_compute_on_criterion(device):
    """Compare a distributed ``Loss(nn.NLLLoss)`` with the gathered reference.

    Each process feeds the same hard-coded batch; the metric result must match
    the criterion evaluated on the concatenation of all processes' data.

    Args:
        device: device on which tensors, criterion and metric are placed.
    """
    import torch.distributed as dist

    def _gather(y):
        # Collect `y` from every process and concatenate along dim 0.
        buffers = [torch.zeros_like(y) for _ in range(dist.get_world_size())]
        dist.all_gather(buffers, y)
        return torch.cat(buffers, dim=0)

    criterion = nn.NLLLoss().to(device)
    metric = Loss(criterion, device=device)

    # ---- first batch -------------------------------------------------------
    probabilities = torch.tensor(
        [[0.1, 0.4, 0.5], [0.1, 0.7, 0.2]], device=device
    )
    y_pred = probabilities.log()
    y = torch.tensor([2, 2], device=device).long()
    metric.update((y_pred, y))

    local_count = metric._num_examples
    assert local_count == len(y)

    computed = metric.compute()
    assert local_count * dist.get_world_size() == metric._num_examples

    expected = criterion(_gather(y_pred), _gather(y))
    assert_almost_equal(computed, expected.item())

    metric.reset()

    # ---- second batch ------------------------------------------------------
    probabilities = torch.tensor(
        [[0.1, 0.3, 0.6], [0.6, 0.2, 0.2], [0.2, 0.7, 0.1]], device=device
    )
    y_pred = probabilities.log()
    y = torch.tensor([2, 0, 2], device=device).long()
    metric.update((y_pred, y))

    local_count = metric._num_examples
    computed = metric.compute()
    assert local_count * dist.get_world_size() == metric._num_examples

    expected = criterion(_gather(y_pred), _gather(y))
    assert_almost_equal(computed, expected.item())
def train_one_epoch(self, dataloader, model, epoch, loss_func, optimizer):
    """Train ``model`` for one pass over ``dataloader`` and record the mean loss.

    For every batch the loss is computed, back-propagated and an optimizer
    step is taken. The per-batch loss is logged at debug level; the mean over
    the epoch is logged at info level and written to ``self.writer`` under
    ``"training/loss"``.

    Args:
        dataloader: iterable yielding ``(inputs, targets)`` batches.
        model: the model being trained; called on ``inputs``.
        epoch (int): epoch number used in log messages and as writer step.
        loss_func: callable ``loss_func(preds, targets)`` returning the loss
            that is back-propagated.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
    """
    # Track the criterion that is actually optimized (the ``loss_func``
    # argument), so the reported epoch mean always matches the per-batch
    # losses logged below.
    loss_metric = Loss(loss_func)
    for i, (inputs, targets) in enumerate(dataloader):
        # Call the module itself rather than ``.forward`` so that any
        # registered hooks run as well.
        preds = model(inputs)
        loss = loss_func(preds, targets)
        loss_metric.update((preds, targets))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        logger.debug(
            f"epoch: {epoch}, batch: {i} has training loss {loss.item()}")

    mean_epoch_loss = loss_metric.compute()
    logger.info(
        f"epoch {epoch}, has mean training loss is {mean_epoch_loss}")
    self.writer.add_scalar("training/loss", mean_epoch_loss, epoch)
def _train_one_epoch(self, loader, epoch):
    """Train one epoch with gradient accumulation and record the mean loss.

    Args:
        loader (DataLoader): pytorch dataloader
        epoch (int): the current epoch number
    """
    logger.info(f"Epoch[{epoch}] training started.")
    self.model.train()
    n_batch = len(loader)
    accumulation_steps = self.config.train.accumulation_steps
    loss_metric = Loss(self._loss_fn)

    self.optimizer.zero_grad()
    for i, (image, target) in enumerate(loader):
        image, target = image.to(self.device), target.to(self.device)
        output = self.model(image)
        loss = self._loss_fn(output, target)
        loss.backward()

        # Accumulated gradients are only applied after X steps.
        # This creates an effective batch size of
        # batch_size * accumulation_steps
        # NOTE(review): gradients of a trailing partial accumulation window
        # (when n_batch is not a multiple of accumulation_steps) are never
        # applied by this method - confirm this is intended.
        if (i + 1) % accumulation_steps == 0:
            self.optimizer.step()
            self.lr_scheduler.step()
            self.optimizer.zero_grad()

        loss_metric.update((output, target))

        # 1-based iteration counter. The previous ``(i + 1) % n_batch``
        # wrapped to 0 on the final batch, logging "Iteration[0/n_batch]".
        iter_num = i + 1
        logger.debug(f"Epoch[{epoch}] Iteration[{iter_num}/{n_batch}] "
                     f"Loss: {loss:.3f}")

    epoch_loss = loss_metric.compute()
    logger.info(
        f"Epoch[{epoch}] training completed. Loss: {epoch_loss:.3f}")
    self.writer.add_scalar("training/loss", epoch_loss, epoch)
    loss_metric.reset()
# Aggregate metrics over all cross-validation folds and publish a summary.
# `acc`, `parser`, `vis`, `FG` and `run_fold` are defined outside this fragment.
loss = Loss(F.cross_entropy)
precision = Precision()
sensitivity = Sensitivity()
specificity = Specificity()

for i in range(FG.fold):
    parser.args.cur_fold = i
    output, target = run_fold(parser, vis)
    # run_fold returns sequences of tensors; join each into a single tensor.
    output = torch.cat(output)
    target = torch.cat(target)

    arg = (output, target)
    for fold_metric in (acc, loss, precision, sensitivity, specificity):
        fold_metric.update(arg)

# Build the HTML-ish report one line at a time; every line ends with '<br>'.
end = '<br>'
text = 'Over all result<br>'
text = text + 'accuracy: ' + '{:.4f}'.format(acc.compute()) + end
text = text + 'loss: ' + '{:.4f}'.format(loss.compute()) + end
text = text + 'precision: ' + '{}'.format(precision.compute()) + end
text = text + 'sensitivity: ' + '{}'.format(sensitivity.compute()) + end
text = text + 'specificity: ' + '{}'.format(specificity.compute()) + end

vis.text(text, 'result_overall')
vis.save([vis.env])
def _evaluate_one_epoch(self, loader, epoch, n_epochs):
    """Run one evaluation pass and report loss, depth metrics and sample images.

    The model is switched to eval mode and every batch of ``loader`` is fed
    through it with gradients disabled. Loss and six depth metrics are logged
    and written to ``self.writer``; the last mini-batch is rendered as figures.

    Args:
        loader (DataLoader): pytorch dataloader
        epoch (int): the current epoch number
        n_epochs (int): total epoch number
    """
    logger.info(f"Epoch[{epoch}/{n_epochs}] evaluation started")
    self.model.eval()
    loss_metric = Loss(self._loss_fn)

    # Metrics.
    # The RMSE is smaller than on the normal scale, because this code does
    # not apply: depth = self.to_tensor(depth).float() * 10
    are_metric = EvaluationMetric.create("AverageRelativeError")
    log10_metric = EvaluationMetric.create("AverageLog10Error")
    rmse_metric = EvaluationMetric.create("RootMeanSquareError")
    a1_metric = EvaluationMetric.create("ThresholdAccuracy", threshold=1.25)
    a2_metric = EvaluationMetric.create("ThresholdAccuracy",
                                        threshold=1.25**2)
    a3_metric = EvaluationMetric.create("ThresholdAccuracy",
                                        threshold=1.25**3)
    # Kept in a fixed order so update / reset happen in the same sequence.
    depth_metrics = (
        are_metric,
        log10_metric,
        rmse_metric,
        a1_metric,
        a2_metric,
        a3_metric,
    )

    with torch.no_grad():
        for image, depth in loader:
            image = image.to(self.device)
            depth_n = depth.to(self.device)
            output = self.model(image)

            loss_metric.update((output, depth_n))
            # The depth metrics receive numpy arrays rather than tensors.
            depth_image_pair = (output.cpu().numpy(), depth_n.cpu().numpy())
            for metric in depth_metrics:
                metric.update(depth_image_pair)

        # Compute the loss and all metric values
        loss = loss_metric.compute()
        are_metric_val = are_metric.compute()
        log10_metric_val = log10_metric.compute()
        rmse_metric_val = rmse_metric.compute()
        a1_metric_val = a1_metric.compute()
        a2_metric_val = a2_metric.compute()
        a3_metric_val = a3_metric.compute()

    logger.info(f"Epoch[{epoch}/{n_epochs}] evaluation completed.\n"
                f"Validation Loss: {loss:.3f}\n"
                f"Average Relative Error: {are_metric_val:.3f}\n"
                f"Average Log10 Error: {log10_metric_val:.3f}\n"
                f"Root Mean Square Error: {rmse_metric_val:.3f}\n"
                f"Threshold Accuracy (delta1): {a1_metric_val:.3f}\n"
                f"Threshold Accuracy (delta2): {a2_metric_val:.3f}\n"
                f"Threshold Accuracy (delta3): {a3_metric_val:.3f}\n")

    scalars = (
        ("Validation/loss", loss),
        ("Validation/Average_Relative_Error", are_metric_val),
        ("Validation/Average_Log10_Error", log10_metric_val),
        ("Validation/Root_Mean_Square_Error", rmse_metric_val),
        ("Validation/Threshold_Accuracy__delta1_", a1_metric_val),
        ("Validation/Threshold_Accuracy__delta2_", a2_metric_val),
        ("Validation/Threshold_Accuracy__delta3_", a3_metric_val),
    )
    for tag, value in scalars:
        self.writer.add_scalar(tag, value, epoch)

    # Visualize depth images from the last mini-batch; `image`, `output` and
    # `depth_n` still hold the values of the final loop iteration.
    n_images = image.shape[0]
    image_grid = []
    gray_depth_grid = []
    for idx in range(n_images):
        rgb_image = image[idx].permute(1, 2, 0)
        image_grid.append([rgb_image.cpu().numpy()])

        # Last channel of the channel-last prediction / ground truth.
        predicted_depth = output[idx].permute(1, 2, 0)[:, :, -1].cpu().numpy()
        target_depth = depth_n[idx].permute(1, 2, 0)[:, :, -1].cpu().numpy()
        gray_depth_grid.append([predicted_depth, target_depth])

    # Add figures
    fig = grid_plot(image_grid)
    gray_figs = grid_plot(gray_depth_grid, img_type="gray")
    self.writer.add_figure("Validation/visualize_depths", gray_figs, epoch)
    self.writer.add_figure("Validation/visualize_image", fig, epoch)

    loss_metric.reset()
    for metric in depth_metrics:
        metric.reset()
def train(self, **kwargs):
    """Train the estimator for ``config.train.epochs`` epochs.

    Builds the training and validation loaders, moves the model to
    ``self.device`` and then, per epoch: trains with gradient accumulation,
    optionally evaluates (every ``config.system.val_interval`` epochs), writes
    the epoch's training loss to the writer and saves a checkpoint.

    Args:
        **kwargs: accepted but not used by this method.
    """
    # Training parameters
    config = self.config
    optimizer = self.optimizer
    val_interval = config.system.val_interval
    writer = self.writer

    def _build_loader(section, split, is_train):
        # Create the dataset described by a config section, then wrap it.
        dataset = Dataset.create(
            section.dataset,
            split=split,
            data_root=config.system.data_root,
            transforms=self._NYU_transforms(is_train=is_train),
        )
        return create_loader(
            dataset,
            batch_size=section.batch_size,
            num_workers=config.system.workers,
            dryrun=config.system.dryrun,
        )

    # Load data
    train_loader = _build_loader(config.train, "train", True)
    val_loader = _build_loader(config.val, "test", False)

    # Logging
    logger.info("Start training estimator: %s", type(self).__name__)

    self.model.to(self.device)
    n_epochs = config.train.epochs

    # Start training
    for epoch in range(1, n_epochs + 1):
        logger.info(f"Epoch[{epoch}/{n_epochs}] training started.")
        loss_metric = Loss(self._loss_fn)
        self.model.train()
        N = len(train_loader)
        accumulation_steps = self.config.train.accumulation_steps

        optimizer.zero_grad()
        for i, (image, depth) in enumerate(train_loader):
            # Move the sample and its depth map to the target device
            image = image.to(self.device)
            depth_n = depth.to(self.device)

            # Forward pass, loss and backward pass
            output = self.model(image)
            loss = self._loss_fn(output, depth_n)
            loss.backward()

            # Apply the accumulated gradients every `accumulation_steps`
            # batches only.
            step_due = (i + 1) % accumulation_steps == 0
            if step_due:
                self.optimizer.step()
                self.optimizer.zero_grad()

            loss_metric.update((output, depth_n))

            # Log progress
            logger.debug(f"[{i}/{N}] Loss: {loss:.4f}")

        epoch_loss = loss_metric.compute()

        if epoch % val_interval == 0:
            self._evaluate_one_epoch(val_loader, epoch, n_epochs)

        # Record epoch's intermediate results
        writer.add_scalar("Training/Loss", epoch_loss, epoch)
        self.checkpointer.save(self, epoch=epoch)
def test_zero_div():
    """``Loss.compute`` must raise ``NotComputableError`` before any update."""
    empty_metric = Loss(nll_loss)
    with pytest.raises(NotComputableError):
        empty_metric.compute()