def validation(self, epoch):
    """Run one full evaluation pass over ``self.val_loader``.

    Logs per-sample and final pixel accuracy / mean IoU, synchronizes
    across distributed workers, and saves a checkpoint when the mIoU
    beats the best seen so far.

    Args:
        epoch (int): current epoch number (for logging and checkpoints).
    """
    self.metric.reset()
    # Unwrap DistributedDataParallel so evaluation and saving use the raw model.
    if self.args.distributed:
        model = self.model.module
    else:
        model = self.model
    torch.cuda.empty_cache()
    model.eval()
    for i, (image, target, filename) in enumerate(self.val_loader):
        image = image.to(self.device)
        target = target.to(self.device)
        with torch.no_grad():
            if cfg.DATASET.MODE == 'val' or cfg.TEST.CROP_SIZE is None:
                output = model(image)[0]
            else:
                # Pad the input up to the test crop size, run the model,
                # then crop the prediction back to the original size.
                size = image.size()[2:]
                pad_height = cfg.TEST.CROP_SIZE[0] - size[0]
                pad_width = cfg.TEST.CROP_SIZE[1] - size[1]
                # BUGFIX: for 4-D input, F.pad's 4-tuple is
                # (w_left, w_right, h_top, h_bottom) — width padding comes
                # first. The original passed (0, pad_height, 0, pad_width),
                # swapping the two amounts (harmless only for square crops).
                image = F.pad(image, (0, pad_width, 0, pad_height))
                output = model(image)[0]
                output = output[..., :size[0], :size[1]]
        self.metric.update(output, target)
        pixAcc, mIoU = self.metric.get()
        logging.info("[EVAL] Sample: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
            i + 1, pixAcc * 100, mIoU * 100))
    pixAcc, mIoU = self.metric.get()
    logging.info("[EVAL END] Epoch: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
        epoch, pixAcc * 100, mIoU * 100))
    synchronize()
    if self.best_pred < mIoU and self.save_to_disk:
        self.best_pred = mIoU
        logging.info('Epoch {} is the best model, best pixAcc: {:.3f}, mIoU: {:.3f}, save the model..'.format(
            epoch, pixAcc * 100, mIoU * 100))
        save_checkpoint(model, epoch, is_best=True)
def train(self):
    """Main optimization loop.

    Iterates the training loader, steps the optimizer and LR scheduler
    each batch, logs progress periodically, checkpoints once per epoch,
    and optionally runs validation every ``args.val_epoch`` epochs.
    """
    self.save_to_disk = get_rank() == 0
    epochs = cfg.TRAIN.EPOCHS
    max_iters = self.max_iters
    iters_per_epoch = self.iters_per_epoch
    log_per_iters = self.args.log_iter
    val_per_iters = self.args.val_epoch * self.iters_per_epoch

    start_time = time.time()
    logging.info('Start training, Total Epochs: {:d} = Total Iterations {:d}'.format(epochs, max_iters))

    self.model.train()
    # Resume the global iteration counter when restarting mid-run.
    cur_iters = iters_per_epoch * self.start_epoch if self.start_epoch > 0 else 0

    for samples, labels, _ in self.train_loader:
        epoch = cur_iters // iters_per_epoch + 1
        cur_iters += 1

        samples = samples.to(self.device)
        labels = labels.to(self.device)

        preds = self.model(samples)
        loss_dict = self.criterion(preds, labels)
        total_loss = sum(loss_dict.values())

        # Reduce losses over all GPUs — used only for logging; the local
        # (un-reduced) loss drives the backward pass.
        reduced_dict = reduce_loss_dict(loss_dict)
        reduced_total = sum(reduced_dict.values())

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        self.lr_scheduler.step()

        remaining = max_iters - cur_iters
        eta_seconds = ((time.time() - start_time) / cur_iters) * remaining
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if cur_iters % log_per_iters == 0 and self.save_to_disk:
            logging.info(
                "Epoch: {:d}/{:d} || Iters: {:d}/{:d} || Lr: {:.6f} || "
                "Loss: {:.4f} || Cost Time: {} || Estimated Time: {}".format(
                    epoch, epochs, cur_iters % iters_per_epoch, iters_per_epoch,
                    self.optimizer.param_groups[0]['lr'], reduced_total.item(),
                    str(datetime.timedelta(seconds=int(time.time() - start_time))),
                    eta_string))

        # One checkpoint per epoch (rank 0 only).
        if cur_iters % self.iters_per_epoch == 0 and self.save_to_disk:
            save_checkpoint(self.model, epoch, self.optimizer,
                            self.lr_scheduler, is_best=False)

        if not self.args.skip_val and cur_iters % val_per_iters == 0:
            self.validation(epoch)
            self.model.train()

    total_training_time = time.time() - start_time
    total_training_str = str(datetime.timedelta(seconds=total_training_time))
    logging.info("Total training time: {} ({:.4f}s / it)".format(
        total_training_str, total_training_time / max_iters))
def validation(self, epoch, val_loader, writer):
    """Evaluate the model and log metrics plus calibration diagnostics.

    Computes pixAcc/mIoU over ``val_loader``, logs ECE/CCE calibration
    tables and scalars to TensorBoard, and saves "best" checkpoints for
    both mIoU (higher is better) and CCE score (lower is better).

    Args:
        epoch (int): current epoch (used as the TensorBoard global step).
        val_loader: iterable yielding (image, target, filename) batches.
        writer: TensorBoard ``SummaryWriter``.
    """
    import cv2  # local import kept from the original; hoisted out of the loop

    self.metric.reset()
    self.ece_evaluator.reset()
    self.cce_evaluator.reset()
    model = self.model
    torch.cuda.empty_cache()
    model.eval()
    # BUGFIX: the original iterated self.val_loader, silently ignoring the
    # val_loader argument; use the loader the caller supplied.
    for i, (image, target, filename) in enumerate(val_loader):
        image = image.to(self.device)
        target = target.to(self.device)
        with torch.no_grad():
            # output = mmseg_evaluate(model, image, target)
            output = model.encode_decode(image, None)
        self.metric.update(output, target)
        if i == 0:
            # Log the raw first image and one probability heatmap per class.
            image_read = cv2.imread(filename[0])
            writer.add_image("Image[0] Read", image_read, epoch, dataformats="HWC")
            save_imgs = torch.softmax(output, dim=1)[0]
            for class_no, class_distri in enumerate(save_imgs):
                plt.clf()
                # Pin two pixels to 0/1 so the colormap range is comparable
                # across classes and epochs (in-place edit of the softmax copy).
                class_distri[0][0] = 0
                class_distri[0][1] = 1
                im = plt.imshow(class_distri.detach().cpu().numpy(), cmap="Greens")
                plt.colorbar(im)
                plt.savefig("temp_files/temp.jpg")  # assumes temp_files/ exists — TODO confirm
                plt.clf()
                img_dif = cv2.imread("temp_files/temp.jpg")
                writer.add_image(f"Class_{self.classes[class_no]}", img_dif,
                                 epoch, dataformats="HWC")
        with torch.no_grad():
            self.ece_evaluator.forward(output, target)
            self.cce_evaluator.forward(output, target)
        pixAcc, mIoU = self.metric.get()
        logging.info(
            "[EVAL] Sample: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
                i + 1, pixAcc * 100, mIoU * 100))
    pixAcc, mIoU = self.metric.get()
    logging.info(
        "[EVAL END] Epoch: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
            epoch, pixAcc * 100, mIoU * 100))
    writer.add_scalar("[EVAL END] pixAcc", pixAcc * 100, epoch)
    writer.add_scalar("[EVAL END] mIoU", mIoU * 100, epoch)

    # Calibration summary images/scalars for both evaluators.
    ece_count_table_image, _ = self.ece_evaluator.get_count_table_img(self.classes)
    ece_table_image, ece_dif_map = self.ece_evaluator.get_perc_table_img(self.classes)
    cce_count_table_image, _ = self.cce_evaluator.get_count_table_img(self.classes)
    cce_table_image, cce_dif_map = self.cce_evaluator.get_perc_table_img(self.classes)
    ece_dif_mean, ece_dif_std = self.ece_evaluator.get_diff_mean_std()
    cce_dif_mean, cce_dif_std = self.cce_evaluator.get_diff_mean_std()

    writer.add_image("ece_table", ece_table_image, epoch, dataformats="HWC")
    writer.add_image("ece Count table", ece_count_table_image, epoch, dataformats="HWC")
    writer.add_image("ece DifMap", ece_dif_map, epoch, dataformats="HWC")
    writer.add_scalar("ece_mean", ece_dif_mean, epoch)
    writer.add_scalar("ece_std", ece_dif_std, epoch)
    writer.add_scalar("ece Score", self.ece_evaluator.get_overall_ECELoss(), epoch)
    writer.add_scalar("ece dif Score", self.ece_evaluator.get_diff_score(), epoch)

    writer.add_image("cce_table", cce_table_image, epoch, dataformats="HWC")
    writer.add_image("cce Count table", cce_count_table_image, epoch, dataformats="HWC")
    writer.add_image("cce DifMap", cce_dif_map, epoch, dataformats="HWC")
    cces = self.cce_evaluator.get_overall_CCELoss()
    writer.add_scalar("cce_mean", cce_dif_mean, epoch)
    writer.add_scalar("cce_std", cce_dif_std, epoch)
    writer.add_scalar("cce Score", cces, epoch)
    writer.add_scalar("cce dif Score", self.cce_evaluator.get_diff_score(), epoch)

    synchronize()
    # Track two "best" checkpoints: highest mIoU and lowest CCE score.
    if self.best_pred_miou < mIoU and self.save_to_disk:
        self.best_pred_miou = mIoU
        logging.info(
            'Epoch {} is the best model for mIoU, best pixAcc: {:.3f}, mIoU: {:.3f}, save the model..'
            .format(epoch, pixAcc * 100, mIoU * 100))
        save_checkpoint(model, epoch, is_best=True, mode="iou")
    if self.best_pred_cces > cces and self.save_to_disk:
        self.best_pred_cces = cces
        logging.info(
            'Epoch {} is the best model for cceScore, best pixAcc: {:.3f}, mIoU: {:.3f}, save the model..'
            .format(epoch, pixAcc * 100, mIoU * 100))
        save_checkpoint(model, epoch, is_best=True, mode="cces")
def train(self):
    """Main training loop.

    Optimizes over the training loader, logging progress every
    ``args.log_iter`` iterations, checkpointing once per epoch, and
    running validation every ``args.val_epoch`` epochs.

    Cleanup: removed a dead debug block (a bare triple-quoted string
    containing image-dump code and a deliberate ``a = 1/0`` crash) and
    commented-out alternate forward passes; the sample-id loop variable
    was only used by that dead code, so it is now ``_``.
    """
    self.save_to_disk = get_rank() == 0
    epochs, max_iters, iters_per_epoch = cfg.TRAIN.EPOCHS, self.max_iters, self.iters_per_epoch
    log_per_iters, val_per_iters = self.args.log_iter, self.args.val_epoch * self.iters_per_epoch
    start_time = time.time()
    logging.info(
        'Start training, Total Epochs: {:d} = Total Iterations {:d}'.
        format(epochs, max_iters))
    self.model.train()
    # Resume the global iteration counter when restarting from a checkpoint.
    iteration = self.start_epoch * iters_per_epoch if self.start_epoch > 0 else 0
    for (images, targets, _) in self.train_loader:
        epoch = iteration // iters_per_epoch + 1
        iteration += 1
        images = images.to(self.device)
        targets = targets.to(self.device)

        outputs = self.model(images)
        loss_dict = self.criterion(outputs, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Reduce losses over all GPUs for logging purposes only; the
        # backward pass uses the local (un-reduced) loss.
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        self.optimizer.zero_grad()
        losses.backward()
        self.optimizer.step()
        self.lr_scheduler.step()

        eta_seconds = ((time.time() - start_time) / iteration) * (max_iters - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % log_per_iters == 0 and self.save_to_disk:
            logging.info(
                "Epoch: {:d}/{:d} || Iters: {:d}/{:d} || Lr: {:.6f} || "
                "Loss: {:.4f} || Cost Time: {} || Estimated Time: {}".format(
                    epoch, epochs, iteration % iters_per_epoch, iters_per_epoch,
                    self.optimizer.param_groups[0]['lr'], losses_reduced.item(),
                    str(datetime.timedelta(seconds=int(time.time() - start_time))),
                    eta_string))

        # Checkpoint once per epoch (rank 0 only).
        if iteration % self.iters_per_epoch == 0 and self.save_to_disk:
            save_checkpoint(self.model, epoch, self.optimizer,
                            self.lr_scheduler, is_best=False)

        if not self.args.skip_val and iteration % val_per_iters == 0:
            self.validation(epoch)
            self.model.train()

    total_training_time = time.time() - start_time
    total_training_str = str(datetime.timedelta(seconds=total_training_time))
    logging.info("Total training time: {} ({:.4f}s / it)".format(
        total_training_str, total_training_time / max_iters))
def train(self):
    """Training loop with an auxiliary boundary loss.

    Each batch yields (images, targets, boundary, _); the model returns
    both segmentation outputs and boundary outputs, and the total loss is
    the segmentation loss plus a weighted boundary loss.
    """
    self.save_to_disk = get_rank() == 0
    epochs, max_iters, iters_per_epoch = cfg.TRAIN.EPOCHS, self.max_iters, self.iters_per_epoch
    # NOTE(review): val_per_iters is computed but no validation is ever
    # triggered in this loop — confirm whether that is intentional.
    log_per_iters, val_per_iters = self.args.log_iter, self.args.val_epoch * self.iters_per_epoch
    start_time = time.time()
    logging.info(
        'Start training, Total Epochs: {:d} = Total Iterations {:d}'.
        format(epochs, max_iters))
    self.model.train()
    # Resume the global iteration counter when restarting from a checkpoint.
    iteration = self.start_epoch * iters_per_epoch if self.start_epoch > 0 else 0
    for (images, targets, boundary, _) in self.train_loader:
        epoch = iteration // iters_per_epoch + 1
        iteration += 1
        images = images.to(self.device)
        targets = targets.to(self.device)
        boundarys = boundary.to(self.device)
        # Model returns segmentation outputs and boundary predictions.
        outputs, outputs_boundary = self.model(images)
        loss_dict = self.criterion(outputs, targets)
        # embed(header='check loss')
        boundarys = boundarys.float()
        # All boundary pixels are treated as valid for the boundary loss.
        valid = torch.ones_like(boundarys)
        lossb_dict = self.criterion_b(outputs_boundary[0], boundarys, valid)
        # Scale the boundary loss before summing it into the total.
        weight_boundary = 5
        lossb_dict['loss'] = weight_boundary * lossb_dict['loss']
        losses = sum(loss for loss in loss_dict.values()) + \
            sum(loss for loss in lossb_dict.values())
        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        lossb_dict_reduced = reduce_loss_dict(lossb_dict)
        lossesb_reduced = sum(loss for loss in lossb_dict_reduced.values())
        # embed(header='check loader')
        self.optimizer.zero_grad()
        losses.backward()
        self.optimizer.step()
        self.lr_scheduler.step()
        eta_seconds = ((time.time() - start_time) / iteration) * (max_iters - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
        if iteration % log_per_iters == 0 and self.save_to_disk:
            logging.info(
                "Epoch: {:d}/{:d} || Iters: {:d}/{:d} || Lr: {:.6f} || "
                "Loss: {:.4f} || Loss_b: {:.4f} || Cost Time: {} || Estimated Time: {}"
                .format(
                    epoch, epochs, iteration % iters_per_epoch,
                    iters_per_epoch, self.optimizer.param_groups[0]['lr'],
                    losses_reduced.item(),
                    lossesb_reduced.item(),
                    str(
                        datetime.timedelta(seconds=int(time.time() -
                                                       start_time))),
                    eta_string))
        # Checkpoint once per epoch (rank 0 only).
        if iteration % self.iters_per_epoch == 0 and self.save_to_disk:
            save_checkpoint(self.model,
                            epoch,
                            self.optimizer,
                            self.lr_scheduler,
                            is_best=False)
    total_training_time = time.time() - start_time
    total_training_str = str(
        datetime.timedelta(seconds=total_training_time))
    logging.info("Total training time: {} ({:.4f}s / it)".format(
        total_training_str, total_training_time / max_iters))
def validation(self, epoch, val_loader, writer):
    """Evaluate on ``val_loader`` and log accuracy plus calibration data.

    Computes pixAcc/mIoU, writes ECE/CCE calibration tables and scalars
    to TensorBoard, and checkpoints when mIoU improves on the best so far.

    Args:
        epoch (int): current epoch (used as the TensorBoard global step).
        val_loader: iterable yielding (image, target, filename) batches.
        writer: TensorBoard ``SummaryWriter``.

    Cleanup: removed the unused ``perimageCCE`` evaluator (it was only
    referenced from commented-out code) and hoisted the duplicated
    in-loop ``import cv2``.
    """
    import cv2  # local import kept from the original; hoisted out of the loop

    self.metric.reset()
    self.ece_evaluator.reset()
    self.cce_evaluator.reset()
    # Unwrap DistributedDataParallel so evaluation and saving use the raw model.
    if self.args.distributed:
        model = self.model.module
    else:
        model = self.model
    torch.cuda.empty_cache()
    model.eval()
    for i, (image, target, filename) in enumerate(val_loader):
        image = image.to(self.device)
        target = target.to(self.device)
        with torch.no_grad():
            if cfg.DATASET.MODE == 'val' or cfg.TEST.CROP_SIZE is None:
                output = model(image)[0]
            else:
                # Pad up to the test crop size, predict, then crop back.
                size = image.size()[2:]
                pad_height = cfg.TEST.CROP_SIZE[0] - size[0]
                pad_width = cfg.TEST.CROP_SIZE[1] - size[1]
                # BUGFIX: for 4-D input, F.pad's 4-tuple is
                # (w_left, w_right, h_top, h_bottom) — width padding comes
                # first. The original swapped pad_height/pad_width, which is
                # only harmless for square crop sizes.
                image = F.pad(image, (0, pad_width, 0, pad_height))
                output = model(image)[0]
                output = output[..., :size[0], :size[1]]
        # output: [N, num_classes, H, W] logits; target: [N, H, W] labels.
        self.metric.update(output, target)
        if i == 0:
            # Log the raw first image and one probability heatmap per class.
            image_read = cv2.imread(filename[0])
            writer.add_image("Image[0] Read", image_read, epoch, dataformats="HWC")
            save_imgs = torch.softmax(output, dim=1)[0]
            for class_no, class_distri in enumerate(save_imgs):
                plt.clf()
                # Pin two pixels to 0/1 so the colormap range is comparable
                # across classes and epochs (in-place edit of the softmax copy).
                class_distri[0][0] = 0
                class_distri[0][1] = 1
                im = plt.imshow(class_distri.detach().cpu().numpy(), cmap="Greens")
                plt.colorbar(im)
                plt.savefig("temp_files/temp.jpg")  # assumes temp_files/ exists — TODO confirm
                plt.clf()
                img_dif = cv2.imread("temp_files/temp.jpg")
                writer.add_image(f"Class_{self.classes[class_no]}", img_dif,
                                 epoch, dataformats="HWC")
        with torch.no_grad():
            self.ece_evaluator.forward(output, target)
            self.cce_evaluator.forward(output, target)
        pixAcc, mIoU = self.metric.get()
        logging.info(
            "[EVAL] Sample: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
                i + 1, pixAcc * 100, mIoU * 100))
    pixAcc, mIoU = self.metric.get()
    logging.info(
        "[EVAL END] Epoch: {:d}, pixAcc: {:.3f}, mIoU: {:.3f}".format(
            epoch, pixAcc * 100, mIoU * 100))
    writer.add_scalar("[EVAL END] pixAcc", pixAcc * 100, epoch)
    writer.add_scalar("[EVAL END] mIoU", mIoU * 100, epoch)

    # Calibration summary images/scalars for both evaluators.
    ece_count_table_image, _ = self.ece_evaluator.get_count_table_img(self.classes)
    ece_table_image, ece_dif_map = self.ece_evaluator.get_perc_table_img(self.classes)
    cce_count_table_image, _ = self.cce_evaluator.get_count_table_img(self.classes)
    cce_table_image, cce_dif_map = self.cce_evaluator.get_perc_table_img(self.classes)

    writer.add_image("ece_table", ece_table_image, epoch, dataformats="HWC")
    writer.add_image("ece Count table", ece_count_table_image, epoch, dataformats="HWC")
    writer.add_image("ece DifMap", ece_dif_map, epoch, dataformats="HWC")
    writer.add_scalar("ece Score", self.ece_evaluator.get_overall_ECELoss(), epoch)
    writer.add_scalar("ece dif Score", self.ece_evaluator.get_diff_score(), epoch)

    writer.add_image("cce_table", cce_table_image, epoch, dataformats="HWC")
    writer.add_image("cce Count table", cce_count_table_image, epoch, dataformats="HWC")
    writer.add_image("cce DifMap", cce_dif_map, epoch, dataformats="HWC")
    writer.add_scalar("cce Score", self.cce_evaluator.get_overall_CCELoss(), epoch)
    writer.add_scalar("cce dif Score", self.cce_evaluator.get_diff_score(), epoch)

    synchronize()
    if self.best_pred < mIoU and self.save_to_disk:
        self.best_pred = mIoU
        logging.info(
            'Epoch {} is the best model, best pixAcc: {:.3f}, mIoU: {:.3f}, save the model..'
            .format(epoch, pixAcc * 100, mIoU * 100))
        save_checkpoint(model, epoch, is_best=True)