def train(self, args):
    transform = get_transformation((args.crop_height, args.crop_width),
                                   resize=True, dataset=args.dataset)

    # make the choice of dataset configurable
    if self.args.dataset == 'voc2012':
        labeled_set = VOCDataset(root_path=root, name='label', ratio=0.2,
                                 transformation=transform, augmentation=None)
        unlabeled_set = VOCDataset(root_path=root, name='unlabel', ratio=0.2,
                                   transformation=transform, augmentation=None)
        val_set = VOCDataset(root_path=root, name='val', ratio=0.5,
                             transformation=transform, augmentation=None)
    elif self.args.dataset == 'cityscapes':
        labeled_set = CityscapesDataset(root_path=root_cityscapes, name='label', ratio=0.5,
                                        transformation=transform, augmentation=None)
        unlabeled_set = CityscapesDataset(root_path=root_cityscapes, name='unlabel', ratio=0.5,
                                          transformation=transform, augmentation=None)
        val_set = CityscapesDataset(root_path=root_cityscapes, name='val', ratio=0.5,
                                    transformation=transform, augmentation=None)
    elif self.args.dataset == 'acdc':
        labeled_set = ACDCDataset(root_path=root_acdc, name='label', ratio=0.5,
                                  transformation=transform, augmentation=None)
        unlabeled_set = ACDCDataset(root_path=root_acdc, name='unlabel', ratio=0.5,
                                    transformation=transform, augmentation=None)
        val_set = ACDCDataset(root_path=root_acdc, name='val', ratio=0.5,
                              transformation=transform, augmentation=None)

    # drop_last=True keeps every batch the same size by discarding the final,
    # smaller batch; see
    # https://discuss.pytorch.org/t/about-the-relation-between-batch-size-and-length-of-data-loader/10510
    labeled_loader = DataLoader(labeled_set, batch_size=args.batch_size, shuffle=True, drop_last=True)
    unlabeled_loader = DataLoader(unlabeled_set, batch_size=args.batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=True, drop_last=True)

    new_img_fake_sample = utils.Sample_from_Pool()
    img_fake_sample = utils.Sample_from_Pool()
    gt_fake_sample = utils.Sample_from_Pool()

    img_dis_loss, gt_dis_loss, unsupervisedloss, fullsupervisedloss = 0, 0, 0, 0

    ### Regulates the update frequency between discriminators and generators
    counter = 0

    for epoch in range(self.start_epoch, args.epochs):
        lr = self.g_optimizer.param_groups[0]['lr']
        print('learning rate = %.7f' % lr)

        self.Gsi.train()
        self.Gis.train()

        # if (epoch + 1) % 10 == 0:
        #     args.lamda_img = args.lamda_img + 0.08
        #     args.lamda_gt = args.lamda_gt + 0.04

        for i, ((l_img, l_gt, _), (unl_img, _, _)) in enumerate(zip(labeled_loader, unlabeled_loader)):
            # step
            step = epoch * min(len(labeled_loader), len(unlabeled_loader)) + i + 1

            l_img, unl_img, l_gt = utils.cuda([l_img, unl_img, l_gt], args.gpu_ids)

            # Generator Computations
            ##################################################
            set_grad([self.Di, self.Ds, self.old_Di], False)
            set_grad([self.old_Gsi, self.old_Gis], False)
            self.g_optimizer.zero_grad()

            # Forward pass through generators
            ##################################################
            fake_img = self.Gis(make_one_hot(l_gt, args.dataset, args.gpu_ids).float())
            fake_gt = self.Gsi(unl_img.float())  ### has 21 channels
            lab_gt = self.Gsi(l_img)  ### has 21 channels

            ### Resize the model outputs to the label dimensions
            fake_img = self.interp(fake_img)
            fake_gt = self.interp(fake_gt)
            lab_gt = self.interp(lab_gt)

            # fake_gt = fake_gt.data.max(1)[1].squeeze_(1).squeeze_(0)  ### drops the channel dim
            # fake_gt = fake_gt.unsqueeze(1)  ### back to a single channel
            # fake_gt = make_one_hot(fake_gt, args.dataset, args.gpu_ids)

            lab_loss_CE = self.CE(lab_gt, l_gt.squeeze(1))
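            # Note: nn.CrossEntropyLoss expects raw, unnormalized logits (it
            # applies log-softmax internally), so lab_loss_CE is computed on
            # lab_gt *before* the explicit softmax below; the softmaxed maps
            # are only needed as inputs to the cycle passes.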
            ### Apply the softmax activations again for the cycle passes
            lab_gt = self.activation_softmax(lab_gt)
            fake_gt = self.activation_softmax(fake_gt)
            # fake_gt = fake_gt.data.max(1)[1].squeeze_(1).squeeze_(0)
            # fake_gt = fake_gt.unsqueeze(1)
            # fake_gt = make_one_hot(fake_gt, args.dataset, args.gpu_ids)
            # fake_img = self.activation_tanh(fake_img)

            recon_img = self.Gis(fake_gt.float())
            recon_lab_img = self.Gis(lab_gt.float())
            recon_gt = self.Gsi(fake_img.float())

            ### Resize the model outputs to the correct dimensions
            recon_img = self.interp(recon_img)
            recon_lab_img = self.interp(recon_lab_img)
            recon_gt = self.interp(recon_gt)

            ### For the new loss between the recon_img of the resnet and deeplab networks
            resnet_fake_gt = self.old_Gsi(unl_img.float())
            resnet_lab_gt = self.old_Gsi(l_img)
            resnet_lab_gt = self.activation_softmax(resnet_lab_gt)
            resnet_fake_gt = self.activation_softmax(resnet_fake_gt)
            resnet_recon_img = self.old_Gis(resnet_fake_gt.float())
            resnet_recon_lab_img = self.old_Gis(resnet_lab_gt.float())

            ## Applying the tanh activations
            # recon_img = self.activation_tanh(recon_img)
            # recon_lab_img = self.activation_tanh(recon_lab_img)

            # Adversarial losses
            ###################################################
            fake_img_dis = self.Di(fake_img)
            resnet_fake_img_dis = self.old_Di(recon_img)

            ### Pass a discretized, one-hot version of fake_gt to Ds
            fake_gt_discriminator = fake_gt.data.max(1)[1].squeeze_(1).squeeze_(0)
            fake_gt_discriminator = fake_gt_discriminator.unsqueeze(1)
            fake_gt_discriminator = make_one_hot(fake_gt_discriminator, args.dataset, args.gpu_ids)
            fake_gt_dis = self.Ds(fake_gt_discriminator.float())
            # lab_gt_dis = self.Ds(lab_gt)

            real_label_gt = utils.cuda(Variable(torch.ones(fake_gt_dis.size())), args.gpu_ids)
            real_label_img = utils.cuda(Variable(torch.ones(fake_img_dis.size())), args.gpu_ids)

            # it might be better to use a cross-entropy loss here for classification
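            # LSGAN-style adversarial objective: the generators are trained to
            # push the discriminator outputs towards the all-ones "real" target
            # with an MSE loss rather than a BCE loss.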
            img_gen_loss = self.MSE(fake_img_dis, real_label_img)
            gt_gen_loss = self.MSE(fake_gt_dis, real_label_gt)
            # gt_label_gen_loss = self.MSE(lab_gt_dis, real_label)

            # Cycle consistency losses
            ###################################################
            resnet_img_cycle_loss = self.MSE(resnet_fake_img_dis, real_label_img)
            # img_cycle_loss = self.L1(recon_img, unl_img)
            # img_cycle_loss_perceptual = perceptual_loss(recon_img, unl_img, args.gpu_ids)
            gt_cycle_loss = self.CE(recon_gt, l_gt.squeeze(1))
            # lab_img_cycle_loss = self.L1(recon_lab_img, l_img) * args.lamda

            # Total generator losses
            ###################################################
            # lab_loss_CE = self.CE(lab_gt, l_gt.squeeze(1))
            lab_loss_MSE = self.L1(fake_img, l_img)
            # lab_loss_perceptual = perceptual_loss(fake_img, l_img, args.gpu_ids)

            fullsupervisedloss = args.lab_CE_weight * lab_loss_CE + args.lab_MSE_weight * lab_loss_MSE
            unsupervisedloss = (args.adversarial_weight * (img_gen_loss + gt_gen_loss)
                                + resnet_img_cycle_loss + gt_cycle_loss * args.lamda_gt)

            gen_loss = fullsupervisedloss + unsupervisedloss

            # Update generators
            ###################################################
            gen_loss.backward()
            self.g_optimizer.step()

            if counter % 1 == 0:  # i.e. update the discriminators on every iteration
                # Discriminator Computations
                #################################################
                set_grad([self.Di, self.Ds, self.old_Di], True)
                self.d_optimizer.zero_grad()

                # Sample from the history of generated images
                #################################################
                if torch.rand(1) < 0.0:  # disabled: the probability threshold is 0.0
                    fake_img = self.gauss_noise(fake_img.cpu())
                    fake_gt = self.gauss_noise(fake_gt.cpu())

                recon_img = Variable(torch.Tensor(new_img_fake_sample([recon_img.cpu().data.numpy()])[0]))
                fake_img = Variable(torch.Tensor(img_fake_sample([fake_img.cpu().data.numpy()])[0]))
                # lab_gt = Variable(torch.Tensor(gt_fake_sample([lab_gt.cpu().data.numpy()])[0]))
                fake_gt = Variable(torch.Tensor(gt_fake_sample([fake_gt.cpu().data.numpy()])[0]))

                recon_img, fake_img, fake_gt = utils.cuda([recon_img, fake_img, fake_gt], args.gpu_ids)

                # Forward pass through discriminators
                #################################################
                unl_img_dis = self.Di(unl_img)
                fake_img_dis = self.Di(fake_img)
                resnet_recon_img_dis = self.old_Di(resnet_recon_img)
                resnet_fake_img_dis = self.old_Di(recon_img)
                # lab_gt_dis = self.Ds(lab_gt)

                l_gt = make_one_hot(l_gt, args.dataset, args.gpu_ids)
                real_gt_dis = self.Ds(l_gt.float())

                fake_gt_discriminator = fake_gt.data.max(1)[1].squeeze_(1).squeeze_(0)
                fake_gt_discriminator = fake_gt_discriminator.unsqueeze(1)
                fake_gt_discriminator = make_one_hot(fake_gt_discriminator, args.dataset, args.gpu_ids)
                fake_gt_dis = self.Ds(fake_gt_discriminator.float())

                real_label_img = utils.cuda(Variable(torch.ones(unl_img_dis.size())), args.gpu_ids)
                fake_label_img = utils.cuda(Variable(torch.zeros(fake_img_dis.size())), args.gpu_ids)
                real_label_gt = utils.cuda(Variable(torch.ones(real_gt_dis.size())), args.gpu_ids)
                fake_label_gt = utils.cuda(Variable(torch.zeros(fake_gt_dis.size())), args.gpu_ids)

                # Discriminator losses
                ##################################################
                img_dis_real_loss = self.MSE(unl_img_dis, real_label_img)
                img_dis_fake_loss = self.MSE(fake_img_dis, fake_label_img)
                gt_dis_real_loss = self.MSE(real_gt_dis, real_label_gt)
                gt_dis_fake_loss = self.MSE(fake_gt_dis, fake_label_gt)
                # lab_gt_dis_fake_loss = self.MSE(lab_gt_dis, fake_label)
                cycle_img_dis_real_loss = self.MSE(resnet_recon_img_dis, real_label_img)
                cycle_img_dis_fake_loss = self.MSE(resnet_fake_img_dis, fake_label_img)

                # Total discriminator losses
                img_dis_loss = (img_dis_real_loss + img_dis_fake_loss) * 0.5
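                # Halving the summed real/fake terms is the convention from the
                # original CycleGAN training code: it slows the discriminators
                # down relative to the generators.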
                gt_dis_loss = (gt_dis_real_loss + gt_dis_fake_loss) * 0.5
                # lab_gt_dis_loss = (gt_dis_real_loss + lab_gt_dis_fake_loss) * 0.33
                cycle_img_dis_loss = cycle_img_dis_real_loss + cycle_img_dis_fake_loss

                # Update discriminators
                ##################################################
                discriminator_loss = args.discriminator_weight * (img_dis_loss + gt_dis_loss) + cycle_img_dis_loss
                discriminator_loss.backward()
                # lab_gt_dis_loss.backward()
                self.d_optimizer.step()

            print("Epoch: (%3d) (%5d/%5d) | Dis Loss:%.2e | Unlab Gen Loss:%.2e | Lab Gen loss:%.2e" %
                  (epoch, i + 1, min(len(labeled_loader), len(unlabeled_loader)),
                   img_dis_loss + gt_dis_loss, unsupervisedloss, fullsupervisedloss))

            self.writer_semisuper.add_scalars('Dis Loss', {
                'img_dis_loss': img_dis_loss,
                'gt_dis_loss': gt_dis_loss,
                'cycle_img_dis_loss': cycle_img_dis_loss
            }, len(labeled_loader) * epoch + i)
            self.writer_semisuper.add_scalars('Unlabelled Loss', {
                'img_gen_loss': img_gen_loss,
                'gt_gen_loss': gt_gen_loss,
                'img_cycle_loss': resnet_img_cycle_loss,
                'gt_cycle_loss': gt_cycle_loss
            }, len(labeled_loader) * epoch + i)
            self.writer_semisuper.add_scalars('Labelled Loss', {
                'lab_loss_CE': lab_loss_CE,
                'lab_loss_MSE': lab_loss_MSE
            }, len(labeled_loader) * epoch + i)

            counter += 1

        ### Compute the mean IoU on the validation set
        self.Gsi.eval()
        self.Gis.eval()
        with torch.no_grad():
            for i, (val_img, val_gt, _) in enumerate(val_loader):
                val_img, val_gt = utils.cuda([val_img, val_gt], args.gpu_ids)
                outputs = self.Gsi(val_img)
                outputs = self.interp(outputs)
                outputs = self.activation_softmax(outputs)
                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = val_gt.squeeze().data.cpu().numpy()
                self.running_metrics_val.update(gt, pred)

        score, class_iou = self.running_metrics_val.get_scores()
        self.running_metrics_val.reset()
        print('The mIoU for the epoch is: ', score["Mean IoU : \t"])

        ### Display the images produced by the generators on tensorboard, using validation images
        val_image, val_gt, _ = next(iter(val_loader))
        val_image, val_gt = utils.cuda([val_image, val_gt], args.gpu_ids)
        with torch.no_grad():
            fake_label = self.Gsi(val_image).detach()
            fake_label = self.interp(fake_label)
            fake_label = self.activation_softmax(fake_label)
            fake_label = fake_label.data.max(1)[1].squeeze_(1).squeeze_(0)
            fake_label = fake_label.unsqueeze(1)
            fake_label = make_one_hot(fake_label, args.dataset, args.gpu_ids)
            fake_img = self.Gis(fake_label).detach()
            fake_img = self.interp(fake_img)
            # fake_img = self.activation_tanh(fake_img)

            fake_img_from_labels = self.Gis(make_one_hot(val_gt, args.dataset, args.gpu_ids).float()).detach()
            fake_img_from_labels = self.interp(fake_img_from_labels)
            # fake_img_from_labels = self.activation_tanh(fake_img_from_labels)
            fake_label_regenerated = self.Gsi(fake_img_from_labels).detach()
            fake_label_regenerated = self.interp(fake_label_regenerated)
            fake_label_regenerated = self.activation_softmax(fake_label_regenerated)

        fake_prediction_label = fake_label.data.max(1)[1].squeeze_(1).cpu().numpy()
        fake_regenerated_label = fake_label_regenerated.data.max(1)[1].squeeze_(1).cpu().numpy()
        val_gt = val_gt.cpu()
        fake_img = fake_img.cpu()
        fake_img_from_labels = fake_img_from_labels.cpu()
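        # The inputs were normalized with a per-channel mean/std of 0.5, so
        # pixel values live in [-1, 1]; map them back to [0, 1] before
        # rendering. (Assumes get_transformation applies
        # Normalize(mean=0.5, std=0.5), which is what the constants below imply.)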
        ### Revert the input normalization on these images
        if self.args.dataset == 'voc2012' or self.args.dataset == 'cityscapes':
            trans_mean = [0.5, 0.5, 0.5]
            trans_std = [0.5, 0.5, 0.5]
            for i in range(3):
                fake_img[:, i, :, :] = (fake_img[:, i, :, :] * trans_std[i]) + trans_mean[i]
                fake_img_from_labels[:, i, :, :] = (fake_img_from_labels[:, i, :, :] * trans_std[i]) + trans_mean[i]
        elif self.args.dataset == 'acdc':
            trans_mean = [0.5]
            trans_std = [0.5]
            for i in range(1):
                fake_img[:, i, :, :] = (fake_img[:, i, :, :] * trans_std[i]) + trans_mean[i]
                fake_img_from_labels[:, i, :, :] = (fake_img_from_labels[:, i, :, :] * trans_std[i]) + trans_mean[i]

        ### display_tensor* are the final tensors that will be displayed on tensorboard
        display_tensor_label = torch.zeros([fake_label.shape[0], 3, fake_label.shape[2], fake_label.shape[3]])
        display_tensor_gt = torch.zeros([val_gt.shape[0], 3, val_gt.shape[2], val_gt.shape[3]])
        display_tensor_regen_label = torch.zeros([fake_label_regenerated.shape[0], 3,
                                                  fake_label_regenerated.shape[2],
                                                  fake_label_regenerated.shape[3]])

        for i in range(fake_prediction_label.shape[0]):
            new_img_label = fake_prediction_label[i]
            new_img_label = utils.colorize_mask(new_img_label, self.args.dataset)  ### the generated mask as a PIL.Image
            img_tensor_label = utils.PIL_to_tensor(new_img_label, self.args.dataset)
            display_tensor_label[i, :, :, :] = img_tensor_label
            display_tensor_gt[i, :, :, :] = val_gt[i]

            regen_label = fake_regenerated_label[i]
            regen_label = utils.colorize_mask(regen_label, self.args.dataset)
            regen_tensor_label = utils.PIL_to_tensor(regen_label, self.args.dataset)
            display_tensor_regen_label[i, :, :, :] = regen_tensor_label

        self.writer_semisuper.add_image(
            'Generated segmented image',
            torchvision.utils.make_grid(display_tensor_label, nrow=2, normalize=True), epoch)
        self.writer_semisuper.add_image(
            'Generated image back from segmentation',
            torchvision.utils.make_grid(fake_img, nrow=2, normalize=True), epoch)
        self.writer_semisuper.add_image(
            'Ground truth for the image',
            torchvision.utils.make_grid(display_tensor_gt, nrow=2, normalize=True), epoch)
        self.writer_semisuper.add_image(
            'Image generated from val labels',
            torchvision.utils.make_grid(fake_img_from_labels, nrow=2, normalize=True), epoch)
        self.writer_semisuper.add_image(
            'Labels generated back from the cycle',
            torchvision.utils.make_grid(display_tensor_regen_label, nrow=2, normalize=True), epoch)

        if score["Mean IoU : \t"] >= self.best_iou:
            self.best_iou = score["Mean IoU : \t"]

            # Override the latest checkpoint
            #######################################################
            utils.save_checkpoint({
                'epoch': epoch + 1,
                'Di': self.Di.state_dict(),
                'Ds': self.Ds.state_dict(),
                'Gis': self.Gis.state_dict(),
                'Gsi': self.Gsi.state_dict(),
                'd_optimizer': self.d_optimizer.state_dict(),
                'g_optimizer': self.g_optimizer.state_dict(),
                'best_iou': self.best_iou,
                'class_iou': class_iou
            }, '%s/latest_semisuper_cycleGAN.ckpt' % (args.checkpoint_dir))

        # Update learning rates
        ########################
        self.g_lr_scheduler.step()
        self.d_lr_scheduler.step()

    self.writer_semisuper.close()
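
# The train() below belongs to the fully supervised baseline: it trains only
# the segmentation generator Gsi with a cross-entropy loss; none of the GAN
# machinery above is used.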

def train(self, args):
    transform = get_transformation((self.args.crop_height, self.args.crop_width),
                                   resize=True, dataset=args.dataset)
    val_transform = get_transformation((512, 512), resize=True, dataset=args.dataset)

    # make the choice of dataset configurable
    if self.args.dataset == 'voc2012':
        labeled_set = VOCDataset(root_path=root, name='label', ratio=1.0,
                                 transformation=transform, augmentation=None)
        val_set = VOCDataset(root_path=root, name='val', ratio=0.5,
                             transformation=val_transform, augmentation=None)
        labeled_loader = DataLoader(labeled_set, batch_size=self.args.batch_size, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_set, batch_size=self.args.batch_size, shuffle=True)
    elif self.args.dataset == 'cityscapes':
        labeled_set = CityscapesDataset(root_path=root_cityscapes, name='label', ratio=0.5,
                                        transformation=transform, augmentation=None)
        val_set = CityscapesDataset(root_path=root_cityscapes, name='val', ratio=0.5,
                                    transformation=transform, augmentation=None)
        labeled_loader = DataLoader(labeled_set, batch_size=self.args.batch_size, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_set, batch_size=self.args.batch_size, shuffle=True, drop_last=True)
    elif self.args.dataset == 'acdc':
        labeled_set = ACDCDataset(root_path=root_acdc, name='label', ratio=0.5,
                                  transformation=transform, augmentation=None)
        val_set = ACDCDataset(root_path=root_acdc, name='val', ratio=0.5,
                              transformation=transform, augmentation=None)
        labeled_loader = DataLoader(labeled_set, batch_size=self.args.batch_size, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_set, batch_size=self.args.batch_size, shuffle=True, drop_last=True)

    img_fake_sample = utils.Sample_from_Pool()
    gt_fake_sample = utils.Sample_from_Pool()

    for epoch in range(self.start_epoch, self.args.epochs):
        self.Gsi.train()

        for i, (l_img, l_gt, img_name) in enumerate(labeled_loader):
            # step
            step = epoch * len(labeled_loader) + i + 1

            self.gsi_optimizer.zero_grad()

            l_img, l_gt = utils.cuda([l_img, l_gt], args.gpu_ids)

            lab_gt = self.Gsi(l_img)
            lab_gt = self.interp(lab_gt)  ### resize the model output to match the labels

            # Cross-entropy loss
            fullsupervisedloss = self.CE(lab_gt, l_gt.squeeze(1))

            fullsupervisedloss.backward()
            self.gsi_optimizer.step()

            print("Epoch: (%3d) (%5d/%5d) | Crossentropy Loss:%.2e" %
                  (epoch, i + 1, len(labeled_loader), fullsupervisedloss.item()))

            self.writer_supervised.add_scalars('Supervised Loss', {'CE Loss ': fullsupervisedloss},
                                               len(labeled_loader) * epoch + i)

        ### Compute the IoU on the validation set
        self.Gsi.eval()
        with torch.no_grad():
            for i, (val_img, val_gt, _) in enumerate(val_loader):
                val_img, val_gt = utils.cuda([val_img, val_gt], args.gpu_ids)
                outputs = self.Gsi(val_img)
                outputs = self.interp_val(outputs)
                outputs = self.activation_softmax(outputs)
                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = val_gt.squeeze().data.cpu().numpy()
                self.running_metrics_val.update(gt, pred)

        score, class_iou = self.running_metrics_val.get_scores()
        self.running_metrics_val.reset()

        ### Display the images produced by the generator on tensorboard
        val_img, val_gt, _ = next(iter(val_loader))
        val_img, val_gt = utils.cuda([val_img, val_gt], args.gpu_ids)
        with torch.no_grad():
            fake = self.Gsi(val_img).detach()
            fake = self.interp_val(fake)
            fake = self.activation_softmax(fake)
            fake_prediction = fake.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

        val_gt = val_gt.cpu()
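        # utils.colorize_mask maps each predicted class index to its palette
        # colour and returns a PIL image; the loop below packs those RGB images
        # (and the ground truth) into batched tensors for make_grid.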
        ### display_tensor* are the final tensors that will be displayed on tensorboard
        display_tensor = torch.zeros([fake.shape[0], 3, fake.shape[2], fake.shape[3]])
        display_tensor_gt = torch.zeros([val_gt.shape[0], 3, val_gt.shape[2], val_gt.shape[3]])
        for i in range(fake_prediction.shape[0]):
            new_img = fake_prediction[i]
            new_img = utils.colorize_mask(new_img, self.args.dataset)  ### the generated mask as a PIL.Image
            img_tensor = utils.PIL_to_tensor(new_img, self.args.dataset)
            display_tensor[i, :, :, :] = img_tensor
            display_tensor_gt[i, :, :, :] = val_gt[i]

        self.writer_supervised.add_image(
            'Generated segmented image',
            torchvision.utils.make_grid(display_tensor, nrow=2, normalize=True), epoch)
        self.writer_supervised.add_image(
            'Ground truth for the image',
            torchvision.utils.make_grid(display_tensor_gt, nrow=2, normalize=True), epoch)

        if score["Mean IoU : \t"] >= self.best_iou:
            self.best_iou = score["Mean IoU : \t"]

            # Override the latest checkpoint
            utils.save_checkpoint({
                'epoch': epoch + 1,
                'Gsi': self.Gsi.state_dict(),
                'gsi_optimizer': self.gsi_optimizer.state_dict(),
                'best_iou': self.best_iou,
                'class_iou': class_iou
            }, '%s/latest_supervised_model.ckpt' % (self.args.checkpoint_dir))

    self.writer_supervised.close()