def calculate_style_loss(x, epsilon=1e-5):
    """Sum of per-feature-map AdaIN-style losses.

    For each (true, pred) feature-map pair the loss is the MSE between their
    spatial means plus the MSE between their spatial standard deviations.

    Args:
        x: pair (y_trues, y_preds) of equal-length sequences of 4-D tensors.
        epsilon: small constant added to the variance before the square root
            for numerical stability.

    Returns:
        Scalar tensor: the sum of all per-pair losses.
    """
    targets, predictions = x
    per_map_losses = []
    for target, prediction in zip(targets, predictions):
        # First moment: match channel-wise means over the spatial axes (1, 2).
        mean_term = mse_loss(K.mean(target, axis=(1, 2)),
                             K.mean(prediction, axis=(1, 2)))
        # Second moment: match channel-wise standard deviations.
        std_term = mse_loss(K.sqrt(K.var(target, axis=(1, 2)) + epsilon),
                            K.sqrt(K.var(prediction, axis=(1, 2)) + epsilon))
        per_map_losses.append(mean_term + std_term)
    return K.sum(per_map_losses)
def forward_szn(self, data, target):
    """Run one forward pass of the seen/unseen zero-shot (SZN) model.

    Args:
        data: input batch tensor.
        target: tuple (label map, target embeddings).

    Returns:
        Tuple (fcn_score, loss, lbl_pred, lbl_true); lbl_true is the
        ground-truth label map moved to CPU.

    Raises:
        ValueError: if ``self.loss_func`` is not "cos" or "mse".
    """
    # Unpack the composite target: label map + embedding target.
    target, target_embed = target
    if self.cuda:
        data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)
    if self.cuda:
        target_embed = target_embed.cuda()
    target_embed = Variable(target_embed)

    # mode='both' returns the embedding score map and the seen-mask score.
    fcn_score, seen_mask_score = self.model(data, mode='both')

    # FCN loss on the embedding predictions.
    if self.loss_func == "cos":
        loss = utils.cosine_loss(fcn_score, target, target_embed)
    elif self.loss_func == "mse":
        loss = utils.mse_loss(fcn_score, target, target_embed)
    else:
        # Fix: an unrecognized loss_func previously fell through and crashed
        # later with an unbound-local error on `loss`; fail fast instead.
        raise ValueError("unsupported loss_func: %r" % (self.loss_func,))

    # Infer labels against both seen and unseen embedding dictionaries.
    lbl_pred = utils.infer_lbl_szn(fcn_score, seen_mask_score,
                                   self.seen_embeddings,
                                   self.unseen_embeddings, self.cuda)
    lbl_true = target.data.cpu()
    return fcn_score, loss, lbl_pred, lbl_true
def evaluate(model, data):
    """Evaluate the model on one (x, y, mask, idx) sample.

    Returns the squeezed model output, the masked MSE as a float, and its
    square root (RMSE).
    """
    model.eval()
    features, targets, mask, _idx = data
    with torch.no_grad():
        predictions, _ = model(features)
        predictions = predictions.squeeze(0)
        masked_loss = mse_loss(predictions, targets, mask)
    mse = masked_loss.item()
    rmse = np.sqrt(mse)
    return predictions, mse, rmse
def forward(self, x, edge_index, batch, num_graphs):
    """One training forward pass of the node/class disentangled graph VAE.

    Computes three weighted loss terms — node-level KL, grouped class-level
    KL, and reconstruction MSE — and calls .backward() on each of them here;
    the caller is expected to perform the optimizer step.

    Args:
        x: node feature matrix, or None (replaced by all-ones features).
        edge_index: graph connectivity passed to encoder and decoder.
        batch: node-to-graph assignment vector.
        num_graphs: number of graphs in the mini-batch; each loss is scaled
            by it.

    Returns:
        Tuple of Python floats: (reconstruction_error, class_kl, node_kl).
    """
    # batch_size = data.num_graphs
    if x is None:
        # Featureless graphs: substitute a constant 1.0 feature per node.
        x = torch.ones(batch.shape[0]).to(device)

    node_mu, node_logvar, class_mu, class_logvar = self.encoder(x, edge_index, batch)
    # .data detaches the class stats before pooling evidence per group.
    grouped_mu, grouped_logvar = accumulate_group_evidence(
        class_mu.data, class_logvar.data, batch, True
    )

    # kl-divergence error for style (node) latent space,
    # down-weighted by 1e-7 and scaled by the number of graphs.
    node_kl_divergence_loss = torch.mean(
        - 0.5 * torch.sum(1 + node_logvar - node_mu.pow(2) - node_logvar.exp())
    )
    node_kl_divergence_loss = 0.0000001 * node_kl_divergence_loss * num_graphs
    # retain_graph=True: the same graph is reused by the later backward calls.
    node_kl_divergence_loss.backward(retain_graph=True)

    # kl-divergence error for class latent space (same weighting scheme).
    class_kl_divergence_loss = torch.mean(
        - 0.5 * torch.sum(1 + grouped_logvar - grouped_mu.pow(2) - grouped_logvar.exp())
    )
    class_kl_divergence_loss = 0.0000001 * class_kl_divergence_loss * num_graphs
    class_kl_divergence_loss.backward(retain_graph=True)

    # reconstruct samples
    """
    sampling from group mu and logvar for each graph in mini-batch differently makes
    the decoder consider class latent embeddings as random noise and ignore them
    """
    node_latent_embeddings = reparameterize(training=True, mu=node_mu, logvar=node_logvar)
    class_latent_embeddings = group_wise_reparameterize(
        training=True, mu=grouped_mu, logvar=grouped_logvar, labels_batch=batch, cuda=True
    )

    # need to reduce MI between node and class latents (disabled experiment below)
    '''measure='JSD'
    mi_loss = local_global_loss_disen(node_latent_embeddings, class_latent_embeddings, edge_index, batch, measure)
    mi_loss.backward(retain_graph=True)'''

    reconstructed_node = self.decoder(node_latent_embeddings, class_latent_embeddings, edge_index)
    # check input feat first
    # print('recon ', x[0],reconstructed_node[0])

    # Reconstruction MSE, down-weighted by 0.1 and scaled by batch size.
    reconstruction_error = 0.1 * mse_loss(reconstructed_node, x) * num_graphs
    reconstruction_error.backward()

    return reconstruction_error.item(), class_kl_divergence_loss.item(), node_kl_divergence_loss.item()
def train(model, data, optimizer):
    """One optimization step on a single (x, y, mask, idx) sample.

    The training loss is the masked MSE plus the model's KLD term normalized
    by the mask sum. Gradients are clipped to a max norm of 1000.

    Returns the squeezed model output, the scalar loss value, and its
    square root.
    """
    model.train()
    inputs, targets, mask, _idx = data

    optimizer.zero_grad()
    predictions, KLD = model(inputs)
    predictions = predictions.squeeze(0)

    total = mse_loss(predictions, targets, mask) + KLD / torch.sum(mask)
    total.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1000.0)
    optimizer.step()

    mse = total.item()
    rmse = np.sqrt(mse)
    return predictions, mse, rmse
def fit(self, input_data, output_data, epochs, batch_size=1):
    """Train the network with per-sample (online) updates.

    Args:
        input_data: iterable of input vectors.
        output_data: iterable of target vectors, aligned with input_data.
        epochs: total number of training steps to run (one step per sample).
            Fix: this argument was previously ignored in favor of a
            hard-coded cap of 50 steps.
        batch_size: unused; kept for interface compatibility.

    Returns:
        None.
    """
    itr = 0
    # Keep sweeping the dataset until the requested number of steps is done;
    # a sweep in progress is always completed (same as the original loop).
    while itr < epochs:
        for X, Y in zip(input_data, output_data):
            # do feed forward
            self.forward(itr, X)
            print("I : ", self.layer_list[1].neurons)
            print("J : ", self.layer_list[2].neurons)
            print("D : ", Y)
            loss = utils.mse_loss(self.layer_list[-1].neurons, Y)
            print("loss : ", loss)
            # do backprop
            self.backward(itr, Y)
            utils.init_layers(self.layer_list)  # init every neurons to -1.
            itr = itr + 1  # increase step
    return None
def forward(self, data, target):
    """Forward pass for the pixel-embedding FCN trainer.

    Args:
        data: input image batch.
        target: label map, or (label map, target embeddings) when
            ``self.pixel_embeddings`` is set.

    Returns:
        Tuple (score, loss, lbl_pred, lbl_true).

    Raises:
        ValueError: if the loss is NaN, or if ``self.loss_func`` is not one
            of "cos", "mse", "cross_entropy".
    """
    # get score
    if self.pixel_embeddings:
        target, target_embed = target
    if self.cuda:
        data, target = data.cuda(), target.cuda()
    data, target = Variable(data), Variable(target)
    if self.pixel_embeddings:
        if self.cuda:
            target_embed = target_embed.cuda()
        target_embed = Variable(target_embed)
    score = self.model(data, mode='fcn')

    # get loss
    # NOTE(review): "cos"/"mse" read target_embed, which only exists when
    # pixel_embeddings is set — presumably those configs always co-occur.
    if self.loss_func == "cos":
        loss = utils.cosine_loss(score, target, target_embed)
    elif self.loss_func == "mse":
        loss = utils.mse_loss(score, target, target_embed)
    elif self.loss_func == "cross_entropy":
        loss = utils.cross_entropy2d(score, target, size_average=False)
    else:
        # Fix: an unrecognized loss_func previously fell through and crashed
        # below with an unbound-local error on `loss`; fail fast instead.
        raise ValueError("unsupported loss_func: %r" % (self.loss_func,))
    if np.isnan(float(loss.data[0])):
        raise ValueError('loss is nan while training')

    # inference
    if self.pixel_embeddings:
        if self.forced_unseen:
            lbl_pred = utils.infer_lbl_forced_unseen(
                score, target, self.seen_embeddings, self.unseen_embeddings,
                self.unseen, self.cuda)
        else:
            lbl_pred = utils.infer_lbl(score, self.embeddings, self.cuda)
    else:
        # Plain per-pixel argmax over class scores.
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
    lbl_true = target.data.cpu()
    return score, loss, lbl_pred, lbl_true
def train(self):
    """Main training loop of the AdaVAE trainer.

    For every mini-batch it computes a weighted sum of KL divergence,
    reconstruction MSE, and pairwise fusion-consistency MSE, then takes one
    optimizer step. Losses are logged to tqdm and collected into an array
    saved as ``<sample_path>/loss.npy``.

    NOTE(review): this file's indentation was flattened; the nesting of the
    save/log statements was reconstructed from context — confirm against the
    original repository.
    """
    loss_collector = []
    pbar_epoch = tqdm(total=self.max_epoch, desc='[Epoch]')
    max_iteration = int(len(self.dataset) / self.batch_size)
    for epoch in range(self.max_epoch):
        pbar_iteration = tqdm(total=max_iteration, desc='[Iteration]')
        iteration = 0
        for data_x_s, data_x_t, data_c_s, data_c_t in self.dataloader:
            # Last batch may be smaller; resize the persistent buffers to fit.
            self.batch_size = data_x_s.size(0)
            self.x_s.resize_(self.batch_size, 50, 4)
            self.x_t.resize_(self.batch_size, 50, 4)
            self.c_s.resize_(self.batch_size, 1, self.image_size, self.image_size)
            self.c_t.resize_(self.batch_size, 1, self.image_size, self.image_size)
            pbar_iteration.update(1)
            iteration += 1
            # Copy the loaded batch into the pre-allocated buffers.
            self.x_s.copy_(data_x_s)
            self.x_t.copy_(data_x_t)
            self.c_s.copy_(data_c_s)
            self.c_t.copy_(data_c_t)
            x_s_, x_t_, z_s_bag, z_t_bag, cons_bag = self.ada_vae(
                self.x_s, self.x_t, self.c_s, self.c_t)

            # kl divergence (source + target latent bags)
            kld_s = kld_loss(z_s_bag)
            kld_t = kld_loss(z_t_bag)
            kld = kld_s + kld_t

            # reconstruction loss (source + target)
            recon_s = mse_loss(self.x_s, x_s_)
            recon_t = mse_loss(self.x_t, x_t_)
            recon = recon_s + recon_t

            # fusion loss: consistency between the four pairs in cons_bag
            fusion_loss_1 = mse_loss(cons_bag[0], cons_bag[1])
            fusion_loss_2 = mse_loss(cons_bag[2], cons_bag[3])
            fusion_loss_3 = mse_loss(cons_bag[4], cons_bag[5])
            fusion_loss_4 = mse_loss(cons_bag[6], cons_bag[7])
            fusion = fusion_loss_1 + fusion_loss_2 + fusion_loss_3 + fusion_loss_4

            # total loss: weighted combination of the three terms
            total_loss = self.alpha * recon + self.beta * kld + self.gamma * fusion

            self.optimizer.zero_grad()
            total_loss.backward()
            self.optimizer.step()

            # Periodic progress report and loss bookkeeping.
            if iteration % self.print_iter == 0:
                pbar_iteration.write(
                    '[%d/%d] kld: %.6f, recon: %.6f, fusion: %.6f, total_loss: %.6f'
                    % (iteration, max_iteration,
                       kld.detach().cpu().numpy(),
                       recon.detach().cpu().numpy(),
                       fusion.detach().cpu().numpy(),
                       total_loss.detach().cpu().numpy()))
                loss_collector.append([
                    epoch, iteration,
                    kld.detach().cpu().numpy(),
                    recon.detach().cpu().numpy(),
                    fusion.detach().cpu().numpy(),
                    total_loss.detach().cpu().numpy()
                ])

        # save model
        if epoch % self.save_epoch == 0:
            self.save_model()
            pbar_iteration.write('[*] Save one model')
        # Persist the collected loss history every epoch.
        np.save(self.sample_path + '/loss.npy', np.array(loss_collector))
        pbar_iteration.close()
        pbar_epoch.update(1)
    pbar_epoch.write("[*] Training stage finishes")
    pbar_epoch.close()
def main():
    """Train the background-branch COCO GAN (Generator_BG vs. Discriminator).

    Parses CLI options, builds the COCO mask-conditioned dataset, then runs
    an alternating GAN loop: ``critic_iter`` discriminator updates per
    generator update. The generator loss adds a VGG feature-matching term
    and a branch-similarity MSE term. Periodically saves sample images and
    model checkpoints under ``result_bg/``.

    NOTE(review): this code uses legacy PyTorch idioms — ``Variable``,
    ``data_iter.next()`` (Python-2 style; ``next(data_iter)`` on Python 3),
    ``transforms.Scale`` (renamed ``Resize``), and ``scheduler.step()``
    called before the optimizer steps — consistent with the old API the
    file targets; confirm the installed versions before modernizing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', type=str, default='log', help='Name of the log folder')
    parser.add_argument('--save_models', type=bool, default=True, help='Set True if you want to save trained models')
    parser.add_argument('--pre_trained_model_path', type=str, default=None, help='Pre-trained model path')
    parser.add_argument('--pre_trained_model_epoch', type=str, default=None, help='Pre-trained model epoch e.g 200')
    parser.add_argument('--train_imgs_path', type=str, default='/mnt/sdb/data/COCO/train2017', help='Path to training images')
    parser.add_argument('--train_annotation_path', type=str, default='/mnt/sdb/data/COCO/annotations/instances_train2017.json', help='Path to annotation file, .json file')
    parser.add_argument('--category_names', type=str, default='giraffe,elephant,zebra,sheep,cow,bear', help='List of categories in MS-COCO dataset')
    parser.add_argument('--num_test_img', type=int, default=4, help='Number of images saved during training')
    parser.add_argument('--img_size', type=int, default=256, help='Generated image size')
    parser.add_argument('--local_patch_size', type=int, default=256, help='Image size of instance images after interpolation')
    parser.add_argument('--batch_size', type=int, default=4, help='Mini-batch size')
    parser.add_argument('--train_epoch', type=int, default=400, help='Maximum training epoch')
    parser.add_argument('--lr', type=float, default=0.0002, help='Initial learning rate')
    parser.add_argument('--optim_step_size', type=int, default=80, help='Learning rate decay step size')
    parser.add_argument('--optim_gamma', type=float, default=0.5, help='Learning rate decay ratio')
    parser.add_argument('--critic_iter', type=int, default=5, help='Number of discriminator update against each generator update')
    parser.add_argument('--noise_size', type=int, default=256, help='Noise vector size')
    parser.add_argument('--lambda_FM', type=float, default=1, help='Trade-off param for feature matching loss')
    parser.add_argument('--lambda_branch', type=float, default=100, help='Trade-off param for reconstruction loss')
    parser.add_argument('--num_res_blocks', type=int, default=2, help='Number of residual block in generator shared part')
    parser.add_argument('--num_res_blocks_fg', type=int, default=2, help='Number of residual block in non-bg branch')
    parser.add_argument('--num_res_blocks_bg', type=int, default=0, help='Number of residual block in generator bg branch')
    opt = parser.parse_args()
    print(opt)

    #Create log folder
    root = 'result_bg/'
    model = 'coco_model_'
    result_folder_name = 'images_' + opt.log_dir
    model_folder_name = 'models_' + opt.log_dir
    if not os.path.isdir(root):
        os.mkdir(root)
    if not os.path.isdir(root + result_folder_name):
        os.mkdir(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.mkdir(root + model_folder_name)

    #Save the script (copies this file into the result folder for provenance)
    copyfile(os.path.basename(__file__), root + result_folder_name + '/' + os.path.basename(__file__))

    #Define transformation for dataset images - e.g scaling
    transform = transforms.Compose([
        transforms.Scale((opt.img_size, opt.img_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    #Load dataset
    category_names = opt.category_names.split(',')
    dataset = CocoData(root=opt.train_imgs_path,
                       annFile=opt.train_annotation_path,
                       category_names=category_names,
                       transform=transform,
                       final_img_size=opt.img_size)
    #Discard images contain very small instances
    dataset.discard_small(min_area=0.0, max_area=1)
    #dataset.discard_bad_examples('bad_examples_list.txt')

    #Define data loader
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)

    #For evaluation define fixed masks and noises (reused across epochs)
    data_iter = iter(train_loader)
    sample_batched = data_iter.next()
    y_fixed = sample_batched['seg_mask'][0:opt.num_test_img]
    y_fixed = Variable(y_fixed.cuda())
    z_fixed = torch.randn((opt.num_test_img, opt.noise_size))
    z_fixed = Variable(z_fixed.cuda())

    #Define networks
    G_bg = Generator_BG(z_dim=opt.noise_size,
                        label_channel=len(category_names),
                        num_res_blocks=opt.num_res_blocks,
                        num_res_blocks_fg=opt.num_res_blocks_fg,
                        num_res_blocks_bg=opt.num_res_blocks_bg)
    D_glob = Discriminator(channels=3 + len(category_names), input_size=opt.img_size)
    G_bg.cuda()
    D_glob.cuda()

    #Load parameters from pre-trained models
    if opt.pre_trained_model_path != None and opt.pre_trained_model_epoch != None:
        try:
            G_bg.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'G_bg_epoch_' + opt.pre_trained_model_epoch))
            D_glob.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_glob_epoch_' + opt.pre_trained_model_epoch))
            print('Parameters are loaded!')
        except:
            # Best-effort load: training continues from scratch on failure.
            print('Error: Pre-trained parameters are not loaded!')
            pass

    #Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()

    #Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.cuda()

    #Define optimizer
    G_local_optimizer = optim.Adam(G_bg.parameters(), lr=opt.lr, betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(filter(lambda p: p.requires_grad, D_glob.parameters()),
                                   lr=opt.lr, betas=(0.0, 0.9))
    #Deine learning rate scheduler
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer, step_size=opt.optim_step_size, gamma=opt.optim_gamma)
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer, step_size=opt.optim_step_size, gamma=opt.optim_gamma)

    #----------------------------TRAIN---------------------------------------
    print('training start!')
    start_time = time.time()
    for epoch in range(opt.train_epoch):
        scheduler_G.step()
        scheduler_D.step()
        D_local_losses = []
        G_local_losses = []
        # Real/fake label targets for the discriminator BCE loss.
        y_real_ = torch.ones(opt.batch_size)
        y_fake_ = torch.zeros(opt.batch_size)
        y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())
        epoch_start_time = time.time()
        data_iter = iter(train_loader)
        num_iter = 0
        while num_iter < len(train_loader):
            # critic_iter discriminator updates per generator update.
            j = 0
            while j < opt.critic_iter and num_iter < len(train_loader):
                j += 1
                sample_batched = data_iter.next()
                num_iter += 1
                x_ = sample_batched['image']
                y_ = sample_batched['seg_mask']
                # Collapse per-category masks into one binary foreground mask.
                y_reduced = torch.sum(y_, 1).view(y_.size(0), 1, y_.size(2), y_.size(3))
                y_reduced = torch.clamp(y_reduced, 0, 1)
                y_reduced = Variable(y_reduced.cuda())
                #Update discriminators - D
                #Real examples
                D_glob.zero_grad()
                mini_batch = x_.size()[0]
                if mini_batch != opt.batch_size:
                    # Last batch may be smaller; rebuild the label targets.
                    y_real_ = torch.ones(mini_batch)
                    y_fake_ = torch.zeros(mini_batch)
                    y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())
                x_, y_ = Variable(x_.cuda()), Variable(y_.cuda())
                x_d = torch.cat([x_, y_], 1)
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_)
                D_real_loss.backward()
                #Fake examples
                z_ = torch.randn((mini_batch, opt.noise_size))
                z_ = Variable(z_.cuda())
                #Generate fake images
                G_result, G_result_bg = G_bg(z_, y_)
                G_result_d = torch.cat([G_result, y_], 1)
                # detach: D update must not backprop into the generator.
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_)
                D_fake_loss.backward()
                D_local_optimizer.step()
                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())

            #Update generator G
            G_bg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            G_train_loss = BCE_loss(D_result, y_real_)
            #Feature matching loss between generated image and corresponding ground truth
            FM_loss = criterionVGG(G_result, x_)
            #Branch-similar loss: full output and bg branch must agree outside the mask
            branch_sim_loss = mse_loss(torch.mul(G_result, (1 - y_reduced)),
                                       torch.mul(G_result_bg, (1 - y_reduced)))
            total_loss = G_train_loss + opt.lambda_FM * FM_loss + opt.lambda_branch * branch_sim_loss
            total_loss.backward()
            G_local_optimizer.step()
            G_local_losses.append(G_train_loss.item())

            print('loss_d: %.3f, loss_g: %.3f' % (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                print('%d - %d complete!' % ((epoch + 1), num_iter))
                print(result_folder_name)

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f'
              % ((epoch + 1), opt.train_epoch, per_epoch_ptime,
                 torch.mean(torch.FloatTensor(D_local_losses)),
                 torch.mean(torch.FloatTensor(G_local_losses))))

        #Save images (fixed noise/mask so samples are comparable over epochs)
        G_bg.eval()
        G_result, G_result_bg = G_bg(z_fixed, y_fixed)
        G_bg.train()
        if epoch % 10 == 0:
            for t in range(y_fixed.size()[1]):
                show_result((epoch + 1), y_fixed[:, t:t + 1, :, :], save=True,
                            path=root + result_folder_name + '/' + model + str(epoch + 1) + '_masked.png')
            show_result((epoch + 1), G_result, save=True,
                        path=root + result_folder_name + '/' + model + str(epoch + 1) + '.png')
            show_result((epoch + 1), G_result_bg, save=True,
                        path=root + result_folder_name + '/' + model + str(epoch + 1) + '_bg.png')

        #Save model params
        if opt.save_models and (epoch > 21 and epoch % 10 == 0):
            torch.save(G_bg.state_dict(),
                       root + model_folder_name + '/' + model + 'G_bg_epoch_' + str(epoch) + '.pth')
            torch.save(D_glob.state_dict(),
                       root + model_folder_name + '/' + model + 'D_glob_epoch_' + str(epoch) + '.pth')

    end_time = time.time()
    total_ptime = end_time - start_time
    print("Training finish!... save training results")
    print('Training time: ' + str(total_ptime))
# Fragment of a VAE training epoch (the enclosing function/loop and the code
# consuming kl_loss1 are outside this view).
epoch_loss = 0
for iteration in range(len(dataset) // BATCH_SIZE):
    # load a batch of videos
    X_in = next(loader).float().cuda()
    # Flatten each sample for the pixel-wise reconstruction target.
    Y_flat = X_in.view(X_in.size()[0], -1)
    optimizer.zero_grad()
    X1, KL1, muL1, det_q1 = encoder(X_in)
    dec = decoder(X1)
    # calculate recon loss
    dec_flat = dec.view(dec.size()[0], -1)
    img_loss = mse_loss(Y_flat, dec_flat)
    # retain_graph=True: the encoder graph is reused by the KL term below.
    img_loss.backward(retain_graph=True)
    # sigma_q = L * L^T built from the Cholesky-like factor KL1
    # (assumes KL1 is a 4-D batch of matrices — TODO confirm).
    sigma_q1 = torch.einsum('ijkl,ijlm->ijkm', KL1,
                            torch.einsum('ijkl->ijlk', KL1))
    mul1_transpose = torch.transpose(muL1, dim0=1, dim1=2)
    if (ZERO_MEAN_FEA):
        # KL with an explicit (non-zero) prior mean.
        mu_p_transpose = get_prior_mean(FEA_MEAN_S, FEA_MEAN_E)
        kl_loss1 = KL_loss_L1(sigma_p_inv, sigma_q1, mul1_transpose,
                              mu_p_transpose, det_p, det_q1)
    else:
        # KL against a zero-mean prior.
        kl_loss1 = KL_loss_L1_without_mean(sigma_p_inv, sigma_q1,
                                           mul1_transpose, det_p, det_q1)
def training_procedure(FLAGS):
    """Train a style/class disentangled autoencoder on MNIST.

    Per mini-batch: three losses (style KL, grouped class KL, reconstruction
    MSE) are backpropagated separately, then one optimizer step is taken.
    Progress is printed every 50 iterations, appended to ``FLAGS.log_file``,
    and sent to TensorBoard; checkpoints are saved every 5 epochs.

    NOTE(review): indentation was flattened in this file; the nesting of the
    log/TensorBoard writes inside the 50-iteration block was reconstructed
    from context — confirm against the original repository.
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)
    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))

    """
    variable definition
    """
    # Persistent input buffer; batches are copied into it each iteration.
    X = torch.FloatTensor(FLAGS.batch_size, 1, FLAGS.image_size, FLAGS.image_size)

    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        X = X.cuda()

    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tStyle_KL_divergence_loss\tClass_KL_divergence_loss\n'
            )

    # load data set and create data loader instance
    print('Loading MNIST dataset...')
    mnist = datasets.MNIST(root='mnist', download=True, train=True,
                           transform=transform_config)
    loader = cycle(
        DataLoader(mnist, batch_size=FLAGS.batch_size, shuffle=True,
                   num_workers=0, drop_last=True))

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        for iteration in range(int(len(mnist) / FLAGS.batch_size)):
            # load a mini-batch
            image_batch, labels_batch = next(loader)

            # set zero_grad for the optimizer
            auto_encoder_optimizer.zero_grad()

            X.copy_(image_batch)

            style_mu, style_logvar, class_mu, class_logvar = encoder(Variable(X))
            # Pool class-latent evidence across samples sharing a label.
            grouped_mu, grouped_logvar = accumulate_group_evidence(
                class_mu.data, class_logvar.data, labels_batch, FLAGS.cuda)

            # kl-divergence error for style latent space,
            # normalized by the number of pixels in the batch.
            style_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar - style_mu.pow(2) - style_logvar.exp()))
            style_kl_divergence_loss /= (FLAGS.batch_size * FLAGS.num_channels *
                                         FLAGS.image_size * FLAGS.image_size)
            style_kl_divergence_loss.backward(retain_graph=True)

            # kl-divergence error for class latent space (same normalization)
            class_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + grouped_logvar - grouped_mu.pow(2) - grouped_logvar.exp()))
            class_kl_divergence_loss /= (FLAGS.batch_size * FLAGS.num_channels *
                                         FLAGS.image_size * FLAGS.image_size)
            class_kl_divergence_loss.backward(retain_graph=True)

            # reconstruct samples
            """
            sampling from group mu and logvar for each image in mini-batch differently makes
            the decoder consider class latent embeddings as random noise and ignore them
            """
            style_latent_embeddings = reparameterize(training=True, mu=style_mu,
                                                     logvar=style_logvar)
            class_latent_embeddings = group_wise_reparameterize(
                training=True, mu=grouped_mu, logvar=grouped_logvar,
                labels_batch=labels_batch, cuda=FLAGS.cuda)

            reconstructed_images = decoder(style_latent_embeddings,
                                           class_latent_embeddings)
            reconstruction_error = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_images, Variable(X))
            reconstruction_error.backward()

            auto_encoder_optimizer.step()

            if (iteration + 1) % 50 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))
                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('Style KL-Divergence loss: ' +
                      str(style_kl_divergence_loss.data.storage().tolist()[0]))
                print('Class KL-Divergence loss: ' +
                      str(class_kl_divergence_loss.data.storage().tolist()[0]))

                # write to log
                with open(FLAGS.log_file, 'a') as log:
                    log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                        epoch, iteration,
                        reconstruction_error.data.storage().tolist()[0],
                        style_kl_divergence_loss.data.storage().tolist()[0],
                        class_kl_divergence_loss.data.storage().tolist()[0]))

                # write to tensorboard
                writer.add_scalar(
                    'Reconstruction loss',
                    reconstruction_error.data.storage().tolist()[0],
                    epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar(
                    'Style KL-Divergence loss',
                    style_kl_divergence_loss.data.storage().tolist()[0],
                    epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar(
                    'Class KL-Divergence loss',
                    class_kl_divergence_loss.data.storage().tolist()[0],
                    epoch * (int(len(mnist) / FLAGS.batch_size) + 1) + iteration)

        # save checkpoints after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))
def train(self, data, all_y_trues):
    """Train the 2-2-1 network with plain per-sample gradient descent.

    - data: (n x 2) numpy array, one sample per row.
    - all_y_trues: numpy array of n target values, aligned with data.

    Every 10 epochs, evaluates and prints the MSE over the full dataset.
    """
    learn_rate = 0.1
    epochs = 1000  # number of times to loop through the entire dataset

    for epoch in range(epochs):
        for x, y_true in zip(data, all_y_trues):
            # --- Forward pass, keeping pre-activations for backprop.
            z_h1 = self.w1 * x[0] + self.w2 * x[1] + self.b1
            h1 = sigmoid(z_h1)
            z_h2 = self.w3 * x[0] + self.w4 * x[1] + self.b2
            h2 = sigmoid(z_h2)
            z_o1 = self.w5 * h1 + self.w6 * h2 + self.b3
            o1 = sigmoid(z_o1)
            y_pred = o1

            # --- Chain-rule partials. Sigmoid derivatives are computed once
            # per neuron and reused (same values as the repeated calls).
            dL_dypred = -2 * (y_true - y_pred)
            g_o1 = derive_sigmoid(z_o1)
            g_h1 = derive_sigmoid(z_h1)
            g_h2 = derive_sigmoid(z_h2)

            # Output neuron o1
            dypred_dw5 = h1 * g_o1
            dypred_dw6 = h2 * g_o1
            dypred_db3 = g_o1
            dypred_dh1 = self.w5 * g_o1
            dypred_dh2 = self.w6 * g_o1

            # Hidden neuron h1
            dh1_dw1 = x[0] * g_h1
            dh1_dw2 = x[1] * g_h1
            dh1_db1 = g_h1

            # Hidden neuron h2
            dh2_dw3 = x[0] * g_h2
            dh2_dw4 = x[1] * g_h2
            dh2_db2 = g_h2

            # --- SGD updates (gradients factored through the hidden units).
            grad_h1 = dL_dypred * dypred_dh1
            grad_h2 = dL_dypred * dypred_dh2
            self.w1 -= learn_rate * grad_h1 * dh1_dw1
            self.w2 -= learn_rate * grad_h1 * dh1_dw2
            self.b1 -= learn_rate * grad_h1 * dh1_db1
            self.w3 -= learn_rate * grad_h2 * dh2_dw3
            self.w4 -= learn_rate * grad_h2 * dh2_dw4
            self.b2 -= learn_rate * grad_h2 * dh2_db2
            self.w5 -= learn_rate * dL_dypred * dypred_dw5
            self.w6 -= learn_rate * dL_dypred * dypred_dw6
            self.b3 -= learn_rate * dL_dypred * dypred_db3

        # --- Calculate total loss at the end of each epoch
        if epoch % 10 == 0:
            y_preds = np.apply_along_axis(self.feedforward, 1, data)
            loss = mse_loss(all_y_trues, y_preds)
            print("Epoch %d loss: %.3f" % (epoch, loss))
def training_procedure(FLAGS):
    """Train the disentangled autoencoder on double multivariate-normal
    time-series datasets found under ``./data``.

    Iterates over every dataset directory whose name's third ``_``-separated
    field matches 'theta=-1', training a fresh loop per dataset: per batch,
    style KL, grouped class KL, and reconstruction MSE are backpropagated
    separately before one optimizer step.

    NOTE(review): indentation was flattened in this file; nesting of the
    logging/checkpoint statements was reconstructed from context — confirm
    against the original repository.
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)
    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))

    """
    variable definition
    """
    # Persistent flat input buffer (784 features per sample).
    X = torch.FloatTensor(FLAGS.batch_size, 784)

    '''
    run on GPU if GPU is available
    '''
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    encoder.to(device=device)
    decoder.to(device=device)
    X = X.to(device=device)

    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    """
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tStyle_KL_divergence_loss\tClass_KL_divergence_loss\n'
            )

    # load data set and create data loader instance
    dirs = os.listdir(os.path.join(os.getcwd(), 'data'))
    print('Loading double multivariate normal time series data...')
    for dsname in dirs:
        params = dsname.split('_')
        # NOTE(review): ('theta=-1') is a plain string, not a tuple, so this
        # is a substring test on params[2] — confirm that is the intent.
        if params[2] in ('theta=-1'):
            print('Running dataset ', dsname)
            ds = DoubleMulNormal(dsname)
            # ds = experiment3(1000, 50, 3)
            loader = cycle(
                DataLoader(ds, batch_size=FLAGS.batch_size, shuffle=True,
                           drop_last=True))

            # initialize summary writer
            writer = SummaryWriter()

            for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
                print()
                print(
                    'Epoch #' + str(epoch) +
                    '........................................................')

                # the total loss at each epoch after running iterations of batches
                total_loss = 0

                for iteration in range(int(len(ds) / FLAGS.batch_size)):
                    # load a mini-batch
                    image_batch, labels_batch = next(loader)

                    # set zero_grad for the optimizer
                    auto_encoder_optimizer.zero_grad()

                    X.copy_(image_batch)

                    style_mu, style_logvar, class_mu, class_logvar = encoder(Variable(X))
                    # Pool class-latent evidence across samples sharing a label.
                    grouped_mu, grouped_logvar = accumulate_group_evidence(
                        class_mu.data, class_logvar.data, labels_batch, FLAGS.cuda)

                    # kl-divergence error for style latent space
                    style_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                        -0.5 * torch.sum(1 + style_logvar - style_mu.pow(2) - style_logvar.exp()))
                    style_kl_divergence_loss /= (FLAGS.batch_size * FLAGS.num_channels *
                                                 FLAGS.image_size * FLAGS.image_size)
                    style_kl_divergence_loss.backward(retain_graph=True)

                    # kl-divergence error for class latent space
                    class_kl_divergence_loss = FLAGS.kl_divergence_coef * (
                        -0.5 * torch.sum(1 + grouped_logvar - grouped_mu.pow(2) - grouped_logvar.exp()))
                    class_kl_divergence_loss /= (FLAGS.batch_size * FLAGS.num_channels *
                                                 FLAGS.image_size * FLAGS.image_size)
                    class_kl_divergence_loss.backward(retain_graph=True)

                    # reconstruct samples
                    """
                    sampling from group mu and logvar for each image in mini-batch differently makes
                    the decoder consider class latent embeddings as random noise and ignore them
                    """
                    style_latent_embeddings = reparameterize(
                        training=True, mu=style_mu, logvar=style_logvar)
                    class_latent_embeddings = group_wise_reparameterize(
                        training=True, mu=grouped_mu, logvar=grouped_logvar,
                        labels_batch=labels_batch, cuda=FLAGS.cuda)

                    reconstructed_images = decoder(style_latent_embeddings,
                                                   class_latent_embeddings)
                    reconstruction_error = FLAGS.reconstruction_coef * mse_loss(
                        reconstructed_images, Variable(X))
                    reconstruction_error.backward()

                    total_loss += style_kl_divergence_loss + class_kl_divergence_loss + reconstruction_error

                    auto_encoder_optimizer.step()

                    if (iteration + 1) % 50 == 0:
                        print('\tIteration #' + str(iteration))
                        print('Reconstruction loss: ' + str(
                            reconstruction_error.data.storage().tolist()[0]))
                        print('Style KL loss: ' +
                              str(style_kl_divergence_loss.data.storage().tolist()[0]))
                        print('Class KL loss: ' +
                              str(class_kl_divergence_loss.data.storage().tolist()[0]))

                        # write to log
                        with open(FLAGS.log_file, 'a') as log:
                            log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                                epoch, iteration,
                                reconstruction_error.data.storage().tolist()[0],
                                style_kl_divergence_loss.data.storage().tolist()[0],
                                class_kl_divergence_loss.data.storage().tolist()[0]))

                        # write to tensorboard
                        writer.add_scalar(
                            'Reconstruction loss',
                            reconstruction_error.data.storage().tolist()[0],
                            epoch * (int(len(ds) / FLAGS.batch_size) + 1) + iteration)
                        writer.add_scalar(
                            'Style KL-Divergence loss',
                            style_kl_divergence_loss.data.storage().tolist()[0],
                            epoch * (int(len(ds) / FLAGS.batch_size) + 1) + iteration)
                        writer.add_scalar(
                            'Class KL-Divergence loss',
                            class_kl_divergence_loss.data.storage().tolist()[0],
                            epoch * (int(len(ds) / FLAGS.batch_size) + 1) + iteration)

                    # Extra early checkpoints during the very first epoch.
                    if epoch == 0 and (iteration + 1) % 50 == 0:
                        torch.save(
                            encoder.state_dict(),
                            os.path.join('checkpoints', 'encoder_' + dsname))
                        torch.save(
                            decoder.state_dict(),
                            os.path.join('checkpoints', 'decoder_' + dsname))

                # save checkpoints after every 10 epochs
                if (epoch + 1) % 10 == 0 or (epoch + 1) == FLAGS.end_epoch:
                    torch.save(
                        encoder.state_dict(),
                        os.path.join('checkpoints', 'encoder_' + dsname))
                    torch.save(
                        decoder.state_dict(),
                        os.path.join('checkpoints', 'decoder_' + dsname))

                print('Total loss at current epoch: ', total_loss.item())
def loss(self):
    """Mean-squared-error loss between the inference output and the input tensor."""
    predicted = self.inference
    expected = self.x
    return digits.mse_loss(predicted, expected)
def training_procedure(FLAGS):
    """
    model definition

    Trains a style/class-disentangled auto-encoder (encoder + decoder) with
    optional forward/reverse GAN terms on the CIFAR paired dataset, logging
    to a TSV file and TensorBoard and periodically writing image grids and
    checkpoints. All hyper-parameters come from the FLAGS namespace.
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)
    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)
    discriminator = Discriminator()
    discriminator.apply(weights_init)
    # load saved models if load_saved flag is true
    # NOTE(review): the raise makes the two load_state_dict calls below
    # unreachable dead code — resume-from-checkpoint is intentionally disabled.
    if FLAGS.load_saved:
        raise Exception('This is not implemented')
        encoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
    """
    variable definition
    """
    # Pre-allocated input buffers; mini-batches are copied into them in-place
    # each iteration (legacy pre-0.4 PyTorch Variable style).
    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    # Buffer for the random style code drawn fresh in every reverse-cycle step.
    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)
    """
    loss definitions
    """
    # NOTE(review): cross_entropy_loss is constructed (and moved to GPU) but
    # never used in this function — confirm whether it can be removed.
    cross_entropy_loss = nn.CrossEntropyLoss()
    adversarial_loss = nn.BCELoss()
    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()
        cross_entropy_loss.cuda()
        adversarial_loss.cuda()
        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()
        style_latent_space = style_latent_space.cuda()
    """
    optimizer and scheduler definition
    """
    auto_encoder_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )
    # Reverse cycle only updates the encoder.
    reverse_cycle_optimizer = optim.Adam(
        list(encoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )
    generator_optimizer = optim.Adam(
        list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )
    discriminator_optimizer = optim.Adam(
        list(discriminator.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )
    # divide the learning rate by a factor of 10 after 80 epochs
    auto_encoder_scheduler = optim.lr_scheduler.StepLR(auto_encoder_optimizer, step_size=80, gamma=0.1)
    reverse_cycle_scheduler = optim.lr_scheduler.StepLR(reverse_cycle_optimizer, step_size=80, gamma=0.1)
    generator_scheduler = optim.lr_scheduler.StepLR(generator_optimizer, step_size=80, gamma=0.1)
    discriminator_scheduler = optim.lr_scheduler.StepLR(discriminator_optimizer, step_size=80, gamma=0.1)
    # Used later to define discriminator ground truths
    Tensor = torch.cuda.FloatTensor if FLAGS.cuda else torch.FloatTensor
    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')
    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            headers = ['Epoch', 'Iteration', 'Reconstruction_loss', 'KL_divergence_loss', 'Reverse_cycle_loss']
            if FLAGS.forward_gan:
                headers.extend(['Generator_forward_loss', 'Discriminator_forward_loss'])
            if FLAGS.reverse_gan:
                headers.extend(['Generator_reverse_loss', 'Discriminator_reverse_loss'])
            log.write('\t'.join(headers) + '\n')
    # load data set and create data loader instance
    print('Loading CIFAR paired dataset...')
    paired_cifar = CIFAR_Paired(root='cifar', download=True, train=True, transform=transform_config)
    # cycle() makes the loader infinite so next() never raises StopIteration.
    loader = cycle(DataLoader(paired_cifar, batch_size=FLAGS.batch_size, shuffle=True, num_workers=0, drop_last=True))
    # Save a batch of images to use for visualization
    image_sample_1, image_sample_2, _ = next(loader)
    image_sample_3, _, _ = next(loader)
    # initialize summary writer
    writer = SummaryWriter()
    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print('Epoch #' + str(epoch) + '..........................................................................')
        # update the learning rate scheduler
        # NOTE(review): scheduler.step() before any optimizer.step() is the
        # pre-1.1 PyTorch ordering; on >=1.1 it triggers a warning and shifts
        # the decay schedule by one epoch — confirm intended torch version.
        auto_encoder_scheduler.step()
        reverse_cycle_scheduler.step()
        generator_scheduler.step()
        discriminator_scheduler.step()
        for iteration in range(int(len(paired_cifar) / FLAGS.batch_size)):
            # Adversarial ground truths
            valid = Variable(Tensor(FLAGS.batch_size, 1).fill_(1.0), requires_grad=False)
            fake = Variable(Tensor(FLAGS.batch_size, 1).fill_(0.0), requires_grad=False)
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)
            auto_encoder_optimizer.zero_grad()
            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            style_mu_1, style_logvar_1, class_latent_space_1 = encoder(Variable(X_1))
            style_latent_space_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)
            # KL(q(style|x) || N(0, I)), normalized per pixel; gradients
            # accumulate now, the optimizer steps once after all terms.
            kl_divergence_loss_1 = FLAGS.kl_divergence_coef * (
                - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
            )
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)
            style_mu_2, style_logvar_2, class_latent_space_2 = encoder(Variable(X_2))
            style_latent_space_2 = reparameterize(training=True, mu=style_mu_2, logvar=style_logvar_2)
            kl_divergence_loss_2 = FLAGS.kl_divergence_coef * (
                - 0.5 * torch.sum(1 + style_logvar_2 - style_mu_2.pow(2) - style_logvar_2.exp())
            )
            kl_divergence_loss_2 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_2.backward(retain_graph=True)
            # Class codes are swapped between the pair: each image is decoded
            # with its own style but the partner's class code.
            reconstructed_X_1 = decoder(style_latent_space_1, class_latent_space_2)
            reconstructed_X_2 = decoder(style_latent_space_2, class_latent_space_1)
            reconstruction_error_1 = FLAGS.reconstruction_coef * mse_loss(reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)
            reconstruction_error_2 = FLAGS.reconstruction_coef * mse_loss(reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()
            # Un-scaled totals kept only for logging.
            reconstruction_error = (reconstruction_error_1 + reconstruction_error_2) / FLAGS.reconstruction_coef
            kl_divergence_error = (kl_divergence_loss_1 + kl_divergence_loss_2) / FLAGS.kl_divergence_coef
            auto_encoder_optimizer.step()
            # A-1. Discriminator training during forward cycle
            if FLAGS.forward_gan:
                # Training generator
                generator_optimizer.zero_grad()
                g_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), valid)
                g_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), valid)
                gen_f_loss = (g_loss_1 + g_loss_2) / 2.0
                gen_f_loss.backward()
                generator_optimizer.step()
                # Training discriminator
                discriminator_optimizer.zero_grad()
                real_loss_1 = adversarial_loss(discriminator(Variable(X_1)), valid)
                real_loss_2 = adversarial_loss(discriminator(Variable(X_2)), valid)
                fake_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), fake)
                fake_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), fake)
                dis_f_loss = (real_loss_1 + real_loss_2 + fake_loss_1 + fake_loss_2) / 4.0
                dis_f_loss.backward()
                discriminator_optimizer.step()
            # B. reverse cycle
            image_batch_1, _, __ = next(loader)
            image_batch_2, _, __ = next(loader)
            reverse_cycle_optimizer.zero_grad()
            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            # Same random style code is decoded with two different class codes;
            # the encoder is then trained to recover that shared style (L1).
            style_latent_space.normal_(0., 1.)
            _, __, class_latent_space_1 = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))
            reconstructed_X_1 = decoder(Variable(style_latent_space), class_latent_space_1.detach())
            reconstructed_X_2 = decoder(Variable(style_latent_space), class_latent_space_2.detach())
            style_mu_1, style_logvar_1, _ = encoder(reconstructed_X_1)
            style_latent_space_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)
            style_mu_2, style_logvar_2, _ = encoder(reconstructed_X_2)
            style_latent_space_2 = reparameterize(training=False, mu=style_mu_2, logvar=style_logvar_2)
            reverse_cycle_loss = FLAGS.reverse_cycle_coef * l1_loss(style_latent_space_1, style_latent_space_2)
            reverse_cycle_loss.backward()
            # Rescale after backward so the logged value is coefficient-free.
            reverse_cycle_loss /= FLAGS.reverse_cycle_coef
            reverse_cycle_optimizer.step()
            # B-1. Discriminator training during reverse cycle
            if FLAGS.reverse_gan:
                # Training generator
                generator_optimizer.zero_grad()
                g_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), valid)
                g_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), valid)
                gen_r_loss = (g_loss_1 + g_loss_2) / 2.0
                gen_r_loss.backward()
                generator_optimizer.step()
                # Training discriminator
                discriminator_optimizer.zero_grad()
                real_loss_1 = adversarial_loss(discriminator(Variable(X_1)), valid)
                real_loss_2 = adversarial_loss(discriminator(Variable(X_2)), valid)
                fake_loss_1 = adversarial_loss(discriminator(Variable(reconstructed_X_1)), fake)
                fake_loss_2 = adversarial_loss(discriminator(Variable(reconstructed_X_2)), fake)
                dis_r_loss = (real_loss_1 + real_loss_2 + fake_loss_1 + fake_loss_2) / 4.0
                dis_r_loss.backward()
                discriminator_optimizer.step()
            # Periodic console/log/TensorBoard reporting every 10 iterations.
            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))
                print('')
                print('Reconstruction loss: ' + str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' + str(kl_divergence_error.data.storage().tolist()[0]))
                print('Reverse cycle loss: ' + str(reverse_cycle_loss.data.storage().tolist()[0]))
                if FLAGS.forward_gan:
                    print('Generator F loss: ' + str(gen_f_loss.data.storage().tolist()[0]))
                    print('Discriminator F loss: ' + str(dis_f_loss.data.storage().tolist()[0]))
                if FLAGS.reverse_gan:
                    print('Generator R loss: ' + str(gen_r_loss.data.storage().tolist()[0]))
                    print('Discriminator R loss: ' + str(dis_r_loss.data.storage().tolist()[0]))
                # write to log
                with open(FLAGS.log_file, 'a') as log:
                    row = []
                    row.append(epoch)
                    row.append(iteration)
                    row.append(reconstruction_error.data.storage().tolist()[0])
                    row.append(kl_divergence_error.data.storage().tolist()[0])
                    row.append(reverse_cycle_loss.data.storage().tolist()[0])
                    if FLAGS.forward_gan:
                        row.append(gen_f_loss.data.storage().tolist()[0])
                        row.append(dis_f_loss.data.storage().tolist()[0])
                    if FLAGS.reverse_gan:
                        row.append(gen_r_loss.data.storage().tolist()[0])
                        row.append(dis_r_loss.data.storage().tolist()[0])
                    row = [str(x) for x in row]
                    log.write('\t'.join(row) + '\n')
                # write to tensorboard
                writer.add_scalar('Reconstruction loss', reconstruction_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('KL-Divergence loss', kl_divergence_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('Reverse cycle loss', reverse_cycle_loss.data.storage().tolist()[0],
                                  epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                if FLAGS.forward_gan:
                    writer.add_scalar('Generator F loss', gen_f_loss.data.storage().tolist()[0],
                                      epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                    writer.add_scalar('Discriminator F loss', dis_f_loss.data.storage().tolist()[0],
                                      epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                if FLAGS.reverse_gan:
                    writer.add_scalar('Generator R loss', gen_r_loss.data.storage().tolist()[0],
                                      epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
                    writer.add_scalar('Discriminator R loss', dis_r_loss.data.storage().tolist()[0],
                                      epoch * (int(len(paired_cifar) / FLAGS.batch_size) + 1) + iteration)
        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(), os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(), os.path.join('checkpoints', FLAGS.decoder_save))
            """
            save reconstructed images and style swapped image generations to check progress
            """
            X_1.copy_(image_sample_1)
            X_2.copy_(image_sample_2)
            X_3.copy_(image_sample_3)
            style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))
            style_mu_3, style_logvar_3, _ = encoder(Variable(X_3))
            style_latent_space_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)
            style_latent_space_3 = reparameterize(training=False, mu=style_mu_3, logvar=style_logvar_3)
            reconstructed_X_1_2 = decoder(style_latent_space_1, class_latent_space_2)
            reconstructed_X_3_2 = decoder(style_latent_space_3, class_latent_space_2)
            # save input image batch
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
                # Replicate a single grayscale channel to RGB for display.
                image_batch = np.concatenate((image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)
            # save reconstructed batch
            reconstructed_x = np.transpose(reconstructed_X_1_2.cpu().data.numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
                reconstructed_x = np.concatenate((reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x, name=str(epoch) + '_target', save=True)
            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
                style_batch = np.concatenate((style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)
            # save style swapped reconstructed batch
            reconstructed_style = np.transpose(reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            if FLAGS.num_channels == 1:
                reconstructed_style = np.concatenate((reconstructed_style, reconstructed_style, reconstructed_style), axis=3)
            imshow_grid(reconstructed_style, name=str(epoch) + '_style_target', save=True)
def calculate_content_loss(x):
    """Content loss: mean squared error between the packed (true, predicted) pair.

    `x` is a 2-element sequence `(y_true, y_pred)`, matching the Lambda-layer
    calling convention used by the sibling `calculate_style_loss`.
    """
    target, generated = x
    return mse_loss(target, generated)
anchor = layer_values_A[ layer_name] # TODO: already switched anchor and pos pos = layer_values_Ap[layer_name] else: pos = layer_values_A[ layer_name] #TODO: already switched anchor and pos anchor = layer_values_Ap[layer_name] if Use_B_Bp_A: neg = layer_values_Bp[layer_name] else: neg = layer_values_B[layer_name] triplet_loss_data_A_Ap_B[layer_name] = triplet_loss_dict( anchor, pos, neg, triplet_loss_type, regularize_lambda, triplet_loss_margins[layer_name]['A_Ap_B']) mse_loss_A_Ap[layer_name] = mse_loss(anchor, pos) mse_loss_A_B[layer_name] = mse_loss(anchor, neg) # We only add mse here to add the ALP. if Use_A1_Ap_B: for i in range(A1_Ap_B_num): if switch_an_neg: anchor = A1_Ap_B_list[i]['layer_values_A1'][ layer_name] # TODO: I've changed the a and p here pos = layer_values_Ap[layer_name] else: pos = A1_Ap_B_list[i]['layer_values_A1'][ layer_name] #TODO: I've changed the a and p here anchor = layer_values_Ap[layer_name] neg = layer_values_B[layer_name] triplet_loss_data_A1_Ap_B_list[i][layer_name] = triplet_loss_dict(
def main(rank):  # Modified for TPU purposes
    """TPU training entry point for the background generator (G_bg) GAN.

    Builds the COCO dataset/loader for this replica, trains the global
    discriminator and background generator with BCE + VGG feature-matching
    + branch-similarity losses, and periodically saves sample images and
    model checkpoints.

    Fixes applied in review:
      * `G_result, G_bg = G_bg(...)` used to overwrite the generator module
        with its returned background tensor, breaking `G_bg.zero_grad()`,
        the next forward call, and checkpointing — the second return value
        is now bound to `G_bg_out`.
      * `data_iter.next()` (Python-2 iterator protocol) replaced with the
        built-in `next(data_iter)`.
      * `.data[0]` scalar indexing (pre-0.4 PyTorch) replaced with `.item()`,
        required on the torch >= 1.0 that torch_xla depends on.
    Relies on module-level globals: FLAGS, SERIAL_EXEC, WRAPPED_GENERATOR,
    WRAPPED_DISCRIMINATOR, category_names, transform, root, model,
    result_folder_name, model_folder_name — TODO confirm all are defined.
    """
    # Seed - Added for TPU purposes
    torch.manual_seed(1)

    # Define Dataset - built inside the serial executor to limit TPU host memory use
    dataset = SERIAL_EXEC.run(
        lambda: CocoData(root=FLAGS['train_imgs_path'],
                         annFile=FLAGS['train_annotation_path'],
                         category_names=category_names,
                         transform=transform,
                         final_img_size=FLAGS['img_size']))
    # Discard images contain very small instances
    dataset.discard_small(min_area=0.0, max_area=1)
    # dataset.discard_bad_examples('bad_examples_list.txt')

    # Define data sampler - shards the dataset across TPU replicas
    train_sampler = DistributedSampler(dataset,
                                       num_replicas=xm.xrt_world_size(),
                                       rank=xm.get_ordinal(),
                                       shuffle=True)
    # Define data loader (sampler already shuffles, so no shuffle=True here)
    train_loader = DataLoader(
        dataset,
        batch_size=FLAGS['batch_size'],
        sampler=train_sampler,
        num_workers=FLAGS['num_workers'],
    )

    # Define device - Added for TPU purposes
    device = xm.xla_device(devkind='TPU')

    # For evaluation define fixed masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)  # FIX: was data_iter.next() (py2-only)
    y_fixed = sample_batched['seg_mask'][0:FLAGS['num_test_img']]
    y_fixed = Variable(y_fixed.to(device))
    z_fixed = torch.randn((FLAGS['num_test_img'], FLAGS['noise_size']))
    z_fixed = Variable(z_fixed.to(device))

    # Define networks
    G_bg = WRAPPED_GENERATOR.to(device)
    D_glob = WRAPPED_DISCRIMINATOR.to(device)

    # Load parameters from pre-trained models, if configured
    if FLAGS['pre_trained_model_path'] != None and FLAGS['pre_trained_model_epoch'] != None:
        try:
            G_bg.load_state_dict(
                xser.load(FLAGS['pre_trained_model_path'] + 'G_bg_epoch_' +
                          FLAGS['pre_trained_model_epoch']))
            D_glob.load_state_dict(
                xser.load(FLAGS['pre_trained_model_path'] + 'D_glob_epoch_' +
                          FLAGS['pre_trained_model_epoch']))
            xm.master_print('Parameters are loaded!')
        except:
            # Deliberate best-effort: training continues from scratch.
            xm.master_print('Error: Pre-trained parameters are not loaded!')
            pass

    # Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()
    # Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.to(device)

    # Define optimizers (only trainable discriminator params are optimized)
    G_local_optimizer = optim.Adam(G_bg.parameters(), lr=FLAGS['lr'], betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(filter(lambda p: p.requires_grad, D_glob.parameters()),
                                   lr=FLAGS['lr'],
                                   betas=(0.0, 0.9))
    # Define learning rate schedulers
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])

    # ----------------------------TRAIN---------------------------------------
    xm.master_print('training start!')
    tracker = xm.RateTracker()  # Added for TPU reasons
    start_time = time.time()
    for epoch in range(FLAGS['train_epoch']):
        epoch_start_time = time.time()
        para_loader = pl.ParallelLoader(train_loader, [device])
        loader = para_loader.per_device_loader(device)
        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(FLAGS['batch_size'])
        y_fake_ = torch.zeros(FLAGS['batch_size'])
        y_real_ = Variable(y_real_.to(device))
        y_fake_ = Variable(y_fake_.to(device))

        data_iter = iter(loader)
        num_iter = 0
        while num_iter < len(loader):
            # Inner loop: update D `critic_iter` times per G update.
            j = 0
            while j < FLAGS['critic_iter'] and num_iter < len(loader):
                j += 1
                sample_batched = next(data_iter)  # FIX: was data_iter.next()
                num_iter += 1
                x_ = sample_batched['image']
                x_ = Variable(x_.to(device))
                y_ = sample_batched['seg_mask']
                y_ = Variable(y_.to(device))
                # Collapse all category masks into one binary foreground mask.
                y_reduced = torch.sum(y_, 1).view(y_.size(0), 1, y_.size(2), y_.size(3))
                y_reduced = torch.clamp(y_reduced, 0, 1)
                y_reduced = Variable(y_reduced.to(device))

                # Update discriminator - real examples
                D_glob.zero_grad()
                mini_batch = x_.size()[0]
                if mini_batch != FLAGS['batch_size']:
                    # Last (short) batch: resize the label tensors to match.
                    y_real_ = torch.ones(mini_batch)
                    y_fake_ = torch.zeros(mini_batch)
                    y_real_ = Variable(y_real_.to(device))
                    y_fake_ = Variable(y_fake_.to(device))
                x_d = torch.cat([x_, y_], 1)
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_)
                D_real_loss.backward()

                # Fake examples
                z_ = torch.randn((mini_batch, FLAGS['noise_size']))
                z_ = Variable(z_.to(device))
                # FIX: second return value is the generated background image;
                # binding it to `G_bg` clobbered the generator module.
                G_result, G_bg_out = G_bg(z_, y_)
                G_result_d = torch.cat([G_result, y_], 1)
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_)
                D_fake_loss.backward()
                xm.optimizer_step(D_local_optimizer)

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())  # FIX: was .data[0]

            # Update generator G (once per critic_iter discriminator updates)
            G_bg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            G_train_loss = BCE_loss(D_result, y_real_)
            # Feature matching loss between generated image and ground truth
            FM_loss = criterionVGG(G_result, x_)
            # Branch-similar loss: outside the mask, composite and background
            # branches should agree.
            branch_sim_loss = mse_loss(torch.mul(G_result, (1 - y_reduced)),
                                       torch.mul(G_bg_out, (1 - y_reduced)))
            total_loss = (G_train_loss + FLAGS['lambda_FM'] * FM_loss +
                          FLAGS['lambda_branch'] * branch_sim_loss)
            total_loss.backward()
            xm.optimizer_step(G_local_optimizer)
            G_local_losses.append(G_train_loss.item())  # FIX: was .data[0]

            xm.master_print('loss_d: %.3f, loss_g: %.3f'
                            % (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                xm.master_print('%d - %d complete!' % ((epoch + 1), num_iter))
                xm.master_print(result_folder_name)

        # Modified location of the scheduler step to avoid warning
        scheduler_G.step()
        scheduler_D.step()

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        xm.master_print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f'
                        % ((epoch + 1), FLAGS['train_epoch'], per_epoch_ptime,
                           torch.mean(torch.FloatTensor(D_local_losses)),
                           torch.mean(torch.FloatTensor(G_local_losses))))

        # Save sample images from the fixed noise/mask batch
        G_bg.eval()
        G_result, G_bg_out = G_bg(z_fixed, y_fixed)  # FIX: no module clobber
        G_bg.train()
        if epoch == 0:
            for t in range(y_fixed.size()[1]):
                show_result((epoch + 1),
                            y_fixed[:, t:t + 1, :, :],
                            save=True,
                            path=root + result_folder_name + '/' + model +
                            str(epoch + 1) + '_masked.png')
        show_result((epoch + 1), G_result, save=True,
                    path=root + result_folder_name + '/' + model + str(epoch + 1) + '.png')
        show_result((epoch + 1), G_bg_out, save=True,
                    path=root + result_folder_name + '/' + model + str(epoch + 1) + '_bg.png')

        # Save model params - Modified for TPU purposes
        if FLAGS['save_models'] and (epoch > 21 and epoch % 10 == 0):
            xser.save(G_bg.state_dict(),
                      root + model_folder_name + '/' + model + 'G_bg_epoch_' + str(epoch) + '.pth',
                      master_only=True)
            xser.save(D_glob.state_dict(),
                      root + model_folder_name + '/' + model + 'D_glob_epoch_' + str(epoch) + '.pth',
                      master_only=True)

    end_time = time.time()
    total_ptime = end_time - start_time
    xm.master_print("Training finish!... save training results")
    xm.master_print('Training time: ' + str(total_ptime))
def main(rank):
    """TPU training entry point for the foreground generator (G_fg) GAN.

    Trains a foreground generator against a global and an instance-level
    discriminator on COCO, with BCE + VGG feature-matching + masked
    reconstruction losses, saving sample images and checkpoints.

    Fixes applied in review:
      * `xmp.MpModelWrapper(discriminator)` referenced an undefined name —
        now wraps `discriminator_glob` / `discriminator_instance`.
      * `D_glob`/`D_instance` were both created from the undefined
        `WRAPPED_DISCRIMINATOR` — now use the matching wrappers.
      * Three `xser.save(...)` calls were missing the comma before
        `master_only=True` (syntax error).
      * `data_iter.next()` replaced with `next(data_iter)` (Python 3).
      * `.data[0]` scalar indexing replaced with `.item()` (torch >= 1.0,
        required by torch_xla).
    """
    # Seed - Added for TPU purposes
    torch.manual_seed(1)

    # Create log folder
    root = 'result_fg/'
    model = 'coco_model_'
    result_folder_name = 'images_' + FLAGS['log_dir']
    model_folder_name = 'models_' + FLAGS['log_dir']
    if not os.path.isdir(root):
        os.mkdir(root)
    if not os.path.isdir(root + result_folder_name):
        os.mkdir(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.mkdir(root + model_folder_name)

    # Save the script alongside the results for reproducibility
    copyfile(os.path.basename(__file__),
             root + result_folder_name + '/' + os.path.basename(__file__))

    # Define transformation for dataset images - e.g scaling
    transform = transforms.Compose([
        transforms.Scale((FLAGS['img_size'], FLAGS['img_size'])),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Load dataset
    category_names = FLAGS['category_names'].split(',')

    # Serial Executor - needed to spread work inside the TPU for memory purposes
    SERIAL_EXEC = xmp.MpSerialExecutor()
    dataset = SERIAL_EXEC.run(
        lambda: CocoData(root=FLAGS['train_imgs_path'],
                         annFile=FLAGS['train_annotation_path'],
                         category_names=category_names,
                         transform=transform,
                         final_img_size=FLAGS['img_size']))
    # Discard images contain very small instances
    dataset.discard_small(min_area=0.03, max_area=1)

    # Define data sampler - shards the dataset across TPU replicas
    train_sampler = DistributedSampler(dataset,
                                       num_replicas=xm.xrt_world_size(),
                                       rank=xm.get_ordinal(),
                                       shuffle=True)
    # Define data loader (sampler already shuffles)
    train_loader = DataLoader(dataset,
                              batch_size=FLAGS['batch_size'],
                              sampler=train_sampler,
                              num_workers=FLAGS['num_workers'])

    # Define device - Added for TPU purposes
    device = xm.xla_device(devkind='TPU')

    # For evaluation define fixed images, masks and noises
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)  # FIX: was data_iter.next() (py2-only)
    x_fixed = sample_batched['image'][0:FLAGS['num_test_img']]
    x_fixed = Variable(x_fixed.to(device))
    y_fixed = sample_batched['single_fg_mask'][0:FLAGS['num_test_img']]
    y_fixed = Variable(y_fixed.to(device))
    z_fixed = torch.randn((FLAGS['num_test_img'], FLAGS['noise_size']))
    z_fixed = Variable(z_fixed.to(device))

    # Define networks
    generator = Generator_FG(z_dim=FLAGS['noise_size'],
                             label_channel=len(category_names),
                             num_res_blocks=FLAGS['num_res_blocks'])
    discriminator_glob = Discriminator(channels=3 + len(category_names))
    discriminator_instance = Discriminator(channels=3 + len(category_names),
                                           input_size=FLAGS['local_patch_size'])
    WRAPPED_GENERATOR = xmp.MpModelWrapper(generator)  # Added for TPU purposes
    # FIX: wrap the actual networks (was `discriminator`, an undefined name)
    WRAPPED_DISCRIMINATOR_GLOB = xmp.MpModelWrapper(discriminator_glob)
    WRAPPED_DISCRIMINATOR_INSTANCE = xmp.MpModelWrapper(discriminator_instance)
    G_fg = WRAPPED_GENERATOR.to(device)
    # FIX: was WRAPPED_DISCRIMINATOR (undefined) for both discriminators
    D_glob = WRAPPED_DISCRIMINATOR_GLOB.to(device)
    D_instance = WRAPPED_DISCRIMINATOR_INSTANCE.to(device)

    # Load parameters from pre-trained models, if configured
    if FLAGS['pre_trained_model_path'] != None and FLAGS['pre_trained_model_epoch'] != None:
        try:
            G_fg.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'G_fg_epoch_' + FLAGS['pre_trained_model_epoch']))
            D_glob.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'D_glob_epoch_' + FLAGS['pre_trained_model_epoch']))
            D_instance.load_state_dict(xser.load(FLAGS['pre_trained_model_path'] + 'D_local_epoch_' + FLAGS['pre_trained_model_epoch']))
            xm.master_print('Parameters are loaded!')
        except:
            # Deliberate best-effort: training continues from scratch.
            xm.master_print('Error: Pre-trained parameters are not loaded!')
            pass

    # Define interpolation operation to blow instance crops up to patch size
    up_instance = nn.Upsample(size=(FLAGS['local_patch_size'], FLAGS['local_patch_size']),
                              mode='bilinear')
    # Pooling for the case that image size and local patch size are mismatched
    pooling_instance = nn.Sequential()
    if FLAGS['local_patch_size'] != FLAGS['img_size']:
        pooling_instance.add_module('0', nn.AvgPool2d(int(FLAGS['img_size'] / FLAGS['local_patch_size'])))

    # Define training loss function - binary cross entropy
    BCE_loss = nn.BCELoss()
    # Define feature matching loss
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.to(device)

    # Define optimizers (one Adam over both discriminators)
    G_local_optimizer = optim.Adam(G_fg.parameters(), lr=FLAGS['lr'], betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(
        list(filter(lambda p: p.requires_grad, D_glob.parameters())) +
        list(filter(lambda p: p.requires_grad, D_instance.parameters())),
        lr=FLAGS['lr'],
        betas=(0.0, 0.9))
    # Define learning rate schedulers
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=FLAGS['optim_step_size'],
                                      gamma=FLAGS['optim_gamma'])

    # ----------------------------TRAIN-----------------------------------------
    xm.master_print('training start!')
    tracker = xm.RateTracker()  # Added for TPU reasons
    start_time = time.time()
    for epoch in range(FLAGS['train_epoch']):
        epoch_start_time = time.time()
        para_loader = pl.ParallelLoader(train_loader, [device])
        loader = para_loader.per_device_loader(device)
        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(FLAGS['batch_size'])
        y_fake_ = torch.zeros(FLAGS['batch_size'])
        y_real_ = Variable(y_real_.to(device))
        y_fake_ = Variable(y_fake_.to(device))

        data_iter = iter(loader)
        num_iter = 0
        while num_iter < len(loader):
            # Inner loop: update D `critic_iter` times per G update.
            j = 0
            while j < FLAGS['critic_iter'] and num_iter < len(loader):
                j += 1
                sample_batched = next(data_iter)  # FIX: was data_iter.next()
                num_iter += 1
                x_ = sample_batched['image']
                x_ = Variable(x_.to(device))
                y_ = sample_batched['single_fg_mask']
                y_ = Variable(y_.to(device))
                fg_mask = sample_batched['seg_mask']
                fg_mask = Variable(fg_mask.to(device))
                y_instances = sample_batched['mask_instance']
                bbox = sample_batched['bbox']
                mini_batch = x_.size()[0]
                if mini_batch != FLAGS['batch_size']:
                    # Short final batch: skip (fixed-size buffers below assume full batch)
                    break

                # Update discriminators - real examples
                D_glob.zero_grad()
                D_instance.zero_grad()
                y_reduced = torch.sum(y_, 1).clamp(0, 1).view(y_.size(0), 1, FLAGS['img_size'], FLAGS['img_size'])
                x_d = torch.cat([x_, fg_mask], 1)
                x_instances = torch.zeros((FLAGS['batch_size'], 3, FLAGS['local_patch_size'], FLAGS['local_patch_size']))
                x_instances = Variable(x_instances.to(device))
                y_instances = Variable(y_instances.to(device))
                y_instances = pooling_instance(y_instances)
                G_instances = torch.zeros((FLAGS['batch_size'], 3, FLAGS['local_patch_size'], FLAGS['local_patch_size']))
                G_instances = Variable(G_instances.to(device))
                # Obtain real instance crops, resized to the local patch size
                for t in range(x_d.size()[0]):
                    x_instance = x_[t, 0:3, bbox[0][t]:bbox[1][t], bbox[2][t]:bbox[3][t]]
                    x_instance = x_instance.contiguous().view(1, x_instance.size()[0], x_instance.size()[1], x_instance.size()[2])
                    x_instances[t] = up_instance(x_instance)
                D_result_instance = D_instance(torch.cat([x_instances, y_instances], 1)).squeeze()
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_) + BCE_loss(D_result_instance, y_real_)
                D_real_loss.backward()

                # Fake examples
                z_ = torch.randn((mini_batch, FLAGS['noise_size']))
                z_ = Variable(z_.to(device))
                # Generate fake images conditioned on masks and masked-out real image
                G_fg_result = G_fg(z_, y_, torch.mul(x_, (1 - y_reduced)))
                G_result_d = torch.cat([G_fg_result, fg_mask], 1)
                # Obtain fake instance crops
                for t in range(x_d.size()[0]):
                    G_instance = G_result_d[t, 0:3, bbox[0][t]:bbox[1][t], bbox[2][t]:bbox[3][t]]
                    G_instance = G_instance.contiguous().view(1, G_instance.size()[0], G_instance.size()[1], G_instance.size()[2])
                    G_instances[t] = up_instance(G_instance)
                D_result_instance = D_instance(torch.cat([G_instances, y_instances], 1).detach()).squeeze()
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_) + BCE_loss(D_result_instance, y_fake_)
                D_fake_loss.backward()
                xm.optimizer_step(D_local_optimizer)  # Modified for TPU purposes

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.item())  # FIX: was .data[0]

            if mini_batch != FLAGS['batch_size']:
                break

            # Update generator G (once per critic_iter discriminator updates)
            G_fg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            D_result_instance = D_instance(torch.cat([G_instances, y_instances], 1)).squeeze()
            G_train_loss = ((1 - FLAGS['trade_off_G']) * BCE_loss(D_result, y_real_) +
                            FLAGS['trade_off_G'] * BCE_loss(D_result_instance, y_real_))
            # Feature matching loss between generated image and ground truth
            FM_loss = criterionVGG(G_fg_result, x_)
            # Reconstruction loss outside the foreground mask
            Recon_loss = mse_loss(torch.mul(x_, (1 - y_reduced)),
                                  torch.mul(G_fg_result, (1 - y_reduced)))
            total_loss = (G_train_loss + FLAGS['lambda_FM'] * FM_loss +
                          FLAGS['lambda_recon'] * Recon_loss)
            total_loss.backward()
            xm.optimizer_step(G_local_optimizer)
            G_local_losses.append(G_train_loss.item())  # FIX: was .data[0]

            xm.master_print('loss_d: %.3f, loss_g: %.3f'
                            % (D_train_loss.item(), G_train_loss.item()))
            if (num_iter % 100) == 0:
                xm.master_print('%d - %d complete!' % ((epoch + 1), num_iter))
                xm.master_print(result_folder_name)

        # Modified location of the scheduler step to avoid warning
        scheduler_G.step()
        scheduler_D.step()

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        xm.master_print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f'
                        % ((epoch + 1), FLAGS['train_epoch'], per_epoch_ptime,
                           torch.mean(torch.FloatTensor(D_local_losses)),
                           torch.mean(torch.FloatTensor(G_local_losses))))

        # Save images
        G_fg.eval()
        if epoch == 0:
            # Ground truth and per-category masks are only saved once.
            show_result((epoch + 1), x_fixed, save=True,
                        path=root + result_folder_name + '/' + model + str(epoch + 1) + '_gt.png')
            for t in range(y_fixed.size()[1]):
                show_result((epoch + 1), y_fixed[:, t:t + 1, :, :], save=True,
                            path=root + result_folder_name + '/' + model + str(epoch + 1) + '_' + str(t) + '_masked.png')
        show_result((epoch + 1),
                    G_fg(z_fixed, y_fixed,
                         torch.mul(x_fixed,
                                   (1 - torch.sum(y_fixed, 1).view(y_fixed.size(0), 1, FLAGS['img_size'], FLAGS['img_size'])))),
                    save=True,
                    path=root + result_folder_name + '/' + model + str(epoch + 1) + '_fg.png')
        G_fg.train()

        # Save model params
        if FLAGS['save_models'] and (epoch > 11 and epoch % 10 == 0):
            # FIX: the three xser.save calls were missing the comma before
            # master_only=True, which was a syntax error.
            xser.save(G_fg.state_dict(),
                      root + model_folder_name + '/' + model + 'G_fg_epoch_' + str(epoch) + '.pth',
                      master_only=True)
            xser.save(D_glob.state_dict(),
                      root + model_folder_name + '/' + model + 'D_glob_epoch_' + str(epoch) + '.pth',
                      master_only=True)
            xser.save(D_instance.state_dict(),
                      root + model_folder_name + '/' + model + 'D_local_epoch_' + str(epoch) + '.pth',
                      master_only=True)

    end_time = time.time()
    total_ptime = end_time - start_time
    xm.master_print("Training finish!... save training results")
    xm.master_print('Training time: ' + str(total_ptime))
def loss(self):
    """Mean-squared-error loss between the inference output and the labels.

    The ground-truth tensor ``self.y`` is reshaped to (N, 2) so it lines up
    with the network output before the MSE is computed.
    """
    target = tf.reshape(self.y, shape=[-1, 2])
    prediction = self.inference
    return digits.mse_loss(prediction, target)
def train():
    """Train CSRNet (crowd counting) on ShanghaiTech part B in Paddle dygraph mode.

    Runs ``num_epochs`` passes over the training images, evaluates MAE/MSE on
    the test split after every epoch, and checkpoints model + optimizer state
    whenever the test MAE improves on the best seen so far.
    """
    method = train_parameters['method']
    print(method)
    save_dir = train_parameters['save_dir']
    print(save_dir)
    # Batched readers over ShanghaiTech part B; density maps are down-scaled
    # by a factor of 8 (scale=8), presumably to match the network's output
    # stride — TODO confirm against CSRNet's architecture.
    train_reader = paddle.batch(
        SH_data_loader('/home/aistudio/sh/sh/part_B_final/train_data/images/',
                       size=[256, 512], mode='train', scale=8),
        batch_size=train_parameters['train_batch_size'], drop_last=False)
    test_reader = paddle.batch(
        SH_data_loader('/home/aistudio/sh/sh/part_B_final/test_data/images/',
                       size=[256, 512], mode='val', scale=8),
        batch_size=1, drop_last=False)
    with fluid.dygraph.guard():
        epoch_num = train_parameters["num_epochs"]  # 5
        print("epocj_num", epoch_num)
        print("CSR")
        net = CSRNet("CSR")
        print('train')
        optimizer = optimizer_setting(train_parameters)
        #optimizer = fluid.optimizer.SGD(1e-6,momentum=0.95)
        if train_parameters["continue_train"]:
            # Load the model from the previous run and resume training.
            model, _ = fluid.load_dygraph(train_parameters['continue_train_dir'])
            net.load_dict(model)
            optimizer.set_dict(_)
            print('继续训练', train_parameters['continue_train_dir'])
        best_mae = 1000000
        min_epoch = 0
        for epoch in range(epoch_num):
            epoch_loss = 0
            #mae = 0
            for batch_id, data in enumerate(train_reader()):
                image = np.array([x[0] for x in data]).astype('float32')
                label = np.array([x[1] for x in data]).astype('float32')
                image = fluid.dygraph.to_variable(image)
                label = fluid.dygraph.to_variable(label)
                # Density-map targets take no gradients.
                label.stop_gradient = True
                predict = net(image)
                loss = mse_loss(predict, label)
                backward_strategy = fluid.dygraph.BackwardStrategy()
                backward_strategy.sort_sum_gradient = True
                loss.backward(backward_strategy)
                epoch_loss += loss.numpy()[0]
                #print(net._x_for_debug.gradient())
                optimizer.minimize(loss)
                net.clear_gradients()
                #mae+=abs(predict.numpy().sum()-label.numpy().sum())
            print('epoch:', epoch, 'loss:', epoch_loss)
            # dy_param_value = {}
            # for param in net.parameters():
            #     dy_param_value[param.name] = param.numpy()
            # fluid.save_dygraph(net.state_dict(), save_dir + method + str(epoch))
            # fluid.save_dygraph(optimizer.state_dict(), save_dir + method + str(epoch))
            # Per-epoch evaluation on the full test set.
            net.eval()
            mae = 0
            mse = 0
            val_loss = 0
            for batch_id, data in enumerate(test_reader()):
                image = np.array([x[0] for x in data]).astype('float32')
                label = np.array([x[1] for x in data]).astype('float32')
                image = fluid.dygraph.to_variable(image)
                label = fluid.dygraph.to_variable(label)
                label.stop_gradient = True
                predict = net(image)
                loss = mse_loss(predict, label)
                val_loss += loss.numpy()[0]
                # Counting error: predicted vs. true total head count per image.
                mae += abs(predict.numpy().sum() - label.numpy().sum())
                mse += (predict.numpy().sum() - label.numpy().sum()) * (predict.numpy().sum() - label.numpy().sum())
            net.train()
            if mae / (batch_id + 1) < best_mae:
                best_mae = mae / (batch_id + 1)
                min_epoch = epoch
                # Checkpoint only when the test MAE improves.
                # NOTE(review): placement reconstructed from collapsed source —
                # saves appear intended to be inside this best-MAE branch; confirm.
                fluid.save_dygraph(net.state_dict(), save_dir + method + str(epoch))
                fluid.save_dygraph(optimizer.state_dict(), save_dir + method + str(epoch))
            print("test epoch:", str(epoch), 'loss:', val_loss,
                  " error:", str(mae / (batch_id + 1)),
                  " min_mae:", str(best_mae),
                  " min_epoch:", str(min_epoch),
                  'mse:', mse / (batch_id + 1),
                  'real:', label.numpy()[0].sum(),
                  'pre:', predict.numpy()[0].sum())
            del mae, mse, image, label, predict
def training_procedure(FLAGS):
    """Adversarial training loop for the nv/nc disentangling auto-encoder.

    Per iteration on paired MNIST: (A) cross-reconstruction through swapped
    nc (common) factors, (B.a) discriminator updates on real pairs vs.
    generated pairs, gated by a limiting accuracy, and (B.b) generator
    updates that try to fool the discriminator.  Progress goes to a text log
    and TensorBoard; models and sample image grids are saved every 5 epochs.
    """
    """
    model definition
    """
    encoder = Encoder(nv_dim=FLAGS.nv_dim, nc_dim=FLAGS.nc_dim)
    encoder.apply(weights_init)

    decoder = Decoder(nv_dim=FLAGS.nv_dim, nc_dim=FLAGS.nc_dim)
    decoder.apply(weights_init)

    discriminator = Discriminator()
    discriminator.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
        discriminator.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.discriminator_save)))

    """
    variable definition
    """
    real_domain_labels = 1
    fake_domain_labels = 0

    # Pre-allocated input buffers; each mini-batch is copied into them in place.
    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    domain_labels = torch.LongTensor(FLAGS.batch_size)

    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()

    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()
        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()
        domain_labels = domain_labels.cuda()

    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    discriminator_optimizer = optim.Adam(list(discriminator.parameters()),
                                         lr=FLAGS.initial_learning_rate,
                                         betas=(FLAGS.beta_1, FLAGS.beta_2))

    # NOTE(review): shares encoder/decoder parameters with
    # auto_encoder_optimizer — both optimizers step the same weights.
    generator_optimizer = optim.Adam(list(encoder.parameters()) +
                                     list(decoder.parameters()),
                                     lr=FLAGS.initial_learning_rate,
                                     betas=(FLAGS.beta_1, FLAGS.beta_2))

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write('Epoch\tIteration\tReconstruction_loss\t')
            log.write(
                'Generator_loss\tDiscriminator_loss\tDiscriminator_accuracy\n')

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist', download=True, train=True,
                                transform=transform_config)
    loader = cycle(
        DataLoader(paired_mnist, batch_size=FLAGS.batch_size, shuffle=True,
                   num_workers=0, drop_last=True))

    # initialise variables
    discriminator_accuracy = 0.

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, labels_batch_1 = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            nv_1, nc_1 = encoder(Variable(X_1))
            nv_2, nc_2 = encoder(Variable(X_2))

            # Cross reconstruction: nv (varying) factor from one image, nc
            # (common) factor from its pair — presumably the pair shares the
            # class, so each decode should match the original; confirm
            # against MNIST_Paired.
            reconstructed_X_1 = decoder(nv_1, nc_2)
            reconstructed_X_2 = decoder(nv_2, nc_1)

            reconstruction_error_1 = mse_loss(reconstructed_X_1, Variable(X_1))
            # retain_graph: the second backward below reuses the encoder graph.
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = mse_loss(reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()

            reconstruction_error = reconstruction_error_1 + reconstruction_error_2

            if FLAGS.train_auto_encoder:
                auto_encoder_optimizer.step()

            # B. run the adversarial part of the architecture

            # B. a) run the discriminator
            for i in range(FLAGS.discriminator_times):
                discriminator_optimizer.zero_grad()

                # train discriminator on real data
                domain_labels.fill_(real_domain_labels)

                image_batch_1, image_batch_2, labels_batch_1 = next(loader)

                X_1.copy_(image_batch_1)
                X_2.copy_(image_batch_2)

                real_output = discriminator(Variable(X_1), Variable(X_2))

                discriminator_real_error = FLAGS.disc_coef * cross_entropy_loss(
                    real_output, Variable(domain_labels))
                discriminator_real_error.backward()

                # train discriminator on fake data
                domain_labels.fill_(fake_domain_labels)

                image_batch_3, _, labels_batch_3 = next(loader)
                X_3.copy_(image_batch_3)

                nv_3, nc_3 = encoder(Variable(X_3))

                # reconstruction is taking common factor from X_1 and varying factor from X_3
                reconstructed_X_3_1 = decoder(nv_3, encoder(Variable(X_1))[1])
                fake_output = discriminator(Variable(X_1), reconstructed_X_3_1)

                discriminator_fake_error = FLAGS.disc_coef * cross_entropy_loss(
                    fake_output, Variable(domain_labels))
                discriminator_fake_error.backward()

                # total discriminator error
                discriminator_error = discriminator_real_error + discriminator_fake_error

                # calculate discriminator accuracy for this step
                target_true_labels = torch.cat(
                    (torch.ones(FLAGS.batch_size), torch.zeros(FLAGS.batch_size)),
                    dim=0)
                if FLAGS.cuda:
                    target_true_labels = target_true_labels.cuda()

                discriminator_predictions = torch.cat(
                    (real_output, fake_output), dim=0)
                _, discriminator_predictions = torch.max(
                    discriminator_predictions, 1)

                discriminator_accuracy = (discriminator_predictions.data ==
                                          target_true_labels.long()).sum(
                                          ).item() / (FLAGS.batch_size * 2)

                # Skip the step once the discriminator is "too good" so the
                # generator still receives useful gradients.
                if discriminator_accuracy < FLAGS.discriminator_limiting_accuracy and FLAGS.train_discriminator:
                    discriminator_optimizer.step()

            # B. b) run the generator
            for i in range(FLAGS.generator_times):
                generator_optimizer.zero_grad()

                image_batch_1, _, labels_batch_1 = next(loader)
                image_batch_3, __, labels_batch_3 = next(loader)

                # Generator is trained with "real" labels: it wins when the
                # discriminator calls its output real.
                domain_labels.fill_(real_domain_labels)

                X_1.copy_(image_batch_1)
                X_3.copy_(image_batch_3)

                nv_3, nc_3 = encoder(Variable(X_3))

                # reconstruction is taking common factor from X_1 and varying factor from X_3
                reconstructed_X_3_1 = decoder(nv_3, encoder(Variable(X_1))[1])
                output = discriminator(Variable(X_1), reconstructed_X_3_1)

                generator_error = FLAGS.gen_coef * cross_entropy_loss(
                    output, Variable(domain_labels))
                generator_error.backward()

                if FLAGS.train_generator:
                    generator_optimizer.step()

            # print progress after 10 iterations
            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('Generator loss: ' +
                      str(generator_error.data.storage().tolist()[0]))

                print('')
                print('Discriminator loss: ' +
                      str(discriminator_error.data.storage().tolist()[0]))
                print('Discriminator accuracy: ' + str(discriminator_accuracy))

                print('..........')

                # write to log
                with open(FLAGS.log_file, 'a') as log:
                    log.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n'.format(
                        epoch, iteration,
                        reconstruction_error.data.storage().tolist()[0],
                        generator_error.data.storage().tolist()[0],
                        discriminator_error.data.storage().tolist()[0],
                        discriminator_accuracy))

                # write to tensorboard
                writer.add_scalar(
                    'Reconstruction loss',
                    reconstruction_error.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)
                writer.add_scalar(
                    'Generator loss',
                    generator_error.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)
                writer.add_scalar(
                    'Discriminator loss',
                    discriminator_error.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))
            torch.save(discriminator.state_dict(),
                       os.path.join('checkpoints', FLAGS.discriminator_save))

            """
            save reconstructed images and style swapped image generations to check progress
            """
            image_batch_1, image_batch_2, labels_batch_1 = next(loader)
            image_batch_3, _, __ = next(loader)

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            X_3.copy_(image_batch_3)

            nv_1, nc_1 = encoder(Variable(X_1))
            nv_2, nc_2 = encoder(Variable(X_2))
            nv_3, nc_3 = encoder(Variable(X_3))

            reconstructed_X_1 = decoder(nv_1, nc_2)
            reconstructed_X_3_2 = decoder(nv_3, nc_2)

            # save input image batch
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            # Replicate the single channel 3x so the grid renders as RGB.
            image_batch = np.concatenate(
                (image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)

            # save reconstructed batch
            reconstructed_x = np.transpose(
                reconstructed_X_1.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_x = np.concatenate(
                (reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x, name=str(epoch) + '_target', save=True)

            # save cross reconstructed batch
            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            style_batch = np.concatenate(
                (style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)

            reconstructed_style = np.transpose(
                reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_style = np.concatenate(
                (reconstructed_style, reconstructed_style, reconstructed_style),
                axis=3)
            imshow_grid(reconstructed_style,
                        name=str(epoch) + '_style_target',
                        save=True)
# augmented_batch = augment_batch(X1) augmented_batch, mask = get_augmentations_and_mask(X1) encoder_outputs = encoder(X1) specified_latents, unspecified_variational_latent, mu, logvar = encoder_outputs[0], encoder_outputs[1], encoder_outputs[2], encoder_outputs[3] augmented_encoder_outputs = encoder(augmented_batch) aug_specified_latents, aug_unspecified_variational_latent, aug_mu, aug_logvar = augmented_encoder_outputs[0], augmented_encoder_outputs[1], augmented_encoder_outputs[2], augmented_encoder_outputs[3] # kl loss kl_loss = FLAGS.kl_divergence_coef * (-0.5 * (torch.sum(1 + logvar - mu.pow(2) - logvar.exp()))) kl_loss /= FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size # reconstruction loss batch image_batch_recon = decoder(specified_latents, unspecified_variational_latent) recon_loss = mse_loss(image_batch_recon, X1) gen_loss = recon_loss + kl_loss # center loss cv, cv_full_view = cv_network(specified_latents) transformed_chunks = torch.zeros(FLAGS.batch_size*FLAGS.z_num_chunks, FLAGS.c_num_chunks*FLAGS.c_chunk_size) with torch.no_grad(): for i in range(FLAGS.batch_size): transformed_temp_chunks = [] for j in range(FLAGS.z_num_chunks): curr_tensor = specified_latents[j][i]
def training_procedure(FLAGS):
    """Cycle-consistent VAE training loop (style/class disentanglement).

    Phase A: cross-reconstruct paired MNIST images with KL-regularised style
    latents and swapped class latents.  Phase B (reverse cycle): decode one
    sampled style vector with two different class codes, re-encode both
    decodes, and pull the two inferred styles together with an L1 loss.
    Models and progress images are saved every 5 epochs.
    """
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(
            torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))

    """
    variable definition
    """
    # Pre-allocated input buffers; mini-batches are copied into them in place.
    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels,
                            FLAGS.image_size, FLAGS.image_size)

    # Buffer for the N(0, 1) style sample used in the reverse cycle.
    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)

    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()

    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        style_latent_space = style_latent_space.cuda()

    """
    optimizer and scheduler definition
    """
    auto_encoder_optimizer = optim.Adam(list(encoder.parameters()) +
                                        list(decoder.parameters()),
                                        lr=FLAGS.initial_learning_rate,
                                        betas=(FLAGS.beta_1, FLAGS.beta_2))

    # Reverse cycle only updates the encoder (class codes are detached below).
    reverse_cycle_optimizer = optim.Adam(list(encoder.parameters()),
                                         lr=FLAGS.initial_learning_rate,
                                         betas=(FLAGS.beta_1, FLAGS.beta_2))

    # divide the learning rate by a factor of 10 after 80 epochs
    auto_encoder_scheduler = optim.lr_scheduler.StepLR(auto_encoder_optimizer,
                                                       step_size=80,
                                                       gamma=0.1)
    reverse_cycle_scheduler = optim.lr_scheduler.StepLR(
        reverse_cycle_optimizer, step_size=80, gamma=0.1)

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    if not os.path.exists('reconstructed_images'):
        os.makedirs('reconstructed_images')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write(
                'Epoch\tIteration\tReconstruction_loss\tKL_divergence_loss\tReverse_cycle_loss\n'
            )

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist', download=True, train=True,
                                transform=transform_config)
    loader = cycle(
        DataLoader(paired_mnist, batch_size=FLAGS.batch_size, shuffle=True,
                   num_workers=0, drop_last=True))

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print(
            'Epoch #' + str(epoch) +
            '..........................................................................'
        )

        # update the learning rate scheduler
        # NOTE(review): stepped at the start of each epoch (pre-1.1.0 PyTorch
        # ordering); newer PyTorch expects optimizer.step() before
        # scheduler.step() — confirm the installed version before changing.
        auto_encoder_scheduler.step()
        reverse_cycle_scheduler.step()

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_mu_1, style_logvar_1, class_latent_space_1 = encoder(
                Variable(X_1))
            style_latent_space_1 = reparameterize(training=True,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)

            kl_divergence_loss_1 = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) -
                                 style_logvar_1.exp()))
            # Normalise by total pixel count of the batch.
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels *
                                     FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)

            style_mu_2, style_logvar_2, class_latent_space_2 = encoder(
                Variable(X_2))
            style_latent_space_2 = reparameterize(training=True,
                                                  mu=style_mu_2,
                                                  logvar=style_logvar_2)

            kl_divergence_loss_2 = FLAGS.kl_divergence_coef * (
                -0.5 * torch.sum(1 + style_logvar_2 - style_mu_2.pow(2) -
                                 style_logvar_2.exp()))
            kl_divergence_loss_2 /= (FLAGS.batch_size * FLAGS.num_channels *
                                     FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_2.backward(retain_graph=True)

            # Swap class latents between the pair: each decode is still
            # compared against its own original — presumably the pair shares
            # a class label, so this should reconstruct; confirm MNIST_Paired.
            reconstructed_X_1 = decoder(style_latent_space_1,
                                        class_latent_space_2)
            reconstructed_X_2 = decoder(style_latent_space_2,
                                        class_latent_space_1)

            reconstruction_error_1 = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = FLAGS.reconstruction_coef * mse_loss(
                reconstructed_X_2, Variable(X_2))
            reconstruction_error_2.backward()

            # Divide the coefficients back out; these values are for
            # reporting only (gradients were already accumulated above).
            reconstruction_error = (
                reconstruction_error_1 +
                reconstruction_error_2) / FLAGS.reconstruction_coef
            kl_divergence_error = (kl_divergence_loss_1 + kl_divergence_loss_2
                                   ) / FLAGS.kl_divergence_coef

            auto_encoder_optimizer.step()

            # B. reverse cycle
            image_batch_1, _, __ = next(loader)
            image_batch_2, _, __ = next(loader)

            reverse_cycle_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_latent_space.normal_(0., 1.)

            _, __, class_latent_space_1 = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))

            # Decode the same random style with each class code; detach so no
            # gradient flows into the class branch.
            reconstructed_X_1 = decoder(Variable(style_latent_space),
                                        class_latent_space_1.detach())
            reconstructed_X_2 = decoder(Variable(style_latent_space),
                                        class_latent_space_2.detach())

            style_mu_1, style_logvar_1, _ = encoder(reconstructed_X_1)
            style_latent_space_1 = reparameterize(training=False,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)

            style_mu_2, style_logvar_2, _ = encoder(reconstructed_X_2)
            style_latent_space_2 = reparameterize(training=False,
                                                  mu=style_mu_2,
                                                  logvar=style_logvar_2)

            # Both decodes used the same style sample, so the re-encoded
            # styles should agree.
            reverse_cycle_loss = FLAGS.reverse_cycle_coef * l1_loss(
                style_latent_space_1, style_latent_space_2)
            reverse_cycle_loss.backward()
            # Un-scale for reporting after gradients are taken.
            reverse_cycle_loss /= FLAGS.reverse_cycle_coef

            reverse_cycle_optimizer.step()

            if (iteration + 1) % 10 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' +
                      str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' +
                      str(kl_divergence_error.data.storage().tolist()[0]))
                print('Reverse cycle loss: ' +
                      str(reverse_cycle_loss.data.storage().tolist()[0]))

                # write to log
                with open(FLAGS.log_file, 'a') as log:
                    log.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                        epoch, iteration,
                        reconstruction_error.data.storage().tolist()[0],
                        kl_divergence_error.data.storage().tolist()[0],
                        reverse_cycle_loss.data.storage().tolist()[0]))

                # write to tensorboard
                writer.add_scalar(
                    'Reconstruction loss',
                    reconstruction_error.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)
                writer.add_scalar(
                    'KL-Divergence loss',
                    kl_divergence_error.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)
                writer.add_scalar(
                    'Reverse cycle loss',
                    reverse_cycle_loss.data.storage().tolist()[0],
                    epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) +
                    iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(),
                       os.path.join('checkpoints', FLAGS.decoder_save))

            """
            save reconstructed images and style swapped image generations to check progress
            """
            image_batch_1, image_batch_2, _ = next(loader)
            image_batch_3, _, __ = next(loader)

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)
            X_3.copy_(image_batch_3)

            style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
            _, __, class_latent_space_2 = encoder(Variable(X_2))
            style_mu_3, style_logvar_3, _ = encoder(Variable(X_3))

            style_latent_space_1 = reparameterize(training=False,
                                                  mu=style_mu_1,
                                                  logvar=style_logvar_1)
            style_latent_space_3 = reparameterize(training=False,
                                                  mu=style_mu_3,
                                                  logvar=style_logvar_3)

            reconstructed_X_1_2 = decoder(style_latent_space_1,
                                          class_latent_space_2)
            reconstructed_X_3_2 = decoder(style_latent_space_3,
                                          class_latent_space_2)

            # save input image batch
            image_batch = np.transpose(X_1.cpu().numpy(), (0, 2, 3, 1))
            # Replicate the single channel 3x so the grid renders as RGB.
            image_batch = np.concatenate(
                (image_batch, image_batch, image_batch), axis=3)
            imshow_grid(image_batch, name=str(epoch) + '_original', save=True)

            # save reconstructed batch
            reconstructed_x = np.transpose(
                reconstructed_X_1_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_x = np.concatenate(
                (reconstructed_x, reconstructed_x, reconstructed_x), axis=3)
            imshow_grid(reconstructed_x, name=str(epoch) + '_target', save=True)

            style_batch = np.transpose(X_3.cpu().numpy(), (0, 2, 3, 1))
            style_batch = np.concatenate(
                (style_batch, style_batch, style_batch), axis=3)
            imshow_grid(style_batch, name=str(epoch) + '_style', save=True)

            # save style swapped reconstructed batch
            reconstructed_style = np.transpose(
                reconstructed_X_3_2.cpu().data.numpy(), (0, 2, 3, 1))
            reconstructed_style = np.concatenate(
                (reconstructed_style, reconstructed_style, reconstructed_style),
                axis=3)
            imshow_grid(reconstructed_style,
                        name=str(epoch) + '_style_target',
                        save=True)
def training_procedure(FLAGS):
    """VAE-GAN training loop for style/class disentanglement on paired MNIST.

    Per iteration: (A) auto-encoder reconstruction with a KL-regularised
    style latent, (B) generator updates judged by a discriminator on
    style-transferred and noise-style decodes, (C) discriminator updates on
    real image pairs vs. style-transferred fakes, gated by a limiting
    accuracy.  Models are checkpointed every 5 epochs.
    """
    """
    model definition
    """
    encoder = Encoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    encoder.apply(weights_init)

    decoder = Decoder(style_dim=FLAGS.style_dim, class_dim=FLAGS.class_dim)
    decoder.apply(weights_init)

    discriminator = Discriminator()
    discriminator.apply(weights_init)

    # load saved models if load_saved flag is true
    if FLAGS.load_saved:
        encoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.encoder_save)))
        decoder.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.decoder_save)))
        discriminator.load_state_dict(torch.load(os.path.join('checkpoints', FLAGS.discriminator_save)))

    """
    variable definition
    """
    real_domain_labels = 1
    fake_domain_labels = 0

    # Pre-allocated input buffers; mini-batches are copied into them in place.
    X_1 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_2 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)
    X_3 = torch.FloatTensor(FLAGS.batch_size, FLAGS.num_channels, FLAGS.image_size, FLAGS.image_size)

    domain_labels = torch.LongTensor(FLAGS.batch_size)
    # Buffer for N(0, 1) style samples used in the generator phase.
    style_latent_space = torch.FloatTensor(FLAGS.batch_size, FLAGS.style_dim)

    """
    loss definitions
    """
    cross_entropy_loss = nn.CrossEntropyLoss()

    '''
    add option to run on GPU
    '''
    if FLAGS.cuda:
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()
        cross_entropy_loss.cuda()

        X_1 = X_1.cuda()
        X_2 = X_2.cuda()
        X_3 = X_3.cuda()

        domain_labels = domain_labels.cuda()
        style_latent_space = style_latent_space.cuda()

    """
    optimizer definition
    """
    auto_encoder_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    discriminator_optimizer = optim.Adam(
        list(discriminator.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    # NOTE(review): shares encoder/decoder parameters with
    # auto_encoder_optimizer — both optimizers step the same weights.
    generator_optimizer = optim.Adam(
        list(encoder.parameters()) + list(decoder.parameters()),
        lr=FLAGS.initial_learning_rate,
        betas=(FLAGS.beta_1, FLAGS.beta_2)
    )

    """
    training
    """
    if torch.cuda.is_available() and not FLAGS.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    if not os.path.exists('checkpoints'):
        os.makedirs('checkpoints')

    # load_saved is false when training is started from 0th iteration
    if not FLAGS.load_saved:
        with open(FLAGS.log_file, 'w') as log:
            log.write('Epoch\tIteration\tReconstruction_loss\tKL_divergence_loss\t')
            log.write('Generator_loss\tDiscriminator_loss\tDiscriminator_accuracy\n')

    # load data set and create data loader instance
    print('Loading MNIST paired dataset...')
    paired_mnist = MNIST_Paired(root='mnist', download=True, train=True, transform=transform_config)
    loader = cycle(DataLoader(paired_mnist, batch_size=FLAGS.batch_size, shuffle=True, num_workers=0, drop_last=True))

    # initialise variables
    discriminator_accuracy = 0.

    # initialize summary writer
    writer = SummaryWriter()

    for epoch in range(FLAGS.start_epoch, FLAGS.end_epoch):
        print('')
        print('Epoch #' + str(epoch) + '..........................................................................')

        for iteration in range(int(len(paired_mnist) / FLAGS.batch_size)):
            # A. run the auto-encoder reconstruction
            image_batch_1, image_batch_2, _ = next(loader)

            auto_encoder_optimizer.zero_grad()

            X_1.copy_(image_batch_1)
            X_2.copy_(image_batch_2)

            style_mu_1, style_logvar_1, class_1 = encoder(Variable(X_1))
            style_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)

            kl_divergence_loss_1 = - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
            # Normalise by total pixel count of the batch.
            kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
            kl_divergence_loss_1.backward(retain_graph=True)

            _, __, class_2 = encoder(Variable(X_2))

            reconstructed_X_1 = decoder(style_1, class_1)
            # NOTE(review): (style_1, class_2) is decoded and compared against
            # X_1 — presumably the pair shares a class label so this should
            # still reconstruct X_1; confirm against MNIST_Paired.
            reconstructed_X_2 = decoder(style_1, class_2)

            reconstruction_error_1 = mse_loss(reconstructed_X_1, Variable(X_1))
            reconstruction_error_1.backward(retain_graph=True)

            reconstruction_error_2 = mse_loss(reconstructed_X_2, Variable(X_1))
            reconstruction_error_2.backward()

            reconstruction_error = reconstruction_error_1 + reconstruction_error_2
            kl_divergence_error = kl_divergence_loss_1

            auto_encoder_optimizer.step()

            # B. run the generator
            for i in range(FLAGS.generator_times):
                generator_optimizer.zero_grad()

                image_batch_1, _, __ = next(loader)
                image_batch_3, _, __ = next(loader)

                # Generator is trained with "real" labels: it wins when the
                # discriminator calls its output real.
                domain_labels.fill_(real_domain_labels)

                X_1.copy_(image_batch_1)
                X_3.copy_(image_batch_3)

                style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
                style_1 = reparameterize(training=True, mu=style_mu_1, logvar=style_logvar_1)

                kl_divergence_loss_1 = - 0.5 * torch.sum(1 + style_logvar_1 - style_mu_1.pow(2) - style_logvar_1.exp())
                kl_divergence_loss_1 /= (FLAGS.batch_size * FLAGS.num_channels * FLAGS.image_size * FLAGS.image_size)
                kl_divergence_loss_1.backward(retain_graph=True)

                _, __, class_3 = encoder(Variable(X_3))

                # Style transfer: style from X_1, class from X_3.
                reconstructed_X_1_3 = decoder(style_1, class_3)
                output_1 = discriminator(Variable(X_3), reconstructed_X_1_3)
                generator_error_1 = cross_entropy_loss(output_1, Variable(domain_labels))
                generator_error_1.backward(retain_graph=True)

                # Same class decoded with a purely random style sample.
                style_latent_space.normal_(0., 1.)
                reconstructed_X_latent_3 = decoder(Variable(style_latent_space), class_3)
                output_2 = discriminator(Variable(X_3), reconstructed_X_latent_3)
                generator_error_2 = cross_entropy_loss(output_2, Variable(domain_labels))
                generator_error_2.backward()

                generator_error = generator_error_1 + generator_error_2
                # Accumulated across generator iterations for reporting.
                kl_divergence_error += kl_divergence_loss_1

                generator_optimizer.step()

            # C. run the discriminator
            for i in range(FLAGS.discriminator_times):
                discriminator_optimizer.zero_grad()

                # train discriminator on real data
                domain_labels.fill_(real_domain_labels)

                image_batch_1, _, __ = next(loader)
                image_batch_2, image_batch_3, _ = next(loader)

                X_1.copy_(image_batch_1)
                X_2.copy_(image_batch_2)
                X_3.copy_(image_batch_3)

                real_output = discriminator(Variable(X_2), Variable(X_3))
                discriminator_real_error = cross_entropy_loss(real_output, Variable(domain_labels))
                discriminator_real_error.backward()

                # train discriminator on fake data
                domain_labels.fill_(fake_domain_labels)

                style_mu_1, style_logvar_1, _ = encoder(Variable(X_1))
                style_1 = reparameterize(training=False, mu=style_mu_1, logvar=style_logvar_1)

                _, __, class_3 = encoder(Variable(X_3))

                reconstructed_X_1_3 = decoder(style_1, class_3)
                fake_output = discriminator(Variable(X_3), reconstructed_X_1_3)
                discriminator_fake_error = cross_entropy_loss(fake_output, Variable(domain_labels))
                discriminator_fake_error.backward()

                # total discriminator error
                discriminator_error = discriminator_real_error + discriminator_fake_error

                # calculate discriminator accuracy for this step
                target_true_labels = torch.cat((torch.ones(FLAGS.batch_size), torch.zeros(FLAGS.batch_size)), dim=0)
                if FLAGS.cuda:
                    target_true_labels = target_true_labels.cuda()

                discriminator_predictions = torch.cat((real_output, fake_output), dim=0)
                _, discriminator_predictions = torch.max(discriminator_predictions, 1)

                discriminator_accuracy = (discriminator_predictions.data == target_true_labels.long()
                                          ).sum().item() / (FLAGS.batch_size * 2)

                # Skip the step once the discriminator is "too good" so the
                # generator still receives useful gradients.
                if discriminator_accuracy < FLAGS.discriminator_limiting_accuracy:
                    discriminator_optimizer.step()

            if (iteration + 1) % 50 == 0:
                print('')
                print('Epoch #' + str(epoch))
                print('Iteration #' + str(iteration))

                print('')
                print('Reconstruction loss: ' + str(reconstruction_error.data.storage().tolist()[0]))
                print('KL-Divergence loss: ' + str(kl_divergence_error.data.storage().tolist()[0]))

                print('')
                print('Generator loss: ' + str(generator_error.data.storage().tolist()[0]))
                print('Discriminator loss: ' + str(discriminator_error.data.storage().tolist()[0]))
                print('Discriminator accuracy: ' + str(discriminator_accuracy))

                print('..........')

                # write to log
                with open(FLAGS.log_file, 'a') as log:
                    log.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n'.format(
                        epoch,
                        iteration,
                        reconstruction_error.data.storage().tolist()[0],
                        kl_divergence_error.data.storage().tolist()[0],
                        generator_error.data.storage().tolist()[0],
                        discriminator_error.data.storage().tolist()[0],
                        discriminator_accuracy
                    ))

                # write to tensorboard
                writer.add_scalar('Reconstruction loss',
                                  reconstruction_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('KL-Divergence loss',
                                  kl_divergence_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('Generator loss',
                                  generator_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('Discriminator loss',
                                  discriminator_error.data.storage().tolist()[0],
                                  epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)
                writer.add_scalar('Discriminator accuracy',
                                  discriminator_accuracy * 100,
                                  epoch * (int(len(paired_mnist) / FLAGS.batch_size) + 1) + iteration)

        # save model after every 5 epochs
        if (epoch + 1) % 5 == 0 or (epoch + 1) == FLAGS.end_epoch:
            torch.save(encoder.state_dict(), os.path.join('checkpoints', FLAGS.encoder_save))
            torch.save(decoder.state_dict(), os.path.join('checkpoints', FLAGS.decoder_save))
            torch.save(discriminator.state_dict(), os.path.join('checkpoints', FLAGS.discriminator_save))
def main():
    """Train the foreground generator (G_fg) adversarially against a global and
    an instance-level discriminator on masked COCO-style images.

    Parses command-line options, builds the dataset/loader and the three
    networks, then runs the training loop: each generator update is preceded by
    up to ``critic_iter`` discriminator updates.  Sample images are written
    every epoch and checkpoints every 10 epochs (after epoch 11) under
    ``result_fg/<category_names>/``.

    Fixes vs. the original:
      * the checkpoint block saved every state_dict twice - duplicates removed;
      * ``data_iter.next()`` replaced with ``next(data_iter)`` (Python 3);
      * ``!= None`` replaced with ``is not None``;
      * bare ``except:`` narrowed to ``except Exception``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_imgs', type=str, help='dataset path')
    parser.add_argument('--mask_imgs', type=str, help='dataset path')
    parser.add_argument('--log_dir', type=str, default='log',
                        help='Name of the log folder')
    # NOTE(review): argparse `type=bool` converts ANY non-empty string to True
    # ("--save_models False" still enables saving).  Kept as-is for CLI
    # compatibility; a str-to-bool converter would be safer.
    parser.add_argument('--save_models', type=bool, default=True,
                        help='Set True if you want to save trained models')
    parser.add_argument('--pre_trained_model_path', type=str, default=None,
                        help='Pre-trained model path')
    parser.add_argument('--pre_trained_model_epoch', type=str, default=None,
                        help='Pre-trained model epoch e.g 200')
    parser.add_argument('--train_imgs_path', type=str,
                        default='C:/Users/motur/coco/images/train2017',
                        help='Path to training images')
    parser.add_argument(
        '--train_annotation_path', type=str,
        default='C:/Users/motur/coco/annotations/instances_train2017.json',
        help='Path to annotation file, .json file')
    parser.add_argument('--category_names', type=str,
                        default='giraffe,elephant,zebra,sheep,cow,bear',
                        help='List of categories in MS-COCO dataset')
    parser.add_argument('--num_test_img', type=int, default=16,
                        help='Number of images saved during training')
    parser.add_argument('--img_size', type=int, default=256,
                        help='Generated image size')
    parser.add_argument(
        '--local_patch_size', type=int, default=256,
        help='Image size of instance images after interpolation')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='Mini-batch size')
    parser.add_argument('--train_epoch', type=int, default=20,
                        help='Maximum training epoch')
    parser.add_argument('--lr', type=float, default=0.0002,
                        help='Initial learning rate')
    parser.add_argument('--optim_step_size', type=int, default=80,
                        help='Learning rate decay step size')
    parser.add_argument('--optim_gamma', type=float, default=0.5,
                        help='Learning rate decay ratio')
    parser.add_argument(
        '--critic_iter', type=int, default=5,
        help='Number of discriminator update against each generator update')
    parser.add_argument('--noise_size', type=int, default=128,
                        help='Noise vector size')
    parser.add_argument('--lambda_FM', type=float, default=1,
                        help='Trade-off param for feature matching loss')
    parser.add_argument('--lambda_recon', type=float, default=0.00001,
                        help='Trade-off param for reconstruction loss')
    parser.add_argument('--num_res_blocks', type=int, default=5,
                        help='Number of residual block in generator network')
    parser.add_argument(
        '--trade_off_G', type=float, default=0.1,
        help='Trade-off parameter which controls gradient flow to generator '
             'from D_local and D_glob')
    opt = parser.parse_args()
    print(opt)

    # Create log folders: images_<log_dir> and models_<log_dir> under the
    # per-category result root.
    root = 'result_fg/' + opt.category_names + '/'
    model = 'coco_model_'
    result_folder_name = 'images_' + opt.log_dir
    model_folder_name = 'models_' + opt.log_dir
    if not os.path.isdir(root):
        os.makedirs(root)
    if not os.path.isdir(root + result_folder_name):
        os.makedirs(root + result_folder_name)
    if not os.path.isdir(root + model_folder_name):
        os.makedirs(root + model_folder_name)

    # Save a copy of this script next to the results for reproducibility.
    # NOTE(review): uses basename, so this only finds the script when run from
    # its own directory - confirm the intended working directory.
    copyfile(os.path.basename(__file__),
             root + result_folder_name + '/' + os.path.basename(__file__))

    # Dataset image transformation: resize, to-tensor, normalize to [-1, 1].
    # NOTE(review): transforms.Scale is the pre-0.2 torchvision name for
    # Resize; left unchanged to match the project's pinned torchvision.
    transform = transforms.Compose([
        transforms.Scale((opt.img_size, opt.img_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Load dataset: all PNG masks under mask_imgs (recursive) + images.
    category_names = opt.category_names.split(',')
    allmasks = sorted(
        glob.glob(os.path.join(opt.mask_imgs, '**', '*.png'), recursive=True))
    print('Number of masks: %d' % len(allmasks))
    dataset = chairs(imfile=opt.train_imgs, mfiles=allmasks,
                     category_names=category_names, transform=transform,
                     final_img_size=opt.img_size)
    # Discard images contain very small instances
    # dataset.discard_small(min_area=0.03, max_area=1)
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)

    # Fixed evaluation inputs: one batch of images/masks plus a fixed noise
    # vector, reused every epoch so sample images are comparable.
    data_iter = iter(train_loader)
    sample_batched = next(data_iter)  # was data_iter.next() (Python 2 style)
    x_fixed = sample_batched['image'][0:opt.num_test_img]
    x_fixed = Variable(x_fixed.cuda())
    y_fixed = sample_batched['single_fg_mask'][0:opt.num_test_img]
    y_fixed = Variable(y_fixed.cuda())
    z_fixed = torch.randn((opt.num_test_img, opt.noise_size))
    z_fixed = Variable(z_fixed.cuda())

    # Networks: foreground generator + global and instance discriminators.
    G_fg = Generator_FG(z_dim=opt.noise_size,
                        label_channel=len(category_names),
                        num_res_blocks=opt.num_res_blocks)
    D_glob = Discriminator(channels=3 + len(category_names))
    D_instance = Discriminator(channels=3 + len(category_names),
                               input_size=opt.local_patch_size)
    G_fg.cuda()
    D_glob.cuda()
    D_instance.cuda()

    # Optionally resume from pre-trained checkpoints.
    # NOTE(review): checkpoints are saved below with a '.pth' suffix, but this
    # loader does not append it - pre_trained_model_epoch must include '.pth'
    # (or the paths will not match); verify against actual checkpoint names.
    if (opt.pre_trained_model_path is not None
            and opt.pre_trained_model_epoch is not None):
        try:
            G_fg.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'G_fg_epoch_' +
                           opt.pre_trained_model_epoch))
            D_glob.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_glob_epoch_' +
                           opt.pre_trained_model_epoch))
            D_instance.load_state_dict(
                torch.load(opt.pre_trained_model_path + 'D_local_epoch_' +
                           opt.pre_trained_model_epoch))
            print('Parameters are loaded!')
        except Exception:  # was bare except: - keep the best-effort behavior
            print('Error: Pre-trained parameters are not loaded!')

    # Interpolation up to the local-patch resolution for instance crops.
    up_instance = nn.Upsample(size=(opt.local_patch_size,
                                    opt.local_patch_size),
                              mode='bilinear')
    # Pooling for the case that image size and local patch size mismatch.
    pooling_instance = nn.Sequential()
    if opt.local_patch_size != opt.img_size:
        pooling_instance.add_module(
            '0', nn.AvgPool2d(int(opt.img_size / opt.local_patch_size)))

    # Losses: adversarial BCE + VGG feature-matching.
    BCE_loss = nn.BCELoss()
    criterionVGG = VGGLoss()
    criterionVGG = criterionVGG.cuda()

    # Optimizers (WGAN-GP-style betas) and step-decay LR schedulers.
    G_local_optimizer = optim.Adam(G_fg.parameters(), lr=opt.lr,
                                   betas=(0.0, 0.9))
    D_local_optimizer = optim.Adam(
        list(filter(lambda p: p.requires_grad, D_glob.parameters())) +
        list(filter(lambda p: p.requires_grad, D_instance.parameters())),
        lr=opt.lr, betas=(0.0, 0.9))
    scheduler_G = lr_scheduler.StepLR(G_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)
    scheduler_D = lr_scheduler.StepLR(D_local_optimizer,
                                      step_size=opt.optim_step_size,
                                      gamma=opt.optim_gamma)

    # ---------------------------- TRAIN -----------------------------------
    print('training start!')
    start_time = time.time()
    for epoch in range(opt.train_epoch):
        epoch_start_time = time.time()
        # scheduler.step() before the optimizer steps matches the old
        # (pre-1.1) PyTorch convention this script was written against.
        scheduler_G.step()
        scheduler_D.step()
        D_local_losses = []
        G_local_losses = []

        y_real_ = torch.ones(opt.batch_size)
        y_fake_ = torch.zeros(opt.batch_size)
        y_real_, y_fake_ = Variable(y_real_.cuda()), Variable(y_fake_.cuda())

        data_iter = iter(train_loader)
        num_iter = 0
        while num_iter < len(train_loader):
            # -------- critic_iter discriminator updates per G update -------
            j = 0
            while j < opt.critic_iter and num_iter < len(train_loader):
                j += 1
                sample_batched = next(data_iter)  # was data_iter.next()
                num_iter += 1
                x_ = sample_batched['image']
                y_ = sample_batched['single_fg_mask']
                fg_mask = sample_batched['seg_mask']
                y_instances = sample_batched['mask_instance']
                bbox = sample_batched['bbox']
                mini_batch = x_.size()[0]
                # Skip the ragged last batch: labels are sized batch_size.
                if mini_batch != opt.batch_size:
                    break

                # Update discriminators - real examples.
                D_glob.zero_grad()
                D_instance.zero_grad()
                x_, y_ = Variable(x_.cuda()), Variable(y_.cuda())
                fg_mask = Variable(fg_mask.cuda())
                # Collapse the per-category masks into one binary fg mask.
                y_reduced = torch.sum(y_, 1).clamp(0, 1).view(
                    y_.size(0), 1, opt.img_size, opt.img_size)
                x_d = torch.cat([x_, fg_mask], 1)
                x_instances = torch.zeros((opt.batch_size, 3,
                                           opt.local_patch_size,
                                           opt.local_patch_size))
                x_instances = Variable(x_instances.cuda())
                y_instances = Variable(y_instances.cuda())
                y_instances = pooling_instance(y_instances)
                G_instances = torch.zeros((opt.batch_size, 3,
                                           opt.local_patch_size,
                                           opt.local_patch_size))
                G_instances = Variable(G_instances.cuda())

                # Crop each real instance by its bbox and resize it.
                for t in range(x_d.size()[0]):
                    x_instance = x_[t, 0:3, bbox[0][t]:bbox[1][t],
                                    bbox[2][t]:bbox[3][t]]
                    x_instance = x_instance.contiguous().view(
                        1, x_instance.size()[0], x_instance.size()[1],
                        x_instance.size()[2])
                    x_instances[t] = up_instance(x_instance)

                D_result_instance = D_instance(
                    torch.cat([x_instances, y_instances], 1)).squeeze()
                D_result = D_glob(x_d).squeeze()
                D_real_loss = BCE_loss(D_result, y_real_) + BCE_loss(
                    D_result_instance, y_real_)
                D_real_loss.backward()

                # Fake examples: generate foregrounds on the masked-out image.
                z_ = torch.randn((mini_batch, opt.noise_size))
                z_ = Variable(z_.cuda())
                G_fg_result = G_fg(z_, y_, torch.mul(x_, (1 - y_reduced)))
                G_result_d = torch.cat([G_fg_result, fg_mask], 1)

                # Crop each fake instance by the same bboxes.
                for t in range(x_d.size()[0]):
                    G_instance = G_result_d[t, 0:3, bbox[0][t]:bbox[1][t],
                                            bbox[2][t]:bbox[3][t]]
                    G_instance = G_instance.contiguous().view(
                        1, G_instance.size()[0], G_instance.size()[1],
                        G_instance.size()[2])
                    G_instances[t] = up_instance(G_instance)

                # .detach() so D gradients do not flow into G here.
                D_result_instance = D_instance(
                    torch.cat([G_instances, y_instances],
                              1).detach()).squeeze()
                D_result = D_glob(G_result_d.detach()).squeeze()
                D_fake_loss = BCE_loss(D_result, y_fake_) + BCE_loss(
                    D_result_instance, y_fake_)
                D_fake_loss.backward()
                D_local_optimizer.step()

                D_train_loss = D_real_loss + D_fake_loss
                D_local_losses.append(D_train_loss.data)

            if mini_batch != opt.batch_size:
                break

            # -------- generator update --------
            G_fg.zero_grad()
            D_result = D_glob(G_result_d).squeeze()
            D_result_instance = D_instance(
                torch.cat([G_instances, y_instances], 1)).squeeze()
            # trade_off_G balances global vs. instance adversarial signal.
            G_train_loss = (1 - opt.trade_off_G) * BCE_loss(
                D_result, y_real_) + opt.trade_off_G * BCE_loss(
                    D_result_instance, y_real_)
            # Feature matching loss between generated image and ground truth.
            FM_loss = criterionVGG(G_fg_result, x_)
            # Reconstruction loss on the (unchanged) background region only.
            Recon_loss = mse_loss(torch.mul(x_, (1 - y_reduced)),
                                  torch.mul(G_fg_result, (1 - y_reduced)))
            total_loss = (G_train_loss + opt.lambda_FM * FM_loss +
                          opt.lambda_recon * Recon_loss)
            total_loss.backward()
            G_local_optimizer.step()
            G_local_losses.append(G_train_loss.data)

            print('loss_d: %.3f, loss_g: %.3f' % (D_train_loss.data,
                                                  G_train_loss.data))
            if (num_iter % 100) == 0:
                print('%d - %d complete!' % ((epoch + 1), num_iter))
                print(result_folder_name)

        epoch_end_time = time.time()
        per_epoch_ptime = epoch_end_time - epoch_start_time
        print('[%d/%d] - ptime: %.2f, loss_d: %.3f, loss_g: %.3f' %
              ((epoch + 1), opt.train_epoch, per_epoch_ptime,
               torch.mean(torch.FloatTensor(D_local_losses)),
               torch.mean(torch.FloatTensor(G_local_losses))))

        # Save sample images (ground truth + per-category masks once, then a
        # generated sheet every epoch).
        G_fg.eval()
        if epoch == 0:
            show_result_rgb((epoch + 1), x_fixed, save=True,
                            path=root + result_folder_name + '/' + model +
                            str(epoch + 1) + '_gt.png')
            for t in range(y_fixed.size()[1]):
                show_result_rgb((epoch + 1), y_fixed[:, t:t + 1, :, :],
                                save=True,
                                path=root + result_folder_name + '/' + model +
                                str(epoch + 1) + '_' + str(t) + '_masked.png')
        show_result_rgb(
            (epoch + 1),
            G_fg(z_fixed, y_fixed,
                 torch.mul(x_fixed, (1 - torch.sum(y_fixed, 1).view(
                     y_fixed.size(0), 1, opt.img_size, opt.img_size)))),
            save=True,
            path=root + result_folder_name + '/' + model + str(epoch + 1) +
            '_fg.png')
        G_fg.train()

        # Save model params every 10th epoch after epoch 11.
        # BUG FIX: the original executed each of these torch.save calls twice.
        if opt.save_models and (epoch > 11 and epoch % 10 == 0):
            torch.save(
                G_fg.state_dict(), root + model_folder_name + '/' + model +
                'G_fg_epoch_' + str(epoch) + '.pth')
            torch.save(
                D_glob.state_dict(), root + model_folder_name + '/' + model +
                'D_glob_epoch_' + str(epoch) + '.pth')
            torch.save(
                D_instance.state_dict(), root + model_folder_name + '/' +
                model + 'D_local_epoch_' + str(epoch) + '.pth')

    end_time = time.time()
    total_ptime = end_time - start_time
    print("Training finish!... save training results")
    print('Training time: ' + str(total_ptime))
def loss(self):
    """Return the mean-squared-error loss between the network output and the label.

    The flat label tensor ``self.y`` is reshaped back to image form using
    ``self.input_shape`` (presumably height, width, channels - confirm against
    the model definition) before being compared with ``self.inference``.
    """
    height, width, channels = (self.input_shape[0], self.input_shape[1],
                               self.input_shape[2])
    label = tf.reshape(self.y, shape=[-1, height, width, channels])
    # For a fancy tensorboard summary: put the input, label and model side by
    # side (sbs) for a fancy image summary:
    # tf.summary.image(sbs.op.name, sbs, max_outputs=3, collections=["training summary"])
    return digits.mse_loss(self.inference, label)