def train_discriminator(images):
    """Run one WGAN-GP style discriminator update and return its loss."""
    latent = tf.random.normal([batch_size, latent_dim])
    with tf.GradientTape() as tape:
        fakes = generator(latent, training=True)
        fake_scores = discriminator(fakes, training=True)
        real_scores = discriminator(images, training=True)
        # Gradient-penalty sample: score a random blend of real and fake images.
        blended = random_weighted_average([images, fakes])
        blended_scores = discriminator(blended, training=True)
        d_loss = discriminator_loss(real_scores, fake_scores,
                                    blended_scores, blended)
    d_grads = tape.gradient(d_loss, discriminator.trainable_variables)
    disc_optimizer.apply_gradients(
        zip(d_grads, discriminator.trainable_variables))
    return d_loss
def train_step(lr_images, hr_images):
    """One SRGAN step: update generator (perceptual loss) and discriminator.

    Args:
        lr_images: batch of low-resolution inputs.
        hr_images: batch of matching high-resolution targets.

    Returns:
        (gen_loss, disc_loss, cont_loss) — adversarial generator loss,
        discriminator loss, and content (feature) loss.
    """
    # Persistent tape: two separate gradients are taken from it below.
    with tf.GradientTape(persistent=True) as tape:
        sr_images = generator(lr_images)  # sr -> super resolution
        real_output = discriminator(hr_images)
        fake_output = discriminator(sr_images)
        # adversarial loss (scaled down as in SRGAN)
        gen_loss = generator_loss(cross_entropy, fake_output) * 1e-3
        disc_loss = discriminator_loss(cross_entropy, real_output,
                                       fake_output) * 1e-3
        # content loss: distance between feature-extractor activations
        hr_feat = extractor(hr_images)
        sr_feat = extractor(sr_images)
        cont_loss = content_loss(mse, hr_feat, sr_feat) * 0.006
        perc_loss = cont_loss + gen_loss
    grad_gen = tape.gradient(perc_loss, generator.trainable_variables)
    grad_disc = tape.gradient(disc_loss, discriminator.trainable_variables)
    # Fix: a persistent tape holds its resources until explicitly dropped
    # (per tf.GradientTape docs); release it once both gradients are taken.
    del tape
    gen_optimizer.apply_gradients(zip(grad_gen, generator.trainable_variables))
    disc_optimizer.apply_gradients(
        zip(grad_disc, discriminator.trainable_variables))
    return gen_loss, disc_loss, cont_loss
def train_step(models, opts, images, loss):
    """One GAN update; pushes outputs and losses into the `loss` metrics.

    Args:
        models: (Generator, Discriminator) pair.
        opts: (generator optimizer, discriminator optimizer) pair.
        images: batch of real samples.
        loss: four metric objects — real output, fake output, gen loss,
            disc loss — updated in that order.
    """
    Generator, Discriminator = models
    G_opt, D_opt = opts
    noise = tf.random.normal([images.shape[0], FLAGS.noise_dim])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fakes = Generator(noise, training=True)
        real_output = Discriminator(images, training=True)
        fake_output = Discriminator(fakes, training=True)
        gen_loss = generator_loss(fake_output)
        real_loss, fake_loss = discriminator_loss(real_output, fake_output)
        disc_loss = real_loss + fake_loss
        # Record raw scores and both losses in the supplied metric slots.
        for metric, value in zip(
                loss, (real_output, fake_output, gen_loss, disc_loss)):
            metric.update_state(value)
    gen_grads = gen_tape.gradient(gen_loss, Generator.trainable_variables)
    disc_grads = disc_tape.gradient(disc_loss,
                                    Discriminator.trainable_variables)
    G_opt.apply_gradients(zip(gen_grads, Generator.trainable_variables))
    D_opt.apply_gradients(zip(disc_grads, Discriminator.trainable_variables))
def log_loss_and_save_images(self, data, epoch, test_input, debug=False):
    """Evaluate current losses, log them, and return a PNG scatter plot.

    Args:
        data: batch of real samples (indexable as data[0], data[1]).
        epoch: current epoch number (logging only).
        test_input: inputs fed to the generator for evaluation.
        debug: when True, also render the figure on screen.

    Returns:
        io.BytesIO holding the figure saved as PNG.
    """
    # Notice `training` is set to False.
    # This is so all layers run in inference mode (batchnorm).
    generated_data = self.generator(test_input, training=False)
    real_output = self.discriminator(data, training=False)
    # Fix: this call previously used training=True, contradicting the
    # inference-mode intent above and scoring fakes with train-mode
    # batchnorm while reals used inference mode.
    fake_output = self.discriminator(generated_data, training=False)
    gen_loss = generator_loss(fake_output)
    disc_loss = discriminator_loss(real_output, fake_output)
    self.logger.info(
        'epoch {}, gen loss {}, discriminator loss {}'.format(
            epoch, gen_loss.numpy(), disc_loss.numpy()))
    pyplot.close()
    # Overlay generated points and the real data on one scatter plot.
    pyplot.scatter(*zip(*generated_data.numpy()))
    pyplot.scatter(data[0], data[1])
    buffer = io.BytesIO()
    pyplot.savefig(buffer, format='png')
    if debug:
        buffer.seek(0)
        pyplot.imread(buffer)
        pyplot.show()
    return buffer
def train_discriminator(images, ori_labels, tar_labels):
    """StarGAN-style discriminator step: WGAN-GP adversarial loss plus a
    domain-classification loss on real images.

    Args:
        images: batch of real images.
        ori_labels: original domain labels of `images`.
        tar_labels: target domain labels used to condition the generator.

    Returns:
        (real_class_loss, disc_loss).
    """
    # Fix: the tape was persistent=True, but tape.gradient is called exactly
    # once below — a non-persistent tape is sufficient and is freed
    # automatically after that single gradient call.
    with tf.GradientTape() as tape:
        # real
        real_output, real_class = discriminator(images)
        # fake (class head unused for fakes here)
        fake_images = generator(images, tar_labels)
        fake_output, _ = discriminator(fake_images)
        # x_hat: random real/fake blend for the gradient penalty
        interpolated_img = random_weighted_average([images, fake_images])
        averaged_output, _ = discriminator(interpolated_img)
        disc_loss = discriminator_loss(real_output, fake_output,
                                       averaged_output, interpolated_img)
        real_class_loss = domain_classification_loss(
            bce, ori_labels, real_class)
        total_disc_loss = disc_loss + real_class_loss
    grad_disc = tape.gradient(total_disc_loss,
                              discriminator.trainable_variables)
    disc_optimizer.apply_gradients(
        zip(grad_disc, discriminator.trainable_variables))
    return real_class_loss, disc_loss
def train_step(real_x, real_y):
    """One CycleGAN step updating both generators and both discriminators.

    gene_G maps X -> Y and gene_F maps Y -> X (established by
    `fake_y = gene_G(real_x)` / `fake_x = gene_F(real_y)` below).

    Returns:
        (total_gene_g_loss, total_gene_f_loss, disc_x_loss, disc_y_loss).
    """
    # Persistent tape: four separate gradients are taken from it below.
    with tf.GradientTape(persistent=True) as tape:
        fake_y = gene_G(real_x)
        rec_x = gene_F(fake_y)
        fake_x = gene_F(real_y)
        rec_y = gene_G(fake_x)
        # Identity mappings: a generator applied to a sample already in its
        # OUTPUT domain should behave as the identity.
        # Fix: these were swapped (same_x = gene_G(real_x),
        # same_y = gene_F(real_y)), which compared samples across domains
        # and made the identity loss meaningless.
        same_x = gene_F(real_x)
        same_y = gene_G(real_y)
        disc_real_x = disc_X(real_x)
        disc_real_y = disc_Y(real_y)
        disc_fake_x = disc_X(fake_x)
        disc_fake_y = disc_Y(fake_y)
        # Loss Func.
        disc_x_loss = discriminator_loss(cross_entropy, disc_real_x, disc_fake_x)
        disc_y_loss = discriminator_loss(cross_entropy, disc_real_y, disc_fake_y)
        gene_g_loss = generator_loss(cross_entropy, disc_fake_y)
        gene_f_loss = generator_loss(cross_entropy, disc_fake_x)
        cycle_x_loss = cycle_loss(mae, real_x, rec_x)
        cycle_y_loss = cycle_loss(mae, real_y, rec_y)
        total_cycle_loss = cycle_x_loss + cycle_y_loss
        total_gene_g_loss = (gene_g_loss + total_cycle_loss +
                             identity_loss(mae, real_y, same_y))
        total_gene_f_loss = (gene_f_loss + total_cycle_loss +
                             identity_loss(mae, real_x, same_x))
    grad_gene_G = tape.gradient(total_gene_g_loss, gene_G.trainable_variables)
    grad_gene_F = tape.gradient(total_gene_f_loss, gene_F.trainable_variables)
    grad_disc_X = tape.gradient(disc_x_loss, disc_X.trainable_variables)
    grad_disc_Y = tape.gradient(disc_y_loss, disc_Y.trainable_variables)
    # Fix: release the persistent tape once all gradients are taken
    # (per tf.GradientTape docs).
    del tape
    gene_g_optimizer.apply_gradients(zip(grad_gene_G, gene_G.trainable_variables))
    gene_f_optimizer.apply_gradients(zip(grad_gene_F, gene_F.trainable_variables))
    disc_x_optimizer.apply_gradients(zip(grad_disc_X, disc_X.trainable_variables))
    disc_y_optimizer.apply_gradients(zip(grad_disc_Y, disc_Y.trainable_variables))
    return total_gene_g_loss, total_gene_f_loss, disc_x_loss, disc_y_loss
def train_step(self, x_batch):
    """Run one vanilla-GAN update (generator and discriminator) on a batch."""
    z = tensorflow.random.normal([len(x_batch), noise_dim])
    with tensorflow.GradientTape() as g_tape, tensorflow.GradientTape() as d_tape:
        fakes = self.generator(z, training=True)
        real_scores = self.discriminator(x_batch, training=True)
        fake_scores = self.discriminator(fakes, training=True)
        g_loss = generator_loss(fake_scores)
        d_loss = discriminator_loss(real_scores, fake_scores)
    g_grads = g_tape.gradient(g_loss, self.generator.trainable_variables)
    d_grads = d_tape.gradient(d_loss, self.discriminator.trainable_variables)
    self.generator_optimizer.apply_gradients(
        zip(g_grads, self.generator.trainable_variables))
    self.discriminator_optimizer.apply_gradients(
        zip(d_grads, self.discriminator.trainable_variables))
def train_discriminator(images):
    """One WGAN critic update, followed by weight clipping.

    BatchNorm's gamma/beta parameters are exempt from clipping.
    """
    latent = tf.random.normal([batch_size, latent_dim])
    with tf.GradientTape() as tape:
        fakes = generator(latent, training=True)
        fake_scores = discriminator(fakes, training=True)
        real_scores = discriminator(images, training=True)
        d_loss = discriminator_loss(real_scores, fake_scores)
    grads = tape.gradient(d_loss, discriminator.trainable_variables)
    disc_optimizer.apply_gradients(
        zip(grads, discriminator.trainable_variables))
    # Clip weights to enforce the Lipschitz constraint, skipping any
    # parameter whose leaf name mentions gamma or beta (Batch Normalization).
    for param in discriminator.trainable_variables:
        leaf = param.name.split('/')[-1]
        if 'gamma' not in leaf and 'beta' not in leaf:
            param.assign(tf.clip_by_value(param, -0.01, 0.01))
    return d_loss
def train_step(images):
    """One GAN update for both networks using the `mse`-based loss helpers.

    Args:
        images: batch of real samples.

    Returns:
        (gen_loss, disc_loss).
    """
    noise = tf.random.normal([batch_size, latent_dim])
    # Persistent tape: two separate gradients are taken from it below.
    with tf.GradientTape(persistent=True) as tape:
        generated_images = generator(noise)
        real_output = discriminator(images)
        generated_output = discriminator(generated_images)
        gen_loss = generator_loss(mse, generated_output)
        disc_loss = discriminator_loss(mse, real_output, generated_output)
    grad_gen = tape.gradient(gen_loss, generator.trainable_variables)
    grad_disc = tape.gradient(disc_loss, discriminator.trainable_variables)
    # Fix: a persistent tape must be released explicitly once both
    # gradients have been taken (per tf.GradientTape docs).
    del tape
    gen_optimizer.apply_gradients(
        zip(grad_gen, generator.trainable_variables))
    disc_optimizer.apply_gradients(
        zip(grad_disc, discriminator.trainable_variables))
    return gen_loss, disc_loss
def train_scene_discriminator(xi_t, xi_tk, xj_tk):
    """Update the scene discriminator on same-video vs. different-video
    pose pairs; returns the loss and accuracy as plain values.

    Args:
        xi_t: frame t from video i
        xi_tk: frame t + k from video i
        xj_tk: frame t + k from video j
    """
    discriminator.zero_grad()
    # Compute pose vectors for all three frames
    pi_t = pose_encoder(xi_t)
    pi_tk = pose_encoder(xi_tk)
    pj_tk = pose_encoder(xj_tk)
    # Compute the output of discriminator C on the same-video pair...
    pred_same = discriminator(pi_t, pi_tk)
    # ...and on the different-video pair.
    # NOTE(review): .detach() here blocks ALL gradient flow from the
    # different-video prediction — including into the discriminator being
    # trained — while nothing detaches the pose-encoder outputs. That looks
    # inverted for a discriminator update; confirm against the training
    # scheme this implements.
    pred_diff = discriminator(pi_t, pj_tk).detach()
    loss, acc = nutils.discriminator_loss(pred_same, pred_diff, device=device)
    loss.backward()
    discriminator_optim.step()
    return get_value(loss), get_value(acc)
# evaluation SS_score = xx.evaluation(y_x_logits, Y) SS_score_t = xx.evaluation(y_x_logits_t, Y) ### DEFINE LOSSES ############################################################## #reconstruction loss if obj == 'MSE': R_loss = 0.5 * tf.reduce_mean( tf.reduce_sum(tf.pow(X - reconstruction, 2), 1)) else: R_loss = 0.5 * tf.reduce_mean(tf.reduce_sum(tf.abs(X - reconstruction), 1)) #discriminator loss DZ_loss = xx.discriminator_loss(dz_real_logits, dz_fake_logits) DY_loss = xx.discriminator_loss(dy_real_logits, dy_fake_logits) D_loss = DZ_loss + DY_loss #generator loss GZ_loss = xx.generator_loss(dz_fake_logits) GY_loss = xx.generator_loss(dy_fake_logits) G_loss = GZ_loss + GY_loss # semi-supervised loss C_loss_ = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=y_x_logits) C_loss = tf.reduce_mean(C_loss_) ### DEFINE OPTIMIZATIONS ####################################################### R_solver = tf.train.MomentumOptimizer(ae_lr,0.9).minimize(R_loss, \
def train_step(real_x, real_y):
    """One full CycleGAN step over an unpaired (X, Y) batch.

    Updates generator_g (X -> Y), generator_f (Y -> X) and both
    discriminators, then records every loss in its tracking metric.
    """
    # persistent is set to True because the tape is used more than
    # once to calculate the gradients.
    with tf.GradientTape(persistent=True) as tape:
        # Generator G translates X -> Y
        # Generator F translates Y -> X.
        fake_y = generator_g(real_x, training=True)
        cycled_x = generator_f(fake_y, training=True)
        fake_x = generator_f(real_y, training=True)
        cycled_y = generator_g(fake_x, training=True)
        # same_x and same_y are used for identity loss: each generator
        # applied to a sample already in its output domain.
        same_x = generator_f(real_x, training=True)
        same_y = generator_g(real_y, training=True)
        disc_real_x = discriminator_x(real_x, training=True)
        disc_real_y = discriminator_y(real_y, training=True)
        disc_fake_x = discriminator_x(fake_x, training=True)
        disc_fake_y = discriminator_y(fake_y, training=True)
        # calculate the loss
        gen_g_loss = generator_loss(disc_fake_y)
        gen_f_loss = generator_loss(disc_fake_x)
        total_cycle_loss = calc_cycle_loss(
            real_x, cycled_x) + calc_cycle_loss(real_y, cycled_y)
        # Total generator loss = adversarial loss + cycle loss
        total_gen_g_loss = (gen_g_loss + total_cycle_loss +
                            identity_loss(real_y, same_y))
        total_gen_f_loss = (gen_f_loss + total_cycle_loss +
                            identity_loss(real_x, same_x))
        disc_x_loss = discriminator_loss(disc_real_x, disc_fake_x)
        disc_y_loss = discriminator_loss(disc_real_y, disc_fake_y)
    # Calculate the gradients for generator and discriminator
    generator_g_gradients = tape.gradient(total_gen_g_loss,
                                          generator_g.trainable_variables)
    generator_f_gradients = tape.gradient(total_gen_f_loss,
                                          generator_f.trainable_variables)
    discriminator_x_gradients = tape.gradient(
        disc_x_loss, discriminator_x.trainable_variables)
    discriminator_y_gradients = tape.gradient(
        disc_y_loss, discriminator_y.trainable_variables)
    # Apply the gradients to the optimizer
    generator_g_optimizer.apply_gradients(
        zip(generator_g_gradients, generator_g.trainable_variables))
    generator_f_optimizer.apply_gradients(
        zip(generator_f_gradients, generator_f.trainable_variables))
    discriminator_x_optimizer.apply_gradients(
        zip(discriminator_x_gradients, discriminator_x.trainable_variables))
    discriminator_y_optimizer.apply_gradients(
        zip(discriminator_y_gradients, discriminator_y.trainable_variables))
    # Log the losses — these names are presumably Keras metric objects
    # whose __call__ updates a running mean; confirm where they're defined.
    generator_g_loss(total_gen_g_loss)
    generator_f_loss(total_gen_f_loss)
    discriminator_x_loss(disc_x_loss)
    discriminator_y_loss(disc_y_loss)
def train(self, data_loader, stage=1):
    """Train the StackGAN stage-I or stage-II networks over `data_loader`.

    Args:
        data_loader: yields (real images, text embedding) batches.
        stage: 1 loads the stage-I networks, otherwise stage-II.
    """
    if stage == 1:
        netG, netD = self.load_network_stageI()
    else:
        netG, netD = self.load_network_stageII()
    nz = cfg.Z_DIM
    batch_size = self.batch_size
    # Reusable noise tensor plus a fixed vector for periodic sample images.
    noise = Variable(torch.FloatTensor(batch_size, nz))
    # NOTE(review): requires_grad=True on fixed_noise looks unnecessary —
    # it is only used to render sample images below; confirm.
    fixed_noise = Variable(torch.FloatTensor(batch_size, nz).normal_(0, 1),
                           requires_grad=True)
    real_labels = Variable(torch.FloatTensor(batch_size).fill_(1))
    fake_labels = Variable(torch.FloatTensor(batch_size).fill_(0))
    if cfg.CUDA:
        noise, fixed_noise = noise.cuda(), fixed_noise.cuda()
        real_labels, fake_labels = real_labels.cuda(), fake_labels.cuda()
    generator_lr = cfg.TRAIN_GENERATOR_LR
    discriminator_lr = cfg.TRAIN_DISCRIMINATOR_LR
    lr_decay_step = cfg.TRAIN_LR_DECAY_EPOCH
    optimizerD = optim.Adam(netD.parameters(),
                            lr=cfg.TRAIN_DISCRIMINATOR_LR,
                            betas=(0.5, 0.999))
    # Only optimize generator parameters that require grad (others may be
    # frozen, e.g. reused stage-I weights in stage-II).
    netG_para = []
    for p in netG.parameters():
        if p.requires_grad:
            netG_para.append(p)
    optimizerG = optim.Adam(netG_para,
                            lr=cfg.TRAIN_GENERATOR_LR,
                            betas=(0.5, 0.999))
    count = 0
    for epoch in range(self.max_epoch):
        start_t = time.time()
        # Halve both learning rates every lr_decay_step epochs.
        if epoch % lr_decay_step == 0 and epoch > 0:
            generator_lr *= 0.5
            for param_group in optimizerG.param_groups:
                param_group['lr'] = generator_lr
            discriminator_lr *= 0.5
            for param_group in optimizerD.param_groups:
                param_group['lr'] = discriminator_lr
        for i, data in enumerate(data_loader, 0):
            # Prepare training data
            real_img_cpu, txt_embedding = data
            real_imgs = Variable(real_img_cpu)
            txt_embedding = Variable(txt_embedding)
            if cfg.CUDA:
                real_imgs = real_imgs.cuda()
                txt_embedding = txt_embedding.cuda()
            # Generate fake images conditioned on the text embedding
            noise.data.normal_(0, 1)
            inputs = (txt_embedding, noise)
            _, fake_imgs, mu, logvar = nn.parallel.data_parallel(
                netG, inputs, self.gpus)
            # Update D network
            netD.zero_grad()
            errD, errD_real, errD_wrong, errD_fake = discriminator_loss(
                netD, real_imgs, fake_imgs, real_labels,
                fake_labels, mu, self.gpus)
            errD.backward()
            optimizerD.step()
            ############################
            # (2) Update G network
            ###########################
            netG.zero_grad()
            # Adversarial term plus KL regularizer on the conditioning
            # augmentation (mu, logvar).
            errG = generator_loss(netD, fake_imgs, real_labels, mu, self.gpus)
            kl_loss = KL_loss(mu, logvar)
            errG_total = errG + kl_loss * cfg.TRAIN_COEFF_KL
            errG_total.backward()
            optimizerG.step()
            count = count + 1
            if i % 100 == 0:
                # save the image result for each epoch, using fixed_noise so
                # samples are comparable across epochs
                inputs = (txt_embedding, fixed_noise)
                lr_fake, fake, _, _ = \
                    nn.parallel.data_parallel(netG, inputs, self.gpus)
                save_img_results(real_img_cpu, fake, epoch, self.image_dir)
                if lr_fake is not None:
                    save_img_results(None, lr_fake, epoch, self.image_dir)
        end_t = time.time()
        print(
            '''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_KL: %.4f Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f Total Time: %.2fsec ''' %
            (epoch, self.max_epoch, i, len(data_loader),
             errD.data, errG.data, kl_loss.data,
             errD_real, errD_wrong, errD_fake, (end_t - start_t)))
        if epoch % self.snapshot_interval == 0:
            save_model(netG, netD, epoch, self.model_dir)
    # save_model(netG, netD, self.max_epoch, self.model_dir)
# Cycle reconstruction of Y. NOTE(review): fake_X, fake_Y, cyc_X and the
# fake_pool_* tensors come from graph-construction code outside this chunk —
# presumably G maps X -> Y and F maps Y -> X; confirm against those defs.
cyc_Y = G(fake_X)
# Discriminator scores on real samples...
rec_X = D_X(real_X)
rec_Y = D_Y(real_Y)
# ...on freshly generated fakes...
fake_rec_X = D_X(fake_X)
fake_rec_Y = D_Y(fake_Y)
# ...and on fakes drawn from the history pools.
fake_pool_rec_X = D_X(fake_pool_X)
fake_pool_rec_Y = D_Y(fake_pool_Y)
# Generator objectives: adversarial term + shared cycle-consistency term.
cycle_loss = cycle_consistency_loss(real_X, cyc_X, real_Y, cyc_Y)
G_gen_loss = generator_loss(fake_rec_Y)
G_loss = G_gen_loss + cycle_loss
F_gen_loss = generator_loss(fake_rec_X)
F_loss = F_gen_loss + cycle_loss
# Discriminators are trained against pooled fakes, not the fresh ones.
D_X_loss = discriminator_loss(rec_X, fake_pool_rec_X)
D_Y_loss = discriminator_loss(rec_Y, fake_pool_rec_Y)
# summary: histograms of discriminator outputs and scalars for every loss
tf.summary.histogram('D_Y/true', rec_Y)
tf.summary.histogram('D_Y/fake', fake_rec_Y)
tf.summary.histogram('D_X/true', rec_X)
tf.summary.histogram('D_X/fake', fake_rec_X)
tf.summary.scalar('loss/G', G_loss)
tf.summary.scalar('loss/D_Y', D_Y_loss)
tf.summary.scalar('loss/F', F_loss)
tf.summary.scalar('loss/D_X', D_X_loss)
tf.summary.scalar('loss/cycle', cycle_loss)