# Module-level imports and aliases assumed by the methods below (this section is
# an excerpt from a larger file; FloatTensor is typically aliased to
# torch.cuda.FloatTensor when a GPU is available, else torch.FloatTensor).
import numpy as np
import torch
from torch.autograd import Variable
from tqdm import tqdm

FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor


def _create_fake(self, batch_size):
    """Build the 'fake' target labels, optionally softened by the label smoothing trick."""
    soft_label = self.trick_dict.get('label_smooth', None)
    if soft_label:
        fake_range = soft_label['fake_range']
    else:
        fake_range = 0.
    if isinstance(fake_range, list):
        # Soft labels: sample targets uniformly from [low, high] instead of a hard 0.
        fake = Variable(FloatTensor(batch_size, 1).uniform_(*fake_range),
                        requires_grad=False)
    else:
        fake = Variable(FloatTensor(batch_size, 1).fill_(fake_range),
                        requires_grad=False)
    return fake
def _create_valid(self, batch_size):
    """Build the 'valid' (real) target labels, optionally softened by the label smoothing trick."""
    soft_label = self.trick_dict.get('label_smooth', None)
    if soft_label:
        valid_range = soft_label['valid_range']
    else:
        valid_range = 1.
    if isinstance(valid_range, list):
        # Soft labels: sample targets uniformly from [low, high] instead of a hard 1.
        valid = Variable(FloatTensor(batch_size, 1).uniform_(*valid_range),
                         requires_grad=False)
    else:
        valid = Variable(FloatTensor(batch_size, 1).fill_(valid_range),
                         requires_grad=False)
    return valid
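# A minimal usage sketch of the label smoothing trick above (illustrative only;
# `trainer` is a hypothetical instance of the surrounding class). With
# 'label_smooth' configured, real/fake targets are drawn from a range instead
# of hard 1/0, which softens the discriminator's targets:
#
#   trainer.trick_dict['label_smooth'] = {
#       'valid_range': [0.8, 1.0],  # targets for real samples
#       'fake_range': [0.0, 0.2],   # targets for fake samples
#   }
#   valid = trainer._create_valid(batch_size=64)  # shape (64, 1), values in [0.8, 1.0]
#   fake = trainer._create_fake(batch_size=64)    # shape (64, 1), values in [0.0, 0.2]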
def generate(self, mode, *args):
    """Generate samples, either from a fixed noise array or from fresh random draws."""
    self._set_to_eval()
    if mode == 'fixed':
        # args[0] is a pre-sampled numpy noise array of shape (n, code_size).
        noise = torch.from_numpy(args[0]).type(FloatTensor)
    elif mode == 'num':
        # args[0] is the number of samples to draw from N(0, 1).
        num_samples = args[0]
        noise = Variable(
            FloatTensor(
                np.random.normal(0, 1, (num_samples, self.code_size))))
    else:
        raise ValueError('Unknown mode: {}'.format(mode))
    gen_data = self.generator(noise)
    self._set_to_train()
    return gen_data
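# Usage sketch for generate() (hypothetical names; assumes a trained `gan_model`
# instance of the surrounding class with `code_size` set):
#
#   fixed_noise = np.random.normal(0, 1, (16, gan_model.code_size))
#   samples_fixed = gan_model.generate('fixed', fixed_noise)  # reuse the same noise each call
#   samples_fresh = gan_model.generate('num', 16)             # draw 16 fresh noise vectors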
def train(self, num_epoch, train_data_loader, model: AAVAE, checkpoint_path,
          epoch_per_save, callbacks, summary_writer: SummaryWriter, adv_ratio):
    n_iter = 0
    for epoch in range(num_epoch):
        model._set_to_train()
        reconstruction_loss_train = 0.
        kl_divergence_train = 0.
        print('Epoch {}/{}'.format(epoch + 1, num_epoch))
        for input, label in tqdm(train_data_loader):
            batch_size = input.shape[0]

            # --- Train the generator (encoder + decoder) with the discriminator frozen ---
            freeze(model.discriminator)
            unfreeze(model.encoder)
            unfreeze(model.decoder)
            model.optimizer_G.zero_grad()
            input = input.type(FloatTensor)
            mu, logvar = model.encode(input)
            z = model.reparameterize(mu, logvar)
            out = model.decode(z)
            reconstruction_loss = self.recon_loss_f(out, input)
            # KL(q(z|x) || N(0, I)) in closed form.
            kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
            valid = Variable(FloatTensor(batch_size, 1).fill_(1.),
                             requires_grad=False)
            validity = model.discriminator(out)
            adversarial_loss = self.validity_loss_f(validity, valid)
            loss = reconstruction_loss + kl_divergence + adversarial_loss * adv_ratio
            loss.backward()
            model.optimizer_G.step()

            reconstruction_loss_train += reconstruction_loss.item()
            kl_divergence_train += kl_divergence.item()
            summary_writer.add_scalar('data/reconstruction_loss',
                                      reconstruction_loss.item(), n_iter)
            summary_writer.add_scalar('data/kl_divergence',
                                      kl_divergence.item(), n_iter)

            # --- Train the discriminator with the encoder/decoder frozen ---
            unfreeze(model.discriminator)
            freeze(model.encoder)
            freeze(model.decoder)
            model.optimizer_D.zero_grad()
            fake = Variable(FloatTensor(batch_size, 1).fill_(0.),
                            requires_grad=False)
            validity_real = model.discriminator(input)
            d_real_loss = self.validity_loss_f(validity_real, valid)
            # Detach the reconstruction so no gradients flow back into the decoder.
            validity_fake = model.discriminator(out.detach())
            d_fake_loss = self.validity_loss_f(validity_fake, fake)
            d_loss = (d_real_loss + d_fake_loss) / 2
            d_loss.backward()
            model.optimizer_D.step()

            summary_writer.add_scalars('data/adversarial_loss',
                                       {'g_loss': adversarial_loss.item(),
                                        'd_loss': d_loss.item()}, n_iter)
            # Logs the mean discriminator output on real and fake batches.
            summary_writer.add_scalars('data/adversarial_accuracy',
                                       {'real': validity_real.data.mean(),
                                        'fake': validity_fake.data.mean()}, n_iter)
            n_iter += 1

        reconstruction_loss_train /= len(train_data_loader.dataset)
        kl_divergence_train /= len(train_data_loader.dataset)
        print('Reconstruction loss: {:.4f} - KL divergence: {:.4f}'.format(
            reconstruction_loss_train, kl_divergence_train))
        if (epoch + 1) % epoch_per_save == 0:
            model.save_checkpoint(checkpoint_path)
        for callback in callbacks:
            callback(epoch, model, summary_writer)
    model.save_checkpoint(checkpoint_path)
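# A minimal sketch of the freeze/unfreeze helpers used by the trainers above.
# These names are assumptions: the project may define them elsewhere with a
# different signature; here they simply toggle requires_grad on a module's
# parameters so the optimizer step only updates the unfrozen network.
def freeze(module):
    """Disable gradient computation for all parameters of `module`."""
    for p in module.parameters():
        p.requires_grad = False


def unfreeze(module):
    """Re-enable gradient computation for all parameters of `module`."""
    for p in module.parameters():
        p.requires_grad = True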
def train(self, num_epoch, data_loader, gan_model, checkpoint_path,
          epoch_per_save, callbacks):
    for epoch in range(num_epoch):
        # We sample a batch after each epoch.
        dis_loss_lst = []
        gen_loss_lst = []
        D_x_lst = []
        D_G_z1_lst = []
        D_G_z2_lst = []
        # Exponential moving averages for plot smoothing; plot_ws accumulates
        # the bias-correction weight so early values are not pulled toward 0.
        smooth_factor = 0.95
        plot_dis_s = 0
        plot_gen_s = 0
        plot_D_x = 0
        plot_D_G_z1 = 0
        plot_D_G_z2 = 0
        plot_ws = 0
        # Placeholders overwritten by the first discriminator step.
        d_real_loss, D_x, D_G_z1 = 0, 0, 0
        print('Epoch {}'.format(epoch + 1))
        for i, input_and_aux in enumerate(tqdm(data_loader)):
            # We assume input_and_aux is a tuple of (data, auxiliary information).
            batch_size = input_and_aux[0].shape[0]
            z = Variable(
                FloatTensor(
                    np.random.normal(0, 1,
                                     (batch_size, gan_model.code_size))))
            if (i + 1) % self.num_iter_D != 0:
                # Train the discriminator on num_iter_D - 1 of every num_iter_D batches.
                real_data = self._create_real_data(input_and_aux[0])
                d_real_loss, D_x, D_G_z1 = gan_model._train_dis(
                    real_data, z, self.clip)
            else:
                # Train the generator, then update the smoothed plot statistics.
                g_loss, D_G_z2 = gan_model._train_gen(z)
                gen_loss = g_loss.item()
                dis_loss = d_real_loss.item()
                plot_dis_s = plot_dis_s * smooth_factor + dis_loss * (1 - smooth_factor)
                plot_D_x = plot_D_x * smooth_factor + D_x.item() * (1 - smooth_factor)
                plot_D_G_z1 = plot_D_G_z1 * smooth_factor + D_G_z1.item() * (1 - smooth_factor)
                plot_ws = plot_ws * smooth_factor + (1 - smooth_factor)
                dis_loss_lst.append(plot_dis_s / plot_ws)
                D_x_lst.append(plot_D_x / plot_ws)
                D_G_z1_lst.append(plot_D_G_z1 / plot_ws)
                plot_gen_s = plot_gen_s * smooth_factor + gen_loss * (1 - smooth_factor)
                plot_D_G_z2 = plot_D_G_z2 * smooth_factor + D_G_z2.item() * (1 - smooth_factor)
                gen_loss_lst.append(plot_gen_s / plot_ws)
                D_G_z2_lst.append(plot_D_G_z2 / plot_ws)
            self.global_step += 1
        noisy_input = self.trick_dict.get('noisy_input', None)
        if noisy_input:
            print('Noisy input sigma: {:.4f}'.format(noisy_input['sigma']))
        if checkpoint_path and (epoch + 1) % epoch_per_save == 0:
            gan_model.save_checkpoint(checkpoint_path)
        # Plot the smoothed loss and discriminator-output curves for this epoch.
        step = list(range(self.global_step - len(gen_loss_lst), self.global_step))
        self.plotter.plot('gan_loss', 'dis_loss', step, dis_loss_lst)
        self.plotter.plot('gan_loss', 'gen_loss', step, gen_loss_lst)
        self.plotter.plot('gan_output', 'D_x', step, D_x_lst)
        self.plotter.plot('gan_output', 'D_G_z1', step, D_G_z1_lst)
        self.plotter.plot('gan_output', 'D_G_z2', step, D_G_z2_lst)
        # Callbacks.
        for callback in callbacks:
            callback(self, gan_model)
    if checkpoint_path:
        gan_model.save_checkpoint(checkpoint_path)
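# A minimal sketch of _create_real_data under the 'noisy_input' trick referenced
# above. This is an assumption: the real implementation is not shown here and may,
# for example, also decay sigma across epochs. Instance noise adds zero-mean
# Gaussian noise to real inputs before the discriminator sees them, which can
# stabilise early GAN training.
def _create_real_data(self, input):
    real_data = input.type(FloatTensor)
    noisy_input = self.trick_dict.get('noisy_input', None)
    if noisy_input:
        # Perturb the real batch with the configured standard deviation.
        real_data = real_data + torch.randn_like(real_data) * noisy_input['sigma']
    return real_data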