def train(self, imageloader, storyloader, testloader):
    """Joint image/story GAN training loop (tensorboard/tqdm variant).

    Per story batch: one update of both discriminators (image-level and
    story-level), then two generator updates. Scalar losses are pushed to
    the tensorboard writer each step; sample stories, test samples and
    model checkpoints are saved periodically.

    Args:
        imageloader: loader of single-image batches; kept on ``self`` and
            re-sampled via ``self.sample_real_image_batch()``.
        storyloader: loader of story (image-sequence) batches; its length
            defines one epoch.
        testloader: loader used by ``save_test_samples`` at checkpoints.
    """
    self.imageloader = imageloader
    self.testloader = testloader
    # Reset cached datasets; presumably rebuilt lazily by the sampling
    # helpers -- TODO confirm against sample_real_image_batch().
    self.imagedataset = None
    self.testdataset = None
    netG, netD_im, netD_st = self.load_networks()

    # Constant real(=1)/fake(=0) target label vectors, one per batch type.
    im_real_labels = Variable(torch.FloatTensor(self.imbatch_size).fill_(1))
    im_fake_labels = Variable(torch.FloatTensor(self.imbatch_size).fill_(0))
    st_real_labels = Variable(torch.FloatTensor(self.stbatch_size).fill_(1))
    st_fake_labels = Variable(torch.FloatTensor(self.stbatch_size).fill_(0))
    if cfg.CUDA:
        im_real_labels, im_fake_labels = im_real_labels.cuda(), im_fake_labels.cuda()
        st_real_labels, st_fake_labels = st_real_labels.cuda(), st_fake_labels.cuda()

    generator_lr = cfg.TRAIN.GENERATOR_LR
    discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
    lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH

    # One Adam optimizer per discriminator; the generator optimizer only
    # receives parameters that require gradients (frozen params excluded).
    im_optimizerD = \
        optim.Adam(netD_im.parameters(),
                   lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
    st_optimizerD = \
        optim.Adam(netD_st.parameters(),
                   lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
    netG_para = []
    for p in netG.parameters():
        if p.requires_grad:
            netG_para.append(p)
    optimizerG = optim.Adam(netG_para,
                            lr=cfg.TRAIN.GENERATOR_LR, betas=(0.5, 0.999))

    if self.tensorboard:
        # NOTE(review): presumably initializes self.writer -- confirm.
        self.build_tensorboard()
    loss = {}   # scalar name -> latest value, flushed to the writer each step
    step = 0    # global step counter across all epochs
    # Persist the untrained network objects once so the architectures can
    # be reloaded independently of weight checkpoints.
    torch.save({
        'netG': netG,
        'netD_im': netD_im,
        'netD_st': netD_st,
    }, os.path.join(self.model_dir, 'barebone.pth'))

    for epoch in range(self.max_epoch):
        start_t = time.time()
        # Halve both learning rates every lr_decay_step epochs.
        if epoch % lr_decay_step == 0 and epoch > 0:
            generator_lr *= 0.5
            for param_group in optimizerG.param_groups:
                param_group['lr'] = generator_lr
            discriminator_lr *= 0.5
            for param_group in st_optimizerD.param_groups:
                param_group['lr'] = discriminator_lr
            for param_group in im_optimizerD.param_groups:
                param_group['lr'] = discriminator_lr
        loss.update({
            'D/lr': discriminator_lr,
            'G/lr': generator_lr,
        })

        print('Epoch [{}/{}]:'.format(epoch, self.max_epoch))
        with tqdm(total=len(storyloader), dynamic_ncols=True) as pbar:
            for i, data in enumerate(storyloader, 0):
                ######################################################
                # (1) Prepare training data
                ######################################################
                im_batch = self.sample_real_image_batch()
                st_batch = data

                im_real_cpu = im_batch['images']
                im_motion_input = im_batch['description']
                im_content_input = im_batch['content']
                # Average content encodings over dim 1.
                # NOTE(review): .squeeze() would also drop a batch dim of
                # size 1 -- verify imbatch_size > 1.
                im_content_input = im_content_input.mean(1).squeeze()
                im_catelabel = im_batch['label']
                im_real_imgs = Variable(im_real_cpu)
                im_motion_input = Variable(im_motion_input)
                im_content_input = Variable(im_content_input)

                st_real_cpu = st_batch['images']
                st_motion_input = st_batch['description']
                # Story content input reuses the per-frame descriptions.
                st_content_input = st_batch['description']
                st_catelabel = st_batch['label']
                st_real_imgs = Variable(st_real_cpu)
                st_motion_input = Variable(st_motion_input)
                st_content_input = Variable(st_content_input)

                if cfg.CUDA:
                    st_real_imgs = st_real_imgs.cuda()
                    im_real_imgs = im_real_imgs.cuda()
                    st_motion_input = st_motion_input.cuda()
                    im_motion_input = im_motion_input.cuda()
                    st_content_input = st_content_input.cuda()
                    im_content_input = im_content_input.cuda()
                    im_catelabel = im_catelabel.cuda()
                    st_catelabel = st_catelabel.cuda()

                #######################################################
                # (2) Generate fake stories and images
                ######################################################
                # No gradients here: these fakes only feed the
                # discriminator update below.
                with torch.no_grad():
                    im_inputs = (im_motion_input, im_content_input)
                    _, im_fake, im_mu, im_logvar = netG.sample_images(*im_inputs)
                    st_inputs = (st_motion_input, st_content_input)
                    _, st_fake, c_mu, c_logvar, m_mu, m_logvar = netG.sample_videos(*st_inputs)

                ############################
                # (3) Update D network
                ###########################
                netD_im.zero_grad()
                netD_st.zero_grad()
                im_errD, im_errD_real, im_errD_wrong, im_errD_fake, accD = \
                    compute_discriminator_loss(netD_im, im_real_imgs, im_fake,
                                               im_real_labels, im_fake_labels,
                                               im_catelabel, im_mu, self.gpus)
                st_errD, st_errD_real, st_errD_wrong, st_errD_fake, _ = \
                    compute_discriminator_loss(netD_st, st_real_imgs, st_fake,
                                               st_real_labels, st_fake_labels,
                                               st_catelabel, c_mu, self.gpus)
                loss.update({
                    'D/story/loss': st_errD.data,
                    'D/story/real_loss': st_errD_real.data,
                    'D/story/fake_loss': st_errD_fake.data,
                    'D/image/accuracy': accD,
                    'D/image/loss': im_errD.data,
                    'D/image/real_loss': im_errD_real.data,
                    'D/image/fake_loss': im_errD_fake.data,
                })
                im_errD.backward()
                st_errD.backward()
                im_optimizerD.step()
                st_optimizerD.step()

                ############################
                # (2) Update G network
                ###########################
                # Two generator steps per discriminator step; fakes are
                # re-sampled with gradients enabled each iteration.
                for g_iter in range(2):
                    netG.zero_grad()
                    _, st_fake, c_mu, c_logvar, m_mu, m_logvar = netG.sample_videos(
                        st_motion_input, st_content_input)
                    _, im_fake, im_mu, im_logvar = netG.sample_images(im_motion_input,
                                                                      im_content_input)
                    im_errG, accG = compute_generator_loss(netD_im, im_fake,
                                                           im_real_labels,
                                                           im_catelabel,
                                                           im_mu, self.gpus)
                    st_errG, _ = compute_generator_loss(netD_st, st_fake,
                                                        st_real_labels,
                                                        st_catelabel,
                                                        c_mu, self.gpus)
                    # KL terms regularize the conditioning distributions
                    # (image posterior and story motion posterior).
                    im_kl_loss = KL_loss(im_mu, im_logvar)
                    st_kl_loss = KL_loss(m_mu, m_logvar)
                    # NOTE(review): errG is unused; errG_total below
                    # recomputes the same weighted sum.
                    errG = im_errG + self.ratio * st_errG
                    kl_loss = im_kl_loss + self.ratio * st_kl_loss
                    loss.update({
                        'G/loss': im_errG.data,
                        'G/kl': kl_loss.data,
                    })
                    errG_total = im_errG + self.ratio * st_errG + kl_loss
                    errG_total.backward()
                    optimizerG.step()

                # Flush the accumulated scalars to tensorboard.
                if self.writer:
                    for key, value in loss.items():
                        self.writer.add_scalar(key, value, step)
                step += 1
                pbar.update(1)

                if i % 100 == 0:
                    # save the image result for each epoch
                    lr_fake, fake, _, _, _, _ = netG.sample_videos(st_motion_input,
                                                                   st_content_input)
                    save_story_results(st_real_cpu, fake, epoch, self.image_dir,
                                       writer=self.writer, steps=step)
                    if lr_fake is not None:
                        # Low-resolution branch output, when the generator
                        # provides one.
                        save_story_results(None, lr_fake, epoch, self.image_dir,
                                           writer=self.writer, steps=step)

        end_t = time.time()
        # Epoch summary from the last batch's statistics.
        print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f accG: %.4f accD: %.4f Total Time: %.2fsec ''' % (
            epoch, self.max_epoch, i, len(storyloader),
            st_errD.data, st_errG.data, st_errD_real, st_errD_wrong,
            st_errD_fake, accG, accD, (end_t - start_t)))
        if epoch % self.snapshot_interval == 0:
            save_model(netG, netD_im, netD_st, epoch, self.model_dir)
            save_test_samples(netG, self.testloader, self.test_dir,
                              writer=self.writer, steps=step)
    # save_model(netG, netD_im, netD_st, self.max_epoch, self.model_dir)
def train(self, imageloader, storyloader, testloader):
    """Joint image/story GAN training loop (plain variant, no tensorboard).

    Same scheme as the tensorboard variant: per story batch, one update of
    both discriminators (image-level and story-level) followed by two
    generator updates; sample stories and checkpoints are saved
    periodically.

    Args:
        imageloader: loader of single-image batches; kept on ``self`` and
            re-sampled via ``self.sample_real_image_batch()``.
        storyloader: loader of story (image-sequence) batches; its length
            defines one epoch.
        testloader: loader used by ``save_test_samples`` at checkpoints.
    """
    self.imageloader = imageloader
    self.testloader = testloader
    # Reset cached datasets; presumably rebuilt lazily by the sampling
    # helpers -- TODO confirm against sample_real_image_batch().
    self.imagedataset = None
    self.testdataset = None
    netG, netD_im, netD_st = self.load_networks()

    # Constant real(=1)/fake(=0) target label vectors, one per batch type.
    im_real_labels = Variable(torch.FloatTensor(self.imbatch_size).fill_(1))
    im_fake_labels = Variable(torch.FloatTensor(self.imbatch_size).fill_(0))
    st_real_labels = Variable(torch.FloatTensor(self.stbatch_size).fill_(1))
    st_fake_labels = Variable(torch.FloatTensor(self.stbatch_size).fill_(0))
    if cfg.CUDA:
        im_real_labels, im_fake_labels = im_real_labels.cuda(), im_fake_labels.cuda()
        st_real_labels, st_fake_labels = st_real_labels.cuda(), st_fake_labels.cuda()

    generator_lr = cfg.TRAIN.GENERATOR_LR
    discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
    lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH

    # One Adam optimizer per discriminator; the generator optimizer only
    # receives parameters that require gradients (frozen params excluded).
    im_optimizerD = \
        optim.Adam(netD_im.parameters(),
                   lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
    st_optimizerD = \
        optim.Adam(netD_st.parameters(),
                   lr=cfg.TRAIN.DISCRIMINATOR_LR, betas=(0.5, 0.999))
    netG_para = []
    for p in netG.parameters():
        if p.requires_grad:
            netG_para.append(p)
    optimizerG = optim.Adam(netG_para,
                            lr=cfg.TRAIN.GENERATOR_LR, betas=(0.5, 0.999))

    for epoch in range(self.max_epoch):
        start_t = time.time()
        # Halve both learning rates every lr_decay_step epochs.
        if epoch % lr_decay_step == 0 and epoch > 0:
            generator_lr *= 0.5
            for param_group in optimizerG.param_groups:
                param_group['lr'] = generator_lr
            discriminator_lr *= 0.5
            for param_group in st_optimizerD.param_groups:
                param_group['lr'] = discriminator_lr
            for param_group in im_optimizerD.param_groups:
                param_group['lr'] = discriminator_lr

        for i, data in enumerate(storyloader, 0):
            ######################################################
            # (1) Prepare training data
            ######################################################
            im_batch = self.sample_real_image_batch()
            st_batch = data

            im_real_cpu = im_batch['images']
            im_motion_input = im_batch['description']
            im_content_input = im_batch['content']
            # Average content encodings over dim 1.
            # NOTE(review): .squeeze() would also drop a batch dim of
            # size 1 -- verify imbatch_size > 1.
            im_content_input = im_content_input.mean(1).squeeze()
            im_catelabel = im_batch['label']
            im_real_imgs = Variable(im_real_cpu)
            im_motion_input = Variable(im_motion_input)
            im_content_input = Variable(im_content_input)

            st_real_cpu = st_batch['images']
            st_motion_input = st_batch['description']
            # Story content input reuses the per-frame descriptions.
            st_content_input = st_batch['description']
            st_catelabel = st_batch['label']
            st_real_imgs = Variable(st_real_cpu)
            st_motion_input = Variable(st_motion_input)
            st_content_input = Variable(st_content_input)

            if cfg.CUDA:
                st_real_imgs = st_real_imgs.cuda()
                im_real_imgs = im_real_imgs.cuda()
                st_motion_input = st_motion_input.cuda()
                im_motion_input = im_motion_input.cuda()
                st_content_input = st_content_input.cuda()
                im_content_input = im_content_input.cuda()
                im_catelabel = im_catelabel.cuda()
                st_catelabel = st_catelabel.cuda()

            #######################################################
            # (2) Generate fake stories and images
            ######################################################
            # Single-device sampling (a former nn.parallel.data_parallel
            # path was removed). NOTE(review): unlike the tensorboard
            # variant, this sampling is NOT wrapped in torch.no_grad(),
            # so the generator graph is built even for the D step.
            # The im_inputs/st_inputs tuples are currently unused.
            im_inputs = (im_motion_input, im_content_input)
            _, im_fake, im_mu, im_logvar = netG.sample_images(im_motion_input,
                                                              im_content_input)
            st_inputs = (st_motion_input, st_content_input)
            _, st_fake, c_mu, c_logvar, m_mu, m_logvar = netG.sample_videos(
                st_motion_input, st_content_input)

            ############################
            # (3) Update D network
            ###########################
            netD_im.zero_grad()
            netD_st.zero_grad()
            im_errD, im_errD_real, im_errD_wrong, im_errD_fake, accD = \
                compute_discriminator_loss(netD_im, im_real_imgs, im_fake,
                                           im_real_labels, im_fake_labels,
                                           im_catelabel, im_mu, self.gpus)
            st_errD, st_errD_real, st_errD_wrong, st_errD_fake, _ = \
                compute_discriminator_loss(netD_st, st_real_imgs, st_fake,
                                           st_real_labels, st_fake_labels,
                                           st_catelabel, c_mu, self.gpus)
            im_errD.backward()
            st_errD.backward()
            im_optimizerD.step()
            st_optimizerD.step()

            ############################
            # (2) Update G network
            ###########################
            # Two generator steps per discriminator step; fakes are
            # re-sampled each iteration.
            for g_iter in range(2):
                netG.zero_grad()
                _, st_fake, c_mu, c_logvar, m_mu, m_logvar = netG.sample_videos(
                    st_motion_input, st_content_input)
                _, im_fake, im_mu, im_logvar = netG.sample_images(im_motion_input,
                                                                  im_content_input)
                im_errG, accG = compute_generator_loss(netD_im, im_fake,
                                                       im_real_labels,
                                                       im_catelabel,
                                                       im_mu, self.gpus)
                st_errG, _ = compute_generator_loss(netD_st, st_fake,
                                                    st_real_labels,
                                                    st_catelabel,
                                                    c_mu, self.gpus)
                # KL terms regularize the conditioning distributions
                # (image posterior and story motion posterior).
                im_kl_loss = KL_loss(im_mu, im_logvar)
                st_kl_loss = KL_loss(m_mu, m_logvar)
                # NOTE(review): errG and kl_loss partially duplicate
                # errG_total below; errG itself is unused.
                errG = im_errG + self.ratio * st_errG
                kl_loss = im_kl_loss + self.ratio * st_kl_loss
                errG_total = im_errG + self.ratio * st_errG + kl_loss
                errG_total.backward()
                optimizerG.step()

            if i % 100 == 0:
                # save the image result for each epoch
                lr_fake, fake, _, _, _, _ = netG.sample_videos(st_motion_input,
                                                               st_content_input)
                save_story_results(st_real_cpu, fake, epoch, self.image_dir)
                if lr_fake is not None:
                    # Low-resolution branch output, when the generator
                    # provides one.
                    save_story_results(None, lr_fake, epoch, self.image_dir)

        end_t = time.time()
        # Epoch summary from the last batch's statistics.
        print('''[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f Loss_real: %.4f Loss_wrong:%.4f Loss_fake %.4f accG: %.4f accD: %.4f Total Time: %.2fsec ''' % (
            epoch, self.max_epoch, i, len(storyloader),
            st_errD.data, st_errG.data, st_errD_real, st_errD_wrong,
            st_errD_fake, accG, accD, (end_t - start_t)))
        if epoch % self.snapshot_interval == 0:
            save_model(netG, netD_im, netD_st, epoch, self.model_dir)
            save_test_samples(netG, self.testloader, self.test_dir)
    # save_model(netG, netD_im, netD_st, self.max_epoch, self.model_dir)