def main(args):
    """Adversarially train an image-caption Generator against a Discriminator.

    Two phases, selected by ``args.pretraining``:
      1. (optional) pre-training: generator with MLE cross-entropy,
         discriminator with real + mismatched-caption losses;
      2. GAN training: generator is rewarded by the discriminator on
         partial-caption rollouts, discriminator sees real / sampled / wrong
         captions.

    NOTE(review): this file was recovered from a whitespace-mangled source;
    statement order is preserved exactly, but indentation levels inside the
    loops were inferred from the logic — confirm against the original repo.
    Written for pre-0.4 PyTorch (`to_var`, `volatile=True`, Variable API).
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)
    if not os.path.exists(args.figure_path):
        os.makedirs(args.figure_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader; each batch also carries mismatched ("wrong") captions
    # used as negative examples for the discriminator.
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Build the models (Gen)
    generator = Generator(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers)

    # Build the models (Disc)
    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(vocab), args.num_layers)

    if torch.cuda.is_available():
        generator.cuda()
        discriminator.cuda()

    # Loss and Optimizer (Gen) — MLE objective for pre-training only.
    mle_criterion = nn.CrossEntropyLoss()
    params_gen = list(generator.parameters())
    optimizer_gen = torch.optim.Adam(params_gen)

    # Loss and Optimizer (Disc)
    params_disc = list(discriminator.parameters())
    optimizer_disc = torch.optim.Adam(params_disc)

    if int(args.pretraining) == 1:
        # Pre-training: train generator with MLE and discriminator with 3 losses (real + fake + wrong)
        total_steps = len(data_loader)
        print(total_steps)
        disc_losses = []
        gen_losses = []
        print('pre-training')
        # NOTE(review): pre-trained weights are loaded even in the
        # pretraining branch — presumably to resume an interrupted run.
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))
        for epoch in range(
                max([
                    int(args.gen_pretrain_num_epochs),
                    int(args.disc_pretrain_num_epochs)
                ])):
            # NOTE(review): skipping the first 5 epochs looks like a leftover
            # resume hack paired with the state-dict loads above — confirm.
            if epoch < 5:
                continue
            # for epoch in range(max([int(args.gen_pretrain_num_epochs), int(args.disc_pretrain_num_epochs)])):
            for i, (images, captions, lengths, wrong_captions,
                    wrong_lengths) in enumerate(data_loader):
                images = to_var(images, volatile=True)
                captions = to_var(captions)
                wrong_captions = to_var(wrong_captions)
                # Flatten padded captions to the packed target layout expected
                # by CrossEntropyLoss over all valid timesteps.
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]
                # Generator MLE step (only for its own epoch budget).
                if epoch < int(args.gen_pretrain_num_epochs):
                    generator.zero_grad()
                    outputs, _ = generator(images, captions, lengths)
                    loss_gen = mle_criterion(outputs, targets)
                    # gen_losses.append(loss_gen.cpu().data.numpy()[0])
                    loss_gen.backward()
                    optimizer_gen.step()
                # Discriminator step: push rewards up for (image, real caption)
                # and down for (image, mismatched caption).
                if epoch < int(args.disc_pretrain_num_epochs):
                    discriminator.zero_grad()
                    rewards_real = discriminator(images, captions, lengths)
                    # rewards_fake = discriminator(images, sampled_captions, sampled_lengths)
                    rewards_wrong = discriminator(images, wrong_captions,
                                                  wrong_lengths)
                    real_loss = -torch.mean(torch.log(rewards_real))
                    # fake_loss = -torch.mean(torch.clamp(torch.log(1 - rewards_fake), min=-1000))
                    # clamp bounds log(1 - r) so r -> 1 cannot produce -inf.
                    wrong_loss = -torch.mean(
                        torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
                    loss_disc = real_loss + wrong_loss  # + fake_loss, no fake_loss because this is pretraining
                    # disc_losses.append(loss_disc.cpu().data.numpy()[0])
                    loss_disc.backward()
                    optimizer_disc.step()
                # NOTE(review): this print reads both loss_disc and loss_gen;
                # it will raise NameError once an epoch exceeds one of the two
                # pretrain budgets above — confirm intended epoch counts.
                if (i + 1) % args.log_step == 0:
                    print(
                        'Epoch [%d], Step [%d], Disc Loss: %.4f, Gen Loss: %.4f'
                        % (epoch + 1, i + 1, loss_disc, loss_gen))
                # Periodic mid-epoch checkpoints.
                if (i + 1) % 500 == 0:
                    torch.save(
                        discriminator.state_dict(),
                        os.path.join(
                            args.model_path,
                            'pretrained-discriminator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))
                    torch.save(
                        generator.state_dict(),
                        os.path.join(
                            args.model_path, 'pretrained-generator-%d-%d.pkl' %
                            (int(epoch) + 1, i + 1)))
        # Save pretrained models
        torch.save(
            discriminator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-discriminator-%d.pkl' %
                int(args.disc_pretrain_num_epochs)))
        torch.save(
            generator.state_dict(),
            os.path.join(
                args.model_path, 'pretrained-generator-%d.pkl' %
                int(args.gen_pretrain_num_epochs)))
        # Plot pretraining figures
        # plt.plot(disc_losses, label='pretraining_disc_loss')
        # plt.savefig(args.figure_path + 'pretraining_disc_losses.png')
        # plt.clf()
        #
        # plt.plot(gen_losses, label='pretraining_gen_loss')
        # plt.savefig(args.figure_path + 'pretraining_gen_losses.png')
        # plt.clf()
    else:
        # No pretraining requested: start GAN phase from saved weights.
        generator.load_state_dict(torch.load(args.pretrained_gen_path))
        discriminator.load_state_dict(torch.load(args.pretrained_disc_path))

    # # Skip the rest for now
    # return

    # Train the Models (adversarial phase)
    total_step = len(data_loader)
    disc_gan_losses = []
    gen_gan_losses = []
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):
            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            wrong_captions = to_var(wrong_captions)

            generator.zero_grad()
            # Generator returns packed per-timestep outputs; re-pad them to
            # (batch, T, vocab) so rewards can be indexed per (t, token).
            outputs, packed_lengths = generator(images, captions, lengths)
            outputs = PackedSequence(outputs, packed_lengths)
            outputs = pad_packed_sequence(outputs, batch_first=True)  # (b, T, V)
            Tmax = outputs[0].size(1)
            if torch.cuda.is_available():
                rewards = torch.zeros_like(outputs[0]).type(
                    torch.cuda.FloatTensor)
            else:
                rewards = torch.zeros_like(outputs[0]).type(torch.FloatTensor)
            # getting rewards from disc: for every other timestep t, build
            # samples = real prefix + predicted token + rollout suffix, and
            # score them with the discriminator (SeqGAN-style rewards).
            # for t in tqdm(range(2, Tmax, 4)):
            for t in range(2, Tmax, 2):
                # for t in range(2, 4):
                if t >= min(
                        lengths
                ):  # TODO this makes things easier, but could min(lengths) could be too short
                    break
                gen_samples = to_var(torch.zeros(
                    (captions.size(0), Tmax)).type(torch.FloatTensor),
                                     volatile=True)
                # part 1: taken from real caption
                gen_samples[:, :t] = captions[:, :t].data
                predicted_ids, saved_states = generator.pre_compute(
                    gen_samples, t)
                # for v in range(predicted_ids.size(1)):
                v = predicted_ids
                # pdb.set_trace()
                # part 2: taken from all possible vocabs
                # gen_samples[:,t] = predicted_ids[:,v]
                gen_samples[:, t] = v
                # part 3: taken from rollouts
                gen_samples[:, t:] = generator.rollout(gen_samples, t,
                                                       saved_states)
                sampled_lengths = []
                # finding sampled_lengths: length = first <end> (id 2) + 1,
                # else the full Tmax.
                for batch in range(int(captions.size(0))):
                    for b_t in range(Tmax):
                        if gen_samples[batch,
                                       b_t].cpu().data.numpy() == 2:  # <end>
                            sampled_lengths.append(b_t + 1)
                            break
                        elif b_t == Tmax - 1:
                            sampled_lengths.append(Tmax)
                # sort sampled_lengths descending (pack_padded_sequence order).
                # NOTE(review): sorting lengths without reordering the rows of
                # gen_samples decouples lengths from their sequences — verify.
                sampled_lengths = np.array(sampled_lengths)
                sampled_lengths[::-1].sort()
                sampled_lengths = sampled_lengths.tolist()
                # get rewards from disc
                rewards[:, t, v] = discriminator(images, gen_samples.detach(),
                                                 sampled_lengths)
            # rewards = rewards.detach()
            # pdb.set_trace()
            # Detach rewards so generator gradients flow only through outputs.
            rewards_detached = rewards.data
            rewards_detached = to_var(rewards_detached)
            # Policy-gradient-style surrogate: maximize sum(outputs * rewards).
            loss_gen = torch.dot(outputs[0], -rewards_detached)
            # gen_gan_losses.append(loss_gen.cpu().data.numpy()[0])
            # pdb.set_trace()
            loss_gen.backward()
            optimizer_gen.step()

            # TODO get sampled_captions
            # Free-running samples for the discriminator's "fake" loss.
            sampled_ids = generator.sample(images)
            # sampled_captions = torch.zeros_like(sampled_ids).type(torch.LongTensor)
            sampled_lengths = []
            # finding sampled_lengths (sample() emits up to 20 tokens here —
            # presumably the decoder's max length; confirm).
            for batch in range(int(captions.size(0))):
                for b_t in range(20):
                    #pdb.set_trace()
                    #sampled_captions[batch, b_t].data = sampled_ids[batch, b_t].cpu().data.numpy()[0]
                    if sampled_ids[batch,
                                   b_t].cpu().data.numpy() == 2:  # <end>
                        sampled_lengths.append(b_t + 1)
                        break
                    elif b_t == 20 - 1:
                        sampled_lengths.append(20)
            # sort sampled_lengths
            sampled_lengths = np.array(sampled_lengths)
            sampled_lengths[::-1].sort()
            sampled_lengths = sampled_lengths.tolist()

            # Train discriminator
            discriminator.zero_grad()
            # Re-enable gradients on inputs (pre-0.4 Variable API).
            images.volatile = False
            captions.volatile = False
            wrong_captions.volatile = False
            rewards_real = discriminator(images, captions, lengths)
            rewards_fake = discriminator(images, sampled_ids, sampled_lengths)
            rewards_wrong = discriminator(images, wrong_captions,
                                          wrong_lengths)
            real_loss = -torch.mean(torch.log(rewards_real))
            fake_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_fake), min=-1000))
            wrong_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
            loss_disc = real_loss + fake_loss + wrong_loss
            # disc_gan_losses.append(loss_disc.cpu().data.numpy()[0])
            loss_disc.backward()
            optimizer_disc.step()

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Disc Loss: %.4f, Gen Loss: %.4f'
                    % (epoch, args.num_epochs, i, total_step, loss_disc,
                       loss_gen))

            # Save the models
            # if (i+1) % args.save_step == 0:
            if (i + 1) % args.log_step == 0:  # jm: saving at the last iteration instead
                torch.save(
                    generator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'generator-gan-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    discriminator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'discriminator-gan-%d-%d.pkl' % (epoch + 1, i + 1)))
# Update G network netG.zero_grad() label.fill_(1) output = netD(fake) errG = criterion(output, label) errG.backward() D_G_z2 = output.mean().item() optimizerG.step() print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f D(x): %.4f D(G(z)): %.4f / %.4f' %(epoch, epoch_num, i, len(train_dataloader), errD.item(), errG.item(), D_x, D_G_z1, D_G_z2)) if i%10==0: lossD.append(errD.item()) lossG.append(errG.item()) Dx.append(D_x) DG1.append(D_G_z1) DG2.append(D_G_z2) save_image(data[:32], 'img_output/real_samples_%s.png'%mode, normalize=True) fake = netG(fix_noise[:32]) save_image(fake.detach(), 'img_output/fake_samples_%s_%d.png'%(mode, epoch), normalize=True) np.save('loss_G_%s.npy'%mode, np.array(lossG)) np.save('loss_D_%s.npy'%mode, np.array(lossD)) np.save('Dx_%s.npy'%mode, np.array(Dx)) np.save('DG1_%s.npy'%mode, np.array(DG1)) np.save('DG2_%s.npy'%mode, np.array(DG2)) if (epoch+1)%3 == 0: torch.save(netG.state_dict(), 'model/netG_epoch_%s_%d.pth' %(mode, epoch)) torch.save(netD.state_dict(), 'model/netD_epoch_%s_%d.pth' %(mode, epoch))
def train_gan():
    """Train a WGAN-GP: critic loss E[D(G(z))] - E[D(x)] + lambda * GP,
    generator loss -E[D(G(z))].

    The critic updates every `disc_update` steps and the generator every
    `gen_update` steps. A checkpoint (models, optimizers, epoch losses) is
    written to upsample/checkpoint_<epoch>.pth after every epoch.

    Fix vs. original: epoch losses were accumulated as live tensors
    (`disc_loss += loss`), which kept every step's autograd graph alive for
    the whole epoch and serialized graph-attached tensors into the
    checkpoint; they are now accumulated as plain floats via `.item()`.
    """
    batch_size = 64
    epochs = 100
    disc_update = 1        # critic update period (steps)
    gen_update = 5         # generator update period (steps)
    latent_dimension = 100
    lambduh = 10           # gradient-penalty coefficient

    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    # load data
    train_loader, valid_loader, test_loader = get_data_loader(
        'data', batch_size)

    disc_model = Discriminator().to(device)
    gen_model = Generator(latent_dimension).to(device)

    # Hyperparameters (lr=1e-4, betas=(0.5, 0.9)) follow the WGAN-GP setup.
    disc_optim = Adam(disc_model.parameters(), lr=1e-4, betas=(0.5, 0.9))
    gen_optim = Adam(gen_model.parameters(), lr=1e-4, betas=(0.5, 0.9))

    for e in range(epochs):
        # Plain floats, not tensors — see docstring. Floats also make the
        # end-of-epoch report safe even if a branch never ran this epoch.
        disc_loss = 0.0
        gen_loss = 0.0
        for i, (images, _) in enumerate(train_loader):
            images = images.to(device)
            b_size = images.shape[0]
            step = i + 1

            if step % disc_update == 0:
                # --- critic step ---
                disc_model.zero_grad()
                # sample noise
                noise = torch.randn((b_size, latent_dimension), device=device)
                # .detach(): no generator gradients during the critic step.
                inputs = gen_model(noise).detach()
                f_outputs = disc_model(inputs)
                loss = f_outputs.mean()
                # loss on real
                r_outputs = disc_model(images)
                loss -= r_outputs.mean()
                # add gradient penalty (enforces the 1-Lipschitz constraint)
                loss += lambduh * gradient_penalty(disc_model, images, inputs,
                                                   device)
                disc_loss += loss.item()
                loss.backward()
                disc_optim.step()

            if step % gen_update == 0:
                # --- generator step ---
                gen_model.zero_grad()
                noise = torch.randn((b_size, latent_dimension), device=device)
                inputs = gen_model(noise)
                outputs = disc_model(inputs)
                loss = -outputs.mean()
                gen_loss += loss.item()
                loss.backward()
                gen_optim.step()

        torch.save(
            {
                'epoch': e,
                'disc_model': disc_model.state_dict(),
                'gen_model': gen_model.state_dict(),
                'disc_loss': disc_loss,
                'gen_loss': gen_loss,
                'disc_optim': disc_optim.state_dict(),
                'gen_optim': gen_optim.state_dict()
            }, "upsample/checkpoint_{}.pth".format(e))
        print("Epoch: {} Disc loss: {}".format(
            e + 1, disc_loss / len(train_loader)))
        print("Epoch: {} Gen loss: {}".format(
            e + 1, gen_loss / len(train_loader)))
def main(args):
    """Train an MLE caption model (EncoderCNN + DecoderRNN) while jointly
    training a Discriminator on real / sampled / mismatched captions.

    The generator side is optimized only with cross-entropy here; the
    discriminator's losses are logged (disc_losses) and plotted per epoch
    but do not feed back into the generator.

    NOTE(review): recovered from a whitespace-mangled source; statement
    order is exact, indentation inside the loops was inferred from the
    logic. Written for pre-0.4 PyTorch (`to_var`, `volatile`, `.data[0]`).
    """
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper.
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Build data loader; batches also carry mismatched ("wrong") captions
    # for the discriminator's negative loss.
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size, shuffle=True,
                             num_workers=args.num_workers)

    # Build the models (Gen)
    # TODO: put these in generator
    encoder = EncoderCNN(args.embed_size)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers)

    # Build the models (Disc)
    discriminator = Discriminator(args.embed_size, args.hidden_size,
                                  len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()
        discriminator.cuda()

    # Loss and Optimizer (Gen) — only the encoder's projection head
    # (linear + bn) is trained; the CNN backbone stays frozen.
    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(
        encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    # Loss and Optimizer (Disc)
    params_disc = list(discriminator.parameters())
    optimizer_disc = torch.optim.Adam(params_disc)

    # Train the Models
    total_step = len(data_loader)
    disc_losses = []
    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths, wrong_captions,
                wrong_lengths) in enumerate(data_loader):
            # pdb.set_trace()
            # TODO: train disc before gen
            # Set mini-batch dataset
            images = to_var(images, volatile=True)
            captions = to_var(captions)
            wrong_captions = to_var(wrong_captions)
            # Packed targets matching the decoder's packed outputs.
            targets = pack_padded_sequence(captions, lengths,
                                           batch_first=True)[0]

            # Forward, Backward and Optimize
            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            # Free-running samples for the discriminator's "fake" input.
            sampled_captions = decoder.sample(features)
            # sampled_captions = torch.zeros_like(sampled_ids)
            sampled_lengths = []
            # Per-row caption length: position of '<end>' + 1, else full width.
            for row in range(sampled_captions.size(0)):
                for index, word_id in enumerate(sampled_captions[row, :]):
                    # pdb.set_trace()
                    word = vocab.idx2word[word_id.cpu().data.numpy()[0]]
                    # sampled_captions[row, index].data = word
                    if word == '<end>':
                        sampled_lengths.append(index + 1)
                        break
                    elif index == sampled_captions.size(1) - 1:
                        sampled_lengths.append(sampled_captions.size(1))
                        break
            # Sort lengths descending (pack_padded_sequence order).
            # NOTE(review): rows of sampled_captions are not reordered to
            # match the sorted lengths — verify this is what the
            # discriminator expects.
            sampled_lengths = np.array(sampled_lengths)
            sampled_lengths[::-1].sort()
            sampled_lengths = sampled_lengths.tolist()

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Train discriminator: reward real pairs, penalize sampled and
            # mismatched captions; clamp keeps log(1 - r) finite as r -> 1.
            discriminator.zero_grad()
            rewards_real = discriminator(images, captions, lengths)
            rewards_fake = discriminator(images, sampled_captions,
                                         sampled_lengths)
            rewards_wrong = discriminator(images, wrong_captions,
                                          wrong_lengths)
            real_loss = -torch.mean(torch.log(rewards_real))
            fake_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_fake), min=-1000))
            wrong_loss = -torch.mean(
                torch.clamp(torch.log(1 - rewards_wrong), min=-1000))
            loss_disc = real_loss + fake_loss + wrong_loss
            disc_losses.append(loss_disc.cpu().data.numpy()[0])
            loss_disc.backward()
            optimizer_disc.step()
            # print('iteration %i' % i)

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                    % (epoch, args.num_epochs, i, total_step, loss.data[0],
                       np.exp(loss.data[0])))

            # Save the models
            # if (i+1) % args.save_step == 0:
            if (i + 1) % total_step == 0:  # jm: saving at the last iteration instead
                torch.save(
                    decoder.state_dict(),
                    os.path.join(args.model_path,
                                 'decoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    encoder.state_dict(),
                    os.path.join(args.model_path,
                                 'encoder-%d-%d.pkl' % (epoch + 1, i + 1)))
                torch.save(
                    discriminator.state_dict(),
                    os.path.join(
                        args.model_path,
                        'discriminator-%d-%d.pkl' % (epoch + 1, i + 1)))
        # plot at the end of every epoch
        plt.plot(disc_losses, label='disc loss')
        plt.savefig('disc_losses.png')
        plt.clf()