def train_ae(whichdecoder, batch, total_loss_ae, start_time, i):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    output = autoencoder(whichdecoder, source, lengths, noise=True)
    flat_output = output.view(-1, ntokens)
    masked_output = flat_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output / args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    total_loss_ae += loss.data

    accuracy = None
    if i % args.log_interval == 0 and i > 0:
        probs = F.softmax(masked_output, dim=-1)
        max_vals, max_indices = torch.max(probs, 1)
        accuracy = torch.mean(max_indices.eq(masked_target).float()).data[0]
        cur_loss = total_loss_ae[0] / args.log_interval
        elapsed = time.time() - start_time
        print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
              'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}'.format(
                  epoch, i, len(train1_data),
                  elapsed * 1000 / args.log_interval,
                  cur_loss, math.exp(cur_loss), accuracy))

        with open("{}/log.txt".format(args.outf), 'a') as f:
            f.write('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | acc {:8.2f}\n'.format(
                        epoch, i, len(train1_data),
                        elapsed * 1000 / args.log_interval,
                        cur_loss, math.exp(cur_loss), accuracy))

        total_loss_ae = 0
        start_time = time.time()

    return total_loss_ae, start_time
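# Illustrative call pattern for train_ae: inside the epoch loop further below it
# is presumably invoked once per batch for each decoder, roughly like
#
#   total_loss_ae1, start_time = train_ae(1, train1_data[niter],
#                                         total_loss_ae1, start_time, niter)
#   total_loss_ae2, start_time = train_ae(2, train2_data[niter],
#                                         total_loss_ae2, start_time, niter)
#
# where `train2_data` and `niter` are assumed names; only train1_data and the
# total_loss_ae* accumulators appear in this section.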
def train_gan_d_into_ae(whichdecoder, batch):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    real_hidden = autoencoder(whichdecoder, source, lengths,
                              noise=False, encode_only=True)
    real_hidden.register_hook(grad_hook)
    errD_real = gan_disc(real_hidden)
    errD_real.backward(mone)
    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)

    optimizer_ae.step()
    return errD_real
def train_gan_d(whichdecoder, batch):
    gan_disc.train()
    optimizer_gan_d.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))

    # batch_size x nhidden
    real_hidden = autoencoder(whichdecoder, source, lengths,
                              noise=False, encode_only=True)

    # loss / backprop
    errD_real = gan_disc(real_hidden)
    errD_real.backward(one)

    # negative samples ----------------------------
    # generate fake codes
    noise = to_gpu(args.cuda,
                   Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    # loss / backprop
    fake_hidden = gan_gen(noise)
    errD_fake = gan_disc(fake_hidden.detach())
    errD_fake.backward(mone)

    # gradient penalty
    gradient_penalty = calc_gradient_penalty(gan_disc,
                                             real_hidden.data,
                                             fake_hidden.data)
    gradient_penalty.backward()

    optimizer_gan_d.step()
    errD = -(errD_real - errD_fake)

    return errD, errD_real, errD_fake
def calc_gradient_penalty(netD, real_data, fake_data):
    bsz = real_data.size(0)
    alpha = torch.rand(bsz, 1)
    alpha = alpha.expand(bsz, real_data.size(1))  # only works for 2D XXX
    alpha = to_gpu(args.cuda, Variable(alpha))

    interpolates = alpha * real_data + ((1 - alpha) * fake_data)
    interpolates = Variable(interpolates, requires_grad=True)
    disc_interpolates = netD(interpolates)

    grad_output = to_gpu(args.cuda, torch.ones(disc_interpolates.size()))
    gradients = torch.autograd.grad(outputs=disc_interpolates,
                                    inputs=interpolates,
                                    grad_outputs=grad_output,
                                    create_graph=True,
                                    retain_graph=True,
                                    only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)

    gradient_penalty = \
        ((gradients.norm(2, dim=1) - 1) ** 2).mean() * args.gan_gp_lambda
    return gradient_penalty
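# The function above computes the WGAN-GP regularizer
#     gan_gp_lambda * E[ (|| grad_xhat D(xhat) ||_2 - 1)^2 ]
# at random interpolations xhat between real and fake codes.
# Minimal usage sketch (illustrative only; `code_dim` is an assumed stand-in
# for the hidden code size used by the model):
#
#   real_codes = to_gpu(args.cuda, torch.randn(args.batch_size, code_dim))
#   fake_codes = to_gpu(args.cuda, torch.randn(args.batch_size, code_dim))
#   gp = calc_gradient_penalty(gan_disc, real_codes, fake_codes)
#   gp.backward()   # accumulates the penalty gradient into gan_disc's parameters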
def train_gan_g():
    gan_gen.train()
    gan_gen.zero_grad()

    noise = to_gpu(args.cuda,
                   Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    fake_hidden = gan_gen(noise)
    errG = gan_disc(fake_hidden)
    errG.backward(one)
    optimizer_gan_g.step()

    return errG
def classifier_regularize(whichclass, batch):
    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    target = to_gpu(args.cuda, Variable(target))
    # use the opposite class as the target so the encoder is pushed to
    # produce codes that fool the style classifier
    flippedclass = abs(2 - whichclass)
    labels = to_gpu(args.cuda,
                    Variable(torch.zeros(source.size(0)).fill_(flippedclass)))

    # Train
    code = autoencoder(0, source, lengths, noise=False, encode_only=True)
    code.register_hook(grad_hook_cla)
    scores = classifier(code)
    classify_reg_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_reg_loss.backward()

    torch.nn.utils.clip_grad_norm(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    return classify_reg_loss
def train_classifier(whichclass, batch):
    classifier.train()
    classifier.zero_grad()

    source, target, lengths = batch
    source = to_gpu(args.cuda, Variable(source))
    labels = to_gpu(
        args.cuda,
        Variable(torch.zeros(source.size(0)).fill_(whichclass - 1)))

    # Train
    code = autoencoder(0, source, lengths,
                       noise=False, encode_only=True).detach()
    scores = classifier(code)
    classify_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_loss.backward()
    optimizer_classify.step()
    classify_loss = classify_loss.cpu().data[0]

    pred = scores.data.round().squeeze(1)
    accuracy = pred.eq(labels.data).float().mean()

    return classify_loss, accuracy
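# Label convention used above: train_classifier maps class 1 -> target 0 and
# class 2 -> target 1, while classifier_regularize feeds the flipped label so
# that updating the encoder moves codes toward the opposite class.
# Illustrative call pattern, with `batch1` / `batch2` as assumed names for
# batches drawn from each corpus:
#
#   classify_loss1, classify_acc1 = train_classifier(1, batch1)
#   classify_loss2, classify_acc2 = train_classifier(2, batch2)
#   reg_loss1 = classifier_regularize(1, batch1)
#   reg_loss2 = classifier_regularize(2, batch2)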
def evaluate_autoencoder(whichdecoder, data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        hidden = autoencoder(0, source, lengths, noise=False, encode_only=True)

        # output: batch x seq_len x ntokens
        if whichdecoder == 1:
            output = autoencoder(1, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals1, max_indices1 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices1.eq(masked_target).float()).data[0]

            max_values1, max_indices1 = torch.max(output, 2)
            max_indices2 = autoencoder.generate(2, hidden, maxlen=50)
        else:
            output = autoencoder(2, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = \
                flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals2, max_indices2 = torch.max(masked_output, 1)
            all_accuracies += \
                torch.mean(max_indices2.eq(masked_target).float()).data[0]

            max_values2, max_indices2 = torch.max(output, 2)
            max_indices1 = autoencoder.generate(1, hidden, maxlen=50)

        total_loss += criterion_ce(masked_output / args.temp,
                                   masked_target).data
        bcnt += 1

        aeoutf_from = "{}/{}_output_decoder_{}_from.txt".format(
            args.outf, epoch, whichdecoder)
        aeoutf_tran = "{}/{}_output_decoder_{}_tran.txt".format(
            args.outf, epoch, whichdecoder)
        with open(aeoutf_from, 'w') as f_from, open(aeoutf_tran, 'w') as f_trans:
            max_indices1 = \
                max_indices1.view(output.size(0), -1).data.cpu().numpy()
            max_indices2 = \
                max_indices2.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            tran_indices = max_indices2 if whichdecoder == 1 else max_indices1
            for t, tran_idx in zip(target, tran_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f_from.write(chars)
                f_from.write("\n")
                # transfer sentence
                chars = " ".join(
                    [corpus.dictionary.idx2word[x] for x in tran_idx])
                f_trans.write(chars)
                f_trans.write("\n")

    return total_loss[0] / len(data_source), all_accuracies / bcnt
print("Training...")
with open("{}/log.txt".format(args.outf), 'a') as f:
    f.write('Training...\n')

# schedule of increasing GAN training loops
if args.niters_gan_schedule != "":
    gan_schedule = [int(x) for x in args.niters_gan_schedule.split("-")]
else:
    gan_schedule = []
niter_gan = 1

fixed_noise = to_gpu(args.cuda,
                     Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
# scalar grad arguments passed to backward(); they select the sign of each
# WGAN term in the critic / generator updates above
one = to_gpu(args.cuda, torch.FloatTensor([1]))
mone = one * -1

for epoch in range(1, args.epochs + 1):
    # update gan training schedule
    if epoch in gan_schedule:
        niter_gan += 1
        print("GAN training loop schedule increased to {}".format(niter_gan))
        with open("{}/log.txt".format(args.outf), 'a') as f:
            f.write("GAN training loop schedule increased to {}\n".format(
                niter_gan))

    total_loss_ae1 = 0
    total_loss_ae2 = 0