def train_gan_d(whichdecoder, batch):
    gan_disc.train()
    optimizer_gan_d.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))

    # batch_size x nhidden
    real_hidden = autoencoder(whichdecoder, source, lengths,
                              noise=False, encode_only=True)
    errD_real = gan_disc(real_hidden.detach())

    # negative samples ----------------------------
    # generate fake codes
    noise = to_gpu(use_cuda, Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)
    fake_hidden = gan_gen(noise)

    # errD_fake = gan_disc(fake_hidden.detach())          # w2
    errD_fake = torch.exp(gan_disc(fake_hidden.detach()))  # kl

    errD = errD_fake - errD_real
    errD.backward()
    optimizer_gan_d.step()

    return errD
def encode(self, indices, lengths, noise):
    embeddings = self.embedding(indices)
    packed_embeddings = pack_padded_sequence(input=embeddings,
                                             lengths=lengths,
                                             batch_first=True)

    # Encode
    packed_output, state = self.encoder(packed_embeddings)
    hidden, cell = state

    # batch_size x nhidden
    hidden = hidden[-1]  # get hidden state of last layer of encoder

    # normalize to unit ball (l2 norm of 1) - p=2, dim=1
    norms = torch.norm(hidden, 2, 1)
    # For older versions of PyTorch use:
    # hidden = torch.div(hidden, norms.expand_as(hidden))
    # For newer versions of PyTorch (as of 8/25) use this:
    hidden = torch.div(hidden, norms.unsqueeze(1).expand_as(hidden))

    if noise and self.noise_r > 0:
        gauss_noise = torch.normal(mean=torch.zeros(hidden.size()),
                                   std=self.noise_r)
        hidden = hidden + to_gpu(self.gpu, Variable(gauss_noise))

    # implicit encoder: concatenate a Gaussian sample with the code and map
    # it through two linear layers to produce the final latent representation
    epsilon = torch.randn_like(hidden)
    hidden = self.linear_encoder1(torch.cat((hidden, epsilon), 1))
    hidden = self.linear_encoder2(hidden)

    return hidden
def __init__(self, emsize, nhidden, ntokens, nlayers, noise_r=0.2,
             share_decoder_emb=False, hidden_init=False, dropout=0, gpu=False):
    super(Seq2Seq2Decoder, self).__init__()
    self.nhidden = nhidden
    self.emsize = emsize
    self.ntokens = ntokens
    self.nlayers = nlayers
    self.noise_r = noise_r
    self.hidden_init = hidden_init
    self.dropout = dropout
    self.gpu = gpu

    self.start_symbols = to_gpu(gpu, Variable(torch.ones(10, 1).long()))

    # Vocabulary embedding
    self.embedding = nn.Embedding(ntokens, emsize)
    self.embedding_decoder0 = nn.Embedding(ntokens, emsize)
    self.embedding_decoder1 = nn.Embedding(ntokens, emsize)

    # RNN Encoder and Decoder
    self.encoder = nn.LSTM(input_size=emsize,
                           hidden_size=nhidden,
                           num_layers=nlayers,
                           dropout=dropout,
                           batch_first=True)

    # implicit encoder
    self.linear_encoder1 = nn.Linear(nhidden * 2, nhidden)
    self.linear_encoder2 = nn.Linear(nhidden, nhidden)

    decoder_input_size = emsize + nhidden
    self.decoder0 = nn.LSTM(input_size=decoder_input_size,
                            hidden_size=nhidden,
                            num_layers=1,
                            dropout=dropout,
                            batch_first=True)
    self.decoder1 = nn.LSTM(input_size=decoder_input_size,
                            hidden_size=nhidden,
                            num_layers=1,
                            dropout=dropout,
                            batch_first=True)

    # Initialize Linear Transformation
    self.linear = nn.Linear(nhidden, ntokens)

    self.init_weights()

    if share_decoder_emb:
        self.embedding_decoder1.weight = self.embedding_decoder0.weight
def classifier_regularize(whichclass, batch):
    autoencoder.train()
    autoencoder.zero_grad()

    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))
    # use the opposite class as the target label; only the autoencoder is
    # updated below, so the encoder is pushed to fool the classifier
    flippedclass = abs(1 - whichclass)
    labels = to_gpu(use_cuda, Variable(torch.zeros(source.size(0)).fill_(flippedclass)))

    # Train
    code = autoencoder(0, source, lengths, noise=False, encode_only=True)
    scores = classifier(code)
    classify_reg_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_reg_loss.backward()

    torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    return classify_reg_loss
def train_lm(whichdecoder, eval_path, save_path):
    # generate examples
    indices = []
    noise = to_gpu(use_cuda, Variable(torch.ones(100, args.z_size)))
    for i in range(1000):
        noise.data.normal_(0, 1)
        fake_hidden = gan_gen(noise)
        max_indices = autoencoder.generate(whichdecoder, fake_hidden, args.maxlen)
        indices.append(max_indices.data.cpu().numpy())

    indices = np.concatenate(indices, axis=0)

    sentences_gen = []
    # write generated sentences to text file
    with open(save_path + ".txt", "w") as f:
        # laplacian smoothing: write every vocabulary word once so the n-gram
        # LM assigns non-zero probability to all words
        for word in corpus.dictionary.word2idx.keys():
            f.write(word + "\n")
        for idx in indices:
            # generated sentence
            words = [corpus.dictionary.idx2word[x] for x in idx]
            # truncate sentences to first occurrence of <eos>
            truncated_sent = []
            for w in words:
                if w != '<eos>':
                    truncated_sent.append(w)
                else:
                    break
            chars = " ".join(truncated_sent)
            sentences_gen.append(chars)
            f.write(chars + "\n")

    # train language model on generated examples
    lm = train_ngram_lm(kenlm_path=args.kenlm_path,
                        data_path=save_path + ".txt",
                        output_path=save_path + ".arpa",
                        N=args.N)

    # load sentences to evaluate on
    eval_path += '0' if whichdecoder == 0 else '1'
    with open(eval_path + ".txt", 'r') as f:
        lines = f.readlines()
    sentences = [l.replace('\n', '') for l in lines]
    reverse_ppl = get_ppl(lm, sentences)

    # forward
    lm = train_ngram_lm(kenlm_path=args.kenlm_path,
                        data_path=eval_path + ".txt",
                        output_path=eval_path + ".arpa",
                        N=args.N)
    forward_ppl = get_ppl(lm, sentences_gen)

    return forward_ppl, reverse_ppl
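# train_ngram_lm and get_ppl come from the repo's utilities and are not shown in
# this section. A minimal sketch of what get_ppl might look like, assuming that
# train_ngram_lm returns a loaded kenlm.Model (an assumption; the repo's actual
# helpers may differ):
import math

def get_ppl(lm, sentences):
    """Corpus-level perplexity of `sentences` under a kenlm n-gram model."""
    total_logprob, total_words = 0.0, 0
    for sent in sentences:
        # kenlm.Model.score returns the log10 probability of the sentence
        # (including the end-of-sentence token)
        total_logprob += lm.score(sent)
        total_words += len(sent.split()) + 1  # +1 for </s>
    return math.pow(10.0, -total_logprob / total_words)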
def train_classifier(whichclass, batch):
    classifier.train()
    classifier.zero_grad()

    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))
    labels = to_gpu(use_cuda, Variable(torch.zeros(source.size(0)).fill_(whichclass)))

    # Train
    code = autoencoder(0, source, lengths, noise=False, encode_only=True).detach()
    scores = classifier(code)
    classify_loss = F.binary_cross_entropy(scores.squeeze(1), labels)
    classify_loss.backward()
    optimizer_classify.step()
    classify_loss = classify_loss.cpu().item()

    pred = scores.data.round().squeeze(1)
    accuracy = pred.eq(labels.data).float().mean()

    return classify_loss, accuracy
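# The `classifier` trained above is not defined in this section. Its output is
# squeezed to a single column and fed to binary_cross_entropy, so it is
# presumably a small MLP over latent codes ending in a sigmoid. The sketch
# below is hypothetical (the class name and layer sizes are assumptions, not
# the repo's actual definition):
class CodeClassifier(nn.Module):
    def __init__(self, nhidden, ninner=100):
        super(CodeClassifier, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(nhidden, ninner),
            nn.ReLU(),
            nn.Linear(ninner, 1),
            nn.Sigmoid(),
        )

    def forward(self, code):
        # code: batch_size x nhidden -> batch_size x 1 probability of class 1
        return self.net(code)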
def train_gan_d(whichdecoder, batch):
    gan_disc.train()
    optimizer_gan_d.zero_grad()

    # positive samples ----------------------------
    # generate real codes
    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))

    # batch_size x nhidden
    real_hidden = autoencoder(whichdecoder, source, lengths,
                              noise=False, encode_only=True)

    # loss / backprop
    # backward(mone) accumulates the gradient of -errD_real, i.e. the critic
    # score on real codes is maximized
    errD_real = gan_disc(real_hidden)
    errD_real.backward(mone)

    # negative samples ----------------------------
    # generate fake codes
    noise = to_gpu(use_cuda, Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    # loss / backprop
    # backward(one) minimizes the critic score on generated codes
    fake_hidden = gan_gen(noise)
    errD_fake = gan_disc(fake_hidden.detach())
    errD_fake.backward(one)

    # gradient penalty
    gradient_penalty = calc_gradient_penalty(gan_disc, real_hidden.data, fake_hidden.data)
    gradient_penalty.backward()

    optimizer_gan_d.step()
    errD = errD_fake - errD_real

    return errD, errD_real, errD_fake
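# calc_gradient_penalty is called above but not defined in this section. The
# sketch below assumes the standard WGAN-GP penalty (Gulrajani et al., 2017)
# applied to latent codes, reusing the script's to_gpu / use_cuda globals; the
# penalty weight args.gan_gp_lambda is an assumed hyperparameter name, and the
# repo's actual implementation may differ.
def calc_gradient_penalty(netD, real_data, fake_data):
    batch_size = real_data.size(0)

    # random interpolation between real and generated codes
    alpha = torch.rand(batch_size, 1).expand(real_data.size())
    alpha = to_gpu(use_cuda, alpha)
    interpolates = alpha * real_data + (1 - alpha) * fake_data
    interpolates = to_gpu(use_cuda, Variable(interpolates, requires_grad=True))

    disc_interpolates = netD(interpolates)

    # gradient of the critic output w.r.t. the interpolated codes
    gradients = torch.autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=to_gpu(use_cuda, torch.ones(disc_interpolates.size())),
        create_graph=True, retain_graph=True, only_inputs=True)[0]

    # penalize deviation of the gradient norm from 1
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * args.gan_gp_lambda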
def generate(self, whichdecoder, hidden, maxlen, sample=False, temp=1.0):
    """Generate through decoder; no backprop"""
    batch_size = hidden.size(0)

    if self.hidden_init:
        # initialize decoder hidden state to encoder output
        state = (hidden.unsqueeze(0), self.init_state(batch_size))
    else:
        state = self.init_hidden(batch_size)

    # <sos>
    self.start_symbols.data.resize_(batch_size, 1)
    self.start_symbols.data.fill_(1)
    self.start_symbols = to_gpu(self.gpu, self.start_symbols)

    if whichdecoder == 0:
        embedding = self.embedding_decoder0(self.start_symbols)
    else:
        embedding = self.embedding_decoder1(self.start_symbols)
    inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

    # unroll
    all_indices = []
    for i in range(maxlen):
        if whichdecoder == 0:
            output, state = self.decoder0(inputs, state)
        else:
            output, state = self.decoder1(inputs, state)
        overvocab = self.linear(output.squeeze(1))

        if not sample:
            # greedy decoding
            vals, indices = torch.max(overvocab, 1)
            indices = indices.unsqueeze(1)
        else:
            # sampling with temperature
            probs = F.softmax(overvocab / temp, dim=1)
            indices = torch.multinomial(probs, 1)

        all_indices.append(indices)

        if whichdecoder == 0:
            embedding = self.embedding_decoder0(indices)
        else:
            embedding = self.embedding_decoder1(indices)
        inputs = torch.cat([embedding, hidden.unsqueeze(1)], 2)

    max_indices = torch.cat(all_indices, 1)
    return max_indices
def train_gan_g():
    gan_gen.train()
    gan_gen.zero_grad()

    noise = to_gpu(use_cuda, Variable(torch.ones(args.batch_size, args.z_size)))
    noise.data.normal_(0, 1)

    fake_hidden = gan_gen(noise)
    errG = gan_disc(fake_hidden)

    # backward(mone) maximizes the critic score on generated codes
    errG.backward(mone)
    optimizer_gan_g.step()

    return errG
def train_ae(whichdecoder, batch):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))
    target = to_gpu(use_cuda, Variable(target))

    # compute the reconstruction loss only over non-zero (non-padding) targets
    mask = target.gt(0)
    masked_target = target.masked_select(mask)
    output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

    output = autoencoder(whichdecoder, source, lengths, noise=True)
    flat_output = output.view(-1, ntokens)
    masked_output = flat_output.masked_select(output_mask).view(-1, ntokens)
    loss = criterion_ce(masked_output / args.temp, masked_target)
    loss.backward()

    # `clip_grad_norm_` to prevent exploding gradient in RNNs / LSTMs
    torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), args.clip)
    optimizer_ae.step()

    return loss
def train_gan_d_into_ae(whichdecoder, batch):
    autoencoder.train()
    optimizer_ae.zero_grad()

    source, target, lengths = batch
    source = to_gpu(use_cuda, Variable(source))
    real_hidden = autoencoder(whichdecoder, source, lengths,
                              noise=False, encode_only=True)

    # pass the critic's gradient back into the encoder; only the autoencoder
    # parameters are updated here
    errD_real = gan_disc(real_hidden)
    errD_real.backward(one)
    torch.nn.utils.clip_grad_norm_(autoencoder.parameters(), args.clip)

    optimizer_ae.step()

    return errD_real
def init_state(self, bsz):
    zeros = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
    return to_gpu(self.gpu, zeros)
def init_hidden(self, bsz):
    zeros1 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
    zeros2 = Variable(torch.zeros(self.nlayers, bsz, self.nhidden))
    return (to_gpu(self.gpu, zeros1), to_gpu(self.gpu, zeros2))
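# to_gpu is used throughout this section but not defined here. It is most
# likely a small utility along these lines (a sketch under that assumption;
# the repo's own helper may differ):
def to_gpu(gpu, var):
    # move a tensor / Variable onto the GPU only when one is requested
    if gpu:
        return var.cuda()
    return var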
def evaluate_autoencoder(whichdecoder, data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    eos_id = corpus.dictionary.word2idx['<eos>']
    autoencoder.eval()
    ntokens = len(corpus.dictionary.word2idx)
    n_sents = 0.0
    total_loss = 0.0
    token_accuracies = 0.0
    all_source_sents = []
    all_transfer_sents = []

    pbar = tqdm(range(len(data_source)))
    for ii in pbar:
        batch = data_source[ii]
        source, target, lengths = batch
        source = to_gpu(use_cuda, Variable(source, requires_grad=False))
        target = to_gpu(use_cuda, Variable(target, requires_grad=False))
        n_sents += source.size()[0]

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        hidden = autoencoder(0, source, lengths, noise=False, encode_only=True)

        # output: batch x seq_len x ntokens
        if whichdecoder == 0:
            output = autoencoder(0, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals1, max_indices1 = torch.max(masked_output, 1)
            token_accuracies += torch.mean(max_indices1.eq(masked_target).float()).item()
            max_values1, max_indices1 = torch.max(output, 2)
            max_indices2 = autoencoder.generate(1, hidden, maxlen=50)
        else:
            output = autoencoder(1, source, lengths, noise=False)
            flattened_output = output.view(-1, ntokens)
            masked_output = flattened_output.masked_select(output_mask).view(-1, ntokens)
            # accuracy
            max_vals2, max_indices2 = torch.max(masked_output, 1)
            token_accuracies += torch.mean(max_indices2.eq(masked_target).float()).item()
            max_values2, max_indices2 = torch.max(output, 2)
            max_indices1 = autoencoder.generate(0, hidden, maxlen=50)

        # forward
        total_loss += criterion_ce(masked_output / args.temp, masked_target).data

        # all_source_sents, all_transfer_sents
        max_indices1 = max_indices1.view(output.size(0), -1).data.cpu().numpy()
        max_indices2 = max_indices2.view(output.size(0), -1).data.cpu().numpy()
        target = target.view(output.size(0), -1).data.cpu().numpy()
        tran_indices = max_indices2 if whichdecoder == 0 else max_indices1
        for t, tran_idx in zip(target, tran_indices):
            # real sentence
            truncated_to_eos = t.tolist().index(eos_id) if eos_id in t.tolist() else len(t)
            chars = " ".join([corpus.dictionary.idx2word[x] for x in t[:truncated_to_eos]])
            all_source_sents.append(chars)
            # transfer sentence
            truncated_to_eos = tran_idx.tolist().index(eos_id) if eos_id in tran_idx.tolist() else len(tran_idx)
            chars = " ".join([corpus.dictionary.idx2word[x] for x in tran_idx[:truncated_to_eos]])
            all_transfer_sents.append(chars)

    # compare the original and transfer
    aeoutf_from = "{}/{}_output_decoder_{}_from.txt".format(args.outf, epoch, whichdecoder)
    aeoutf_tran = "{}/{}_output_decoder_{}_tran.txt".format(args.outf, epoch, whichdecoder)
    with open(aeoutf_from, 'w') as f_from, open(aeoutf_tran, 'w') as f_trans:
        # laplacian smoothing
        # for word in corpus.dictionary.word2idx.keys():
        #     f_from.write(word + "\n")
        #     f_trans.write(word + "\n")
        for i in range(len(all_source_sents)):
            # real sentence
            f_from.write(all_source_sents[i])
            # transfer sentence
            f_trans.write(all_transfer_sents[i])
            if i != len(all_source_sents) - 1:
                f_from.write("\n")
                f_trans.write("\n")

    # bleu
    all_bleu_scores = 0.0
    for i in range(len(all_source_sents)):
        sou = all_source_sents[i].split(' ')
        tran = all_transfer_sents[i].split(' ')
        all_bleu_scores += sentence_bleu([sou], tran,
                                         smoothing_function=SmoothingFunction().method7,
                                         weights=[1.0 / 3.0] * 3)
    bleu = all_bleu_scores / n_sents * 100.0

    # forward and reverse
    loss = total_loss.item() / len(data_source)
    ppl = math.exp(loss)
    # print('bleu {:4.2f} | ppl {:4.3f}'.format(bleu, ppl))
    # logging.info('bleu {:4.2f} | ppl {:4.3f}'.format(bleu, ppl))

    # transfer
    labels = fasttext_classifier.predict(all_transfer_sents)
    truth = str(1 - whichdecoder)
    transfer = float(sum([l == truth for ll in labels for l in ll])) / n_sents * 100.0

    # load sentences to evaluate on
    arpa_path = '{}/{}_lm_{}.arpa'.format(args.outf, epoch, whichdecoder)
    kenlm_model = train_ngram_lm(args.kenlm_path, aeoutf_from, arpa_path, args.N)
    forward = get_ppl(kenlm_model, all_transfer_sents)
    kenlm_model = train_ngram_lm(args.kenlm_path, aeoutf_tran, arpa_path, args.N)
    reverse = get_ppl(kenlm_model, all_source_sents)
    # print('transfer {:4.2f} | forward {:4.3f} | reverse {:4.3f}'.format(transfer, forward, reverse))
    # logging.info('transfer {:4.2f} | forward {:4.3f} | reverse {:4.3f}'.format(transfer, forward, reverse))

    return bleu, ppl, transfer, forward, reverse
# dumping vocabulary
with open('{}/vocab.json'.format(args.outf), 'w') as f:
    json.dump(corpus.dictionary.word2idx, f)
with open('{}/args.json'.format(args.outf), 'w') as f:
    json.dump(vars(args), f)

eval_batch_size = 100
test0_data = batchify(corpus.data['test0'], eval_batch_size, shuffle=False)
test1_data = batchify(corpus.data['test1'], eval_batch_size, shuffle=False)
train0_data = batchify(corpus.data['train0'], args.batch_size, shuffle=True)
train1_data = batchify(corpus.data['train1'], args.batch_size, shuffle=True)
print("Loaded data!")

fixed_noise = to_gpu(use_cuda, Variable(torch.ones(args.batch_size, args.z_size)))
fixed_noise.data.normal_(0, 1)
one = to_gpu(use_cuda, torch.FloatTensor([1]))
mone = one * -1

# fasttext library
fasttext_classifier = fasttext.supervised('./data/data.txt', 'model', label_prefix='__label__')
result = fasttext_classifier.test('./data/data.txt')
print('P@1:', result.precision)
print('R@1:', result.recall)
print('Number of examples:', result.nexamples)

###############################################################################
# Build the models