def sampling_around_existing_sentence(s1, num=10):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)
    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt
    # string to tensor
    s1_tensor = str_to_tensor(s1, vocab, conf)
    s1_tensor = on_cuda(s1_tensor.unsqueeze(0))
    mu, logvar = vae.encode(s1_tensor)
    # scale_tril of a diagonal Gaussian is the per-dimension std, i.e. exp(0.5 * logvar),
    # matching the reparameterization used in VAE.forward
    mvn = MultivariateNormal(mu, scale_tril=torch.diag(torch.exp(0.5 * logvar[0])))
    for i in range(num):
        z = mvn.sample()
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
                probs = F.softmax(logit[0], dim=1)
                G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        print('----------------------------')
        print(string.encode('utf-8'))

def generate_sentences(n_examples):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)
    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt
    for i in range(n_examples):
        z = on_cuda(torch.randn([1, conf.n_z]))
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        # 2 is the index of the start token in vocab.stoi
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        # until we hit the end token (index 3 in vocab.stoi)
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
                probs = F.softmax(logit[0], dim=1)
                G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        # print(string.encode('utf-8'))
        print(string)

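# The multinomial decoding loop above is repeated almost verbatim in
# sampling_around_existing_sentence, generate_sentences, and interpolate_sentences.
# A possible refactor is sketched below; decode_from_z is NOT part of the original
# code, and it assumes the same module-level helpers (torch, F, on_cuda) and the same
# conf fields (n_layers_E, n_hidden_G, start_token, end_token) used above.
def decode_from_z(z, vae, vocab, conf, max_len=30):
    # sample one sentence from the generator conditioned on latent z
    h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
    c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
    G_hidden = (h_0, c_0)
    G_inp = on_cuda(torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token]))
    words = []
    while G_inp[0][0].item() != vocab.stoi[conf.end_token] and len(words) < max_len:
        with torch.no_grad():
            logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            G_inp = torch.multinomial(probs, 1)
        words.append(vocab.itos[G_inp[0][0].item()])
    return ' '.join(words)
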
def create_g_input(x, train, vocab, conf):
    # performs random word dropout during training
    # clip the last word in the sequence
    G_inp = x[:, 0:x.size(1) - 1].clone()
    if not train:
        return on_cuda(G_inp)
    # random word dropout
    r = np.random.rand(G_inp.size(0), G_inp.size(1))
    for i in range(len(G_inp)):
        for j in range(1, G_inp.size(1)):
            if r[i, j] < conf.word_dropout and G_inp[i, j] not in [
                    vocab.stoi[conf.pad_token], vocab.stoi[conf.end_token]
            ]:
                G_inp[i, j] = vocab.stoi[conf.unk_token]
    return on_cuda(G_inp)

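# A vectorized sketch of the same word dropout (NOT part of the original code): it
# builds the dropout mask with tensor ops instead of the Python double loop, assuming
# the same vocab/conf fields as create_g_input above.
def create_g_input_vectorized(x, train, vocab, conf):
    G_inp = x[:, :-1].clone()
    if not train:
        return on_cuda(G_inp)
    # random mask over all positions except the first token
    drop = torch.rand_like(G_inp, dtype=torch.float) < conf.word_dropout
    drop[:, 0] = False
    # never drop padding or end tokens
    protected = (G_inp == vocab.stoi[conf.pad_token]) | (G_inp == vocab.stoi[conf.end_token])
    G_inp[drop & ~protected] = vocab.stoi[conf.unk_token]
    return on_cuda(G_inp)
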
def forward(self, x, G_inp, z=None, G_hidden=None):
    if z is None:
        # training with given text: encode x and sample z from the posterior
        batch_size, n_seq = x.size()
        # produce embedding from encoder input
        x = self.embedding(x)
        # h_T of encoder
        E_hidden = self.encoder(x)
        # mean of latent z
        mu = self.hidden_to_mu(E_hidden)
        # log variance of latent z
        logvar = self.hidden_to_logvar(E_hidden)
        # noise sampled from Normal(0, 1)
        z = on_cuda(torch.randn([batch_size, self.n_z]))
        # reparameterization trick: z = mu + eps * sigma, so the sampling step stays differentiable
        z = mu + z * torch.exp(0.5 * logvar)
        # KL-divergence loss
        # kld = -0.5 * torch.sum(logvar - mu.pow(2) - logvar.exp() + 1, 1).mean()
        kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    else:
        # testing / generation with z sampled from random noise (or provided externally): no KL term
        kld = None
    # embeddings for generator input
    G_inp = self.embedding(G_inp)
    logit, G_hidden = self.generator(G_inp, z, G_hidden)
    return logit, G_hidden, kld

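# For reference, a standalone sketch (NOT part of the original module) of the two pieces
# used in forward() above: the reparameterization trick and the closed-form KL divergence
# between the diagonal Gaussian posterior N(mu, sigma^2) and the standard normal prior.
def reparameterize(mu, logvar):
    # z = mu + eps * sigma with eps ~ N(0, I); keeps the sampling step differentiable
    eps = torch.randn_like(mu)
    return mu + eps * torch.exp(0.5 * logvar)


def kl_standard_normal(mu, logvar):
    # KL(N(mu, diag(exp(logvar))) || N(0, I)), summed over latent dims and batch,
    # matching the unaveraged kld computed in forward() above
    return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
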
def create_vae(conf, vocab):
    # emb = torchtext.vocab.GloVe(conf.vector, conf.n_embed)
    # vae = VAE(conf, emb)
    vae = VAE(conf)
    vae.embedding.weight.data.copy_(vocab.vectors)
    vae = on_cuda(vae)
    trainer_vae = torch.optim.Adam(vae.parameters(), lr=conf.lr)
    return vae, trainer_vae

def interpolate_sentences(num=10):
    # NOTE: vocab based on datasets
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)
    ckpt = torch.load(save_path)
    vae, vae_trainer = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt
    z1 = on_cuda(torch.randn([1, conf.n_z]))
    # z2 = on_cuda(torch.randn([1, conf.n_z]))
    z2 = z1 + on_cuda(0.3 * torch.ones(z1.size()))
    int_z = torch.lerp(z1, z2, on_cuda(torch.linspace(0.0, 1.0, num).unsqueeze(1)))
    # zs to strings
    for i in range(int_z.size()[0]):
        z = int_z[i, :].unsqueeze(0)
        h_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        c_0 = on_cuda(torch.zeros(2 * conf.n_layers_E, 1, conf.n_hidden_G))
        G_hidden = (h_0, c_0)
        G_inp = torch.LongTensor(1, 1).fill_(vocab.stoi[conf.start_token])
        G_inp = on_cuda(G_inp)
        string = conf.start_token + ' '
        while G_inp[0][0].item() != vocab.stoi[conf.end_token]:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
                probs = F.softmax(logit[0], dim=1)
                G_inp = torch.multinomial(probs, 1)
            string += (vocab.itos[G_inp[0][0].item()] + ' ')
        print('----------------------------')
        print(string.encode('utf-8'))

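# Minimal check (a sketch, not original code) of the broadcasting torch.lerp relies on
# above: a (num, 1) weight against (1, n_z) endpoints yields a (num, n_z) batch of
# interpolated latents, with row i a fraction i / (num - 1) of the way from z1 to z2.
def _lerp_broadcast_demo():
    z1 = torch.zeros(1, 4)
    z2 = torch.ones(1, 4)
    path = torch.lerp(z1, z2, torch.linspace(0.0, 1.0, 5).unsqueeze(1))
    assert path.shape == (5, 4)
    # the middle row is exactly halfway between the endpoints
    assert torch.allclose(path[2], torch.full((4,), 0.5))
    return path
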
def __init__(self, conf):
    # create vae, load weights
    _, _, _, self.vocab = get_gyafc(conf)
    self.vae, _ = create_vae(conf, self.vocab)
    ckpt = torch.load(conf.vae_model_path)
    self.vae.load_state_dict(ckpt['vae_dict'])
    self.vae.eval()
    del ckpt
    # create linear shift
    self.linear_shift = on_cuda(LinearShift(conf))
    # save conf
    self.conf = conf
    # init
    self.score = 0
    self.eval_done = False
    # load dataset
    self.test = get_formality_set(conf, self.vocab)
    # scoring
    self.extractor = FeatureExtractor(conf.w2v_path, conf.corpus_dict_path)
    with open(conf.pt16_path, 'rb') as f:
        self.pt16_ridge = pickle.load(f)

def eval(self, work):
    # evaluates the quality of the given linear-shift parameters
    # copy weights into the linear shift module
    mu_weight = work['mu_weight']
    mu_bias = work['mu_bias']
    var_weight = work['var_weight']
    var_bias = work['var_bias']
    with torch.no_grad():
        self.linear_shift.linear_mu[0].weight.copy_(torch.from_numpy(mu_weight).float())
        self.linear_shift.linear_mu[0].bias.copy_(torch.from_numpy(mu_bias).float())
        self.linear_shift.linear_logvar[0].weight.copy_(torch.from_numpy(var_weight).float())
        self.linear_shift.linear_logvar[0].bias.copy_(torch.from_numpy(var_bias).float())
    batch_scores = []
    for batch in self.test:
        print('New Batch')
        current_batch_scores = []
        current_batch_strings = []
        batch = on_cuda(batch.T)
        # encode batch to mu and logvar
        mu, logvar = self.vae.encode(batch)
        # pass mu and logvar through the linear shift
        new_mu, new_logvar = self.linear_shift(mu, logvar)
        # loop through each sentence in the batch
        for i in range(new_mu.size()[0]):
            # create the sampling distribution; scale_tril is the per-dimension std,
            # i.e. exp(0.5 * logvar), matching the reparameterization in VAE.forward
            mvn = MultivariateNormal(new_mu[i, :],
                                     scale_tril=torch.diag(torch.exp(0.5 * new_logvar[i, :])))
            # sample and decode
            z = mvn.sample().unsqueeze(0)
            h_0 = on_cuda(torch.zeros(self.conf.n_layers_G, 1, self.conf.n_hidden_G))
            c_0 = on_cuda(torch.zeros(self.conf.n_layers_G, 1, self.conf.n_hidden_G))
            G_hidden = (h_0, c_0)
            G_inp = torch.LongTensor(1, 1).fill_(self.vocab.stoi[self.conf.start_token])
            G_inp = on_cuda(G_inp)
            string = ''
            length = 0
            while G_inp[0][0].item() != self.vocab.stoi[self.conf.end_token]:
                with torch.autograd.no_grad():
                    logit, G_hidden, _ = self.vae(None, G_inp, z, G_hidden)
                    probs = F.softmax(logit[0], dim=1)
                    G_inp = torch.multinomial(probs, 1)
                if G_inp[0][0].item() != self.vocab.stoi[self.conf.end_token]:
                    string += self.vocab.itos[G_inp[0][0].item()] + ' '
                length += 1
                if length >= 20:
                    break
            current_batch_strings.append(string)
        print('Decode on current batch done, scoring now')
        # score the decoded strings
        for i, sent in enumerate(current_batch_strings):
            # PT16 formality
            pt16 = self.get_pt16_score(sent)
            # BLEU with the original
            # TODO: how to get the original sentence?
            # bleu = self.get_bleu_with_orig(?, sent)
            # current_batch_scores.append(self.conf.pt16_weight * pt16 + self.conf.bleu_weight * bleu)
            current_batch_scores.append(pt16)
        print('Current batch average score:', np.mean(current_batch_scores))
        batch_scores.append(np.mean(current_batch_scores))
    # TODO: process all scores into a single score?
    # score = 0
    # negate the mean because the optimizer minimizes
    score = -np.mean(batch_scores)
    self.score = score
    self.eval_done = True

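# The LinearShift module itself is not shown in this section. Judging from the attribute
# access above (linear_mu[0].weight, linear_logvar[0].weight), it appears to wrap two
# nn.Sequential blocks that each start with an nn.Linear over the latent vector. The
# sketch below is an assumption about its shape, not the original definition; the
# conf.n_z sizing in particular is a guess.
import torch.nn as nn


class LinearShiftSketch(nn.Module):
    def __init__(self, conf):
        super().__init__()
        # one affine map for the means, one for the log-variances
        self.linear_mu = nn.Sequential(nn.Linear(conf.n_z, conf.n_z))
        self.linear_logvar = nn.Sequential(nn.Linear(conf.n_z, conf.n_z))

    def forward(self, mu, logvar):
        return self.linear_mu(mu), self.linear_logvar(logvar)
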
    # TODO: change what to save
    optim.dump(conf.optim_filename)
    np.savez_compressed(conf.npz_filename,
                        scores=np.array(all_scores),
                        individuals=all_individuals)
    return optim, all_scores, all_individuals


if __name__ == '__main__':
    with open('configs/default.yaml') as file:
        conf_dict = yaml.load(file, Loader=yaml.FullLoader)
    conf = Namespace(**conf_dict)
    print(conf)
    np.random.seed(conf.seed)
    torch.manual_seed(conf.seed)

    ray.init()
    optim, all_scores, all_individuals = search(conf)

    best_params = optim.recommend()
    best_linear_shift = on_cuda(LinearShift(conf))
    mu_weight = best_params['mu_weight']
    mu_bias = best_params['mu_bias']
    var_weight = best_params['var_weight']
    var_bias = best_params['var_bias']
    with torch.no_grad():
        best_linear_shift.linear_mu[0].weight.copy_(torch.from_numpy(mu_weight.value).float())
        best_linear_shift.linear_mu[0].bias.copy_(torch.from_numpy(mu_bias.value).float())
        best_linear_shift.linear_logvar[0].weight.copy_(torch.from_numpy(var_weight.value).float())
        best_linear_shift.linear_logvar[0].bias.copy_(torch.from_numpy(var_bias.value).float())
    torch.save(best_linear_shift.state_dict(), conf.linear_model_save_path)

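# search() itself is not shown in this file. A simplified, sequential sketch of what it
# might look like is given below (hypothetical: the real version appears to distribute
# evaluations with ray, and the optimizer and budget choices here are assumptions). It
# uses nevergrad's ask/tell interface with the same Dict-of-arrays parameter layout
# consumed by the evaluator's eval() method shown earlier.
import nevergrad as ng


def search_sketch(conf, evaluator, budget=100):
    n_z = conf.n_z
    params = ng.p.Dict(
        mu_weight=ng.p.Array(shape=(n_z, n_z)),
        mu_bias=ng.p.Array(shape=(n_z,)),
        var_weight=ng.p.Array(shape=(n_z, n_z)),
        var_bias=ng.p.Array(shape=(n_z,)),
    )
    optim = ng.optimizers.CMA(parametrization=params, budget=budget)
    all_scores, all_individuals = [], []
    for _ in range(budget):
        candidate = optim.ask()
        evaluator.eval(candidate.value)   # candidate.value is a dict of numpy arrays
        loss = evaluator.score            # eval() stores the negated mean formality score
        optim.tell(candidate, float(loss))
        all_scores.append(loss)
        all_individuals.append(candidate.value)
    return optim, all_scores, all_individuals
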
def create_vae(conf, vocab):
    vae = VAE(conf)
    vae.embedding.weight.data.copy_(vocab.vectors)
    vae = on_cuda(vae)
    trainer_vae = torch.optim.Adam(vae.parameters(), lr=conf.lr)
    return vae, trainer_vae


if __name__ == '__main__':
    with open('configs/default.yaml') as file:
        conf_dict = yaml.load(file, Loader=yaml.FullLoader)
    conf = Namespace(**conf_dict)
    print(conf)
    np.random.seed(conf.seed)
    torch.manual_seed(conf.seed)

    best_linear_shift = on_cuda(LinearShift(conf))
    linear_ckpt = torch.load(conf.linear_model_save_path)
    best_linear_shift.load_state_dict(linear_ckpt)
    best_linear_shift.eval()

    _, _, _, vocab = get_gyafc(conf)
    ckpt = torch.load(conf.vae_model_path)
    vae, _ = create_vae(conf, vocab)
    vae.load_state_dict(ckpt['vae_dict'])
    vae.eval()
    del ckpt, linear_ckpt

    test = get_informal_test_set(conf, vocab)
    all_strings = []
    for batch in test:

def train():
    # data loading
    # train_iter, test_iter, valid_iter, vocab = get_wiki2(conf)
    train_iter, test_iter, valid_iter, vocab = get_gyafc(conf)

    # create model, load weights if necessary
    if args.resume_training:
        step, start_epoch, vae, trainer_vae = load_ckpt(conf, save_path, vocab)
    else:
        start_epoch = 0
        step = 0
        vae, trainer_vae = create_vae(conf, vocab)

    all_t_rec_loss = []
    all_t_kl_loss = []
    all_t_loss = []
    all_v_rec_loss = []
    all_v_kl_loss = []
    all_v_loss = []

    # training epochs
    for epoch in tqdm.tqdm(range(start_epoch, conf.epochs), desc='Epochs'):
        vae.train()
        # logging
        train_rec_loss = []
        train_kl_loss = []
        train_loss = []
        for batch in train_iter:
            # batch is the encoder input and the target output for the generator
            batch = on_cuda(batch.T)
            G_inp = create_g_input(batch, True, vocab, conf)
            rec_loss, kl_loss, elbo, kld_coef = train_batch(vae, trainer_vae, batch, G_inp, step, conf, train=True)
            train_rec_loss.append(rec_loss)
            train_kl_loss.append(kl_loss)
            train_loss.append(elbo)
            # log
            if args.to_train:
                writer.add_scalar('ELBO', elbo, step)
                writer.add_scalar('Cross Entropy', rec_loss, step)
                writer.add_scalar('KL Divergence Raw', kl_loss, step)
                writer.add_scalar('KL Annealed Weight', kld_coef, step)
                writer.add_scalar('KL Divergence Weighted', kl_loss * kld_coef, step)
            # increment step
            step += 1

        # validation
        vae.eval()
        valid_rec_loss = []
        valid_kl_loss = []
        valid_loss = []
        for valid_batch in valid_iter:
            valid_batch = on_cuda(valid_batch.T)
            G_inp = create_g_input(valid_batch, True, vocab, conf)
            with torch.autograd.no_grad():
                rec_loss, kl_loss, elbo, kld_coef = train_batch(vae, trainer_vae, valid_batch, G_inp, step, conf, train=False)
            valid_rec_loss.append(rec_loss)
            valid_kl_loss.append(kl_loss)
            valid_loss.append(elbo)

        all_t_rec_loss.append(train_rec_loss)
        all_t_kl_loss.append(train_kl_loss)
        all_t_loss.append(train_loss)
        all_v_rec_loss.append(valid_rec_loss)
        all_v_kl_loss.append(valid_kl_loss)
        all_v_loss.append(valid_loss)

        mean_t_rec_loss = np.mean(train_rec_loss)
        mean_t_kl_loss = np.mean(train_kl_loss)
        mean_t_loss = np.mean(train_loss)
        mean_v_rec_loss = np.mean(valid_rec_loss)
        mean_v_kl_loss = np.mean(valid_kl_loss)
        mean_v_loss = np.mean(valid_loss)

        # loss_log.set_description_str('T_rec: ' + '%.2f' % mean_t_rec_loss +
        #                              ' T_kld: ' + '%.2f' % mean_t_kl_loss + ' V_rec: ' +
        #                              '%.2f' % mean_v_rec_loss + ' V_kld: ' + '%.2f' % mean_v_kl_loss)
        tqdm.tqdm.write('T_rec: ' + '%.2f' % mean_t_rec_loss +
                        ' T_kld: ' + '%.2f' % mean_t_kl_loss +
                        ' T_ELBO: ' + '%.2f' % mean_t_loss +
                        ' V_rec: ' + '%.2f' % mean_v_rec_loss +
                        ' V_kld: ' + '%.2f' % mean_v_kl_loss +
                        ' V_ELBO: ' + '%.2f' % mean_v_loss +
                        ' kld_coef: ' + '%.2f' % kld_coef)

        if epoch % 5 == 0:
            torch.save(
                {
                    'epoch': epoch + 1,
                    'vae_dict': vae.state_dict(),
                    'vae_trainer': trainer_vae.state_dict(),
                    'step': step
                }, save_path)
            # NOTE: npz path; still messed up, overwrites with only the latest losses when resuming training
            # np.savez_compressed('data/losses_log/losses_wiki2_fixed.npz',
            #                     t_rec=np.array(all_t_rec_loss),
            #                     t_kl=np.array(all_t_kl_loss),
            #                     v_rec=np.array(all_v_rec_loss),
            #                     v_kl=np.array(all_v_kl_loss))
            np.savez_compressed(
                'data/losses_log/losses_gyafc_weightfix3_nodropout_25000crossover_long_0.0005k.npz',
                t_rec=np.array(all_t_rec_loss),
                t_kl=np.array(all_t_kl_loss),
                t_elbo=np.array(all_t_loss),
                v_rec=np.array(all_v_rec_loss),
                v_kl=np.array(all_v_kl_loss),
                v_elbo=np.array(all_v_loss))

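# train_batch (and the kld_coef it returns) is defined elsewhere. The logged
# 'KL Annealed Weight' is presumably a KL annealing schedule in the style of Bowman et
# al.'s sentence VAE; a logistic schedule consistent with the '25000crossover' and
# '0.0005k' tags in the npz filename above would look like the sketch below. The k and
# x0 values are read off that filename and are assumptions, not confirmed settings.
import math


def kld_coef_sketch(step, k=0.0005, x0=25000):
    # ~0 early in training, ~0.5 at the crossover step x0, approaching 1 afterwards
    return 1.0 / (1.0 + math.exp(-k * (step - x0)))
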
def init_hidden(self, batch_size):
    h_0 = torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
    c_0 = torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
    self.hidden = (on_cuda(h_0), on_cuda(c_0))