def generate(self, encodings):
    sentences = []
    for z in encodings.numpy():
        z = get_cuda(T.from_numpy(z)).view((1, -1))
        # Fresh generator hidden state for every latent code
        h_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
        c_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
        G_hidden = (h_0, c_0)
        # Start decoding from the start-of-sentence token
        G_inp = T.LongTensor(1, 1).fill_(self.vocab.stoi[opt.start_token])
        G_inp = get_cuda(G_inp)
        sentence = opt.start_token + " "
        num_words = 0
        while G_inp[0][0].item() != self.vocab.stoi[opt.end_token]:
            with T.no_grad():
                logit, G_hidden, _ = self.vae(None, G_inp, z, G_hidden)
            # Temperature-scaled sampling of the next word
            probs = F.softmax(logit[0] / TEMPERATURE, dim=1)
            G_inp = T.multinomial(probs, 1)
            sentence += self.vocab.itos[G_inp[0][0].item()] + " "
            num_words += 1
            if num_words > 64:  # hard cap on sentence length
                break
        # Strip special tokens from the decoded string
        sentence = (sentence.replace('<unk>', '').replace('<sos>', '')
                            .replace('<eos>', '').replace('<pad>', ''))
        sentences.append(sentence)
    return sentences
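# A small, standalone illustration (not part of the project) of the temperature-scaled sampling
# used in generate() above: softmax over logits / temperature, then multinomial sampling.
# The logits and temperature values below are made up for demonstration.
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.1, -1.0]])      # toy next-word logits over a 4-word vocabulary
for temperature in (0.5, 1.0, 2.0):
    probs = F.softmax(logits / temperature, dim=1)   # lower temperature -> sharper distribution
    next_word = torch.multinomial(probs, 1)          # sample one word id from the distribution
    print(temperature, probs.numpy().round(3), next_word.item())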
def generate_sentences(n_examples):
    # Generate n_examples sentences, each decoded from z ~ N(0, I)
    checkpoint = T.load(save_path)
    vae.load_state_dict(checkpoint['vae_dict'])
    vae.eval()
    del checkpoint
    for i in range(n_examples):
        z = get_cuda(T.randn([1, opt.n_z]))
        h_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
        c_0 = get_cuda(T.zeros(opt.n_layers_G, 1, opt.n_hidden_G))
        G_hidden = (h_0, c_0)
        G_inp = T.LongTensor(1, 1).fill_(vocab.stoi[opt.start_token])
        G_inp = get_cuda(G_inp)
        sentence = opt.start_token + " "
        while G_inp[0][0].item() != vocab.stoi[opt.end_token]:
            with T.no_grad():
                logit, G_hidden, _ = vae(None, G_inp, z, G_hidden)
            probs = F.softmax(logit[0], dim=1)
            G_inp = T.multinomial(probs, 1)
            sentence += vocab.itos[G_inp[0][0].item()] + " "
        print(sentence.encode('utf-8'))
def __init__(self):
    _, _, self.vocab = get_iterators(opt)
    self.vae = VAE(opt)
    self.vae.embedding.weight.data.copy_(self.vocab.vectors)
    self.vae = get_cuda(self.vae)
    checkpoint = T.load('data/saved_models/vae_model.121.pyt')
    self.vae.load_state_dict(checkpoint['vae_dict'])
    self.vae.eval()
    del checkpoint
def forward(self, x, G_inp, z=None, G_hidden=None):
    if z is None:                                       # Training: infer z from the input text x
        batch_size, n_seq = x.size()
        x = self.embedding(x)                           # Produce embeddings from encoder input
        E_hidden = self.encoder(x)                      # Get h_T of encoder
        mu = self.hidden_to_mu(E_hidden)                # Mean of latent z
        logvar = self.hidden_to_logvar(E_hidden)        # Log variance of latent z
        z = get_cuda(T.randn([batch_size, self.n_z]))   # Noise sampled from ε ~ Normal(0, 1)
        z = mu + z * T.exp(0.5 * logvar)                # Reparameterization trick: z = μ + ε·σ keeps backpropagation through μ and σ
        kld = -0.5 * T.sum(logvar - mu.pow(2) - logvar.exp() + 1, 1).mean()  # KL divergence loss
    else:
        kld = None                                      # Testing: z was supplied externally (e.g. random noise), so no KL term
    G_inp = self.embedding(G_inp)                       # Produce embeddings for generator input
    logit, G_hidden = self.generator(G_inp, z, G_hidden)
    return logit, G_hidden, kld
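# A minimal, self-contained sketch of the reparameterization trick and closed-form KL term
# used in forward() above, on toy dimensions (batch of 4, latent size 8). The variable names
# and sizes here are illustrative only, not part of the model.
import torch

batch_size, n_z = 4, 8
mu = torch.randn(batch_size, n_z)          # stand-in for the encoder's predicted mean of q(z|x)
logvar = torch.randn(batch_size, n_z)      # stand-in for the encoder's predicted log variance of q(z|x)

eps = torch.randn(batch_size, n_z)         # ε ~ N(0, I)
z = mu + eps * torch.exp(0.5 * logvar)     # z = μ + ε·σ, differentiable w.r.t. μ and logvar

# KL(q(z|x) || N(0, I)) in closed form, summed over latent dims, averaged over the batch
kld = -0.5 * torch.sum(logvar - mu.pow(2) - logvar.exp() + 1, dim=1).mean()
print(z.shape, kld.item())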
opt = parser.parse_args()
print(opt)

save_path = "data/saved_models/vae_model.tar"
if not os.path.exists("data/saved_models"):
    os.makedirs("data/saved_models")
os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu_device)

#---------------------------------------------------------------

train_iter, val_iter, vocab = get_iterators(opt)
vae = VAE(opt)
vae.embedding.weight.data.copy_(vocab.vectors)       # Initialize trainable embeddings with pretrained GloVe vectors
vae = get_cuda(vae)
trainer_vae = T.optim.Adam(vae.parameters(), lr=opt.lr)

def create_generator_input(x, train):
    G_inp = x[:, 0:x.size(1) - 1].clone()             # Generator input excludes the last word of the sequence
    if train == False:
        return G_inp

    r = np.random.rand(G_inp.size(0), G_inp.size(1))  # Perform word dropout according to random values (r) drawn for each word
    for i in range(len(G_inp)):
        for j in range(1, G_inp.size(1)):
            if r[i, j] < opt.word_dropout and G_inp[i, j] not in [vocab.stoi[opt.pad_token], vocab.stoi[opt.end_token]]:
                G_inp[i, j] = vocab.stoi[opt.unk_token]
    return G_inp
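# A sketch (not the project's code) of the same word-dropout idea as create_generator_input,
# done with tensor masks instead of the Python loops above. The token ids and dropout rate
# below are hypothetical, chosen only to make the example self-contained.
import torch

pad_id, eos_id, unk_id = 1, 3, 0       # hypothetical special-token ids
word_dropout = 0.3

G_inp = torch.tensor([[5, 7, 9, 3, 1],
                      [6, 8, 3, 1, 1]])              # toy generator input (batch of 2 sequences)
r = torch.rand(G_inp.shape)                          # one random value per token
protected = (G_inp == pad_id) | (G_inp == eos_id)    # never drop padding or end-of-sentence tokens
protected[:, 0] = True                               # keep the first (start) token, as the loop above does
drop = (r < word_dropout) & ~protected
G_dropped = G_inp.masked_fill(drop, unk_id)          # replace dropped words with <unk>
print(G_dropped)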
def init_hidden(self, batch_size):
    h_0 = T.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
    c_0 = T.zeros(self.n_layers_G, batch_size, self.n_hidden_G)
    self.hidden = (get_cuda(h_0), get_cuda(c_0))