def sample_from_latent(model, vocab_src, vocab_tgt, config):
    """Sample source sentences from the model's prior and print them.

    Draws `num_samples` latent codes z ~ N(prior_loc, prior_scale), initializes
    the latent-conditioned language model from each z, and ancestral-samples
    tokens step by step (up to config["max_len"] steps) from the categorical
    output distribution. The resulting token-id matrix is converted to text
    with `batch_to_sentences` and printed.

    Args:
        model: trained model exposing prior_loc/prior_scale, lm_init_layer,
            language_model, emb_src and generate_lm.
        vocab_src: source vocabulary (token -> id); used for the SOS seed and
            for decoding the sampled ids back to sentences.
        vocab_tgt: target vocabulary; unused, kept for interface compatibility.
        config: dict providing "max_len" (maximum sampled sentence length).
    """
    num_samples = 5
    prior = torch.distributions.Normal(loc=model.prior_loc,
                                       scale=model.prior_scale)
    z = prior.sample(sample_shape=[num_samples])
    hidden_lm = tile_rnn_hidden(model.lm_init_layer(z), model.language_model.rnn)

    # BUGFIX: this function samples *source* sentences with the source
    # embedder and decodes with vocab_src, so the SOS id must also come from
    # vocab_src. The original read vocab_tgt[SOS_TOKEN], which is only correct
    # when both vocabularies happen to assign SOS the same index.
    x_init = z.new([vocab_src[SOS_TOKEN] for _ in range(num_samples)]).long()
    x_embed = model.emb_src(x_init).unsqueeze(1)
    x_samples = [x_init.unsqueeze(-1)]

    for _ in range(config["max_len"]):
        pre_output, hidden_lm = model.language_model.forward_step(
            x_embed, hidden_lm, z)
        logits = model.generate_lm(pre_output)
        next_word_dist = torch.distributions.categorical.Categorical(
            logits=logits)
        x = next_word_dist.sample()
        x_embed = model.emb_src(x)
        x_samples.append(x)

    # Concatenate along the time axis: (num_samples, 1 + max_len) token ids.
    x_samples = torch.cat(x_samples, dim=-1)
    x_samples = batch_to_sentences(x_samples, vocab_src)
    print("Sampled source sentences from the latent space ")
    for idx, x in enumerate(x_samples, 1):
        print("{}: {}".format(idx, x))
def model_language(self, x, z):
    """Compute per-step LM logits for x under the latent-conditioned language model.

    Embeds x (with dropout), initializes the LM hidden state from z via
    tanh(lm_init_layer(z)), then teacher-forces the sequence one token at a
    time through language_model.forward_step, collecting the output logits.

    Args:
        x: (batch, time) tensor of source token ids.
        z: latent code conditioning the language model.

    Returns:
        (batch, time, vocab) tensor of logits, one slice per input position.
    """
    x_emb = self.dropout(self.emb_src(x))
    state = tile_rnn_hidden(torch.tanh(self.lm_init_layer(z)),
                            self.language_model.rnn)
    step_logits = []
    for step in range(x_emb.size(1)):
        # Feed the embedding of position `step` as a length-1 time slice.
        token_emb = x_emb[:, step].unsqueeze(1)
        pre_out, state = self.language_model.forward_step(token_emb, state, z)
        step_logits.append(self.generate_lm(pre_out))
    return torch.cat(step_logits, dim=1)
def encode(self, x, x_len, z):
    """Run the source encoder with a z-dependent initial hidden state.

    Args:
        x: (batch, time) tensor of source token ids.
        x_len: lengths of the source sequences.
        z: latent code used to initialize the encoder RNN.

    Returns:
        The encoder's (outputs, final_state) pair.
    """
    init_state = tile_rnn_hidden(torch.tanh(self.enc_init_layer(z)),
                                 self.encoder.rnn)
    x_emb = self.dropout(self.emb_src(x))
    return self.encoder(x_emb, x_len, init_state)
def init_decoder(self, encoder_outputs, encoder_final, z):
    """Prime the decoder (e.g. its attention keys) and build its initial state.

    Registers the encoder outputs/final state with the decoder, then derives
    the decoder's initial hidden state from z via tanh(dec_init_layer(z)),
    tiled across the decoder RNN's layers.

    Args:
        encoder_outputs: per-position encoder states.
        encoder_final: final encoder state.
        z: latent code used to initialize the decoder RNN.

    Returns:
        The tiled initial hidden state for the decoder RNN.
    """
    self.decoder.initialize(encoder_outputs, encoder_final)
    init_state = torch.tanh(self.dec_init_layer(z))
    return tile_rnn_hidden(init_state, self.decoder.rnn)