def interpolate(self, start_sentence, end_sentence, steps=5):
    # Encode both endpoint sentences to their posterior means.
    start_split_sentence = start_sentence.split(" ")
    start_sequence_of_batches = [[word] for word in start_split_sentence]
    start_sequence_of_embedded_batches = [
        get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
        for batch in start_sequence_of_batches
    ]
    start_mu, start_logvar = self._encoder_forward(
        start_sequence_of_embedded_batches, 1)
    end_split_sentence = end_sentence.split(" ")
    end_sequence_of_batches = [[word] for word in end_split_sentence]
    end_sequence_of_embedded_batches = [
        get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
        for batch in end_sequence_of_batches
    ]
    end_mu, end_logvar = self._encoder_forward(
        end_sequence_of_embedded_batches, 1)
    # Walk the straight line between the two means and decode each
    # intermediate point. Starting at i = 1 avoids re-decoding start_mu,
    # which is already represented by start_sentence itself.
    step_size = (end_mu - start_mu) / float(steps)
    sentences = [start_sentence]
    for i in range(1, steps):
        logits, predictions = self.decoder(start_mu + i * step_size,
                                           self.embeddings,
                                           batch_size=1)
        sentences.extend(self._to_sentences(predictions, 1))
    sentences.append(end_sentence)
    return self._format_sentences(sentences)
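# A minimal, self-contained sketch of the latent walk interpolate() performs,
# with the model replaced by toy tensors (all names and sizes below are
# illustrative): take points on the straight line between the two posterior
# means; a real model would pass each z to the decoder.
import torch

start_mu = torch.randn(1, 16)          # posterior mean of the start sentence
end_mu = torch.randn(1, 16)            # posterior mean of the end sentence
steps = 5
step_size = (end_mu - start_mu) / steps
for i in range(1, steps):
    z = start_mu + i * step_size       # i/steps of the way from start to end
    print(z.norm().item())             # stand-in for decoding z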
def _vae_epoch(self, loaders, sentence_length_indices, batch_size,
               optimizer=None):
    losses = []
    reconstruction_losses = []
    kld_losses = []
    error_rates = []
    for index in sentence_length_indices:
        # Each loader serves batches of a single sentence length, so every
        # batch can be processed without padding.
        loader = loaders[index]
        sequence = next(iter(loader))
        sequence_of_embedded_batches = [
            get_variable(
                torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in sequence
        ]
        sequence_of_indexed_batches = [
            get_variable(
                torch.LongTensor(self.embeddings.index_batch(batch)))
            for batch in sequence
        ]
        mu, logvar = self._encoder_forward(sequence_of_embedded_batches,
                                           batch_size)
        context = self._get_context(mu, logvar, batch_size)
        if optimizer is not None:
            # Training: teacher-force with the indexed targets and apply
            # word dropout.
            logits, predictions = self._decoder_forward(
                context, batch_size, sequence_of_indexed_batches,
                len(sequence), self.drop_prob)
        else:
            # Evaluation: decode freely from the context alone.
            logits, predictions = self._decoder_forward(
                context, batch_size, None, len(sequence), None)
        loss, reconstruction_loss, kld_loss = self.loss(
            logits, sequence_of_indexed_batches, mu, logvar,
            self.decoder.step_count)
        losses.append(loss.cpu().data.numpy())
        reconstruction_losses.append(
            reconstruction_loss.cpu().data.numpy())
        kld_losses.append(kld_loss.cpu().data.numpy())
        error_rate = self.vae_error_rate(predictions,
                                         sequence_of_indexed_batches)
        error_rates.append(error_rate.cpu().data.numpy())
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # The step counts feed self.loss via decoder.step_count, which
            # schedules the KL term over training.
            self.encoder.increment_step(batch_size=batch_size)
            self.decoder.increment_step(batch_size=batch_size)
    print('Mean Loss: {}'.format(np.mean(losses)))
    print('Mean Error Rate: {}'.format(np.mean(error_rates)))
    return losses, reconstruction_losses, kld_losses, error_rates
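# _vae_epoch doubles as training and evaluation depending on whether an
# optimizer is passed. A self-contained sketch of that pattern with a toy
# model and data (everything below is illustrative):
import torch
import torch.nn as nn

def run_epoch(model, batches, optimizer=None):
    losses = []
    for x, y in batches:
        loss = nn.functional.mse_loss(model(x), y)
        losses.append(loss.item())
        if optimizer is not None:      # training pass: backprop and update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    return losses

model = nn.Linear(4, 1)
batches = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(3)]
run_epoch(model, batches, torch.optim.SGD(model.parameters(), lr=0.1))  # train
run_epoch(model, batches)              # no optimizer: evaluation only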
def forward(self, sequence_embedding, embedding_dict, eos_index,
            training_sequence_length=None):
    sequence_of_indices = []
    sequence_of_logits = []
    # Seed the decoder with the EOS embedding as the first input.
    x = get_variable(
        torch.FloatTensor([embedding_dict[eos_index]] * self.batch_size))
    h_tm1 = self._get_initial_hidden_state(sequence_embedding)
    word_indices = [-1] * self.batch_size
    # Inference: decode until every sequence has emitted EOS (or the
    # length cap is hit). Training: decode for exactly
    # training_sequence_length steps so outputs align with the targets.
    while ((training_sequence_length is None
            and np.any(np.array(word_indices) != eos_index)
            and len(sequence_of_indices) < self.max_sequence_length)
           or (training_sequence_length is not None
               and len(sequence_of_indices) < training_sequence_length)):
        h_tm1 = self._get_hidden_state(x, h_tm1, sequence_embedding)
        word_indices, logits = self._get_output(h_tm1[-1])
        sequence_of_indices.append(word_indices)
        sequence_of_logits.append(logits)
        x = get_variable(
            torch.FloatTensor([embedding_dict[word_index]
                               for word_index in word_indices]))
    return sequence_of_indices, sequence_of_logits
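# The loop condition above encodes two stopping rules. A self-contained
# restatement (the helper below is illustrative, not part of the repo):
import numpy as np

def keep_decoding(word_indices, num_emitted, eos_index, max_len,
                  train_len=None):
    if train_len is not None:          # training: run a fixed number of steps
        return num_emitted < train_len
    # Inference: run until every sequence in the batch has emitted EOS,
    # or the length cap is reached.
    return (np.any(np.array(word_indices) != eos_index)
            and num_emitted < max_len)

print(keep_decoding([3, 0, 5], 2, eos_index=0, max_len=20))   # True
print(keep_decoding([0, 0, 0], 2, eos_index=0, max_len=20))   # False: all EOS
print(keep_decoding([0, 0, 0], 2, eos_index=0, max_len=20,
                    train_len=10))                            # True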
def generate_sentence(self, batch_size=16):
    # Sample latent contexts directly from the standard normal prior;
    # no encoder pass is needed for unconditional generation.
    context = get_variable(
        torch.randn(batch_size, self.decoder_hidden_dimension))
    logits, predictions = self.decoder(context, self.embeddings,
                                       batch_size=batch_size)
    return self._format_sentences(
        self._to_sentences(predictions, batch_size))
def kld_loss(self, mu, logvar, clip_value=3.):
    # Analytic KL divergence between N(mu, exp(logvar)) and N(0, I),
    # summed over latent dimensions and averaged over the batch.
    loss = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(),
                             1)).mean()
    # Floor the KL term at clip_value ("free bits" style): values below
    # the threshold are replaced by clip_value, removing the incentive to
    # collapse the posterior onto the prior.
    clip_mask = (loss < clip_value).float()
    keep_mask = (loss >= clip_value).float()
    clip_values = get_variable(torch.FloatTensor([clip_value]))
    loss = keep_mask * loss + clip_mask * clip_values
    return loss
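# The mask arithmetic in kld_loss is a floor: torch.clamp(loss, min=clip_value)
# computes the same thing. A self-contained check on toy posteriors:
import torch

def kld_floor(mu, logvar, clip_value=3.):
    loss = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(),
                             1)).mean()
    return torch.clamp(loss, min=clip_value)

near_prior = kld_floor(torch.zeros(4, 8), torch.zeros(4, 8))  # true KL is 0
far_from_prior = kld_floor(3 * torch.ones(4, 8), torch.zeros(4, 8))
print(near_prior.item())       # 3.0: floored at clip_value
print(far_from_prior.item())   # 36.0: unchanged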
def forward(self, context, embedding_dict, inputs=None,
            training_sequence_length=None, drop_prob=None, batch_size=16,
            eos_token='.', unk_token='<unknown>'):
    eos_index = embedding_dict.get_index(eos_token)
    if drop_prob is not None:
        # Word dropout: each ground-truth input token is replaced by the
        # unknown token with probability drop_prob.
        unk_index = embedding_dict.get_index(unk_token)
        bernoulli = torch.distributions.Bernoulli(
            torch.FloatTensor([drop_prob]))
    # Initialise every layer's hidden state from the latent context.
    hidden_tm1 = context.repeat(self.num_layers, 1).view(
        self.num_layers, batch_size, -1)
    input_t = get_variable(
        torch.FloatTensor([embedding_dict[eos_token]] * batch_size))
    word_indices = [-1] * batch_size
    sequence_of_indices = []
    sequence_of_logits = []
    # Inference: decode until every sequence has emitted EOS (or the
    # length cap is hit). Training: decode for exactly
    # training_sequence_length steps.
    while ((training_sequence_length is None
            and np.any(np.array(word_indices) != eos_index)
            and len(sequence_of_indices) < self.max_sequence_length)
           or (training_sequence_length is not None
               and len(sequence_of_indices) < training_sequence_length)):
        if self.context_dimension is None:
            hidden_t = self.rnn(input_t, hidden_tm1)
        else:
            # Feed the latent context into every decoding step as well.
            hidden_t = self.rnn(input_t, hidden_tm1, context)
        logits = self.fc(hidden_t[-1])
        probabilities = self.generating_activation(logits)
        word_indices = torch.multinomial(probabilities, 1).view(-1)
        sequence_of_logits.append(logits)
        sequence_of_indices.append(word_indices)
        if inputs is None:
            # Free-running: feed back the sampled words.
            word_indices = word_indices.cpu().data
        else:
            # Teacher forcing: feed back the ground-truth words.
            word_indices = inputs[len(sequence_of_indices) - 1].cpu().data
        if drop_prob is not None:
            drop_mask = bernoulli.sample_n(batch_size).view(-1).byte()
            word_indices[drop_mask] = torch.LongTensor(
                [unk_index] * drop_mask.sum())
        input_t = get_variable(
            torch.FloatTensor([
                embedding_dict[embedding_dict.get_word(word_index)]
                for word_index in word_indices.numpy()
            ]))
        hidden_tm1 = hidden_t
    return sequence_of_logits, sequence_of_indices
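# Word dropout in isolation: token indices are replaced by the unknown index
# with probability drop_prob before being fed back into the decoder, forcing
# it to rely on the latent context rather than the previous word. A
# self-contained sketch (all indices below are illustrative):
import torch

torch.manual_seed(0)
drop_prob, unk_index = 0.4, 0
word_indices = torch.tensor([5, 2, 9, 7, 3, 1])
drop_mask = torch.bernoulli(
    torch.full(word_indices.shape, drop_prob)).bool()
print(word_indices.masked_fill(drop_mask, unk_index))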
def reconstruct(self, sentence):
    split_sentence = sentence.split(" ")
    sequence_of_batches = [[word] for word in split_sentence]
    sequence_of_embedded_batches = [
        get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
        for batch in sequence_of_batches
    ]
    mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
    # Decode once from the posterior mean (a deterministic reconstruction)
    # and three more times from samples of the posterior.
    contexts = self._get_context(mu, logvar, 3)
    logits, mean_predictions = self._decoder_forward(mu, 1)
    logits, sample_predictions = self._decoder_forward(contexts, 3)
    return (self._format_sentences([sentence]),
            self._format_sentences(self._to_sentences(mean_predictions, 1)),
            self._format_sentences(
                self._to_sentences(sample_predictions, 3)))
def __init__(self, input_dimension=300, hidden_dimension=512, num_layers=1,
             batch_size=1):
    super(Encoder, self).__init__()
    self.input_dimension = input_dimension
    self.hidden_dimension = hidden_dimension
    self.num_layers = num_layers
    self.batch_size = batch_size
    # One RNN per direction; forward() concatenates their final states.
    self.forward_rnn = RNN(self.input_dimension, self.hidden_dimension,
                           self.num_layers)
    self.backward_rnn = RNN(self.input_dimension, self.hidden_dimension,
                            self.num_layers)
    self.h_0 = get_variable(
        torch.FloatTensor(
            np.zeros((self.num_layers, self.batch_size,
                      self.hidden_dimension))))
def forward(self, input_sequence, batch_size=16):
    h_0 = get_variable(
        torch.FloatTensor(
            np.zeros((self.num_layers, batch_size,
                      self.hidden_dimension))))
    forward_h_tm1 = h_0
    backward_h_tm1 = h_0
    sequence_length = len(input_sequence)
    # Run the two RNNs over the sequence in opposite directions.
    for i in range(sequence_length):
        forward_input_embedding = input_sequence[i]
        backward_input_embedding = input_sequence[sequence_length - i - 1]
        forward_h_tm1 = self.forward_rnn(forward_input_embedding,
                                         forward_h_tm1)
        backward_h_tm1 = self.backward_rnn(backward_input_embedding,
                                           backward_h_tm1)
    # Concatenate the final top-layer states of both directions and map
    # them to the posterior parameters.
    sequence_embedding = torch.cat((forward_h_tm1[-1], backward_h_tm1[-1]),
                                   dim=-1)
    mu = self.mean_extractor(sequence_embedding)
    logvar = self.logvar_extractor(sequence_embedding)
    return mu, logvar
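# The encoder above is a hand-rolled bidirectional RNN: final forward and
# backward states are concatenated and mapped linearly to mu and logvar. A
# compact sketch of the same shape flow using PyTorch's built-in bidirectional
# GRU (an assumption for illustration; the repo uses its own RNN class):
import torch
import torch.nn as nn

input_dim, hidden_dim, latent_dim = 300, 512, 16
rnn = nn.GRU(input_dim, hidden_dim, num_layers=1, bidirectional=True,
             batch_first=True)
mean_extractor = nn.Linear(2 * hidden_dim, latent_dim)
logvar_extractor = nn.Linear(2 * hidden_dim, latent_dim)

x = torch.randn(4, 7, input_dim)               # (batch, seq_len, input_dim)
_, h_n = rnn(x)                                # h_n: (2, batch, hidden_dim)
code = torch.cat((h_n[0], h_n[1]), dim=-1)     # (batch, 2 * hidden_dim)
mu, logvar = mean_extractor(code), logvar_extractor(code)
print(mu.shape, logvar.shape)                  # both torch.Size([4, 16])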
def _batch_y_values_to_variables(self, batch_y_values):
    return [get_variable(torch.LongTensor(y)) for y in batch_y_values]
def _batch_x_values_to_variables(self, batch_x_values):
    return [get_variable(torch.FloatTensor(x)) for x in batch_x_values]
def _get_initial_hidden_state(self, sequence_embedding):
    return get_variable(
        torch.FloatTensor(
            np.zeros((self.num_layers, self.batch_size,
                      self.hidden_dimension))))
num_layers = 2
batch_size = 3
embedding_dict = {}
vocabulary = ['a', 'b', 'c', 'd', 'eos']
eos_index = vocabulary.index('eos')
for index in range(len(vocabulary)):
    embedding_dict[index] = np.random.rand(input_dimension)
sequence_length = 6
# Build a random batch of embedded input sequences, terminated with EOS.
input_embeddings = []
for i in range(sequence_length):
    batch_words = np.random.choice(range(len(vocabulary)), batch_size)
    batch_embeddings = np.concatenate(
        [embedding_dict[word].reshape(1, -1) for word in batch_words])
    x = get_variable(torch.FloatTensor(batch_embeddings))
    input_embeddings.append(x)
input_embeddings.append(
    get_variable(
        torch.FloatTensor(
            np.concatenate([
                embedding_dict[eos_index].reshape(1, -1)
                for i in range(batch_size)
            ]))))
encoder = Encoder(input_dimension, hidden_dimension, num_layers,
                  batch_size)
sequence_embeddings = encoder(input_embeddings, True)
state = get_variable(
    torch.FloatTensor(np.random.rand(batch_size, hidden_dimension)))
attention = AttentionMechanism(hidden_dimension, hidden_dimension * 2)
def _get_context(self, mu, logvar, batch_size):
    # Reparameterization trick: z * std + mu is a differentiable sample
    # from N(mu, std^2).
    z = get_variable(torch.randn(batch_size,
                                 self.decoder_hidden_dimension))
    std = torch.exp(0.5 * logvar)
    context = z * std + mu
    return context
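# The reparameterization trick in isolation: writing the sample as
# eps * std + mu (eps ~ N(0, I)) keeps it differentiable with respect to both
# posterior parameters. A self-contained check:
import torch

mu = torch.zeros(2, 4, requires_grad=True)
logvar = torch.zeros(2, 4, requires_grad=True)
eps = torch.randn(2, 4)
z = eps * torch.exp(0.5 * logvar) + mu
z.sum().backward()
print(mu.grad)        # all ones: gradients flow through the sample
print(logvar.grad)    # nonzero: gradients reach the variance, too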
def _guide_epoch(self, loader, num_iterations, batch_size, optimizer=None):
    losses = []
    reconstruction_losses = []
    kld_losses = []
    error_rates = []
    tmp_losses = []
    tmp_reconstruction_losses = []
    tmp_kld_losses = []
    tmp_error_rates = []
    for index in range(num_iterations):
        # Each item from the loader is a list of consecutive sentences.
        sequences = next(iter(loader))
        sequence = sequences[0]
        sequence_of_embedded_batches = [
            get_variable(
                torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in sequence
        ]
        mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
        h_tm1 = get_variable(
            torch.zeros(self.num_layers, 1, self.guide_hidden_dimension))
        for sequence_i in range(1, len(sequences)):
            # The guide RNN predicts the next sentence's posterior
            # parameters from the current ones.
            h_t = self.guide(torch.cat([mu, logvar], dim=1), h_tm1)
            mu, logvar = h_t[-1].split(self.decoder_hidden_dimension,
                                       dim=1)
            context = self._get_context(mu, logvar, 1)
            sequence = sequences[sequence_i]
            sequence_of_embedded_batches = [
                get_variable(
                    torch.FloatTensor(self.embeddings.embed_batch(batch)))
                for batch in sequence
            ]
            sequence_of_indexed_batches = [
                get_variable(
                    torch.LongTensor(self.embeddings.index_batch(batch)))
                for batch in sequence
            ]
            if optimizer is not None:
                logits, predictions = self._decoder_forward(
                    context, 1, sequence_of_indexed_batches,
                    len(sequence), self.drop_prob)
            else:
                logits, predictions = self._decoder_forward(
                    context, 1, None, len(sequence), None)
            loss, reconstruction_loss, kld_loss = self.loss(
                logits, sequence_of_indexed_batches, mu, logvar,
                self.decoder.step_count)
            tmp_losses.append(loss)
            tmp_reconstruction_losses.append(reconstruction_loss)
            tmp_kld_losses.append(kld_loss)
            error_rate = self.vae_error_rate(predictions,
                                             sequence_of_indexed_batches)
            tmp_error_rates.append(error_rate)
            # Re-encode the ground-truth sentence so the guide is driven
            # by the true posterior at the next step.
            mu, logvar = self._encoder_forward(
                sequence_of_embedded_batches, 1)
            h_tm1 = h_t
        if (index + 1) % batch_size == 0:
            # Gradient accumulation: average the buffered losses and take
            # a single optimizer step every batch_size iterations.
            loss = torch.cat(tmp_losses).mean()
            reconstruction_loss = torch.cat(
                tmp_reconstruction_losses).mean()
            kld_loss = torch.cat(tmp_kld_losses).mean()
            error_rate = torch.cat(tmp_error_rates).mean()
            tmp_losses = []
            tmp_reconstruction_losses = []
            tmp_kld_losses = []
            tmp_error_rates = []
            losses.append(loss.cpu().data.numpy())
            reconstruction_losses.append(
                reconstruction_loss.cpu().data.numpy())
            kld_losses.append(kld_loss.cpu().data.numpy())
            error_rates.append(error_rate.cpu().data.numpy())
            if optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
    print('Mean Loss: {}'.format(np.mean(losses)))
    print('Mean Error Rate: {}'.format(np.mean(error_rates)))
    return losses, reconstruction_losses, kld_losses, error_rates
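# _guide_epoch buffers per-sentence losses and only steps the optimizer every
# batch_size iterations, i.e. gradient accumulation over single-sentence
# batches. The pattern in isolation (toy model; all numbers are illustrative):
import torch
import torch.nn as nn

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulate_every = 8
tmp_losses = []
for step in range(32):
    loss = nn.functional.mse_loss(model(torch.randn(2, 4)),
                                  torch.randn(2, 1))
    tmp_losses.append(loss)
    if (step + 1) % accumulate_every == 0:
        batch_loss = torch.stack(tmp_losses).mean()  # average buffered losses
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        tmp_losses = []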