Example #1
    def interpolate(self, start_sentence, end_sentence, steps=5):
        # Encode both endpoint sentences (batch size 1, one word per time step)
        # and walk a straight line between their posterior means.
        start_split_sentence = start_sentence.split(" ")
        start_sequence_of_batches = [[word] for word in start_split_sentence]
        start_sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in start_sequence_of_batches
        ]
        start_mu, start_logvar = self._encoder_forward(
            start_sequence_of_embedded_batches, 1)

        end_split_sentence = end_sentence.split(" ")
        end_sequence_of_batches = [[word] for word in end_split_sentence]
        end_sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in end_sequence_of_batches
        ]
        end_mu, end_logvar = self._encoder_forward(
            end_sequence_of_embedded_batches, 1)

        step_size = (end_mu - start_mu) / float(steps)
        sentences = [start_sentence]
        # Decode a sentence at each point along the line from start_mu toward
        # end_mu (i = 0 decodes the start point itself).
        for i in range(steps - 1):
            logits, predictions = self.decoder(start_mu + i * step_size,
                                               self.embeddings,
                                               batch_size=1)
            sentences.extend(self._to_sentences(predictions, 1))
        sentences.append(end_sentence)
        return self._format_sentences(sentences)
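For reference, the latent-space arithmetic used by interpolate, on its own with plain tensors and illustrative shapes (no encoder or decoder involved):

import torch

start_mu = torch.zeros(1, 8)          # posterior mean of the start sentence
end_mu = torch.ones(1, 8)             # posterior mean of the end sentence
steps = 5

step_size = (end_mu - start_mu) / float(steps)
# i = 0 is the start point itself; later points move toward end_mu in
# increments of 1/steps of the distance between the two means.
points = [start_mu + i * step_size for i in range(steps - 1)]
print(len(points))                    # 4 decoded points, then end_sentence is appended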
Example #2
    def _vae_epoch(self,
                   loaders,
                   sentence_length_indices,
                   batch_size,
                   optimizer=None):
        # One pass over the given sentence-length buckets. With an optimizer this
        # is a training epoch (ground-truth inputs with word dropout); without
        # one the decoder runs free and no weights are updated.
        losses = []
        reconstruction_losses = []
        kld_losses = []
        error_rates = []
        for index in sentence_length_indices:
            loader = loaders[index]
            sequence = next(iter(loader))
            sequence_of_embedded_batches = [
                get_variable(
                    torch.FloatTensor(self.embeddings.embed_batch(batch)))
                for batch in sequence
            ]
            sequence_of_indexed_batches = [
                get_variable(
                    torch.LongTensor(self.embeddings.index_batch(batch)))
                for batch in sequence
            ]
            mu, logvar = self._encoder_forward(sequence_of_embedded_batches,
                                               batch_size)
            context = self._get_context(mu, logvar, batch_size)

            # Training: feed the ground-truth tokens back in, with word dropout;
            # evaluation: let the decoder condition on its own samples.
            if optimizer is not None:
                logits, predictions = self._decoder_forward(
                    context, batch_size, sequence_of_indexed_batches,
                    len(sequence), self.drop_prob)
            else:
                logits, predictions = self._decoder_forward(
                    context, batch_size, None, len(sequence), None)

            loss, reconstruction_loss, kld_loss = self.loss(
                logits, sequence_of_indexed_batches, mu, logvar,
                self.decoder.step_count)
            losses.append(loss.cpu().data.numpy())
            reconstruction_losses.append(
                reconstruction_loss.cpu().data.numpy())
            kld_losses.append(kld_loss.cpu().data.numpy())

            error_rate = self.vae_error_rate(predictions,
                                             sequence_of_indexed_batches)
            error_rates.append(error_rate.cpu().data.numpy())

            if optimizer is not None:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                self.encoder.increment_step(batch_size=batch_size)
                self.decoder.increment_step(batch_size=batch_size)

        print('Mean Loss: {}'.format(np.mean(losses)))
        print('Mean Error Rate: {}'.format(np.mean(error_rates)))
        return losses, reconstruction_losses, kld_losses, error_rates
Example #3
    def forward(self, sequence_embedding, embedding_dict, eos_index, training_sequence_length=None):
        sequence_of_indices = []
        sequence_of_logits = []
        # Start every sequence from the EOS embedding, with the initial hidden
        # state derived from the encoder's sequence embedding.
        x = get_variable(torch.FloatTensor([embedding_dict[eos_index]] * self.batch_size))
        h_tm1 = self._get_initial_hidden_state(sequence_embedding)
        word_indices = [-1] * self.batch_size
        # Free-running mode: stop once every sequence in the batch has emitted
        # EOS or the length cap is reached. Training mode: unroll for exactly
        # training_sequence_length steps.
        while ((training_sequence_length is None
                and np.any(np.array(word_indices) != eos_index)
                and len(sequence_of_indices) < self.max_sequence_length)
               or (training_sequence_length is not None
                   and len(sequence_of_indices) < training_sequence_length)):
            h_tm1 = self._get_hidden_state(x, h_tm1, sequence_embedding)
            word_indices, logits = self._get_output(h_tm1[-1])
            sequence_of_indices.append(word_indices)
            sequence_of_logits.append(logits)
            x = get_variable(torch.FloatTensor([embedding_dict[word_index] for word_index in word_indices]))
        return sequence_of_indices, sequence_of_logits
Example #4
    def generate_sentence(self, batch_size=16):
        # Sample latent contexts from the standard-normal prior and decode them.
        context = get_variable(
            torch.randn(batch_size, self.decoder_hidden_dimension))
        logits, predictions = self.decoder(context,
                                           self.embeddings,
                                           batch_size=batch_size)
        return self._format_sentences(
            self._to_sentences(predictions, batch_size))
Example #5
    def kld_loss(self, mu, logvar, clip_value=3.):
        # Analytic KL( N(mu, sigma^2) || N(0, I) ), summed over latent
        # dimensions and averaged over the batch.
        loss = (-0.5 *
                torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), 1)).mean()
        # Floor the KL term at clip_value: once it falls below the threshold it
        # contributes a constant and is no longer pushed any lower.
        clip_mask = (loss < clip_value).float()
        keep_mask = (loss >= clip_value).float()
        clip_values = get_variable(torch.FloatTensor([clip_value]))
        loss = keep_mask * loss + clip_mask * clip_values
        return loss

    def forward(self, context, embedding_dict, inputs=None, training_sequence_length=None,
                drop_prob=None, batch_size=16, eos_token='.', unk_token='<unknown>'):
        eos_index = embedding_dict.get_index(eos_token)
        if drop_prob is not None:
            # Word dropout: with probability drop_prob the ground-truth input
            # token is replaced by the <unknown> token during teacher forcing.
            unk_index = embedding_dict.get_index(unk_token)
            bernoulli = torch.distributions.Bernoulli(
                torch.FloatTensor([drop_prob]))
        # The latent context initialises every layer of the decoder RNN.
        hidden_tm1 = context.repeat(self.num_layers,
                                    1).view(self.num_layers, batch_size, -1)
        input_t = get_variable(
            torch.FloatTensor([embedding_dict[eos_token]] * batch_size))
        word_indices = [-1] * batch_size
        sequence_of_indices = []
        sequence_of_logits = []
        # Free-running mode: stop once every sequence in the batch has emitted
        # EOS or the length cap is reached. Training mode: unroll for exactly
        # training_sequence_length steps.
        while ((training_sequence_length is None
                and np.any(np.array(word_indices) != eos_index)
                and len(sequence_of_indices) < self.max_sequence_length)
               or (training_sequence_length is not None
                   and len(sequence_of_indices) < training_sequence_length)):
            if self.context_dimension is None:
                hidden_t = self.rnn(input_t, hidden_tm1)
            else:
                hidden_t = self.rnn(input_t, hidden_tm1, context)
            logits = self.fc(hidden_t[-1])
            probabilities = self.generating_activation(logits)
            # Sample the next word rather than taking the argmax.
            word_indices = torch.multinomial(probabilities, 1).view(-1)
            sequence_of_logits.append(logits)
            sequence_of_indices.append(word_indices)
            if inputs is None:
                word_indices = word_indices.cpu().data
            else:
                # Teacher forcing: the next input is the ground-truth token for
                # this step, possibly dropped out to <unknown>.
                word_indices = inputs[len(sequence_of_indices) - 1].cpu().data
                if drop_prob is not None:
                    drop_mask = bernoulli.sample_n(batch_size).view(-1).byte()
                    word_indices[drop_mask] = torch.LongTensor([unk_index] *
                                                               drop_mask.sum())
            input_t = get_variable(
                torch.FloatTensor([
                    embedding_dict[embedding_dict.get_word(word_index)]
                    for word_index in word_indices.numpy()
                ]))
            hidden_tm1 = hidden_t
        return sequence_of_logits, sequence_of_indices
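The mask arithmetic in kld_loss above is equivalent to flooring the analytic Gaussian KL term at clip_value, so a posterior that has already collapsed below the threshold contributes a constant and no gradient. A standalone sketch of the same computation on plain tensors, using torch.clamp as shorthand for the mask trick (values are illustrative only):

import torch

def kld(mu, logvar, clip_value=3.0):
    # Analytic KL( N(mu, sigma^2) || N(0, I) ), summed over dimensions and
    # averaged over the batch, then floored at clip_value.
    kl = (-0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=1)).mean()
    return torch.clamp(kl, min=clip_value)

mu = torch.zeros(2, 16)       # a fully collapsed posterior: the raw KL is 0
logvar = torch.zeros(2, 16)
print(kld(mu, logvar))        # tensor(3.) -- floored at clip_value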
Example #7
    def reconstruct(self, sentence):
        split_sentence = sentence.split(" ")
        sequence_of_batches = [[word] for word in split_sentence]
        sequence_of_embedded_batches = [
            get_variable(torch.FloatTensor(self.embeddings.embed_batch(batch)))
            for batch in sequence_of_batches
        ]
        mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
        # Decode once from the posterior mean and three more times from samples
        # drawn around it.
        contexts = self._get_context(mu, logvar, 3)
        logits, mean_predictions = self._decoder_forward(mu, 1)
        logits, sample_predictions = self._decoder_forward(contexts, 3)
        return (self._format_sentences([sentence]),
                self._format_sentences(self._to_sentences(mean_predictions,
                                                          1)),
                self._format_sentences(
                    self._to_sentences(sample_predictions, 3)))
Example #8
    def __init__(self,
                 input_dimension=300,
                 hidden_dimension=512,
                 num_layers=1,
                 batch_size=1):
        super(Encoder, self).__init__()
        self.input_dimension = input_dimension
        self.hidden_dimension = hidden_dimension
        self.num_layers = num_layers
        self.batch_size = batch_size
        # One RNN reads the sequence left-to-right, the other right-to-left.
        self.forward_rnn = RNN(self.input_dimension, self.hidden_dimension,
                               self.num_layers)
        self.backward_rnn = RNN(self.input_dimension, self.hidden_dimension,
                                self.num_layers)
        self.h_0 = get_variable(
            torch.FloatTensor(
                np.zeros((self.num_layers, self.batch_size,
                          self.hidden_dimension))))

    def forward(self, input_sequence, batch_size=16):
        h_0 = get_variable(
            torch.FloatTensor(
                np.zeros(
                    (self.num_layers, batch_size, self.hidden_dimension))))
        forward_h_tm1 = h_0
        backward_h_tm1 = h_0
        sequence_length = len(input_sequence)
        for i in range(sequence_length):
            forward_input_embedding = input_sequence[i]
            backward_input_embedding = input_sequence[sequence_length - i - 1]
            forward_h_tm1 = self.forward_rnn(forward_input_embedding,
                                             forward_h_tm1)
            backward_h_tm1 = self.backward_rnn(backward_input_embedding,
                                               backward_h_tm1)
        # Concatenate the final hidden states of both directions and project the
        # summary to the parameters of the approximate posterior.
        sequence_embedding = torch.cat((forward_h_tm1[-1], backward_h_tm1[-1]),
                                       dim=-1)
        mu = self.mean_extractor(sequence_embedding)
        logvar = self.logvar_extractor(sequence_embedding)
        return mu, logvar
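mean_extractor and logvar_extractor are used above but defined elsewhere in the class; they project the concatenated final hidden states of the two directions onto the parameters of the approximate posterior. A minimal self-contained sketch of the same bidirectional-encoder idea, using torch.nn.GRU in place of the custom RNN; all names and dimensions here are illustrative:

import torch
import torch.nn as nn

class TinyBiEncoder(nn.Module):
    def __init__(self, input_dim=300, hidden_dim=512, latent_dim=64):
        super(TinyBiEncoder, self).__init__()
        # bidirectional=True plays the role of the explicit forward/backward pair.
        self.rnn = nn.GRU(input_dim, hidden_dim, num_layers=1,
                          bidirectional=True, batch_first=True)
        self.mean_extractor = nn.Linear(2 * hidden_dim, latent_dim)
        self.logvar_extractor = nn.Linear(2 * hidden_dim, latent_dim)

    def forward(self, x):                           # x: (batch, seq_len, input_dim)
        _, h_n = self.rnn(x)                        # h_n: (2, batch, hidden_dim)
        summary = torch.cat((h_n[0], h_n[1]), dim=-1)
        return self.mean_extractor(summary), self.logvar_extractor(summary)

encoder = TinyBiEncoder()
mu, logvar = encoder(torch.randn(3, 6, 300))        # batch of 3, sequence length 6
print(mu.shape, logvar.shape)                       # torch.Size([3, 64]) twice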
Example #10
    def _batch_y_values_to_variables(self, batch_y_values):
        return [get_variable(torch.LongTensor(y)) for y in batch_y_values]
Example #11
    def _batch_x_values_to_variables(self, batch_x_values):
        return [get_variable(torch.FloatTensor(x)) for x in batch_x_values]

    def _get_initial_hidden_state(self, sequence_embedding):
        return get_variable(
            torch.FloatTensor(
                np.zeros((self.num_layers, self.batch_size,
                          self.hidden_dimension))))
Example #13
# input_dimension and hidden_dimension are not defined in this excerpt; the
# values below match the Encoder defaults and are assumed for illustration.
input_dimension = 300
hidden_dimension = 512
num_layers = 2
batch_size = 3

# Toy vocabulary with random embeddings, keyed by word index.
embedding_dict = {}
vocabulary = ['a', 'b', 'c', 'd', 'eos']
eos_index = vocabulary.index('eos')
for index in range(len(vocabulary)):
    embedding_dict[index] = np.random.rand(input_dimension)

# Build a random sequence of embedded batches, terminated with an EOS row.
sequence_length = 6
input_embeddings = []
for i in range(sequence_length):
    batch_words = np.random.choice(range(len(vocabulary)), batch_size)
    batch_embeddings = np.concatenate(
        [embedding_dict[word].reshape(1, -1) for word in batch_words])
    x = get_variable(torch.FloatTensor(batch_embeddings))
    input_embeddings.append(x)
input_embeddings.append(
    get_variable(
        torch.FloatTensor(
            np.concatenate([
                embedding_dict[eos_index].reshape(1, -1)
                for i in range(batch_size)
            ]))))

encoder = Encoder(input_dimension, hidden_dimension, num_layers, batch_size)
sequence_embeddings = encoder(input_embeddings, True)

state = get_variable(
    torch.FloatTensor(np.random.rand(batch_size, hidden_dimension)))
attention = AttentionMechanism(hidden_dimension, hidden_dimension * 2)
Example #14
    def _get_context(self, mu, logvar, batch_size):
        # Reparameterization trick: z = mu + sigma * epsilon, epsilon ~ N(0, I).
        z = get_variable(torch.randn(batch_size,
                                     self.decoder_hidden_dimension))
        std = torch.exp(0.5 * logvar)
        context = z * std + mu
        return context
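_get_context is the standard VAE reparameterization trick: a sample from N(mu, sigma^2) is written as mu + sigma * epsilon with epsilon ~ N(0, I), so the sample stays differentiable with respect to mu and logvar. The same arithmetic on plain tensors (shapes are illustrative):

import torch

mu = torch.zeros(4, 8)
logvar = torch.zeros(4, 8)            # log sigma^2 = 0, i.e. sigma = 1

eps = torch.randn(4, 8)               # epsilon ~ N(0, I)
std = torch.exp(0.5 * logvar)         # sigma = exp(logvar / 2)
z = eps * std + mu                    # differentiable w.r.t. mu and logvar
print(z.shape)                        # torch.Size([4, 8])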
Example #15
    def _guide_epoch(self, loader, num_iterations, batch_size, optimizer=None):
        # Each item from the loader is a list of consecutive sentences. The first
        # sentence is encoded, and a guide RNN then predicts the latent
        # distribution of each following sentence, which is decoded and scored.
        losses = []
        reconstruction_losses = []
        kld_losses = []
        error_rates = []
        tmp_losses = []
        tmp_reconstruction_losses = []
        tmp_kld_losses = []
        tmp_error_rates = []
        for index in range(num_iterations):
            sequences = next(iter(loader))
            sequence = sequences[0]
            sequence_of_embedded_batches = [
                get_variable(
                    torch.FloatTensor(self.embeddings.embed_batch(batch)))
                for batch in sequence
            ]
            mu, logvar = self._encoder_forward(sequence_of_embedded_batches, 1)
            h_tm1 = get_variable(
                torch.zeros(self.num_layers, 1, self.guide_hidden_dimension))
            for sequence_i in range(1, len(sequences)):
                # The guide consumes the previous sentence's (mu, logvar) and its
                # hidden state; its output is split into the predicted
                # (mu, logvar) for the next sentence.
                h_t = self.guide(torch.cat([mu, logvar], dim=1), h_tm1)
                mu, logvar = h_t[-1].split(self.decoder_hidden_dimension,
                                           dim=1)
                context = self._get_context(mu, logvar, 1)
                sequence = sequences[sequence_i]
                sequence_of_embedded_batches = [
                    get_variable(
                        torch.FloatTensor(self.embeddings.embed_batch(batch)))
                    for batch in sequence
                ]
                sequence_of_indexed_batches = [
                    get_variable(
                        torch.LongTensor(self.embeddings.index_batch(batch)))
                    for batch in sequence
                ]

                if optimizer is not None:
                    logits, predictions = self._decoder_forward(
                        context, 1, sequence_of_indexed_batches, len(sequence),
                        self.drop_prob)
                else:
                    logits, predictions = self._decoder_forward(
                        context, 1, None, len(sequence), None)

                loss, reconstruction_loss, kld_loss = self.loss(
                    logits, sequence_of_indexed_batches, mu, logvar,
                    self.decoder.step_count)

                tmp_losses.append(loss)
                tmp_reconstruction_losses.append(reconstruction_loss)
                tmp_kld_losses.append(kld_loss)

                error_rate = self.vae_error_rate(predictions,
                                                 sequence_of_indexed_batches)
                tmp_error_rates.append(error_rate)

                # Re-encode the actual sentence so the guide is conditioned on the
                # ground-truth latent at the next step.
                mu, logvar = self._encoder_forward(
                    sequence_of_embedded_batches, 1)
                h_tm1 = h_t

            # Average the accumulated terms and take one optimizer step every
            # batch_size iterations.
            if (index + 1) % batch_size == 0:
                loss = torch.cat(tmp_losses).mean()
                reconstruction_loss = torch.cat(
                    tmp_reconstruction_losses).mean()
                kld_loss = torch.cat(tmp_kld_losses).mean()
                error_rate = torch.cat(tmp_error_rates).mean()
                tmp_losses = []
                tmp_reconstruction_losses = []
                tmp_kld_losses = []
                tmp_error_rates = []
                losses.append(loss.cpu().data.numpy())
                reconstruction_losses.append(
                    reconstruction_loss.cpu().data.numpy())
                kld_losses.append(kld_loss.cpu().data.numpy())
                error_rates.append(error_rate.cpu().data.numpy())
                if optimizer is not None:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

        print('Mean Loss: {}'.format(np.mean(losses)))
        print('Mean Error Rate: {}'.format(np.mean(error_rates)))
        return losses, reconstruction_losses, kld_losses, error_rates