Example #1
    def encode_sentence(self, inputs):
        """
        Encode input sentences into a batch of hidden vectors z
        :param inputs: A tensor of size (batch_size x max_len) of indices of input sentences' tokens
        :return: A tensor of size (batch_size x hidden_size)
        """
        batch_size = inputs.size(0)

        # Get lengths
        lengths = get_sequences_lengths(inputs, masking=self.padding_idx)

        # Sort as required for pack_padded_sequence input
        lengths, indices = torch.sort(lengths, descending=True)
        inputs = inputs[indices]

        # Pack
        inputs = torch.nn.utils.rnn.pack_padded_sequence(
            self.drop(self.emb(inputs)), lengths.data.tolist(), batch_first=True)

        # Encode
        hidden, cell = self.zero_state(batch_size)
        _, (hidden, cell) = self.enc(inputs, (hidden, cell))

        # Concatenate the final forward and backward hidden states (bidirectional encoder)
        hidden = torch.cat((hidden[0], hidden[1]), dim=1)

        # Unsort back into the original batch order
        _, unsort_ind = torch.sort(indices)
        z = hidden[unsort_ind]

        return z
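Every example on this page calls a get_sequences_lengths helper that is not shown in the snippets. A minimal sketch of what such a helper presumably does, assuming padded batches of token indices and a configurable padding index (the signature and the masking/dim argument names are assumptions, not the original code):

import torch


def get_sequences_lengths(sequences, masking=0, dim=1):
    """Count the non-padding tokens in each row of a padded index batch.

    Assumes `sequences` has shape (batch_size, max_len) and that positions
    equal to `masking` are padding.
    """
    # Boolean mask of real tokens, summed over the time dimension
    mask = (sequences != masking).long()
    lengths = mask.sum(dim=dim)
    return lengths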
Example #2
    def encode_sentence(self, inputs):
        """
        Encode input sentences into a batch of hidden vectors z
        :param inputs: A tensor of size (batch_size x max_len) of indices of input sentences' tokens
        :return: A tensor of size (batch_size x hidden_size)
        """

        batch_size = inputs.size(0)

        
        # Initial encoder state and embedded inputs
        hidden = self.zero_state(batch_size)
        embed = self.embedding(inputs)

        # Sort by length (descending), as required by pack_padded_sequence
        lengths = get_sequences_lengths(inputs).data
        lengths = lengths.type(torch.cuda.IntTensor)
        sorted_len, idx = torch.sort(lengths, 0, True)

        # Pack, encode, and unpack
        ip = torch.nn.utils.rnn.pack_padded_sequence(embed[idx], sorted_len.tolist(), batch_first=True)
        output, hidden = self.enc_LSTM(ip, hidden)
        output = torch.nn.utils.rnn.pad_packed_sequence(output, batch_first=True)[0]

        # Restore the original batch order
        _, unsort_idx = torch.sort(idx, 0)
        output = output[unsort_idx]

        # Final hidden state of the encoder as the sentence representation
        z = hidden[0][0][unsort_idx]

        return z
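Examples 1, 2, and 7 also rely on a zero_state helper that is not shown. A plausible sketch for a single-layer, optionally bidirectional LSTM encoder; the attribute names (self.enc, self.hidden_size) are assumptions based on how the examples use them:

    def zero_state(self, batch_size):
        """Return zero-initialized (hidden, cell) states for the encoder LSTM."""
        # One state per direction; multiply by num_layers for a stacked encoder
        num_directions = 2 if getattr(self.enc, 'bidirectional', False) else 1
        weight = next(self.parameters())
        hidden = weight.new_zeros(num_directions, batch_size, self.hidden_size)
        cell = weight.new_zeros(num_directions, batch_size, self.hidden_size)
        return hidden, cell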
Example #3
    def encode(self, inputs):
        inputs_len = get_sequences_lengths(inputs)

        inputs_emb = self.embedding(inputs)
        inputs_enc = self.encoder_sentences(inputs_emb, inputs_len)
        inputs_enc = F.dropout(inputs_enc, self.dropout, self.training)

        return inputs_enc
Example #4
    def encode_sentence(self, inputs):
        """
        Encode input sentences into a batch of hidden vectors z
        :param inputs: A tensor of size (batch_size x max_len) of indices of input sentences' tokens
        :return: A tensor of size (batch_size x hidden_size)
        """

        batch_size = inputs.size(0)

        ##############################
        ### Insert your code below ###
        ##############################

        # Plan: compute lengths, sort by length, embed, pack, run the encoder,
        # then take the final hidden state and unsort it back into the
        # original batch order.

        # Lengths of the input sequences (computed from the token indices)
        input_lengths = get_sequences_lengths(inputs)

        # Sort by length (descending), as required by pack_padded_sequence
        input_lengths, sort_idx = torch.sort(input_lengths, descending=True)
        embeddings = self.embedding(inputs[sort_idx])

        packed_seq = nn.utils.rnn.pack_padded_sequence(embeddings,
                                                       input_lengths.data.tolist(),
                                                       batch_first=True)
        encoder_output, hidden = self.encoder(packed_seq)

        # For an LSTM encoder the hidden state is an (h_n, c_n) tuple; keep h_n
        if isinstance(hidden, tuple):
            hidden = hidden[0]

        # Restore the original batch order and drop the (num_layers * num_directions) dim
        _, unsort_idx = torch.sort(sort_idx)
        z = hidden.squeeze(0)[unsort_idx]

        ###############################
        ### Insert your code above ####
        ###############################

        return z
Example #5
    def encode(self, inputs):
        # shape: (batch_size, sequence_len)
        sentence = inputs['sentence']

        # shape: (batch_size,)
        lengths = get_sequences_lengths(sentence)

        # shape: (batch_size, sequence_len, embedding_size)
        sentence_emb = self.embedding(sentence)

        # shape: (batch_size, hidden_size)
        decoder_hidden = self.encoder(sentence_emb, lengths)

        output_dict = {'decoder_hidden': decoder_hidden}

        return output_dict
Example #6
    def encode_sentences(self, inputs):
        # Mask of non-padding positions and per-sentence lengths
        mask = inputs != self.padding_idx
        inputs_len = get_sequences_lengths(inputs)

        # Embed and encode each sentence
        inputs_emb = self.embedding(inputs)
        inputs_enc = self.encoder_sentences(inputs_emb, inputs_len)
        inputs_enc = F.dropout(inputs_enc, self.dropout, self.training)

        # Trim the mask to the encoder's output length
        mask = mask[:, :inputs_enc.size(1)]

        # Masked attention weights over the encoded time steps
        att_vec = self.att_sentences(inputs_enc)
        att_weights = self.att_reduce(att_vec)
        att = softmax_masked(att_weights, mask.unsqueeze(-1))

        # Attention-weighted sum over the sequence dimension
        inputs_att = torch.sum(inputs_enc * att, dim=1)
        inputs_att = F.dropout(inputs_att, self.dropout, self.training)

        return inputs_att, att
Example #7
    def encode_sentence(self, inputs):
        """
        Encode input sentences into a batch of hidden vectors z
        :param inputs: A tensor of size (batch_size x max_len) of indices of input sentences' tokens
        :return: A tensor of size (batch_size x hidden_size)
        """
        batch_size = inputs.size(0)

        ##############################
        ### Insert your code below ###
        ##############################

        # Get lengths
        lengths = get_sequences_lengths(inputs, masking=self.padding_idx)

        # Sort as required for pack_padded_sequence input
        lengths, indices = torch.sort(lengths, descending=True)
        inputs = inputs[indices]

        lengths = lengths.data.tolist()

        # Pack
        inputs = torch.nn.utils.rnn.pack_padded_sequence(self.emb2(inputs),
                                                         lengths,
                                                         batch_first=True)

        # Encode
        hidden, cell = self.zero_state(batch_size)
        output, (hidden, cell) = self.enc(inputs, (hidden, cell))

        # Unpack to (batch_size, max_len, hidden_size), padded up to max_len
        output = torch.nn.utils.rnn.pad_packed_sequence(
            output, batch_first=True, total_length=self.max_len)[0]

        # Restore the original batch order
        _, unsort_ind = torch.sort(indices)
        z = output[unsort_ind]
        z1 = hidden.squeeze(0)[unsort_ind]

        ###############################
        ### Insert your code above ####
        ###############################

        return z.view(batch_size, self.max_len, self.hidden_size), z1
Example #8
    def encoder(self, inputs):
        batch_size = inputs.shape[0]

        # Get lengths
        lengths = get_sequences_lengths(inputs, masking=self.padding_idx)

        # Sort as required for pack_padded_sequence input
        lengths, indices = torch.sort(lengths, descending=True)
        inputs = inputs[indices]

        # Pack
        inputs = torch.nn.utils.rnn.pack_padded_sequence(self.emb(inputs), lengths.data.tolist(), batch_first=True)

        # Encode
        hidden = variable(torch.zeros(1, batch_size, self.hidden_size))
        cell = variable(torch.zeros(1, batch_size, self.hidden_size))
        _, (hidden, cell) = self.enc(inputs, (hidden, cell))

        # Unsort in the original order
        _, unsort_ind = torch.sort(indices)
        last_hidden = hidden.squeeze(0)[unsort_ind]
        return last_hidden
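For context, here is a minimal, self-contained usage sketch of the pattern shared by all of these examples: embed a padded batch of token indices, pack it by length, run an LSTM, and unsort the final hidden state so each sentence gets one vector. The class, vocabulary size, and input batch below are made up for illustration:

import torch
import torch.nn as nn


class SentenceEncoder(nn.Module):
    """Toy encoder following the same pack -> LSTM -> unsort pattern."""

    def __init__(self, vocab_size, embedding_size, hidden_size, padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx
        self.emb = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_idx)
        self.enc = nn.LSTM(embedding_size, hidden_size, batch_first=True)

    def forward(self, inputs):
        # Lengths from the padding mask, sorted descending for packing
        lengths = (inputs != self.padding_idx).long().sum(dim=1)
        lengths, indices = torch.sort(lengths, descending=True)
        packed = nn.utils.rnn.pack_padded_sequence(
            self.emb(inputs[indices]), lengths.tolist(), batch_first=True)
        _, (hidden, _) = self.enc(packed)
        # Unsort the final hidden state back into the original batch order
        _, unsort = torch.sort(indices)
        return hidden.squeeze(0)[unsort]


# Two sentences, padded with index 0 up to max_len = 4
batch = torch.tensor([[5, 12, 7, 0],
                      [3, 9, 0, 0]])
encoder = SentenceEncoder(vocab_size=100, embedding_size=16, hidden_size=32)
z = encoder(batch)
print(z.shape)  # torch.Size([2, 32])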