Beispiel #1
0
    def to_input_tensor_char(self, sents: List[List[str]],
                             device: torch.device) -> torch.Tensor:
        """ Convert a batch of sentences (lists of words) into a padded tensor of
        character indices.

        @param sents (List[List[str]]): list of sentences (words)
        @param device: device on which to load the tensor, i.e. CPU or GPU

        @returns sents_var: contiguous tensor of shape
                            (max_sentence_length, batch_size, max_word_length)

        Raises ValueError when `sents` is empty, because the longest sentence
        cannot be determined.
        """
        batch_size = len(sents)
        max_sentence_length = max(len(s) for s in sents)

        word_ids = self.words2charindices(sents)
        # Expected layout: (batch_size, max_sentence_length, max_word_length)
        word_ids_padded = pad_sents_char(word_ids, self.char2id['<pad>'])
        max_word_length = len(word_ids_padded[0][0])

        # Materialise the tensor a single time; it is used both for the shape
        # check and for the result, which avoids copying the batch to `device`
        # twice.
        padded_tensor = torch.tensor(word_ids_padded, device=device)
        assert_expected_size(
            padded_tensor, 'word_ids_padded',
            [batch_size, max_sentence_length, max_word_length])

        # Swap the batch and sentence axes; permute (not reshape) keeps each
        # word attached to its own sentence.
        sents_var = padded_tensor.permute(1, 0, 2)
        assert_expected_size(
            sents_var, 'sents_var',
            [max_sentence_length, batch_size, max_word_length])
        return sents_var.contiguous()
    def forward(self, input, dec_hidden=None):
        """ Run the character decoder over a batch of character indices.

        @param input: tensor of integers, shape (length, batch)
        @param dec_hidden: state of the recurrent decoder before reading the input characters;
                           a tuple of two tensors of shape (1, batch, hidden_size), or None

        @returns scores: per-position character scores, shape (length, batch, self.vocab_size)
        @returns dec_hidden: state of the recurrent decoder after reading the input characters;
                             a tuple of two tensors of shape (1, batch, hidden_size)
        """
        # length is the number of characters per word in this batch
        length, batch_size = input.shape

        # Character ids -> character embeddings
        x_embeddings = self.decoderCharEmb(input)
        assert_expected_size(
            x_embeddings, 'x_embeddings',
            [length, batch_size, self.char_embedding_size])

        # Recurrent pass; the second return value is the final (hidden, cell) pair
        enc_hiddens, final_state = self.charDecoder(x_embeddings, dec_hidden)
        hn, cn = final_state
        assert_expected_size(
            enc_hiddens, 'enc_hiddens',
            [length, batch_size, self.hidden_size])
        for label, state in (('hn', hn), ('cn', cn)):
            assert_expected_size(state, label,
                                 [1, batch_size, self.hidden_size])

        # Project every hidden state onto the character vocabulary
        scores = self.char_output_projection(enc_hiddens)
        assert_expected_size(
            scores, 'scores', [length, batch_size, self.vocab_size])

        return scores, (hn, cn)
    def decode_greedy(self, initialStates, device, max_length=21):
        """ Greedy decoding: at every step pick the highest-scoring character and
        feed it back in as the next input.

        @param initialStates: initial internal state of the LSTM, a tuple of two tensors of size (1, batch, hidden_size)
        @param device: torch.device (indicates whether the model is on CPU or GPU)
        @param max_length: maximum length of words to decode

        @returns decodedWords: a list (of length batch) of strings, each of which has length <= max_length.
                              The decoded strings do NOT contain the start-of-word and end-of-word characters;
                              each word is cut at its first '}'.
        """
        batch_size = initialStates[0].shape[1]
        current_char_id = torch.tensor(
            [self.target_vocab.start_of_word] * batch_size,
            device=device).reshape((1, batch_size))
        assert_expected_size(current_char_id, 'current_char_id', [1, batch_size])

        id2char = self.target_vocab.id2char
        # Characters are collected per word and joined once at the end.
        decoded_chars = [[] for _ in range(batch_size)]
        # finished[k] becomes True once word k has produced the end-of-word '}'.
        finished = [False] * batch_size

        dec_hidden = initialStates
        for _ in range(max_length):
            scores, dec_hidden = self.forward(current_char_id, dec_hidden)
            assert_expected_size(scores, 'scores', [1, batch_size, self.vocab_size])
            current_char_id = torch.argmax(scores, dim=2)

            # One device-to-host transfer per step instead of one `.item()`
            # call per character.
            step_ids = current_char_id.squeeze(dim=0).tolist()
            for k, char_id in enumerate(step_ids):
                if finished[k]:
                    # Anything after the first '}' is discarded anyway.
                    continue
                char = id2char[char_id]
                if char == '}':
                    finished[k] = True
                else:
                    decoded_chars[k].append(char)

            # Later steps cannot change the output once every word has ended.
            if all(finished):
                break

        decodedWords = [''.join(chars) for chars in decoded_chars]
        return decodedWords
Beispiel #4
0
def question_1c_sanity_check():
    '''
    Sanity check for the Highway implementation (question 1c).

    Builds a Highway of size EMBED_SIZE with constant weights and biases, runs
    it on an all-ones batch, checks the output shape and prints the output.
    '''
    separator = "-" * 80
    print(separator)
    print("Running Sanity Check for Question 1c: Highway")
    print(separator)

    highway = Highway(EMBED_SIZE)

    # Overwrite both linear layers with fixed values so the printed output is
    # reproducible from run to run.
    for layer in (highway.w_projection, highway.w_gate):
        layer.weight.data.fill_(0.3)
        layer.bias.data.fill_(0.1)

    x_conv_out = torch.ones(BATCH_SIZE, EMBED_SIZE)
    output = highway.forward(x_conv_out)

    assert_expected_size(output, 'output', [BATCH_SIZE, EMBED_SIZE])
    print(output)
    def train_forward(self, char_sequence, dec_hidden=None):
        """ Forward computation during training.

        @param char_sequence: tensor of integers, shape (length, batch). Note that "length" here and in forward() need not be the same.
        @param dec_hidden: initial internal state of the LSTM, obtained from the output of the word-level decoder. A tuple of two tensors of shape (1, batch, hidden_size)

        @returns The cross-entropy loss as a scalar tensor, computed as the *sum* of the cross-entropy
                 losses over every non-padding target character of every word in the batch.
        """
        length, batch_size = char_sequence.size()  # length = number of characters per word

        scores, (hn, cn) = self.forward(char_sequence, dec_hidden)
        assert_expected_size(scores, 'scores', [length, batch_size, self.vocab_size])
        assert_expected_size(hn, 'hn', [1, batch_size, self.hidden_size])
        assert_expected_size(cn, 'cn', [1, batch_size, self.hidden_size])

        # The score at position t predicts the character at position t + 1, so
        # drop the last score row and the first target row, then flatten the
        # (length - 1, batch) grid into one long list of predictions.
        predictions = scores[:-1].reshape(-1, self.vocab_size)
        targets = char_sequence[1:].reshape(-1)

        # ignore_index masks out padding targets, so a single call covers the
        # whole batch. Words whose end-of-word marker is missing still
        # contribute their real characters, and the result is always a tensor.
        # Assumes the padding id lives at target_vocab.char2id['<pad>'], the
        # same key the vocabulary uses when padding its inputs.
        pad_id = self.target_vocab.char2id['<pad>']
        loss = nn.CrossEntropyLoss(ignore_index=pad_id, reduction='sum')
        return loss(predictions, targets)
    def forward(self, x_reshaped: torch.Tensor) -> torch.Tensor:
        '''
        Apply the convolution, a ReLU and a max-pool over all window positions.

        @param x_reshaped (Tensor): Tensor of shape (b, e_char, m_word), where b = batch_size,
                                    e_char = character embedding length and
                                    m_word = max_word_length = max characters in a word
        @returns x_conv_out (Tensor) : Tensor of shape (b, e_word), where
                                       e_word = word embedding length
        '''
        batch_size = len(x_reshaped)
        m_word = len(x_reshaped[0][0])
        e_char = self.char_embed_size
        e_word = self.word_embed_size
        kernel_size = self.kernel_size
        # Number of positions the kernel can take along a word
        n_windows = m_word - kernel_size + 1

        assert_expected_size(
            x_reshaped, 'x_reshaped', [batch_size, e_char, m_word])

        x_conv = self.cnn(x_reshaped)
        assert_expected_size(
            x_conv, 'x_conv', [batch_size, e_word, n_windows])

        # Pooling over every window leaves a length-1 last axis, which the
        # squeeze then removes.
        activate = nn.ReLU()
        pool_all_windows = nn.MaxPool1d(n_windows)
        x_conv_out = pool_all_windows(activate(x_conv))
        x_conv_out = x_conv_out.squeeze(dim=2)
        assert_expected_size(x_conv_out, 'x_conv_out', [batch_size, e_word])

        return x_conv_out
    def forward(self, x_conv_out: torch.Tensor) -> torch.Tensor:
        '''
        Highway layer: blend a ReLU projection of the input with the input
        itself, weighted element-wise by a sigmoid gate.

        @param x_conv_out (Tensor): Tensor of shape (b, e_word), where b = batch_size and
                                    e_word = word embedding length; the output of the
                                    convolutional network
        @returns x_highway (Tensor) : Tensor of shape (b, e_word)
        '''
        batch_size = len(x_conv_out)
        e_word = self.word_embed_size
        assert_expected_size(x_conv_out, 'x_conv_out', [batch_size, e_word])

        # Projection branch, shape (b, e_word)
        x_proj = nn.ReLU()(self.w_projection(x_conv_out))
        assert_expected_size(x_proj, 'x_proj', [batch_size, e_word])

        # Gate in (0, 1), shape (b, e_word)
        x_gate = torch.sigmoid(self.w_gate(x_conv_out))
        assert_expected_size(x_gate, 'x_gate', [batch_size, e_word])

        # The gate picks the projection; its complement carries the input through.
        carry = 1 - x_gate
        x_highway = x_gate * x_proj + carry * x_conv_out
        assert_expected_size(x_highway, 'x_highway', [batch_size, e_word])
        return x_highway
Beispiel #8
0
    def forward(self, input_tensor):
        """
        Compute character-based CNN embeddings for the words in a batch of sentences.
        @param input_tensor: Tensor of integers of shape (sentence_length, batch_size, max_word_length) where
            each integer is an index into the character vocabulary

        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing the
            CNN-based embeddings for each word of the sentences in the batch
        """
        sentence_length, batch_size, m_word = input_tensor.size()
        e_char = self.char_embed_size
        e_word = self.embed_size
        # Every word is processed independently, so sentences and batch are
        # flattened into a single axis of this many words.
        n_words = sentence_length * batch_size

        x_padded = torch.reshape(input_tensor, (n_words, m_word))
        assert_expected_size(x_padded, 'x_padded', [n_words, m_word])

        # Character ids -> character embeddings
        x_emb = self.embeddings(x_padded)
        assert_expected_size(x_emb, 'x_emb', [n_words, m_word, e_char])

        # Move the embedding axis ahead of the character axis for the CNN
        x_reshaped = x_emb.permute(0, 2, 1)
        assert_expected_size(x_reshaped, 'x_reshaped',
                             [n_words, e_char, m_word])

        x_conv_out = self.cnn.forward(x_reshaped)
        assert_expected_size(x_conv_out, 'x_conv_out', [n_words, e_word])

        x_highway = self.highway.forward(x_conv_out)
        assert_expected_size(x_highway, 'x_highway', [n_words, e_word])

        x_word_emb = self.dropout(x_highway)
        assert_expected_size(x_word_emb, 'x_word_emb', [n_words, e_word])

        # Restore the separate sentence and batch axes
        output = torch.reshape(x_word_emb,
                               (sentence_length, batch_size, e_word))
        assert_expected_size(output, 'output',
                             [sentence_length, batch_size, e_word])

        return output