def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_indices = self.words2charindices(sents)
    # Pad with the character-vocabulary pad index.
    sents_padded = pad_sents_char(char_indices, self.char2id['<pad>'])

    # pad_sents_char() is expected to return a rectangular, batch-major nested
    # list: (batch_size, max_sentence_length, max_word_length).
    sents_tensor = torch.tensor(sents_padded, dtype=torch.long, device=device)

    # Swap the batch and sentence axes. A plain .view() with the target sizes
    # would only relabel the dimensions and mix words of different sentences.
    # The word length is taken from the data instead of being hard-coded.
    return sents_tensor.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Turn a batch of sentences into a padded tensor of character indices.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # NOTE(review): the pad index is hard-coded to 0; confirm it matches the
    # character-padding token defined in __init__.
    padded = pad_sents_char(self.words2charindices(sents), 0)
    batch_major = torch.tensor(padded, dtype=torch.long, device=device)
    # Swap the first two axes and materialise the result in contiguous memory.
    return batch_major.transpose(0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device = None) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU.
        Optional; when omitted the tensor is created on torch's default device.

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_ids = self.words2charindices(sents)
    sents_t = pad_sents_char(char_ids, self.char2id['<pad>'])

    # pad_sents_char() is expected to return a batch-major nested list, so the
    # first two axes are swapped to put the sentence position first.
    sents_var = torch.tensor(sents_t, dtype=torch.long, device=device)
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Build a padded character-index tensor from a batch of sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    padded_ids = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    batch_major = torch.tensor(padded_ids, dtype=torch.long, device=device)
    # Exchange the first two axes, then copy into contiguous memory.
    return batch_major.transpose(0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    batch_size = len(sents)
    max_sentence_length = max(len(s) for s in sents)

    word_ids = self.words2charindices(sents)
    # Expected shape: (batch_size, max_sentence_length, max_word_length)
    word_ids_padded = pad_sents_char(word_ids, self.char2id['<pad>'])
    max_word_length = len(word_ids_padded[0][0])

    # Build the tensor once and reuse it for both the size check and the result.
    batch_major = torch.tensor(word_ids_padded, dtype=torch.long, device=device)
    assert_expected_size(
        batch_major, 'word_ids_padded',
        [batch_size, max_sentence_length, max_word_length])

    # Swap batch and sentence axes; contiguous so later .view() calls work.
    sents_var = batch_major.permute(1, 0, 2).contiguous()
    assert_expected_size(
        sents_var, 'sents_var',
        [max_sentence_length, batch_size, max_word_length])
    return sents_var
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Map a batch of sentences to a padded tensor of character indices.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    padded = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    # Batch-major tensor first; the sentence axis is then moved to the front.
    batch_major = torch.tensor(padded, dtype=torch.long, device=device)
    return torch.transpose(batch_major, 0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_indices = self.words2charindices(sents)
    # The padded values are character ids, so the pad index must come from the
    # character vocabulary rather than the word vocabulary.
    pad_token = self.char2id['<pad>']
    padded_char_indices = pad_sents_char(char_indices, pad_token)

    sents_var = torch.tensor(padded_char_indices, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Turn words into lists of character indices.
    char_indices = self.words2charindices(sents)

    # Pad words and sentences; expected result is
    # (batch_size, max_sentence_length, max_word_length).
    sents_padded = pad_sents_char(char_indices, self.char2id['<pad>'])

    sents_var = torch.tensor(sents_padded, dtype=torch.long, device=device)
    # permute() only returns a strided view; .contiguous() makes the result
    # safe to pass to .view() downstream.
    return sents_var.permute(1, 0, 2).contiguous()
def question_1b_sanity_check():
    """ Sanity check for pad_sents_char() function.

    Pads a small fixed batch, compares it with the stored gold padding, then
    prints the batch size and the shape of the transposed padded tensor.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1b: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = pad_sents_char(vocab.words2charindices(sentences), 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded_sentences == gold_padded_sentences, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)

    as_tensor = torch.tensor(padded_sentences, dtype=torch.int)
    print(len(padded_sentences), as_tensor.transpose(0, 1).shape)
    print("Sanity Check Passed for Question 1b: Padding!")
    print(rule)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Produce a padded character-index tensor for a batch of sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Expected layout after padding: (batch_size, max_sentence_length, max_word_length)
    padded = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    batch_major = torch.tensor(padded, dtype=torch.long, device=device)
    # Move the sentence axis first. .contiguous() is required because callers
    # were failing on the non-contiguous view.
    return torch.transpose(batch_major, 0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Convert each character of every word to its index in the character
    # vocabulary. The nested lists are neither uniform in length nor padded yet.
    char_indices = self.words2charindices(sents)

    # Pad all words and sentences with the character pad index.
    padded = pad_sents_char(char_indices, self.char2id['<pad>'])

    res_sents = torch.tensor(padded, dtype=torch.long, device=device)
    # Switch the first and second dimensions. transpose() returns a strided
    # view, so make it contiguous for callers that use .view().
    return torch.transpose(res_sents, 0, 1).contiguous()
def question_1c_sanity_check():
    """ Sanity check for to_input_tensor_char() function.

    Verifies the padding against the stored gold padding, then checks that the
    tensor has size (max_sentence_length, batch_size, max_word_length) as
    derived from the gold data.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: To input tensor")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = pad_sents_char(vocab.words2charindices(sentences), 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded_sentences == gold_padded_sentences, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)

    # Gold data is batch-major; the tensor is expected sentence-major.
    expected_size = (
        len(gold_padded_sentences[0]),
        len(gold_padded_sentences),
        len(gold_padded_sentences[0][0]),
    )
    padded_sentences_tensor = vocab.to_input_tensor_char(sentences, device=torch.device('cpu'))
    assert padded_sentences_tensor.size() == expected_size

    print("Sanity Check Passed for Question 1c: To input tensor")
    print(rule)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    words_ids = self.words2charindices(sents)
    padded_sents_chars = pad_sents_char(
        words_ids, char_pad_token=self.char2id['<pad>'])

    sents_var = torch.tensor(padded_sents_chars, dtype=torch.long, device=device)
    # Reorder to (max sentence length, batch size, max word length).
    # transpose() returns a strided view; .contiguous() keeps later .view()
    # calls from failing.
    return torch.transpose(sents_var, 0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Convert characters to integer ids: word_ids (list[list[list[int]]])
    word_ids = self.words2charindices(sents)

    # Pad to uniform lengths: sents_padded (list[list[list[int]]])
    # => (batch_size, max_sentence_length, max_word_length)
    sents_padded = pad_sents_char(word_ids, self.char2id['<pad>'])

    sents_padded_tensor = torch.tensor(sents_padded, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return torch.transpose(sents_padded_tensor, 0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    words2char_sents = self.words2charindices(sents)
    # Character ids must be padded with the character vocabulary's pad index,
    # not the word vocabulary's.
    padded_sents = pad_sents_char(words2char_sents, self.char2id['<pad>'])

    sents_var = torch.tensor(padded_sents, dtype=torch.long, device=device)
    # Swap batch and sentence axes and materialise the result contiguously.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    word_ids = self.words2charindices(sents)

    # Expected shape (batch_size, max_sentence_length, max_word_length),
    # created directly on the requested device (CPU or GPU).
    sents_var = torch.tensor(pad_sents_char(word_ids, self.char2id['<pad>']),
                             dtype=torch.long, device=device)

    # Switch to (max_sentence_length, batch_size, max_word_length). The
    # permuted view is made contiguous so downstream .view() calls succeed.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    sents2int = self.words2charindices(sents)
    # Create the tensor on the requested device with an explicit index dtype.
    sent_padded = torch.tensor(
        pad_sents_char(sents2int, self.char2id['<pad>']),
        dtype=torch.long, device=device)

    # The padded data is expected batch-major. The axes must be swapped with
    # permute(); .view() with the target sizes keeps the memory order and
    # would mix words from different sentences.
    return sent_padded.permute(1, 0, 2).contiguous()
def question_1f_sanity_check():
    """ Sanity check for pad_sents_char() function.

    Compares the padding of a small fixed batch with the stored gold padding,
    then checks that a 33-character word comes back with length 21.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1f: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = pad_sents_char(vocab.words2charindices(sentences), 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded_sentences == gold_padded_sentences, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)

    # One sentence holding a single over-long word of 33 characters.
    overlong_batch = [[[4] * 33]]
    truncated = pad_sents_char(overlong_batch, 0)
    assert len(truncated[0][0]) == 21

    print("Sanity Check Passed for Question 1f: Padding!")
    print(rule)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_indices = self.words2charindices(sents)
    padded = pad_sents_char(char_indices, self.char2id['<pad>'])

    # Create the tensor directly on the target device instead of building it
    # on the CPU and copying it afterwards.
    sents_var = torch.tensor(padded, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    padded = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])

    # Allocate on the target device with an explicit index dtype, rather than
    # creating a CPU tensor and moving it with .to(device).
    sents_var = torch.tensor(padded, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return sents_var.permute(1, 0, 2).contiguous()
def question_1c_sanity_check():
    """ Sanity check for to_input_tensor_char() function.

    Converts a small fixed batch, prints the resulting shape and checks that
    the tensor is 3-D with leading dimensions (max_sentence_length, batch_size).
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1c: Input tensor")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    o_tnsr = vocab.to_input_tensor_char(sentences, torch.device("cpu"))
    print(o_tnsr.shape)

    # The word-length axis depends on the padding policy, so only the first
    # two dimensions are derived from the input here.
    expected_leading = (max(len(s) for s in sentences), len(sentences))
    assert o_tnsr.dim() == 3 and tuple(o_tnsr.shape[:2]) == expected_leading, \
        "Input tensor has shape {} but should start with {}".format(
            tuple(o_tnsr.shape), expected_leading)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Encode a batch of sentences as a padded tensor of character indices.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    padded = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    batch_major = torch.tensor(padded, dtype=torch.long, device=device)
    # Swap the first two axes and return a contiguous copy.
    return batch_major.transpose(0, 1).contiguous()
def test_question_1f_sanity_check(self):
    """ Sanity check for pad_sents_char() function.

    Compares the padding of a small fixed batch with the stored gold padding,
    sentence by sentence first so a failure reports the offending sentence.
    """
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    padded_sentences = pad_sents_char(vocab.words2charindices(sentences), 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')

    assert len(gold_padded_sentences) == len(padded_sentences)
    for gold_sent, padded_sent in zip(gold_padded_sentences, padded_sentences):
        if padded_sent != gold_sent:
            raise AssertionError('got {}: expected: {}'.format(padded_sent, gold_sent))

    # Whole-batch comparison, kept as a final check.
    assert padded_sentences == gold_padded_sentences, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """
    Convert list of sentences (words) into tensor with necessary padding for shorter sentences.

    :param List[List[str]] sents: list of sentences comprising of words.
    :param device: device on which to load the tensor, i.e. CPU or GPU.
    :return sents_var: padded tensor of sentences at the character level,
        shaped (max_sentence_length, batch_size, max_word_length).
    """
    char_indices = self.words2charindices(sents)
    padded = pad_sents_char(char_indices, self.char2id['<pad>'])

    # Explicit dtype: the values are indices and must be integer typed.
    sents_var = torch.tensor(padded, dtype=torch.long, device=device)
    # permute() only returns a strided view; .contiguous() makes the result
    # safe to pass to .view() downstream.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    NOTE: despite its name, this method goes through words2charindices() and
    pad_sents_char(), so it yields one index per character, not one per word.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: 3-D tensor with the first two axes of the padded data
        swapped; presumably (max_sentence_length, batch_size, max_word_length),
        assuming pad_sents_char() returns batch-major data
    """
    # NOTE(review): the pad value is looked up through self['<pad>']; confirm it
    # equals the character-vocabulary pad index expected by pad_sents_char().
    indices = pad_sents_char(self.words2charindices(sents), self['<pad>'])

    # Allocate directly on the target device instead of CPU followed by .to().
    sents_var = torch.tensor(indices, dtype=torch.long, device=device)
    # Swap the first two axes; contiguous so later .view() calls work.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_ids = self.words2charindices(sents)
    sents_t = pad_sents_char(char_ids, self.char2id['<pad>'])
    sents_var = torch.tensor(sents_t, dtype=torch.long, device=device)

    # Transpose the first two dimensions of the tensor. The result is made
    # contiguous so downstream .view() calls do not fail on the strided view.
    return torch.transpose(sents_var, 0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    # Character ids are padded with the character vocabulary's pad index.
    sents_t = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    sents_var = torch.tensor(sents_t, dtype=torch.long, device=device)

    # The padded data is expected batch-major. The axes must be swapped with
    # permute(); reshape() with the swapped sizes keeps the element order and
    # would mix words from different sentences.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    word_indices = self.words2charindices(sents)
    word_indices_padded = pad_sents_char(word_indices, char_pad_token=self.char2id['<pad>'])

    # Explicit dtype: the values are indices and must be integer typed.
    sents_var = torch.tensor(word_indices_padded, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return sents_var.transpose(0, 1).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    sents_t = pad_sents_char(self.words2charindices(sents), self.char2id['<pad>'])
    sents_var = torch.tensor(sents_t, dtype=torch.long, device=device)

    # permute() only returns a strided view; .contiguous() makes the result
    # safe to pass to .view() downstream.
    return sents_var.permute(1, 0, 2).contiguous()
def to_input_tensor_char(self, sents: List[List[str]], device: torch.device) -> torch.Tensor:
    """ Convert list of sentences (words) into a tensor of character indices,
    with necessary padding for shorter words and sentences.

    @param sents (List[List[str]]): list of sentences (words)
    @param device: device on which to load the tensor, i.e. CPU or GPU

    @returns sents_var: tensor of (max_sentence_length, batch_size, max_word_length)
    """
    char_ids = self.words2charindices(sents)  # list[list[list[int]]]

    # NOTE(review): the pad index is hard-coded to 0; confirm it matches the
    # character vocabulary's padding entry.
    char_ids_padded = pad_sents_char(char_ids, char_pad_token=0)

    # Integer (long) dtype is kept explicit because the values are indices.
    batch_major = torch.tensor(char_ids_padded, dtype=torch.long, device=device)
    # Swap batch and sentence axes; contiguous so later .view() calls work.
    return batch_major.permute(1, 0, 2).contiguous()