def question_1f_sanity_check():
    sentence_length = 10
    batch_size = 64
    embedding_size = 128
    x = torch.Tensor(np.ones((sentence_length, batch_size, embedding_size)))

    highway_instance = Highway(embedding_size)
    output = highway_instance.forward(x)
    assert (sentence_length, batch_size, embedding_size) == output.shape

    # test that, given a closed gate, output == input
    highway_instance = Highway(embedding_size)
    nn.init.constant_(highway_instance.gate_layer.weight, -float("inf"))
    highway_instance.gate_layer.bias.data.fill_(0)
    output = highway_instance.forward(x)
    assert torch.all(torch.eq(output, x))

    # test that, given an open gate and an identity projection, output == input
    highway_instance = Highway(embedding_size)
    nn.init.constant_(highway_instance.gate_layer.weight, float("inf"))
    highway_instance.gate_layer.bias.data.fill_(0)
    nn.init.eye_(highway_instance.projection_layer.weight)
    highway_instance.projection_layer.bias.data.fill_(0)
    output = highway_instance.forward(x)
    assert torch.all(torch.eq(output, x))
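# A minimal Highway sketch consistent with the check above (a sketch, not the
# assignment's reference solution): the attribute names `projection_layer` and
# `gate_layer` are taken from the test; the formula is the standard
# x_highway = g * relu(W_proj x + b_proj) + (1 - g) * x, with
# g = sigmoid(W_gate x + b_gate).
import torch
import torch.nn as nn


class Highway(nn.Module):
    def __init__(self, embedding_size):
        super(Highway, self).__init__()
        self.projection_layer = nn.Linear(embedding_size, embedding_size)
        self.gate_layer = nn.Linear(embedding_size, embedding_size)

    def forward(self, x):
        # nn.Linear acts on the last dimension, so any leading dims
        # (sentence_length, batch_size, ...) pass through unchanged
        x_proj = torch.relu(self.projection_layer(x))
        x_gate = torch.sigmoid(self.gate_layer(x))
        return x_gate * x_proj + (1 - x_gate) * x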
def question_1h_sanity_check(model):
    """ Sanity check for character-level padding """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    word_ids = vocab.words2charindices(sentences)
    # padded_sentences = pad_sents_char(word_ids, 0)
    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)
    gold_padded_sentences = torch.load(
        './sanity_check_en_es_data/gold_padded_sentences.pkl')

    print(gold_padded_sentences.size())
    print(padded_sentences.size())
    assert padded_sentences.size() == gold_padded_sentences.size(), \
        "to_input_tensor_char size is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences.size(), padded_sentences.size())

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)
class CNNClassifier(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab, kernel_size=5, embed_char=50):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        @param kernel_size (int): Width of the convolutional window
        @param embed_char (int): Dimensionality of the character embeddings
        """
        super(CNNClassifier, self).__init__()
        self.embed_size = embed_size
        self.kernel_size = kernel_size
        self.embed_char = embed_char
        pad_token_idx = vocab.char2id['<pad>']
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.embed_char,
                                       padding_idx=pad_token_idx)
        # self.embed_size is the number of filters
        self.cnn = CNN(self.embed_char, self.kernel_size, self.embed_size)
        self.highway = Highway(self.embed_size, dropout_rate=0.3)

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1f
        output = self.embeddings(input)
        # (sentence_length, batch_size, embed_char, max_word_length)
        x_reshape = output.permute(0, 1, 3, 2)
        sentence_length, batch_size, _, max_word_length = x_reshape.shape
        # permute makes the tensor non-contiguous, so call contiguous() before view
        x_reshape = x_reshape.contiguous().view(-1, self.embed_char,
                                                max_word_length)
        x_cnn = self.cnn.forward(x_reshape)
        x_highway = self.highway.forward(
            x_cnn.view(sentence_length, batch_size, self.embed_size))
        return x_highway
        ### END YOUR CODE
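# A minimal sketch of the CNN module assumed above (the positional signature
# (e_char, kernel_size, e_word) is inferred from the call
# CNN(self.embed_char, self.kernel_size, self.embed_size)); it is the usual
# 1-D convolution over characters followed by ReLU and max-pooling over time.
import torch
import torch.nn as nn


class CNN(nn.Module):
    def __init__(self, e_char, kernel_size, e_word):
        super(CNN, self).__init__()
        self.conv = nn.Conv1d(in_channels=e_char,
                              out_channels=e_word,
                              kernel_size=kernel_size)

    def forward(self, x_reshaped):
        # x_reshaped: (batch, e_char, max_word_length)
        x_conv = self.conv(x_reshaped)  # (batch, e_word, m_word - k + 1)
        # max-pool over the remaining time dimension
        x_conv_out, _ = torch.relu(x_conv).max(dim=2)
        return x_conv_out  # (batch, e_word)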
def question_1f_sanity_check():
    """ Sanity check for Highway class init and forward methods """
    print("-" * 80)
    print("Running Sanity Check for Question 1f: Highway layer")
    print("-" * 80)
    print("Running test on a batch of conv-layer outputs")
    B = 4
    e_word = 3
    conv_out = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9],
                             [10, 11, 12]])  # shape (4, 3)
    high_layer = Highway(e_word)
    my_high = high_layer.forward(conv_out)
    output_expected_size = [B, e_word]
    assert list(my_high.size()) == output_expected_size, \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, list(my_high.size()))
    print("Sanity Check Passed for Question 1f: Correct Output Shape!")
    print("-" * 80)
class ModelCharEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab, dropout_rate=0.3):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelCharEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        # embed_size is e_word; the handout says e_char = 50
        self.embed_size = embed_size
        self.embeddings = nn.Embedding(len(vocab.char2id), 50,
                                       padding_idx=vocab.char2id['<pad>'])
        self.highway = Highway(embed_size)
        self.cnn = CNN(50, embed_size)
        self.dropout = nn.Dropout(p=dropout_rate)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        # embeddings: (sentence_length, batch_size, max_word_length, e_char)
        embeddings = self.embeddings(input)
        sentence_length, batch_size, max_word_length, e_char = embeddings.shape
        # move e_char before max_word_length, then fold the sentence and batch
        # dims for Conv1d; a bare view here would scramble characters and
        # channels (see the demonstration after this class)
        embeddings = embeddings.permute(0, 1, 3, 2).contiguous().view(
            sentence_length * batch_size, e_char, max_word_length)
        # cnn_embeds: (sentence_length * batch_size, e_word)
        cnn_embeds = self.cnn.forward(embeddings)
        output = self.highway.forward(cnn_embeds)
        x_wordemb = self.dropout(output)
        _, e_word = x_wordemb.shape
        x_wordemb = x_wordemb.view(sentence_length, batch_size, e_word)
        return x_wordemb
        ### END YOUR CODE
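# Why the permute matters before flattening (a tiny demonstration): view() and
# reshape() reinterpret the underlying memory, so reshaping
# (..., m_word, e_char) straight to (..., e_char, m_word) silently mixes
# characters and channels instead of transposing them.
import torch

t = torch.arange(6).reshape(1, 2, 3)  # (batch, m_word=2, e_char=3)
wrong = t.reshape(1, 3, 2)            # memory reinterpreted, data scrambled
right = t.permute(0, 2, 1)            # true transpose: (batch, e_char, m_word)
print(wrong[0])  # tensor([[0, 1], [2, 3], [4, 5]])
print(right[0])  # tensor([[0, 3], [1, 4], [2, 5]])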
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        pad_token_idx = vocab.char2id['<pad>']
        self.vocab = vocab
        self.embed_size = embed_size
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       embedding_dim=50,
                                       padding_idx=pad_token_idx)
        self.dropout = nn.Dropout(p=0.3)
        self.Highway = Highway(embed_size)
        self.CNN = CNN(embed_size, char_embed=50)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        l = input.shape[0]
        batch_size = input.shape[1]
        x_embed = self.embeddings(input)
        # (sentence_length, batch_size, e_char, max_word_length)
        x_reshaped = x_embed.permute(0, 1, 3, 2)
        x_reshaped = x_reshaped.contiguous().view(
            x_reshaped.shape[0] * x_reshaped.shape[1], x_reshaped.shape[2],
            x_reshaped.shape[3])
        x_convout = self.CNN.forward(x_reshaped)
        x_highway = self.Highway.forward(x_convout)
        output = self.dropout(x_highway)
        output = output.view(l, batch_size, output.shape[-1])
        return output
        ### END YOUR CODE
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.embed_size = embed_size
        self.char_embed_size = 50
        self.x_embeddings = None
        pad_token_idx = vocab.char2id['<pad>']
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=pad_token_idx)
        self.cnn = CNN(output_features=self.embed_size,
                       char_embeddings=self.char_embed_size)
        self.highway = Highway(self.embed_size)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        self.x_embeddings = self.embeddings(input)
        # (sentence_length, batch_size, char_embed_size, max_word_length)
        new_embeds = self.x_embeddings.permute(0, 1, 3, 2)
        new_embeds2 = new_embeds.reshape(
            new_embeds.size()[0] * new_embeds.size()[1],
            new_embeds.size()[2], new_embeds.size()[3])
        cnn_op = self.cnn.forward(new_embeds2)
        new_res = torch.squeeze(cnn_op, dim=2)
        highway_op = self.highway.forward(new_res)
        new_highway_op = highway_op.reshape(input.size()[0], input.size()[1],
                                            highway_op.size()[1])
        return new_highway_op
        ### END YOUR CODE
def highway_sanity_check():
    print("Highway Input " + str([1]))
    embed_size = 1
    cnn_out = torch.Tensor([1])
    highway = Highway(embed_size)
    result = highway.forward(cnn_out)
    print("Highway Output " + str(result))
    assert (result.size() == cnn_out.size())
    print("----- Shape Test Passed -----")
def test_highway():
    print()
    print("===" * 30)
    print("\nHighway Class test")

    e_word = 3
    x_conv_out = torch.tensor(
        [
            [
                [0, 1, 1],   # sentence a's word 1
                [-1, 1, 0]   # sentence b's word 1
            ],
            [
                [1, 0, 0],   # sentence a's word 2
                [0, 1, 0]    # sentence b's word 2
            ]
        ],
        dtype=torch.float,
        device=device
    )
    sent_len = x_conv_out.shape[0]
    batch_size = x_conv_out.shape[1]

    correct_x_highway = np.array(
        [
            [
                [0., 0.38797045, 0.57840323],          # sentence a's word 1
                [-0.03674287, 0.4926422, 0.22739217]   # sentence b's word 1
            ],
            [
                [0.58957815, 0., 0.],                  # sentence a's word 2
                [0.24245806, 0.47267026, 0.18764845]   # sentence b's word 2
            ]
        ]
    )

    model = Highway(e_word).to(device)
    obtained_x_highway = model.forward(torch.flatten(x_conv_out, 0, 1))
    obtained_x_highway = torch.stack(
        torch.split(obtained_x_highway, batch_size, dim=0))
    obtained_x_highway = obtained_x_highway.cpu().detach().numpy()

    assert np.allclose(correct_x_highway, obtained_x_highway), \
        "\n\nIncorrect x_highway\n\nCorrect x_highway:\n{}\n\nYour x_highway:\n{}". \
        format(correct_x_highway, obtained_x_highway)
    print("\nx_highway =\n", obtained_x_highway)

    # # Check the weights
    # print("\nWproj weights:\n", model.Wproj.weight.cpu().detach().numpy())
    # print("\nWproj bias:\n", model.Wproj.bias.cpu().detach().numpy())
    # print("\n\nWgate weights:\n", model.Wgate.weight.cpu().detach().numpy())
    # print("\nWgate bias:\n", model.Wgate.bias.cpu().detach().numpy())

    print("\n\nHighway Test Passed!\n")
    print("===" * 30)
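# The hard-coded correct_x_highway values above are only reproducible when the
# Highway weights are initialized deterministically; a typical harness seeds
# every RNG before building the model. The exact seed value here is an
# assumption and must match whatever produced the stored expectations.
import random
import numpy as np
import torch

seed = 1234  # hypothetical value
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)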
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        pad_token_idx = vocab.char2id['<pad>']
        self.embed_size = embed_size
        self.embeddings = nn.Embedding(len(vocab.char2id), 50,
                                       padding_idx=pad_token_idx)
        self.highway = Highway(e_word=embed_size)
        self.CNN = CNN(e_word=embed_size, e_char=50, m_word=21, kernel_size=5)
        self.dropout = nn.Dropout(p=0.3)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        Must map from x_padded to x_word_emb.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        sentence_length = input.size()[0]
        batch_size = input.size()[1]
        input = input.reshape(sentence_length * batch_size, -1)
        x_embedding = self.embeddings(input)
        x_reshaped = x_embedding.permute(0, 2, 1)
        x_convout = self.CNN.forward(x_reshaped)
        # squeeze only the trailing time dimension so a batch of size 1 survives
        x_convout = x_convout.squeeze(dim=-1)
        x_highway = self.highway.forward(x_convout)
        x_word_emb = self.dropout(x_highway)
        x_word_emb = x_word_emb.view(sentence_length, batch_size, -1)
        return x_word_emb
        ### END YOUR CODE
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, word_embed_size, vocab):
        """ Init the Embedding layer for one language
        @param word_embed_size (int): Embedding size (dimensionality) for the output word
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

        Hints:
            - You may find len(self.vocab.char2id) useful when creating the embedding
        """
        super(ModelEmbeddings, self).__init__()

        ### YOUR CODE HERE for part 1h
        self.e_char = 50
        self.word_embed_size = word_embed_size
        self.dropout_prob = 0.3
        self.vocab = vocab
        # 21 is an arbitrary value chosen for the sanity tests and also the greedy decoder
        self.max_word_len = 21
        self.embedding = nn.Embedding(len(vocab.char2id),
                                      self.e_char,
                                      padding_idx=vocab.char2id['∏'])
        self.cnn = CNN(e_char=self.e_char,
                       filters=self.word_embed_size,
                       kernel_size=5,
                       m_word=self.max_word_len)
        self.highway = Highway(word_embed_size)
        self.dropout = nn.Dropout(p=self.dropout_prob)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, word_embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1h
        embedded = self.embedding.forward(input)
        max_sent_len, batch_size, samples, channels = embedded.shape
        reshaped = embedded.view(
            (max_sent_len * batch_size, samples, channels)).transpose(1, 2)
        conv_out = self.cnn.forward(reshaped)
        # squeeze only the trailing time dimension so a batch of size 1 survives
        highway = self.highway.forward(conv_out.squeeze(dim=-1))
        dropout = self.dropout.forward(highway)
        return dropout.view(max_sent_len, batch_size, -1)
        ### END YOUR CODE
def question_1g_sanity_check():
    """ Sanity check for highway module """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Highway")
    print("-" * 80)
    highway = Highway(EMBED_SIZE)
    conv_out = torch.rand(BATCH_SIZE, EMBED_SIZE)
    output = highway.forward(conv_out)
    expected_size = [BATCH_SIZE, EMBED_SIZE]
    assert (list(output.size()) == expected_size)
    print("Sanity Check Passed for Question 1g: Highway!")
    print("-" * 80)
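# Several sanity checks above and below reference module-level constants
# defined elsewhere in their harness; a minimal stub for running them
# standalone. The values are assumptions, chosen so that the manually set
# 3x3 weights in the Question 1h check further down stay consistent.
BATCH_SIZE = 5
EMBED_SIZE = 3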
def test_shape(self):
    print("-" * 80)
    print("Running Sanity Check for Question 1d: Highway Shape")
    print("-" * 80)
    batch_size, word_embed_size = 64, 40
    highway = Highway(word_embed_size)
    x_conv_out = torch.randn([batch_size, word_embed_size])
    x_word_emb = highway.forward(x_conv_out)
    self.assertEqual(x_word_emb.shape, (batch_size, word_embed_size))
    self.assertEqual(x_word_emb.shape, x_conv_out.shape)
    print("Sanity Check Passed for Question 1d: Highway Shape!")
    print("-" * 80)
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.embed_size = embed_size
        self.char_embed_size = 50
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=vocab.char2id['<pad>'])
        self.conv = CNN(in_channels=self.char_embed_size,
                        out_channels=self.embed_size)
        self.highway = Highway(size=self.embed_size)
        self.dropout = nn.Dropout(p=0.3)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        x_embed = self.embeddings(input)
        # swap the character and embedding dimensions before flattening;
        # a bare reshape to (..., e_char, m_word) would scramble the data
        x_reshaped = x_embed.permute(0, 1, 3, 2).reshape(
            x_embed.size()[0] * x_embed.size()[1],
            x_embed.size()[3], x_embed.size()[2])
        x_conv_out = self.conv.forward(x_reshaped)
        x_highway = self.highway.forward(x_conv_out)
        x_word_emb = self.dropout(x_highway)
        output = torch.reshape(x_word_emb,
                               [x_embed.size()[0], x_embed.size()[1], -1])
        return output
        ### END YOUR CODE
def question_1h_sanity_check():
    """ Sanity check for Highway network """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Highway network")
    print("-" * 80)
    net = Highway(5, 5)
    input = torch.randn((10, 6, 5))
    normal_output = net.forward(input)
    assert normal_output.shape == input.shape
    print("-" * 80)
    print("Sanity Check Passed for Question 1h: Highway network")
def question_1h_sanity_check():
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Highway")
    print("-" * 80)
    model = Highway(3)
    x = torch.tensor([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
                     dtype=torch.float)
    print(x.shape)
    res = model.forward(x)
    print(res)
    assert res.shape == x.shape
    print("Sanity Check Passed for Question 1h: Highway!")
    print("-" * 80)
def question_1f_sanity_check():
    """ Sanity check for highway module """
    print("-" * 80)
    print("Running Sanity Check for Question 1f: Highway")
    print("-" * 80)
    highway = Highway(EMBED_SIZE)
    x_convout = torch.randn(BATCH_SIZE, EMBED_SIZE)
    ret = highway.forward(x_convout)
    output_expected_size = (BATCH_SIZE, EMBED_SIZE)
    assert output_expected_size == ret.shape
    print("Sanity Check Passed for Question 1f: Highway!")
    print("-" * 80)
def question_1h_sanity_check():
    """ Sanity check for highway.py """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Highway")
    print("-" * 80)
    inpt = torch.zeros(BATCH_SIZE, EMBED_SIZE)
    highway_net = Highway(EMBED_SIZE)
    output = highway_net.forward(inpt)
    output_expected_size = [BATCH_SIZE, EMBED_SIZE]
    assert (list(output.size()) == output_expected_size), \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, list(output.size()))
    print("Sanity Check Passed for Question 1h: Highway!")
    print("-" * 80)
def test_gate_bypass(self):
    print("-" * 80)
    print("Running Sanity Check for Question 1d: Highway Bypass")
    print("-" * 80)
    batch_size, word_embed_size = 64, 40
    highway = Highway(word_embed_size)
    # force the gate to zero (sigmoid(-inf) == 0) so the input passes through
    highway.gate.weight.data[:, :] = 0.0
    highway.gate.bias.data[:] = -float('inf')
    x_conv_out = torch.randn([batch_size, word_embed_size])
    x_word_emb = highway.forward(x_conv_out)
    self.assertTrue(torch.allclose(x_conv_out, x_word_emb))
    print("Sanity Check Passed for Question 1d: Highway Bypass!")
    print("-" * 80)
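# A natural companion check (a sketch, assuming the module also exposes a
# `proj` linear layer alongside `gate`): force the gate fully open and make
# the projection an identity with zero bias, so ReLU(I x) == x for
# non-negative inputs and the highway output should again equal the input.
def test_gate_open_identity(self):
    batch_size, word_embed_size = 64, 40
    highway = Highway(word_embed_size)
    highway.gate.weight.data[:, :] = 0.0
    highway.gate.bias.data[:] = float('inf')  # sigmoid(inf) == 1
    torch.nn.init.eye_(highway.proj.weight)
    highway.proj.bias.data[:] = 0.0
    # keep inputs non-negative so ReLU is the identity on them
    x_conv_out = torch.randn([batch_size, word_embed_size]).abs()
    x_word_emb = highway.forward(x_conv_out)
    self.assertTrue(torch.allclose(x_conv_out, x_word_emb))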
def question_1f_sanity_check(x_conv_out):
    """ Sanity check for the class `Highway`. """
    print("-" * 80)
    print("Running Sanity Check for Question 1f: Highway")
    print("-" * 80)
    print("Running test on a batch of x_conv_out")
    embed_size = x_conv_out.size()[-1]
    model = Highway(embed_size)
    x_highway = model.forward(x_conv_out)
    assert x_highway.size() == x_conv_out.size(), \
        "Output size should be: {}, but got {}".format(x_conv_out.size(),
                                                       x_highway.size())
    print("Sanity Check Passed for Question 1f: Highway!")
    print("-" * 80)
def question_1h_sanity_check():
    """ Sanity check for highway network. """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Highway")
    print("-" * 80)

    # create highway network
    highway = Highway(EMBED_SIZE)

    # validate input & output shape
    inpt = torch.zeros(BATCH_SIZE, EMBED_SIZE, dtype=torch.float)
    output_expected_size = [BATCH_SIZE, EMBED_SIZE]
    output = highway.forward(inpt)
    assert (list(output.size()) == output_expected_size), \
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, list(output.size()))

    # manually set weights
    highway.proj.weight.data = torch.Tensor([[0.3, 0.2, 0.8],
                                             [0.1, 0.05, 0.4],
                                             [-0.7, 0.01, -1.2]])
    highway.proj.bias.data = torch.zeros(EMBED_SIZE)
    highway.gate.weight.data = torch.Tensor([[0.3, 0.2, 0.8],
                                             [0.1, 0.05, 0.4],
                                             [-0.7, 0.01, -1.2]])
    highway.gate.bias.data = torch.zeros(EMBED_SIZE)

    inpt = torch.Tensor([[1, 2, 3], [0, 0, 0]])
    proj = F.relu(highway.proj(inpt))
    gate = torch.sigmoid(highway.gate(inpt))
    output = highway(inpt)

    expected_proj = torch.Tensor([[3.1000, 1.4000, 0.0],
                                  [0.0, 0.0, 0.0]])
    expected_gate = torch.Tensor([[0.95689274, 0.802183888, 0.013653659],
                                  [0.5, 0.5, 0.5]])
    expected_output = torch.Tensor([[3.00947475, 1.51868967, 2.95903902],
                                    [0.0, 0.0, 0.0]])

    assert (proj.allclose(expected_proj)), \
        "proj is incorrect: it should be:\n {} but is:\n{}".format(
            expected_proj, proj)
    assert (gate.allclose(expected_gate)), \
        "gate is incorrect: it should be:\n {} but is:\n{}".format(
            expected_gate, gate)
    assert (output.allclose(expected_output)), \
        "output is incorrect: it should be:\n {} but is:\n{}".format(
            expected_output, output)

    print("Sanity Check Passed for Question 1h: Highway!")
    print("-" * 80)
def question_1c_sanity_check():
    """ Sanity check for highway.py class implementation """
    print("-" * 80)
    print("Running Sanity Check for Question 1c: Highway")
    print("-" * 80)
    highway = Highway(EMBED_SIZE)

    # Reinitialize weights
    highway.w_projection.weight.data.fill_(0.3)
    highway.w_projection.bias.data.fill_(0.1)
    highway.w_gate.weight.data.fill_(0.3)
    highway.w_gate.bias.data.fill_(0.1)

    x_conv_out = torch.ones(BATCH_SIZE, EMBED_SIZE)
    output = highway.forward(x_conv_out)
    assert_expected_size(output, 'output', [BATCH_SIZE, EMBED_SIZE])
    print(output)
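# A minimal sketch of the assert_expected_size helper used above (and again in
# the ModelEmbeddings.forward further down); the implementation is an
# assumption inferred from how it is called:
def assert_expected_size(tensor, name, expected_size):
    assert list(tensor.size()) == expected_size, \
        "{} size is incorrect: it should be {} but is {}".format(
            name, expected_size, list(tensor.size()))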
def test_highway(self):
    e_word = 3
    ones = np.ones((8, e_word))
    x = torch.Tensor(ones)
    model = Highway(e_word)
    out = model.forward(x)
    self.assertEqual(out.shape, (8, 3))

    e_word = 10
    ones = np.ones((11, e_word))
    x = torch.Tensor(ones)
    model = Highway(e_word)
    out = model.forward(x)
    self.assertEqual(out.shape, (11, e_word))

    # a feature dimension that does not match e_word must raise
    bad_input = torch.Tensor(np.ones((8, e_word + 1)))
    with self.assertRaises(RuntimeError):
        model.forward(bad_input)

    # the same feature dimension with a different batch size is fine
    same_input_different_batch_size = torch.Tensor(np.ones((10, e_word)))
    model.forward(same_input_different_batch_size)
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        pad_token_idx = vocab.char2id['<pad>']
        self.char_embed_size = 50
        self.dropout_rate = 0.3
        self.max_word_size = 21
        self.word_embed_size = embed_size
        self.embed_size = embed_size
        self.v_char = len(vocab.char2id)
        self.v_word = len(vocab.word2id)
        self.embeddings = nn.Embedding(self.v_char,
                                       self.char_embed_size,
                                       padding_idx=pad_token_idx)
        self.Dropout = nn.Dropout(p=self.dropout_rate)
        self.cnn = CNN(e_char=self.char_embed_size,
                       e_word=self.word_embed_size,
                       m_word=self.max_word_size)
        self.highway = Highway(embedding_size=self.word_embed_size)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        max_word_size = input.size()[-1]
        assert (max_word_size == self.max_word_size)

        # (max_sent_len, batch_size, max_word_len, char_embedding_size)
        char_embeddings = self.embeddings(input)
        # Conv1d convolves over the last dimension, so swap it into place
        char_embeddings = char_embeddings.permute(0, 1, 3, 2)
        # (max_sent_len, batch_size, char_embedding_size, max_word_len)
        max_sent_len, batch_size, char_embedding_size, max_word_len = \
            char_embeddings.size()

        # Conv1d only accepts a 3-D tensor, so fold the extra dims together:
        # (max_sent_len * batch_size, char_embedding_size, max_word_len)
        char_embeddings = char_embeddings.reshape(
            max_sent_len * batch_size, char_embedding_size, max_word_len)

        # (max_sent_len * batch_size, word_embedding_size)
        cnn_out = self.cnn.forward(char_embeddings)
        highway_out = self.highway.forward(cnn_out)
        dropout_out = self.Dropout(highway_out)
        # (max_sent_len, batch_size, word_embedding_size)
        output = dropout_out.reshape(max_sent_len, batch_size,
                                     dropout_out.size()[-1])
        return output
        ### END YOUR CODE
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        self.embed_size = embed_size
        self.embedding_char_size = 50
        self.dropout_rate = 0.3
        self.max_word_length = 21
        self.embedding_word_size = embed_size
        pad_token_idx = vocab.char2id['<pad>']
        self.charEmbeddings = nn.Embedding(len(vocab.char2id),
                                           self.embedding_char_size,
                                           padding_idx=pad_token_idx)
        self.dropout = nn.Dropout(p=self.dropout_rate)
        # construct CNN
        self.CNN = CNN(self.embedding_char_size, self.embedding_word_size,
                       self.max_word_length)
        # construct Highway
        self.highway = Highway(self.embedding_word_size)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        sentence_length, batch_size, _ = input.shape
        # x_emb: (sentence_length, batch_size, max_word_length, e_char)
        x_emb = self.charEmbeddings(input)
        # fold the sentence and batch dims, then move e_char before
        # max_word_length: (sentence_length * batch_size, e_char, max_word_length)
        x_emb = x_emb.view((sentence_length * batch_size,
                            self.max_word_length,
                            self.embedding_char_size)).transpose(1, 2)
        # CNN input:  (batch, e_char, max_word_length); output: (batch, e_word)
        x_conv_out = self.CNN.forward(x_emb)
        x_highway = self.highway.forward(x_conv_out)
        # x_word_emb: (sentence_length * batch_size, embed_size)
        x_word_emb = self.dropout(x_highway)
        output = x_word_emb.view((sentence_length, batch_size,
                                  self.embedding_word_size))
        return output
        ### END YOUR CODE
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output.
            That is, e_word.
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        pad_token_idx = vocab.char2id['<pad>']
        # num_embeddings is the size of the character vocabulary
        self.char_dimension = 50
        self.word_dimension = embed_size
        self.max_word_length = 21  # copied over from utils
        self.dropout_prob = 0.3
        self.embed_size = embed_size
        self.embeddings = nn.Embedding(num_embeddings=len(vocab.id2char),
                                       embedding_dim=self.char_dimension,
                                       padding_idx=pad_token_idx)
        self.cnn_layer = CNN(char_dimension=self.char_dimension,
                             max_word_length=self.max_word_length,
                             out_channels=self.word_dimension)
        self.highway_layer = Highway(self.word_dimension)
        self.dropout_layer = nn.Dropout(p=self.dropout_prob)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        # The input is presumably the output of VocabEntry.to_input_tensor_char.
        sentence_length = input.shape[0]
        batch_size = input.shape[1]
        # (sentence_length, batch_size, max_word_length, char_dim)
        x_emb = self.embeddings(input)
        # (sentence_length * batch_size, max_word_length, char_dim)
        x_reshape = x_emb.view(-1, self.max_word_length, self.char_dimension)
        # (sentence_length * batch_size, char_dim, max_word_length)
        x_reshape = x_reshape.permute([0, 2, 1])
        # (sentence_length * batch_size, word_dim)
        x_conv_out = self.cnn_layer.forward(x_reshape)
        x_highway = self.highway_layer.forward(x_conv_out)
        x_word_emb = self.dropout_layer(x_highway)
        x_final = x_word_emb.reshape(sentence_length, batch_size,
                                     self.word_dimension)
        return x_final
        ### END YOUR CODE
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()
        self.embed_size = embed_size  # word_embed_size
        self.char_embed_size = 50
        self.dropout_rate = 0.3
        pad_token_idx = vocab.char2id['<pad>']
        self.embeddings = nn.Embedding(len(vocab.char2id),
                                       self.char_embed_size,
                                       padding_idx=pad_token_idx)
        self.cnn = CNN(self.char_embed_size, self.embed_size)
        self.highway = Highway(self.embed_size)
        self.dropout = nn.Dropout(p=self.dropout_rate)

    def forward(self, input_tensor):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input_tensor: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        sentence_length, batch_size, m_word = input_tensor.size()
        e_char, e_word = self.char_embed_size, self.embed_size

        # Reshape the input with a revised batch size of sentence_length * batch_size
        x_padded = torch.reshape(input_tensor,
                                 (sentence_length * batch_size, m_word))
        assert_expected_size(x_padded, 'x_padded',
                             [sentence_length * batch_size, m_word])

        x_emb = self.embeddings(x_padded)
        assert_expected_size(x_emb, 'x_emb',
                             [sentence_length * batch_size, m_word, e_char])

        x_reshaped = x_emb.permute(0, 2, 1)
        assert_expected_size(x_reshaped, 'x_reshaped',
                             [sentence_length * batch_size, e_char, m_word])

        x_conv_out = self.cnn.forward(x_reshaped)
        assert_expected_size(x_conv_out, 'x_conv_out',
                             [sentence_length * batch_size, e_word])

        x_highway = self.highway.forward(x_conv_out)
        assert_expected_size(x_highway, 'x_highway',
                             [sentence_length * batch_size, e_word])

        x_word_emb = self.dropout(x_highway)
        assert_expected_size(x_word_emb, 'x_word_emb',
                             [sentence_length * batch_size, e_word])

        output = torch.reshape(x_word_emb,
                               (sentence_length, batch_size, e_word))
        assert_expected_size(output, 'output',
                             [sentence_length, batch_size, e_word])
        return output
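# Shape smoke test for the ModelEmbeddings above (a sketch: FakeVocab is a
# hypothetical stand-in exposing just the char2id mapping the constructor
# reads, and it assumes the CNN, Highway, and assert_expected_size
# definitions used above are importable):
import string
import torch


class FakeVocab:
    def __init__(self):
        chars = ['<pad>'] + list(string.ascii_lowercase)
        self.char2id = {c: i for i, c in enumerate(chars)}


emb = ModelEmbeddings(embed_size=8, vocab=FakeVocab())
# (sentence_length, batch_size, max_word_length)
x = torch.randint(0, 27, (10, 4, 21))
out = emb(x)
assert out.shape == (10, 4, 8)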
import torch
import torch.nn as nn
import torch.nn.functional as F

from highway import Highway

""" Sanity test of the Highway block. """
net = Highway(e_word=3, dropout=0.5)
# shape (b, s_len, e_word) = (2, 2, 3)
x = torch.tensor([[[1., 2., 3.], [5., 6., 7.]],
                  [[3., 5., 6.], [4., 5., 6.]]])
print("input size: {}".format(x.size()))
print("input type: {}".format(type(x)))
print("input: {}".format(x))
x_high = net.forward(x)
print("output size: {}".format(x_high.size()))
print("output type: {}".format(type(x_high)))
print("output: {}".format(x_high))
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, word_embed_size, vocab):
        """ Init the Embedding layer for one language
        @param word_embed_size (int): Embedding size (dimensionality) for the output word
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

        Hints:
            - You may find len(self.vocab.char2id) useful when creating the embedding
        """
        super(ModelEmbeddings, self).__init__()

        ### YOUR CODE HERE for part 1h
        self.word_embed_size = word_embed_size
        self.vocab = vocab
        self.dropout = nn.Dropout(p=0.3)
        self.e_char = 50
        self.char_embeds = nn.Embedding(len(self.vocab.char2id), self.e_char)
        self.cnn = CNN(self.e_char, self.word_embed_size)
        self.highway = Highway(self.word_embed_size)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, word_embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1h
        x_padded = input  # avoid reusing the keyword 'input' as a variable name
        sent_len, batch_size, m_word = x_padded.size()

        x_reshaped = self.char_embeds(x_padded)  # look up the character embeddings
        x_reshaped = x_reshaped.permute(0, 1, 3, 2)
        assert x_reshaped.size() == (sent_len, batch_size, self.e_char, m_word), \
            "x_reshaped is incorrect size; expected {} but got {}". \
            format((sent_len, batch_size, self.e_char, m_word),
                   tuple(x_reshaped.shape))

        x_conv_out = self.cnn.forward(torch.flatten(x_reshaped, 0, 1))
        assert x_conv_out.size() == (sent_len * batch_size, self.word_embed_size), \
            "x_conv_out is incorrect size; expected {} but got {}". \
            format((sent_len * batch_size, self.word_embed_size),
                   tuple(x_conv_out.shape))

        x_highway = self.highway.forward(x_conv_out)
        assert x_highway.size() == (sent_len * batch_size, self.word_embed_size), \
            "x_highway is incorrect size; expected {} but got {}". \
            format((sent_len * batch_size, self.word_embed_size),
                   tuple(x_highway.shape))

        x_word_emb = self.dropout(x_highway)
        x_word_emb = torch.stack(torch.split(x_word_emb, batch_size, dim=0))
        assert x_word_emb.size() == (sent_len, batch_size, self.word_embed_size), \
            "x_word_emb is incorrect size; expected {} but got {}". \
            format((sent_len, batch_size, self.word_embed_size),
                   tuple(x_word_emb.size()))

        return x_word_emb
        ### END YOUR CODE
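# Note on the unflattening above: torch.stack(torch.split(x, batch_size, dim=0))
# rebuilds the (sent_len, batch_size, e_word) layout; since the flattening was
# done with torch.flatten(x, 0, 1), an equivalent and cheaper form is a plain
# view over the same memory:
# x_word_emb = x_word_emb.view(sent_len, batch_size, self.word_embed_size)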
class ModelEmbeddings(nn.Module):
    """
    Class that converts input words to their CNN-based embeddings.
    """

    def __init__(self, embed_size, vocab):
        """ Init the Embedding layer for one language
        @param embed_size (int): Embedding size (dimensionality) for the output
        @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
        """
        super(ModelEmbeddings, self).__init__()

        ## A4 code
        # pad_token_idx = vocab.src['<pad>']
        # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
        ## End A4 code

        ### YOUR CODE HERE for part 1j
        pad_token_idx = vocab.char2id['<pad>']
        self.e_char = 50
        self.embed_size = embed_size
        self.vocab = vocab
        self.embeddings = nn.Embedding(len(self.vocab.char2id),
                                       self.e_char,
                                       padding_idx=pad_token_idx)
        self.cnn = CNN(self.e_char, self.embed_size)
        self.highway = Highway(self.embed_size)
        self.dropout = nn.Dropout(0.3)
        ### END YOUR CODE

    def forward(self, input):
        """ Looks up character-based CNN embeddings for the words in a batch of sentences.
        @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
            where each integer is an index into the character vocabulary
        @returns output: Tensor of shape (sentence_length, batch_size, embed_size), containing
            the CNN-based embeddings for each word of the sentences in the batch
        """
        ### YOUR CODE HERE for part 1j
        sentence_length = input.size()[0]
        batch_size = input.size()[1]
        # (sentence_length, batch_size, max_word_length, e_char)
        e = self.embeddings(input)
        # fold sentence and batch dims, moving e_char before max_word_length:
        # (sentence_length * batch_size, e_char, max_word_length)
        e = e.permute(0, 1, 3, 2).contiguous()
        e = e.view(-1, e.size()[2], e.size()[3])
        x_conv_out = self.cnn.forward(e)
        x_highway = self.highway.forward(x_conv_out)
        x_word_emb = self.dropout(x_highway)
        x_word_emb = x_word_emb.view(sentence_length, batch_size,
                                     self.embed_size)
        return x_word_emb
        ### END YOUR CODE