def forward(self, batch):
    """
    Encode the padded context with the LSTM, concatenate it with the two
    word embeddings and return the raw label scores.

    :param batch: dict with the padded contexts ("seq"), their true
        lengths ("seq_lengths"), the two word embeddings ("w1", "w2")
        and the target device ("device")
    :return: the raw label scores
    """
    device = batch["device"]
    # context size = batch_size x max_len x embedding_dim.
    # Pack the padded contexts so the padding vectors are never shown to
    # the LSTM: context_packed.data has shape
    # (sum of all seq lengths, embedding_dim), and batch_sizes records,
    # column-wise, how many real elements each time step contains.
    context_packed = nn.utils.rnn.pack_padded_sequence(
        batch["seq"].to(device),
        batch["seq_lengths"].cpu(),  # pack_padded_sequence expects lengths on the CPU
        batch_first=True,
        enforce_sorted=False)
    self.lstm.to(device)
    # Forward propagate the LSTM; initial states default to zeros.
    # out.data: (sum of seq lengths, hidden_size * 2)
    out, hidden = self.lstm(context_packed)
    # Unpack the sequence.
    # out: tensor of shape (batch_size, max_seq_length, hidden_size * 2)
    out, seq_lengths = nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
    # Take the hidden state of the last *valid* time step of each sequence;
    # out[:, -1, :] would read padding for sequences shorter than max_seq_length.
    batch_idx = torch.arange(out.size(0), device=out.device)
    out = out[batch_idx, (seq_lengths - 1).to(out.device)]
    # Concatenate the two word vectors into a phrase representation.
    word_composed = comp_functions.concat(batch["w1"].to(device),
                                          batch["w2"].to(device), axis=1)
    # Concatenate the phrase with the encoded context and classify.
    context_phrase = torch.cat((word_composed, out), 1)
    x = F.relu(self._hidden_layer(context_phrase))
    x = F.dropout(x, p=self.dropout_rate, training=self.training)
    return self._output_layer(x)
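For illustration, a minimal usage sketch (hedged: the model variable and the dimensions are assumptions; only the batch keys "seq", "seq_lengths", "w1", "w2" and "device" come from forward() above):

import torch

emb_dim, max_len, batch_size = 100, 7, 2
batch = {
    "device": torch.device("cpu"),
    "seq": torch.randn(batch_size, max_len, emb_dim),   # padded contexts
    "seq_lengths": torch.tensor([7, 4]),                # true length of each row
    "w1": torch.randn(batch_size, emb_dim),             # first constituent embedding
    "w2": torch.randn(batch_size, emb_dim),             # second constituent embedding
}
# scores = model(batch)  # "model" would be an instance of the module defined above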
def test_concat(self):
    """
    Test whether two batches of size [1x3] and [1x3] can be
    concatenated to retrieve a batch of size [1x6].
    """
    expected_p = np.array([[1, 1, 1, 1, 0, 0]])
    p = concat(self.u, self.v, axis=1)
    np.testing.assert_allclose(p, expected_p)
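The concat helper itself is not shown in this section; a minimal sketch that would satisfy test_concat (an assumption, not necessarily the project's actual comp_functions.concat) could be:

import numpy as np
import torch

def concat(u, v, axis=1):
    """Concatenate two batches of vectors along the given axis.

    Accepts numpy arrays or torch tensors (hypothetical helper)."""
    return torch.cat((torch.as_tensor(u), torch.as_tensor(v)), dim=axis)

Given the expected output [[1, 1, 1, 1, 0, 0]], the test fixtures are presumably self.u = [[1, 1, 1]] and self.v = [[1, 0, 0]].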
def compose(self, word1, word2):
    """
    Combine two words via a single matrix transformation: [word1; word2] x W + b.

    :param word1: embedding of the first word
    :param word2: embedding of the second word
    :return: composed phrase embedding (dimension = embedding dimension)
    """
    composed_phrase = comp_functions.concat(word1, word2, axis=1)
    transformed = self.matrix_layer(composed_phrase)
    reg_transformed = F.dropout(transformed, p=self.dropout_rate,
                                training=self.training)
    if self.normalize_embeddings:
        reg_transformed = F.normalize(reg_transformed, p=2, dim=1)
    return reg_transformed
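A plausible skeleton for the module around this compose() (hedged: the class name and default hyperparameters are assumptions; the layer and attribute names follow the code above):

import torch.nn as nn

class MatrixComposer(nn.Module):  # hypothetical name
    def __init__(self, embedding_dim, dropout_rate=0.1, normalize_embeddings=True):
        super().__init__()
        # [word1; word2] has size 2 * embedding_dim and is mapped back to embedding_dim
        self.matrix_layer = nn.Linear(2 * embedding_dim, embedding_dim)
        self.dropout_rate = dropout_rate
        self.normalize_embeddings = normalize_embeddings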
def compose(self, word1, word2):
    """
    Concatenate two words and apply a linear transformation.

    :param word1: the first word of size batch_size x embedding_size
    :param word2: the second word of size batch_size x embedding_size
    :return: the composed representation
    """
    composed_phrase = comp_functions.concat(word1, word2, axis=1)
    transformed = self.matrix_layer(composed_phrase)
    reg_transformed = F.dropout(transformed, p=self.dropout_rate,
                                training=self.training)
    if self.normalize_embeddings:
        reg_transformed = F.normalize(reg_transformed, p=2, dim=1)
    return reg_transformed
def forward(self, batch):
    """
    Concatenate the two word embeddings in the batch, apply a non-linear
    transformation (hidden layer) and feed the result to the output layer.

    :param batch: dict with the two word embeddings ("w1", "w2"), each of
        size batch_size x embedding_size, and the target device ("device")
    :return: the raw label scores after the output layer
    """
    device = batch["device"]
    word_composed = comp_functions.concat(batch["w1"].to(device),
                                          batch["w2"].to(device), axis=1)
    x = F.relu(self.hidden_layer(word_composed))
    x = F.dropout(x, p=self.dropout_rate, training=self.training)
    return self.output_layer(x)
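Analogously, a hedged skeleton of the module this forward() could belong to (class name, hidden size and label count are assumptions; the layer names follow the code above):

import torch.nn as nn

class ConcatClassifier(nn.Module):  # hypothetical name
    def __init__(self, embedding_dim, hidden_dim, num_labels, dropout_rate=0.1):
        super().__init__()
        # the concatenated pair [w1; w2] has size 2 * embedding_dim
        self.hidden_layer = nn.Linear(2 * embedding_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, num_labels)
        self.dropout_rate = dropout_rate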
def compose(self, word1, word2):
    """
    Concatenate two words and apply a matrix transformation, followed by
    dropout and, optionally, L2 normalization.

    :param word1: the first word of size batch_size x embedding_size
    :param word2: the second word of size batch_size x embedding_size
    :return: the composed phrase representation
    """
    composed_phrase = comp_functions.concat(word1, word2, axis=1)
    transformed = self.matrix_layer(composed_phrase)
    reg_transformed = F.dropout(transformed, p=self.dropout_rate,
                                training=self.training)
    if self.normalize_embeddings:
        reg_transformed = F.normalize(reg_transformed, p=2, dim=1)
    return reg_transformed