import unittest

import numpy as np


class TestAttention(unittest.TestCase):
    def setUp(self):
        self.attention = Attention()
        self.hs = np.random.randn(10, 5, 4)
        self.h = np.random.randn(10, 4)

    def test_forward(self):
        out = self.attention.forward(self.hs, self.h)
        self.assertEqual((10, 4), out.shape)

    def test_backward(self):
        dout = self.attention.forward(self.hs, self.h)
        dhs, dh = self.attention.backward(dout)
        self.assertEqual((10, 5, 4), dhs.shape)
        # dh is the gradient with respect to h, so it should share h's shape
        self.assertEqual((10, 4), dh.shape)
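# The test above exercises an Attention layer that is not shown in this snippet.
# What follows is only a minimal sketch of a NumPy implementation compatible with the
# test (dot-product scores, a softmax over the time axis, and a weighted sum of the
# encoder states); the exact scoring function is an assumption, not necessarily the
# one used in the original code.
import numpy as np


class Attention:
    def __init__(self):
        self.attention_weight = None
        self.cache = None

    def forward(self, hs, h):
        # hs: encoder states (N, T, H); h: query vector (N, H)
        N, T, H = hs.shape
        scores = np.sum(hs * h.reshape(N, 1, H), axis=2)                # (N, T)
        scores -= scores.max(axis=1, keepdims=True)                     # stabilise softmax
        a = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # (N, T)
        out = np.sum(hs * a.reshape(N, T, 1), axis=1)                   # (N, H)
        self.attention_weight = a
        self.cache = (hs, h, a)
        return out

    def backward(self, dout):
        hs, h, a = self.cache
        N, T, H = hs.shape
        # gradient through the weighted sum
        da = np.sum(hs * dout.reshape(N, 1, H), axis=2)                 # (N, T)
        dhs_sum = a.reshape(N, T, 1) * dout.reshape(N, 1, H)            # (N, T, H)
        # gradient through the softmax
        dscores = a * (da - np.sum(a * da, axis=1, keepdims=True))      # (N, T)
        # gradient through the dot-product scores
        dhs_score = dscores.reshape(N, T, 1) * h.reshape(N, 1, H)       # (N, T, H)
        dh = np.sum(dscores.reshape(N, T, 1) * hs, axis=1)              # (N, H)
        return dhs_sum + dhs_score, dh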
def forward(self, hs_enc, hs_dec):
    N, T, H = hs_dec.shape
    out = np.empty_like(hs_dec)
    for t in range(T):
        # one Attention layer per decoder time step
        layer = Attention()
        out[:, t, :] = layer.forward(hs_enc, hs_dec[:, t, :])
        self.layers.append(layer)
        self.attention_weights.append(layer.attention_weight)
    return out
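# A hypothetical usage sketch of the time-distributed forward above.  It assumes the
# method lives in a class (here called TimeAttention, a name chosen for illustration)
# whose __init__ sets up the empty `layers` and `attention_weights` lists that
# forward() appends to.
import numpy as np

hs_enc = np.random.randn(10, 7, 4)            # encoder states: (batch, T_enc, hidden)
hs_dec = np.random.randn(10, 5, 4)            # decoder states: (batch, T_dec, hidden)

time_attention = TimeAttention()
context = time_attention.forward(hs_enc, hs_dec)
print(context.shape)                          # (10, 5, 4): one context vector per decoder step
print(len(time_attention.attention_weights))  # 5 weight arrays, each of shape (10, 7)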
from typing import List, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from flair.data import Sentence
from flair.embeddings import FlairEmbeddings, StackedEmbeddings, WordEmbeddings


class ATAE_LSTM(nn.Module):
    '''This class implements the ATAE_LSTM model.'''

    def __init__(self,
                 num_classes: int = 2,
                 bidirectional: bool = False,
                 rnn_layers: int = 1,
                 hidden_size: int = 256,
                 rnn_type: str = 'GRU'):
        super(ATAE_LSTM, self).__init__()
        self.stackedembeddings: StackedEmbeddings = StackedEmbeddings([
            FlairEmbeddings('news-forward'),
            FlairEmbeddings('news-backward')
        ])
        self.wordembeddings: StackedEmbeddings = StackedEmbeddings(
            [WordEmbeddings('glove')])
        self.embedding_dimension: int = (self.stackedembeddings.embedding_length +
                                         self.wordembeddings.embedding_length)
        self.bidirectional: bool = bidirectional
        self.rnn_layers: int = rnn_layers
        self.rnn_type: str = rnn_type
        self.num_classes: int = num_classes
        self.hidden_size: int = hidden_size
        if self.rnn_type == 'GRU':
            self.rnn = torch.nn.GRU(self.embedding_dimension,
                                    self.hidden_size,
                                    bidirectional=self.bidirectional,
                                    num_layers=self.rnn_layers)
        else:
            self.rnn = torch.nn.LSTM(self.embedding_dimension,
                                     self.hidden_size,
                                     bidirectional=self.bidirectional,
                                     num_layers=self.rnn_layers)
        self.attention = Attention()

    def weird_operation(self, rnn_output_tensor: torch.Tensor,
                        aspect_embedding_tensor: torch.Tensor) -> torch.Tensor:
        # Project the RNN output and concatenate the aspect embedding at every time step.
        transformed_rnn_output = self.myLinear(rnn_output_tensor)
        return F.relu(
            torch.cat([transformed_rnn_output, aspect_embedding_tensor], dim=2))

    def myLinear(self,
                 input_tensor: torch.Tensor,
                 output_dim: int = None,
                 keep: bool = True) -> torch.Tensor:
        # NOTE: a fresh Linear layer is created on every call.
        if keep:
            output_dim = input_tensor.size(-1)
        elif output_dim is None:
            raise ValueError('Enter the output dimension')
        self.Linear = nn.Linear(input_tensor.size(-1), output_dim)
        torch.nn.init.xavier_uniform_(self.Linear.weight)
        return self.Linear(input_tensor)

    def custom_embedding_layer(
            self, inputs: Union[List[Sentence], Sentence]) -> torch.Tensor:
        if isinstance(inputs, Sentence):
            inputs = [inputs]
        self.stackedembeddings.embed(inputs)
        # Sort by length (longest first) so the batch can be packed later.
        inputs.sort(key=lambda x: len(x), reverse=True)
        max_length = len(inputs[0])
        lengths: List[int] = []
        batch_tensor_list: List[torch.Tensor] = []
        for sentence in inputs:
            sentence_tensor = [
                token.get_embedding().unsqueeze(dim=0)
                for token in sentence.tokens
            ]
            # Pad shorter sentences with zero vectors up to the batch maximum length.
            for i in range(max_length - len(sentence)):
                sentence_tensor.append(
                    torch.zeros(
                        self.stackedembeddings.embedding_length).unsqueeze(0))
            sentence_tensor = torch.cat(sentence_tensor, dim=0)
            batch_tensor_list.append(sentence_tensor.unsqueeze(0))
            lengths.append(len(sentence))
        batch_tensor: torch.Tensor = torch.cat(batch_tensor_list, dim=0)
        return batch_tensor, torch.tensor(lengths)

    def aspect_embedding_layer(self,
                               targets: Tuple[List[List[int]], List[Sentence]],
                               vocab: dict,
                               trainable: bool = True,
                               affine: bool = False) -> torch.Tensor:
        self.wordembeddings.embed(targets[1])
        embed = nn.Embedding(len(vocab),
                             self.wordembeddings.embedding_length,
                             padding_idx=0)
        weights = torch.empty(len(vocab), self.wordembeddings.embedding_length)
        '''
        for i, sentence in enumerate(targets[1]):
            for j, token in enumerate(sentence.tokens):
                index = targets[0][i][j]
                weights[index] = token.get_embedding()
        '''
        # from_pretrained is a classmethod that returns a new Embedding,
        # so the result has to be assigned back to embed.
        embed = nn.Embedding.from_pretrained(embeddings=weights,
                                             freeze=not trainable,
                                             padding_idx=0)
        inputs = torch.tensor(targets[0])
        embedding = embed(inputs)
        if affine:
            transformed_aspect_embeddings = self.myLinear(
                input_tensor=embedding, keep=True)
        else:
            transformed_aspect_embeddings = embedding
        return transformed_aspect_embeddings

    def affine_transformation_final(self, sent_repr: torch.Tensor,
                                    final_hidden_state: torch.Tensor):
        projected_final_hidden = self.myLinear(final_hidden_state,
                                               output_dim=sent_repr.size(-1),
                                               keep=False)
        projected_sent_repr = self.myLinear(sent_repr, keep=True)
        return self.myLinear(input_tensor=torch.tanh(projected_sent_repr +
                                                     projected_final_hidden),
                             output_dim=self.num_classes,
                             keep=False)

    def forward(self, input_sentences: List[Sentence],
                target_words: List[List[int]], vocab: dict):
        nontrainable_embeddings, __lengths__ = self.custom_embedding_layer(
            inputs=input_sentences)
        trainable_embeddings = self.aspect_embedding_layer(
            targets=tuple([target_words, input_sentences]), vocab=vocab)
        # Repeat the aspect embedding at every time step of the sentence.
        trainable_embeddings = torch.cat([
            trainable_embeddings
            for i in range(nontrainable_embeddings.size(1))
        ], dim=1)
        combined_embeddings = torch.cat(
            [nontrainable_embeddings, trainable_embeddings], dim=2)
        packed_embeddings = pack_padded_sequence(combined_embeddings,
                                                 lengths=__lengths__,
                                                 batch_first=True)
        recurrent_output, last_states = self.rnn(packed_embeddings)
        padded_rnn_embedding, __ = pad_packed_sequence(recurrent_output,
                                                       batch_first=True)
        weird_tensor = self.weird_operation(
            rnn_output_tensor=padded_rnn_embedding,
            aspect_embedding_tensor=trainable_embeddings)
        final_hidden_state_repr = torch.cat(
            [padded_rnn_embedding[:, -1, :], padded_rnn_embedding[:, 0, :]],
            dim=-1).unsqueeze(dim=1)
        sent_repr = self.attention.forward(
            attention_candidates=weird_tensor,
            weighted_sum_candidates=padded_rnn_embedding,
            attention_size=weird_tensor.size(-1))
        final_logits = self.affine_transformation_final(
            sent_repr=sent_repr, final_hidden_state=final_hidden_state_repr)
        return final_logits
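# The forward pass above calls self.attention.forward(attention_candidates=...,
# weighted_sum_candidates=..., attention_size=...), but the Attention module itself
# is not shown.  The following is only a minimal sketch of a module compatible with
# that call signature, assuming a learned scoring layer over attention_candidates
# followed by a softmax over time and a weighted sum of weighted_sum_candidates; it
# is not necessarily the author's implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    def __init__(self):
        super().__init__()
        self.score_layer = None

    def forward(self, attention_candidates: torch.Tensor,
                weighted_sum_candidates: torch.Tensor,
                attention_size: int) -> torch.Tensor:
        # Lazily build the scoring layer once the candidate size is known,
        # since the module is constructed without arguments in __init__ above.
        if self.score_layer is None:
            self.score_layer = nn.Linear(attention_size, 1, bias=False)
        scores = self.score_layer(attention_candidates).squeeze(-1)  # (batch, T)
        weights = F.softmax(scores, dim=-1).unsqueeze(1)             # (batch, 1, T)
        # Weighted sum over time: (batch, 1, T) x (batch, T, H) -> (batch, 1, H),
        # which matches the unsqueezed final_hidden_state_repr used downstream.
        return torch.bmm(weights, weighted_sum_candidates)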