def forward(self, inputs, lengths):
    # STEP 1 - dot product of the attention vector and each hidden state
    # inputs is a 3D Tensor: batch, len, hidden_size
    # scores is a 2D Tensor: batch, len
    #
    # the attention layer (linear + tanh) yields one score per timestep:
    # (batch, len, 1) -> squeeze() -> (batch, len)
    # (squeeze drops all size-1 dims, e.g. (A,1,B,C,1,D) -> (A,B,C,D))
    scores = self.attention(inputs).squeeze()
    # softmax turns the scores into a distribution, row-wise, e.g.:
    # tensor([[-1.7301, -1.3598, -0.7204],
    #         [-1.6151,  1.4487,  2.6711]])
    # becomes
    # tensor([[0.1926, 0.2789, 0.5285],
    #         [0.0105, 0.2251, 0.7644]])
    scores = self.softmax(scores)

    # STEP 2 - Masking
    # construct a mask, based on the sentence lengths
    mask = self.get_mask(scores, lengths)
    # apply the mask - zero out the masked timesteps
    masked_scores = scores * mask
    # re-normalize the masked scores
    _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
    scores = masked_scores.div(_sums)            # divide by row sum

    # STEP 3 - weighted sum of the hidden states, by the attention scores
    # multiply each hidden state with its attention weight
    weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
    # sum over the timesteps
    representations = weighted.sum(1).squeeze()
    return representations, scores
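# For context, a minimal sketch of the module this forward() could live in.
# The attribute names (self.attention, self.softmax) match the code above;
# the Linear+Tanh composition follows the "linear + tanh" comment, but the
# class name and exact layer sizes are assumptions, not the source's code.
import torch
import torch.nn as nn

class SelfAttention(nn.Module):  # hypothetical class name
    def __init__(self, hidden_size):
        super().__init__()
        # project each hidden state to a single attention score
        self.attention = nn.Sequential(
            nn.Linear(hidden_size, 1),
            nn.Tanh(),
        )
        # normalize the scores over the timestep dimension
        self.softmax = nn.Softmax(dim=-1)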
def forward(self, inputs, mask, lengths):
    scores = self.attention(inputs).squeeze()
    scores = self.softmax(scores)
    masked_scores = scores * mask.float()
    _sums = masked_scores.sum(-1, keepdim=True)
    scores = masked_scores.div(_sums)
    weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
    representations = weighted.sum(1).squeeze()
    return representations, scores
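# In this variant the mask is precomputed and passed in instead of being
# built inside forward(). A minimal sketch of how such a mask could be
# derived from the sentence lengths (the helper below is an assumption,
# not shown in the source):
import torch

def lengths_to_mask(lengths, max_len):  # hypothetical helper
    # mask[i, t] = 1 for t < lengths[i], else 0
    idx = torch.arange(max_len).unsqueeze(0)     # (1, max_len)
    return (idx < lengths.unsqueeze(1)).float()  # (batch, max_len)

# lengths_to_mask(torch.tensor([2, 3]), 4)
# tensor([[1., 1., 0., 0.],
#         [1., 1., 1., 0.]])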
def forward(self, inputs, lengths):
    ##################################################################
    # STEP 1 - dot product of the attention vector and each hidden state
    ##################################################################
    # inputs is a 3D Tensor: batch, len, hidden_size
    # scores is a 2D Tensor: batch, len
    scores = self.attention(inputs).squeeze()
    scores = self.softmax(scores)

    ##################################################################
    # STEP 2 - Masking
    ##################################################################
    # construct a mask, based on the sentence lengths
    mask = self.get_mask(scores, lengths)
    # apply the mask - zero out the masked timesteps
    masked_scores = scores * mask
    # re-normalize the masked scores
    _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
    scores = masked_scores.div(_sums)            # divide by row sum

    ##################################################################
    # STEP 3 - weighted sum of the hidden states, by the attention scores
    ##################################################################
    # multiply each hidden state with its attention weight
    weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
    # sum over the timesteps
    # if the output shape causes a bug, use the squeezed variant instead:
    # representations = weighted.sum(1).squeeze()
    representations = weighted.sum(1)
    return representations, scores
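# self.get_mask is called above but never defined in the snippet. A minimal
# sketch of what it could look like, assuming lengths is a LongTensor of
# sentence lengths on the same device as scores:
def get_mask(self, scores, lengths):
    # ones for real timesteps (t < length), zeros for the padded ones
    idx = torch.arange(scores.size(1), device=scores.device).unsqueeze(0)
    return (idx < lengths.unsqueeze(1)).float()  # (batch, len)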
def forward(self, inputs, weights):
    # inputs is a 3D Tensor: batch, len, hidden_size
    # weights is a 2D Tensor: batch, len
    # scores is a 2D Tensor: batch, len
    scores = self.attention(inputs).squeeze()
    scores = self.softmax(scores)
    # fuse the learned scores with the externally provided weights
    scores = torch.add(scores, weights)
    # re-normalize the fused scores
    _sums = scores.sum(-1, keepdim=True)  # sums per row
    scores = scores.div(_sums)            # divide by row sum
    # multiply each hidden state with its attention weight
    weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
    # sum over the timesteps
    # if the output shape causes a bug, use the squeezed variant instead:
    # representations = weighted.sum(1).squeeze()
    representations = weighted.sum(1)
    return representations, scores
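# Illustrative call of the fusion variant: when the external weights are a
# distribution (each row sums to 1), adding them to the softmax scores and
# re-normalizing simply averages the two. `model` below is hypothetical:
batch, seq_len, hidden_size = 2, 4, 3
inputs = torch.randn(batch, seq_len, hidden_size)
prior = torch.full((batch, seq_len), 1.0 / seq_len)  # uniform prior over timesteps
# representations, scores = model(inputs, prior)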
def forward(self, inputs, lengths):
    ##################################################################
    # STEP 1 - dot product of the attention vector and each hidden state
    ##################################################################
    # inputs is a 3D Tensor: batch, len, hidden_size
    # scores is a 2D Tensor: batch, len
    scores = self.attention(inputs).squeeze()
    scores = self.softmax(scores)

    ##################################################################
    # STEP 2 - Masking
    ##################################################################
    # construct a mask, based on the sentence lengths
    mask = self.get_mask(scores, lengths)
    # apply the mask - zero out the masked timesteps
    masked_scores = scores * mask
    # re-normalize the masked scores
    _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
    scores = masked_scores.div(_sums)            # divide by row sum

    ##################################################################
    # STEP 3 - weighted sum of the hidden states, by the attention scores
    ##################################################################
    # multiply each hidden state with its attention weight
    weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
    # sum over the timesteps
    representations = weighted.sum(1).squeeze()
    return representations, scores
def weighted_timestep(hidden, weights):
    # scale each timestep's hidden state by its weight:
    # hidden is (batch, len, hidden_size), weights is (batch, len)
    weighted_h = torch.mul(hidden, weights.unsqueeze(-1).expand_as(hidden))
    return weighted_h
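# Quick shape check for the helper (values and sizes are illustrative only):
import torch

hidden = torch.randn(2, 4, 3)                       # (batch, len, hidden_size)
weights = torch.softmax(torch.randn(2, 4), dim=-1)  # (batch, len)
assert weighted_timestep(hidden, weights).shape == hidden.shape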