Example #1
    def forward(self, inputs, lengths):

        # STEP 1 - perform dot product of the attention vector and each hidden state

        # inputs is a 3D Tensor: batch, len, hidden_size
        # scores is a 2D Tensor: batch, len
        # e.g. for a batch of 2 sequences with len=3, the raw scores might be:
        # tensor([[-1.7301, -1.3598, -0.7204],
        #         [-1.6151,  1.4487,  2.6711]])
        # and after the softmax, each row sums to 1:
        # tensor([[0.1926, 0.2789, 0.5285],
        #         [0.0105, 0.2251, 0.7644]])
        # the attention layer (linear + tanh) outputs one value per timestep,
        # so squeeze() drops the trailing singleton dimension:
        # (batch, len, 1) -> (batch, len),
        # i.e. [ [att1], [att2], ... ] => [att1, att2, ...]
        scores = self.attention(inputs).squeeze()
        scores = self.softmax(scores)

        # Step 2 - Masking

        # construct a mask, based on sentence lengths
        mask = self.get_mask(scores, lengths)
        # apply the mask - zero out masked timesteps
        masked_scores = scores * mask
        # re-normalize the masked scores
        _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
        scores = masked_scores.div(_sums)  # divide by row sum

        # Step 3 - Weighted sum of hidden states, by the attention scores

        # multiply each hidden state with the attention weights
        weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
        # sum the hidden states
        representations = weighted.sum(1).squeeze()

        return representations, scores
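The forward pass above relies on self.attention, self.softmax, and self.get_mask, which the snippet does not show. Below is a minimal sketch of what the surrounding module could look like, assuming a linear + tanh scoring layer (as the comments suggest) and a length-based mask; the class name SelfAttention and the exact layer composition are assumptions, not taken from the original code.

import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        # assumption: one scalar score per timestep, (batch, len, hidden) -> (batch, len, 1)
        self.attention = nn.Sequential(nn.Linear(hidden_size, 1), nn.Tanh())
        # normalizes the scores over the time dimension
        self.softmax = nn.Softmax(dim=-1)

    @staticmethod
    def get_mask(scores, lengths):
        # scores: (batch, len); lengths: (batch,) real length of each sequence
        # returns a float mask shaped like scores: 1.0 for real timesteps, 0.0 for padding
        max_len = scores.size(1)
        positions = torch.arange(max_len, device=scores.device).unsqueeze(0)  # (1, len)
        return (positions < lengths.unsqueeze(1)).float()                     # (batch, len)

With the forward() above attached to this skeleton, the module maps (batch, len, hidden_size) inputs to a (batch, hidden_size) representation and (batch, len) attention scores.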
Example #2
    def forward(self, inputs, mask, lengths):
        # inputs is a 3D Tensor: batch, len, hidden_size
        # mask is a precomputed 2D Tensor: batch, len (1 for real timesteps, 0 for padding)
        # lengths is not used here; the mask is already built by the caller
        # scores is a 2D Tensor: batch, len
        scores = self.attention(inputs).squeeze()
        scores = self.softmax(scores)

        # zero out the padded timesteps and re-normalize so each row sums to 1
        masked_scores = scores * mask.float()
        _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
        scores = masked_scores.div(_sums)  # divide by row sum

        # weighted sum of the hidden states, using the attention scores
        weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))
        representations = weighted.sum(1).squeeze()

        return representations, scores
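Unlike Example #1, this variant expects the mask to be passed in already built (the lengths argument is accepted but not used). A short usage sketch under assumed shapes, showing how a caller could construct such a mask; the tensor values and the model variable name are illustrative assumptions.

import torch

inputs = torch.randn(2, 5, 3)                            # (batch=2, len=5, hidden_size=3)
lengths = torch.tensor([5, 3])                           # real (unpadded) length of each sequence

# boolean mask: True for real timesteps, False for padding; forward() casts it with .float()
positions = torch.arange(inputs.size(1)).unsqueeze(0)    # (1, len)
mask = positions < lengths.unsqueeze(1)                  # (batch, len)

# representations, scores = model(inputs, mask, lengths)  # model: a module with this forward()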
Example #3
    def forward(self, inputs, lengths):

        ##################################################################
        # STEP 1 - perform dot product
        # of the attention vector and each hidden state
        ##################################################################

        # inputs is a 3D Tensor: batch, len, hidden_size
        # scores is a 2D Tensor: batch, len
        scores = self.attention(inputs).squeeze()
        scores = self.softmax(scores)

        ##################################################################
        # Step 2 - Masking
        ##################################################################

        # construct a mask, based on sentence lengths
        mask = self.get_mask(scores, lengths)

        # apply the mask - zero out masked timesteps
        masked_scores = scores * mask

        # re-normalize the masked scores
        _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
        scores = masked_scores.div(_sums)  # divide by row sum

        ##################################################################
        # Step 3 - Weighted sum of hidden states, by the attention scores
        ##################################################################

        # multiply each hidden state with the attention weights
        weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))

        # sum the hidden states over the time dimension
        # if downstream code expects the extra singleton dimension squeezed away,
        # use the commented line below instead
        # representations = weighted.sum(1).squeeze()
        representations = weighted.sum(1)

        return representations, scores
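A minimal usage sketch for the length-based variants (Examples #1, #3, and #5), assuming the forward() above is attached to the hypothetical SelfAttention skeleton sketched after Example #1; shapes and values are illustrative.

import torch

attn = SelfAttention(hidden_size=3)        # hypothetical class from the earlier sketch

inputs = torch.randn(2, 5, 3)              # (batch=2, len=5, hidden_size=3)
lengths = torch.tensor([5, 3])             # real length of each sequence

representations, scores = attn(inputs, lengths)
print(representations.shape)               # torch.Size([2, 3])  one vector per sequence
print(scores.shape)                        # torch.Size([2, 5])  one weight per timestep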
Example #4
    def forward(self, inputs, weights):

        # inputs is a 3D Tensor: batch, len, hidden_size
        # weights is a 2D Tensor: batch, len
        # scores is a 2D Tensor: batch, len
        scores = self.attention(inputs).squeeze()
        scores = self.softmax(scores)
        scores = torch.add(scores, weights)

        # re-normalize the fused scores
        _sums = scores.sum(-1, keepdim=True)  # sums per row
        scores = scores.div(_sums)  # divide by row sum

        # multiply each hidden state with the attention weights
        weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))

        # sum the hidden states over the time dimension
        # if downstream code expects the extra singleton dimension squeezed away,
        # use the commented line below instead
        # representations = weighted.sum(1).squeeze()
        representations = weighted.sum(1)

        return representations, scores
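Example #4 fuses externally supplied weights (e.g. a prior over timesteps) with the learned attention scores instead of masking by length. A tiny numerical sketch of the fuse-and-renormalize step, with made-up values:

import torch

# toy softmaxed scores and external prior weights, shape (batch=1, len=3)
scores = torch.tensor([[0.2, 0.3, 0.5]])
prior = torch.tensor([[0.5, 0.4, 0.1]])

fused = torch.add(scores, prior)                 # element-wise sum, as in the forward() above
fused = fused / fused.sum(-1, keepdim=True)      # re-normalize so each row sums to 1
print(fused)                                     # tensor([[0.3500, 0.3500, 0.3000]])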
Example #5
    def forward(self, inputs, lengths):

        ##################################################################
        # STEP 1 - perform dot product
        # of the attention vector and each hidden state
        ##################################################################

        # inputs is a 3D Tensor: batch, len, hidden_size
        # scores is a 2D Tensor: batch, len
        scores = self.attention(inputs).squeeze()
        scores = self.softmax(scores)

        ##################################################################
        # Step 2 - Masking
        ##################################################################

        # construct a mask, based on sentence lengths
        mask = self.get_mask(scores, lengths)

        # apply the mask - zero out masked timesteps
        masked_scores = scores * mask

        # re-normalize the masked scores
        _sums = masked_scores.sum(-1, keepdim=True)  # sums per row
        scores = masked_scores.div(_sums)  # divide by row sum

        ##################################################################
        # Step 3 - Weighted sum of hidden states, by the attention scores
        ##################################################################

        # multiply each hidden state with the attention weights
        weighted = torch.mul(inputs, scores.unsqueeze(-1).expand_as(inputs))

        # sum the hidden states
        representations = weighted.sum(1).squeeze()

        return representations, scores
Example #6
    def weighted_timestep(hidden, weights):
        # scale each timestep's hidden state by its attention weight;
        # weights is a 2D Tensor (batch, len), broadcast over the hidden dimension
        weighted_h = torch.mul(hidden, weights.unsqueeze(-1).expand_as(hidden))
        return weighted_h
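A short usage sketch for the helper above, copied here as a standalone function so the example is self-contained; the 2D weights are broadcast across the hidden dimension, so every feature of a timestep is scaled by that timestep's attention weight (shapes and values are illustrative).

import torch

def weighted_timestep(hidden, weights):
    # weights: (batch, len), broadcast over the hidden dim of hidden: (batch, len, hidden_size)
    return torch.mul(hidden, weights.unsqueeze(-1).expand_as(hidden))

hidden = torch.ones(2, 4, 3)                          # (batch=2, len=4, hidden_size=3)
weights = torch.tensor([[0.10, 0.20, 0.30, 0.40],
                        [0.25, 0.25, 0.25, 0.25]])    # (batch=2, len=4)

out = weighted_timestep(hidden, weights)
print(out.shape)        # torch.Size([2, 4, 3])
print(out[0, :, 0])     # tensor([0.1000, 0.2000, 0.3000, 0.4000])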