Example 1
def global_attention(query):
    # Linear map of the query into the global attention vector space.
    y = Linear(query, global_attention_vec_size, True)
    y = y.view(-1, 1, 1, global_attention_vec_size)
    # Attention mask is a softmax of v_g^{\top} * tanh(...), scored at
    # every attention position.
    s = torch.sum(global_v * torch.tanh(global_hidden_features + y),
                  dim=[1, 3])
    a = torch.softmax(s, dim=-1)
    return a
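
These snippets close over names defined in an enclosing scope: the Linear projection helper, a learned score vector (global_v, local_v, v), and the precomputed hidden features of the encoder states. The setup below is a minimal, hypothetical sketch rather than the original code: it assumes encoder states laid out as (batch, attn_size, attn_length, 1), a 1x1 convolution for the precomputed projection, and a plain nn.Linear as a stand-in for Linear, just enough to run global_attention as defined above.

import torch
import torch.nn as nn

# Hypothetical sizes, chosen only to make the shapes concrete.
batch, attn_length, attn_size = 4, 10, 64
global_attention_vec_size = 32

# Encoder states laid out as (batch, attn_size, attn_length, 1), matching
# the dim=[1, 3] score reduction here and the dim=[2, 3] context sum in
# example 3.
hidden = torch.randn(batch, attn_size, attn_length, 1)

# Precomputed projection of the encoder states (here a 1x1 convolution),
# permuted to (batch, 1, attn_length, global_attention_vec_size) so it
# broadcasts against the reshaped query inside global_attention.
conv_g = nn.Conv2d(attn_size, global_attention_vec_size, kernel_size=1)
global_hidden_features = conv_g(hidden).permute(0, 3, 2, 1)

# Learned score vector v_g and the affine map used by the Linear stand-in.
global_v = torch.randn(global_attention_vec_size)
query_proj = nn.Linear(attn_size, global_attention_vec_size, bias=True)

def Linear(x, output_size, bias):
    # Stand-in for the original Linear helper; output_size and bias are
    # already fixed by query_proj above.
    return query_proj(x)

query = torch.randn(batch, attn_size)
weights = global_attention(query)   # shape: (batch, attn_length)

With analogous local_* and unprefixed definitions, local_attention (example 2) and attention (example 3) run the same way.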
Example 2
def local_attention(query):
    # Linear map of the query into the local attention vector space.
    y = Linear(query, local_attention_vec_size, True)
    y = y.view(-1, 1, 1, local_attention_vec_size)
    # Attention mask is a softmax of v_l^{\top} * tanh(...), scored at
    # every attention position.
    s = torch.sum(local_v * torch.tanh(local_hidden_features + y),
                  dim=[1, 3])
    # Attention weights, i.e., alpha in eq. [2].
    a = torch.softmax(s, dim=-1)
    return a
Example 3
def attention(query):
    # Linear map of the query into the attention vector space.
    y = Linear(query, attention_vec_size, True)
    y = y.view(-1, 1, 1, attention_vec_size)
    # Attention mask is a softmax of v_d^{\top} * tanh(...).
    s = torch.sum(v * torch.tanh(hidden_features + y), dim=[1, 3])
    # Attention weights, i.e., gamma in eq. [7].
    a = torch.softmax(s, dim=-1)
    # Attention-weighted context vector, eq. [8].
    d = torch.sum(a.view(-1, 1, attn_length, 1) * hidden, dim=[2, 3])
    return d.view(-1, attn_size)
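
In the notation of the comments (with W_d and W'_d standing in for the precomputed hidden_features projection and the Linear map; these symbol names are assumptions, not taken from the original), example 3 is standard additive attention followed by a weighted sum of the encoder states:

\gamma_i = \operatorname{softmax}_i\!\left( v_d^{\top} \tanh\!\left( W_d h_i + W'_d\, q \right) \right),
\qquad
d = \sum_{i=1}^{L} \gamma_i\, h_i,

where h_i is the i-th encoder state and L = attn_length. Examples 1 and 2 compute the same kind of weights with the global_* and local_* parameters but return them without the weighted sum.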