from __future__ import division
from __future__ import absolute_import
import torch
from overrides import overrides

from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention


class CosineMatrixAttention(MatrixAttention):
    u"""
    Computes attention between every entry in matrix_1 with every entry in matrix_2 using cosine
    similarity.
    """

    @overrides
    def forward(self, matrix_1, matrix_2):
        a_norm = matrix_1 / (matrix_1.norm(p=2, dim=-1, keepdim=True) + 1e-13)
        b_norm = matrix_2 / (matrix_2.norm(p=2, dim=-1, keepdim=True) + 1e-13)
        return torch.bmm(a_norm, b_norm.transpose(-1, -2))


CosineMatrixAttention = MatrixAttention.register(u"cosine")(
    CosineMatrixAttention)
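

# A minimal usage sketch, not part of the original module: the batch size,
# sequence lengths, and embedding dimension below are illustrative assumptions.
if __name__ == u"__main__":
    attention = CosineMatrixAttention()
    matrix_1 = torch.rand(2, 3, 5)  # (batch_size, num_rows_1, embedding_dim)
    matrix_2 = torch.rand(2, 4, 5)  # (batch_size, num_rows_2, embedding_dim)
    similarities = attention(matrix_1, matrix_2)
    print(similarities.shape)  # torch.Size([2, 3, 4]): one cosine score per pair of rows

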
from __future__ import division
from __future__ import absolute_import
import torch
from torch.nn.parameter import Parameter
from overrides import overrides

from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
from allennlp.nn import Activation


class BilinearMatrixAttention(MatrixAttention):
    u"""
    Computes attention between two matrices using a bilinear attention function, i.e. an
    ``X W Y^T + b`` computation with a learned weight matrix ``W`` and learned bias ``b``.
    """

    def __init__(self, matrix_1_dim, matrix_2_dim, activation=None, use_input_biases=False):
        super(BilinearMatrixAttention, self).__init__()
        if use_input_biases:
            matrix_1_dim += 1
            matrix_2_dim += 1

        self._weight_matrix = Parameter(torch.Tensor(matrix_1_dim, matrix_2_dim))
        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name(u'linear')()
        self._use_input_biases = use_input_biases
        self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.xavier_uniform_(self._weight_matrix)
        self._bias.data.fill_(0)

    @overrides
    def forward(self, matrix_1, matrix_2):

        if self._use_input_biases:
            bias1 = matrix_1.new_ones(matrix_1.size()[:-1] + (1, ))
            bias2 = matrix_2.new_ones(matrix_2.size()[:-1] + (1, ))

            matrix_1 = torch.cat([matrix_1, bias1], -1)
            matrix_2 = torch.cat([matrix_2, bias2], -1)
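
        # Batched bilinear form: for each element in the batch this computes
        # matrix_1 @ W @ matrix_2^T, producing one score per pair of rows.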
        intermediate = torch.matmul(matrix_1.unsqueeze(1),
                                    self._weight_matrix.unsqueeze(0))
        final = torch.matmul(intermediate,
                             matrix_2.unsqueeze(1).transpose(2, 3))
        return self._activation(final.squeeze(1) + self._bias)


BilinearMatrixAttention = MatrixAttention.register(u"bilinear")(
    BilinearMatrixAttention)
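

# A minimal usage sketch, not part of the original module: the input dimensions
# and tensor shapes below are illustrative assumptions.
if __name__ == u"__main__":
    attention = BilinearMatrixAttention(matrix_1_dim=5, matrix_2_dim=7)
    matrix_1 = torch.rand(2, 3, 5)  # (batch_size, num_rows_1, matrix_1_dim)
    matrix_2 = torch.rand(2, 4, 7)  # (batch_size, num_rows_2, matrix_2_dim)
    similarities = attention(matrix_1, matrix_2)
    print(similarities.shape)  # torch.Size([2, 3, 4])

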
from __future__ import division
from __future__ import absolute_import
from overrides import overrides

from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
from allennlp.modules.similarity_functions import DotProductSimilarity


class LegacyMatrixAttention(MatrixAttention):
    u"""
    The legacy implementation of ``MatrixAttention``.

    It should be considered deprecated as it uses much more memory than the newer specialized
    ``MatrixAttention`` modules.

    Parameters
    ----------
    similarity_function: ``SimilarityFunction``, optional (default=``DotProductSimilarity``)
        The similarity function to use when computing the attention.
    """
    def __init__(self, similarity_function=None):
        super(LegacyMatrixAttention, self).__init__()
        self._similarity_function = similarity_function or DotProductSimilarity()

    @overrides
    def forward(self, matrix_1, matrix_2):
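        # Tile both matrices to (batch_size, num_rows_1, num_rows_2, embedding_dim)
        # so the pairwise similarity function can be applied to every pair of rows.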
        tiled_matrix_1 = matrix_1.unsqueeze(2).expand(matrix_1.size()[0],
                                                      matrix_1.size()[1],
                                                      matrix_2.size()[1],
                                                      matrix_1.size()[2])
        tiled_matrix_2 = matrix_2.unsqueeze(1).expand(matrix_2.size()[0],
                                                      matrix_1.size()[1],
                                                      matrix_2.size()[1],
                                                      matrix_2.size()[2])
        return self._similarity_function(tiled_matrix_1, tiled_matrix_2)


LegacyMatrixAttention = MatrixAttention.register(u"legacy")(
    LegacyMatrixAttention)
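

# A minimal usage sketch, not part of the original module: the shapes below are
# illustrative assumptions, and the default ``DotProductSimilarity`` is used.
if __name__ == u"__main__":
    import torch

    attention = LegacyMatrixAttention()
    matrix_1 = torch.rand(2, 3, 5)  # (batch_size, num_rows_1, embedding_dim)
    matrix_2 = torch.rand(2, 4, 5)  # (batch_size, num_rows_2, embedding_dim)
    similarities = attention(matrix_1, matrix_2)
    print(similarities.shape)  # torch.Size([2, 3, 4])

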
from __future__ import division
from __future__ import absolute_import
import math

import torch
from torch.nn.parameter import Parameter
from overrides import overrides

from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention
from allennlp.nn import util, Activation


class LinearMatrixAttention(MatrixAttention):
    u"""
    Computes attention between every entry in matrix_1 and every entry in matrix_2 by
    combining the two vectors (as specified by ``combination``) and taking a dot product
    with a learned weight vector, followed by an optional activation.
    """

    def __init__(self, tensor_1_dim, tensor_2_dim, combination=u'x,y', activation=None):
        super(LinearMatrixAttention, self).__init__()
        self._combination = combination
        combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
        self._weight_vector = Parameter(torch.Tensor(combined_dim))
        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name(u'linear')()
        self.reset_parameters()

    def reset_parameters(self):
        std = math.sqrt(6 / (self._weight_vector.size(0) + 1))
        self._weight_vector.data.uniform_(-std, std)
        self._bias.data.fill_(0)

    @overrides
    def forward(self, matrix_1, matrix_2):  # pylint: disable=arguments-differ
        # TODO(mattg): Remove the need for this tiling.
        # https://github.com/allenai/allennlp/pull/1235#issuecomment-391540133
        tiled_matrix_1 = matrix_1.unsqueeze(2).expand(matrix_1.size()[0],
                                                      matrix_1.size()[1],
                                                      matrix_2.size()[1],
                                                      matrix_1.size()[2])
        tiled_matrix_2 = matrix_2.unsqueeze(1).expand(matrix_2.size()[0],
                                                      matrix_1.size()[1],
                                                      matrix_2.size()[1],
                                                      matrix_2.size()[2])

        combined_tensors = util.combine_tensors(
            self._combination, [tiled_matrix_1, tiled_matrix_2])
        dot_product = torch.matmul(combined_tensors, self._weight_vector)
        return self._activation(dot_product + self._bias)


LinearMatrixAttention = MatrixAttention.register(u"linear")(
    LinearMatrixAttention)
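

# A minimal usage sketch, not part of the original module: the dimensions, shapes,
# and combination string below are illustrative assumptions.
if __name__ == u"__main__":
    attention = LinearMatrixAttention(tensor_1_dim=5, tensor_2_dim=5,
                                      combination=u'x,y,x*y')
    matrix_1 = torch.rand(2, 3, 5)  # (batch_size, num_rows_1, tensor_1_dim)
    matrix_2 = torch.rand(2, 4, 5)  # (batch_size, num_rows_2, tensor_2_dim)
    similarities = attention(matrix_1, matrix_2)
    print(similarities.shape)  # torch.Size([2, 3, 4])

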
from __future__ import absolute_import
import torch
from overrides import overrides

from allennlp.modules.matrix_attention.matrix_attention import MatrixAttention


class DotProductMatrixAttention(MatrixAttention):
    u"""
    Computes attention between every entry in matrix_1 with every entry in matrix_2 using a dot
    product.
    """

    @overrides
    def forward(self, matrix_1, matrix_2):
        return matrix_1.bmm(matrix_2.transpose(2, 1))


DotProductMatrixAttention = MatrixAttention.register(u"dot_product")(
    DotProductMatrixAttention)
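

# A minimal usage sketch, not part of the original module: the shapes below are
# illustrative assumptions; both inputs must share the same embedding dimension.
if __name__ == u"__main__":
    attention = DotProductMatrixAttention()
    matrix_1 = torch.rand(2, 3, 5)  # (batch_size, num_rows_1, embedding_dim)
    matrix_2 = torch.rand(2, 4, 5)  # (batch_size, num_rows_2, embedding_dim)
    similarities = attention(matrix_1, matrix_2)
    print(similarities.shape)  # torch.Size([2, 3, 4])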