Example No. 1
from __future__ import absolute_import
import torch

from overrides import overrides
from allennlp.modules.attention.attention import Attention
from allennlp.modules.similarity_functions import DotProductSimilarity, SimilarityFunction


class LegacyAttention(Attention):
    u"""
    Computes attention between a vector and a matrix using a similarity function.
    This should be considered deprecated, as it consumes more memory than the specialized attention modules.
    """
    def __init__(self, similarity_function=None, normalize=True):
        super(LegacyAttention, self).__init__(normalize)
        self._similarity_function = similarity_function or DotProductSimilarity()

    @overrides
    def _forward_internal(self, vector, matrix):
        tiled_vector = vector.unsqueeze(1).expand(vector.size()[0],
                                                  matrix.size()[1],
                                                  vector.size()[1])
        return self._similarity_function(tiled_vector, matrix)


LegacyAttention = Attention.register(u"legacy")(LegacyAttention)
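
A minimal usage sketch for the example above. The shapes and the direct call are illustrative assumptions: the base Attention class is assumed to accept a vector and a matrix and, with normalize=True, to return softmax-normalized attention weights.

import torch

attention = LegacyAttention()        # defaults to DotProductSimilarity, normalize=True
vector = torch.rand(2, 5)            # assumed shape: (batch_size, embedding_dim)
matrix = torch.rand(2, 3, 5)         # assumed shape: (batch_size, num_rows, embedding_dim)
weights = attention(vector, matrix)  # (batch_size, num_rows); each row sums to 1
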
Example No. 2
from __future__ import absolute_import
import torch
from torch.nn.parameter import Parameter
from overrides import overrides

from allennlp.modules.attention.attention import Attention
from allennlp.nn import Activation


class BilinearAttention(Attention):
    u"""
    Computes attention between a vector ``x`` and a matrix ``y`` using a bilinear function:
    ``x^T W y + b``, where ``W`` is a learned weight matrix and ``b`` is a learned bias.

    Parameters
    ----------
    vector_dim : ``int``
        The dimension (last axis size) of the vector ``x``; needed to build the weight matrix.
    matrix_dim : ``int``
        The dimension (last axis size) of the rows of ``y``; needed to build the weight matrix.
    activation : ``Activation``, optional (default=linear (i.e. no activation))
        An activation function applied after the ``x^T W y + b`` calculation.  Default is no
        activation.
    normalize : ``bool``, optional (default: ``True``)
        If true, we normalize the computed similarities with a softmax, to return a probability
        distribution for your attention.  If false, this is just computing a similarity score.
    """
    def __init__(self,
                 vector_dim,
                 matrix_dim,
                 activation=None,
                 normalize=True):
        super(BilinearAttention, self).__init__(normalize)
        self._weight_matrix = Parameter(torch.Tensor(vector_dim, matrix_dim))
        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name(u'linear')()
        self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.xavier_uniform_(self._weight_matrix)
        self._bias.data.fill_(0)

    @overrides
    def _forward_internal(self, vector, matrix):
        intermediate = vector.mm(self._weight_matrix).unsqueeze(1)
        return self._activation(
            intermediate.bmm(matrix.transpose(1, 2)).squeeze(1) + self._bias)


BilinearAttention = Attention.register(u"bilinear")(BilinearAttention)
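
A minimal usage sketch for the example above; the dimensions are illustrative assumptions. Because the weight matrix has shape (vector_dim, matrix_dim), the vector and the matrix rows may have different sizes.

import torch

attention = BilinearAttention(vector_dim=5, matrix_dim=7)
vector = torch.rand(2, 5)            # assumed shape: (batch_size, vector_dim)
matrix = torch.rand(2, 3, 7)         # assumed shape: (batch_size, num_rows, matrix_dim)
weights = attention(vector, matrix)  # (batch_size, num_rows), softmax-normalized by default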