def test_can_init_scaled_dot(self):
    legacy_attention = Attention.from_params(
        Params({"type": "scaled_dot_product", "scaling_factor": 9}))
    assert isinstance(legacy_attention, ScaledDotProductAttention)
def test_can_init_linear(self):
    legacy_attention = Attention.from_params(
        Params({"type": "linear", "tensor_1_dim": 3, "tensor_2_dim": 3}))
    assert isinstance(legacy_attention, LinearAttention)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    elmo_text_field_embedder: TextFieldEmbedder,
    quote_response_encoder: Seq2SeqEncoder,
    quote_response_encoder_aux: Seq2VecEncoder,
    classifier_feedforward: FeedForward,
    classifier_feedforward_2: FeedForward,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    report_auxiliary_metrics: bool = False,
    # predict_mode: bool = False,
) -> None:
    super(SarcasmClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.elmo_text_field_embedder = elmo_text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.num_classes_emotions = self.vocab.get_vocab_size("emotion_labels")
    self.quote_response_encoder = quote_response_encoder
    self.quote_response_encoder_aux = quote_response_encoder_aux
    self.classifier_feedforward = classifier_feedforward
    self.classifier_feedforward_2 = classifier_feedforward_2
    self.attention_seq2seq = Attention(quote_response_encoder.get_output_dim())

    self.label_acc_metrics = {"accuracy": CategoricalAccuracy()}
    self.label_f1_metrics = {}
    self.label_f1_metrics_emotions = {}
    # One F1Measure per sarcasm label and one per auxiliary emotion label.
    for i in range(self.num_classes):
        self.label_f1_metrics[vocab.get_token_from_index(index=i, namespace="labels")] = \
            F1Measure(positive_label=i)
    for i in range(self.num_classes_emotions):
        self.label_f1_metrics_emotions[vocab.get_token_from_index(index=i, namespace="emotion_labels")] = \
            F1Measure(positive_label=i)

    self.loss = torch.nn.CrossEntropyLoss()
    self.report_auxiliary_metrics = report_auxiliary_metrics
    # self.predict_mode = predict_mode
    initializer(self)
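# Illustrative sketch only: the per-label F1Measure dictionaries built in __init__ above are
# typically read out in the model's get_metrics(). That method is not part of this excerpt,
# so the body below (including the "aux_" metric-name prefix) is an assumption about how these
# metrics would usually be reported, not this project's actual implementation.
from typing import Dict


def get_metrics(self, reset: bool = False) -> Dict[str, float]:
    metrics = {"accuracy": self.label_acc_metrics["accuracy"].get_metric(reset)}
    for label, f1 in self.label_f1_metrics.items():
        precision, recall, f1_score = f1.get_metric(reset)  # F1Measure returns (P, R, F1)
        metrics[label + "_F1"] = f1_score
    if self.report_auxiliary_metrics:
        for label, f1 in self.label_f1_metrics_emotions.items():
            precision, recall, f1_score = f1.get_metric(reset)
            metrics["aux_" + label + "_F1"] = f1_score
    return metrics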
import torch
from overrides import overrides

from allennlp.modules.attention.attention import Attention
from allennlp.modules.similarity_functions import DotProductSimilarity, SimilarityFunction


@Attention.register("legacy")
class LegacyAttention(Attention):
    """
    Computes attention between a vector and a matrix using a similarity function.
    This should be considered deprecated, as it consumes more memory than the
    specialized attention modules.
    """
    def __init__(self, similarity_function: SimilarityFunction = None, normalize: bool = True) -> None:
        super(LegacyAttention, self).__init__(normalize)
        self._similarity_function = similarity_function or DotProductSimilarity()

    @overrides
    def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
        # Tile the vector so the similarity function can compare it against every row of the matrix.
        tiled_vector = vector.unsqueeze(1).expand(vector.size()[0],
                                                  matrix.size()[1],
                                                  vector.size()[1])
        return self._similarity_function(tiled_vector, matrix)
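# A minimal usage sketch (not part of the module above). It assumes the class is importable at
# the AllenNLP path allennlp.modules.attention.legacy_attention; the batch size and dimensions
# below are arbitrary toy values.
import torch

from allennlp.modules.attention.legacy_attention import LegacyAttention

# A batch of 2 query vectors (batch_size, embedding_dim) and 2 matrices to attend
# over (batch_size, num_rows, embedding_dim).
vector = torch.rand(2, 4)
matrix = torch.rand(2, 5, 4)

attention = LegacyAttention()        # defaults: DotProductSimilarity, normalize=True
weights = attention(vector, matrix)  # shape: (batch_size, num_rows)

assert weights.shape == (2, 5)
# With normalize=True the similarity scores go through a softmax, so each row sums to 1.
assert torch.allclose(weights.sum(dim=-1), torch.ones(2))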
import torch
from overrides import overrides
from torch.nn.parameter import Parameter

from allennlp.modules.attention.attention import Attention
from allennlp.nn import Activation


@Attention.register("bilinear")
class BilinearAttention(Attention):
    """
    activation : ``Activation``, optional (default=linear (i.e. no activation))
        An activation function applied after the ``x^T W y + b`` calculation.  Default is no
        activation.
    normalize : ``bool``, optional (default: ``True``)
        If true, we normalize the computed similarities with a softmax, to return a probability
        distribution for your attention.  If false, this is just computing a similarity score.
    """
    def __init__(self,
                 vector_dim: int,
                 matrix_dim: int,
                 activation: Activation = None,
                 normalize: bool = True) -> None:
        super(BilinearAttention, self).__init__(normalize)
        self._weight_matrix = Parameter(torch.Tensor(vector_dim, matrix_dim))
        self._bias = Parameter(torch.Tensor(1))
        self._activation = activation or Activation.by_name('linear')()
        self.reset_parameters()

    def reset_parameters(self):
        torch.nn.init.xavier_uniform_(self._weight_matrix)
        self._bias.data.fill_(0)

    @overrides
    def _forward_internal(self, vector: torch.Tensor, matrix: torch.Tensor) -> torch.Tensor:
        # x^T W gives one transformed query per batch element; bmm against the matrix rows
        # then yields one score per row.
        intermediate = vector.mm(self._weight_matrix).unsqueeze(1)
        return self._activation(intermediate.bmm(matrix.transpose(1, 2)).squeeze(1) + self._bias)
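# A parallel usage sketch for the bilinear variant (again not part of the module above, and again
# assuming the standard AllenNLP import path). vector_dim and matrix_dim must match the last
# dimensions of the two inputs, since W has shape (vector_dim, matrix_dim).
import torch

from allennlp.modules.attention.bilinear_attention import BilinearAttention

vector = torch.rand(2, 3)     # (batch_size, vector_dim)
matrix = torch.rand(2, 7, 6)  # (batch_size, num_rows, matrix_dim)

# Scores each row y of the matrix as x^T W y + b, then softmaxes over rows (normalize=True).
attention = BilinearAttention(vector_dim=3, matrix_dim=6)
weights = attention(vector, matrix)

assert weights.shape == (2, 7)
assert torch.allclose(weights.sum(dim=-1), torch.ones(2))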
def test_can_init_cosine(self):
    legacy_attention = Attention.from_params(Params({"type": "cosine"}))
    assert isinstance(legacy_attention, CosineAttention)
def test_can_init_legacy(self):
    legacy_attention = Attention.from_params(Params({"type": "legacy"}))
    assert isinstance(legacy_attention, LegacyAttention)
def test_can_init_dot(self):
    legacy_attention = Attention.from_params(Params({"type": "dot_product"}))
    assert isinstance(legacy_attention, DotProductAttention)