    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 feed_forward: FeedForward,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ModelV21, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder
        self._feed_forward = feed_forward

        encoding_dim = phrase_layer.get_output_dim()
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        # Self-attention block: attends the passage to itself, then fuses the
        # result back in through the linear projections below.
        self._self_matrix_attention = MatrixAttention(
            attention_similarity_function)
        self._linear_layer = TimeDistributed(
            torch.nn.Linear(4 * encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(
            torch.nn.Linear(3 * encoding_dim, encoding_dim))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
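
# A minimal sketch (not from the original source) of how the self-attention
# layers declared above could be wired together in forward(). The
# 4 * encoding_dim fusion [x; attended; x * attended; x - attended] feeding
# _linear_layer is an assumption inferred from the layer sizes, in the spirit
# of DocumentQA-style passage self-attention.
import torch

def self_attention_fusion_sketch(x, matrix_attention, linear_layer, dropout):
    # x: (batch, passage_length, encoding_dim) modeled passage.
    scores = matrix_attention(x, x)                    # (batch, len, len)
    weights = torch.nn.functional.softmax(scores, dim=-1)
    attended = weights.bmm(x)                          # (batch, len, encoding_dim)
    fused = torch.cat([x, attended, x * attended, x - attended], dim=-1)
    return x + dropout(linear_layer(fused))            # residual connection
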
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ModelSQUAD, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._residual_encoder = residual_encoder
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder

        encoding_dim = phrase_layer.get_output_dim()
        self._span_start_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

        self._span_end_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(torch.nn.Linear(encoding_dim, 1))

        self._self_matrix_attention = MatrixAttention(attention_similarity_function)
        self._linear_layer = TimeDistributed(torch.nn.Linear(4*encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(torch.nn.Linear(3*encoding_dim, encoding_dim))

        self._self_atten = TriLinearAttention(encoding_dim)
        
        # TriLinearAttention learns the w_x, w_y and w_xy weight vectors of
        # the trilinear similarity internally, so they are not declared here.

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
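
# For reference, a sketch of the trilinear similarity that TriLinearAttention
# is generally understood to compute, as in BiDAF:
# sim(x, y) = w_x . x + w_y . y + w_xy . (x * y), i.e. w^T [x; y; x * y].
import torch

def trilinear_similarity(x, y, w_x, w_y, w_xy):
    # x: (batch, m, dim), y: (batch, n, dim) -> (batch, m, n) scores.
    x_scores = x.matmul(w_x).unsqueeze(2)              # (batch, m, 1)
    y_scores = y.matmul(w_y).unsqueeze(1)              # (batch, 1, n)
    xy_scores = (x * w_xy).matmul(y.transpose(1, 2))   # (batch, m, n)
    return x_scores + y_scores + xy_scores
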
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 classifier_feedforward: FeedForward,
                 context_encoder: Optional[Seq2SeqEncoder] = None,
                 response_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DialogueContextCoherenceAttentionClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = vocab.get_vocab_size("labels")
        self.context_encoder = context_encoder
        self.response_encoder = response_encoder
        self.attend_feedforward = TimeDistributed(attend_feedforward)
        self.matrix_attention = MatrixAttention(similarity_function)
        self.compare_feedforward = TimeDistributed(compare_feedforward)
        self.classifier_feedforward = classifier_feedforward
        labels = self.vocab.get_index_to_token_vocabulary('labels')
        # Index of the 'neg' label, intended as the positive class for the
        # (currently disabled) F1 metric below.
        pos_label_index = list(labels.keys())[list(labels.values()).index('neg')]

        check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                               "text field embedding dim", "attend feedforward input dim")
        check_dimensions_match(classifier_feedforward.get_output_dim(), self.num_classes,
                               "final output dimension", "number of labels")

        self.metrics = {
            "accuracy": CategoricalAccuracy()
            # "f1": F1Measure(positive_label=pos_label_index)
        }
        self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DecomposableAttention, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = MatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               attend_feedforward.get_input_dim(),
                               "text field embedding dim",
                               "attend feedforward input dim")
        check_dimensions_match(aggregate_feedforward.get_output_dim(),
                               self._num_labels, "final output dimension",
                               "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
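
# A condensed sketch of the attend / compare / aggregate flow of Parikh et
# al. (2016) that these components implement; the real forward() also applies
# masking in the softmaxes, which is omitted here for brevity.
import torch

def decomposable_attention_sketch(premise, hypothesis, attend,
                                  matrix_attention, compare, aggregate):
    # Attend: project both sides, score all pairs, normalize both ways.
    similarity = matrix_attention(attend(premise), attend(hypothesis))
    p2h = torch.nn.functional.softmax(similarity, dim=2)
    h2p = torch.nn.functional.softmax(similarity, dim=1)
    attended_hypothesis = p2h.bmm(hypothesis)            # per premise token
    attended_premise = h2p.transpose(1, 2).bmm(premise)  # per hypothesis token
    # Compare: each token against what it attended to.
    compared_premise = compare(torch.cat([premise, attended_hypothesis], dim=-1))
    compared_hypothesis = compare(torch.cat([hypothesis, attended_premise], dim=-1))
    # Aggregate: sum over time, concatenate, classify.
    aggregated = torch.cat([compared_premise.sum(dim=1),
                            compared_hypothesis.sum(dim=1)], dim=-1)
    return aggregate(aggregated)  # label logits
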
# Example #5
    def __init__(self,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 premise_composer: Optional[Seq2SeqEncoder] = None,
                 hypothesis_composer: Optional[Seq2SeqEncoder] = None,
                 combine_feedforward: Optional[FeedForward] = None,
                 aggregate_feedforward: Optional[FeedForward] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 num_wrapping_dims: int = 0,
                 vocab: Optional[Vocabulary] = None) -> None:

        super(ESIM, self).__init__()

        self.vocab = vocab

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = MatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)

        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder
        self._premise_composer = premise_composer
        self._hypothesis_composer = hypothesis_composer or premise_composer

        self._combine_feedforward = combine_feedforward
        self._aggregate_feedforward = aggregate_feedforward

        self._num_wrapping_dims = num_wrapping_dims
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DecomposableAttention, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = MatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        if text_field_embedder.get_output_dim() != attend_feedforward.get_input_dim():
            raise ConfigurationError("Output dimension of the text_field_embedder (dim: {}) "
                                     "must match the input_dim of the FeedForward layer "
                                     "attend_feedforward (dim: {}).".format(text_field_embedder.get_output_dim(),
                                                                            attend_feedforward.get_input_dim()))
        if aggregate_feedforward.get_output_dim() != self._num_labels:
            raise ConfigurationError("Final output dimension (%d) must equal num labels (%d)" %
                                     (aggregate_feedforward.get_output_dim(), self._num_labels))

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
# Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
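
# Where the recurring 4 * encoding_dim comes from: BiDAF's merged passage
# representation concatenates the encoded passage, the attended question and
# two elementwise products, [h; u~; h * u~; h * h~]. A quick shape check:
import torch

batch, passage_len, encoding_dim = 2, 7, 5
h = torch.randn(batch, passage_len, encoding_dim)        # encoded passage
u_tilde = torch.randn(batch, passage_len, encoding_dim)  # attended question
h_tilde = torch.randn(batch, passage_len, encoding_dim)  # attended passage
merged = torch.cat([h, u_tilde, h * u_tilde, h * h_tilde], dim=-1)
assert merged.size(-1) == 4 * encoding_dim
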
    def test_forward_works_on_simple_input(self):
        attention = MatrixAttention()
        sentence_1_tensor = Variable(
            torch.FloatTensor([[[1, 1, 1], [-1, 0, 1]]]))
        sentence_2_tensor = Variable(
            torch.FloatTensor([[[1, 1, 1], [-1, 0, 1], [-1, -1, -1]]]))
        result = attention(sentence_1_tensor, sentence_2_tensor).data.numpy()
        assert result.shape == (1, 2, 3)
        assert_allclose(result, [[[3, 0, -3], [0, 2, 0]]])
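
# The default MatrixAttention similarity is a dot product, so the expected
# values in the test above can be reproduced with a plain batched matmul:
import torch

s1 = torch.FloatTensor([[[1, 1, 1], [-1, 0, 1]]])
s2 = torch.FloatTensor([[[1, 1, 1], [-1, 0, 1], [-1, -1, -1]]])
print(s1.bmm(s2.transpose(1, 2)))
# tensor([[[ 3.,  0., -3.],
#          [ 0.,  2.,  0.]]])
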
# Example #9
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 evaluation_json_file: str = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                      num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))
        initializer(self)
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._official_em = Average()
        self._official_f1 = Average()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        if evaluation_json_file:
            logger.info("Prepping official evaluation dataset from %s", evaluation_json_file)
            with open(evaluation_json_file) as dataset_file:
                dataset_json = json.load(dataset_file)
            question_to_answers = {}
            for article in dataset_json['data']:
                for paragraph in article['paragraphs']:
                    for question in paragraph['qas']:
                        question_id = question['id']
                        answers = [answer['text'] for answer in question['answers']]
                        question_to_answers[question_id] = answers

            self._official_eval_dataset = question_to_answers
        else:
            self._official_eval_dataset = None
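
# For reference, the minimal SQuAD-style JSON shape the loop above expects:
#
#   {"data": [{"paragraphs": [{"qas": [
#       {"id": "...", "question": "...",
#        "answers": [{"text": "..."}]}]}]}]}
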
# Example #10
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 preload_path: Optional[str] = None) -> None:
        super(DecomposableAttention, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = MatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder

        # self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                               "text field embedding dim", "attend feedforward input dim")
        # check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
        #                        "final output dimension", "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # Do we want to initialize with the SNLI stuff? let's say yes.
        # 'snli-decomposable-attention/weights.th'
        if preload_path is not None:
            logger.info("Preloading!")
            preload = torch.load(preload_path)
            own_state = self.state_dict()
            for name, param in preload.items():
                if name not in own_state:
                    logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size()))
                elif param.size() == own_state[name].size():
                    own_state[name].copy_(param)
                else:
                    logger.info("Network has {} with size {}, ckpt has {}".format(name,
                                                                                  own_state[name].size(),
                                                                                  param.size()))

            # Report missing keys once, after the whole checkpoint is copied.
            missing = set(own_state.keys()) - set(preload.keys())
            if len(missing) > 0:
                logger.info("We couldn't find {}".format(','.join(missing)))
# Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 dropout: float = 0.5,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        self._matrix_attention = MatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward

        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = VariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_feedforward.get_input_dim(),
                               "encoder output dim * 4",
                               "projection feedforward input dim")
        check_dimensions_match(projection_feedforward.get_output_dim(),
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
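
# Why projection_feedforward must take 4 * encoder output dim: ESIM's
# enhancement step concatenates each encoded sequence with its attended
# counterpart plus their difference and elementwise product. A shape check:
import torch

a = torch.randn(2, 6, 300)        # encoded premise
a_tilde = torch.randn(2, 6, 300)  # hypothesis attended to the premise
enhanced = torch.cat([a, a_tilde, a - a_tilde, a * a_tilde], dim=-1)
assert enhanced.size(-1) == 4 * 300
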
# Example #13
    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'DialogueContextHierarchicalCoherenceAttentionClassifier':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)

        utterance_encoder = Seq2VecEncoder.from_params(params.pop("utterance_encoder"))
        context_encoder = Seq2SeqEncoder.from_params(params.pop("context_encoder"))

        response_encoder_params = params.pop("response_encoder", None)
        if response_encoder_params is not None:
            response_encoder = Seq2SeqEncoder.from_params(response_encoder_params)
        else:
            response_encoder = None

        attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward'))
        similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
        compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward'))
        classifier_feedforward = FeedForward.from_params(params.pop("classifier_feedforward"))
        final_classifier_feedforward = FeedForward.from_params(params.pop("final_classifier_feedforward"))

        initializer = InitializerApplicator.from_params(params.pop("initializer", []))
        regularizer = RegularizerApplicator.from_params(params.pop("regularizer", []))

        matrix_attention = MatrixAttention(similarity_function)

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   attend_feedforward=attend_feedforward,
                   matrix_attention=matrix_attention,
                   compare_feedforward=compare_feedforward,
                   classifier_feedforward=classifier_feedforward,
                   final_classifier_feedforward=final_classifier_feedforward,
                   utterance_encoder=utterance_encoder,
                   context_encoder=context_encoder,
                   response_encoder=response_encoder,
                   initializer=initializer,
                   regularizer=regularizer)
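
# A hypothetical Params blob this from_params would consume; every key below
# is popped above, but the encoder types and dimensions are placeholders:
from allennlp.common import Params

example_params = Params({
    "text_field_embedder": {"tokens": {"type": "embedding", "embedding_dim": 100}},
    "utterance_encoder": {"type": "boe", "embedding_dim": 100},
    "context_encoder": {"type": "lstm", "input_size": 100, "hidden_size": 100},
    "attend_feedforward": {"input_dim": 100, "num_layers": 1,
                           "hidden_dims": 100, "activations": "relu"},
    "compare_feedforward": {"input_dim": 200, "num_layers": 1,
                            "hidden_dims": 100, "activations": "relu"},
    "classifier_feedforward": {"input_dim": 200, "num_layers": 1,
                               "hidden_dims": 2, "activations": "linear"},
    "final_classifier_feedforward": {"input_dim": 4, "num_layers": 1,
                                     "hidden_dims": 2, "activations": "linear"},
    "similarity_function": {"type": "dot_product"},
})
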
# Example #14
import pytest
import torch

from allennlp.modules import MatrixAttention
from allennlp.modules.matrix_attention import BilinearMatrixAttention, LinearMatrixAttention


@pytest.mark.parametrize("attention_type", MatrixAttention.list_available())
def test_all_attention_works_the_same(attention_type: str):
    module_cls = MatrixAttention.by_name(attention_type)

    matrix1 = torch.FloatTensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
    matrix2 = torch.FloatTensor([[[1, 2, 3], [4, 5, 6]]])

    if module_cls in {BilinearMatrixAttention, LinearMatrixAttention}:
        module = module_cls(matrix1.size(-1), matrix2.size(-1))
    else:
        module = module_cls()

    output = module(matrix1, matrix2)
    assert tuple(output.size()) == (1, 3, 2)
# Example #15
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()

        # FC3 is a project-specific feed-forward block defined elsewhere;
        # together with the 2-way linear head below it presumably scores
        # question/passage compatibility (answerability).
        self._compat_layer = FC3(encoding_dim * 4 + modeling_dim)
        self._compat_pred_layer = torch.nn.Linear(encoding_dim * 4 + modeling_dim, 2)

        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these
        # aren't necessarily obvious from the configuration files, so we check
        # here.
        if modeling_layer.get_input_dim() != 4 * encoding_dim:
            raise ConfigurationError(
                "The input dimension to the modeling_layer must be "
                "equal to 4 times the encoding dimension of the phrase_layer. "
                "Found {} and 4 * {} respectively.".format(
                    modeling_layer.get_input_dim(), encoding_dim))
        if text_field_embedder.get_output_dim() != phrase_layer.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder (embedding_dim + "
                "char_cnn) must match the input dimension of the phrase_encoder. "
                "Found {} and {}, respectively.".format(
                    text_field_embedder.get_output_dim(),
                    phrase_layer.get_input_dim()))

        if span_end_encoder.get_input_dim() != encoding_dim * 4 + modeling_dim * 3:
            raise ConfigurationError(
                "The input dimension of the span_end_encoder should be equal to "
                "4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim. "
                "Found {} and (4 * {} + 3 * {}) "
                "respectively.".format(span_end_encoder.get_input_dim(),
                                       encoding_dim, modeling_dim))

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._compat_accuracy = BooleanAccuracy()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
    def test_can_build_from_params(self):
        params = Params({'similarity_function': {'type': 'cosine'}})
        attention = MatrixAttention.from_params(params)
        # pylint: disable=protected-access
        assert attention._similarity_function.__class__.__name__ == 'CosineSimilarity'
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 num_highway_layers,
                 phrase_layer,
                 attention_similarity_function,
                 modeling_layer,
                 span_end_encoder,
                 dropout=0.2,
                 mask_lstms=True,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)
        # Initialize layers.
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        # Initialize start/end span predictors.
        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = \
            TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = \
            TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))

        # Check dimensions.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # If dropout has been set, add Dropout layer.
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        self._mask_lstms = mask_lstms

        initializer(self)
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 num_highway_layers,
                 phrase_layer,
                 attention_similarity_function,
                 modeling_layer,
                 cove_layer=None,
                 elmo_layer=None,
                 deep_elmo=False,
                 dropout=0.2,
                 mask_lstms=True,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(HeadlessPairAttnEncoder, self).__init__(vocab)  #, regularizer)

        if text_field_embedder is None:  # just using ELMo embeddings
            self._text_field_embedder = lambda x: x
            d_emb = 0
            self._highway_layer = lambda x: x
        else:
            self._text_field_embedder = text_field_embedder
            d_emb = text_field_embedder.get_output_dim()
            self._highway_layer = TimeDistributed(
                Highway(d_emb, num_highway_layers))

        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._cove = cove_layer
        self._elmo = elmo_layer
        self._deep_elmo = deep_elmo
        self.pad_idx = vocab.get_token_index(vocab._padding_token)

        d_inp_phrase = phrase_layer.get_input_dim()
        d_out_phrase = phrase_layer.get_output_dim()
        d_out_model = modeling_layer.get_output_dim()
        d_inp_model = modeling_layer.get_input_dim()
        self.output_dim = d_out_model

        if (elmo_layer is None and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and not deep_elmo and d_inp_model != 2 * d_out_phrase) or \
            (elmo_layer is not None and deep_elmo and d_inp_model != 2 * d_out_phrase + 1024):
            raise ConfigurationError(
                "The input dimension to the modeling_layer must be "
                "equal to 2 times the output dimension of the phrase_layer "
                "(plus 1024 when using deep ELMo). "
                "Found {} and 2 * {} respectively.".format(
                    d_inp_model, d_out_phrase))
        if (cove_layer is None and elmo_layer is None and d_emb != d_inp_phrase) \
            or (cove_layer is not None and d_emb + 600 != d_inp_phrase) \
            or (elmo_layer is not None and d_emb + 1024 != d_inp_phrase):
            raise ConfigurationError(
                "The output dimension of the text_field_embedder (plus 600 "
                "for CoVe or 1024 for ELMo, when enabled) must match the "
                "input dimension of the phrase_encoder. Found {} and {} "
                "respectively.".format(d_emb, d_inp_phrase))
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
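
# A summary of the phrase_layer input dims implied by the checks above,
# assuming the standard 600-d CoVe and 1024-d ELMo vectors:
#   plain embeddings : d_emb
#   with CoVe        : d_emb + 600
#   with ELMo        : d_emb + 1024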