コード例 #1
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_probability: FeedForward,
                 edge_probability: FeedForward,
                 premise_encoder: Seq2SeqEncoder,
                 edge_embedding: Embedding,
                 use_encoding_for_node: bool,
                 ignore_edges: bool,
                 attention_similarity: SimilarityFunction,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        """Store the sub-modules of the tree attention model and initialise it.

        ``attention_similarity`` is wrapped in a ``LegacyMatrixAttention`` which is
        in turn wrapped in ``SingleTimeDistributed`` (second argument ``0``).
        ``phrase_probability`` and, unless ``ignore_edges`` is set,
        ``edge_probability`` are wrapped in ``TimeDistributed``.  The edge-related
        attributes (``_num_edges``, ``_edge_probability``, ``_edge_embedding``) are
        only created when ``ignore_edges`` is false.  ``initializer`` is applied to
        the finished model as the last step.
        """
        super().__init__(vocab)

        # Plain configuration values (not torch modules).
        self._ignore_edges = ignore_edges
        self._use_encoding_for_node = use_encoding_for_node

        # Sub-modules are assigned in the original order so that module
        # registration order is unchanged.
        self._text_field_embedder = text_field_embedder
        self._premise_encoder = premise_encoder
        node_attention = LegacyMatrixAttention(attention_similarity)
        self._nodes_attention = SingleTimeDistributed(node_attention, 0)
        self._num_labels = vocab.get_vocab_size(namespace="labels")
        self._phrase_probability = TimeDistributed(phrase_probability)

        if not ignore_edges:
            self._num_edges = vocab.get_vocab_size(namespace="edges")
            self._edge_probability = TimeDistributed(edge_probability)
            self._edge_embedding = edge_embedding

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
コード例 #2
0
    def __init__(self,
                 similarity_function: SimilarityFunction,
                 response_projection_feedforward: FeedForward,
                 response_inference_encoder: Seq2SeqEncoder,
                 response_input_feedforward: Optional[FeedForward] = None,
                 source_input_feedforward: Optional[FeedForward] = None,
                 source_projection_feedforward: Optional[FeedForward] = None,
                 source_inference_encoder: Optional[Seq2SeqEncoder] = None,
                 dropout: float = 0.5,
                 response_only: bool = False) -> None:
        """Store the response-side and source-side sub-modules.

        Each source-side component falls back to its response-side counterpart
        when it is not supplied (or is otherwise falsy).  ``similarity_function``
        is wrapped in a ``LegacyMatrixAttention``.  When ``dropout`` is truthy both
        a ``torch.nn.Dropout`` and an ``InputVariationalDropout`` are created with
        that value; otherwise both attributes are ``None``.

        ``response_only``: whether to only consider the response and alignments
        from the source to the response.
        """
        super().__init__()

        # Response side.
        self._response_input_feedforward = response_input_feedforward
        self._response_projection_feedforward = response_projection_feedforward
        self._response_inference_encoder = response_inference_encoder

        # Source side: reuse the response components where none were given.
        self._source_input_feedforward = (
            source_input_feedforward
            if source_input_feedforward
            else response_input_feedforward
        )
        self._source_projection_feedforward = (
            source_projection_feedforward
            if source_projection_feedforward
            else response_projection_feedforward
        )
        self._source_inference_encoder = (
            source_inference_encoder
            if source_inference_encoder
            else response_inference_encoder
        )

        self._matrix_attention = LegacyMatrixAttention(similarity_function)

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._response_only = response_only
    def __init__(self,
                 input_size: int,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True) -> None:
        """Build the BiDAF interaction encoder from its configured layers.

        Parameters
        ----------
        input_size:
            Input dimension of the highway layer; must equal the phrase layer's
            input dimension.
        num_highway_layers:
            Number of layers passed to ``Highway``.
        phrase_layer:
            Encoder stored as ``_phrase_layer``.
        similarity_function:
            Wrapped in a ``LegacyMatrixAttention``.
        modeling_layer:
            Encoder stored as ``_modeling_layer``; its input dimension must be
            four times the phrase layer's output dimension.
        dropout:
            Probability for ``torch.nn.Dropout``; when not greater than zero an
            identity function is used instead.
        mask_lstms:
            Stored as ``_mask_lstms``.

        Raises
        ------
        Whatever ``check_dimensions_match`` raises when the layer dimensions do
        not line up.
        """
        super(BidafInteractionEncoder, self).__init__()

        self._highway_layer = TimeDistributed(
            Highway(input_size, num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer

        encoding_dim = phrase_layer.get_output_dim()

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(input_size, phrase_layer.get_input_dim(),
                               "input_size", "phrase layer input dim")

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            # Identity stand-in so callers can apply ``self._dropout`` unconditionally.
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms
コード例 #4
0
    def __init__(self,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 document_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        """Store the decomposable-attention components and build the scorer.

        ``attend_feedforward`` and ``compare_feedforward`` are wrapped in
        ``TimeDistributed``; ``similarity_function`` is wrapped in a
        ``LegacyMatrixAttention``.  The scorer is a one-layer ``FeedForward`` with
        an identity activation and no dropout, mapping twice the document
        encoder's output dimension to a single value.  ``initializer`` is applied
        to the finished module as the last step.
        """
        super().__init__()

        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._document_encoder = document_encoder

        document_dim = self._document_encoder.get_output_dim()
        scorer_settings = dict(input_dim=2 * document_dim,
                               num_layers=1,
                               hidden_dims=1,
                               activations=lambda x: x,
                               dropout=0.)
        self._scorer = FeedForward(**scorer_settings)

        initializer(self)
コード例 #5
0
 def test_forward_works_on_simple_input(self):
     """Dot-product attention between a 2-row and a 3-row input gives a (1, 2, 3) matrix."""
     first_rows = [[[1, 1, 1], [-1, 0, 1]]]
     second_rows = [[[1, 1, 1], [-1, 0, 1], [-1, -1, -1]]]
     first_sentence = Variable(torch.FloatTensor(first_rows))
     second_sentence = Variable(torch.FloatTensor(second_rows))

     matrix_attention = LegacyMatrixAttention(DotProductSimilarity())
     similarities = matrix_attention(first_sentence, second_sentence)
     as_numpy = similarities.data.numpy()

     # Entry [0, i, j] is the dot product of row i of the first input with
     # row j of the second input.
     expected = [[[3, 0, -3], [0, 2, 0]]]
     assert as_numpy.shape == (1, 2, 3)
     assert_allclose(as_numpy, expected)
コード例 #6
0
    def __init__(
        self,
        input_dim: int,
        projection_dim: int = None,
        similarity_function: SimilarityFunction = DotProductSimilarity(),
        num_attention_heads: int = 1,
        combination: str = '1,2',
        output_dim: int = None
    ) -> None:
        """Configure the input projection, attention and output projection.

        When ``projection_dim`` is falsy no input projection is applied (an
        identity function is stored) and the projection dimension defaults to
        ``input_dim``.  Likewise, when ``output_dim`` is falsy the output
        projection is the identity and the output dimension is the combined
        dimension computed by ``util.get_combined_dim``.

        Raises ``ConfigurationError`` when the number of attention heads is
        inconsistent with ``similarity_function``.
        """
        super().__init__()
        self._input_dim = input_dim

        if not projection_dim:
            self._projection = lambda x: x
            projection_dim = input_dim
        else:
            self._projection = torch.nn.Linear(input_dim, projection_dim)

        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._num_attention_heads = num_attention_heads

        # Head-count consistency checks, written as flat guard clauses.
        multi_headed = isinstance(similarity_function, MultiHeadedSimilarity)
        if multi_headed and num_attention_heads == 1:
            raise ConfigurationError("Similarity function has multiple heads but encoder doesn't")
        if multi_headed and num_attention_heads != similarity_function.num_heads:
            raise ConfigurationError("Number of heads don't match between similarity function "
                                     "and encoder: %d, %d" % (num_attention_heads,
                                                              similarity_function.num_heads))
        if not multi_headed and num_attention_heads > 1:
            raise ConfigurationError("Encoder has multiple heads but similarity function doesn't")

        self._combination = combination
        combined_dim = util.get_combined_dim(combination, [input_dim, projection_dim])

        if not output_dim:
            self._output_projection = lambda x: x
            self._output_dim = combined_dim
        else:
            self._output_projection = Linear(combined_dim, output_dim)
            self._output_dim = output_dim
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        attend_feedforward: FeedForward,
        similarity_function: SimilarityFunction,
        compare_feedforward: FeedForward,
        aggregate_feedforward: FeedForward,
        premise_encoder: Optional[Seq2SeqEncoder] = None,
        hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the decomposable attention components and validate dimensions.

        The attend and compare feed-forward networks are wrapped in
        ``TimeDistributed``; ``similarity_function`` is wrapped in a
        ``LegacyMatrixAttention``.  The hypothesis encoder falls back to the
        premise encoder when it is not given.  ``check_dimensions_match`` verifies
        that the embedder output feeds the attend network and that the aggregate
        network's output size equals the size of the ``labels`` vocabulary
        namespace.  ``initializer`` is applied to the model as the last step.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = (
            hypothesis_encoder if hypothesis_encoder else premise_encoder
        )

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            attend_feedforward.get_input_dim(),
            "text field embedding dim",
            "attend feedforward input dim",
        )
        check_dimensions_match(
            aggregate_feedforward.get_output_dim(),
            self._num_labels,
            "final output dimension",
            "number of labels",
        )

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
コード例 #8
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        modeling_layer: Seq2SeqEncoder,
        span_end_encoder: Seq2SeqEncoder,
        dropout: float = 0.2,
        mask_lstms: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Build the BiDAF variant with an extra 4-way action predictor.

        Besides storing the supplied layers, this creates linear predictors for
        the action (``modeling_dim -> 4``), the span start
        (``4 * encoding_dim + modeling_dim -> 1``) and the span end
        (``4 * encoding_dim + span_end_encoding_dim -> 1``), checks that the
        layer dimensions are mutually consistent, sets up the metrics and
        finally applies ``initializer``.

        ``dropout`` is used for ``torch.nn.Dropout`` when greater than zero;
        otherwise an identity function is stored.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        highway = Highway(text_field_embedder.get_output_dim(), num_highway_layers)
        self._highway_layer = TimeDistributed(highway)
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()

        # Predictors are created in the original order (action, start, end).
        self._action_predictor = torch.nn.Linear(modeling_dim, 4)
        start_features = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(start_features, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        end_features = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(end_features, 1))

        # The layer dimensions have to agree with each other, which is not
        # always obvious from a configuration file, so verify them here.
        check_dimensions_match(
            modeling_layer.get_input_dim(), 4 * encoding_dim,
            "modeling layer input dim", "4 * encoding dim")
        check_dimensions_match(
            text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
            "text field embedder output dim", "phrase layer input dim")
        check_dimensions_match(
            span_end_encoder.get_input_dim(), 4 * encoding_dim + 3 * modeling_dim,
            "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

        self._action_accuracy = CategoricalAccuracy()
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
        self._mask_lstms = mask_lstms

        initializer(self)
コード例 #9
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        attend_feedforward: FeedForward,
        similarity_function: SimilarityFunction,
        compare_feedforward: FeedForward,
        aggregate_feedforward: FeedForward,
        use_sentiment: bool,
        use_tfidf: bool,
        headline_encoder: Optional[Seq2SeqEncoder] = None,
        body_encoder: Optional[Seq2SeqEncoder] = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the decomposable attention components for headline/body input.

        The attend and compare networks are wrapped in ``TimeDistributed`` and
        ``similarity_function`` in a ``LegacyMatrixAttention``.  The body encoder
        falls back to the headline encoder when it is not given.
        ``use_sentiment`` and ``use_tfidf`` are stored as public flags.
        ``initializer`` is applied to the model as the last step.
        """
        super().__init__(vocab, regularizer)

        # Feature switches and label count (plain attributes).
        self.use_sentiment = use_sentiment
        self.use_tfidf = use_tfidf
        self._num_labels = vocab.get_vocab_size(namespace="labels")

        # Sub-modules, assigned in the original order.
        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._headline_encoder = headline_encoder
        self._body_encoder = body_encoder if body_encoder else headline_encoder

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
コード例 #10
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Store the model's sub-modules and validate their dimensions.

        ``similarity_function`` is wrapped in a ``LegacyMatrixAttention``.  When
        ``dropout`` is truthy a ``torch.nn.Dropout`` and an
        ``InputVariationalDropout`` are created with that value; otherwise both
        attributes are ``None``.  ``check_dimensions_match`` verifies the chain
        embedder -> encoder -> (x4) projection -> inference encoder.
        ``initializer`` is applied to the model as the last step.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim",
                               "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_feedforward.get_input_dim(),
                               "encoder output dim",
                               "projection feedforward input")
        check_dimensions_match(projection_feedforward.get_output_dim(),
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)
コード例 #11
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        projection_feedforward: FeedForward,
        inference_encoder: Seq2SeqEncoder,
        output_feedforward: FeedForward,
        output_logit: FeedForward,
        parser_model_path: str,
        parser_cuda_device: int,
        freeze_parser: bool,
        dropout: float = 0.5,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the sub-modules and attach a parser loaded from an archive.

        The parser is loaded with ``load_archive(parser_model_path,
        cuda_device=parser_cuda_device)``.  Gradients are switched off for its
        ``_head_sentinel`` and for every parameter of its child modules; when
        ``freeze_parser`` is false, gradients are switched back on for the
        parameters of ``parser.encoder`` only.  ``initializer`` is applied to the
        whole model afterwards.

        When ``dropout`` is truthy a ``torch.nn.Dropout`` and an
        ``InputVariationalDropout`` are created; otherwise both are ``None``.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(
            text_field_embedder.get_output_dim(), encoder.get_input_dim(),
            "text field embedding dim", "encoder input dim")
        check_dimensions_match(
            encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
            "encoder output dim", "projection feedforward input")
        check_dimensions_match(
            projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
            "proj feedforward output dim", "inference lstm input dim")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        parser_archive = load_archive(parser_model_path,
                                      cuda_device=parser_cuda_device)
        self._parser = parser_archive.model

        # Switch gradients off everywhere in the parser first ...
        self._parser._head_sentinel.requires_grad = False
        for submodule in self._parser.children():
            for weight in submodule.parameters():
                weight.requires_grad = False
        # ... then re-enable them for the parser's encoder unless it is frozen.
        if not freeze_parser:
            for weight in self._parser.encoder.parameters():
                weight.requires_grad = True

        initializer(self)
コード例 #12
0
ファイル: bidaf.py プロジェクト: mmazab/LifeQA
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        num_highway_layers: int,
        phrase_layer: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        modeling_layer: Seq2VecEncoder,
        answers_encoder: Seq2VecEncoder,
        classifier_feedforward: FeedForward,
        dropout: float = 0.2,
        mask_lstms: bool = True,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the BiDAF layers plus the answer encoder and classifier.

        The highway layer is wrapped in ``TimeDistributed`` and that wrapper is
        wrapped once more as ``_time_distributed_highway_layer``; the answers
        encoder is also wrapped in ``TimeDistributed``.  ``similarity_function``
        is wrapped in a ``LegacyMatrixAttention``.  ``check_dimensions_match``
        verifies the modeling-layer and phrase-layer input dimensions.
        ``dropout`` is used for ``torch.nn.Dropout`` when greater than zero,
        otherwise an identity function is stored.  ``initializer`` is applied to
        the model as the last step.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        highway = Highway(text_field_embedder.get_output_dim(), num_highway_layers)
        self._highway_layer = TimeDistributed(highway)

        self._classifier_feedforward = classifier_feedforward

        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer

        encoding_dim = phrase_layer.get_output_dim()

        # Second TimeDistributed wrapper around the already-wrapped highway layer.
        self._time_distributed_highway_layer = TimeDistributed(self._highway_layer)
        self._answers_encoder = TimeDistributed(answers_encoder)

        # The layer dimensions have to agree with each other, which is not
        # always obvious from a configuration file, so verify them here.
        check_dimensions_match(
            modeling_layer.get_input_dim(), 4 * encoding_dim,
            "modeling layer input dim", "4 * encoding dim")
        check_dimensions_match(
            text_field_embedder.get_output_dim(), phrase_layer.get_input_dim(),
            "text field embedder output dim", "phrase layer input dim")

        self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)

        self._mask_lstms = mask_lstms
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
コード例 #13
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        attend_feedforward: FeedForward,
        similarity_function: SimilarityFunction,
        compare_feedforward: FeedForward,
        aggregate_feedforward: FeedForward,
        parser_model_path: str,
        parser_cuda_device: int,
        freeze_parser: bool,
        premise_encoder: Optional[Seq2SeqEncoder] = None,
        hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the decomposable attention components and attach a parser.

        The attend and compare networks are wrapped in ``TimeDistributed`` and
        ``similarity_function`` in a ``LegacyMatrixAttention`` (stored as
        ``_attention``).  The hypothesis encoder falls back to the premise
        encoder when it is not given.

        The parser is loaded with ``load_archive(parser_model_path,
        cuda_device=parser_cuda_device)``.  Gradients are switched off for its
        ``_head_sentinel`` and for every parameter of its child modules; when
        ``freeze_parser`` is false they are switched back on for the parameters
        of ``parser.encoder`` only.  ``initializer`` is applied afterwards.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = (
            hypothesis_encoder if hypothesis_encoder else premise_encoder
        )

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(
            text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
            "text field embedding dim", "attend feedforward input dim")
        check_dimensions_match(
            aggregate_feedforward.get_output_dim(), self._num_labels,
            "final output dimension", "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        parser_archive = load_archive(parser_model_path,
                                      cuda_device=parser_cuda_device)
        self._parser = parser_archive.model

        # Switch gradients off everywhere in the parser first ...
        self._parser._head_sentinel.requires_grad = False
        for submodule in self._parser.children():
            for weight in submodule.parameters():
                weight.requires_grad = False
        # ... then re-enable them for the parser's encoder unless it is frozen.
        if not freeze_parser:
            for weight in self._parser.encoder.parameters():
                weight.requires_grad = True

        initializer(self)
コード例 #14
0
class EsimComparatorLayer2(torch.nn.Module):
    """Thin module that computes a premise/hypothesis similarity matrix.

    It wraps a ``LegacyMatrixAttention`` built from the given similarity
    function and returns that attention's output unchanged.
    """

    def __init__(self,
                 similarity_function: SimilarityFunction = None) -> None:
        """Build the wrapped ``LegacyMatrixAttention`` from ``similarity_function``.

        ``similarity_function`` is passed through as-is, including the ``None``
        default.
        """
        super().__init__()
        # Don't use DotProductMatrixAttention() if model wasn't trained exactly with it.
        self._matrix_attention = LegacyMatrixAttention(similarity_function)

    @overrides
    def forward(self, # pylint: disable=arguments-differ
                encoded_premise: torch.Tensor,
                encoded_hypothesis: torch.Tensor) -> torch.Tensor: # pylint: disable=unused-argument
        """Return the attention output for the two encoded inputs.

        The result of the wrapped matrix attention is returned directly; it is
        not wrapped in a dictionary.
        """
        # Shape: (batch_size, premise_length, hypothesis_length)
        similarity_matrix = self._matrix_attention(encoded_premise, encoded_hypothesis)
        return similarity_matrix

    def get_output_dim(self):
        """Delegate to the wrapped attention module's ``get_output_dim``."""
        # NOTE(review): this assumes LegacyMatrixAttention exposes
        # get_output_dim(); confirm, since it is not visible from this file.
        return self._matrix_attention.get_output_dim()
コード例 #15
0
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 encoder1: Seq2VecEncoder,
                 encoder2: Seq2VecEncoder,
                 similarity_function: SimilarityFunction,
                 vocab: Vocabulary) -> None:
        """Store the embedder and encoders and build the output layer.

        ``encoder1`` is stored as ``enc_turn1and2`` and ``encoder2`` as
        ``enc_turn3``.  ``similarity_function`` is wrapped in a
        ``LegacyMatrixAttention``.  The output layer is a ``torch.nn.Linear`` from
        four times ``encoder1``'s output dimension to the size of the ``labels``
        vocabulary namespace.  A ``LexiconEmbedder`` is built from the
        module-level ``LEXICON_PATH``.
        """
        super().__init__(vocab)

        self.word_embedding = word_embeddings
        self.enc_turn1and2 = encoder1
        self.enc_turn3 = encoder2
        self.matrix_attention = LegacyMatrixAttention(similarity_function)
        self.accuracy = MicroMetrics(vocab)
        self.label_index_to_label = self.vocab.get_index_to_token_vocabulary('labels')

        concatenated_dim = 4 * self.enc_turn1and2.get_output_dim()
        label_count = vocab.get_vocab_size("labels")
        self.hidden2out = torch.nn.Linear(in_features=concatenated_dim,
                                          out_features=label_count)

        self.lexicon_embedding = LexiconEmbedder(LEXICON_PATH, self.vocab)
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        attend_feedforward: FeedForward,
        similarity_function: SimilarityFunction,
        compare_feedforward: FeedForward,
        aggregate_feedforward: FeedForward,
        premise_encoder: Optional[Seq2SeqEncoder] = None,
        hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        similarity_weight: int = 30
    ) -> None:
        """Store the decomposable attention components plus a fixed lambda layer.

        ``label_map`` ends up as a list in which position ``i`` holds the label
        whose index in the ``labels`` vocabulary namespace is ``i``.  The attend
        and compare networks are wrapped in ``TimeDistributed`` and
        ``similarity_function`` in a ``LegacyMatrixAttention``.  The hypothesis
        encoder falls back to the premise encoder when it is not given.

        ``lambda_layer`` (a bias-free 16 -> 1 linear layer followed by
        ``MyActivationFunction``) is created after ``initializer`` has been
        applied, and its weights are then overwritten with fixed values.
        """
        super().__init__(vocab, regularizer)

        # Invert the label -> index mapping into an index-ordered list.
        label_to_index = vocab.get_token_to_index_vocabulary('labels')
        labels_by_index = [None] * len(label_to_index)
        for label, index in label_to_index.items():
            labels_by_index[index] = label
        self.label_map = labels_by_index

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = (
            hypothesis_encoder if hypothesis_encoder else premise_encoder
        )

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(
            text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
            "text field embedding dim", "attend feedforward input dim")
        check_dimensions_match(
            aggregate_feedforward.get_output_dim(), self._num_labels,
            "final output dimension", "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # Built after the initializer has run, so it is not touched by it.
        self.lambda_layer = nn.Sequential(nn.Linear(16, 1, bias=False),
                                          MyActivationFunction())
        fixed_weights = [[0.1, 0.5, 0.5, 0.5,
                          0.5, 0.1, 0.5, 0.5,
                          0.5, 0.5, 0.1, 0.5,
                          0.5, 0.5, 0.5, 0.9]]
        self.lambda_layer[0].weight.data = torch.tensor(fixed_weights)
        self.similarity_weight = similarity_weight
コード例 #17
0
    def __init__(
        self,
        encoder: Seq2SeqEncoder,
        projection_feedforward: FeedForward,
        inference_encoder: Seq2SeqEncoder,
        output_feedforward: FeedForward,
        similarity_function: SimilarityFunction = None,
        dropout: float = 0.5
    ) -> None:
        """Store the encoder, attention, projection and output components.

        ``similarity_function`` is passed as-is (including ``None``) to
        ``LegacyMatrixAttention``.  When ``dropout`` is truthy a
        ``torch.nn.Dropout`` and an ``InputVariationalDropout`` are created with
        that value; otherwise both attributes are ``None``.
        """
        super().__init__()

        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._output_feedforward = output_feedforward
コード例 #18
0
ファイル: decomposable.py プロジェクト: dragomirradev/neuclir
    def __init__(self,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 query_encoder: Optional[Seq2SeqEncoder] = None,
                 document_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        """Store the decomposable attention components of the scorer.

        The attend and compare networks are wrapped in ``TimeDistributed`` and
        ``similarity_function`` in a ``LegacyMatrixAttention``.  The document
        encoder falls back to the query encoder when it is not given.
        ``initializer`` is applied to the module as the last step.
        """
        super().__init__()

        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._query_encoder = query_encoder
        self._document_encoder = (
            document_encoder if document_encoder else query_encoder
        )

        initializer(self)
コード例 #19
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        output_feedforward: FeedForward,
        output_logit: FeedForward,
        dropout: float = 0.5,
        aggr_type: str = "both",
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the sub-modules of a single-output model trained with MSE loss.

        ``similarity_function`` is wrapped in a ``LegacyMatrixAttention``.  When
        ``dropout`` is truthy a ``torch.nn.Dropout`` and an
        ``InputVariationalDropout`` are created with that value; otherwise both
        attributes are ``None``.  The number of labels is fixed to ``1``, the
        metric is ``PearsonCorrelation`` and the loss is ``torch.nn.MSELoss``.
        ``aggr_type`` is stored as ``_aggr_type``.  ``initializer`` is applied to
        the model as the last step.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit
        self._num_labels = 1

        # Only the embedder -> encoder dimensions are checked: this constructor
        # takes no projection feed-forward or inference encoder to validate.
        check_dimensions_match(
            text_field_embedder.get_output_dim(), encoder.get_input_dim(),
            "text field embedding dim", "encoder input dim")

        self._aggr_type = aggr_type
        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)
コード例 #20
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        projection_feedforward: FeedForward,
        inference_encoder: Seq2SeqEncoder,
        output_feedforward: FeedForwardPair,
        dropout: float = 0.5,
        margin: float = 1.25,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None
    ) -> None:
        """Store the sub-modules of the margin-based model.

        ``similarity_function`` is wrapped in a ``LegacyMatrixAttention``.  When
        ``dropout`` is truthy a ``torch.nn.Dropout`` and an
        ``InputVariationalDropout`` are created with that value; otherwise both
        attributes are ``None``.  ``margin`` is stored as ``_margin`` and the
        metric is a ``BooleanAccuracy``.  ``initializer`` is applied to the model
        as the last step.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = InputVariationalDropout(dropout) if dropout else None

        self._output_feedforward = output_feedforward

        self._margin = margin
        self._accuracy = BooleanAccuracy()

        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 similarity_weight: int = 30) -> None:
        """Store the sub-modules, validate their dimensions and add the lambda layer.

        Parameters
        ----------
        vocab, regularizer
            Forwarded to the parent constructor; ``vocab`` also supplies the
            ``labels`` namespace used for ``label_map`` and ``_num_labels``.
        text_field_embedder, encoder, projection_feedforward,
        inference_encoder, output_feedforward, output_logit
            Kept as-is under the private attribute of the same name.
        similarity_function
            Printed, then wrapped in a ``LegacyMatrixAttention``.
        dropout
            Rate shared by the plain and the variational dropout layers; a
            falsy value disables both (they become ``None``).
        initializer
            Applied to the model *before* ``lambda_layer`` is created, so the
            hand-set lambda weights are not touched by it.
        similarity_weight
            Stored on the instance and echoed to stdout.
        """
        super().__init__(vocab, regularizer)

        # Invert the label vocabulary: slot i holds the label whose index is i.
        token_to_index = vocab.get_token_to_index_vocabulary('labels')
        index_to_label = [None] * len(token_to_index)
        for label, position in token_to_index.items():
            index_to_label[position] = label
        self.label_map = index_to_label

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        print(similarity_function)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward
        self._inference_encoder = inference_encoder

        # Both dropout flavours are switched on or off together.
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.rnn_input_dropout = (InputVariationalDropout(dropout)
                                  if dropout else None)

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit
        self._num_labels = vocab.get_vocab_size(namespace="labels")

        # Fail early when the configured layer sizes do not chain together.
        embedding_dim = text_field_embedder.get_output_dim()
        check_dimensions_match(embedding_dim, encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        projection_input_dim = projection_feedforward.get_input_dim()
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_input_dim,
                               "encoder output dim",
                               "projection feedforward input")
        projection_output_dim = projection_feedforward.get_output_dim()
        check_dimensions_match(projection_output_dim,
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # Created after the initializer ran; its single weight row is then
        # overwritten with fixed starting values.
        lambda_projection = nn.Linear(16, 1, bias=False)
        self.lambda_layer = nn.Sequential(lambda_projection,
                                          MyActivationFunction())
        starting_weights = [
            0.1, 0.5, 0.5, 0.5,
            0.5, 0.1, 0.5, 0.5,
            0.5, 0.5, 0.1, 0.5,
            0.5, 0.5, 0.5, 0.9,
        ]
        self.lambda_layer[0].weight.data = torch.tensor([starting_weights])

        self.similarity_weight = similarity_weight
        print("SIMILARITY WEIGHT BEING USED IS : {0}".format(
            self.similarity_weight))
コード例 #22
0
    def __init__(self, vocab: Vocabulary, cf_a, preloaded_elmo=None) -> None:
        """Assemble every layer of the model from the ``cf_a`` configuration object.

        Each stage (ELMo projection, highway, similarity, span start/end
        predictors) is built either as a regular layer or, when the matching
        ``cf_a.VB_*`` flag is truthy, as its variational counterpart with a
        ``Vil.Prior`` created from the matching ``cf_a.VB_*_prior`` mapping.
        Every variational layer is additionally appended to ``self.VBmodels``.

        Parameters
        ----------
        vocab
            Forwarded to the parent constructor.
        cf_a
            Configuration object; it is kept as ``self.cf_a`` and supplies the
            regularizer, all layer sizes, dropout rates and feature flags read
            below, and is finally handed to ``pytut.get_optimizers``.
        preloaded_elmo
            Optional embedder reused instead of calling
            ``bidut.download_Elmo``. Only consulted when ``cf_a.use_ELMO`` is
            truthy.
        """
        super(BidirectionalAttentionFlow_1,
              self).__init__(vocab, cf_a.regularizer)

        # ---- Bookkeeping structures ----
        self.cf_a = cf_a
        # Bayesian data models: filled with each variational layer built below.
        self.VBmodels = []
        self.LinearModels = []

        # ---- Text field embedder (ELMo) ----
        # Used to embed the ``question`` and ``passage`` ``TextFields`` we get
        # as input to the model.
        if cf_a.use_ELMO:
            if preloaded_elmo is not None:
                text_field_embedder = preloaded_elmo
            else:
                text_field_embedder = bidut.download_Elmo(
                    cf_a.ELMO_num_layers, cf_a.ELMO_droput)
                print("ELMO loaded from disk or downloaded")
        else:
            # NOTE(review): the layers below call ``get_output_dim()`` on the
            # embedder, so construction fails later with ``None`` here.
            text_field_embedder = None

        self._text_field_embedder = text_field_embedder

        # Optional linear projection of the embedder output down to 200 dims.
        if cf_a.Add_Linear_projection_ELMO:
            if self.cf_a.VB_Linear_projection_ELMO:
                prior = Vil.Prior(**(cf_a.VB_Linear_projection_ELMO_prior))
                print(
                    "----------------- Bayesian Linear Projection ELMO --------------"
                )
                linear_projection_ELMO = LinearVB(
                    text_field_embedder.get_output_dim(), 200, prior=prior)
                self.VBmodels.append(linear_projection_ELMO)
            else:
                linear_projection_ELMO = torch.nn.Linear(
                    text_field_embedder.get_output_dim(), 200)

            self._linear_projection_ELMO = linear_projection_ELMO

        # ---- Highway layers ----
        # num_highway_layers: the number of highway layers to use in between
        # embedding the input and passing it through the phrase layer.
        if cf_a.Add_Linear_projection_ELMO:
            highway_input_dim = 200
        else:
            highway_input_dim = text_field_embedder.get_output_dim()

        num_highway_layers = cf_a.num_highway_layers
        if self.cf_a.VB_highway_layers:
            print("----------------- Bayesian Highway network  --------------")
            prior = Vil.Prior(**(cf_a.VB_highway_layers_prior))
            highway_layer = HighwayVB(highway_input_dim,
                                      num_highway_layers,
                                      prior=prior)
            self.VBmodels.append(highway_layer)
        else:
            highway_layer = Highway(highway_input_dim, num_highway_layers)
        highway_layer = TimeDistributed(highway_layer)

        self._highway_layer = highway_layer

        # ---- Phrase layer ----
        # The encoder (with its own internal stacking) that we use in between
        # embedding tokens and doing the bidirectional attention.
        if cf_a.phrase_layer_dropout > 0:
            dropout_phrase_layer = torch.nn.Dropout(
                p=cf_a.phrase_layer_dropout)
        else:
            dropout_phrase_layer = lambda x: x  # identity when disabled

        phrase_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(highway_input_dim,
                          hidden_size=cf_a.phrase_layer_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.phrase_layer_num_layers,
                          dropout=cf_a.phrase_layer_dropout))

        # Bidirectional LSTM: forward and backward states are concatenated.
        phrase_encoding_out_dim = cf_a.phrase_layer_hidden_size * 2
        self._phrase_layer = phrase_layer
        self._dropout_phrase_layer = dropout_phrase_layer

        # ---- Matrix attention layer ----
        # The similarity function used when comparing encoded passage and
        # question representations.
        if self.cf_a.VB_similarity_function:
            prior = Vil.Prior(**(cf_a.VB_similarity_function_prior))
            print(
                "----------------- Bayesian Similarity matrix --------------")
            similarity_function = LinearSimilarityVB(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim,
                prior=prior)
            self.VBmodels.append(similarity_function)
        else:
            similarity_function = LinearSimilarity(
                combination="x,y,x*y",
                tensor_1_dim=phrase_encoding_out_dim,
                tensor_2_dim=phrase_encoding_out_dim)

        matrix_attention = LegacyMatrixAttention(similarity_function)
        self._matrix_attention = matrix_attention

        # ---- Modeling layer ----
        # The encoder (with its own internal stacking) that we use in between
        # the bidirectional attention and predicting span start and end.
        if cf_a.modeling_passage_dropout > 0:
            dropout_modeling_passage = torch.nn.Dropout(
                p=cf_a.modeling_passage_dropout)
        else:
            dropout_modeling_passage = lambda x: x  # identity when disabled

        modeling_layer = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(phrase_encoding_out_dim * 4,
                          hidden_size=cf_a.modeling_passage_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_passage_num_layers,
                          dropout=cf_a.modeling_passage_dropout))

        self._modeling_layer = modeling_layer
        self._dropout_modeling_passage = dropout_modeling_passage

        # ---- Span start representation ----
        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim

        # Linear layer that scores every position as a span start.
        if self.cf_a.VB_span_start_predictor_linear:
            prior = Vil.Prior(**(cf_a.VB_span_start_predictor_linear_prior))
            print(
                "----------------- Bayesian Span Start Predictor--------------"
            )
            span_start_predictor_linear = LinearVB(span_start_input_dim,
                                                   1,
                                                   prior=prior)
            self.VBmodels.append(span_start_predictor_linear)
        else:
            span_start_predictor_linear = torch.nn.Linear(
                span_start_input_dim, 1)

        self._span_start_predictor_linear = span_start_predictor_linear
        self._span_start_predictor = TimeDistributed(
            span_start_predictor_linear)

        # ---- Span end representation ----
        # span_end_encoder: the encoder used to incorporate span start
        # predictions into the passage state before predicting span end.
        if cf_a.span_end_encoder_dropout > 0:
            dropout_span_end_encode = torch.nn.Dropout(
                p=cf_a.span_end_encoder_dropout)
        else:
            dropout_span_end_encode = lambda x: x  # identity when disabled

        span_end_encoder = PytorchSeq2SeqWrapper(
            torch.nn.LSTM(encoding_dim * 4 + modeling_dim * 3,
                          hidden_size=cf_a.modeling_span_end_hidden_size,
                          batch_first=True,
                          bidirectional=True,
                          num_layers=cf_a.modeling_span_end_num_layers,
                          dropout=cf_a.span_end_encoder_dropout))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim

        self._span_end_encoder = span_end_encoder
        self._dropout_span_end_encode = dropout_span_end_encode

        # Linear layer that scores every position as a span end.
        if self.cf_a.VB_span_end_predictor_linear:
            print(
                "----------------- Bayesian Span End Predictor--------------")
            prior = Vil.Prior(**(cf_a.VB_span_end_predictor_linear_prior))
            span_end_predictor_linear = LinearVB(span_end_input_dim,
                                                 1,
                                                 prior=prior)
            self.VBmodels.append(span_end_predictor_linear)
        else:
            span_end_predictor_linear = torch.nn.Linear(span_end_input_dim, 1)

        self._span_end_predictor_linear = span_end_predictor_linear
        self._span_end_predictor = TimeDistributed(span_end_predictor_linear)

        # ---- Dropout on the last layers ----
        # The rate must come from the same setting that enables the layer;
        # it previously reused ``span_end_encoder_dropout`` by mistake.
        if cf_a.spans_output_dropout > 0:
            dropout_spans_output = torch.nn.Dropout(
                p=cf_a.spans_output_dropout)
        else:
            dropout_spans_output = lambda x: x  # identity when disabled

        self._dropout_spans_output = dropout_spans_output

        # ---- Checks and metrics ----
        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(highway_input_dim,
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # mask_lstms : ``bool``
        #     If ``False``, we will skip passing the mask to the LSTM layers.  This gives a ~2x
        #     speedup, with only a slight performance decrease, if any.  We still use the mask
        #     for all softmaxes, but avoid the shuffling that's required when using masking
        #     with pytorch LSTMs.
        self._mask_lstms = cf_a.mask_lstms

        # ---- Parameter initialization ----
        # No explicit initializer is applied here: every component is
        # initialized when it is instantiated above.

        # ---- Optimizer ----
        optimizer = pytut.get_optimizers(self, cf_a)
        self._optimizer = optimizer
コード例 #23
0
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            num_highway_layers: int,
            phrase_layer: Seq2SeqEncoder,
            similarity_function: SimilarityFunction,
            modeling_layer: Seq2SeqEncoder,
            modeling_layer_memory: Seq2SeqEncoder,
            margin: float,
            max: float,
            dropout: float = 0.2,
            mask_lstms: bool = False,
            memory_enabled: bool = False,
            memory_update: bool = True,
            memory_concat: bool = False,
            save_memory_snapshots: bool = False,
            save_entity_embeddings: bool = False,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            answer_layer_image: Seq2SeqEncoder = None,
            answer_layer_text: Seq2SeqEncoder = None,
            question_image_encoder: Seq2SeqEncoder = None,
            step_layer: Seq2SeqEncoder = None,
            num_heads: int = 2,
            num_slots: int = 61,  # Maximum number of entities in the training set.
            last_layer_hidden_dims: List[int] = None,
            last_layer_num_layers: int = 4,
            projection_input_dim: int = 2048,
            projection_hidden_dims: List[int] = None,
            save_step_wise_attentions=False) -> None:
        """Build the network's sub-modules and reset any stale snapshot files.

        Notes on what the constructor does with its arguments:

        * ``margin`` and ``max`` are wrapped in one-element float tensors and
          moved to the GPU together with the cosine-similarity module, so a
          CUDA device is required.
        * With ``memory_enabled`` a ``RelationalMemory`` is created whose head
          size is ``step_layer``'s output width divided by ``num_heads``, and
          the final feed-forward input is 10x (instead of 5x) the modeling
          layer's output width.
        * ``projection_hidden_dims`` must provide at least four widths; they
          size the four linear layers of the MLP projector.
        * With ``save_memory_snapshots`` / ``save_entity_embeddings`` a
          leftover pickle file of a previous run is deleted from the current
          working directory.
        * ``initializer`` is applied to the finished model as the last step.
        """
        super(ProceduralReasoningNetworksforRecipeQA,
              self).__init__(vocab, regularizer)

        embedding_width = text_field_embedder.get_output_dim()
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(embedding_width, num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._modeling_layer_memory = modeling_layer_memory

        # Loss helpers live on the GPU from the start.
        self.margin = torch.FloatTensor([margin]).cuda()
        self.cos = nn.CosineSimilarity(dim=-1, eps=1e-6).cuda()
        self.for_max = torch.FloatTensor([max]).cuda()

        self._memory_enabled = memory_enabled
        self._memory_update = memory_update
        self._memory_concat = memory_concat
        self._save_memory_snapshots = save_memory_snapshots
        self._save_entity_embeddings = save_entity_embeddings
        self._step_layer = step_layer
        self._label_acc = CategoricalAccuracy()
        self.save_step_wise_attentions = save_step_wise_attentions

        if self._memory_enabled:
            per_head_width = int(step_layer.get_output_dim() / num_heads)
            self.mem_module = RelationalMemory(
                mem_slots=num_slots,
                head_size=per_head_width,
                input_size=per_head_width * num_heads,
                num_heads=num_heads,
                num_blocks=1,
                forget_bias=1.,
                input_bias=0.,
            ).cuda(0)
            width_multiplier = 10
        else:
            width_multiplier = 5
        last_layer_input_dim = (width_multiplier *
                                modeling_layer.get_output_dim())

        self._activation = torch.nn.Tanh()
        self._last_layer = FeedForward(last_layer_input_dim,
                                       last_layer_num_layers,
                                       last_layer_hidden_dims,
                                       self._activation, dropout)
        self._answer_layer_image = answer_layer_image  # uses image encoder for image input
        self._answer_layer_text = answer_layer_text  # uses text encoder for text input
        self._question_image_encoder = question_image_encoder  # converts question image inputs to encoding dim
        self._vocab = vocab

        # TODO: Replace hard coded parameters with config parameters
        # Four dropout+linear stages, with a tanh between consecutive stages.
        stage_widths = [projection_input_dim]
        stage_widths += [projection_hidden_dims[i] for i in range(4)]
        projector_stages = []
        for stage, (fan_in, fan_out) in enumerate(
                zip(stage_widths[:-1], stage_widths[1:])):
            if stage > 0:
                projector_stages.append(torch.nn.Tanh())
            projector_stages.append(torch.nn.Dropout(0.1, inplace=False))
            projector_stages.append(torch.nn.Linear(fan_in, fan_out))
        self._mlp_projector = TimeDistributed(
            torch.nn.Sequential(*projector_stages))

        # Identity function stands in for dropout when it is disabled.
        self._dropout = (torch.nn.Dropout(p=dropout)
                         if dropout > 0 else (lambda x: x))

        self._mask_lstms = mask_lstms

        # Make sure we start with clean files for whatever will be dumped.
        stale_dumps = (
            (self._save_memory_snapshots, 'memory_snapshots_by_recipe.pkl'),
            (self._save_entity_embeddings, 'entity_embeddings_final.pkl'),
        )
        for dump_enabled, dump_path in stale_dumps:
            if dump_enabled and os.path.isfile(dump_path):
                os.remove(dump_path)

        initializer(self)
コード例 #24
0
ファイル: esim.py プロジェクト: tchewik/isanlp_rst
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        similarity_function: SimilarityFunction,
        projection_feedforward: FeedForward,
        inference_encoder: Seq2SeqEncoder,
        output_feedforward: FeedForward,
        output_logit: FeedForward,
        dropout: float = 0.5,
        class_weights: list = [],
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        encode_together: bool = False,
    ) -> None:
        """Store the sub-modules, validate dimensions and set up a weighted loss.

        Parameters
        ----------
        vocab, regularizer
            Forwarded to the parent constructor; ``vocab`` also supplies the
            size of the ``labels`` namespace.
        text_field_embedder, encoder, projection_feedforward,
        inference_encoder, output_feedforward, output_logit
            Kept as-is under the private attribute of the same name.
        similarity_function
            Wrapped in a ``LegacyMatrixAttention``.
        dropout
            Rate shared by the plain and the variational dropout layers; a
            falsy value disables both (they become ``None``).
        class_weights
            Per-class weights for the cross-entropy loss. When empty, every
            class gets weight ``1.0``; the number of classes is taken from the
            width of ``output_logit``'s output (assumed here to be the logits
            the loss consumes -- confirm against ``forward``).
        initializer
            Applied to the fully assembled model as the last step.
        encode_together
            Stored as ``self.encode_together``.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder
        self.encode_together = encode_together

        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward

        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        if class_weights:
            self.class_weights = class_weights
        else:
            # The original read ``self.output_feedforward`` here, an attribute
            # that is never assigned, so omitting ``class_weights`` raised
            # AttributeError. Use the constructor argument directly instead.
            self.class_weights = [1.] * output_logit.get_output_dim()

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        # Fail early when the configured layer sizes do not chain together.
        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )
        check_dimensions_match(
            encoder.get_output_dim() * 4,
            projection_feedforward.get_input_dim(),
            "encoder output dim",
            "projection feedforward input",
        )
        check_dimensions_match(
            projection_feedforward.get_output_dim(),
            inference_encoder.get_input_dim(),
            "proj feedforward output dim",
            "inference lstm input dim",
        )

        # Overall accuracy plus one F1 measure per class index.
        self.metrics = {"accuracy": CategoricalAccuracy()}
        for class_index in range(len(self.class_weights)):
            self.metrics[f"f1_rel{class_index}"] = F1Measure(class_index)

        self._loss = torch.nn.CrossEntropyLoss(
            weight=torch.FloatTensor(self.class_weights))

        initializer(self)
コード例 #25
0
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Assemble the layers, span predictors and evaluation metrics.

        The encoders are stored as given; the highway layer, the matrix
        attention and the two span predictors are created here. Their input
        widths are derived from the encoders' reported dimensions, which are
        also cross-checked. Besides the span/SQuAD metrics, running averages
        for BLEU1-4 and ROUGE-1..4/L/W are prepared together with a ``rouge``
        evaluator. ``initializer`` is applied as the very last step.
        """
        super(BidirectionalAttentionFlowBasic, self).__init__(vocab, regularizer)

        embedding_dim = text_field_embedder.get_output_dim()
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(Highway(embedding_dim, num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        merged_dim = 4 * encoding_dim

        start_scorer = torch.nn.Linear(merged_dim + modeling_dim, 1)
        self._span_start_predictor = TimeDistributed(start_scorer)

        end_scorer = torch.nn.Linear(merged_dim + span_end_encoder.get_output_dim(), 1)
        self._span_end_predictor = TimeDistributed(end_scorer)

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(), merged_dim,
                               "modeling layer input dim", "4 * encoding dim")
        check_dimensions_match(embedding_dim, phrase_layer.get_input_dim(),
                               "text field embedder output dim", "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(), merged_dim + 3 * modeling_dim,
                               "span end encoder input dim", "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()

        # Identity function stands in for dropout when it is disabled.
        self._dropout = torch.nn.Dropout(p=dropout) if dropout > 0 else (lambda x: x)
        self._mask_lstms = mask_lstms

        # ---- Evaluation: BLEU ----
        self._bleu_score_types_to_use = ["BLEU1", "BLEU2", "BLEU3", "BLEU4"]
        self._bleu_scores = {}
        for bleu_name in self._bleu_score_types_to_use:
            self._bleu_scores[bleu_name] = Average()

        # ---- Evaluation: ROUGE (via pyrouge) ----
        self._rouge_score_types_to_use = ['rouge-n', 'rouge-l', 'rouge-w']

        # "rouge-n" stands for a family of scores: rouge-1, rouge-2, ..., rouge-n.
        max_rouge_n = 4
        rouge_scores_names = []
        if "rouge-n" in self._rouge_score_types_to_use:
            for order in range(1, max_rouge_n + 1):
                rouge_scores_names.append("rouge-{0}".format(order))
        for rouge_name in self._rouge_score_types_to_use:
            if rouge_name != 'rouge-n':
                rouge_scores_names.append(rouge_name)

        self._rouge_scores = {}
        for rouge_name in rouge_scores_names:
            self._rouge_scores[rouge_name] = Average()

        rouge_settings = dict(
            metrics=self._rouge_score_types_to_use,
            max_n=max_rouge_n,
            limit_length=True,
            length_limit=100,
            length_limit_type='words',
            apply_avg=False,
            apply_best=False,
            alpha=0.5,  # Default F1_score
            weight_factor=1.2,
            stemming=True,
        )
        self._rouge_evaluator = rouge.Rouge(**rouge_settings)

        initializer(self)
コード例 #26
0
    def __init__(self, 
                 vocab: Vocabulary,
                 training_tasks: Any,
                 validation_tasks: Any,
                 
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,

                 langs_print_train: List[str] = None,
                 dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Set up the decomposable-attention components and per-task metrics.

        Parameters
        ----------
        vocab, regularizer
            Forwarded to the parent constructor; ``vocab`` also supplies the
            size of the ``labels`` namespace.
        training_tasks, validation_tasks
            Either a dict, in which case only its keys are kept (as a list),
            or any other object, which is stored unchanged.
        text_field_embedder
            Stored as-is; its output width must equal the input width of
            ``attend_feedforward``.
        attend_feedforward, compare_feedforward
            Each wrapped in ``TimeDistributed``.
        similarity_function
            Wrapped in a ``LegacyMatrixAttention``.
        aggregate_feedforward
            Stored as-is; its output width must equal the number of labels.
        premise_encoder, hypothesis_encoder
            Stored as-is; a falsy ``hypothesis_encoder`` falls back to
            ``premise_encoder``.
        langs_print_train
            Stored under ``_langs_pring_train`` (sic); a falsy value is
            replaced by the string ``"en"``.
        dropout
            Probability for the ``torch.nn.Dropout`` layer.
        initializer
            Accepted but never applied in this constructor (the only call is
            commented out below).
        """
        super(DecomposableAttentionMultiling, self).__init__(vocab, regularizer=regularizer)
        # Task collections may arrive as dicts; only the task names are kept.
        if type(training_tasks) == dict:
            self._training_tasks = list(training_tasks.keys())
        else:
            self._training_tasks = training_tasks

        if type(validation_tasks) == dict:
            self._validation_tasks = list(validation_tasks.keys())
        else:
            self._validation_tasks = validation_tasks

        self._label_namespace = "labels"
        self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)

        # elmo / bert
        self._text_field_embedder = text_field_embedder
        # decomposable attention stuff
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        # Share the premise encoder when no separate hypothesis encoder is given.
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder
        
        # Fail early when the configured layer sizes do not chain together.
        check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                               "text field embedding dim", "attend feedforward input dim")
        check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
                               "final output dimension", "number of labels")



        self._dropout = torch.nn.Dropout(p=dropout)

        self._loss = torch.nn.CrossEntropyLoss()

        # initializer(self._nli_projection_layer)

        # One accuracy tracker per validation task, plus a running average.
        self._nli_per_lang_acc: Dict[str, CategoricalAccuracy] = dict()
        
        for taskname in self._validation_tasks:
            # this will hide some metrics from tqdm, but they will still be computed
            self._nli_per_lang_acc[taskname] = CategoricalAccuracy()
        self._nli_avg_acc = Average()
        
        # NOTE(review): the fallback is the string "en", not a list, so later
        # membership tests on it are substring checks -- confirm this is intended.
        self._langs_pring_train = langs_print_train or "en"
        if '*' in self._langs_pring_train:
            # NOTE(review): str.split("") always raises ValueError ("empty
            # separator"), so this branch cannot succeed as written. The
            # separator between task name and language is presumably missing
            # -- confirm the task-name format and fix.
            self._langs_pring_train = [t.split("")[-1] for t in training_tasks] 
コード例 #27
0
 def __init__(self, similarity_function: SimilarityFunction = None) -> None:
     """Create the module's only component: a legacy matrix attention.

     ``similarity_function`` is handed straight to ``LegacyMatrixAttention``.
     """
     super().__init__()
     # Keep the legacy attention: DotProductMatrixAttention() must not be used
     # unless the model was trained with exactly that attention.
     legacy_attention = LegacyMatrixAttention(similarity_function)
     self._matrix_attention = legacy_attention
コード例 #28
0
# Walk-through of the passage/question similarity step. It relies on
# ``encoding_dim``, ``encoded_question``, ``encoded_passage`` and
# ``question_mask`` having been defined earlier in the script.
print("encoding_dim: ", encoding_dim)
print("Question encoding: ", encoded_question.shape)
print("Passage encoding: ", encoded_passage.shape)
"""
################### SIMILARITY FUNCTION LAYER  #########################################
NOTE: Since the LSTM implementation of PyTorch cannot apply dropout in the last layer, 
we just apply ourselves later
"""

print("-------------- SIMILARITY LAYER ---------------")

# NOTE(review): both tensor dims are hard-coded to 200 -- presumably they must
# equal ``encoding_dim`` printed above; confirm.
similarity_function = LinearSimilarity(combination="x,y,x*y",
                                       tensor_1_dim=200,
                                       tensor_2_dim=200)

matrix_attention = LegacyMatrixAttention(similarity_function)

# Score the encoded passage against the encoded question.
passage_question_similarity = matrix_attention(encoded_passage,
                                               encoded_question)
# Shape: (batch_size, passage_length, question_length)
print("passage question similarity: ", passage_question_similarity.shape)

# Normalize the scores with a softmax that honours the question mask.
# Shape: (batch_size, passage_length, question_length)
passage_question_attention = util.masked_softmax(passage_question_similarity,
                                                 question_mask)
# Attention-weighted sum of the question encodings.
# Shape: (batch_size, passage_length, encoding_dim)
passage_question_vectors = util.weighted_sum(encoded_question,
                                             passage_question_attention)

# We replace masked values with something really negative here, so they don't affect the
# max below.
コード例 #29
0
ファイル: bidaf.py プロジェクト: ethanjperez/allennlp
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 judge: Optional[Model] = None,
                 update_judge: bool = False,
                 reward_method: Optional[str] = None,
                 detach_value_head: bool = False,
                 qa_loss_weight: float = 0.,
                 influence_reward: bool = False,
                 dataset_name: str = 'squad') -> None:
        """
        Build the layers, span predictors and metrics of the model.

        Parameters
        ----------
        vocab : ``Vocabulary``
            Forwarded to the parent constructor.
        text_field_embedder : ``TextFieldEmbedder``
            Stored on the model; its output dim sizes the highway layer and must
            equal the input dim of ``phrase_layer``.
        num_highway_layers : ``int``
            Number of layers passed to ``Highway``.
        phrase_layer : ``Seq2SeqEncoder``
            Stored on the model; its output dim is used as ``encoding_dim``.
        similarity_function : ``SimilarityFunction``
            Wrapped in a ``LegacyMatrixAttention``.
        modeling_layer : ``Seq2SeqEncoder``
            Stored on the model; its input dim must equal ``4 * encoding_dim``.
        span_end_encoder : ``Seq2SeqEncoder``
            Stored on the model; its input dim must equal
            ``4 * encoding_dim + 3 * modeling_dim``.
        dropout : ``float``, optional (default=0.2)
            If greater than zero a ``torch.nn.Dropout`` with this probability is
            used, otherwise dropout is the identity function.
        mask_lstms : ``bool``, optional (default=True)
            Stored as ``self._mask_lstms``.
        initializer : ``InitializerApplicator``, optional
            Applied to the fully constructed model as the last step.
        regularizer : ``RegularizerApplicator``, optional (default=None)
            Forwarded to the parent constructor.
        judge : ``Model``, optional (default=None)
            When ``None`` this instance is itself flagged as the judge
            (``self.is_judge`` is ``True``) and no FiLM layers or value head are
            created; otherwise the given model is stored and those layers are
            created.
        update_judge : ``bool``, optional (default=False)
            Only takes effect when a ``judge`` is given.
        reward_method : ``str``, optional (default=None)
            Stored only when a ``judge`` is given; otherwise ``None`` is stored.
        detach_value_head : ``bool``, optional (default=False)
            Stored as ``self._detach_value_head``.
        qa_loss_weight : ``float``, optional (default=0.)
            Stored as ``self._qa_loss_weight``.
        influence_reward : ``bool``, optional (default=False)
            Stored as ``self.influence_reward``.
        dataset_name : ``str``, optional (default='squad')
            ``'race'`` selects the ``'mc'`` answer type; any other value selects
            ``'span'``.
        """
        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        # An instance constructed without a judge is itself treated as the judge.
        self.judge = judge
        self.is_judge = judge is None
        self.reward_method = None if self.is_judge else reward_method
        self.update_judge = update_judge and not self.is_judge
        self._detach_value_head = detach_value_head
        self._qa_loss_weight = qa_loss_weight
        self.influence_reward = influence_reward
        self.answer_type = 'mc' if dataset_name == 'race' else 'span'
        self.output_type = 'span'  # The actual way the output is given (here it's as a pointer to input)

        # NOTE: sub-modules are created in a fixed order on purpose; reordering
        # would change parameter registration order and random-initialisation draws.
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        if not self.is_judge:
            # Maps a single scalar input to 2 * (modeling layer input dim) values.
            self._turn_film_gen = torch.nn.Linear(
                1, 2 * modeling_layer.get_input_dim())
            self._film = FiLM()
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        if not self.is_judge:
            # The value head shares the span-start predictor's input size.
            self._value_head = TimeDistributed(
                torch.nn.Linear(span_start_input_dim, 1))  # Can make MLP
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here (once).
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            # No dropout requested: use the identity so callers can apply it unconditionally.
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)