Example #1
    def __init__(self,
                 vocab: Vocabulary,
                 mention_feedforward: FeedForward,
                 relation_feedforward: FeedForward,
                 feature_size: int,
                 spans_per_word: float,
                 span_emb_dim: int,
                 rel_prop: int = 0,
                 rel_prop_dropout_A: float = 0.0,
                 rel_prop_dropout_f: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 positive_label_weight: float = 1.0,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(RelationExtractor, self).__init__(vocab, regularizer)

        # Need to hack this for cases where there's no relation data. It breaks Ulme's code.
        self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)

        # Span candidate scorer.
        # TODO(dwadden) make sure I've got the input dim right on this one.
        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(
                torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
        self._mention_pruner = Pruner(feedforward_scorer)

        # Relation scorer.
        self._relation_feedforward = relation_feedforward
        self._relation_scorer = torch.nn.Linear(
            relation_feedforward.get_output_dim(), self._n_labels)

        self._spans_per_word = spans_per_word

        # TODO(dwadden) Add code to compute relation F1.
        self._candidate_recall = CandidateRecall()
        self._relation_metrics = RelationMetrics()

        class_weights = torch.cat([
            torch.tensor([1.0]),
            positive_label_weight * torch.ones(self._n_labels)
        ])
        self._loss = torch.nn.CrossEntropyLoss(reduction="sum",
                                               ignore_index=-1,
                                               weight=class_weights)
        self.rel_prop = rel_prop

        # Relation Propagation
        self._A_network = FeedForward(input_dim=self._n_labels,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=lambda x: x,
                                      dropout=rel_prop_dropout_A)
        self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=torch.nn.Sigmoid(),
                                      dropout=rel_prop_dropout_f)

        initializer(self)
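A minimal sketch (not part of the example above, values illustrative) of how this class-weight layout interacts with the loss: index 0 is the null "no relation" label with weight 1.0, the n real relation labels are up-weighted by positive_label_weight, and targets of -1 are excluded from the loss.

import torch

n_labels = 3
positive_label_weight = 5.0
class_weights = torch.cat([torch.tensor([1.0]),
                           positive_label_weight * torch.ones(n_labels)])
print(class_weights)  # tensor([1., 5., 5., 5.])

loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1,
                                 weight=class_weights)
logits = torch.randn(4, n_labels + 1)   # 4 candidate span pairs, n_labels + 1 classes
targets = torch.tensor([0, 2, -1, 1])   # -1 = padded pair, ignored by the loss
print(loss(logits, targets))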
Example #2
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidafOriginal, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(span_end_input_dim, 1))

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")
        check_dimensions_match(span_end_encoder.get_input_dim(),
                               4 * encoding_dim + 3 * modeling_dim,
                               "span end encoder input dim",
                               "4 * encoding dim + 3 * modeling dim")

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
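A minimal sketch of the TimeDistributed pattern used throughout these constructors (assuming AllenNLP's allennlp.modules.TimeDistributed): the wrapper folds the time dimension into the batch dimension, applies the wrapped module, and unfolds the result, so a plain Linear can score every timestep at once.

import torch
from allennlp.modules import TimeDistributed

span_start_predictor = TimeDistributed(torch.nn.Linear(10, 1))
x = torch.randn(2, 7, 10)          # (batch, passage_length, span_start_input_dim)
scores = span_start_predictor(x)   # (batch, passage_length, 1)
print(scores.shape)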
Example #3
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        span_extractor: SpanExtractor,
        encoder: Seq2SeqEncoder,
        feedforward: FeedForward = None,
        pos_tag_embedding: Embedding = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        evalb_directory_path: str = DEFAULT_EVALB_DIR,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.text_field_embedder = text_field_embedder
        self.span_extractor = span_extractor
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.feedforward_layer = TimeDistributed(
            feedforward) if feedforward else None
        self.pos_tag_embedding = pos_tag_embedding or None
        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = span_extractor.get_output_dim()

        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_classes))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()
        check_dimensions_match(
            representation_dim,
            encoder.get_input_dim(),
            "representation dim (tokens + optional POS tags)",
            "encoder input dim",
        )
        check_dimensions_match(
            encoder.get_output_dim(),
            span_extractor.get_input_dim(),
            "encoder input dim",
            "span extractor input dim",
        )
        if feedforward is not None:
            check_dimensions_match(
                span_extractor.get_output_dim(),
                feedforward.get_input_dim(),
                "span extractor output dim",
                "feedforward input dim",
            )

        self.tag_accuracy = CategoricalAccuracy()

        if evalb_directory_path is not None:
            self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
        else:
            self._evalb_score = None
        initializer(self)
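A minimal sketch of check_dimensions_match, which the examples above use to fail fast when configured dimensions disagree (the dims here are illustrative): it raises a ConfigurationError naming both sides, and does nothing when they match.

from allennlp.common.checks import check_dimensions_match

representation_dim = 300   # token embeddings (+ optional POS tag embeddings)
encoder_input_dim = 300
check_dimensions_match(representation_dim, encoder_input_dim,
                       "representation dim (tokens + optional POS tags)",
                       "encoder input dim")   # silent when the two dims agree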
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 pair2vec_dropout: float = 0.15,
                 max_span_length: int = 30,
                 pair2vec_model_file: str = None,
                 pair2vec_config_file: str = None) -> None:
        super().__init__(vocab)
        self._max_span_length = max_span_length
        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._encoding_dim = phrase_layer.get_output_dim()

        self.pair2vec = pair2vec_util.get_pair2vec(pair2vec_config_file,
                                                   pair2vec_model_file)
        self._pair2vec_dropout = torch.nn.Dropout(pair2vec_dropout)

        self._matrix_attention = LinearMatrixAttention(self._encoding_dim,
                                                       self._encoding_dim,
                                                       'x,y,x*y')

        # atten_dim = self._encoding_dim * 4 + 600 if ablation_type == 'attn_over_rels' else self._encoding_dim * 4
        atten_dim = self._encoding_dim * 4 + 600
        self._merge_atten = TimeDistributed(
            torch.nn.Linear(atten_dim, self._encoding_dim))

        self._residual_encoder = residual_encoder

        self._self_attention = LinearMatrixAttention(self._encoding_dim,
                                                     self._encoding_dim,
                                                     'x,y,x*y')

        self._merge_self_attention = TimeDistributed(
            torch.nn.Linear(self._encoding_dim * 3, self._encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(self._encoding_dim, 1))
        self._squad_metrics = SquadEmAndF1()
        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._official_em = Average()
        self._official_f1 = Average()

        self._span_accuracy = BooleanAccuracy()
        self._variational_dropout = InputVariationalDropout(dropout)
Example #5
	def __init__(self, vocab: Vocabulary,
				 text_field_embedder: TextFieldEmbedder,
				 text_field_embedder_elmo: TextFieldEmbedder,
				 num_highway_layers: int,
				 highway_dim: int,
				 highway_elmo_dim: int,
				 phrase_layer: Seq2SeqEncoder,
				 soft_align_matrix_attention: SoftAlignmentMatrixAttention,
				 self_matrix_attention: BilinearMatrixAttention,
				 passage_modeling_layer: Seq2SeqEncoder,
				 question_modeling_layer: Seq2SeqEncoder,
				 question_encoding_layer: Seq2VecEncoder,
				 passage_similarity_function: SimilarityFunction,
				 question_similarity_function: SimilarityFunction,
				 dropout: float = 0.2,
				 mask_lstms: bool = True,
				 initializer: InitializerApplicator = InitializerApplicator(),
				 regularizer: Optional[RegularizerApplicator] = None) -> None:
		super(MultiGranuFusionElmo, self).__init__(vocab, regularizer)

		self._text_field_embedder = text_field_embedder
		self._text_field_embedder_elmo = text_field_embedder_elmo
		self._highway_layer = TimeDistributed(Highway(highway_dim, num_highway_layers))
		self._highway_elmo_layer = TimeDistributed(Highway(highway_elmo_dim, num_highway_layers))
		self._phrase_layer = phrase_layer
		self._matrix_attention = soft_align_matrix_attention
		self._self_matrix_attention = self_matrix_attention
		self._passage_modeling_layer = passage_modeling_layer
		self._question_modeling_layer = question_modeling_layer
		self._question_encoding_layer = question_encoding_layer
		self._passage_similarity_function = passage_similarity_function
		self._question_similarity_function = question_similarity_function

		passage_modeling_output_dim = self._passage_modeling_layer.get_output_dim()
		question_modeling_output_dim = self._question_modeling_layer.get_output_dim()

		encoding_dim = phrase_layer.get_output_dim() + text_field_embedder_elmo.get_output_dim()
		self._passage_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
		self._question_fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
		self._fusion_weight = nn.Linear(encoding_dim * 4, encoding_dim)
		self._span_start_weight = nn.Linear(passage_modeling_output_dim, question_modeling_output_dim)
		self._span_end_weight = nn.Linear(passage_modeling_output_dim, question_modeling_output_dim)
		self._span_weight = torch.FloatTensor([0.1, 1])

		self._span_predictor = TimeDistributed(torch.nn.Linear(self._passage_modeling_layer.get_output_dim(), 2))

		self._span_start_accuracy = CategoricalAccuracy()
		self._span_end_accuracy = CategoricalAccuracy()
		self._span_accuracy = BooleanAccuracy()
		self._squad_metrics = SquadEmAndF1()
		if dropout > 0:
			self._dropout = torch.nn.Dropout(p=dropout)
		else:
			self._dropout = lambda x: x
		self._mask_lstms = mask_lstms

		initializer(self)
Example #6
    def __init__(self, input_dim):
        super(FusionLayer, self).__init__()
        self._input_dim = input_dim
        self._tanh = nn.Tanh()
        self._sigmoid = nn.Sigmoid()
        self._fusion_m = TimeDistributed(
            Linear(in_features=4 * input_dim, out_features=input_dim))
        self._fusion_g = TimeDistributed(
            Linear(in_features=4 * input_dim, out_features=1))
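The FusionLayer snippet above only shows the constructor; below is a hedged guess at the forward pass, following the usual gated-fusion recipe. The feature layout [x, y, x*y, x-y] and the gating formula are assumptions, since the snippet does not show them.

import torch

def fuse(x, y, fusion_m, fusion_g):
    # x, y: (batch, seq_len, input_dim); fusion_m / fusion_g are the layers built above.
    features = torch.cat([x, y, x * y, x - y], dim=-1)  # (batch, seq_len, 4 * input_dim)
    m = torch.tanh(fusion_m(features))                  # candidate fused representation
    g = torch.sigmoid(fusion_g(features))               # per-position gate in (0, 1)
    return g * m + (1 - g) * x                          # interpolate fused and original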
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 residual_encoder: Seq2SeqEncoder,
                 span_start_encoder: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(ModelMSMARCO, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._phrase_layer = phrase_layer
        self._residual_encoder = residual_encoder
        self._span_end_encoder = span_end_encoder
        self._span_start_encoder = span_start_encoder

        encoding_dim = phrase_layer.get_output_dim()
        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._no_answer_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        self._matrix_attention = TriLinearAttention(encoding_dim)
        self._self_matrix_attention = TriLinearAttention(encoding_dim)
        self._linear_layer = TimeDistributed(
            torch.nn.Linear(4 * encoding_dim, encoding_dim))
        self._residual_linear_layer = TimeDistributed(
            torch.nn.Linear(3 * encoding_dim, encoding_dim))

        #self._w_x = torch.nn.Parameter(torch.Tensor(encoding_dim))
        #self._w_y = torch.nn.Parameter(torch.Tensor(encoding_dim))
        #self._w_xy = torch.nn.Parameter(torch.Tensor(encoding_dim))

        #std = math.sqrt(6 / (encoding_dim + 1))
        #self._w_x.data.uniform_(-std, std)
        #self._w_y.data.uniform_(-std, std)
        #self._w_xy.data.uniform_(-std, std)

        self._squad_metrics = SquadEmAndF1()
        self._rouge_metric = Rouge()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
        self._ite = 0
Example #8
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 attention_similarity_function: SimilarityFunction,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_encoder: Seq2SeqEncoder,
                 initializer: InitializerApplicator,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 evaluation_json_file: str = None) -> None:
        super(BidirectionalAttentionFlow, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(Highway(text_field_embedder.get_output_dim(),
                                                      num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = MatrixAttention(attention_similarity_function)
        self._modeling_layer = modeling_layer
        self._span_end_encoder = span_end_encoder

        encoding_dim = phrase_layer.get_output_dim()
        modeling_dim = modeling_layer.get_output_dim()
        span_start_input_dim = encoding_dim * 4 + modeling_dim
        self._span_start_predictor = TimeDistributed(torch.nn.Linear(span_start_input_dim, 1))

        span_end_encoding_dim = span_end_encoder.get_output_dim()
        span_end_input_dim = encoding_dim * 4 + span_end_encoding_dim
        self._span_end_predictor = TimeDistributed(torch.nn.Linear(span_end_input_dim, 1))
        initializer(self)
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._official_em = Average()
        self._official_f1 = Average()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        if evaluation_json_file:
            logger.info("Prepping official evaluation dataset from %s", evaluation_json_file)
            with open(evaluation_json_file) as dataset_file:
                dataset_json = json.load(dataset_file)
            question_to_answers = {}
            for article in dataset_json['data']:
                for paragraph in article['paragraphs']:
                    for question in paragraph['qas']:
                        question_id = question['id']
                        answers = [answer['text'] for answer in question['answers']]
                        question_to_answers[question_id] = answers

            self._official_eval_dataset = question_to_answers
        else:
            self._official_eval_dataset = None
Example #9
    def __init__(self, embA_size: int, embB_size: int, hidden_dim: int):
        super(SpanRepAssembly, self).__init__()

        self.embA_size = embA_size
        self.embB_size = embB_size
        self.hidden_dim = hidden_dim

        self.hiddenA = TimeDistributed(Linear(embA_size, hidden_dim))
        self.hiddenB = TimeDistributed(
            Linear(embB_size, hidden_dim, bias=False))
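The SpanRepAssembly constructor above only builds the two projections; a hedged sketch of the usual assembly step follows (an assumption, since the forward is not shown): every (start, end) pair is represented by broadcasting the two projections against each other.

import torch

def assemble_span_reps(embA, embB, hiddenA, hiddenB):
    # embA: (batch, lenA, embA_size), embB: (batch, lenB, embB_size)
    projA = hiddenA(embA).unsqueeze(2)   # (batch, lenA, 1, hidden_dim)
    projB = hiddenB(embB).unsqueeze(1)   # (batch, 1, lenB, hidden_dim)
    return projA + projB                 # (batch, lenA, lenB, hidden_dim), one rep per pair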
Example #10
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        mention_feedforward: FeedForward,
        antecedent_feedforward: FeedForward,
        feature_size: int,
        max_span_width: int,
        spans_per_word: float,
        max_antecedents: int,
        lexical_dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
        )
        self._mention_pruner = Pruner(feedforward_scorer)
        self._antecedent_scorer = TimeDistributed(
            torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
        )

        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False,
        )
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim()
        )

        # 10 possible distance buckets.
        self._num_distance_buckets = 10
        self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)

        self._max_span_width = max_span_width
        self._spans_per_word = spans_per_word
        self._max_antecedents = max_antecedents

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()
        if lexical_dropout > 0:
            self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
        else:
            self._lexical_dropout = lambda x: x
        initializer(self)
Example #11
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 max_decoding_steps: int,
                 spans_per_word: float,
                 target_namespace: str = "tokens",
                 target_embedding_dim: int = None,
                 attention_function: SimilarityFunction = None,
                 scheduled_sampling_ratio: float = 0.0,
                 spans_extractor: SpanExtractor = None,
                 spans_scorer_feedforward: FeedForward = None) -> None:
        super(SpanAe, self).__init__(vocab)
        self._source_embedder = source_embedder
        self._encoder = encoder
        self._max_decoding_steps = max_decoding_steps
        self._target_namespace = target_namespace
        self._attention_function = attention_function
        self._scheduled_sampling_ratio = scheduled_sampling_ratio
        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        num_classes = self.vocab.get_vocab_size(self._target_namespace)
        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
        # we're using attention with ``DotProductSimilarity``, this is needed.
        self._decoder_output_dim = self._encoder.get_output_dim() + 1
        target_embedding_dim = target_embedding_dim or self._source_embedder.get_output_dim(
        )
        self._target_embedder = Embedding(num_classes, target_embedding_dim)
        if self._attention_function:
            self._decoder_attention = Attention(self._attention_function)
            # The output of attention, a weighted average over encoder outputs, will be
            # concatenated to the input vector of the decoder at each time step.
            self._decoder_input_dim = self._encoder.get_output_dim(
            ) + target_embedding_dim
        else:
            self._decoder_input_dim = target_embedding_dim
        self._decoder_cell = LSTMCell(self._decoder_input_dim + 1,
                                      self._decoder_output_dim)
        self._output_projection_layer = Linear(self._decoder_output_dim,
                                               num_classes)

        self._span_extractor = spans_extractor

        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(spans_scorer_feedforward),
            TimeDistributed(
                torch.nn.Linear(spans_scorer_feedforward.get_output_dim(), 1)))
        self._span_pruner = SpanPruner(feedforward_scorer)

        self._spans_per_word = spans_per_word
Example #12
File: model.py Project: zqyuan/r2c
    def __init__(self,
                 vocab: Vocabulary,
                 span_encoder: Seq2SeqEncoder,
                 reasoning_encoder: Seq2SeqEncoder,
                 input_dropout: float = 0.3,
                 hidden_dim_maxpool: int = 1024,
                 class_embs: bool=True,
                 reasoning_use_obj: bool=True,
                 reasoning_use_answer: bool=True,
                 reasoning_use_question: bool=True,
                 pool_reasoning: bool = True,
                 pool_answer: bool = True,
                 pool_question: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 ):
        super(AttentionQA, self).__init__(vocab)

        self.detector = SimpleDetector(pretrained=True, average_pool=True, semantic=class_embs, final_dim=512)
        ###################################################################################################

        self.rnn_input_dropout = TimeDistributed(InputVariationalDropout(input_dropout)) if input_dropout > 0 else None

        self.span_encoder = TimeDistributed(span_encoder)
        self.reasoning_encoder = TimeDistributed(reasoning_encoder)

        self.span_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=span_encoder.get_output_dim(),
        )

        self.obj_attention = BilinearMatrixAttention(
            matrix_1_dim=span_encoder.get_output_dim(),
            matrix_2_dim=self.detector.final_dim,
        )

        self.reasoning_use_obj = reasoning_use_obj
        self.reasoning_use_answer = reasoning_use_answer
        self.reasoning_use_question = reasoning_use_question
        self.pool_reasoning = pool_reasoning
        self.pool_answer = pool_answer
        self.pool_question = pool_question
        dim = sum([d for d, to_pool in [(reasoning_encoder.get_output_dim(), self.pool_reasoning),
                                        (span_encoder.get_output_dim(), self.pool_answer),
                                        (span_encoder.get_output_dim(), self.pool_question)] if to_pool])

        self.final_mlp = torch.nn.Sequential(
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(dim, hidden_dim_maxpool),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(input_dropout, inplace=False),
            torch.nn.Linear(hidden_dim_maxpool, 1),
        )
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #13
def make_pruner(scorer, entity_beam, gold_beam):
    """
    Create a pruner that either takes outputs of other scorers (i.e. entity beam), or uses its own
    scorer (the `default_scorer`).
    """
    item_scorer = torch.nn.Sequential(
        TimeDistributed(scorer),
        TimeDistributed(torch.nn.Linear(scorer.get_output_dim(), 1)))
    min_score_to_keep = 1e-10 if entity_beam else None

    return Pruner(item_scorer, entity_beam, gold_beam, min_score_to_keep)
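A hypothetical usage sketch for make_pruner; the FeedForward hyperparameters below are illustrative, not taken from the source.

import torch
from allennlp.modules import FeedForward

mention_scorer = FeedForward(input_dim=512, num_layers=2, hidden_dims=150,
                             activations=torch.nn.ReLU(), dropout=0.3)

# Default behaviour: rank candidate spans with the pruner's own scorer
# (no entity beam, no gold beam).
pruner = make_pruner(mention_scorer, entity_beam=False, gold_beam=False)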
Example #14
    def __init__(self, hidden_size, drop_prob):
        super(SelfAtt, self).__init__()

        self.drop_prob = drop_prob
        self.att_wrapper = TimeDistributed(nn.Linear(hidden_size*4, hidden_size))
        self.trilinear = TriLinearAttention(hidden_size)
        self.self_att_upsampler = TimeDistributed(nn.Linear(hidden_size*3, hidden_size*4))
        self.enc = nn.GRU(hidden_size, hidden_size//2, 1,
                           batch_first=True,
                           bidirectional=True)
        self.hidden_size = hidden_size
Example #15
    def __init__(self,
                 vocab: Vocabulary,
                 mention_feedforward: FeedForward,
                 relation_feedforward: FeedForward,
                 spans_per_word: float,
                 span_emb_dim: int,
                 use_biaffine_rel: bool,
                 rel_prop: int = 0,
                 rel_prop_dropout_A: float = 0.0,
                 rel_prop_dropout_f: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 positive_label_weight: float = 1.0,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(RelationExtractor, self).__init__(vocab, regularizer)

        self._n_labels = max(vocab.get_vocab_size("relation_labels"), 1)

        feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
        self._mention_pruner = Pruner(feedforward_scorer)

        # Relation scorer.
        self._use_biaffine_rel = use_biaffine_rel
        if self._use_biaffine_rel:
            self._biaffine = torch.nn.ModuleList()
            for _ in range(self._n_labels):
                self._biaffine.append(torch.nn.Linear(span_emb_dim, span_emb_dim))
        else:
            self._relation_feedforward = relation_feedforward
            self._relation_scorer = torch.nn.Linear(relation_feedforward.get_output_dim(), self._n_labels)

        self._spans_per_word = spans_per_word

        self._relation_metrics = RelationMetrics1()

        class_weights = torch.cat([torch.tensor([1.0]), positive_label_weight * torch.ones(self._n_labels)])
        self._loss = torch.nn.CrossEntropyLoss(reduction="sum", ignore_index=-1, weight=class_weights)
        self.rel_prop = rel_prop

        # Relation Propagation
        self._A_network = FeedForward(input_dim=self._n_labels,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=lambda x: x,
                                      dropout=rel_prop_dropout_A)
        self._f_network = FeedForward(input_dim=2*span_emb_dim,
                                      num_layers=1,
                                      hidden_dims=span_emb_dim,
                                      activations=torch.nn.Sigmoid(),
                                      dropout=rel_prop_dropout_f)

        initializer(self)
Example #16
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 preload_path: Optional[str] = None) -> None:
        super(DecomposableAttention, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._matrix_attention = MatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder

        # self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(),
                               "text field embedding dim", "attend feedforward input dim")
        # check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels,
        #                        "final output dimension", "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        initializer(self)

        # Do we want to initialize with the SNLI stuff? let's say yes.
        # 'snli-decomposable-attention/weights.th'
        if preload_path is not None:
            logger.info("Preloading!")
            preload = torch.load(preload_path)
            own_state = self.state_dict()
            for name, param in preload.items():
                if name not in own_state:
                    logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size()))
                elif param.size() == own_state[name].size():
                    own_state[name].copy_(param)
                else:
                    logger.info("Network has {} with size {}, ckpt has {}".format(name,
                                                                            own_state[name].size(),
                                                                            param.size()))

            missing = set(own_state.keys()) - set(preload.keys())
            if len(missing) > 0:
                logger.info("We couldn't find {}".format(','.join(missing)))
Example #17
    def __init__(self,
                 vocab: Vocabulary,
                 span_typer: SpanTyper,
                 embed_size: int,
                 label_namespace: str = 'span_labels',
                 event_namespace: str = 'event_labels'):
        super(ArgumentSpanClassifier, self).__init__()

        self.vocab: Vocabulary = vocab
        self.label_namespace: str = label_namespace
        self.event_namespace: str = event_namespace

        self.embed_size = embed_size
        self.event_embedding_size = 50

        self.event_embeddings: nn.Embedding = nn.Embedding(
            num_embeddings=len(
                vocab.get_token_to_index_vocabulary(
                    namespace=event_namespace)),
            embedding_dim=self.event_embedding_size)

        self.lexical_dropout = nn.Dropout(p=0.2)
        self.span_extractor: SpanExtractor = EndpointSpanExtractor(
            input_dim=self.embed_size, combination='x,y')
        self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(
            embed_size)

        self.arg_affine = TimeDistributed(
            FeedForward(input_dim=self.span_extractor.get_output_dim() +
                        self.attentive_span_extractor.get_output_dim(),
                        hidden_dims=self.embed_size,
                        num_layers=2,
                        activations=nn.GELU(),
                        dropout=0.2))
        self.trigger_affine = FeedForward(
            input_dim=self.span_extractor.get_output_dim() +
            self.attentive_span_extractor.get_output_dim(),
            hidden_dims=self.embed_size - self.event_embedding_size,
            num_layers=2,
            activations=nn.GELU(),
            dropout=0.2)

        self.trigger_event_infusion = TimeDistributed(
            FeedForward(input_dim=2 * self.embed_size,
                        hidden_dims=self.embed_size,
                        num_layers=2,
                        activations=nn.GELU(),
                        dropout=0.2))

        self.span_typer: SpanTyper = span_typer

        self.apply(self._init_weights)
Example #18
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 span_extractor,
                 encoder,
                 feedforward=None,
                 pos_tag_embedding=None,
                 initializer=InitializerApplicator(),
                 regularizer=None,
                 evalb_directory_path=DEFAULT_EVALB_DIR):
        super(SpanConstituencyParser, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.span_extractor = span_extractor
        self.num_classes = self.vocab.get_vocab_size(u"labels")
        self.encoder = encoder
        self.feedforward_layer = TimeDistributed(
            feedforward) if feedforward else None
        self.pos_tag_embedding = pos_tag_embedding or None
        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = span_extractor.get_output_dim()

        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_classes))

        representation_dim = text_field_embedder.get_output_dim()
        if pos_tag_embedding is not None:
            representation_dim += pos_tag_embedding.get_output_dim()
        check_dimensions_match(
            representation_dim, encoder.get_input_dim(),
            u"representation dim (tokens + optional POS tags)",
            u"encoder input dim")
        check_dimensions_match(encoder.get_output_dim(),
                               span_extractor.get_input_dim(),
                               u"encoder input dim",
                               u"span extractor input dim")
        if feedforward is not None:
            check_dimensions_match(span_extractor.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   u"span extractor output dim",
                                   u"feedforward input dim")

        self.tag_accuracy = CategoricalAccuracy()

        if evalb_directory_path is not None:
            self._evalb_score = EvalbBracketingScorer(evalb_directory_path)
        else:
            self._evalb_score = None
        initializer(self)
Example #19
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 highway_embedding_size: int,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 modeling_layer: Seq2SeqEncoder,
                 span_end_lstm: Seq2SeqEncoder,
                 language: str = 'en',
                 ptr_dim: int = 200,
                 dropout: float = 0.2,
                 max_num_passages: int = 5,
                 max_num_character: int = 4,
                 loss_ratio: float = 0.1,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self._span_end_encoder = span_end_lstm
        self.loss_ratio = loss_ratio
        self.language = language
        self.max_num_character = max_num_character
        self.relu = torch.nn.ReLU()
        self.max_num_passages = max_num_passages
        self.ptr_dim = ptr_dim
        self.decay = 1.0
        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            ElasticHighway(text_field_embedder.get_output_dim(),
                           highway_embedding_size, num_highway_layers))
        self._phrase_layer = phrase_layer
        self._matrix_attention = DotProductMatrixAttention()
        self._modeling_layer = modeling_layer
        modeling_dim = modeling_layer.get_output_dim()
        encoding_dim = phrase_layer.get_output_dim()
        self._ptr_layer_1 = TimeDistributed(
            torch.nn.Linear(encoding_dim * 4 + modeling_dim, 1))
        self._ptr_layer_2 = TimeDistributed(
            torch.nn.Linear(encoding_dim * 4 + modeling_dim, 1))
        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._rouge_metrics = MsmarcoRouge()
        self._bleu_metrics = BLEU()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms

        initializer(self)
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 modeling_layer: Seq2VecEncoder,
                 answers_encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 dropout: float = 0.2,
                 mask_lstms: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(BidirectionalAttentionFlow, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._highway_layer = TimeDistributed(
            Highway(text_field_embedder.get_output_dim(), num_highway_layers))

        self._classifier_feedforward = classifier_feedforward

        self._phrase_layer = phrase_layer
        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._modeling_layer = modeling_layer

        encoding_dim = phrase_layer.get_output_dim()

        self._time_distributed_highway_layer = TimeDistributed(
            self._highway_layer)
        self._answers_encoder = TimeDistributed(answers_encoder)

        # Bidaf has lots of layer dimensions which need to match up - these aren't necessarily
        # obvious from the configuration files, so we check here.
        check_dimensions_match(modeling_layer.get_input_dim(),
                               4 * encoding_dim, "modeling layer input dim",
                               "4 * encoding dim")
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               phrase_layer.get_input_dim(),
                               "text field embedder output dim",
                               "phrase layer input dim")

        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
        else:
            self._dropout = lambda x: x

        self._mask_lstms = mask_lstms
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Example #21
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 phrase_layer,
                 residual_encoder,
                 span_start_encoder,
                 span_end_encoder,
                 initializer,
                 dropout=0.2,
                 mask_lstms=True):
        super(BiDAFSelfAttention, self).__init__(vocab)
        # Initialize layers.
        self._text_field_embedder = text_field_embedder

        self._phrase_layer = phrase_layer
        # Inintialize start/end span predictors.
        encoding_dim = phrase_layer.get_output_dim()

        self._matrix_attention = TriLinearAttention(encoding_dim)
        self._merge_atten = TimeDistributed(
            torch.nn.Linear(encoding_dim * 4, encoding_dim))

        self._residual_encoder = residual_encoder
        self._self_atten = TriLinearAttention(encoding_dim)
        self._merge_self_atten = TimeDistributed(
            torch.nn.Linear(encoding_dim * 3, encoding_dim))

        self._span_start_encoder = span_start_encoder
        self._span_end_encoder = span_end_encoder

        self._span_start_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))
        self._span_end_predictor = TimeDistributed(
            torch.nn.Linear(encoding_dim, 1))

        initializer(self)

        self._span_start_accuracy = CategoricalAccuracy()
        self._span_end_accuracy = CategoricalAccuracy()
        self._span_accuracy = BooleanAccuracy()
        self._squad_metrics = SquadEmAndF1()
        self._official_em = Average()
        self._official_f1 = Average()
        if dropout > 0:
            self._dropout = torch.nn.Dropout(p=dropout)
            # self._dropout = VariationalDropout(p=dropout)
        else:
            self._dropout = lambda x: x
        self._mask_lstms = mask_lstms
Example #22
    def __init__(self,
                 vocab: Vocabulary,
                 task: str,
                 encoder: Seq2SeqEncoder,
                 label_smoothing: float = 0.0,
                 dropout: float = 0.0,
                 adaptive: bool = False,
                 features: List[str] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(TagDecoder, self).__init__(vocab, regularizer)

        self.task = task
        self.encoder = encoder
        self.output_dim = encoder.get_output_dim()
        self.label_smoothing = label_smoothing
        self.num_classes = self.vocab.get_vocab_size(task)
        self.adaptive = adaptive
        self.features = features if features else []

        self.metrics = {
            "acc": CategoricalAccuracy(),
            # "acc3": CategoricalAccuracy(top_k=3)
        }

        if self.adaptive:
            # TODO
            adaptive_cutoffs = [
                round(self.num_classes / 15), 3 * round(self.num_classes / 15)
            ]
            self.task_output = AdaptiveLogSoftmaxWithLoss(
                self.output_dim,
                self.num_classes,
                cutoffs=adaptive_cutoffs,
                div_value=4.0)
        else:
            self.task_output = TimeDistributed(
                Linear(self.output_dim, self.num_classes))

        self.feature_outputs = torch.nn.ModuleDict()
        self.features_metrics = {}
        for feature in self.features:
            self.feature_outputs[feature] = TimeDistributed(
                Linear(self.output_dim, vocab.get_vocab_size(feature)))
            self.features_metrics[feature] = {
                "acc": CategoricalAccuracy(),
            }

        initializer(self)
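A small worked example of the adaptive-softmax cutoffs computed above (num_classes here is illustrative): with cutoffs [4, 12], the head covers the 4 most frequent tag ids, the first tail cluster covers ids 4-11, and the second cluster covers the rest.

num_classes = 60
adaptive_cutoffs = [round(num_classes / 15), 3 * round(num_classes / 15)]
print(adaptive_cutoffs)  # [4, 12]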
Example #23
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 judge: Model = None,
                 update_judge: bool = False,
                 reward_method: str = None,
                 detach_value_head: bool = False,
                 qa_loss_weight: float = 0.,
                 influence_reward: bool = False,
                 theory_of_mind: bool = False) -> None:
        super(BertMC, self).__init__(vocab, regularizer)

        self.judge = judge
        self.is_judge = self.judge is None
        self.reward_method = None if self.is_judge else reward_method
        self.update_judge = update_judge and (self.judge is not None)
        self._detach_value_head = detach_value_head
        self._qa_loss_weight = qa_loss_weight
        self.influence_reward = influence_reward
        self.theory_of_mind = theory_of_mind
        self._text_field_embedder = text_field_embedder
        self._hidden_dim = text_field_embedder.get_output_dim()
        self.answer_type = 'mc'
        self.output_type = 'mc'
        self._config = self._text_field_embedder.token_embedder_tokens._modules[
            'bert_model'].config

        if not self.is_judge:
            self._sent_chosen_embeddings = torch.nn.Embedding(
                2, self._config.hidden_size)
            self._sent_chosen_embeddings.weight.data *= 0  # Init to zero to minimally affect BERT at start
            self._policy_head = TimeDistributed(
                torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
            self._value_head = TimeDistributed(
                torch.nn.Linear(self._hidden_dim, 1))  # Can make MLP
            self._turn_film_gen = torch.nn.Linear(1, 2 * self._hidden_dim)
            self._film = FiLM()
            if self.theory_of_mind:
                final_blocks_config = deepcopy(self._config)
                final_blocks_config.num_hidden_layers = 1
                self.final_blocks_input_proj = TimeDistributed(
                    torch.nn.Linear(self._hidden_dim * 2, self._hidden_dim))
                self.final_blocks = BertEncoder(final_blocks_config)

        # NOTE: Rename to self._accuracy (may break model loading)
        self._span_start_accuracy = CategoricalAccuracy()
        self._initializer = initializer
Example #24
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 span_extractor: SpanExtractor,
                 feedforward: FeedForward,
                 ner_threshold: float = 0.65,
                 max_inner_range: float = 18,
                 metadata: List[Dict[str, Any]] = None,
                 label_namespace: str = "ner_labels",
                 regularizer: Optional[RegularizerApplicator] = None,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        super(NERTagger, self).__init__(vocab, regularizer)

        self._include_trigger = False
        for label in vocab.get_token_to_index_vocabulary(label_namespace):
            if "trigger" in label:
                self._include_trigger = True

        self.label_namespace = label_namespace
        self._n_labels = self.vocab.get_vocab_size(label_namespace)

        # null_label = vocab.get_token_index("", label_namespace)
        # assert null_label == 0

        self._ner_threshold = ner_threshold
        self._max_inner_range = max_inner_range
        self._ner_scorer = torch.nn.ModuleDict()

        self._text_field_embedder = text_field_embedder

        self._span_extractor = span_extractor

        self._ner_scorer = torch.nn.Sequential(
            TimeDistributed(feedforward),
            TimeDistributed(torch.nn.Linear(
                feedforward.get_output_dim(),
                self._n_labels)))

        self._relation_f1_metric = RelationMetric(
            vocab, tag_namespace=label_namespace,
        )

        self._ner_metric = NERMetrics(self._n_labels)
        self._relation_metric = SpanRelationMetric()

        self._loss = torch.nn.BCEWithLogitsLoss(reduction="sum")

        initializer(self)
Example #25
    def __init__(self,
                 vocab: Vocabulary,
                 make_feedforward: Callable,
                 span_emb_dim: int,
                 feature_size: int,
                 spans_per_word: float,
                 positive_label_weight: float = 1.0,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._namespaces = [
            entry for entry in vocab.get_namespaces()
            if "relation_labels" in entry
        ]
        self._n_labels = {
            name: vocab.get_vocab_size(name)
            for name in self._namespaces
        }

        self._mention_pruners = torch.nn.ModuleDict()
        self._relation_feedforwards = torch.nn.ModuleDict()
        self._relation_scorers = torch.nn.ModuleDict()
        self._relation_metrics = {}

        for namespace in self._namespaces:
            mention_feedforward = make_feedforward(input_dim=span_emb_dim)
            feedforward_scorer = torch.nn.Sequential(
                TimeDistributed(mention_feedforward),
                TimeDistributed(
                    torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
            self._mention_pruners[namespace] = Pruner(feedforward_scorer)

            relation_scorer_dim = 3 * span_emb_dim
            relation_feedforward = make_feedforward(
                input_dim=relation_scorer_dim)
            self._relation_feedforwards[namespace] = relation_feedforward
            relation_scorer = torch.nn.Linear(
                relation_feedforward.get_output_dim(),
                self._n_labels[namespace])
            self._relation_scorers[namespace] = relation_scorer

            self._relation_metrics[namespace] = RelationMetrics()

        self._spans_per_word = spans_per_word
        self._active_namespace = None

        self._loss = torch.nn.CrossEntropyLoss(reduction="sum",
                                               ignore_index=-1)
Example #26
    def __init__(self,
                 num_turns: int,
                 combination: str,
                 qq_attention: MatrixAttention,
                 qa_attention: MatrixAttention,
                 coref_layer: Seq2SeqEncoder,
                 use_ling: bool = False,
                 ling_features_size: int = 0,
                 use_mention_score=False,
                 use_antecedent_score=False):
        super(BiAttContext_MultiTurn, self).__init__()
        self.num_turns = num_turns
        self.combination = combination
        self.qq_attention = qq_attention
        self.qa_attention = qa_attention
        self._coref_layer = coref_layer
        self.use_ling = True
        coref_output_dim = self._coref_layer.get_output_dim()
        coref_input_dim = self._coref_layer.get_input_dim()
        self.use_ling = use_ling
        if self.use_ling:
            self._coref_proj = TimeDistributed(
                torch.nn.Linear(coref_output_dim + ling_features_size,
                                coref_output_dim))

        if use_mention_score:
            self.mention_score = TimeDistributed(
                torch.nn.Linear(coref_output_dim, 1, bias=False))
        else:
            self.mention_score = None

        if use_antecedent_score:
            self.antecedent_score = TimeDistributed(
                torch.nn.Sequential(torch.nn.Linear(coref_output_dim, 1),
                                    torch.nn.Sigmoid()))
        else:
            self.antecedent_score = None

        if self.combination == 'entropy+exponential':
            if torch.cuda.is_available():
                self.entropy_combination_weight = torch.nn.Parameter(
                    torch.cuda.FloatTensor(1), requires_grad=True)
            else:
                self.entropy_combination_weight = torch.nn.Parameter(
                    torch.FloatTensor(1), requires_grad=True)

        self.q_hat_enc = TimeDistributed(
            torch.nn.Linear(coref_input_dim * 3, coref_input_dim))
Example #27
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 attend_feedforward: FeedForward,
                 similarity_function: SimilarityFunction,
                 compare_feedforward: FeedForward,
                 aggregate_feedforward: FeedForward,
                 parser_model_path: str,
                 parser_cuda_device: int,
                 freeze_parser: bool,
                 premise_encoder: Optional[Seq2SeqEncoder] = None,
                 hypothesis_encoder: Optional[Seq2SeqEncoder] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SyntacticEntailment, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._attend_feedforward = TimeDistributed(attend_feedforward)
        self._attention = LegacyMatrixAttention(similarity_function)
        self._compare_feedforward = TimeDistributed(compare_feedforward)
        self._aggregate_feedforward = aggregate_feedforward
        self._premise_encoder = premise_encoder
        self._hypothesis_encoder = hypothesis_encoder or premise_encoder

        self._num_labels = vocab.get_vocab_size(namespace="labels")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               attend_feedforward.get_input_dim(),
                               "text field embedding dim",
                               "attend feedforward input dim")
        check_dimensions_match(aggregate_feedforward.get_output_dim(),
                               self._num_labels,
                               "final output dimension",
                               "number of labels")

        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()

        self._parser = load_archive(parser_model_path,
                                    cuda_device=parser_cuda_device).model
        self._parser._head_sentinel.requires_grad = False
        for child in self._parser.children():
            for param in child.parameters():
                param.requires_grad = False
        if not freeze_parser:
            for param in self._parser.encoder.parameters():
                param.requires_grad = True

        initializer(self)
Example #28
    def __init__(self,
                 vocab: Vocabulary,
                 sentence_encoder: SentenceEncoder,
                 qarg_ffnn: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None):
        super(ClauseAndSpanToAnswerSlotModel,
              self).__init__(vocab, regularizer)
        self._sentence_encoder = sentence_encoder
        self._qarg_ffnn = qarg_ffnn

        self._clause_embedding = Embedding(
            vocab.get_vocab_size("abst-clause-labels"),
            self._qarg_ffnn.get_input_dim())
        self._span_extractor = EndpointSpanExtractor(
            input_dim=self._sentence_encoder.get_output_dim(),
            combination="x,y")
        self._span_hidden = TimeDistributed(
            Linear(2 * self._sentence_encoder.get_output_dim(),
                   self._qarg_ffnn.get_input_dim()))
        self._predicate_hidden = Linear(
            self._sentence_encoder.get_output_dim(),
            self._qarg_ffnn.get_input_dim())
        self._qarg_predictor = Linear(self._qarg_ffnn.get_output_dim(),
                                      self.vocab.get_vocab_size("qarg-labels"))
        self._metric = BinaryF1()
Example #29
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 binary_feature_dim: int,
                 embedding_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_smoothing: float = None,
                 ignore_span_metric: bool = False) -> None:
        super(SemanticRoleLabeler, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SpanBasedF1Measure(vocab, tag_namespace="labels", ignore_classes=["V"])

        self.encoder = encoder
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                           self.num_classes))
        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric

        check_dimensions_match(text_field_embedder.get_output_dim() + binary_feature_dim,
                               encoder.get_input_dim(),
                               "text embedding dim + verb indicator embedding dim",
                               "encoder input dim")
        initializer(self)
Example #30
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 stacked_encoder: Seq2SeqEncoder,
                 binary_feature_dim: int,
                 initializer: InitializerApplicator,
                 embedding_dropout: float = 0.0) -> None:
        super(SemanticRoleLabeler, self).__init__(vocab)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace="labels",
                                              ignore_classes=["V"])

        self.stacked_encoder = stacked_encoder
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.tag_projection_layer = TimeDistributed(
            Linear(self.stacked_encoder.get_output_dim(), self.num_classes))
        self.embedding_dropout = Dropout(p=embedding_dropout)
        initializer(self)

        if text_field_embedder.get_output_dim(
        ) + binary_feature_dim != stacked_encoder.get_input_dim():
            raise ConfigurationError(
                "The SRL Model uses a binary verb indicator feature, meaning "
                "the input dimension of the stacked_encoder must be equal to "
                "the output dimension of the text_field_embedder plus "
                "binary_feature_dim.")