Ejemplo n.º 1
0
    def test_correct_sequence_elements_are_embedded(self):
        """An "x,y" extractor must return the start and end embeddings side by side."""
        embedding_dim = 7
        sequence_tensor = torch.randn([2, 5, embedding_dim])
        # Concatenate start and end points together to form our representation.
        extractor = EndpointSpanExtractor(embedding_dim, "x,y")

        # Two spans per batch element, each given as a [start, end] pair.
        indices = torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [3, 4]]])
        span_representations = extractor(sequence_tensor, indices)

        assert list(span_representations.size()) == [2, 2, 2 * embedding_dim]
        assert extractor.get_output_dim() == 2 * embedding_dim
        assert extractor.get_input_dim() == embedding_dim

        span_starts, span_ends = indices.split(1, -1)
        # The representation is the start embedding followed by the end
        # embedding, so halving the last axis recovers the two endpoints.
        start_embeddings, end_embeddings = span_representations.split(
            embedding_dim, -1)

        expected_starts = batched_index_select(sequence_tensor,
                                               span_starts.squeeze())
        expected_ends = batched_index_select(sequence_tensor,
                                             span_ends.squeeze())
        for actual, expected in ((start_embeddings, expected_starts),
                                 (end_embeddings, expected_ends)):
            numpy.testing.assert_array_equal(actual.data.numpy(),
                                             expected.data.numpy())
Ejemplo n.º 2
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 number_of_linear_layers: int = 2,
                 metrics: Dict[str, allennlp.training.metrics.Metric] = None,
                 renorm_method: str = None,
                 skip_connection: bool = False,
                 regularizer: RegularizerApplicator = None,
                 bert_model: str = None,
                 ) -> None:
        """Set up the layers of a span-endpoint relation classifier over BERT.

        Args:
            vocab: Forwarded to the base class constructor.
            text_field_embedder: Stored on ``self.embbedings``.
            number_of_linear_layers: Stored on ``self.number_of_linear_layers``.
            metrics: Metrics keyed by name; when falsy a single
                ``CategoricalAccuracy`` under ``"accuracy"`` is used.
            renorm_method: Stored on ``self.renorm_method``; when falsy the
                ``linear`` name from the enclosing scope is used instead.
            skip_connection: Stored on ``self.do_skip_connection``.
            regularizer: Forwarded to the base class constructor.
            bert_model: BERT model name. A name containing ``"base"`` selects
                ``BERT_BASE_CONFIG``; any other name selects
                ``BERT_LARGE_CONFIG``.

        Raises:
            TypeError: If ``bert_model`` is ``None``.
        """
        super().__init__(vocab, regularizer)
        self.embbedings = text_field_embedder
        if bert_model is None:
            # `"base" in None` fails with "argument of type 'NoneType' is not
            # iterable"; keep the exception type but say what is actually wrong.
            raise TypeError(
                "bert_model must be a model name string, got None")
        self.bert_type_model = (BERT_BASE_CONFIG
                                if "base" in bert_model else BERT_LARGE_CONFIG)
        hidden_size = self.bert_type_model['hidden_size']
        # The "x,y" combination concatenates both span endpoints, which is why
        # the layers below work on twice the BERT hidden size.
        pair_size = hidden_size * 2

        self.extractor = EndpointSpanExtractor(input_dim=hidden_size,
                                               combination="x,y")
        self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.metrics = metrics or {
            "accuracy": CategoricalAccuracy()
        }
        self.first_liner_layer = torch.nn.Linear(pair_size, pair_size)
        self.second_liner_layer = torch.nn.Linear(pair_size, pair_size)
        self.do_skip_connection = skip_connection

        self.number_of_linear_layers = number_of_linear_layers
        self.relation_layer_norm = torch.nn.LayerNorm(
            torch.Size([pair_size]), elementwise_affine=True)
        self.head_token_index = 1  # FIXME: this should be an argument
        self.tail_token_index = 3
        self.tanh = torch.nn.Tanh()
        self.drop_layer = torch.nn.Dropout(p=0.2)
        # NOTE(review): `linear` is not defined in this method; presumably a
        # module-level constant. Confirm it exists.
        self.renorm_method = renorm_method or linear
    def __init__(self,
                 vocab: Vocabulary,
                 sentence_encoder: SentenceEncoder,
                 qarg_ffnn: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None):
        """Build the clause/span-to-answer-slot layers.

        Args:
            vocab: Supplies the sizes of the ``"abst-clause-labels"`` and
                ``"qarg-labels"`` namespaces.
            sentence_encoder: Encoder whose output width sizes the span and
                predicate projections.
            qarg_ffnn: Feed-forward network; its input width is the shared
                hidden size and its output width feeds the final predictor.
            initializer: Accepted for interface compatibility.
                NOTE(review): it is not applied anywhere in this constructor.
            regularizer: Forwarded to the base class constructor.
        """
        super(ClauseAndSpanToAnswerSlotModel,
              self).__init__(vocab, regularizer)
        self._sentence_encoder = sentence_encoder
        self._qarg_ffnn = qarg_ffnn

        encoder_dim = self._sentence_encoder.get_output_dim()
        hidden_dim = self._qarg_ffnn.get_input_dim()

        num_clauses = vocab.get_vocab_size("abst-clause-labels")
        self._clause_embedding = Embedding(num_clauses, hidden_dim)
        self._span_extractor = EndpointSpanExtractor(input_dim=encoder_dim,
                                                     combination="x,y")
        # Span representations are two concatenated endpoints, hence 2x.
        self._span_hidden = TimeDistributed(Linear(2 * encoder_dim,
                                                   hidden_dim))
        self._predicate_hidden = Linear(encoder_dim, hidden_dim)

        num_qargs = self.vocab.get_vocab_size("qarg-labels")
        self._qarg_predictor = Linear(self._qarg_ffnn.get_output_dim(),
                                      num_qargs)
        self._metric = BinaryF1()
Ejemplo n.º 4
0
    def __init__(self,
                 vocab: Vocabulary,
                 sentence_encoder: SentenceEncoder,
                 tan_ffnn: FeedForward,
                 inject_predicate: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None):
        """Build the span-to-TAN prediction layers.

        Args:
            vocab: Supplies the size of the ``"tan-string-labels"`` namespace.
            sentence_encoder: Encoder whose output width sizes the predictor
                input.
            tan_ffnn: Feed-forward network placed in the middle of the
                prediction stack.
            inject_predicate: When true the predictor input is three encoder
                widths wide instead of two.
            initializer: Accepted for interface compatibility.
                NOTE(review): it is not applied anywhere in this constructor.
            regularizer: Forwarded to the base class constructor.
        """
        super(SpanToTanModel, self).__init__(vocab, regularizer)
        self._sentence_encoder = sentence_encoder
        self._tan_ffnn = tan_ffnn
        self._inject_predicate = inject_predicate

        encoder_dim = self._sentence_encoder.get_output_dim()
        self._span_extractor = EndpointSpanExtractor(input_dim=encoder_dim,
                                                     combination="x,y")

        if self._inject_predicate:
            prediction_input_dim = 3 * encoder_dim
        else:
            prediction_input_dim = 2 * encoder_dim

        input_projection = Linear(prediction_input_dim,
                                  self._tan_ffnn.get_input_dim())
        output_projection = Linear(
            self._tan_ffnn.get_output_dim(),
            self.vocab.get_vocab_size("tan-string-labels"))
        self._tan_pred = TimeDistributed(
            Sequential(input_projection, ReLU(), self._tan_ffnn,
                       output_projection))
        self._metric = BinaryF1()
Ejemplo n.º 5
0
    def __init__(self,
                 encoder_size=64,
                 dim_num_feat=0,
                 dropout=0.2,
                 seq_dropout=0.1,
                 num_outputs=5):
        """Create the BERT + BiLSTM entity-linking layers.

        Args:
            encoder_size: Width passed to the LSTM encoder; the bidirectional
                width (twice this value) sizes the span extractor, the
                ``hidden`` layer and the attention pooling.
            dim_num_feat: Not used in this constructor.
            dropout: Dropout probability inside ``self.classify``.
            seq_dropout: Probability for ``self.dropout1d``.
            num_outputs: Output width of ``self.hidden`` and ``self.classify``.
        """
        super(EntityLink_bert, self).__init__()
        self.seq_dropout = seq_dropout
        self.dropout1d = nn.Dropout2d(self.seq_dropout)

        bilstm_dim = 2 * encoder_size
        self.span_extractor = EndpointSpanExtractor(bilstm_dim,
                                                    combination="x,x+y,y")
        # selfspanextractor performed very poorly.
        self.bert = BertModel.from_pretrained('bert-base-chinese')
        self.use_layer = -1
        self.LSTM = LSTMEncoder(embed_size=768,
                                encoder_size=encoder_size,
                                bidirectional=True)
        self.hidden = nn.Linear(bilstm_dim, num_outputs)

        classifier_in = 4 * 768
        self.classify = nn.Sequential(
            nn.BatchNorm1d(classifier_in),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=classifier_in, out_features=num_outputs),
        )
        self.attn_pool = Attention(bilstm_dim)
Ejemplo n.º 6
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 context_layer: Seq2SeqEncoder,
                 complex_word_feedforward: FeedForward,
                 lexical_dropout: float = 0.2,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Assemble the complex-word identification model.

        Args:
            vocab: Forwarded to the base class constructor.
            text_field_embedder: Stored on ``self._text_field_embedder``.
            context_layer: Stored on ``self._context_layer``; its output width
                sizes the target-word span extractor.
            complex_word_feedforward: First stage of the scorer; a linear
                layer to a single output is stacked on top of it.
            lexical_dropout: Dropout probability; values ``<= 0`` install an
                identity function instead of a dropout layer.
            initializer: Applied to the whole model as the last step.
            regularizer: Forwarded to the base class constructor.
        """
        super(NeuralMutilingualCWI, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer

        score_projection = torch.nn.Linear(
            complex_word_feedforward.get_output_dim(), 1)
        self._complex_word_scorer = torch.nn.Sequential(
            complex_word_feedforward, score_projection)

        self._target_word_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(), combination="x,y")

        self._loss = torch.nn.BCELoss()
        self._metric = F1Measure(1)

        self._lexical_dropout = (torch.nn.Dropout(p=lexical_dropout)
                                 if lexical_dropout > 0 else (lambda x: x))
        initializer(self)
Ejemplo n.º 7
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 context_layer: Seq2SeqEncoder,
                 mention_feedforward: FeedForward,
                 antecedent_feedforward: FeedForward,
                 feature_size: int,
                 max_span_width: int,
                 spans_per_word: float,
                 max_antecedents: int,
                 lexical_dropout: float = 0.2,
                 context_layer_back: Seq2SeqEncoder = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Assemble the coreference resolver's sub-modules.

        Args:
            vocab: Forwarded to the base class constructor.
            text_field_embedder: Token embedder; its output width sizes the
                self-attentive span extractor.
            context_layer: Contextual encoder; its output width sizes the
                endpoint span extractor.
            mention_feedforward: Network used to score candidate mentions.
            antecedent_feedforward: Network used to score antecedent pairs.
            feature_size: Embedding width for span-width, distance, speaker
                and genre features.
            max_span_width: Number of span-width embeddings; also stored.
            spans_per_word: Stored on ``self._spans_per_word``.
            max_antecedents: Stored on ``self._max_antecedents``.
            lexical_dropout: Dropout probability; values ``<= 0`` install an
                identity function instead of a dropout layer.
            context_layer_back: Optional second encoder, stored as given.
            initializer: Applied to the whole model as the last step.
            regularizer: Forwarded to the base class constructor.
        """
        super(CoreferenceResolver, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._context_layer_back = context_layer_back
        self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)

        mention_score_head = torch.nn.Linear(
            mention_feedforward.get_output_dim(), 1)
        mention_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(mention_score_head))
        self._mention_pruner = SpanPruner(mention_scorer)

        antecedent_score_head = torch.nn.Linear(
            antecedent_feedforward.get_output_dim(), 1)
        self._antecedent_scorer = TimeDistributed(antecedent_score_head)

        # TODO check the output dim when two context layers are passed through
        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim())

        # 10 possible distance buckets.
        self._num_distance_buckets = 10
        self._distance_embedding = Embedding(self._num_distance_buckets,
                                             feature_size)
        self._speaker_embedding = Embedding(2, feature_size)

        # Map each genre code to its position in this fixed list.
        genre_codes = ['bc', 'bn', 'mz', 'nw', 'pt', 'tc', 'wb']
        self.genres = {
            code: position
            for position, code in enumerate(genre_codes)
        }
        self._genre_embedding = Embedding(len(self.genres), feature_size)

        self._max_span_width = max_span_width
        self._spans_per_word = spans_per_word
        self._max_antecedents = max_antecedents

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()

        self._lexical_dropout = (torch.nn.Dropout(p=lexical_dropout)
                                 if lexical_dropout > 0 else (lambda x: x))
        self._feature_dropout = torch.nn.Dropout(0.2)
        initializer(self)
Ejemplo n.º 8
0
 def __init__(self, bert_hidden_size: int):
     """Build the span extractor and the fully connected classification head.

     The head is a stack of BatchNorm -> Dropout -> Linear (-> ReLU) stages
     that ends in a 3-way linear output. Its layers are then explicitly
     re-initialised: batch-norm weights to 1 and biases to 0, linear weights
     with Kaiming-normal and biases to 0.

     Args:
         bert_hidden_size: Width of the BERT hidden states; sizes the span
             extractor and the head input (``bert_hidden_size * 7``).
     """
     super().__init__()
     self.bert_hidden_size = bert_hidden_size
     fc_size = 256
     # self.span_extractor = SelfAttentiveSpanExtractor(bert_hidden_size)
     self.span_extractor = EndpointSpanExtractor(bert_hidden_size,
                                                 "x,y,x*y")
     self.fc = nn.Sequential(nn.BatchNorm1d(bert_hidden_size * 7),
                             nn.Dropout(0.5),
                             nn.Linear(bert_hidden_size * 7, fc_size),
                             nn.ReLU(), nn.BatchNorm1d(fc_size),
                             nn.Dropout(0.5), nn.Linear(fc_size, fc_size),
                             nn.ReLU(), nn.BatchNorm1d(fc_size),
                             nn.Dropout(0.5), nn.Linear(fc_size, fc_size),
                             nn.ReLU(), nn.BatchNorm1d(fc_size),
                             nn.Dropout(0.5), nn.Linear(fc_size, 3))
     for module in self.fc:
         if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
             nn.init.constant_(module.weight, 1)
             nn.init.constant_(module.bias, 0)
             print("Initing batchnorm")
         elif isinstance(module, nn.Linear):
             if getattr(module, "weight_v", None) is not None:
                 # Weight-normalised layer: initialise the magnitude (g)
                 # and direction (v) parameters separately.
                 nn.init.uniform_(module.weight_g, 0, 1)
                 nn.init.kaiming_normal_(module.weight_v)
                 print("Initing linear with weight normalization")
             else:
                 nn.init.kaiming_normal_(module.weight)
                 print("Initing linear")
             nn.init.constant_(module.bias, 0)
Ejemplo n.º 9
0
    def test_masked_indices_are_handled_correctly(self):
        """Spans disabled via ``span_indices_mask`` must come back as zeros."""
        embedding_dim = 7
        sequence_tensor = torch.randn([2, 5, embedding_dim])
        # Concatenate start and end points together to form our representation.
        extractor = EndpointSpanExtractor(embedding_dim, "x,y")

        indices = torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [3, 4]]])
        span_representations = extractor(sequence_tensor, indices)

        # Mask out every span of the second batch element.
        indices_mask = torch.LongTensor([[1, 1], [0, 0]])

        span_representations = extractor(sequence_tensor,
                                         indices,
                                         span_indices_mask=indices_mask)
        start_embeddings, end_embeddings = span_representations.split(
            embedding_dim, -1)
        span_starts, span_ends = indices.split(1, -1)

        comparisons = []
        for actual, endpoints in ((start_embeddings, span_starts),
                                  (end_embeddings, span_ends)):
            expected = batched_index_select(sequence_tensor,
                                            endpoints.squeeze()).data
            # The second batch element is fully masked, so it must be zero.
            expected[1, :, :].fill_(0)
            comparisons.append((actual, expected))

        for actual, expected in comparisons:
            numpy.testing.assert_array_equal(actual.data.numpy(),
                                             expected.numpy())
Ejemplo n.º 10
0
    def __init__(
        self,
        use_citation_graph_embeddings: bool,
        citation_embedding_file: str,
        doc_to_idx_mapping_file: str,
        finetune_embedding: bool,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        modules: Params,
        loss_weights: Dict[str, int],
        lexical_dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
        display_metrics: List[str] = None,
    ) -> None:
        """Assemble the saliency-only model.

        Args:
            use_citation_graph_embeddings: When true, load the citation graph
                embeddings and the document-to-index mapping from disk.
            citation_embedding_file: Path handed to
                ``initialize_graph_embeddings``.
            doc_to_idx_mapping_file: Path of the JSON document-to-index
                mapping.
            finetune_embedding: Forwarded to ``initialize_graph_embeddings``.
            vocab: Forwarded to the base class and to the span classifier.
            text_field_embedder: Stored on ``self._text_field_embedder``.
            context_layer: Stored on ``self._context_layer``; its output width
                sizes both span extractors.
            modules: Sub-module configuration; must contain
                ``"saliency_classifier"``.
            loss_weights: Per-task loss weights. The values are converted to
                ``float`` in place, so the caller's mapping is modified.
            lexical_dropout: Probability for ``self._lexical_dropout``.
            initializer: Applied to the whole model as the last step.
            regularizer: Forwarded to the base class constructor.
            display_metrics: Stored on ``self._display_metrics``.

        Raises:
            ValueError: If graph embeddings are requested but either file
                path is an empty string.
        """
        super(SalientOnlyModel, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)

        if use_citation_graph_embeddings:
            if citation_embedding_file == "" or doc_to_idx_mapping_file == "":
                raise ValueError(
                    "Must supply citation embedding files to use graph embedding features"
                )
            self._document_embedding = initialize_graph_embeddings(
                citation_embedding_file, finetune_embedding=finetune_embedding)
            # Close the mapping file deterministically instead of leaving the
            # handle to the garbage collector.
            with open(doc_to_idx_mapping_file) as mapping_file:
                self._doc_to_idx_mapping = json.load(mapping_file)
        else:
            self._document_embedding = None
            self._doc_to_idx_mapping = None

        modules = Params(modules)

        self._saliency_classifier = SpanClassifier.from_params(
            vocab=vocab,
            document_embedding=self._document_embedding,
            doc_to_idx_mapping=self._doc_to_idx_mapping,
            params=modules.pop("saliency_classifier"))
        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(), combination="x,y")
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=context_layer.get_output_dim())

        for k in loss_weights:
            loss_weights[k] = float(loss_weights[k])

        self._loss_weights = loss_weights
        # Independent snapshot of the weights as configured at construction.
        self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

        self._display_metrics = display_metrics
        self._multi_task_loss_metrics = {k: Average() for k in ["saliency"]}

        self.training_mode = True
        self.prediction_mode = False

        initializer(self)
Ejemplo n.º 11
0
 def _make_span_extractor(self):
     """Return the span extractor for this module.

     Mean pooling is currently hard-wired, so ``self.span_pooling`` is never
     consulted; the endpoint-based extractor below is kept only as a dormant
     alternative.
     """
     force_mean_pooling = True
     if not force_mean_pooling:
         return EndpointSpanExtractor(self.input_dim,
                                      combination=self.span_pooling)
     # A disabled variant once used SelfAttentiveSpanExtractor when
     # self.span_pooling == "attn".
     return MeanPoolingSpanExtractor(self.input_dim)
Ejemplo n.º 12
0
    def __init__(self,
                 vocab_size,
                 init_embedding,
                 word_embed_size=300,
                 encoder_size=64,
                 dim_num_feat=0,
                 dropout=0.2,
                 seq_dropout=0.1,
                 num_outputs=5):
        """Create the word-embedding + BiLSTM entity-linking layers.

        Args:
            vocab_size: Number of rows in the word embedding table.
            init_embedding: Optional NumPy array copied into the embedding
                weights; skipped when ``None``.
            word_embed_size: Width of the word embeddings.
            encoder_size: Width passed to each LSTM encoder.
            dim_num_feat: Not used in this constructor.
            dropout: Dropout probability inside the classifier and MLP stacks.
            seq_dropout: Probability for ``self.dropout1d``.
            num_outputs: Output width of ``self.classify``.
        """
        super(EntityLink_v3, self).__init__()
        self.word_embedding = nn.Embedding(vocab_size,
                                           word_embed_size,
                                           padding_idx=0)
        self.seq_dropout = seq_dropout
        self.embed_size = word_embed_size
        self.encoder_size = encoder_size
        if init_embedding is not None:
            pretrained = torch.from_numpy(init_embedding)
            self.word_embedding.weight.data.copy_(pretrained)
        # The word embeddings are frozen regardless of how they were filled.
        for weight in self.word_embedding.parameters():
            weight.requires_grad = False

        bilstm_dim = encoder_size * 2
        pair_dim = encoder_size * 4

        self.dropout1d = nn.Dropout2d(self.seq_dropout)
        self.span_extractor = EndpointSpanExtractor(bilstm_dim)

        # BERT is not loaded in this variant.
        self.use_layer = -1
        self.query_attention = Attention(bilstm_dim)
        self.abstract_attention = Attention(bilstm_dim)
        self.lstm_attention = Attention(bilstm_dim)

        # NOTE(review): the encoders are built for 300-wide inputs rather
        # than word_embed_size; confirm callers never change the latter.
        lstm_kwargs = dict(embed_size=300,
                           encoder_size=encoder_size,
                           bidirectional=True)
        self.LSTM_query = LSTMEncoder(**lstm_kwargs)
        self.LSTM_abstract = LSTMEncoder(**lstm_kwargs)
        self.LSTM = LSTMEncoder(**lstm_kwargs)

        hidden_size = 100
        self.hidden = nn.Linear(bilstm_dim, hidden_size)
        self.span_linear = nn.Linear(pair_dim, bilstm_dim)

        self.classify = nn.Sequential(
            nn.BatchNorm1d(pair_dim),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=pair_dim, out_features=num_outputs),
        )
        self.mlp = nn.Sequential(
            nn.BatchNorm1d(768),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=768, out_features=128),
            nn.ReLU(inplace=True),
        )
        mlp2_in = 128 + 2
        self.mlp2 = nn.Sequential(
            nn.BatchNorm1d(mlp2_in),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=mlp2_in, out_features=1),
            nn.Sigmoid(),
        )
Ejemplo n.º 13
0
    def __init__(self,
                 vocab_size=0,
                 word_embed_size=0,
                 encoder_size=64,
                 dim_num_feat=0,
                 dropout=0.2,
                 seq_dropout=0.1,
                 num_outputs=5):
        """Create the BERT-based entity-linking layers.

        Args:
            vocab_size: Number of rows in ``self.type_embedding``.
                NOTE(review): the default of 0 combined with ``padding_idx=0``
                looks unusable; presumably callers always pass a real size.
            word_embed_size: Width of ``self.type_embedding``.
            encoder_size: Width passed to the LSTM encoder.
            dim_num_feat: Not used in this constructor.
            dropout: Dropout probability inside the classifier and MLP stacks.
            seq_dropout: Probability for ``self.dropout1d``.
            num_outputs: Output width of ``self.classify``.
        """
        super(EntityLink_v2, self).__init__()
        self.type_embedding = nn.Embedding(vocab_size,
                                           word_embed_size,
                                           padding_idx=0)
        self.seq_dropout = seq_dropout

        self.dropout1d = nn.Dropout2d(self.seq_dropout)
        # Built exactly once; constructing it a second time further down
        # would only replace this instance with an identical one.
        self.span_extractor = EndpointSpanExtractor(768)

        bert_model = 'bert-base-chinese'
        self.bert = BertModel.from_pretrained(bert_model)
        self.use_layer = -1
        self.lstm_attention = Attention(768)

        self.LSTM = LSTMEncoder(embed_size=768,
                                encoder_size=encoder_size,
                                bidirectional=True)
        hidden_size = 100
        self.hidden = nn.Linear(2 * encoder_size, hidden_size)
        self.span_linear = nn.Linear(768 * 2, 768)

        self.classify = nn.Sequential(
            nn.BatchNorm1d(encoder_size * 4), nn.Dropout(p=dropout),
            nn.Linear(in_features=encoder_size * 4, out_features=num_outputs))
        self.mlp1 = nn.Sequential(
            nn.BatchNorm1d(768 * 2 + 1), nn.Dropout(p=dropout),
            nn.Linear(in_features=768 * 2 + 1, out_features=128),
            nn.ReLU(inplace=True))
        self.mlp2 = nn.Sequential(nn.BatchNorm1d(128), nn.Dropout(p=dropout),
                                  nn.Linear(in_features=128, out_features=1),
                                  nn.Sigmoid())
Ejemplo n.º 14
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        mention_feedforward: FeedForward,
        antecedent_feedforward: FeedForward,
        feature_size: int,
        max_span_width: int,
        spans_per_word: float,
        max_antecedents: int,
        lexical_dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        """Assemble the coreference model's sub-modules.

        Args:
            vocab: Forwarded to the base class constructor.
            text_field_embedder: Token embedder; its output width sizes the
                self-attentive span extractor.
            context_layer: Contextual encoder; its output width sizes the
                endpoint span extractor.
            mention_feedforward: Network used to score candidate mentions.
            antecedent_feedforward: Network used to score antecedent pairs.
            feature_size: Embedding width for span-width and distance
                features.
            max_span_width: Number of span-width embeddings; also stored.
            spans_per_word: Stored on ``self._spans_per_word``.
            max_antecedents: Stored on ``self._max_antecedents``.
            lexical_dropout: Dropout probability; values ``<= 0`` install an
                identity function instead of a dropout layer.
            initializer: Applied to the whole model as the last step.
            regularizer: Forwarded to the base class constructor.
        """
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)

        mention_score_head = torch.nn.Linear(
            mention_feedforward.get_output_dim(), 1)
        mention_scorer = torch.nn.Sequential(
            TimeDistributed(mention_feedforward),
            TimeDistributed(mention_score_head),
        )
        self._mention_pruner = Pruner(mention_scorer)

        antecedent_score_head = torch.nn.Linear(
            antecedent_feedforward.get_output_dim(), 1)
        self._antecedent_scorer = TimeDistributed(antecedent_score_head)

        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False,
        )
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim()
        )

        # 10 possible distance buckets.
        self._num_distance_buckets = 10
        self._distance_embedding = Embedding(self._num_distance_buckets,
                                             feature_size)

        self._max_span_width = max_span_width
        self._spans_per_word = spans_per_word
        self._max_antecedents = max_antecedents

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()

        self._lexical_dropout = (
            torch.nn.Dropout(p=lexical_dropout)
            if lexical_dropout > 0
            else (lambda x: x)
        )
        initializer(self)
    def __init__(self,
                 vocab: Vocabulary,
                 span_typer: SpanTyper,
                 embed_size: int,
                 label_namespace: str = 'span_labels',
                 event_namespace: str = 'event_labels'):
        """Build the argument span classifier's sub-modules.

        Args:
            vocab: Vocabulary; the size of ``event_namespace`` determines the
                number of event embeddings.
            span_typer: Stored on ``self.span_typer``.
            embed_size: Width of the incoming token representations.
            label_namespace: Stored on ``self.label_namespace``.
            event_namespace: Vocabulary namespace holding the event labels.
        """
        super(ArgumentSpanClassifier, self).__init__()

        self.vocab: Vocabulary = vocab
        self.label_namespace: str = label_namespace
        self.event_namespace: str = event_namespace

        self.embed_size = embed_size
        self.event_embedding_size = 50

        event_vocab = vocab.get_token_to_index_vocabulary(
            namespace=event_namespace)
        self.event_embeddings: nn.Embedding = nn.Embedding(
            num_embeddings=len(event_vocab),
            embedding_dim=self.event_embedding_size)

        self.lexical_dropout = nn.Dropout(p=0.2)
        self.span_extractor: SpanExtractor = EndpointSpanExtractor(
            input_dim=self.embed_size, combination='x,y')
        self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(
            embed_size)

        # Both affine stacks take the endpoint and the attentive span
        # representations together, so their input is the sum of both widths.
        span_repr_dim = (self.span_extractor.get_output_dim() +
                         self.attentive_span_extractor.get_output_dim())

        arg_network = FeedForward(input_dim=span_repr_dim,
                                  hidden_dims=self.embed_size,
                                  num_layers=2,
                                  activations=nn.GELU(),
                                  dropout=0.2)
        self.arg_affine = TimeDistributed(arg_network)

        # The trigger width is reduced by the event embedding width.
        trigger_dim = self.embed_size - self.event_embedding_size
        self.trigger_affine = FeedForward(input_dim=span_repr_dim,
                                          hidden_dims=trigger_dim,
                                          num_layers=2,
                                          activations=nn.GELU(),
                                          dropout=0.2)

        infusion_network = FeedForward(input_dim=2 * self.embed_size,
                                       hidden_dims=self.embed_size,
                                       num_layers=2,
                                       activations=nn.GELU(),
                                       dropout=0.2)
        self.trigger_event_infusion = TimeDistributed(infusion_network)

        self.span_typer: SpanTyper = span_typer

        self.apply(self._init_weights)
Ejemplo n.º 16
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        metrics: Dict[str, allennlp.training.metrics.Metric] = None,
        number_of_layers: int = 2,
        number_of_linear_layers: int = 2,
        renorm_method: str = None,
        skip_connection: bool = False,
        regularizer: RegularizerApplicator = None,
        bert_model: str = None,
    ) -> None:
        """Set up the BiLSTM-based span-endpoint relation classifier.

        Args:
            vocab: Forwarded to the base class; the four entity marker tokens
                are also registered in it.
            text_field_embedder: Stored on ``self.embbedings``.
            metrics: Metrics keyed by name; when falsy a single
                ``CategoricalAccuracy`` under ``"accuracy"`` is used.
            number_of_layers: Number of stacked LSTM layers.
            number_of_linear_layers: Stored on ``self.number_of_linear_layers``.
            renorm_method: Stored on ``self.renorm_method``; when falsy the
                ``linear`` name from the enclosing scope is used instead.
            skip_connection: Stored on ``self.do_skip_connection``.
            regularizer: Forwarded to the base class constructor.
            bert_model: Not used in this constructor.
        """
        super().__init__(vocab, regularizer)
        self.embbedings = text_field_embedder
        self.hidden_size = 250
        self.bilstm = torch.nn.LSTM(input_size=300,
                                    hidden_size=self.hidden_size,
                                    num_layers=number_of_layers,
                                    batch_first=True,
                                    bidirectional=True,
                                    dropout=0.2)
        # NOTE(review): the extractor is declared with input_dim=hidden_size
        # although the LSTM is bidirectional; confirm this is intended.
        self.extractor = EndpointSpanExtractor(input_dim=self.hidden_size,
                                               combination="x,y")
        self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.metrics = metrics or {"accuracy": CategoricalAccuracy()}

        # Doubled twice: once because the LSTM is bidirectional and once
        # because the two endpoints are concatenated.
        pair_size = self.hidden_size * 2 * 2
        self.first_liner_layer = torch.nn.Linear(pair_size, pair_size)
        self.second_liner_layer = torch.nn.Linear(pair_size, pair_size)
        self.do_skip_connection = skip_connection

        self.number_of_linear_layers = number_of_linear_layers
        # NOTE(review): normalises over hidden_size * 2 features, half the
        # width of the linear layers above; confirm against forward().
        norm_shape = torch.Size([self.hidden_size * 2])
        self.relation_layer_norm = torch.nn.LayerNorm(norm_shape,
                                                      elementwise_affine=True)
        self.tanh = torch.nn.Tanh()
        self.drop_layer = torch.nn.Dropout(p=0.2)
        # NOTE(review): `linear` is not defined in this method; presumably a
        # module-level constant. Confirm it exists.
        self.renorm_method = renorm_method or linear

        # Register all four marker tokens, remembering the vocabulary index
        # of the two start markers.
        marker_tokens = [
            head_start_token, head_end_token, tail_start_token,
            tail_end_token
        ]
        for marker in marker_tokens:
            marker_index = self.vocab.add_token_to_namespace(marker)
            if marker == head_start_token:
                self.head_token_index = marker_index
            elif marker == tail_start_token:
                self.tail_token_index = marker_index
Ejemplo n.º 17
0
    def __init__(self,
                 vocab_size,
                 init_embedding,
                 word_embed_size=300,
                 encoder_size=64,
                 dim_num_feat=0,
                 dropout=0.2,
                 seq_dropout=0.1,
                 num_outputs=5,
                 use_bert=False):
        """Create the entity-vector linking layers, with a BERT or LSTM backbone.

        Args:
            vocab_size: Number of rows in the word embedding table (LSTM
                backbone only).
            init_embedding: Optional NumPy array copied into the embedding
                weights (LSTM backbone only); skipped when ``None``.
            word_embed_size: Width of the word embeddings and LSTM input.
            encoder_size: Width passed to the LSTM encoder.
            dim_num_feat: Not used in this constructor.
            dropout: Dropout probability inside ``self.classify``.
            seq_dropout: Probability for ``self.dropout1d``.
            num_outputs: Output width of ``self.classify``.
            use_bert: Selects the BERT backbone instead of embeddings + LSTM.
                The embedding, dropout, LSTM and classifier attributes are
                only created when this is false.
        """
        super(EntityLink_entity_vector, self).__init__()
        self.use_bert = use_bert
        if use_bert:
            self.bert = BertModel.from_pretrained('bert-base-chinese')
            span_size = 768
        else:
            self.word_embedding = nn.Embedding(vocab_size,
                                               word_embed_size,
                                               padding_idx=0)
            self.seq_dropout = seq_dropout
            self.embed_size = word_embed_size
            self.encoder_size = encoder_size
            if init_embedding is not None:
                pretrained = torch.from_numpy(init_embedding)
                self.word_embedding.weight.data.copy_(pretrained)

            self.dropout1d = nn.Dropout2d(self.seq_dropout)
            self.LSTM = LSTMEncoder(embed_size=word_embed_size,
                                    encoder_size=encoder_size,
                                    bidirectional=True)
            classifier_in = encoder_size * 4
            self.classify = nn.Sequential(
                nn.BatchNorm1d(classifier_in),
                nn.Dropout(p=dropout),
                nn.Linear(in_features=classifier_in,
                          out_features=num_outputs),
            )
            span_size = 2 * encoder_size

        self.span_extractor = EndpointSpanExtractor(span_size)

        self.use_layer = -1
        hidden_size = 100
        # NOTE(review): the input width is fixed at 1536 for both backbones;
        # confirm it matches what forward() feeds in.
        self.hidden = nn.Linear(1536, hidden_size)
Ejemplo n.º 18
0
    def __init__(self, flair_model: FlairEmbeddings) -> None:
        """Wrap a Flair language model with frozen weights.

        Args:
            flair_model: Flair embeddings object. Its ``name`` is recorded
                and the hidden size of its ``lm`` becomes the output width.
        """
        super().__init__()

        self.flair_model = flair_model
        language_model = self.flair_model.lm
        self.pretrain_name = self.flair_model.name
        self.output_dim = language_model.hidden_size

        # The language model is used as a fixed feature extractor.
        for weight in language_model.parameters():
            weight.requires_grad = False

        # In Flair, every LM is unidirectional going forwards, so the
        # extractor always reads the right-hand endpoint ("y").
        self.span_extractor = EndpointSpanExtractor(
            input_dim=language_model.hidden_size, combination="y")
Ejemplo n.º 19
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        modules: Params,
        loss_weights: Dict[str, int],
        lexical_dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer=None,
        #regularizer: Optional[GbiRegularizerApplicator] = None,
        display_metrics: List[str] = None,
    ) -> None:
        """Assemble the multi-task SciREX model.

        Args:
            vocab: Forwarded to the base class and to every sub-module.
            text_field_embedder: Stored on ``self._text_field_embedder``.
            context_layer: Stored on ``self._context_layer``; its output width
                sizes both span extractors.
            modules: Sub-module configuration; must contain ``"ner"``,
                ``"saliency_classifier"`` and ``"n_ary_relation"``.
            loss_weights: Per-task loss weights. The values are converted to
                ``float`` in place, so the caller's mapping is modified.
            lexical_dropout: Probability for ``self._lexical_dropout``.
            initializer: Applied to the whole model as the last step.
            regularizer: Forwarded to the base class constructor.
            display_metrics: Stored on ``self._display_metrics``.
        """
        super(ScirexModel, self).__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)

        module_params = Params(modules)

        ner_params = module_params.pop("ner")
        self._ner = NERTagger.from_params(vocab=vocab, params=ner_params)
        saliency_params = module_params.pop("saliency_classifier")
        self._saliency_classifier = SpanClassifier.from_params(
            vocab=vocab, params=saliency_params)
        relation_params = module_params.pop("n_ary_relation")
        self._cluster_n_ary_relation = NAryRelationExtractor.from_params(
            vocab=vocab, params=relation_params)

        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(), combination="x,y")
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=context_layer.get_output_dim())

        # Normalise the weights to floats in the mapping that was passed in.
        for task_name in loss_weights:
            loss_weights[task_name] = float(loss_weights[task_name])
        self._loss_weights = loss_weights
        # Independent snapshot of the weights as configured at construction.
        self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

        self._display_metrics = display_metrics
        task_names = ["ner", "saliency", "n_ary_relation"]
        self._multi_task_loss_metrics = {
            task_name: Average()
            for task_name in task_names
        }

        self.training_mode = True
        self.prediction_mode = False

        initializer(self)
Ejemplo n.º 20
0
    def __init__(self,
                 node_types_vocabulary=None,
                 node_attrs_vocabulary=None,
                 p2p_edges_vocabulary=None,
                 p2r_edges_vocabulary=None,
                 bilstm_hidden_embedding_dim=200,
                 lexical_dropout=0.5,
                 lstm_dropout=0.4,
                 max_span_width=15,
                 feature_size=20,
                 embed_mode='bert-base-cased',
                 device=torch.device("cuda")):
        """Build the BERT encoder, BiLSTM and span extractors.

        Parameters
        ----------
        node_types_vocabulary, node_attrs_vocabulary, p2p_edges_vocabulary,
        p2r_edges_vocabulary :
            Stored unchanged on the instance.
        bilstm_hidden_embedding_dim : int
            ``hidden_size`` of the BiLSTM.
        lexical_dropout : float
            Dropout probability for ``self._lexical_dropout``; a value that is
            not positive installs an identity function instead.
        lstm_dropout : float
            ``dropout`` argument of the BiLSTM.
        max_span_width : int
            Number of span-width embeddings in the endpoint span extractor.
        feature_size : int
            Dimension of the span-width embedding.
        embed_mode : str
            Name of the pretrained encoder. Only ``'bert-base-cased'`` is
            supported.
        device : torch.device
            Stored as ``self.device``.

        Raises
        ------
        ValueError
            If ``embed_mode`` is not a supported encoder name.
        """
        super().__init__()
        self.node_types_vocabulary = node_types_vocabulary
        self.node_attrs_vocabulary = node_attrs_vocabulary
        self.p2p_edges_vocabulary = p2p_edges_vocabulary
        self.p2r_edges_vocabulary = p2r_edges_vocabulary
        self.bilstm_hidden_embedding_dim = bilstm_hidden_embedding_dim
        self.lexical_dropout = lexical_dropout
        self.lstm_dropout = lstm_dropout
        self.embed_mode = embed_mode
        self.device = device
        self.max_span_width = max_span_width
        self.feature_size = feature_size

        if self.embed_mode == 'bert-base-cased':
            self.bert = AutoModel.from_pretrained("bert-base-cased")
            self.bert_hidden_embedding_dim = 768
        else:
            # Without this, an unknown mode would only fail further down with
            # an AttributeError on ``self.bert_hidden_embedding_dim``.
            raise ValueError(
                "Unsupported embed_mode {!r}; only 'bert-base-cased' is "
                "supported.".format(self.embed_mode))

        if lexical_dropout > 0:
            self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
        else:
            self._lexical_dropout = lambda x: x

        self.bilstm = LSTM(input_size=self.bert_hidden_embedding_dim,
                           hidden_size=self.bilstm_hidden_embedding_dim,
                           dropout=self.lstm_dropout,
                           bidirectional=True,
                           num_layers=6)
        # NOTE(review): if ``LSTM`` is ``torch.nn.LSTM``, a bidirectional LSTM
        # emits 2 * hidden_size features per token, while this extractor is
        # declared with input_dim == hidden_size -- confirm against forward().
        self._endpoint_span_extractor = EndpointSpanExtractor(
            self.bilstm_hidden_embedding_dim,
            combination="x,y",
            num_width_embeddings=self.max_span_width,
            span_width_embedding_dim=self.feature_size,
            bucket_widths=False,
        )
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=self.bert_hidden_embedding_dim)
Ejemplo n.º 21
0
    def test_masked_indices_are_handled_correctly_with_exclusive_indices(self):
        """Check endpoint extraction with exclusive start indices and a mask.

        Spans whose inclusive start is 0 have an exclusive start of -1, so
        their start embedding must be the extractor's start sentinel.
        """
        embedding_dim = 8
        sequence_tensor = Variable(torch.randn([2, 5, embedding_dim]))
        # Concatenate start and end points together to form our representation.
        extractor = EndpointSpanExtractor(embedding_dim,
                                          "x,y",
                                          use_exclusive_start_indices=True)
        span_indices = Variable(
            torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [0, 1]]]))
        mask = Variable(
            torch.LongTensor([[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]]))

        span_representations = extractor(sequence_tensor,
                                         span_indices,
                                         sequence_mask=mask)

        # The start and end embeddings were simply concatenated, so splitting
        # them apart lets us compare each half against the expected indices.
        start_embeddings, end_embeddings = span_representations.split(
            embedding_dim, -1)

        # The exclusive start indices are [[0, 1], [-1, -1]]. The -1 entries
        # will be overwritten with the sentinel below, so a placeholder index
        # of 1 is used for them to keep the index select valid.
        placeholder_start_indices = Variable(
            torch.LongTensor([[0, 1], [1, 1]]))
        expected_start_embeddings = batched_index_select(
            sequence_tensor.contiguous(), placeholder_start_indices)
        # Both spans of the second batch element start at sequence index 0, so
        # their exclusive start is the start sentinel.
        for span_nr in (0, 1):
            expected_start_embeddings[1, span_nr] = extractor._start_sentinel.data
        numpy.testing.assert_array_equal(
            start_embeddings.data.numpy(),
            expected_start_embeddings.data.numpy())

        expected_end_indices = Variable(torch.LongTensor([[3, 4], [2, 1]]))
        expected_end_embeddings = batched_index_select(
            sequence_tensor.contiguous(), expected_end_indices)
        numpy.testing.assert_array_equal(
            end_embeddings.data.numpy(),
            expected_end_embeddings.data.numpy())
Ejemplo n.º 22
0
    def __init__(self, bert_model=''):
        """Build the BERT encoder, span extractor and mention-pair scorer.

        Parameters
        ----------
        bert_model : str
            Name of the pretrained BERT model to load. Must be one of the
            base (hidden size 768) or large (hidden size 1024) cased/uncased
            variants listed below.

        Raises
        ------
        ValueError
            If ``bert_model`` is not one of the supported names.
        """
        super(score_model, self).__init__()

        supported_hidden_sizes = (
            (("bert-base-uncased", "bert-base-cased"), 768),
            (("bert-large-uncased", "bert-large-cased"), 1024),
        )
        for model_names, hidden_size in supported_hidden_sizes:
            if bert_model in model_names:
                self.bert_hidden_size = hidden_size
                break
        else:
            raise ValueError("Unsupported BERT model.")

        self.buckets_embedding_size = 20
        self.score_hidden_size = 128
        self.buckets = [1, 2, 3, 4, 5, 8, 16, 32, 64]

        self.bert = BertModel.from_pretrained(bert_model)

        # One embedding row per bucket boundary, plus one extra row.
        bucket_count = len(self.buckets) + 1
        self.embedding = torch.nn.Embedding(bucket_count,
                                            self.buckets_embedding_size)

        # "x,y,x*y" yields three hidden-size chunks per span.
        self.span_extractor = EndpointSpanExtractor(self.bert_hidden_size,
                                                    "x,y,x*y")

        # NOTE(review): the 3 * 3 factor presumably covers three span-sized
        # vectors of three chunks each -- confirm against the forward pass.
        pair_input_size = (self.bert_hidden_size * 3 * 3
                           + self.buckets_embedding_size)
        self.pair_score = mentionpair_score(pair_input_size,
                                            self.score_hidden_size)
Ejemplo n.º 23
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 number_of_linear_layers : int = 2,
                 skip_connection: bool = False,
                 regularizer: RegularizerApplicator = None,
                 hidden_dim: int = 500,
                 add_distance_from_mean: bool = True,
                 drop_out_rate: float = 0.2,
                 devices = 0,
                 num_labels = 42):
        """Set up the span extractor, classifier layers, metrics and device.

        Parameters
        ----------
        vocab : ``Vocabulary``
            Forwarded to the base class.
        text_field_embedder : ``TextFieldEmbedder``
            Stored as ``self.embbedings``.
        number_of_linear_layers : int
            Stored as ``self.number_of_linear_layers``.
        skip_connection : bool
            Accepted for interface compatibility; not used in this constructor.
        regularizer : ``RegularizerApplicator``, optional
            Forwarded to the base class.
        hidden_dim : int
            Output size of the first linear layer.
        add_distance_from_mean : bool
            Accepted for interface compatibility; not used in this constructor.
        drop_out_rate : float
            Probability for ``self.drop_layer``.
        devices : int or list of int
            CUDA device index; for a list, its first element is used. A falsy
            value selects the default CUDA device.
        num_labels : int
            Output size of the second linear layer.
        """

        super().__init__(vocab,regularizer)
        self.num_labels = num_labels
        self.embbedings = text_field_embedder
        self.bert_type_model = BERT_BASE_CONFIG
        self.extractor = EndpointSpanExtractor(input_dim=self.bert_type_model['hidden_size'], combination="x,y")
        self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
        if isinstance(devices, list):
            devices = devices[0]
        # Fall back to the CPU whenever CUDA is unavailable. Previously the
        # fallback only applied to a truthy device index, so the default
        # ``devices=0`` always selected "cuda", even on CPU-only machines.
        if torch.cuda.is_available():
            self.device = torch.device("cuda:{}".format(devices) if devices else "cuda")
        else:
            self.device = torch.device("cpu")
        self.metrics = {
            # "NOTA_NotInBest2": NotaNotInsideBest2(),
            "accuracy": CategoricalAccuracy(),
            'f1': SpecialLoss(0)  # F1Measure(1)  # no relation is 0
        }
        # for i in range(1,42):
        #     self.metrics['f1_{}'.format(i)] = F1Measure(i)
        # The "x,y" extractor concatenates two hidden-size endpoint vectors,
        # hence the 2 * hidden_size input of the first layer.
        self.first_liner_layer = torch.nn.Linear(self.bert_type_model['hidden_size'] * 2,hidden_dim)
        self.second_liner_layer = torch.nn.Linear(hidden_dim, self.num_labels)  # TACRED labels

        self.number_of_linear_layers = number_of_linear_layers
        self.tanh = torch.nn.Tanh()
        self.drop_layer = torch.nn.Dropout(p=drop_out_rate)
        self.counter = 0
Ejemplo n.º 24
0
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        """Wire up the embedder, encoder, span extractors and classifier.

        Parameters
        ----------
        vocab : ``Vocabulary``
            Forwarded to the base class; the size of its ``"labels"``
            namespace becomes ``self.num_classes``.
        text_field_embedder : ``TextFieldEmbedder``
            Embeds the input text. Its output dimension must equal the
            encoder's input dimension.
        encoder : ``Seq2SeqEncoder``
            Its output dimension sizes both span extractors and the two
            attention layers.
        classifier_feedforward : ``FeedForward``
            Stored as ``self.classifier_feedforward``.
        initializer : ``InitializerApplicator``, optional
            Applied to the whole model at the end of construction.
        regularizer : ``RegularizerApplicator``, optional
            Forwarded to the base class.

        Raises
        ------
        ConfigurationError
            If the embedder's output dimension differs from the encoder's
            input dimension.
        """
        super(RNNClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.endpoint_span_extractor = EndpointSpanExtractor(
            encoder.get_output_dim(), combination="x,y")
        self.attentive_span_extractor = SelfAttentiveSpanExtractor(
            encoder.get_output_dim())

        # Each attention layer scores a vector twice the encoder width.
        attention_input_dim = encoder.get_output_dim() * 2
        self.holder_attention = nn.Linear(attention_input_dim, 1)
        self.target_attention = nn.Linear(attention_input_dim, 1)

        self.classifier_feedforward = classifier_feedforward

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            # The message names the actual ``encoder`` parameter; it used to
            # refer to a ``title_encoder`` that does not exist in this model.
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        # One F1 measure per label index.
        self.metrics = {
            "f1_neg": F1Measure(1),
            "f1_none": F1Measure(0),
            "f1_pos": F1Measure(2),
        }
        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)
Ejemplo n.º 25
0
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        context_layer: Seq2SeqEncoder,
        mention_feedforward: FeedForward,
        antecedent_feedforward: FeedForward,
        feature_size: int,
        max_span_width: int,
        spans_per_word: float,
        max_antecedents: int,
        coarse_to_fine: bool = False,
        inference_order: int = 1,
        lexical_dropout: float = 0.2,
        initializer: InitializerApplicator = InitializerApplicator(),
        **kwargs
    ) -> None:
        """Build the scorers, span extractors and metrics of the coreference model.

        Parameters
        ----------
        vocab : ``Vocabulary``
            Forwarded, together with ``**kwargs``, to the base class.
        text_field_embedder : ``TextFieldEmbedder``
            Embeds the input text; its output dimension sizes the
            self-attentive span extractor.
        context_layer : ``Seq2SeqEncoder``
            Its output dimension sizes the endpoint span extractor.
        mention_feedforward : ``FeedForward``
            Applied per span (time-distributed); followed by a linear layer
            that produces a single mention score.
        antecedent_feedforward : ``FeedForward``
            Applied per candidate pair (time-distributed); followed by a
            linear layer that produces a single antecedent score.
        feature_size : ``int``
            Embedding size used for span widths and distance buckets.
        max_span_width : ``int``
            Number of span-width embeddings; also stored on the instance.
        spans_per_word : ``float``
            Stored as ``self._spans_per_word``.
        max_antecedents : ``int``
            Stored as ``self._max_antecedents``.
        coarse_to_fine : ``bool``, optional (default=False)
            When true, an extra square linear scorer is created.
        inference_order : ``int``, optional (default=1)
            When greater than 1, a ``GatedSum`` for updating spans is created.
        lexical_dropout : ``float``, optional (default=0.2)
            Dropout probability; a non-positive value installs an identity
            function instead of a dropout layer.
        initializer : ``InitializerApplicator``, optional
            Applied to the whole model at the end of construction.
        """
        super().__init__(vocab, **kwargs)

        self._text_field_embedder = text_field_embedder
        self._context_layer = context_layer

        # Mention scoring: feedforward followed by a scalar projection.
        self._mention_feedforward = TimeDistributed(mention_feedforward)
        mention_projection = torch.nn.Linear(mention_feedforward.get_output_dim(), 1)
        self._mention_scorer = TimeDistributed(mention_projection)

        # Antecedent scoring: same shape of pipeline for candidate pairs.
        self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
        antecedent_projection = torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
        self._antecedent_scorer = TimeDistributed(antecedent_projection)

        # Endpoint representation over the contextualised sequence, with
        # unbucketed span-width embeddings.
        self._endpoint_span_extractor = EndpointSpanExtractor(
            context_layer.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False,
        )
        # Attentive representation over the raw text embeddings.
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=text_field_embedder.get_output_dim()
        )

        # 10 possible distance buckets.
        self._num_distance_buckets = 10
        self._distance_embedding = Embedding(
            embedding_dim=feature_size, num_embeddings=self._num_distance_buckets
        )

        self._max_span_width = max_span_width
        self._spans_per_word = spans_per_word
        self._max_antecedents = max_antecedents

        self._coarse_to_fine = coarse_to_fine
        if coarse_to_fine:
            span_dim = mention_feedforward.get_input_dim()
            self._coarse2fine_scorer = torch.nn.Linear(span_dim, span_dim)

        self._inference_order = inference_order
        if inference_order > 1:
            self._span_updating_gated_sum = GatedSum(mention_feedforward.get_input_dim())

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()

        self._lexical_dropout = (
            torch.nn.Dropout(p=lexical_dropout) if lexical_dropout > 0 else (lambda x: x)
        )
        initializer(self)
Ejemplo n.º 26
0
 def _make_span_extractor(self):
     """Build an endpoint span extractor over ``self.proj_dim``-sized inputs.

     The extractor is created with its default endpoint combination. A
     configurable pooling choice (a self-attentive extractor, or a
     combination taken from ``self.span_pooling``) was sketched here
     earlier but is currently disabled.
     """
     span_extractor = EndpointSpanExtractor(self.proj_dim)
     return span_extractor
Ejemplo n.º 27
0
class DyGIE(Model):
    """
    TODO(dwadden) document me.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``text`` ``TextField`` we get as input to the model.
    context_layer : ``Seq2SeqEncoder``
        This layer incorporates contextual information for each word in the document.
    feature_size: ``int``
        The embedding size for all the embedded features, such as distances or span widths.
    submodule_params: ``TODO(dwadden)``
        A nested dictionary specifying parameters to be passed on to initialize submodules.
    max_span_width: ``int``
        The maximum width of candidate spans.
    target_task: ``str``:
        The task used to make early stopping decisions.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    module_initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the individual modules.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    display_metrics: ``List[str]``. A list of the metrics that should be printed out during model
        training.
    """

    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 modules,  # TODO(dwadden) Add type.
                 feature_size: int,
                 max_span_width: int,
                 target_task: str,
                 feedforward_params: Dict[str, Union[int, float]],
                 loss_weights: Dict[str, float],
                 initializer: InitializerApplicator = InitializerApplicator(),
                 module_initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 display_metrics: List[str] = None) -> None:
        """Build the span extractor and the four task-specific submodules.

        ``modules`` is wrapped in ``Params`` and must provide the keys
        ``"ner"``, ``"coref"``, ``"relation"`` and ``"events"``.
        ``feedforward_params`` must provide ``"num_layers"``,
        ``"hidden_dims"`` and ``"dropout"``. Note that the
        ``display_metrics`` argument is not read here: the displayed metrics
        are derived from ``target_task``.
        """
        super(DyGIE, self).__init__(vocab, regularizer)

        # Span extractor: concatenated endpoints plus an unbucketed
        # span-width embedding.
        self._endpoint_span_extractor = EndpointSpanExtractor(
            embedder.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)

        # Plain attributes.
        self._embedder = embedder
        self._loss_weights = loss_weights
        self._max_span_width = max_span_width
        self._display_metrics = self._get_display_metrics(target_task)

        token_emb_dim = self._embedder.get_output_dim()
        span_emb_dim = self._endpoint_span_extractor.get_output_dim()

        modules = Params(modules)

        def make_feedforward(input_dim):
            """Create a ReLU feedforward network with the shared hyperparameters."""
            return FeedForward(input_dim=input_dim,
                               num_layers=feedforward_params["num_layers"],
                               hidden_dims=feedforward_params["hidden_dims"],
                               activations=torch.nn.ReLU(),
                               dropout=feedforward_params["dropout"])

        # Keyword arguments common to every submodule.
        shared_kwargs = dict(vocab=vocab,
                             make_feedforward=make_feedforward,
                             span_emb_dim=span_emb_dim,
                             feature_size=feature_size)

        self._ner = NERTagger.from_params(
            params=modules.pop("ner"), **shared_kwargs)
        self._coref = CorefResolver.from_params(
            params=modules.pop("coref"), **shared_kwargs)
        self._relation = RelationExtractor.from_params(
            params=modules.pop("relation"), **shared_kwargs)
        # The event extractor additionally needs the token embedding size.
        self._events = EventExtractor.from_params(
            params=modules.pop("events"),
            token_emb_dim=token_emb_dim,
            **shared_kwargs)

        # Initialize each submodule first, then the model as a whole.
        for submodule in (self._ner, self._coref, self._relation, self._events):
            module_initializer(submodule)

        initializer(self)

    @staticmethod
    def _get_display_metrics(target_task):
        """
        The `target` is the name of the task used to make early stopping decisions. Show metrics
        related to this task.
        """
        lookup = {
            "ner": [f"MEAN__{name}" for name in
                    ["ner_precision", "ner_recall", "ner_f1"]],
            "relation": [f"MEAN__{name}" for name in
                         ["relation_precision", "relation_recall", "relation_f1"]],
            "coref": ["coref_precision", "coref_recall", "coref_f1", "coref_mention_recall"],
            "events": [f"MEAN__{name}" for name in
                       ["trig_class_f1", "arg_class_f1"]]}
        if target_task not in lookup:
            raise ValueError(f"Invalied value {target_task} has been given as the target task.")
        return lookup[target_task]

    @staticmethod
    def _debatch(x):
        # TODO(dwadden) Get rid of this when I find a better way to do it.
        return x if x is None else x.squeeze(0)

    @overrides
    def forward(self,
                text,
                spans,
                metadata,
                ner_labels=None,
                coref_labels=None,
                relation_labels=None,
                trigger_labels=None,
                argument_labels=None):
        """
        Run the embedder, build span embeddings, and call each task module whose loss weight
        is positive.

        Only a single document per batch is supported: ``metadata`` must have length 1, and
        the leading dimension of every tensor input is squeezed away.

        Parameters
        ----------
        text :
            The text field input; passed to the embedder and to
            ``util.get_text_field_mask`` with ``num_wrapping_dims=1``.
        spans :
            Candidate span indices; entries that are negative are treated as padding.
        metadata :
            A list holding one document-level metadata object; its ``weight`` attribute, when
            not ``None``, scales the loss.
        ner_labels, coref_labels, relation_labels, trigger_labels, argument_labels : optional
            Gold labels forwarded to the corresponding modules. ``relation_labels`` and
            ``argument_labels`` are cast to ``long`` first.

        Returns
        -------
        A dictionary with the per-task outputs under ``"coref"``, ``"relation"``, ``"ner"``
        and ``"events"``, the weighted total under ``"loss"``, and the document metadata
        under ``"metadata"``.

        Raises
        ------
        NotImplementedError
            If ``metadata`` contains more than one document.
        """
        # In AllenNLP, AdjacencyFields are passed in as floats. This fixes it.
        if relation_labels is not None:
            relation_labels = relation_labels.long()
        if argument_labels is not None:
            argument_labels = argument_labels.long()

        # TODO(dwadden) Multi-document minibatching isn't supported yet. For now, get rid of the
        # extra dimension in the input tensors. Will return to this once the model runs.
        if len(metadata) > 1:
            raise NotImplementedError("Multi-document minibatching not supported.")

        metadata = metadata[0]
        spans = self._debatch(spans)  # (n_sents, max_n_spans, 2)
        ner_labels = self._debatch(ner_labels)  # (n_sents, max_n_spans)
        coref_labels = self._debatch(coref_labels)  #  (n_sents, max_n_spans)
        relation_labels = self._debatch(relation_labels)  # (n_sents, max_n_spans, max_n_spans)
        trigger_labels = self._debatch(trigger_labels)  # TODO(dwadden)
        argument_labels = self._debatch(argument_labels)  # TODO(dwadden)

        # Encode using BERT, then debatch.
        # Since the data are batched, we use `num_wrapping_dims=1` to unwrap the document dimension.
        # (1, n_sents, max_sentence_length, embedding_dim)

        # TODO(dwadden) Deal with the case where the input is longer than 512.
        text_embeddings = self._embedder(text, num_wrapping_dims=1)
        # (n_sents, max_n_wordpieces, embedding_dim)
        text_embeddings = self._debatch(text_embeddings)

        # (n_sents, max_sentence_length)
        text_mask = self._debatch(util.get_text_field_mask(text, num_wrapping_dims=1).float())
        sentence_lengths = text_mask.sum(dim=1).long()  # (n_sents)

        # A span counts as real when its start index is non-negative.
        span_mask = (spans[:, :, 0] >= 0).float()  # (n_sents, max_n_spans)
        # SpanFields return -1 when they are used as padding. As we do some comparisons based on
        # span widths when we attend over the span representations that we generate from these
        # indices, we need them to be >= 0, so negative indices are clamped to 0 here. This is
        # only relevant in edge cases where the number of spans we consider after the pruning
        # stage is >= the total number of spans, because in this case, it is possible we might
        # consider a masked span.
        spans = F.relu(spans.float()).long()  # (n_sents, max_n_spans, 2)

        # Shape: (batch_size, num_spans, 2 * encoding_dim + feature_size)
        span_embeddings = self._endpoint_span_extractor(text_embeddings, spans)

        # Make calls out to the modules to get results.
        # Defaults with a zero loss, used for every module that is not run below.
        output_coref = {'loss': 0}
        output_ner = {'loss': 0}
        output_relation = {'loss': 0}
        output_events = {'loss': 0}

        # Prune and compute span representations for coreference module
        if self._loss_weights["coref"] > 0 or self._coref.coref_prop > 0:
            output_coref, coref_indices = self._coref.compute_representations(
                spans, span_mask, span_embeddings, sentence_lengths, coref_labels, metadata)

        # Propagation of global information to enhance the span embeddings
        # (`coref_indices` is always defined here: this condition implies the one above.)
        if self._coref.coref_prop > 0:
            output_coref = self._coref.coref_propagation(output_coref)
            span_embeddings = self._coref.update_spans(
                output_coref, span_embeddings, coref_indices)

        # Make predictions and compute losses for each module
        if self._loss_weights['ner'] > 0:
            output_ner = self._ner(
                spans, span_mask, span_embeddings, sentence_lengths, ner_labels, metadata)

        if self._loss_weights['coref'] > 0:
            output_coref = self._coref.predict_labels(output_coref, metadata)

        if self._loss_weights['relation'] > 0:
            output_relation = self._relation(
                spans, span_mask, span_embeddings, sentence_lengths, relation_labels, metadata)

        if self._loss_weights['events'] > 0:
            # The `text_embeddings` serve as representations for event triggers.
            output_events = self._events(
                text_mask, text_embeddings, spans, span_mask, span_embeddings,
                sentence_lengths, trigger_labels, argument_labels,
                ner_labels, metadata)

        # Use `get` since there are some cases where the output dict won't have a loss - for
        # instance, when doing prediction.
        loss = (self._loss_weights['coref'] * output_coref.get("loss", 0) +
                self._loss_weights['ner'] * output_ner.get("loss", 0) +
                self._loss_weights['relation'] * output_relation.get("loss", 0) +
                self._loss_weights['events'] * output_events.get("loss", 0))

        # Multiply the loss by the weight multiplier for this document.
        weight = metadata.weight if metadata.weight is not None else 1.0
        loss *= torch.tensor(weight)

        output_dict = dict(coref=output_coref,
                           relation=output_relation,
                           ner=output_ner,
                           events=output_events)
        output_dict['loss'] = loss

        output_dict["metadata"] = metadata

        return output_dict

    def update_span_embeddings(self, span_embeddings, span_mask, top_span_embeddings,
                               top_span_mask, top_span_indices):
        """
        Return a copy of ``span_embeddings`` in which the rows selected by ``top_span_indices``
        are replaced by the matching rows of ``top_span_embeddings``.

        For each sample, copying stops at the first entry that is masked out in either
        ``top_span_mask`` or ``span_mask``. The input ``span_embeddings`` is not modified.
        """
        # TODO(Ulme) Speed this up by tensorizing

        updated = span_embeddings.clone()
        for batch_idx, top_mask_row in enumerate(top_span_mask):
            kept_span_indices = top_span_indices[batch_idx]
            for rank, span_idx in enumerate(kept_span_indices):
                hit_padding = (top_mask_row[rank] == 0
                               or span_mask[batch_idx, span_idx] == 0)
                if hit_padding:
                    # Remaining entries of this sample are skipped as well.
                    break
                updated[batch_idx, span_idx] = top_span_embeddings[batch_idx, rank]
        return updated

    @overrides
    def make_output_human_readable(self, output_dict: Dict[str, torch.Tensor]):
        """
        Converts the list of spans and predicted antecedent indices into clusters
        of spans for each element in the batch.

        Parameters
        ----------
        output_dict : ``Dict[str, torch.Tensor]``, required.
            The result of calling :func:`forward` on an instance or batch of instances.

        Returns
        -------
        The same output dictionary, but with an additional ``clusters`` key:

        clusters : ``List[List[List[Tuple[int, int]]]]``
            A nested list, representing, for each instance in the batch, the list of clusters,
            which are in turn comprised of a list of (start, end) inclusive spans into the
            original document.
        """

        doc = copy.deepcopy(output_dict["metadata"])

        if self._loss_weights["coref"] > 0:
            # TODO(dwadden) Will need to get rid of the [0] when batch training is enabled.
            decoded_coref = self._coref.make_output_human_readable(output_dict["coref"])["predicted_clusters"][0]
            sentences = doc.sentences
            sentence_starts = [sent.sentence_start for sent in sentences]
            predicted_clusters = [document.Cluster(entry, i, sentences, sentence_starts)
                                  for i, entry in enumerate(decoded_coref)]
            doc.predicted_clusters = predicted_clusters
            # TODO(dwadden) update the sentences with cluster information.

        if self._loss_weights["ner"] > 0:
            for predictions, sentence in zip(output_dict["ner"]["predictions"], doc):
                sentence.predicted_ner = predictions

        if self._loss_weights["relation"] > 0:
            for predictions, sentence in zip(output_dict["relation"]["predictions"], doc):
                sentence.predicted_relations = predictions

        if self._loss_weights["events"] > 0:
            for predictions, sentence in zip(output_dict["events"]["predictions"], doc):
                sentence.predicted_events = predictions

        return doc

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        """
        Collect the metrics of the coref, ner, relation and events modules into one dictionary.

        Metric names must be unique across modules. When ``self._display_metrics`` is set,
        every metric that is not listed there has its key prefixed with an underscore.
        """
        submodules = (self._coref, self._ner, self._relation, self._events)
        per_module = [submodule.get_metrics(reset=reset) for submodule in submodules]

        # Make sure that there aren't any conflicting names.
        metric_names = [name for metrics in per_module for name in metrics.keys()]
        assert len(set(metric_names)) == len(metric_names)

        all_metrics = {}
        for metrics in per_module:
            all_metrics.update(metrics)

        # If no list of desired metrics given, display them all.
        displayed = self._display_metrics
        if displayed is None:
            return all_metrics

        # Otherwise only display the selected ones; the rest get an underscore prefix.
        return {
            (name if name in displayed else "_" + name): value
            for name, value in all_metrics.items()
        }
Ejemplo n.º 28
0
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 modules,  # TODO(dwadden) Add type.
                 feature_size: int,
                 max_span_width: int,
                 target_task: str,
                 feedforward_params: Dict[str, Union[int, float]],
                 loss_weights: Dict[str, float],
                 initializer: InitializerApplicator = InitializerApplicator(),
                 module_initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 display_metrics: List[str] = None) -> None:
        """Build the span extractor and the four task-specific submodules.

        ``modules`` is wrapped in ``Params`` and must provide the keys
        ``"ner"``, ``"coref"``, ``"relation"`` and ``"events"``.
        ``feedforward_params`` must provide ``"num_layers"``,
        ``"hidden_dims"`` and ``"dropout"``. Note that the
        ``display_metrics`` argument is not read here: the displayed metrics
        are derived from ``target_task``.
        """
        super(DyGIE, self).__init__(vocab, regularizer)

        # Span extractor: concatenated endpoints plus an unbucketed
        # span-width embedding.
        self._endpoint_span_extractor = EndpointSpanExtractor(
            embedder.get_output_dim(),
            combination="x,y",
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)

        # Plain attributes.
        self._embedder = embedder
        self._loss_weights = loss_weights
        self._max_span_width = max_span_width
        self._display_metrics = self._get_display_metrics(target_task)

        token_emb_dim = self._embedder.get_output_dim()
        span_emb_dim = self._endpoint_span_extractor.get_output_dim()

        modules = Params(modules)

        def make_feedforward(input_dim):
            """Create a ReLU feedforward network with the shared hyperparameters."""
            return FeedForward(input_dim=input_dim,
                               num_layers=feedforward_params["num_layers"],
                               hidden_dims=feedforward_params["hidden_dims"],
                               activations=torch.nn.ReLU(),
                               dropout=feedforward_params["dropout"])

        # Keyword arguments common to every submodule.
        shared_kwargs = dict(vocab=vocab,
                             make_feedforward=make_feedforward,
                             span_emb_dim=span_emb_dim,
                             feature_size=feature_size)

        self._ner = NERTagger.from_params(
            params=modules.pop("ner"), **shared_kwargs)
        self._coref = CorefResolver.from_params(
            params=modules.pop("coref"), **shared_kwargs)
        self._relation = RelationExtractor.from_params(
            params=modules.pop("relation"), **shared_kwargs)
        # The event extractor additionally needs the token embedding size.
        self._events = EventExtractor.from_params(
            params=modules.pop("events"),
            token_emb_dim=token_emb_dim,
            **shared_kwargs)

        # Initialize each submodule first, then the model as a whole.
        for submodule in (self._ner, self._coref, self._relation, self._events):
            module_initializer(submodule)

        initializer(self)
Ejemplo n.º 29
0
    def __init__(self, config, args):
        """Build the encoder backbone and the span-classification heads.

        Args:
            config: Model configuration. `hidden_size` and `mrc_dropout` are
                read here, and the object is handed to the backbone constructor.
            args: Run-time options. The attributes read here are
                `bert_config_dir`, `span_combination_mode`, `max_span_len`,
                `n_class`, `tokenLen_emb_dim`, `spanLen_emb_dim`,
                `morph_emb_dim`, `use_spanLen`, `use_morphology` and
                `morph2idx_list`.
        """
        super(BertNER, self).__init__(config)
        self.args = args

        # Build exactly one backbone. Constructing a BertModel first and then
        # replacing it would allocate and initialise a full encoder for nothing.
        if 'roberta' in args.bert_config_dir:
            self.bert = RobertaModel(config)
            print('use the roberta pre-trained model...')
        else:
            self.bert = BertModel(config)

        # Single-logit heads over the token representations.
        self.start_outputs = nn.Linear(config.hidden_size, 1)
        self.end_outputs = nn.Linear(config.hidden_size, 1)

        self.hidden_size = config.hidden_size

        self.span_combination_mode = args.span_combination_mode
        self.max_span_width = args.max_span_len
        self.n_class = args.n_class
        # Must be set whenever max_span_width is set: both values are passed
        # together to the span extractor's width embedding below.
        self.tokenLen_emb_dim = args.tokenLen_emb_dim

        print("self.max_span_width: ", self.max_span_width)
        print("self.tokenLen_emb_dim: ", self.tokenLen_emb_dim)

        # bucket_widths: whether to bucket the span widths into log-space
        # buckets. If `False`, the raw span widths are used.
        self._endpoint_span_extractor = EndpointSpanExtractor(
            config.hidden_size,
            combination=self.span_combination_mode,
            num_width_embeddings=self.max_span_width,
            span_width_embedding_dim=self.tokenLen_emb_dim,
            bucket_widths=True)

        # NOTE(review): fixed 10 -> 1 projection; its use is not visible from
        # this constructor, so the meaning of 10 should be confirmed.
        self.linear = nn.Linear(10, 1)
        self.score_func = nn.Softmax(dim=-1)

        # Optional extra span features: span-length and morphology embeddings.
        self.spanLen_emb_dim = args.spanLen_emb_dim
        self.morph_emb_dim = args.morph_emb_dim

        # Width of the span classifier input. Each optional feature widens it
        # by its own embedding size.
        # NOTE(review): `hidden_size * 2` presumably matches a combination that
        # concatenates two endpoint vectors (e.g. "x,y") -- confirm for other
        # span_combination_mode values.
        input_dim = config.hidden_size * 2 + self.tokenLen_emb_dim
        if args.use_spanLen:
            input_dim += self.spanLen_emb_dim
        if args.use_morphology:
            input_dim += self.morph_emb_dim

        self.span_embedding = MultiNonLinearClassifier(input_dim, self.n_class,
                                                       config.mrc_dropout)

        # Index 0 is the padding entry in both tables, hence the "+ 1".
        self.spanLen_embedding = nn.Embedding(args.max_span_len + 1,
                                              self.spanLen_emb_dim,
                                              padding_idx=0)

        self.morph_embedding = nn.Embedding(len(args.morph2idx_list) + 1,
                                            self.morph_emb_dim,
                                            padding_idx=0)
Ejemplo n.º 30
0
    def __init__(self,
                 vocab: Vocabulary,
                 span_graph_encoder: SpanGraphEncoder,
                 span_typer: SpanTyper,
                 embed_size: int,
                 label_namespace: str = 'span_labels',
                 event_namespace: str = 'event_labels',
                 use_event_embedding: bool = True):
        """Assemble the span extractors, projection networks and typer.

        Args:
            vocab: Vocabulary; its `event_namespace` entries size the event
                embedding table when `use_event_embedding` is true.
            span_graph_encoder: Encoder stored on the module; its input
                dimension sets the hidden width of the projection networks.
            span_typer: Typer stored on the module as-is.
            embed_size: Input dimension given to both span extractors.
            label_namespace: Vocabulary namespace name kept for span labels.
            event_namespace: Vocabulary namespace name kept for event labels.
            use_event_embedding: When true, an event embedding table is created
                and the trigger projection is narrowed by
                `event_embedding_size`.
        """
        super(SelectorArgLinking, self).__init__()

        # Plain configuration attributes.
        self.vocab: Vocabulary = vocab
        self.label_namespace: str = label_namespace
        self.event_namespace: str = event_namespace
        self.use_event_embedding = use_event_embedding
        self.embed_size = embed_size
        self.event_embedding_size = 50

        # The event table only exists when event embeddings are enabled.
        if use_event_embedding:
            event_token_index = vocab.get_token_to_index_vocabulary(namespace=event_namespace)
            self.event_embeddings: nn.Embedding = nn.Embedding(
                num_embeddings=len(event_token_index),
                embedding_dim=self.event_embedding_size,
            )

        self.lexical_dropout = nn.Dropout(p=0.2)
        self.span_graph_encoder: SpanGraphEncoder = span_graph_encoder

        # Two views of every span: its endpoints concatenated ('x,y') and a
        # self-attentive extractor over the same embedding size.
        self.span_extractor: SpanExtractor = EndpointSpanExtractor(
            input_dim=self.embed_size,
            combination='x,y',
        )
        self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(embed_size)

        # Both projections take the summed width of the two span extractors and
        # are sized against the graph encoder's input dimension.
        span_feature_dim = (self.span_extractor.get_output_dim()
                            + self.attentive_span_extractor.get_output_dim())
        graph_input_dim = self.span_graph_encoder.get_input_dim()

        argument_network = FeedForward(
            input_dim=span_feature_dim,
            hidden_dims=graph_input_dim,
            num_layers=2,
            activations=nn.GELU(),
            dropout=0.2,
        )
        self.arg_affine = TimeDistributed(argument_network)

        # The trigger projection is narrower by the event embedding size when
        # event embeddings are enabled.
        # NOTE(review): presumably the event embedding is concatenated onto
        # the trigger vector later to restore the full width -- confirm in forward().
        trigger_hidden_dim = graph_input_dim
        if use_event_embedding:
            trigger_hidden_dim = graph_input_dim - self.event_embedding_size
        self.trigger_affine = FeedForward(
            input_dim=span_feature_dim,
            hidden_dims=trigger_hidden_dim,
            num_layers=2,
            activations=nn.GELU(),
            dropout=0.2,
        )

        self.span_typer: SpanTyper = span_typer

        # Run the module's weight initialiser over every registered submodule.
        self.apply(self._init_weights)