Example #1
    def __init__(
        self,
        word_embeddings: TextFieldEmbedder,
        encoders: torch.nn.ModuleDict,
        vocab: Vocabulary,
        tasks: List[Task],
    ) -> None:
        super().__init__(vocab)
        self._tasks = tasks
        self.word_embeddings = word_embeddings
        self.encoders = encoders
        # One CRF tagger per task, kept in a ModuleDict (keyed by the task's
        # tag namespace) so each tagger's parameters register with the model.
        self.crftagger = torch.nn.ModuleDict()
        self.metrics = dict()
        self._inference_mode = False
        for task in tasks:
            tag_namespace = task.tag_namespace
            self.crftagger[tag_namespace] = CrfTagger(
                vocab=vocab,
                text_field_embedder=self.word_embeddings,
                encoder=self.encoders[task.task_type],
                label_namespace=tag_namespace,
                label_encoding=task.label_encoding,
                calculate_span_f1=task.calculate_span_f1,
            )
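The excerpt stops at construction. At run time each batch has to be routed to the tagger for its task; a minimal dispatch sketch (`forward_for_task` is a hypothetical helper, and it assumes the batch carries the standard CrfTagger inputs):

    def forward_for_task(self, tag_namespace: str, **batch):
        # Hypothetical helper, not from the source: look up the CRF tagger
        # registered for this task's namespace and let it produce the usual
        # CrfTagger output dict (loss, logits, decoded tags).
        tagger = self.crftagger[tag_namespace]
        return tagger(**batch)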
Example #2
    def __init__(self,
                 vocab: Vocabulary,
                 params: Params,
                 regularizer: RegularizerApplicator = None):

        super(LayerNer, self).__init__(vocab=vocab, regularizer=regularizer)

        # Base text field embedder
        text_field_embedder_params = params.pop("text_field_embedder")
        text_field_embedder = BasicTextFieldEmbedder.from_params(
            vocab=vocab, params=text_field_embedder_params)
        self._text_field_embedder = text_field_embedder

        ############
        # NER
        ############
        ner_params = params.pop("ner")

        # Encoder
        encoder_ner_params = ner_params.pop("encoder")
        encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
        self._encoder_ner = encoder_ner

        # NER tagger: CRF tagger
        tagger_ner_params = ner_params.pop("tagger")
        tagger_ner = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            encoder=self._encoder_ner,
            label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
            constraint_type=tagger_ner_params.pop("constraint_type", None),
            dropout=tagger_ner_params.pop("dropout", None),
            regularizer=regularizer)
        self._tagger_ner = tagger_ner

        logger.info("Multi-Task Learning Model has been instantiated.")
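The constructor is driven entirely by a nested Params object whose keys mirror the pop calls above. A hedged sketch of an equivalent in-code config; the dimensions, namespace, and dropout values are illustrative, not taken from the source:

    from allennlp.common import Params

    params = Params({
        "text_field_embedder": {
            # token-level embedder config; values are illustrative
            "tokens": {"type": "embedding", "embedding_dim": 50},
        },
        "ner": {
            "encoder": {"type": "lstm", "input_size": 50,
                        "hidden_size": 100, "bidirectional": True},
            "tagger": {"label_namespace": "ner_labels",
                       "constraint_type": "BIOUL",
                       "dropout": 0.5},
        },
    })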
Example #3
    def __init__(
        self,
        word_embeddings: TextFieldEmbedder,
        encoders: torch.nn.ModuleDict,
        vocab: Vocabulary,
        tasks: List[Task],
    ) -> None:
        super().__init__(vocab)
        self._tasks = tasks
        self.word_embeddings = word_embeddings
        self.encoders = encoders
        # One head per task: a CRF tagger for tagging tasks, a classifier for
        # classification tasks, both registered through the ModuleDict.
        self.tagger_or_classifier = torch.nn.ModuleDict()
        self.metrics = dict()
        self._inference_mode = False
        for task in tasks:
            tag_namespace = task.tag_namespace
            if task.task_type in TAGGING_TASKS:
                self.tagger_or_classifier[tag_namespace] = CrfTagger(
                    vocab=vocab,
                    text_field_embedder=self.word_embeddings,
                    encoder=self.encoders[task.task_type],
                    label_namespace=tag_namespace,
                    label_encoding=task.label_encoding,
                    calculate_span_f1=task.calculate_span_f1,
                )
            elif task.task_type in CLASSIFICATION_TASKS:
                self.tagger_or_classifier[tag_namespace] = BasicClassifierWithMetrics(
                    vocab=vocab,
                    text_field_embedder=self.word_embeddings,
                    seq2vec_encoder=self.encoders[task.task_type],
                    label_namespace=tag_namespace,
                )
            else:
                raise NotImplementedError(
                    f"No model implemented for task_type={task.task_type}.")
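TAGGING_TASKS and CLASSIFICATION_TASKS are module-level constants defined outside the excerpt. A plausible shape, with placeholder task-type names:

    # Hypothetical definitions; the excerpt only shows the membership tests.
    TAGGING_TASKS = {"ner", "chunking"}
    CLASSIFICATION_TASKS = {"document_classification"}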
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 params: Params,
                 regularizer: RegularizerApplicator = None):

        super(LayerNerEmdCoref, self).__init__(vocab=vocab,
                                               regularizer=regularizer)

        # Base text field embedder
        text_field_embedder_params = params.pop("text_field_embedder")
        text_field_embedder = BasicTextFieldEmbedder.from_params(
            vocab=vocab, params=text_field_embedder_params)
        self._text_field_embedder = text_field_embedder

        ############
        # NER
        ############
        ner_params = params.pop("ner")

        # Encoder
        encoder_ner_params = ner_params.pop("encoder")
        encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
        self._encoder_ner = encoder_ner

        # Tagger NER - CRF Tagger
        tagger_ner_params = ner_params.pop("tagger")
        tagger_ner = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            encoder=self._encoder_ner,
            label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
            constraint_type=tagger_ner_params.pop("constraint_type", None),
            dropout=tagger_ner_params.pop("dropout", None),
            regularizer=regularizer)
        self._tagger_ner = tagger_ner

        #################################
        # EMD (entity mention detection)
        #################################
        emd_params = params.pop("emd")

        # Encoder
        encoder_emd_params = emd_params.pop("encoder")
        encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
        self._encoder_emd = encoder_emd

        shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner])
        self._shortcut_text_field_embedder = shortcut_text_field_embedder

        # Tagger: EMD - CRF Tagger
        tagger_emd_params = emd_params.pop("tagger")
        tagger_emd = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder,
            encoder=self._encoder_emd,
            label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
            constraint_type=tagger_emd_params.pop("constraint_type", None),
            dropout=tagger_emd_params.pop("dropout", None),
            regularizer=regularizer)
        self._tagger_emd = tagger_emd

        ##############
        # Coreference
        ##############
        coref_params = params.pop("coref")

        # Encoder
        encoder_coref_params = coref_params.pop("encoder")
        encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params)
        self._encoder_coref = encoder_coref

        shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner, self._encoder_emd])
        self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref

        # Tagger: Coreference
        tagger_coref_params = coref_params.pop("tagger")
        eval_on_gold_mentions = tagger_coref_params.pop_bool(
            "eval_on_gold_mentions", False)
        init_params = tagger_coref_params.pop("initializer", None)
        initializer = (InitializerApplicator.from_params(init_params)
                       if init_params is not None else InitializerApplicator())

        tagger_coref = CoreferenceCustom(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder_coref,
            context_layer=self._encoder_coref,
            mention_feedforward=FeedForward.from_params(
                tagger_coref_params.pop("mention_feedforward")),
            antecedent_feedforward=FeedForward.from_params(
                tagger_coref_params.pop("antecedent_feedforward")),
            feature_size=tagger_coref_params.pop_int("feature_size"),
            max_span_width=tagger_coref_params.pop_int("max_span_width"),
            spans_per_word=tagger_coref_params.pop_float("spans_per_word"),
            max_antecedents=tagger_coref_params.pop_int("max_antecedents"),
            lexical_dropout=tagger_coref_params.pop_float(
                "lexical_dropout", 0.2),
            initializer=initializer,
            regularizer=regularizer,
            eval_on_gold_mentions=eval_on_gold_mentions)
        self._tagger_coref = tagger_coref
        if eval_on_gold_mentions:
            self._tagger_coref._eval_on_gold_mentions = True

        logger.info("Multi-Task Learning Model has been instantiated.")
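The ShortcutConnectTextFieldEmbedder instances above implement the hierarchical shortcut used in HMTL-style models: each higher task sees the base embeddings concatenated with the outputs of the lower tasks' encoders. A minimal functional sketch of that idea (illustrative only, not the library class):

    import torch

    def shortcut_embed(base_embedder, previous_encoders, text_field_input, mask):
        # Illustrative sketch: embed once with the base embedder, feed the
        # result through the lower tasks' encoders in order, then concatenate
        # the base embeddings with the last encoder's output along the
        # feature dimension.
        embedded = base_embedder(text_field_input)
        hidden = embedded
        for encoder in previous_encoders:
            hidden = encoder(hidden, mask)
        return torch.cat([embedded, hidden], dim=-1)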
Example #5
    def __init__(self,
                 vocab: Vocabulary,
                 params: Params,
                 regularizer: RegularizerApplicator = None):

        super(LayerEmdRelation, self).__init__(vocab=vocab,
                                               regularizer=regularizer)

        # Base text field embedder
        text_field_embedder_params = params.pop("text_field_embedder")
        text_field_embedder = BasicTextFieldEmbedder.from_params(
            vocab=vocab, params=text_field_embedder_params)
        self._text_field_embedder = text_field_embedder

        ############
        # EMD
        ############
        emd_params = params.pop("emd")

        # Encoder
        encoder_emd_params = emd_params.pop("encoder")
        encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
        self._encoder_emd = encoder_emd

        # Tagger EMD - CRF Tagger
        tagger_emd_params = emd_params.pop("tagger")
        tagger_emd = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            encoder=self._encoder_emd,
            label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
            label_encoding=tagger_emd_params.pop("label_encoding", None),
            dropout=tagger_emd_params.pop("dropout", None),
            regularizer=regularizer,
        )
        self._tagger_emd = tagger_emd

        ######################
        # Relation extraction
        ######################
        relation_params = params.pop("relation")

        # Encoder
        encoder_relation_params = relation_params.pop("encoder")
        encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params)
        self._encoder_relation = encoder_relation

        shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_emd])
        self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation

        # Tagger: Relation
        tagger_relation_params = relation_params.pop("tagger")
        tagger_relation = RelationExtractor(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder_relation,
            context_layer=self._encoder_relation,
            d=tagger_relation_params.pop_int("d"),
            l=tagger_relation_params.pop_int("l"),
            n_classes=tagger_relation_params.pop("n_classes"),
            activation=tagger_relation_params.pop("activation"),
        )
        self._tagger_relation = tagger_relation

        logger.info("Multi-Task Learning Model has been instantiated.")
Example #6
# The excerpt begins inside a feature branch; `use_bert` is a hypothetical
# name for the flag that selects the BERT embedder.
if use_bert:
    token_embedding = PretrainedBertEmbedder(
        pretrained_model="bert-base-uncased",
        top_layer_only=True,  # conserve memory
    )
else:
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)

word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})

lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(word_embeddings.get_output_dim(),
                  hidden_dim // 2,
                  bidirectional=True,
                  batch_first=True))
model = CrfTagger(vocab, word_embeddings, lstm)

if torch.cuda.is_available():
    cuda_device = 0
    model = model.cuda(cuda_device)
else:
    cuda_device = -1

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
iterator = BucketIterator(batch_size=batch_size,
                          sorting_keys=[("tokens", "num_tokens")])
iterator.index_with(vocab)
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  # the original excerpt is truncated after `iterator`; the
                  # remaining arguments here are assumed standard Trainer args
                  train_dataset=train_dataset,
                  cuda_device=cuda_device)
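Training itself is not shown in the excerpt; with the trainer constructed, it would be a single call:

    metrics = trainer.train()  # runs the training loop and returns final metrics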
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 params: Params,
                 regularizer: RegularizerApplicator = None):

        super(LayerNerEmd, self).__init__(vocab=vocab, regularizer=regularizer)

        # Base text field embedder
        text_field_embedder_params = params.pop("text_field_embedder")
        text_field_embedder = BasicTextFieldEmbedder.from_params(
            vocab=vocab, params=text_field_embedder_params)
        self._text_field_embedder = text_field_embedder

        ############
        # NER
        ############
        ner_params = params.pop("ner")

        # Encoder
        encoder_ner_params = ner_params.pop("encoder")
        encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
        self._encoder_ner = encoder_ner

        # Tagger NER - CRF Tagger
        tagger_ner_params = ner_params.pop("tagger")
        tagger_ner = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._text_field_embedder,
            encoder=self._encoder_ner,
            label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
            label_encoding=tagger_ner_params.pop("label_encoding", None),
            dropout=tagger_ner_params.pop("dropout", None),
            regularizer=regularizer,
        )
        self._tagger_ner = tagger_ner

        ############
        # EMD
        ############
        emd_params = params.pop("emd")

        # Encoder
        encoder_emd_params = emd_params.pop("encoder")
        encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
        self._encoder_emd = encoder_emd

        shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
            base_text_field_embedder=self._text_field_embedder,
            previous_encoders=[self._encoder_ner])
        self._shortcut_text_field_embedder = shortcut_text_field_embedder

        # Tagger: EMD - CRF Tagger
        tagger_emd_params = emd_params.pop("tagger")
        tagger_emd = CrfTagger(
            vocab=vocab,
            text_field_embedder=self._shortcut_text_field_embedder,
            encoder=self._encoder_emd,
            label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
            label_encoding=tagger_emd_params.pop("label_encoding", None),
            dropout=tagger_emd_params.pop("dropout", None),
            regularizer=regularizer,
        )
        self._tagger_emd = tagger_emd

        logger.info("Multi-Task Learning Model has been instantiated.")
Example #8
    # The excerpt begins inside a feature branch; `use_elmo` is a hypothetical
    # flag name, and options_file is assumed to be the options JSON that
    # matches the weight file below.
    if use_elmo:
        options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
        weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'

        token_embedding = ElmoTokenEmbedder(options_file, weight_file)
    else:
        token_embedding = Embedding(
            num_embeddings=vocab.get_vocab_size('tokens'),
            embedding_dim=config.embedding_dim)

    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})

    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(word_embeddings.get_output_dim(),
                      400 // 2,
                      bidirectional=True,
                      batch_first=True))
    model = CrfTagger(vocab, word_embeddings, lstm)

    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    predictor = SentenceTaggerPredictor(model, reader)
    predictions = predictor.predict_batch_instance(dataset)
    golds = []
    preds = []  # CrfTagger predictions are decoded tag strings, not raw logits
    for d in dataset:
        golds.extend(list(d.fields['tags']))
    for p in predictions:
        preds.extend(p['tags'])
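The excerpt stops after collecting the tags. A natural follow-up is to score them; a minimal sketch using token-level accuracy, assuming golds and preds are parallel lists of tag strings:

    correct = sum(g == p for g, p in zip(golds, preds))
    print(f"token-level accuracy: {correct / len(golds):.4f}")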