def __init__(
    self,
    word_embeddings: TextFieldEmbedder,
    encoders: torch.nn.ModuleDict,
    vocab: Vocabulary,
    tasks: List[Task],
) -> None:
    super().__init__(vocab)
    self._tasks = tasks
    self.word_embeddings = word_embeddings
    self.encoders = encoders
    self.crftagger = torch.nn.ModuleDict()
    self.metrics = dict()
    self._inference_mode = False
    for task in tasks:
        tag_namespace = task.tag_namespace
        self.crftagger[tag_namespace] = CrfTagger(
            vocab=vocab,
            text_field_embedder=self.word_embeddings,
            encoder=self.encoders[task.task_type],
            label_namespace=tag_namespace,
            label_encoding=task.label_encoding,
            calculate_span_f1=task.calculate_span_f1,
        )
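# The `Task` objects above are assumed to carry the per-task configuration.
# A minimal sketch of such a container (hypothetical; field names are inferred
# from the attribute accesses in the constructor, not taken from the source):
from dataclasses import dataclass
from typing import Optional

@dataclass
class Task:
    tag_namespace: str                     # vocabulary namespace holding this task's labels
    task_type: str                         # key into the shared `encoders` ModuleDict
    label_encoding: Optional[str] = None   # e.g. "BIO" or "BIOUL", needed for span F1
    calculate_span_f1: bool = False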
def __init__(self,
             vocab: Vocabulary,
             params: Params,
             regularizer: RegularizerApplicator = None):
    super(LayerNer, self).__init__(vocab=vocab, regularizer=regularizer)

    # Base text field embedder, shared by all layers
    text_field_embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(
        vocab=vocab, params=text_field_embedder_params)
    self._text_field_embedder = text_field_embedder

    ############
    # NER stack
    ############
    ner_params = params.pop("ner")

    # Encoder
    encoder_ner_params = ner_params.pop("encoder")
    encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
    self._encoder_ner = encoder_ner

    # Tagger NER - CRF Tagger
    tagger_ner_params = ner_params.pop("tagger")
    tagger_ner = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._text_field_embedder,
        encoder=self._encoder_ner,
        label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
        constraint_type=tagger_ner_params.pop("constraint_type", None),
        dropout=tagger_ner_params.pop("dropout", None),
        regularizer=regularizer)
    self._tagger_ner = tagger_ner

    logger.info("Multi-Task Learning Model has been instantiated.")
def __init__(
    self,
    word_embeddings: TextFieldEmbedder,
    encoders: torch.nn.ModuleDict,
    vocab: Vocabulary,
    tasks: List[Task],
) -> None:
    super().__init__(vocab)
    self._tasks = tasks
    self.word_embeddings = word_embeddings
    self.encoders = encoders
    self.tagger_or_classifier = torch.nn.ModuleDict()
    self.metrics = dict()
    self._inference_mode = False
    for task in tasks:
        tag_namespace = task.tag_namespace
        if task.task_type in TAGGING_TASKS:
            self.tagger_or_classifier[tag_namespace] = CrfTagger(
                vocab=vocab,
                text_field_embedder=self.word_embeddings,
                encoder=self.encoders[task.task_type],
                label_namespace=tag_namespace,
                label_encoding=task.label_encoding,
                calculate_span_f1=task.calculate_span_f1,
            )
        elif task.task_type in CLASSIFICATION_TASKS:
            self.tagger_or_classifier[tag_namespace] = BasicClassifierWithMetrics(
                vocab=vocab,
                text_field_embedder=self.word_embeddings,
                seq2vec_encoder=self.encoders[task.task_type],
                label_namespace=tag_namespace,
            )
        else:
            raise NotImplementedError(
                f"model for task.task_type={task.task_type} not implemented.")
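# A minimal sketch of how the `encoders` ModuleDict might be assembled, one
# encoder per task type (the keys and dimensions here are assumptions, not
# taken from the original code): tagging tasks get a Seq2SeqEncoder, while
# classification tasks get a Seq2VecEncoder that pools over the sequence.
import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.seq2vec_encoders import BagOfEmbeddingsEncoder

embedding_dim = 100  # assumed; must match word_embeddings.get_output_dim()
encoders = torch.nn.ModuleDict({
    # key must match the corresponding task.task_type value
    "ner": PytorchSeq2SeqWrapper(
        torch.nn.LSTM(embedding_dim, 64, bidirectional=True, batch_first=True)),
    "classification": BagOfEmbeddingsEncoder(embedding_dim, averaged=True),
})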
def __init__(self,
             vocab: Vocabulary,
             params: Params,
             regularizer: RegularizerApplicator = None):
    super(LayerNerEmdCoref, self).__init__(vocab=vocab, regularizer=regularizer)

    # Base text field embedder, shared by all layers
    text_field_embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(
        vocab=vocab, params=text_field_embedder_params)
    self._text_field_embedder = text_field_embedder

    ############
    # NER stack
    ############
    ner_params = params.pop("ner")

    # Encoder
    encoder_ner_params = ner_params.pop("encoder")
    encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
    self._encoder_ner = encoder_ner

    # Tagger NER - CRF Tagger
    tagger_ner_params = ner_params.pop("tagger")
    tagger_ner = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._text_field_embedder,
        encoder=self._encoder_ner,
        label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
        constraint_type=tagger_ner_params.pop("constraint_type", None),
        dropout=tagger_ner_params.pop("dropout", None),
        regularizer=regularizer)
    self._tagger_ner = tagger_ner

    ############
    # EMD stack
    ############
    emd_params = params.pop("emd")

    # Encoder
    encoder_emd_params = emd_params.pop("encoder")
    encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
    self._encoder_emd = encoder_emd

    # Shortcut connection: EMD sees the base embeddings concatenated with
    # the NER encoder's output.
    shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner])
    self._shortcut_text_field_embedder = shortcut_text_field_embedder

    # Tagger: EMD - CRF Tagger
    tagger_emd_params = emd_params.pop("tagger")
    tagger_emd = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder,
        encoder=self._encoder_emd,
        label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
        constraint_type=tagger_emd_params.pop("constraint_type", None),
        dropout=tagger_emd_params.pop("dropout", None),
        regularizer=regularizer)
    self._tagger_emd = tagger_emd

    ##############
    # Coref stack
    ##############
    coref_params = params.pop("coref")

    # Encoder
    encoder_coref_params = coref_params.pop("encoder")
    encoder_coref = Seq2SeqEncoder.from_params(encoder_coref_params)
    self._encoder_coref = encoder_coref

    # Shortcut connection: coref sees the base embeddings concatenated with
    # the NER and EMD encoder outputs.
    shortcut_text_field_embedder_coref = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner, self._encoder_emd])
    self._shortcut_text_field_embedder_coref = shortcut_text_field_embedder_coref

    # Tagger: Coreference
    tagger_coref_params = coref_params.pop("tagger")
    eval_on_gold_mentions = tagger_coref_params.pop_bool(
        "eval_on_gold_mentions", False)
    init_params = tagger_coref_params.pop("initializer", None)
    initializer = (InitializerApplicator.from_params(init_params)
                   if init_params is not None else InitializerApplicator())
    tagger_coref = CoreferenceCustom(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder_coref,
        context_layer=self._encoder_coref,
        mention_feedforward=FeedForward.from_params(
            tagger_coref_params.pop("mention_feedforward")),
        antecedent_feedforward=FeedForward.from_params(
            tagger_coref_params.pop("antecedent_feedforward")),
        feature_size=tagger_coref_params.pop_int("feature_size"),
        max_span_width=tagger_coref_params.pop_int("max_span_width"),
        spans_per_word=tagger_coref_params.pop_float("spans_per_word"),
        max_antecedents=tagger_coref_params.pop_int("max_antecedents"),
        lexical_dropout=tagger_coref_params.pop_float("lexical_dropout", 0.2),
        initializer=initializer,
        regularizer=regularizer,
        eval_on_gold_mentions=eval_on_gold_mentions)
    self._tagger_coref = tagger_coref

    if eval_on_gold_mentions:
        self._tagger_coref._eval_on_gold_mentions = True

    logger.info("Multi-Task Learning Model has been instantiated.")
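# ShortcutConnectTextFieldEmbedder is specific to this codebase. A minimal
# sketch of the idea it implements (an illustration, not the actual class):
# each layer's input is the base embedding concatenated with the outputs of
# the encoders below it, so higher-level tasks can read both raw embeddings
# and the lower tasks' learned representations.
import torch

class ShortcutEmbedderSketch(torch.nn.Module):
    def __init__(self, base_embedder, previous_encoders):
        super().__init__()
        self.base_embedder = base_embedder
        self.previous_encoders = torch.nn.ModuleList(previous_encoders)

    def forward(self, text_field_input, mask):
        embedded = self.base_embedder(text_field_input)
        output = embedded
        for encoder in self.previous_encoders:
            # Concatenate the base embedding with each lower encoder's output;
            # the next encoder therefore consumes embedding_dim + encoder_dim.
            output = torch.cat([embedded, encoder(output, mask)], dim=-1)
        return output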
def __init__(self,
             vocab: Vocabulary,
             params: Params,
             regularizer: RegularizerApplicator = None):
    super(LayerEmdRelation, self).__init__(vocab=vocab, regularizer=regularizer)

    # Base text field embedder, shared by all layers
    text_field_embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(
        vocab=vocab, params=text_field_embedder_params)
    self._text_field_embedder = text_field_embedder

    ############
    # EMD stack
    ############
    emd_params = params.pop("emd")

    # Encoder
    encoder_emd_params = emd_params.pop("encoder")
    encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
    self._encoder_emd = encoder_emd

    # Tagger EMD - CRF Tagger
    tagger_emd_params = emd_params.pop("tagger")
    tagger_emd = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._text_field_embedder,
        encoder=self._encoder_emd,
        label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
        label_encoding=tagger_emd_params.pop("label_encoding", None),
        dropout=tagger_emd_params.pop("dropout", None),
        regularizer=regularizer,
    )
    self._tagger_emd = tagger_emd

    ############################
    # Relation extraction stack
    ############################
    relation_params = params.pop("relation")

    # Encoder
    encoder_relation_params = relation_params.pop("encoder")
    encoder_relation = Seq2SeqEncoder.from_params(encoder_relation_params)
    self._encoder_relation = encoder_relation

    # Shortcut connection: relation extraction sees the base embeddings
    # concatenated with the EMD encoder's output.
    shortcut_text_field_embedder_relation = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_emd])
    self._shortcut_text_field_embedder_relation = shortcut_text_field_embedder_relation

    # Tagger: Relation
    tagger_relation_params = relation_params.pop("tagger")
    tagger_relation = RelationExtractor(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder_relation,
        context_layer=self._encoder_relation,
        d=tagger_relation_params.pop_int("d"),
        l=tagger_relation_params.pop_int("l"),
        n_classes=tagger_relation_params.pop("n_classes"),
        activation=tagger_relation_params.pop("activation"),
    )
    self._tagger_relation = tagger_relation

    logger.info("Multi-Task Learning Model has been instantiated.")
# `use_bert` is an assumed flag: the `if` line preceding this snippet is elided.
if use_bert:
    token_embedding = PretrainedBertEmbedder(
        pretrained_model="bert-base-uncased",
        top_layer_only=True,  # conserve memory
    )
else:
    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(word_embeddings.get_output_dim(), hidden_dim // 2,
                  bidirectional=True, batch_first=True))
model = CrfTagger(vocab, word_embeddings, lstm)

if torch.cuda.is_available():
    cuda_device = 0
    model = model.cuda(cuda_device)
else:
    cuda_device = -1

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

iterator = BucketIterator(batch_size=batch_size,
                          sorting_keys=[("tokens", "num_tokens")])
iterator.index_with(vocab)

# The original call is truncated after `iterator=`; the remaining arguments
# are assumed, using the standard Trainer parameters.
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  iterator=iterator,
                  train_dataset=train_dataset,
                  num_epochs=num_epochs,  # assumed to be defined with the other hyperparameters
                  cuda_device=cuda_device)
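# With the trainer configured, training and persisting the weights are one
# line each (a sketch; `model_path` is an assumed destination, mirroring the
# `model_path` used when the tagger is reloaded in the evaluation snippet below):
trainer.train()
with open(model_path, 'wb') as f:
    torch.save(model.state_dict(), f)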
def __init__(self,
             vocab: Vocabulary,
             params: Params,
             regularizer: RegularizerApplicator = None):
    super(LayerNerEmd, self).__init__(vocab=vocab, regularizer=regularizer)

    # Base text field embedder, shared by all layers
    text_field_embedder_params = params.pop("text_field_embedder")
    text_field_embedder = BasicTextFieldEmbedder.from_params(
        vocab=vocab, params=text_field_embedder_params)
    self._text_field_embedder = text_field_embedder

    ############
    # NER stack
    ############
    ner_params = params.pop("ner")

    # Encoder
    encoder_ner_params = ner_params.pop("encoder")
    encoder_ner = Seq2SeqEncoder.from_params(encoder_ner_params)
    self._encoder_ner = encoder_ner

    # Tagger NER - CRF Tagger
    tagger_ner_params = ner_params.pop("tagger")
    tagger_ner = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._text_field_embedder,
        encoder=self._encoder_ner,
        label_namespace=tagger_ner_params.pop("label_namespace", "labels"),
        label_encoding=tagger_ner_params.pop("label_encoding", None),
        dropout=tagger_ner_params.pop("dropout", None),
        regularizer=regularizer,
    )
    self._tagger_ner = tagger_ner

    ############
    # EMD stack
    ############
    emd_params = params.pop("emd")

    # Encoder
    encoder_emd_params = emd_params.pop("encoder")
    encoder_emd = Seq2SeqEncoder.from_params(encoder_emd_params)
    self._encoder_emd = encoder_emd

    # Shortcut connection: EMD sees the base embeddings concatenated with
    # the NER encoder's output.
    shortcut_text_field_embedder = ShortcutConnectTextFieldEmbedder(
        base_text_field_embedder=self._text_field_embedder,
        previous_encoders=[self._encoder_ner])
    self._shortcut_text_field_embedder = shortcut_text_field_embedder

    # Tagger: EMD - CRF Tagger
    tagger_emd_params = emd_params.pop("tagger")
    tagger_emd = CrfTagger(
        vocab=vocab,
        text_field_embedder=self._shortcut_text_field_embedder,
        encoder=self._encoder_emd,
        label_namespace=tagger_emd_params.pop("label_namespace", "labels"),
        label_encoding=tagger_emd_params.pop("label_encoding", None),
        dropout=tagger_emd_params.pop("dropout", None),
        regularizer=regularizer,
    )
    self._tagger_emd = tagger_emd

    logger.info("Multi-Task Learning Model has been instantiated.")
# `use_elmo` and `options_file` are assumed: the `if` line and the
# `options_file = ...` assignment preceding this snippet are elided.
if use_elmo:
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
    token_embedding = ElmoTokenEmbedder(options_file, weight_file)
else:
    token_embedding = Embedding(
        num_embeddings=vocab.get_vocab_size('tokens'),
        embedding_dim=config.embedding_dim)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(word_embeddings.get_output_dim(), 400 // 2,
                  bidirectional=True, batch_first=True))
model = CrfTagger(vocab, word_embeddings, lstm)

with open(model_path, 'rb') as f:
    model.load_state_dict(torch.load(f))

predictor = SentenceTaggerPredictor(model, reader)
predictions = predictor.predict_batch_instance(dataset)

# Flatten gold labels and predicted tags across the dataset. Despite the
# name, `logits` holds predicted tag strings, not raw scores.
golds = []
logits = []
for d in dataset:
    golds.extend(list(d.fields['tags']))
for p in predictions:
    logits.extend(p['tags'])
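# A minimal sketch of scoring the flattened predictions against the gold
# labels, assuming both lists hold plain tag strings (token-level accuracy;
# for NER-style tasks a span-level F1, e.g. from seqeval, would be the more
# informative metric):
correct = sum(1 for g, p in zip(golds, logits) if g == p)
accuracy = correct / len(golds)
print(f"token accuracy: {accuracy:.4f}")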