def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             number_of_linear_layers: int = 2,
             metrics: Dict[str, allennlp.training.metrics.Metric] = None,
             renorm_method: str = None,
             skip_connection: bool = False,
             regularizer: RegularizerApplicator = None,
             bert_model: str = None,
             ) -> None:
    super().__init__(vocab, regularizer)
    self.embbedings = text_field_embedder
    self.bert_type_model = BERT_BASE_CONFIG if "base" in bert_model else BERT_LARGE_CONFIG
    self.extractor = EndpointSpanExtractor(input_dim=self.bert_type_model['hidden_size'],
                                           combination="x,y")
    self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.metrics = metrics or {"accuracy": CategoricalAccuracy()}
    self.first_liner_layer = torch.nn.Linear(self.bert_type_model['hidden_size'] * 2,
                                             self.bert_type_model['hidden_size'] * 2)
    self.second_liner_layer = torch.nn.Linear(self.bert_type_model['hidden_size'] * 2,
                                              self.bert_type_model['hidden_size'] * 2)
    self.do_skip_connection = skip_connection
    self.number_of_linear_layers = number_of_linear_layers
    self.relation_layer_norm = torch.nn.LayerNorm(torch.Size([self.bert_type_model['hidden_size'] * 2]),
                                                  elementwise_affine=True)
    self.head_token_index = 1  # FIXME: this should be an argument
    self.tail_token_index = 3
    self.tanh = torch.nn.Tanh()
    self.drop_layer = torch.nn.Dropout(p=0.2)
    self.renorm_method = renorm_method or linear
def __init__(self,
             vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             qarg_ffnn: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(ClauseAndSpanToAnswerSlotModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._qarg_ffnn = qarg_ffnn
    self._clause_embedding = Embedding(vocab.get_vocab_size("abst-clause-labels"),
                                       self._qarg_ffnn.get_input_dim())
    self._span_extractor = EndpointSpanExtractor(
        input_dim=self._sentence_encoder.get_output_dim(), combination="x,y")
    self._span_hidden = TimeDistributed(
        Linear(2 * self._sentence_encoder.get_output_dim(),
               self._qarg_ffnn.get_input_dim()))
    self._predicate_hidden = Linear(self._sentence_encoder.get_output_dim(),
                                    self._qarg_ffnn.get_input_dim())
    self._qarg_predictor = Linear(self._qarg_ffnn.get_output_dim(),
                                  self.vocab.get_vocab_size("qarg-labels"))
    self._metric = BinaryF1()
def __init__(self,
             vocab: Vocabulary,
             sentence_encoder: SentenceEncoder,
             tan_ffnn: FeedForward,
             inject_predicate: bool = False,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None):
    super(SpanToTanModel, self).__init__(vocab, regularizer)
    self._sentence_encoder = sentence_encoder
    self._tan_ffnn = tan_ffnn
    self._inject_predicate = inject_predicate
    self._span_extractor = EndpointSpanExtractor(
        input_dim=self._sentence_encoder.get_output_dim(), combination="x,y")
    prediction_input_dim = (3 * self._sentence_encoder.get_output_dim()) if self._inject_predicate \
        else (2 * self._sentence_encoder.get_output_dim())
    self._tan_pred = TimeDistributed(
        Sequential(
            Linear(prediction_input_dim, self._tan_ffnn.get_input_dim()),
            ReLU(),
            self._tan_ffnn,
            Linear(self._tan_ffnn.get_output_dim(),
                   self.vocab.get_vocab_size("tan-string-labels"))))
    self._metric = BinaryF1()
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             mention_feedforward: FeedForward,
             antecedent_feedforward: FeedForward,
             feature_size: int,
             max_span_width: int,
             spans_per_word: float,
             max_antecedents: int,
             lexical_dropout: float = 0.2,
             context_layer_back: Seq2SeqEncoder = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CoreferenceResolver, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._context_layer_back = context_layer_back
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)))
    self._mention_pruner = SpanPruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1))
    # TODO check the output dim when two context layers are passed through
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim())
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    self._speaker_embedding = Embedding(2, feature_size)
    self.genres = {g: i for i, g in enumerate(['bc', 'bn', 'mz', 'nw', 'pt', 'tc', 'wb'])}
    self._genre_embedding = Embedding(len(self.genres), feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    self._feature_dropout = torch.nn.Dropout(0.2)
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             context_layer: Seq2SeqEncoder,
             complex_word_feedforward: FeedForward,
             lexical_dropout: float = 0.2,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(NeuralMutilingualCWI, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._complex_word_scorer = torch.nn.Sequential(
        complex_word_feedforward,
        torch.nn.Linear(complex_word_feedforward.get_output_dim(), 1))
    self._target_word_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(), combination="x,y")
    self._loss = torch.nn.BCELoss()
    self._metric = F1Measure(1)
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def test_masked_indices_are_handled_correctly(self):
    sequence_tensor = torch.randn([2, 5, 7])
    # Concatenate start and end points together to form our representation.
    extractor = EndpointSpanExtractor(7, "x,y")
    indices = torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [3, 4]]])
    span_representations = extractor(sequence_tensor, indices)

    # Make a mask with the second batch element completely masked.
    indices_mask = torch.LongTensor([[1, 1], [0, 0]])

    span_representations = extractor(sequence_tensor, indices,
                                     span_indices_mask=indices_mask)
    start_embeddings, end_embeddings = span_representations.split(7, -1)
    start_indices, end_indices = indices.split(1, -1)

    correct_start_embeddings = batched_index_select(
        sequence_tensor, start_indices.squeeze()).data
    # Completely masked second batch element, so it should all be zero.
    correct_start_embeddings[1, :, :].fill_(0)
    correct_end_embeddings = batched_index_select(
        sequence_tensor, end_indices.squeeze()).data
    correct_end_embeddings[1, :, :].fill_(0)
    numpy.testing.assert_array_equal(start_embeddings.data.numpy(),
                                     correct_start_embeddings.numpy())
    numpy.testing.assert_array_equal(end_embeddings.data.numpy(),
                                     correct_end_embeddings.numpy())
def __init__(self, bert_hidden_size: int):
    super().__init__()
    self.bert_hidden_size = bert_hidden_size
    fc_size = 256
    # self.span_extractor = SelfAttentiveSpanExtractor(bert_hidden_size)
    self.span_extractor = EndpointSpanExtractor(bert_hidden_size, "x,y,x*y")
    self.fc = nn.Sequential(
        nn.BatchNorm1d(bert_hidden_size * 7),
        nn.Dropout(0.5),
        nn.Linear(bert_hidden_size * 7, fc_size),
        nn.ReLU(),
        nn.BatchNorm1d(fc_size),
        nn.Dropout(0.5),
        nn.Linear(fc_size, fc_size),
        nn.ReLU(),
        nn.BatchNorm1d(fc_size),
        nn.Dropout(0.5),
        nn.Linear(fc_size, fc_size),
        nn.ReLU(),
        nn.BatchNorm1d(fc_size),
        nn.Dropout(0.5),
        nn.Linear(fc_size, 3))
    for i, module in enumerate(self.fc):
        if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
            print("Initing batchnorm")
        elif isinstance(module, nn.Linear):
            if getattr(module, "weight_v", None) is not None:
                nn.init.uniform_(module.weight_g, 0, 1)
                nn.init.kaiming_normal_(module.weight_v)
                print("Initing linear with weight normalization")
                assert module.weight_g is not None
            else:
                nn.init.kaiming_normal_(module.weight)
                print("Initing linear")
            nn.init.constant_(module.bias, 0)
def __init__(self,
             encoder_size=64,
             dim_num_feat=0,
             dropout=0.2,
             seq_dropout=0.1,
             num_outputs=5):
    super(EntityLink_bert, self).__init__()
    # self.word_embedding = nn.Embedding(vocab_size,
    #                                    word_embed_size,
    #                                    padding_idx=0)
    # self.pos_embedding = nn.Embedding(pos_embed_size, pos_dim, padding_idx=0)
    self.seq_dropout = seq_dropout
    self.dropout1d = nn.Dropout2d(self.seq_dropout)
    self.span_extractor = EndpointSpanExtractor(
        encoder_size * 2, combination="x,x+y,y")  # SelfAttentiveSpanExtractor performed much worse
    bert_model = 'bert-base-chinese'
    self.bert = BertModel.from_pretrained(bert_model)
    self.use_layer = -1
    self.LSTM = LSTMEncoder(embed_size=768,
                            encoder_size=encoder_size,
                            bidirectional=True)
    hidden_size = 100
    self.hidden = nn.Linear(2 * encoder_size, num_outputs)
    self.classify = nn.Sequential(
        nn.BatchNorm1d(4 * 768),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=4 * 768, out_features=num_outputs))
    self.attn_pool = Attention(2 * encoder_size)
def test_correct_sequence_elements_are_embedded(self):
    sequence_tensor = torch.randn([2, 5, 7])
    # Concatenate start and end points together to form our representation.
    extractor = EndpointSpanExtractor(7, "x,y")

    indices = torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [3, 4]]])

    span_representations = extractor(sequence_tensor, indices)

    assert list(span_representations.size()) == [2, 2, 14]
    assert extractor.get_output_dim() == 14
    assert extractor.get_input_dim() == 7

    start_indices, end_indices = indices.split(1, -1)
    # We just concatenated the start and end embeddings together, so
    # we can check they match the original indices if we split them apart.
    start_embeddings, end_embeddings = span_representations.split(7, -1)

    correct_start_embeddings = batched_index_select(
        sequence_tensor, start_indices.squeeze())
    correct_end_embeddings = batched_index_select(
        sequence_tensor, end_indices.squeeze())
    numpy.testing.assert_array_equal(start_embeddings.data.numpy(),
                                     correct_start_embeddings.data.numpy())
    numpy.testing.assert_array_equal(end_embeddings.data.numpy(),
                                     correct_end_embeddings.data.numpy())
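# Not part of any example above: a minimal, self-contained sketch showing how the
# `combination` string controls EndpointSpanExtractor's output dimension. It assumes
# only that allennlp and torch are installed; tensor sizes are illustrative.
import torch
from allennlp.modules.span_extractors import EndpointSpanExtractor

sequence = torch.randn(1, 6, 16)              # (batch_size, sequence_length, input_dim)
spans = torch.LongTensor([[[0, 2], [3, 5]]])  # inclusive (start, end) token indices

for combination, expected_dim in [("x", 16), ("x,y", 32), ("x,y,x*y", 48)]:
    extractor = EndpointSpanExtractor(input_dim=16, combination=combination)
    span_reps = extractor(sequence, spans)
    assert extractor.get_output_dim() == expected_dim
    assert span_reps.shape == (1, 2, expected_dim)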
def __init__(
    self,
    use_citation_graph_embeddings: bool,
    citation_embedding_file: str,
    doc_to_idx_mapping_file: str,
    finetune_embedding: bool,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    modules: Params,
    loss_weights: Dict[str, int],
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
    display_metrics: List[str] = None,
) -> None:
    super(SalientOnlyModel, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)

    if use_citation_graph_embeddings:
        if citation_embedding_file == "" or doc_to_idx_mapping_file == "":
            raise ValueError(
                "Must supply citation embedding files to use graph embedding features")
        self._document_embedding = initialize_graph_embeddings(
            citation_embedding_file, finetune_embedding=finetune_embedding)
        self._doc_to_idx_mapping = json.load(open(doc_to_idx_mapping_file))
    else:
        self._document_embedding = None
        self._doc_to_idx_mapping = None

    modules = Params(modules)
    self._saliency_classifier = SpanClassifier.from_params(
        vocab=vocab,
        document_embedding=self._document_embedding,
        doc_to_idx_mapping=self._doc_to_idx_mapping,
        params=modules.pop("saliency_classifier"))
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(), combination="x,y")
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=context_layer.get_output_dim())

    for k in loss_weights:
        loss_weights[k] = float(loss_weights[k])
    self._loss_weights = loss_weights
    self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

    self._display_metrics = display_metrics
    self._multi_task_loss_metrics = {k: Average() for k in ["saliency"]}

    self.training_mode = True
    self.prediction_mode = False

    initializer(self)
def _make_span_extractor(self):
    if True:
        return MeanPoolingSpanExtractor(self.input_dim)
    #if self.span_pooling == "attn":
    #    return SelfAttentiveSpanExtractor(self.input_dim)
    else:
        return EndpointSpanExtractor(self.input_dim, combination=self.span_pooling)
def __init__(self,
             vocab_size,
             init_embedding,
             word_embed_size=300,
             encoder_size=64,
             dim_num_feat=0,
             dropout=0.2,
             seq_dropout=0.1,
             num_outputs=5):
    super(EntityLink_v3, self).__init__()
    # self.word_embedding = nn.Embedding(vocab_size,
    #                                    word_embed_size,
    #                                    padding_idx=0)
    # self.pos_embedding = nn.Embedding(pos_embed_size, pos_dim, padding_idx=0)
    self.word_embedding = nn.Embedding(vocab_size,
                                       word_embed_size,
                                       padding_idx=0)
    self.seq_dropout = seq_dropout
    self.embed_size = word_embed_size
    self.encoder_size = encoder_size
    if init_embedding is not None:
        self.word_embedding.weight.data.copy_(torch.from_numpy(init_embedding))
    for param in self.word_embedding.parameters():
        param.requires_grad = False
    self.dropout1d = nn.Dropout2d(self.seq_dropout)
    self.span_extractor = EndpointSpanExtractor(encoder_size * 2)
    bert_model = 'bert-base-chinese'
    #self.bert = BertModel.from_pretrained(bert_model)
    self.use_layer = -1
    self.query_attention = Attention(encoder_size * 2)
    self.abstract_attention = Attention(encoder_size * 2)
    self.lstm_attention = Attention(encoder_size * 2)
    self.LSTM_query = LSTMEncoder(embed_size=300,
                                  encoder_size=encoder_size,
                                  bidirectional=True)
    self.LSTM_abstract = LSTMEncoder(embed_size=300,
                                     encoder_size=encoder_size,
                                     bidirectional=True)
    self.LSTM = LSTMEncoder(embed_size=300,
                            encoder_size=encoder_size,
                            bidirectional=True)
    hidden_size = 100
    self.hidden = nn.Linear(2 * encoder_size, hidden_size)
    self.span_linear = nn.Linear(encoder_size * 4, encoder_size * 2)
    self.classify = nn.Sequential(
        nn.BatchNorm1d(encoder_size * 4),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=encoder_size * 4, out_features=num_outputs))
    self.mlp = nn.Sequential(
        nn.BatchNorm1d(768),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=768, out_features=128),
        nn.ReLU(inplace=True))
    self.mlp2 = nn.Sequential(
        nn.BatchNorm1d(128 + 2),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=128 + 2, out_features=1),
        nn.Sigmoid())
def __init__(self,
             vocab_size=0,
             word_embed_size=0,
             encoder_size=64,
             dim_num_feat=0,
             dropout=0.2,
             seq_dropout=0.1,
             num_outputs=5):
    super(EntityLink_v2, self).__init__()
    # self.word_embedding = nn.Embedding(vocab_size,
    #                                    word_embed_size,
    #                                    padding_idx=0)
    self.type_embedding = nn.Embedding(vocab_size,
                                       word_embed_size,
                                       padding_idx=0)
    self.seq_dropout = seq_dropout
    self.dropout1d = nn.Dropout2d(self.seq_dropout)
    self.span_extractor = EndpointSpanExtractor(768)
    bert_model = 'bert-base-chinese'
    self.bert = BertModel.from_pretrained(bert_model)
    self.use_layer = -1
    self.lstm_attention = Attention(768)
    self.LSTM = LSTMEncoder(embed_size=768,
                            encoder_size=encoder_size,
                            bidirectional=True)
    hidden_size = 100
    self.hidden = nn.Linear(2 * encoder_size, hidden_size)
    self.span_linear = nn.Linear(768 * 2, 768)
    self.span_extractor = EndpointSpanExtractor(768)
    self.classify = nn.Sequential(
        nn.BatchNorm1d(encoder_size * 4),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=encoder_size * 4, out_features=num_outputs))
    self.mlp1 = nn.Sequential(
        nn.BatchNorm1d(768 * 2 + 1),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=768 * 2 + 1, out_features=128),
        nn.ReLU(inplace=True))
    self.mlp2 = nn.Sequential(
        nn.BatchNorm1d(128),
        nn.Dropout(p=dropout),
        nn.Linear(in_features=128, out_features=1),
        nn.Sigmoid())
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    mention_feedforward: FeedForward,
    antecedent_feedforward: FeedForward,
    feature_size: int,
    max_span_width: int,
    spans_per_word: float,
    max_antecedents: int,
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    feedforward_scorer = torch.nn.Sequential(
        TimeDistributed(mention_feedforward),
        TimeDistributed(torch.nn.Linear(mention_feedforward.get_output_dim(), 1)),
    )
    self._mention_pruner = Pruner(feedforward_scorer)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
    )
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim()
    )
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(self._num_distance_buckets, feature_size)
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(self,
             vocab: Vocabulary,
             span_typer: SpanTyper,
             embed_size: int,
             label_namespace: str = 'span_labels',
             event_namespace: str = 'event_labels'):
    super(ArgumentSpanClassifier, self).__init__()
    self.vocab: Vocabulary = vocab
    self.label_namespace: str = label_namespace
    self.event_namespace: str = event_namespace
    self.embed_size = embed_size
    self.event_embedding_size = 50
    self.event_embeddings: nn.Embedding = nn.Embedding(
        num_embeddings=len(vocab.get_token_to_index_vocabulary(namespace=event_namespace)),
        embedding_dim=self.event_embedding_size)
    self.lexical_dropout = nn.Dropout(p=0.2)
    self.span_extractor: SpanExtractor = EndpointSpanExtractor(
        input_dim=self.embed_size, combination='x,y')
    self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(embed_size)
    self.arg_affine = TimeDistributed(
        FeedForward(input_dim=self.span_extractor.get_output_dim() +
                    self.attentive_span_extractor.get_output_dim(),
                    hidden_dims=self.embed_size,
                    num_layers=2,
                    activations=nn.GELU(),
                    dropout=0.2))
    self.trigger_affine = FeedForward(
        input_dim=self.span_extractor.get_output_dim() +
        self.attentive_span_extractor.get_output_dim(),
        hidden_dims=self.embed_size - self.event_embedding_size,
        num_layers=2,
        activations=nn.GELU(),
        dropout=0.2)
    self.trigger_event_infusion = TimeDistributed(
        FeedForward(input_dim=2 * self.embed_size,
                    hidden_dims=self.embed_size,
                    num_layers=2,
                    activations=nn.GELU(),
                    dropout=0.2))
    self.span_typer: SpanTyper = span_typer
    self.apply(self._init_weights)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    metrics: Dict[str, allennlp.training.metrics.Metric] = None,
    number_of_layers: int = 2,
    number_of_linear_layers: int = 2,
    renorm_method: str = None,
    skip_connection: bool = False,
    regularizer: RegularizerApplicator = None,
    bert_model: str = None,
) -> None:
    super().__init__(vocab, regularizer)
    self.embbedings = text_field_embedder
    self.hidden_size = 250
    self.bilstm = torch.nn.LSTM(input_size=300,
                                hidden_size=self.hidden_size,
                                num_layers=number_of_layers,
                                batch_first=True,
                                bidirectional=True,
                                dropout=0.2)
    self.extractor = EndpointSpanExtractor(input_dim=self.hidden_size,
                                           combination="x,y")
    self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.metrics = metrics or {"accuracy": CategoricalAccuracy()}
    self.first_liner_layer = torch.nn.Linear(
        self.hidden_size * 2 * 2, self.hidden_size * 2 * 2
    )  # doubled twice: once because the LSTM is bidirectional, once because the endpoints are concatenated
    self.second_liner_layer = torch.nn.Linear(self.hidden_size * 2 * 2,
                                              self.hidden_size * 2 * 2)
    self.do_skip_connection = skip_connection
    self.number_of_linear_layers = number_of_linear_layers
    self.relation_layer_norm = torch.nn.LayerNorm(torch.Size([self.hidden_size * 2]),
                                                  elementwise_affine=True)
    self.tanh = torch.nn.Tanh()
    self.drop_layer = torch.nn.Dropout(p=0.2)
    self.renorm_method = renorm_method or linear

    for t in [head_start_token, head_end_token, tail_start_token, tail_end_token]:
        index = self.vocab.add_token_to_namespace(t)
        if t == head_start_token:
            self.head_token_index = index
        elif t == tail_start_token:
            self.tail_token_index = index
def __init__(self,
             vocab_size,
             init_embedding,
             word_embed_size=300,
             encoder_size=64,
             dim_num_feat=0,
             dropout=0.2,
             seq_dropout=0.1,
             num_outputs=5,
             use_bert=False):
    super(EntityLink_entity_vector, self).__init__()
    # self.word_embedding = nn.Embedding(vocab_size,
    #                                    word_embed_size,
    #                                    padding_idx=0)
    # self.pos_embedding = nn.Embedding(pos_embed_size, pos_dim, padding_idx=0)
    self.use_bert = use_bert
    if not use_bert:
        self.word_embedding = nn.Embedding(vocab_size,
                                           word_embed_size,
                                           padding_idx=0)
        self.seq_dropout = seq_dropout
        self.embed_size = word_embed_size
        self.encoder_size = encoder_size
        if init_embedding is not None:
            self.word_embedding.weight.data.copy_(torch.from_numpy(init_embedding))
        self.seq_dropout = seq_dropout
        #self.lstm_attention = Attention(encoder_size*2)
        self.dropout1d = nn.Dropout2d(self.seq_dropout)
        self.LSTM = LSTMEncoder(embed_size=word_embed_size,
                                encoder_size=encoder_size,
                                bidirectional=True)
        self.classify = nn.Sequential(
            nn.BatchNorm1d(encoder_size * 4),
            nn.Dropout(p=dropout),
            nn.Linear(in_features=encoder_size * 4, out_features=num_outputs))
        span_size = 2 * encoder_size
    else:
        bert_model = 'bert-base-chinese'
        self.bert = BertModel.from_pretrained(bert_model)
        span_size = 768
    self.span_extractor = EndpointSpanExtractor(span_size)
    self.use_layer = -1
    hidden_size = 100
    self.hidden = nn.Linear(1536, hidden_size)
def __init__(self, flair_model: FlairEmbeddings) -> None:
    super().__init__()
    self.flair_model = flair_model
    self.pretrain_name = self.flair_model.name
    self.output_dim = flair_model.lm.hidden_size
    for param in self.flair_model.lm.parameters():
        param.requires_grad = False
    # In Flair, every LM is unidirectional going forwards.
    # We always extract on the right side.
    comb_string = "y"
    self.span_extractor = EndpointSpanExtractor(
        input_dim=self.flair_model.lm.hidden_size, combination=comb_string)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    modules: Params,
    loss_weights: Dict[str, int],
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer=None,  # regularizer: Optional[GbiRegularizerApplicator] = None,
    display_metrics: List[str] = None,
) -> None:
    super(ScirexModel, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)

    modules = Params(modules)
    self._ner = NERTagger.from_params(vocab=vocab, params=modules.pop("ner"))
    self._saliency_classifier = SpanClassifier.from_params(
        vocab=vocab, params=modules.pop("saliency_classifier"))
    self._cluster_n_ary_relation = NAryRelationExtractor.from_params(
        vocab=vocab, params=modules.pop("n_ary_relation"))

    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(), combination="x,y")
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=context_layer.get_output_dim())

    for k in loss_weights:
        loss_weights[k] = float(loss_weights[k])
    self._loss_weights = loss_weights
    self._permanent_loss_weights = copy.deepcopy(self._loss_weights)

    self._display_metrics = display_metrics
    self._multi_task_loss_metrics = {
        k: Average() for k in ["ner", "saliency", "n_ary_relation"]
    }

    self.training_mode = True
    self.prediction_mode = False

    initializer(self)
def __init__(self,
             node_types_vocabulary=None,
             node_attrs_vocabulary=None,
             p2p_edges_vocabulary=None,
             p2r_edges_vocabulary=None,
             bilstm_hidden_embedding_dim=200,
             lexical_dropout=0.5,
             lstm_dropout=0.4,
             max_span_width=15,
             feature_size=20,
             embed_mode='bert-base-cased',
             device=torch.device("cuda")):
    super().__init__()
    self.node_types_vocabulary = node_types_vocabulary
    self.node_attrs_vocabulary = node_attrs_vocabulary
    self.p2p_edges_vocabulary = p2p_edges_vocabulary
    self.p2r_edges_vocabulary = p2r_edges_vocabulary
    self.bilstm_hidden_embedding_dim = bilstm_hidden_embedding_dim
    self.lexical_dropout = lexical_dropout
    self.lstm_dropout = lstm_dropout
    self.embed_mode = embed_mode
    self.device = device
    self.max_span_width = max_span_width
    self.feature_size = feature_size

    if self.embed_mode == 'bert-base-cased':
        self.bert = AutoModel.from_pretrained("bert-base-cased")
        self.bert_hidden_embedding_dim = 768

    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x

    self.bilstm = LSTM(input_size=self.bert_hidden_embedding_dim,
                       hidden_size=self.bilstm_hidden_embedding_dim,
                       dropout=self.lstm_dropout,
                       bidirectional=True,
                       num_layers=6)

    self._endpoint_span_extractor = EndpointSpanExtractor(
        self.bilstm_hidden_embedding_dim,
        combination="x,y",
        num_width_embeddings=self.max_span_width,
        span_width_embedding_dim=self.feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=self.bert_hidden_embedding_dim)
def test_masked_indices_are_handled_correctly_with_exclusive_indices(self):
    sequence_tensor = Variable(torch.randn([2, 5, 8]))
    # Concatenate start and end points together to form our representation
    # for both the forward and backward directions.
    extractor = EndpointSpanExtractor(8, "x,y", use_exclusive_start_indices=True)
    indices = Variable(torch.LongTensor([[[1, 3], [2, 4]], [[0, 2], [0, 1]]]))
    sequence_mask = Variable(torch.LongTensor([[1, 1, 1, 1, 1], [1, 1, 1, 0, 0]]))

    span_representations = extractor(sequence_tensor, indices,
                                     sequence_mask=sequence_mask)

    # We just concatenated the start and end embeddings together, so
    # we can check they match the original indices if we split them apart.
    start_embeddings, end_embeddings = span_representations.split(8, -1)

    correct_start_indices = Variable(torch.LongTensor([[0, 1], [-1, -1]]))
    # These indices should be -1, so they'll be replaced with a sentinel. Here,
    # we'll set them to a value other than -1 so we can index select the indices and
    # replace them later.
    correct_start_indices[1, 0] = 1
    correct_start_indices[1, 1] = 1

    correct_end_indices = Variable(torch.LongTensor([[3, 4], [2, 1]]))

    correct_start_embeddings = batched_index_select(
        sequence_tensor.contiguous(), correct_start_indices)
    # This element had a sequence_tensor index of 0, so its exclusive index is the start sentinel.
    correct_start_embeddings[1, 0] = extractor._start_sentinel.data
    correct_start_embeddings[1, 1] = extractor._start_sentinel.data
    numpy.testing.assert_array_equal(start_embeddings.data.numpy(),
                                     correct_start_embeddings.data.numpy())

    correct_end_embeddings = batched_index_select(
        sequence_tensor.contiguous(), correct_end_indices)
    numpy.testing.assert_array_equal(end_embeddings.data.numpy(),
                                     correct_end_embeddings.data.numpy())
def __init__(self, bert_model=''):
    super(score_model, self).__init__()
    if bert_model in ("bert-base-uncased", "bert-base-cased"):
        self.bert_hidden_size = 768
    elif bert_model in ("bert-large-uncased", "bert-large-cased"):
        self.bert_hidden_size = 1024
    else:
        raise ValueError("Unsupported BERT model.")
    self.buckets_embedding_size = 20
    self.score_hidden_size = 128
    self.buckets = [1, 2, 3, 4, 5, 8, 16, 32, 64]
    self.bert = BertModel.from_pretrained(bert_model)
    self.embedding = torch.nn.Embedding(len(self.buckets) + 1,
                                        self.buckets_embedding_size)
    self.span_extractor = EndpointSpanExtractor(self.bert_hidden_size, "x,y,x*y")
    self.pair_score = mentionpair_score(
        self.bert_hidden_size * 3 * 3 + self.buckets_embedding_size,
        self.score_hidden_size)
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             number_of_linear_layers: int = 2,
             skip_connection: bool = False,
             regularizer: RegularizerApplicator = None,
             hidden_dim: int = 500,
             add_distance_from_mean: bool = True,
             drop_out_rate: float = 0.2,
             devices=0,
             num_labels=42):
    super().__init__(vocab, regularizer)
    self.num_labels = num_labels
    self.embbedings = text_field_embedder
    self.bert_type_model = BERT_BASE_CONFIG
    self.extractor = EndpointSpanExtractor(input_dim=self.bert_type_model['hidden_size'],
                                           combination="x,y")
    self.crossEntropyLoss = torch.nn.CrossEntropyLoss()
    if isinstance(devices, list):
        devices = devices[0]
    if devices:
        self.device = torch.device("cuda:{}".format(devices) if torch.cuda.is_available() else "cpu")
    else:
        self.device = torch.device("cuda")
    self.metrics = {
        # "NOTA_NotInBest2": NotaNotInsideBest2(),
        "accuracy": CategoricalAccuracy(),
        'f1': SpecialLoss(0)  # F1Measure(1)  # no relation is 0
    }
    # for i in range(1, 42):
    #     self.metrics['f1_{}'.format(i)] = F1Measure(i)
    self.first_liner_layer = torch.nn.Linear(self.bert_type_model['hidden_size'] * 2, hidden_dim)
    self.second_liner_layer = torch.nn.Linear(hidden_dim, self.num_labels)  # TACRED labels
    self.number_of_linear_layers = number_of_linear_layers
    self.tanh = torch.nn.Tanh()
    self.drop_layer = torch.nn.Dropout(p=drop_out_rate)
    self.counter = 0
def __init__(self,
             vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             classifier_feedforward: FeedForward,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(RNNClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.encoder = encoder
    self.endpoint_span_extractor = EndpointSpanExtractor(
        encoder.get_output_dim(), combination="x,y")
    self.attentive_span_extractor = SelfAttentiveSpanExtractor(
        encoder.get_output_dim())
    attention_input_dim = encoder.get_output_dim() * 2
    self.holder_attention = nn.Linear(attention_input_dim, 1)
    self.target_attention = nn.Linear(attention_input_dim, 1)
    self.classifier_feedforward = classifier_feedforward

    if text_field_embedder.get_output_dim() != encoder.get_input_dim():
        raise ConfigurationError(
            "The output dimension of the text_field_embedder must match the "
            "input dimension of the title_encoder. Found {} and {}, "
            "respectively.".format(text_field_embedder.get_output_dim(),
                                   encoder.get_input_dim()))

    self.metrics = {
        "f1_neg": F1Measure(1),
        "f1_none": F1Measure(0),
        "f1_pos": F1Measure(2),
    }
    self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    context_layer: Seq2SeqEncoder,
    mention_feedforward: FeedForward,
    antecedent_feedforward: FeedForward,
    feature_size: int,
    max_span_width: int,
    spans_per_word: float,
    max_antecedents: int,
    coarse_to_fine: bool = False,
    inference_order: int = 1,
    lexical_dropout: float = 0.2,
    initializer: InitializerApplicator = InitializerApplicator(),
    **kwargs
) -> None:
    super().__init__(vocab, **kwargs)
    self._text_field_embedder = text_field_embedder
    self._context_layer = context_layer
    self._mention_feedforward = TimeDistributed(mention_feedforward)
    self._mention_scorer = TimeDistributed(
        torch.nn.Linear(mention_feedforward.get_output_dim(), 1)
    )
    self._antecedent_feedforward = TimeDistributed(antecedent_feedforward)
    self._antecedent_scorer = TimeDistributed(
        torch.nn.Linear(antecedent_feedforward.get_output_dim(), 1)
    )
    self._endpoint_span_extractor = EndpointSpanExtractor(
        context_layer.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False,
    )
    self._attentive_span_extractor = SelfAttentiveSpanExtractor(
        input_dim=text_field_embedder.get_output_dim()
    )
    # 10 possible distance buckets.
    self._num_distance_buckets = 10
    self._distance_embedding = Embedding(
        embedding_dim=feature_size, num_embeddings=self._num_distance_buckets
    )
    self._max_span_width = max_span_width
    self._spans_per_word = spans_per_word
    self._max_antecedents = max_antecedents
    self._coarse_to_fine = coarse_to_fine
    if self._coarse_to_fine:
        self._coarse2fine_scorer = torch.nn.Linear(
            mention_feedforward.get_input_dim(), mention_feedforward.get_input_dim()
        )
    self._inference_order = inference_order
    if self._inference_order > 1:
        self._span_updating_gated_sum = GatedSum(mention_feedforward.get_input_dim())

    self._mention_recall = MentionRecall()
    self._conll_coref_scores = ConllCorefScores()
    if lexical_dropout > 0:
        self._lexical_dropout = torch.nn.Dropout(p=lexical_dropout)
    else:
        self._lexical_dropout = lambda x: x
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    task_type: str,
    model_type: str,
    random_init_bert: bool,  # set True to shuffle the BERT encoder and get a random init
    initializer: InitializerApplicator = InitializerApplicator()
) -> None:
    super().__init__(vocab)
    assert task_type in ["unary", "binary"]  # unary or binary edges
    assert model_type in ["clf", "reg"]  # classification or regression
    self.task_type = task_type
    self.model_type = model_type

    mix_params = None
    if self.task_type == "binary":
        # for binary tasks, train two separate mixes
        self.bert_embedder = PretrainedBertEmbedderSplitMix(
            BERT_MODEL_NAME,
            requires_grad=False,
            top_layer_only=False,
            scalar_mix_parameters=mix_params)
    else:
        # for the unary task, train a single mix
        self.bert_embedder = PretrainedBertEmbedder(
            BERT_MODEL_NAME,
            requires_grad=False,
            top_layer_only=False,
            scalar_mix_parameters=mix_params)
    if random_init_bert:
        self.bert_embedder.bert_model.apply(init_weights)

    self.vocab = vocab
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.num_classes = self.num_classes if self.num_classes > 0 else 1
    self.span_projection_dim = self.bert_embedder.output_dim

    # represent each span by its first wordpiece token
    self.span_extractor = EndpointSpanExtractor(self.span_projection_dim,
                                                combination="x")
    if self.task_type == "binary":
        clf_input_dim = self.span_projection_dim * 2
    else:
        clf_input_dim = self.span_projection_dim
    self.classifier = Linear(clf_input_dim, self.num_classes)  # just a linear tag projection layer

    if self.model_type == "clf":
        self.loss = torch.nn.CrossEntropyLoss()  # cross-entropy for classification
    else:
        self.loss = torch.nn.SmoothL1Loss()  # smooth L1 for regression

    self.m_acc = CategoricalAccuracy()
    self.m_fmicro = FBetaMeasure(average="micro")
    self.mse = MeanSquaredError()
    initializer(self)
def _make_span_extractor(self):
    #if self.span_pooling == "attn":
    #    return SelfAttentiveSpanExtractor(self.proj_dim)
    #else:
    #    return EndpointSpanExtractor(self.proj_dim, combination=self.span_pooling)
    return EndpointSpanExtractor(self.proj_dim)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             modules,  # TODO(dwadden) Add type.
             feature_size: int,
             max_span_width: int,
             target_task: str,
             feedforward_params: Dict[str, Union[int, float]],
             loss_weights: Dict[str, float],
             initializer: InitializerApplicator = InitializerApplicator(),
             module_initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             display_metrics: List[str] = None) -> None:
    super(DyGIE, self).__init__(vocab, regularizer)

    ####################
    # Create span extractor.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        embedder.get_output_dim(),
        combination="x,y",
        num_width_embeddings=max_span_width,
        span_width_embedding_dim=feature_size,
        bucket_widths=False)

    ####################
    # Set parameters.
    self._embedder = embedder
    self._loss_weights = loss_weights
    self._max_span_width = max_span_width
    self._display_metrics = self._get_display_metrics(target_task)
    token_emb_dim = self._embedder.get_output_dim()
    span_emb_dim = self._endpoint_span_extractor.get_output_dim()

    ####################
    # Create submodules.

    modules = Params(modules)

    # Helper function to create feedforward networks.
    def make_feedforward(input_dim):
        return FeedForward(input_dim=input_dim,
                           num_layers=feedforward_params["num_layers"],
                           hidden_dims=feedforward_params["hidden_dims"],
                           activations=torch.nn.ReLU(),
                           dropout=feedforward_params["dropout"])

    # Submodules
    self._ner = NERTagger.from_params(vocab=vocab,
                                      make_feedforward=make_feedforward,
                                      span_emb_dim=span_emb_dim,
                                      feature_size=feature_size,
                                      params=modules.pop("ner"))
    self._coref = CorefResolver.from_params(vocab=vocab,
                                            make_feedforward=make_feedforward,
                                            span_emb_dim=span_emb_dim,
                                            feature_size=feature_size,
                                            params=modules.pop("coref"))
    self._relation = RelationExtractor.from_params(vocab=vocab,
                                                   make_feedforward=make_feedforward,
                                                   span_emb_dim=span_emb_dim,
                                                   feature_size=feature_size,
                                                   params=modules.pop("relation"))
    self._events = EventExtractor.from_params(vocab=vocab,
                                              make_feedforward=make_feedforward,
                                              token_emb_dim=token_emb_dim,
                                              span_emb_dim=span_emb_dim,
                                              feature_size=feature_size,
                                              params=modules.pop("events"))

    ####################
    # Initialize text embedder and all submodules
    for module in [self._ner, self._coref, self._relation, self._events]:
        module_initializer(module)

    initializer(self)
def __init__(self, config, args):
    super(BertNER, self).__init__(config)
    self.bert = BertModel(config)

    self.args = args
    if 'roberta' in self.args.bert_config_dir:
        self.bert = RobertaModel(config)
        print('use the roberta pre-trained model...')

    # self.start_outputs = nn.Linear(config.hidden_size, 2)
    # self.end_outputs = nn.Linear(config.hidden_size, 2)
    self.start_outputs = nn.Linear(config.hidden_size, 1)
    self.end_outputs = nn.Linear(config.hidden_size, 1)

    # self.span_embedding = SingleLinearClassifier(config.hidden_size * 2, 1)
    self.hidden_size = config.hidden_size
    self.span_combination_mode = self.args.span_combination_mode
    self.max_span_width = args.max_span_len
    self.n_class = args.n_class
    self.tokenLen_emb_dim = self.args.tokenLen_emb_dim  # must be set whenever max_span_width is set

    # if self.args.use_tokenLen:
    #     self.tokenLen_emb_dim = self.args.tokenLen_emb_dim
    # else:
    #     self.tokenLen_emb_dim = None

    print("self.max_span_width: ", self.max_span_width)
    print("self.tokenLen_emb_dim: ", self.tokenLen_emb_dim)
    # bucket_widths: whether to bucket the span widths into log-space buckets.
    # If `False`, the raw span widths are used.
    self._endpoint_span_extractor = EndpointSpanExtractor(
        config.hidden_size,
        combination=self.span_combination_mode,
        num_width_embeddings=self.max_span_width,
        span_width_embedding_dim=self.tokenLen_emb_dim,
        bucket_widths=True)

    # self.span_embedding = MultiNonLinearClassifier(config.hidden_size*2+self.span_emb_dim, self.n_class, config.mrc_dropout)
    self.linear = nn.Linear(10, 1)
    self.score_func = nn.Softmax(dim=-1)

    # span-length embedding
    self.spanLen_emb_dim = args.spanLen_emb_dim
    self.morph_emb_dim = args.morph_emb_dim
    input_dim = config.hidden_size * 2 + self.tokenLen_emb_dim
    if self.args.use_spanLen and not self.args.use_morphology:
        input_dim = config.hidden_size * 2 + self.tokenLen_emb_dim + self.spanLen_emb_dim
    elif not self.args.use_spanLen and self.args.use_morphology:
        input_dim = config.hidden_size * 2 + self.tokenLen_emb_dim + self.morph_emb_dim
    elif self.args.use_spanLen and self.args.use_morphology:
        input_dim = config.hidden_size * 2 + self.tokenLen_emb_dim + self.spanLen_emb_dim + self.morph_emb_dim

    self.span_embedding = MultiNonLinearClassifier(input_dim, self.n_class, config.mrc_dropout)

    self.spanLen_embedding = nn.Embedding(args.max_span_len + 1,
                                          self.spanLen_emb_dim,
                                          padding_idx=0)
    self.morph_embedding = nn.Embedding(len(args.morph2idx_list) + 1,
                                        self.morph_emb_dim,
                                        padding_idx=0)
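# Not from the example above: a small sketch of the width-embedding options that several
# of these models pass to EndpointSpanExtractor. The numbers are illustrative; the
# extractor appends a learned span-width embedding to the concatenated endpoints, so the
# output dimension grows by `span_width_embedding_dim`.
from allennlp.modules.span_extractors import EndpointSpanExtractor

extractor = EndpointSpanExtractor(input_dim=768,
                                  combination="x,y",
                                  num_width_embeddings=16,
                                  span_width_embedding_dim=20,
                                  bucket_widths=True)
assert extractor.get_output_dim() == 768 * 2 + 20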
def __init__(self,
             vocab: Vocabulary,
             span_graph_encoder: SpanGraphEncoder,
             span_typer: SpanTyper,
             embed_size: int,
             label_namespace: str = 'span_labels',
             event_namespace: str = 'event_labels',
             use_event_embedding: bool = True):
    super(SelectorArgLinking, self).__init__()
    self.vocab: Vocabulary = vocab
    self.label_namespace: str = label_namespace
    self.event_namespace: str = event_namespace
    self.use_event_embedding = use_event_embedding
    self.embed_size = embed_size
    self.event_embedding_size = 50
    # self.span_finder: SpanFinder = span_finder
    # self.span_selector: SpanSelector = span_selector
    if use_event_embedding:
        self.event_embeddings: nn.Embedding = nn.Embedding(
            num_embeddings=len(vocab.get_token_to_index_vocabulary(namespace=event_namespace)),
            embedding_dim=self.event_embedding_size
        )
    self.lexical_dropout = nn.Dropout(p=0.2)
    # self.contextualized_encoder: Seq2SeqEncoder = LstmSeq2SeqEncoder(
    #     bidirectional=True,
    #     input_size=embed_size,
    #     hidden_size=embed_size,
    #     num_layers=2,
    #     dropout=0.4
    # )
    self.span_graph_encoder: SpanGraphEncoder = span_graph_encoder
    self.span_extractor: SpanExtractor = EndpointSpanExtractor(
        # input_dim=self.contextualized_encoder.get_output_dim(),
        input_dim=self.embed_size,
        combination='x,y'
    )
    self.attentive_span_extractor: SpanExtractor = SelfAttentiveSpanExtractor(embed_size)
    self.arg_affine = TimeDistributed(FeedForward(
        input_dim=self.span_extractor.get_output_dim() + self.attentive_span_extractor.get_output_dim(),
        hidden_dims=self.span_graph_encoder.get_input_dim(),
        num_layers=2,
        activations=nn.GELU(),
        dropout=0.2
    ))
    self.trigger_affine = FeedForward(
        input_dim=self.span_extractor.get_output_dim() + self.attentive_span_extractor.get_output_dim(),
        hidden_dims=self.span_graph_encoder.get_input_dim() - (
            self.event_embedding_size if use_event_embedding else 0),
        num_layers=2,
        activations=nn.GELU(),
        dropout=0.2
    )
    # self.arg_affine: nn.Linear = nn.Linear(
    #     self.span_extractor.get_output_dim() + self.attentive_span_extractor.get_output_dim(),
    #     self.span_graph_encoder.get_input_dim()
    # )
    # self.trigger_affine: nn.Linear = nn.Linear(
    #     self.span_extractor.get_output_dim() + self.attentive_span_extractor.get_output_dim(),
    #     self.span_graph_encoder.get_input_dim()
    # )
    # self.trigger_event_infuse: nn.Sequential = nn.Sequential(
    #     nn.Dropout(p=0.1),
    #     nn.Linear(4 * self.span_graph_encoder.get_input_dim(), 2 * self.span_graph_encoder.get_input_dim()),
    #     nn.Dropout(p=0.1),
    #     nn.GELU(),
    #     nn.Linear(2 * self.span_graph_encoder.get_input_dim(), self.span_graph_encoder.get_input_dim()),
    #     nn.Dropout(p=0.1),
    #     nn.GELU()
    # )
    self.span_typer: SpanTyper = span_typer
    self.apply(self._init_weights)
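# Not from any example above: several of these models concatenate an EndpointSpanExtractor
# with a SelfAttentiveSpanExtractor to build span embeddings. A minimal sketch of that
# pattern, with illustrative dimensions (the real models feed encoder outputs here).
import torch
from allennlp.modules.span_extractors import EndpointSpanExtractor, SelfAttentiveSpanExtractor

hidden_dim = 16
sequence = torch.randn(2, 7, hidden_dim)
spans = torch.LongTensor([[[0, 1], [2, 4]], [[1, 3], [5, 6]]])

endpoint_extractor = EndpointSpanExtractor(hidden_dim, combination="x,y")
attentive_extractor = SelfAttentiveSpanExtractor(input_dim=hidden_dim)
span_embeddings = torch.cat([endpoint_extractor(sequence, spans),
                             attentive_extractor(sequence, spans)], dim=-1)
assert span_embeddings.shape == (2, 2,
                                 endpoint_extractor.get_output_dim() +
                                 attentive_extractor.get_output_dim())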