def __init__(self, config, use_crf=False):
    super(BertForJointBIOExtractAndClassification, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token
    # classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.bio_affine = nn.Linear(config.hidden_size, 3)
    self.cls_affine = nn.Linear(config.hidden_size, 5)
    if self.use_crf:
        self.cls_crf = ConditionalRandomField(5)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal
            # for initialization, cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)
def __init__(self, gen_emb, domain_emb, num_head=8, num_classes=3, dropout=0.5, crf=False):
    super(Model_att_s, self).__init__()
    self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
    self.gen_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(gen_emb), requires_grad=False)
    self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
    self.domain_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(domain_emb), requires_grad=False)
    self.dropout = torch.nn.Dropout(dropout)
    # Note: the attention and feed-forward layers hardcode 8 heads and 0.5 dropout,
    # so the num_head argument and (here) the dropout argument are not used by them.
    self.att = layers.BertSelfAttention(gen_emb.shape[1] + domain_emb.shape[1],
                                        num_attention_heads=8,
                                        attention_probs_dropout_prob=0.5)
    self.pwt = layers.PositionwiseFeedForward(
        gen_emb.shape[1] + domain_emb.shape[1], 256, dropout=0.5)
    self.linear_ae = torch.nn.Linear(256, num_classes)
    self.crf_flag = crf
    if self.crf_flag:
        from allennlp.modules import ConditionalRandomField
        self.crf = ConditionalRandomField(num_classes)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed( Linear(self.encoder.get_output_dim(), self.num_tags)) self.crf = ConditionalRandomField(self.num_tags) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace) if text_field_embedder.get_output_dim() != encoder.get_input_dim(): raise ConfigurationError( "The output dimension of the text_field_embedder must match the " "input dimension of the phrase_encoder. Found {} and {}, " "respectively.".format(text_field_embedder.get_output_dim(), encoder.get_input_dim())) initializer(self)
def __init__(self,
             vocab: Vocabulary,
             embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             dropout: float = 0.1,
             ff_dim: int = 100):
    super().__init__(vocab)
    self.embedder = embedder
    self.encoder = encoder
    assert self.embedder.get_output_dim() == self.encoder.get_input_dim()
    self.feedforward = FeedForward(
        encoder.get_output_dim(), 1,
        hidden_dims=ff_dim,
        activations=Activation.by_name('relu')(),
        dropout=dropout)
    self.out = torch.nn.Linear(
        in_features=self.feedforward.get_output_dim(),
        out_features=vocab.get_vocab_size('labels'))
    self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))
    self.f1 = FBetaMeasure(average='micro')
    self.accuracy = CategoricalAccuracy()
    self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed( Linear(self.encoder.get_output_dim(), self.num_tags)) self.crf = ConditionalRandomField(self.num_tags) self.metrics = { "token_accuracy": CategoricalAccuracy(), "accuracy": BooleanAccuracy() } check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") initializer(self)
def __init__( self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feature_namespace: str = None, feature_encoder: Seq2VecEncoder = None, label_encoding: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None, ) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_classes = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed( Linear(self.encoder.get_output_dim(), self.num_classes)) if feature_namespace: self.feature_encoder = feature_encoder self.feat_classification_layer = Linear( self.feature_encoder.get_output_dim(), self.vocab.get_vocab_size(feature_namespace)) # print("num_features:", self.vocab.get_vocab_size(feature_namespace)) if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_classes, constraints, include_start_end_transitions=include_start_end_transitions) check_dimensions_match( text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim", ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3), } self._f1_metric = None initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             feedforward: Optional[FeedForward] = None,
             include_start_end_transitions: bool = True,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.label_namespace = 'labels'
    self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

    # encode text
    self.text_field_embedder = text_field_embedder
    self.encoder = encoder
    self.dropout = torch.nn.Dropout(dropout) if dropout else None
    self.feedforward = feedforward

    # crf
    output_dim = self.encoder.get_output_dim() if feedforward is None else feedforward.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
    self.crf = ConditionalRandomField(self.num_tags,
                                      constraints=None,
                                      include_start_end_transitions=include_start_end_transitions)

    initializer(self)

    self.metrics = {}
    # Add F1 score for individual labels to metrics
    for index, label in self.vocab.get_index_to_token_vocabulary(self.label_namespace).items():
        self.metrics[label] = F1Measure(positive_label=index)
def __init__(self, config):
    super().__init__()
    self.config = config
    self.tag_form = config.tag_form
    self.crf = ConditionalRandomField(
        num_tags=config.tag_vocab_size,
        include_start_end_transitions=False,
    )
    del self.crf.transitions  # must del parameter before assigning a tensor
    self.crf.transitions = None
    del self.crf._constraint_mask
    num_tags = config.tag_vocab_size
    constraint_mask = torch.Tensor(num_tags + 2, num_tags + 2).fill_(1.).to(config.device)
    self.crf._constraint_mask = constraint_mask  # torch.nn.Parameter(constraint_mask, requires_grad=False)

    if self.tag_form == 'iobes':
        M = 4
    elif self.tag_form == 'iob2':
        M = 2
    else:
        raise Exception(f'unsupported tag form: {self.tag_form}')
    # N tags in total: one O tag plus M role tags (e.g. B/I) for each of the E entity types.
    N = config.tag_vocab_size
    E = (config.tag_vocab_size - 1) // M
    self.N, self.M, self.E = N, M, E
    # Tied transition parameters: within-entity, cross-entity, O->O, role->O, O->role.
    self.p_in = nn.Parameter(torch.randn([M, M], dtype=torch.float32))
    self.p_cross = nn.Parameter(torch.randn([M, M], dtype=torch.float32))
    self.p_out = nn.Parameter(torch.randn(1, dtype=torch.float32))
    self.p_to_out = nn.Parameter(torch.randn(M, dtype=torch.float32))
    self.p_from_out = nn.Parameter(torch.randn(M, dtype=torch.float32))
    self.need_update = True
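# The code that expands these tied blocks into the CRF's full transition matrix
# (presumably run whenever ``need_update`` is set) is not shown above. The sketch
# below is one plausible expansion, assuming a tag layout with O at index 0
# followed by M consecutive role tags per entity type; both the layout and the
# function name are assumptions, not taken from the original code.
import torch

def build_transitions(p_in, p_cross, p_out, p_to_out, p_from_out, E):
    """Expand tied blocks into an (N x N) transition matrix, N = M * E + 1."""
    M = p_in.size(0)
    N = M * E + 1
    trans = p_in.new_empty(N, N)
    trans[0, 0] = p_out                       # O -> O
    for e in range(E):
        rows = slice(1 + e * M, 1 + (e + 1) * M)
        trans[rows, 0] = p_to_out             # entity role -> O
        trans[0, rows] = p_from_out           # O -> entity role
        for f in range(E):
            cols = slice(1 + f * M, 1 + (f + 1) * M)
            # Shared within-entity block on the diagonal, cross-entity block elsewhere.
            trans[rows, cols] = p_in if e == f else p_cross
    return trans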
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed( Linear(self.encoder.get_output_dim(), self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField(self.num_tags, constraints) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or "BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") initializer(self)
def __init__(self, gen_emb, domain_emb, num_head=8, num_classes=3, dropout=0.5, crf=False):
    super(Model_att, self).__init__()
    self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
    self.gen_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(gen_emb), requires_grad=False)
    self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
    self.domain_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(domain_emb), requires_grad=False)
    self.dropout = torch.nn.Dropout(dropout)
    self.conva = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 256, 1, padding=0)
    # Note: the attention layer hardcodes 8 heads, so the num_head argument is unused here.
    self.sattb = layers.BertSelfAttention(
        256,
        num_attention_heads=8,
        attention_probs_dropout_prob=dropout,
        output_attentions=True)
    self.convb = torch.nn.Conv1d(256, 256, 1, padding=0)
    self.linear_ae = torch.nn.Linear(256, num_classes)
    self.crf_flag = crf
    if self.crf_flag:
        from allennlp.modules import ConditionalRandomField
        self.crf = ConditionalRandomField(num_classes)
def __init__(self,
             vocab: Vocabulary,
             embedding_dim: int,
             use_crf: bool = False,
             label_namespace: str = "xpos_tags"):
    super().__init__(vocab)
    self.label_namespace = label_namespace
    self.labels = vocab.get_index_to_token_vocabulary(label_namespace)
    num_labels = vocab.get_vocab_size(label_namespace)
    if use_crf:
        self.crf = ConditionalRandomField(num_labels, include_start_end_transitions=True)
        self.label_projection_layer = TimeDistributed(
            torch.nn.Linear(embedding_dim, num_labels))
        self.decoder = None
    else:
        self.crf = None
        self.decoder = GruSeq2SeqEncoder(input_size=embedding_dim,
                                         hidden_size=embedding_dim,
                                         num_layers=1,
                                         bidirectional=True)
        self.label_projection_layer = TimeDistributed(
            torch.nn.Linear(self.decoder.get_output_dim(), num_labels))
    from allennlp.training.metrics import CategoricalAccuracy
    self.metrics = {"accuracy": CategoricalAccuracy()}
def test_constrained_viterbi_tags(self):
    constraints = {(0, 0), (0, 1),
                   (1, 1), (1, 2),
                   (2, 2), (2, 3),
                   (3, 3), (3, 4),
                   (4, 4), (4, 0)}

    # Add the transitions to the end tag
    # and from the start tag.
    for i in range(5):
        constraints.add((5, i))
        constraints.add((i, 6))

    crf = ConditionalRandomField(num_tags=5, constraints=constraints)
    crf.transitions = torch.nn.Parameter(self.transitions)
    crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
    crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    mask = torch.LongTensor([
        [1, 1, 1],
        [1, 1, 0]
    ])

    viterbi_path = crf.viterbi_tags(self.logits, mask)

    # Get just the tags from each tuple of (tags, score).
    viterbi_tags = [x for x, y in viterbi_path]

    # Now the tags should respect the constraints
    assert viterbi_tags == [
        [2, 3, 3],
        [2, 3]
    ]
def __init__(self, gen_emb, domain_emb, num_classes=3, dropout=0.5, crf=False):
    super(Model, self).__init__()
    self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
    self.gen_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(gen_emb), requires_grad=False)
    self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
    self.domain_embedding.weight = torch.nn.Parameter(
        torch.from_numpy(domain_emb), requires_grad=False)
    self.conv1 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 5, padding=2)
    self.conv2 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 3, padding=1)
    self.dropout = torch.nn.Dropout(dropout)
    self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.linear_ae = torch.nn.Linear(256, num_classes)
    self.crf_flag = crf
    if self.crf_flag:
        from allennlp.modules import ConditionalRandomField
        self.crf = ConditionalRandomField(num_classes)
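# The forward pass for the CNN model above is not shown; typically the crf_flag
# switches training from per-token cross-entropy to the CRF's negative
# log-likelihood. A hypothetical sketch (the names ``scores``, ``labels`` and
# ``mask`` and their shapes are assumptions, not taken from the original code):
def compute_loss(model, scores, labels, mask):
    # scores: (batch, seq_len, num_classes); labels, mask: (batch, seq_len)
    if model.crf_flag:
        # allennlp's CRF returns a summed log-likelihood; negate it for a loss.
        return -model.crf(scores, labels, mask)
    # Otherwise fall back to masked per-token cross-entropy.
    flat = mask.view(-1).bool()
    return torch.nn.functional.cross_entropy(
        scores.view(-1, scores.size(-1))[flat], labels.view(-1)[flat])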
def __init__(
    self,
    vocab: Vocabulary,
    embedder: TextFieldEmbedder,
    encoder: Seq2SeqEncoder = None,
    dropout: float = 0.5,
    label_namespace: str = "entity_tags",
) -> None:
    super().__init__(vocab)
    self.vocab = vocab
    self.embedder = embedder
    self.encoder = encoder
    self.dropout = Dropout(dropout)
    self.label_namespace = label_namespace
    self.labels = vocab.get_index_to_token_vocabulary(label_namespace)
    num_labels = vocab.get_vocab_size(label_namespace)
    self.label_projection_layer = TimeDistributed(
        torch.nn.Linear(
            embedder.get_output_dim() if encoder is None else encoder.get_output_dim(),
            num_labels))
    self.crf = ConditionalRandomField(num_labels, include_start_end_transitions=True)
    self.metrics = {
        "span_f1": SpanBasedF1Measure(vocab,
                                      tag_namespace=label_namespace,
                                      label_encoding="BIO"),
        "accuracy": CategoricalAccuracy(),
    }
def setUp(self):
    super().setUp()
    self.logits = Variable(torch.Tensor([
        [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
        [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
    ]))
    self.tags = Variable(torch.LongTensor([
        [2, 3, 4],
        [3, 2, 2]
    ]))
    self.transitions = torch.Tensor([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0]
    ])
    self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
    self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

    # Use the CRF Module with fixed transitions to compute the log_likelihood
    self.crf = ConditionalRandomField(5)
    self.crf.transitions = torch.nn.Parameter(self.transitions)
    self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
    self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)
def __init__(self, config):
    super().__init__()
    self.config = config
    self.crf = ConditionalRandomField(
        num_tags=config.tag_vocab_size,
        include_start_end_transitions=False,
    )
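# For reference, the allennlp ConditionalRandomField used throughout these
# snippets scores emissions of shape (batch, seq_len, num_tags): calling the
# module returns the summed log-likelihood of the gold tags, and viterbi_tags()
# decodes the best paths. A minimal, self-contained usage sketch:
import torch
from allennlp.modules import ConditionalRandomField

crf = ConditionalRandomField(num_tags=3)
logits = torch.randn(2, 4, 3)                # (batch, seq_len, num_tags)
tags = torch.randint(0, 3, (2, 4))           # gold tag indices
mask = torch.tensor([[1, 1, 1, 1],
                     [1, 1, 1, 0]]).bool()   # second sequence is one step shorter

loss = -crf(logits, tags, mask)              # negative log-likelihood, summed over the batch
best_paths = crf.viterbi_tags(logits, mask)
# In recent AllenNLP versions each element is a (tag_sequence, score) pair;
# some older versions (see snippets below) returned bare tag lists instead.
predicted_tags = [path for path, score in best_paths]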
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None) -> None:
    super(DiscourseClaimCrfClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.sentence_encoder = sentence_encoder
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    self.label_projection_layer_discourse = TimeDistributed(
        Linear(self.sentence_encoder.get_output_dim(), 5))
    self.label_projection_layer_claim = TimeDistributed(
        Linear(self.sentence_encoder.get_output_dim(), 2))
    constraints = None
    self.crf_discourse = ConditionalRandomField(
        5, constraints, include_start_end_transitions=False)
    self.crf_claim = ConditionalRandomField(
        2, constraints, include_start_end_transitions=False)
    initializer(self)
def __init__(
    self,
    vocab: Vocabulary,
    text_field_embedder: TextFieldEmbedder,
    embedding_dropout: float,
    seq2seq_encoder: Seq2SeqEncoder,
    initializer: InitializerApplicator = InitializerApplicator(),
    loss_weights: Optional[List] = [],
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super(SequenceLabeler, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self._embedding_dropout = nn.Dropout(embedding_dropout)
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.seq2seq_encoder = seq2seq_encoder
    self.self_attentive_pooling_projection = nn.Linear(
        seq2seq_encoder.get_output_dim(), 1)
    self._classifier = nn.Linear(
        in_features=seq2seq_encoder.get_output_dim(),
        out_features=vocab.get_vocab_size("labels"),
    )
    self._crf = ConditionalRandomField(vocab.get_vocab_size("labels"))
    self.loss = torch.nn.CrossEntropyLoss()
    self._f1 = SpanBasedF1Measure(vocab, "labels")
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
    }
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             dropout: Optional[float] = 0,
             label_encoding: Optional[str] = 'BIO',
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(CharBertCrfModel, self).__init__(vocab, regularizer)
    self._text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size('labels')
    self._labels_predictor = Linear(
        self._text_field_embedder.get_output_dim(), self.num_tags)
    self.dropout = torch.nn.Dropout(dropout)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self._f1_metric = SpanBasedF1Measure(vocab,
                                         tag_namespace='labels',
                                         label_encoding=label_encoding)
    labels = self.vocab.get_index_to_token_vocabulary('labels')
    constraints = allowed_transitions(label_encoding, labels)
    self.label_to_index = self.vocab.get_token_to_index_vocabulary('labels')
    self.crf = ConditionalRandomField(self.num_tags,
                                      constraints,
                                      include_start_end_transitions=False)
    # self.loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             sentence_encoder: Seq2VecEncoder,
             initializer: InitializerApplicator = InitializerApplicator(),
             dropout: Optional[float] = None,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(DiscourseCrfClassifier, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")
    self.sentence_encoder = sentence_encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.loss = torch.nn.CrossEntropyLoss()
    self.label_projection_layer = TimeDistributed(
        Linear(self.sentence_encoder.get_output_dim(), self.num_classes))
    constraints = None  # allowed_transitions(label_encoding, labels)
    self.crf = ConditionalRandomField(self.num_classes,
                                      constraints,
                                      include_start_end_transitions=False)
    initializer(self)
def __init__(self, args):
    super(BertGate, self).__init__()
    # args
    self.args = args
    self.bert_model = BertModel.from_pretrained(self.args.bert_model)
    # # pos_tag embedding
    # self.pos_tag_embedding = nn.Embedding(len(args.pos_tag_vocab), args.word_dim)
    # gate
    self.gate = nn.Linear(2 * self.bert_model.config.hidden_size, self.args.gate_class)
    # column pointer network
    self.column_pointer_network = GlobalAttention(
        args=self.args,
        dim=self.bert_model.config.hidden_size,
        is_transform_out=False,
        attn_type="mlp")
    if self.args.crf:
        # todo: set num for baseline
        if self.args.model == 'gate':
            if self.args.cell_info:
                self.crf = ConditionalRandomField(
                    1 + self.args.bert_columns_split_marker_max_len - 1
                    + self.args.bert_cells_split_marker_max_len - 1)
            else:
                self.crf = ConditionalRandomField(
                    1 + self.args.bert_columns_split_marker_max_len - 1 + 1)
        else:
            raise NotImplementedError
def __init__(self, vocab: Vocabulary,
             source_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             max_decoding_steps: int,
             target_namespace: str = "target_tags",
             target_embedding_dim: int = None,
             attention_function: SimilarityFunction = None,
             scheduled_sampling_ratio: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(SimpleSeq2SeqCrf, self).__init__(vocab, regularizer)
    self._source_embedder = source_embedder
    self._encoder = encoder
    self._max_decoding_steps = max_decoding_steps
    self._target_namespace = target_namespace
    self._attention_function = attention_function
    self._scheduled_sampling_ratio = scheduled_sampling_ratio
    # We need the start symbol to provide as the input at the first timestep of decoding,
    # and end symbol as a way to indicate the end of the decoded sequence.
    self._start_index = self.vocab.get_token_index(START_SYMBOL, self._target_namespace)
    self._end_index = self.vocab.get_token_index(END_SYMBOL, self._target_namespace)
    num_classes = self.vocab.get_vocab_size(self._target_namespace)
    self._crf = ConditionalRandomField(num_classes)
    # Decoder output dim needs to be the same as the encoder output dim since we initialize
    # the hidden state of the decoder with that of the final hidden states of the encoder.
    # Also, if we're using attention with ``DotProductSimilarity``, this is needed.
    self._decoder_output_dim = self._encoder.get_output_dim()
    target_embedding_dim = target_embedding_dim or self._source_embedder.get_output_dim()
    self._target_embedder = Embedding(num_classes, target_embedding_dim)
    if self._attention_function:
        self._decoder_attention = Attention(self._attention_function)
        # The output of attention, a weighted average over encoder outputs, will be
        # concatenated to the input vector of the decoder at each time step.
        self._decoder_input_dim = self._encoder.get_output_dim() + target_embedding_dim
    else:
        self._decoder_input_dim = target_embedding_dim
    # TODO (pradeep): Do not hardcode decoder cell type.
    self._decoder_cell = LSTMCell(self._decoder_input_dim, self._decoder_output_dim)
    # self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim, bias=False)
    self._output_projection_layer = Linear(self._decoder_output_dim, num_classes)
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self.span_metric = SpanBasedF1Measure(
        vocab,
        tag_namespace=target_namespace,
        ignore_classes=[START_SYMBOL[2:], END_SYMBOL[2:]])
    initializer(self)  # Initialize forget gate
def __init__(self, args):
    self.args = args
    super(BERTNER, self).__init__()
    self.emission = AutoModelForTokenClassification.from_pretrained(
        args.model_name_or_path,
        cache_dir=args.pretrained_cache_dir,
        num_labels=len(NER_ID2LABEL))
    if self.args.use_crf:
        from allennlp.modules import ConditionalRandomField
        self.crf = ConditionalRandomField(len(NER_ID2LABEL),
                                          include_start_end_transitions=False)
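# How the HuggingFace emission model and the CRF are combined is not shown
# above. A hypothetical forward pass under the usual pattern (the argument
# names and the mask handling are assumptions, not the original code):
def forward(self, input_ids, attention_mask, labels=None):
    # AutoModelForTokenClassification returns an output object with .logits
    logits = self.emission(input_ids, attention_mask=attention_mask).logits
    mask = attention_mask.bool()  # newer allennlp CRFs expect a bool mask
    if labels is not None:
        if self.args.use_crf:
            return -self.crf(logits, labels, mask)  # negative log-likelihood
        return torch.nn.functional.cross_entropy(
            logits.view(-1, logits.size(-1)), labels.view(-1))
    if self.args.use_crf:
        return [tags for tags, score in self.crf.viterbi_tags(logits, mask)]
    return logits.argmax(-1)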
def __init__(self, kwargs):
    super(CrfTagger, self).__init__()
    self.gpu = kwargs.pop("use_gpu", False)
    if kwargs.pop("use_lstm", False):
        self.lstm = LstmTagger(**kwargs)
    self.crf = ConditionalRandomField(
        kwargs["tagset_size"],
        include_start_end_transitions=True
    )
def __init__(self, vocab, text_field_embedder, encoder, label_namespace=u"labels", constraint_type=None, feedforward=None, include_start_end_transitions=True, dropout=None, verbose_metrics=False, initializer=InitializerApplicator(), regularizer=None): super(CrfTagger, self).__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed( Linear(output_dim, self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or u"BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), u"text field embedding dim", u"encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), u"encoder output dim", u"feedforward input dim") initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             entity_embedder: TextFieldEmbedder,
             encoder: Seq2VecEncoder,
             label_namespace: str = "logical_form",
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    self.num_tags = self.vocab.get_vocab_size("logical_form")
    self.encoder = encoder
    self.text_field_embedder = text_field_embedder
    self.entity_embedder = entity_embedder
    self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())
    self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
        vocab, "tokens", projection_dim=self.encoder.get_output_dim())

    # using crf as the estimator for sequential tags
    self.crf = ConditionalRandomField(self.num_tags, include_start_end_transitions=False)
    self.crf_for_both = ConditionalRandomField(
        self.num_tags, include_start_end_transitions=False)

    self.softmax_layer = Softmax()
    self.ce_loss = CrossEntropyLoss()
    self.matched = 0
    self.all_pred = 0

    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None

    output_dim = self.encoder.get_output_dim()
    self.pred_layer = Linear(4 * output_dim, 3 * self.num_tags)
    self.load_pretrained_weights()
    self.pred_layer_both = Linear(8 * output_dim, 3 * self.num_tags)

    self.metrics = {}
    check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                           "text field embedding dim", "encoder input dim")
    initializer(self)
def __init__(
    self,
    task: str,
    vocab: Vocabulary,
    input_dim: int,
    loss_weight: float = 1.0,
    label_encoding: Optional[str] = 'BIO',
    include_start_end_transitions: bool = True,
    constrain_crf_decoding: bool = True,
    calculate_span_f1: bool = None,
    verbose_metrics: bool = False,
    metric: str = 'span_f1',
    top_k: int = 1,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)
    self.task = task
    self.input_dim = input_dim
    self.loss_weight = loss_weight
    self.num_tags = self.vocab.get_vocab_size(task)
    self.top_k = top_k
    self._verbose_metrics = verbose_metrics
    self.tag_projection_layer = TimeDistributed(
        Linear(input_dim, self.num_tags))

    # if constrain_crf_decoding and calculate_span_f1 are not
    # provided, (i.e., they're None), set them to True
    # if label_encoding is provided and False if it isn't.
    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if calculate_span_f1 is None:
        calculate_span_f1 = label_encoding is not None

    self.label_encoding = label_encoding
    if constrain_crf_decoding:
        if not label_encoding:
            raise ConfigurationError(
                "constrain_crf_decoding is True, but no label_encoding was specified."
            )
        labels = self.vocab.get_index_to_token_vocabulary(task)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None

    self.include_start_end_transitions = include_start_end_transitions
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=include_start_end_transitions)
    self.metrics = {
        # Note: this hardcodes "BIO" rather than using the label_encoding argument.
        "span_f1": SpanBasedF1Measure(self.vocab,
                                      tag_namespace=self.task,
                                      label_encoding="BIO")
    }
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             binary_feature_dim: int,
             embedding_dropout: float = 0.0,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None,
             label_smoothing: float = None,
             label_namespace: str = "labels",
             ignore_span_metric: bool = False,
             label_encoding: Optional[str] = 'BIO',
             include_start_end_transitions: bool = True,
             constrain_crf_decoding: bool = True) -> None:
    super(OieLabelerCRF, self).__init__(vocab, regularizer)
    self.text_field_embedder = text_field_embedder
    self.num_classes = self.vocab.get_vocab_size("labels")

    # For the span based evaluation, we don't want to consider labels
    # for verb, because the verb index is provided to the model.
    self.span_metric = SpanBasedF1Measure(vocab,
                                          tag_namespace="labels",
                                          ignore_classes=["V"])
    self.label_namespace = label_namespace
    self.encoder = encoder
    # There are exactly 2 binary features for the verb predicate embedding.
    self.binary_feature_embedding = Embedding(2, binary_feature_dim)
    self.tag_projection_layer = TimeDistributed(
        Linear(self.encoder.get_output_dim(), self.num_classes))
    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    self.include_start_end_transitions = include_start_end_transitions

    if constrain_crf_decoding is None:
        constrain_crf_decoding = label_encoding is not None
    if constrain_crf_decoding:
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        print(labels)
        constraints = allowed_transitions(label_encoding, labels)
    else:
        constraints = None
    self.crf = ConditionalRandomField(
        self.num_classes, constraints,
        include_start_end_transitions=include_start_end_transitions)

    check_dimensions_match(
        text_field_embedder.get_output_dim() + binary_feature_dim,
        encoder.get_input_dim(),
        "text embedding dim + verb indicator embedding dim",
        "encoder input dim")
    initializer(self)
def __init__(self, vocab: Vocabulary,
             text_field_embedder: TextFieldEmbedder,
             encoder: Seq2SeqEncoder,
             feedforward: Optional[FeedForward] = None,
             dropout: Optional[float] = None,
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)
    label_namespace = 'labels'
    self.label_namespace = label_namespace
    self.text_field_embedder = text_field_embedder
    self.num_tags = self.vocab.get_vocab_size(label_namespace)
    self.encoder = encoder
    if dropout:
        self.dropout = torch.nn.Dropout(dropout)
    else:
        self.dropout = None
    self._feedforward = feedforward
    if feedforward is not None:
        output_dim = feedforward.get_output_dim()
    else:
        output_dim = self.encoder.get_output_dim()
    self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))

    # Label encoding is fixed to BIOUL, and the CRF is always constrained accordingly.
    self.label_encoding = 'BIOUL'
    labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
    constraints = allowed_transitions(self.label_encoding, labels)
    self.include_start_end_transitions = True
    self.crf = ConditionalRandomField(
        self.num_tags, constraints,
        include_start_end_transitions=self.include_start_end_transitions
    )
    self.metrics = {
        "accuracy": CategoricalAccuracy(),
        "accuracy3": CategoricalAccuracy(top_k=3)
    }
    self._f1_metric = SpanBasedF1Measure(vocab,
                                         tag_namespace=label_namespace,
                                         label_encoding=self.label_encoding)
def __init__(self,
             vocab: Vocabulary,
             word_embedding: Dict[str, Any],
             depsawr: torch.nn.Module = None,
             transform_dim: int = 0,
             pos_dim: int = 50,
             indicator_dim: int = 50,
             encoder: Dict[str, Any] = None,
             dropout: float = 0.33,
             label_namespace: str = "labels",
             top_k: int = 1,
             **kwargs) -> None:
    super().__init__()
    self.word_embedding = build_word_embedding(num_embeddings=len(vocab['words']),
                                               vocab=vocab,
                                               dropout=dropout,
                                               **word_embedding)
    feat_dim: int = self.word_embedding.output_dim

    if transform_dim > 0:
        self.word_transform = NonLinear(feat_dim, transform_dim)
        feat_dim = transform_dim
    else:
        self.word_transform = None

    if depsawr:
        dep_dim = kwargs.pop('dep_dim', 300)
        self.depsawr_forward = depsawr.forward
        self.projections = ModuleList(
            [NonLinear(i, dep_dim) for i in depsawr.dims])
        self.depsawr_mix = ScalarMix(len(depsawr.dims), True)
        feat_dim += dep_dim
    else:
        self.depsawr_forward = None

    self.pos_embedding = Embedding(len(vocab['upostag']), pos_dim, 0)
    self.indicator_embedding = Embedding(2, indicator_dim)
    feat_dim += (pos_dim + indicator_dim)

    if encoder is not None:
        self.encoder = build_encoder(feat_dim, dropout=dropout, **encoder)
        feat_dim = self.encoder.output_dim
    else:
        self.encoder = None

    self.tag_projection_layer = torch.nn.Linear(
        feat_dim, len(vocab[label_namespace]))
    self.word_dropout = WordDropout(dropout)
    self.crf = ConditionalRandomField(len(vocab[label_namespace]),
                                      include_start_end_transitions=False)
    self.top_k = top_k
    self.metric = SRLMetric(vocab[label_namespace]['_'])
def __init__(self, config, num_labels, word_pool_type='mean'):
    super(BertForSequenceLabeling, self).__init__(config)
    if word_pool_type.lower() not in {'first', 'mean', 'sum'}:
        raise ValueError('No {} pooling methods!'.format(word_pool_type))
    if word_pool_type.lower() == 'sum':
        self.layer_norm = BertLayerNorm(config)
    self.word_pool_type = word_pool_type
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
    self.crf = ConditionalRandomField(num_labels)
    self.apply(self.init_bert_weights)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(), self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField(self.num_tags, constraints) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") initializer(self)
def setUp(self):
    super().setUp()
    self.logits = torch.Tensor([
        [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
        [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
    ])
    self.tags = torch.LongTensor([
        [2, 3, 4],
        [3, 2, 2]
    ])
    self.transitions = torch.Tensor([
        [0.1, 0.2, 0.3, 0.4, 0.5],
        [0.8, 0.3, 0.1, 0.7, 0.9],
        [-0.3, 2.1, -5.6, 3.4, 4.0],
        [0.2, 0.4, 0.6, -0.3, -0.4],
        [1.0, 1.0, 1.0, 1.0, 1.0]
    ])
    self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
    self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

    # Use the CRF Module with fixed transitions to compute the log_likelihood
    self.crf = ConditionalRandomField(5)
    self.crf.transitions = torch.nn.Parameter(self.transitions)
    self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
    self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", constraint_type: str = None, feedforward: FeedForward = None, include_start_end_transitions: bool = True, dropout: float = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(constraint_type, labels) else: constraints = None self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type or "BIO") check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
def test_constrained_viterbi_tags(self):
    constraints = {(0, 0), (0, 1),
                   (1, 1), (1, 2),
                   (2, 2), (2, 3),
                   (3, 3), (3, 4),
                   (4, 4), (4, 0)}

    crf = ConditionalRandomField(num_tags=5, constraints=constraints)
    crf.transitions = torch.nn.Parameter(self.transitions)
    crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
    crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    mask = Variable(torch.LongTensor([
        [1, 1, 1],
        [1, 1, 0]
    ]))

    # In this older AllenNLP API, viterbi_tags returns the tag sequences
    # directly rather than (tags, score) pairs.
    viterbi_tags = crf.viterbi_tags(self.logits, mask)

    # Now the tags should respect the constraints
    assert viterbi_tags == [
        [2, 3, 3],
        [2, 3]
    ]
def __init__(self, vocab: Vocabulary, text_field_embedder: TextFieldEmbedder, encoder: Seq2SeqEncoder, label_namespace: str = "labels", feedforward: Optional[FeedForward] = None, label_encoding: Optional[str] = None, constraint_type: Optional[str] = None, include_start_end_transitions: bool = True, constrain_crf_decoding: bool = None, calculate_span_f1: bool = None, dropout: Optional[float] = None, verbose_metrics: bool = False, initializer: InitializerApplicator = InitializerApplicator(), regularizer: Optional[RegularizerApplicator] = None) -> None: super().__init__(vocab, regularizer) self.label_namespace = label_namespace self.text_field_embedder = text_field_embedder self.num_tags = self.vocab.get_vocab_size(label_namespace) self.encoder = encoder self._verbose_metrics = verbose_metrics if dropout: self.dropout = torch.nn.Dropout(dropout) else: self.dropout = None self._feedforward = feedforward if feedforward is not None: output_dim = feedforward.get_output_dim() else: output_dim = self.encoder.get_output_dim() self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags)) if constraint_type is not None: warnings.warn("'constraint_type' was removed and replaced with" "'label_encoding', 'constrain_crf_decoding', and " "'calculate_span_f1' in version 0.6.1. It will be " "removed in version 0.8.", DeprecationWarning) label_encoding = constraint_type # if constrain_crf_decoding and calculate_span_f1 are not # provided, (i.e., they're None), set them to True # if label_encoding is provided and False if it isn't. if constrain_crf_decoding is None: constrain_crf_decoding = label_encoding is not None if calculate_span_f1 is None: calculate_span_f1 = label_encoding is not None self.label_encoding = label_encoding if constrain_crf_decoding: if not label_encoding: raise ConfigurationError("constrain_crf_decoding is True, but " "no label_encoding was specified.") labels = self.vocab.get_index_to_token_vocabulary(label_namespace) constraints = allowed_transitions(label_encoding, labels) else: constraints = None self.include_start_end_transitions = include_start_end_transitions self.crf = ConditionalRandomField( self.num_tags, constraints, include_start_end_transitions=include_start_end_transitions ) self.metrics = { "accuracy": CategoricalAccuracy(), "accuracy3": CategoricalAccuracy(top_k=3) } self.calculate_span_f1 = calculate_span_f1 if calculate_span_f1: if not label_encoding: raise ConfigurationError("calculate_span_f1 is True, but " "no label_encoding was specified.") self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=label_encoding) elif constraint_type is not None: # Maintain deprecated behavior if constraint_type is provided self._f1_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace, label_encoding=constraint_type) check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(), "text field embedding dim", "encoder input dim") if feedforward is not None: check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(), "encoder output dim", "feedforward input dim") initializer(self)
class CrfTagger(Model):
    """
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    feedforward : ``FeedForward``, optional, (default = None).
        An optional feedforward layer to apply after the encoder.
    label_encoding : ``str``, optional (default=``None``)
        Label encoding to use when calculating span f1 and constraining
        the CRF at decoding time. Valid options are "BIO", "BIOUL", "IOB1", "BMES".
        Required if ``calculate_span_f1`` or ``constrain_crf_decoding`` is true.
    constraint_type : ``str``, optional (default=``None``)
        If provided, the CRF will be constrained at decoding time
        to produce valid labels based on the specified type (e.g. "BIO", or "BIOUL").

        .. deprecated:: 0.6.1
           ``constraint_type`` was deprecated and replaced with
           ``label_encoding``, ``constrain_crf_decoding``, and
           ``calculate_span_f1`` in version 0.6.1. It will be removed
           in version 0.8.
    include_start_end_transitions : ``bool``, optional (default=``True``)
        Whether to include start and end transition parameters in the CRF.
    constrain_crf_decoding : ``bool``, optional (default=``None``)
        If ``True``, the CRF is constrained at decoding time to
        produce valid sequences of tags. If this is ``True``, then
        ``label_encoding`` is required. If ``None`` and label_encoding
        is specified, this is set to ``True``. If ``None`` and
        label_encoding is not specified, it defaults to ``False``.
    calculate_span_f1 : ``bool``, optional (default=``None``)
        Calculate span-level F1 metrics during training. If this is
        ``True``, then ``label_encoding`` is required. If ``None`` and
        label_encoding is specified, this is set to ``True``. If ``None``
        and label_encoding is not specified, it defaults to ``False``.
    dropout : ``float``, optional (default=``None``)
    verbose_metrics : ``bool``, optional (default = False)
        If true, metrics will be returned per label class in addition
        to the overall statistics.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 constraint_type: Optional[str] = None,
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward
        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
        if constraint_type is not None:
            warnings.warn("'constraint_type' was removed and replaced with"
                          "'label_encoding', 'constrain_crf_decoding', and "
                          "'calculate_span_f1' in version 0.6.1. It will be "
                          "removed in version 0.8.", DeprecationWarning)
            label_encoding = constraint_type

        # if constrain_crf_decoding and calculate_span_f1 are not
        # provided, (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=include_start_end_transitions
        )
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
        elif constraint_type is not None:
            # Maintain deprecated behavior if constraint_type is provided
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=constraint_type)
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None,  # pylint: disable=unused-argument
                **kwargs) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : ``Dict[str, torch.LongTensor]``, required
            The output of ``TextField.as_array()``, which should typically be passed directly
            to a ``TextFieldEmbedder``. This output is a dictionary mapping keys to
            ``TokenIndexer`` tensors. At its most basic, using a ``SingleIdTokenIndexer``
            this is: ``{"tokens": Tensor(batch_size, num_tokens)}``. This dictionary will have
            the same keys as were used for the ``TokenIndexers`` when you created the
            ``TextField`` representing your sequence. The dictionary is designed to be passed
            directly to a ``TextFieldEmbedder``, which knows how to combine different word
            representations into a single vector per token in your input.
        tags : ``torch.LongTensor``, optional (default = ``None``)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence to be tagged under a
            'words' key.

        Returns
        -------
        An output dictionary consisting of:

        logits : ``torch.FloatTensor``
            The logits that are the output of the ``tag_projection_layer``
        mask : ``torch.LongTensor``
            The text field mask for the input tokens
        tags : ``List[List[int]]``
            The predicted tags using the Viterbi algorithm.
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised. Only computed if gold label ``tags`` are provided.
        """
        embedded_text_input = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)

        if self.dropout:
            embedded_text_input = self.dropout(embedded_text_input)

        encoded_text = self.encoder(embedded_text_input, mask)

        if self.dropout:
            encoded_text = self.dropout(encoded_text)

        if self._feedforward is not None:
            encoded_text = self._feedforward(encoded_text)

        logits = self.tag_projection_layer(encoded_text)
        best_paths = self.crf.viterbi_tags(logits, mask)

        # Just get the tags and ignore the score.
        predicted_tags = [x for x, y in best_paths]

        output = {"logits": logits, "mask": mask, "tags": predicted_tags}

        if tags is not None:
            # Add negative log-likelihood as loss
            log_likelihood = self.crf(logits, tags, mask)
            output["loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the metrics
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1

            for metric in self.metrics.values():
                metric(class_probabilities, tags, mask.float())
            if self.calculate_span_f1:
                self._f1_metric(class_probabilities, tags, mask.float())
        if metadata is not None:
            output["words"] = [x["words"] for x in metadata]
        return output

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Converts the tag ids to the actual tags.
        ``output_dict["tags"]`` is a list of lists of tag_ids,
        so we use an ugly nested list comprehension.
        """
        output_dict["tags"] = [
            [self.vocab.get_token_from_index(tag, namespace=self.label_namespace)
             for tag in instance_tags]
            for instance_tags in output_dict["tags"]
        ]
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics_to_return = {metric_name: metric.get_metric(reset)
                             for metric_name, metric in self.metrics.items()}
        if self.calculate_span_f1:
            f1_dict = self._f1_metric.get_metric(reset=reset)
            if self._verbose_metrics:
                metrics_to_return.update(f1_dict)
            else:
                metrics_to_return.update({
                    x: y for x, y in f1_dict.items() if "overall" in x})
        return metrics_to_return
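# A minimal sketch of wiring this CrfTagger together directly in code; the
# dimensions and vocabulary contents are illustrative assumptions (in practice
# the model is usually built from a Jsonnet config):
import torch
from allennlp.data import Vocabulary
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

vocab = Vocabulary()
vocab.add_tokens_to_namespace(["the", "cat", "sat"], namespace="tokens")
vocab.add_tokens_to_namespace(["O", "B-ANIMAL", "I-ANIMAL"], namespace="labels")

embedder = BasicTextFieldEmbedder(
    {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                         embedding_dim=16)})
encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(16, 8, batch_first=True, bidirectional=True))

tagger = CrfTagger(vocab, embedder, encoder,
                   label_encoding="BIO", constrain_crf_decoding=True)
# tagger.forward(...) then returns the logits, the mask, Viterbi-decoded tags,
# and (when gold tags are passed) the negative CRF log-likelihood as "loss".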
class CrfTagger(Model):
    """
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    constraint_type : ``str``, optional (default=``None``)
        If provided, the CRF will be constrained at decoding time
        to produce valid labels based on the specified type (e.g. "BIO", or "BIOUL").
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_tags))
        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None
        self.crf = ConditionalRandomField(self.num_tags, constraints)
        self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)
        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : ``Dict[str, torch.LongTensor]``, required
            The output of ``TextField.as_array()``, which should typically be passed directly
            to a ``TextFieldEmbedder``. This output is a dictionary mapping keys to
            ``TokenIndexer`` tensors. At its most basic, using a ``SingleIdTokenIndexer``
            this is: ``{"tokens": Tensor(batch_size, num_tokens)}``. This dictionary will have
            the same keys as were used for the ``TokenIndexers`` when you created the
            ``TextField`` representing your sequence. The dictionary is designed to be passed
            directly to a ``TextFieldEmbedder``, which knows how to combine different word
            representations into a single vector per token in your input.
        tags : ``torch.LongTensor``, optional (default = ``None``)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.

        Returns
        -------
        An output dictionary consisting of:

        logits : ``torch.FloatTensor``
            The logits that are the output of the ``tag_projection_layer``
        mask : ``torch.LongTensor``
            The text field mask for the input tokens
        tags : ``List[List[str]]``
            The predicted tags using the Viterbi algorithm.
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised. Only computed if gold label ``tags`` are provided.
        """
        embedded_text_input = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)

        logits = self.tag_projection_layer(encoded_text)
        predicted_tags = self.crf.viterbi_tags(logits, mask)

        output = {"logits": logits, "mask": mask, "tags": predicted_tags}

        if tags is not None:
            # Add negative log-likelihood as loss
            log_likelihood = self.crf(logits, tags, mask)
            output["loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the `span_metric`
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1

            self.span_metric(class_probabilities, tags, mask)

        return output

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Converts the tag ids to the actual tags.
        ``output_dict["tags"]`` is a list of lists of tag_ids,
        so we use an ugly nested list comprehension.
        """
        output_dict["tags"] = [
            [self.vocab.get_token_from_index(tag, namespace="labels")
             for tag in instance_tags]
            for instance_tags in output_dict["tags"]
        ]
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metric_dict = self.span_metric.get_metric(reset=reset)
        return {x: y for x, y in metric_dict.items() if "overall" in x}

    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'CrfTagger':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
        encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
        label_namespace = params.pop("label_namespace", "labels")
        constraint_type = params.pop("constraint_type", None)
        initializer = InitializerApplicator.from_params(params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))

        params.assert_empty(cls.__name__)

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   encoder=encoder,
                   label_namespace=label_namespace,
                   constraint_type=constraint_type,
                   initializer=initializer,
                   regularizer=regularizer)
import itertools
import math

import torch
from pytest import approx, raises

from allennlp.common.checks import ConfigurationError
from allennlp.common.testing import AllenNlpTestCase
from allennlp.modules.conditional_random_field import ConditionalRandomField, allowed_transitions


class TestConditionalRandomField(AllenNlpTestCase):
    def setUp(self):
        super().setUp()

        self.logits = torch.Tensor([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
                [2, 3, 4],
                [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    def score(self, logits, tags):
        """
        Computes the likelihood score for the given sequence of tags,
        given the provided logits (and the transition weights in the CRF model).
        """
        # Start with transitions from START and to END
        total = self.transitions_from_start[tags[0]] + self.transitions_to_end[tags[-1]]
        # Add in all the intermediate transitions
        for tag, next_tag in zip(tags, tags[1:]):
            total += self.transitions[tag, next_tag]
        # Add in the logits for the observed tags
        for logit, tag in zip(logits, tags):
            total += logit[tag]
        return total

    def test_forward_works_without_mask(self):
        log_likelihood = self.crf(self.logits, self.tags).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        # (which is just the score for the logits and actual tags)
        # and the denominator
        # (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i in zip(self.logits, self.tags):
            numerator = self.score(logits_i.detach(), tags_i.detach())
            all_scores = [self.score(logits_i.detach(), tags_j)
                          for tags_j in itertools.product(range(5), repeat=3)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)

    def test_forward_works_with_mask(self):
        # Use a non-trivial mask
        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        log_likelihood = self.crf(self.logits, self.tags, mask).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        # (which is just the score for the logits and actual tags)
        # and the denominator
        # (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i, mask_i in zip(self.logits, self.tags, mask):
            # Find the sequence length for this input and only look at that much of each sequence.
            sequence_length = torch.sum(mask_i.detach()).item()
            logits_i = logits_i.data[:sequence_length]
            tags_i = tags_i.data[:sequence_length]

            numerator = self.score(logits_i, tags_i)
            all_scores = [self.score(logits_i, tags_j)
                          for tags_j in itertools.product(range(5), repeat=sequence_length)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)

    def test_viterbi_tags(self):
        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_path = self.crf.viterbi_tags(self.logits, mask)

        # Separate the tags and scores.
        viterbi_tags = [x for x, y in viterbi_path]
        viterbi_scores = [y for x, y in viterbi_path]

        # Check that the viterbi tags are what I think they should be.
        assert viterbi_tags == [
                [2, 4, 3],
                [4, 2]
        ]

        # We can also iterate over all possible tag sequences and use self.score
        # to check the likelihood of each. The most likely sequence should be the
        # same as what we get from viterbi_tags.
        most_likely_tags = []
        best_scores = []

        for logits_i, mask_i in zip(self.logits, mask):
            sequence_length = torch.sum(mask_i.detach()).item()
            most_likely, most_likelihood = None, -float('inf')
            for tags in itertools.product(range(5), repeat=sequence_length):
                score = self.score(logits_i.data, tags)
                if score > most_likelihood:
                    most_likely, most_likelihood = tags, score
            # Convert tuple to list; otherwise == complains.
            most_likely_tags.append(list(most_likely))
            best_scores.append(most_likelihood)

        assert viterbi_tags == most_likely_tags
        assert viterbi_scores == best_scores

    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_path = crf.viterbi_tags(self.logits, mask)

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = [x for x, y in viterbi_path]

        # Now the tags should respect the constraints
        assert viterbi_tags == [
                [2, 3, 3],
                [2, 3]
        ]

    def test_allowed_transitions(self):
        # pylint: disable=bad-whitespace,bad-continuation
        bio_labels = ['O', 'B-X', 'I-X', 'B-Y', 'I-Y']  # start tag, end tag
        #              0     1      2      3      4         5          6
        allowed = allowed_transitions("BIO", dict(enumerate(bio_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                         # Extra column for end tag.
                (0, 0), (0, 1),         (0, 3),          (0, 6),
                (1, 0), (1, 1), (1, 2), (1, 3),          (1, 6),
                (2, 0), (2, 1), (2, 2), (2, 3),          (2, 6),
                (3, 0), (3, 1),         (3, 3), (3, 4),  (3, 6),
                (4, 0), (4, 1),         (4, 3), (4, 4),  (4, 6),
                (5, 0), (5, 1),         (5, 3)           # Extra row for start tag
        }

        bioul_labels = ['O', 'B-X', 'I-X', 'L-X', 'U-X', 'B-Y', 'I-Y', 'L-Y', 'U-Y']  # start tag, end tag
        #                0     1      2      3      4      5      6      7      8        9          10
        allowed = allowed_transitions("BIOUL", dict(enumerate(bioul_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                                                 # Extra column for end tag.
                (0, 0), (0, 1),                 (0, 4), (0, 5),                 (0, 8),  (0, 10),
                        (1, 2), (1, 3),
                        (2, 2), (2, 3),
                (3, 0), (3, 1),                 (3, 4), (3, 5),                 (3, 8),  (3, 10),
                (4, 0), (4, 1),                 (4, 4), (4, 5),                 (4, 8),  (4, 10),
                                                                (5, 6), (5, 7),
                                                                (6, 6), (6, 7),
                (7, 0), (7, 1),                 (7, 4), (7, 5),                 (7, 8),  (7, 10),
                (8, 0), (8, 1),                 (8, 4), (8, 5),                 (8, 8),  (8, 10),
                # Extra row for start tag.
                (9, 0), (9, 1),                 (9, 4), (9, 5),                 (9, 8)
        }

        iob1_labels = ['O', 'B-X', 'I-X', 'B-Y', 'I-Y']  # start tag, end tag
        #               0     1      2      3      4         5          6
        allowed = allowed_transitions("IOB1", dict(enumerate(iob1_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                         # Extra column for end tag.
                (0, 0),         (0, 2),         (0, 4),  (0, 6),
                (1, 0), (1, 1), (1, 2),         (1, 4),  (1, 6),
                (2, 0), (2, 1), (2, 2),         (2, 4),  (2, 6),
                (3, 0),         (3, 2), (3, 3), (3, 4),  (3, 6),
                (4, 0),         (4, 2), (4, 3), (4, 4),  (4, 6),
                (5, 0),         (5, 2),         (5, 4)   # Extra row for start tag
        }

        # An unknown constraint type should be rejected.
        with raises(ConfigurationError):
            allowed_transitions("allennlp", {})

        bmes_labels = ['B-X', 'M-X', 'E-X', 'S-X', 'B-Y', 'M-Y', 'E-Y', 'S-Y']  # start tag, end tag
        #                0      1      2      3      4      5      6      7        8          9
        allowed = allowed_transitions("BMES", dict(enumerate(bmes_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        # Note that M can transition to M of the same entity (e.g. (1, 1) and (5, 5)).
        assert set(allowed) == {                                          # Extra column for end tag.
                        (0, 1), (0, 2),
                        (1, 1), (1, 2),
                (2, 0),         (2, 3), (2, 4),                 (2, 7),  (2, 9),
                (3, 0),         (3, 3), (3, 4),                 (3, 7),  (3, 9),
                                                (4, 5), (4, 6),
                                                (5, 5), (5, 6),
                (6, 0),         (6, 3), (6, 4),                 (6, 7),  (6, 9),
                (7, 0),         (7, 3), (7, 4),                 (7, 7),  (7, 9),
                # Extra row for start tag.
                (8, 0),         (8, 3), (8, 4),                 (8, 7)
        }
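
# A hedged companion sketch for the tests above: how the (from_tag, to_tag) pairs
# produced by ``allowed_transitions`` plug into a constrained CRF, mirroring what
# ``CrfTagger.__init__`` does when ``constraint_type`` is set. Indices ``num_tags``
# and ``num_tags + 1`` stand in for the implicit start and end tags. The label set
# and the helper name ``_constrained_crf_sketch`` are illustrative only.
import torch
from allennlp.modules.conditional_random_field import (
        ConditionalRandomField, allowed_transitions)


def _constrained_crf_sketch():
    labels = {0: 'O', 1: 'B-X', 2: 'I-X', 3: 'B-Y', 4: 'I-Y'}
    constraints = allowed_transitions("BIO", labels)  # allowed (from_tag, to_tag) pairs
    crf = ConditionalRandomField(num_tags=len(labels), constraints=constraints)

    logits = torch.randn(1, 4, len(labels))
    mask = torch.LongTensor([[1, 1, 1, 1]])
    # Decoding can never produce e.g. O -> I-X, because (0, 2) is not in `constraints`.
    return crf.viterbi_tags(logits, mask)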