Example #1
    def __init__(self, config, use_crf=False):
        super(BertForJointBIOExtractAndClassification, self).__init__()
        self.bert = BertModel(config)
        self.use_crf = use_crf
        # TODO: check with Google whether it's normal that there is no dropout on the SQuAD token classifier in the TF version
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.bio_affine = nn.Linear(config.hidden_size, 3)
        self.cls_affine = nn.Linear(config.hidden_size, 5)
        if self.use_crf:
            self.cls_crf = ConditionalRandomField(5)

        def init_weights(module):
            if isinstance(module, (nn.Linear, nn.Embedding)):
                # Slightly different from the TF version which uses truncated_normal for initialization
                # cf https://github.com/pytorch/pytorch/pull/5617
                module.weight.data.normal_(mean=0.0,
                                           std=config.initializer_range)
            elif isinstance(module, BERTLayerNorm):
                module.beta.data.normal_(mean=0.0,
                                         std=config.initializer_range)
                module.gamma.data.normal_(mean=0.0,
                                          std=config.initializer_range)
            if isinstance(module, nn.Linear):
                module.bias.data.zero_()

        self.apply(init_weights)
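Example #1 stops at layer construction; for context, a forward pass tying these heads to the CRF might look like the sketch below. The method signature, the pytorch-pretrained-bert return convention, and the choice to score only the class sequence with the CRF are assumptions, not part of the original snippet.

    # Hypothetical forward sketch (assumptions noted above): encode with BERT,
    # project to per-token BIO and class logits, and score the class sequence
    # with the CRF when use_crf is enabled.
    def forward(self, input_ids, token_type_ids, attention_mask, cls_tags=None):
        encoded_layers, _ = self.bert(input_ids, token_type_ids, attention_mask)
        sequence_output = encoded_layers[-1]            # (batch, seq_len, hidden)
        bio_logits = self.bio_affine(sequence_output)   # (batch, seq_len, 3)
        cls_logits = self.cls_affine(sequence_output)   # (batch, seq_len, 5)
        if self.use_crf and cls_tags is not None:
            # ConditionalRandomField returns the summed log-likelihood,
            # so the training loss is its negation.
            loss = -self.cls_crf(cls_logits, cls_tags, attention_mask)
            return loss, bio_logits, cls_logits
        return bio_logits, cls_logits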
Example #2
    def __init__(self,
                 gen_emb,
                 domain_emb,
                 num_head=8,
                 num_classes=3,
                 dropout=0.5,
                 crf=False):
        super(Model_att_s, self).__init__()
        self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0],
                                                gen_emb.shape[1])
        self.gen_embedding.weight = torch.nn.Parameter(
            torch.from_numpy(gen_emb), requires_grad=False)
        self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0],
                                                   domain_emb.shape[1])
        self.domain_embedding.weight = torch.nn.Parameter(
            torch.from_numpy(domain_emb), requires_grad=False)

        self.dropout = torch.nn.Dropout(dropout)

        self.att = layers.BertSelfAttention(gen_emb.shape[1] +
                                            domain_emb.shape[1],
                                            num_attention_heads=num_head,
                                            attention_probs_dropout_prob=dropout)
        self.pwt = layers.PositionwiseFeedForward(gen_emb.shape[1] +
                                                  domain_emb.shape[1],
                                                  256,
                                                  dropout=dropout)

        self.linear_ae = torch.nn.Linear(256, num_classes)
        self.crf_flag = crf
        if self.crf_flag:
            from allennlp.modules import ConditionalRandomField
            self.crf = ConditionalRandomField(num_classes)
Example #3
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_tags))
        self.crf = ConditionalRandomField(self.num_tags)

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace=label_namespace)

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the phrase_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        initializer(self)
Example #4
    def __init__(self,
                 vocab: Vocabulary,
                 embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 dropout: float = 0.1,
                 ff_dim: int = 100):
        super().__init__(vocab)
        self.embedder = embedder
        self.encoder = encoder

        assert self.embedder.get_output_dim() == self.encoder.get_input_dim()

        self.feedforward = FeedForward(
            encoder.get_output_dim(),
            1,
            hidden_dims=ff_dim,
            activations=Activation.by_name('relu')(),
            dropout=dropout)
        self.out = torch.nn.Linear(
            in_features=self.feedforward.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        self.crf = ConditionalRandomField(vocab.get_vocab_size('labels'))

        self.f1 = FBetaMeasure(average='micro')
        self.accuracy = CategoricalAccuracy()
        self.idx_to_label = vocab.get_index_to_token_vocabulary('labels')
Example #5
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_tags))
        self.crf = ConditionalRandomField(self.num_tags)

        self.metrics = {
            "token_accuracy": CategoricalAccuracy(),
            "accuracy": BooleanAccuracy()
        }

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)
Example #6
    def __init__(
        self,
        vocab: Vocabulary,
        text_field_embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        label_namespace: str = "labels",
        feature_namespace: str = None,
        feature_encoder: Seq2VecEncoder = None,
        label_encoding: Optional[str] = None,
        include_start_end_transitions: bool = True,
        constrain_crf_decoding: bool = None,
        initializer: InitializerApplicator = InitializerApplicator(),
        regularizer: Optional[RegularizerApplicator] = None,
    ) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_classes))

        if feature_namespace:
            self.feature_encoder = feature_encoder
            self.feat_classification_layer = Linear(
                self.feature_encoder.get_output_dim(),
                self.vocab.get_vocab_size(feature_namespace))
            # print("num_features:", self.vocab.get_vocab_size(feature_namespace))

        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_classes,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        check_dimensions_match(
            text_field_embedder.get_output_dim(),
            encoder.get_input_dim(),
            "text field embedding dim",
            "encoder input dim",
        )

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3),
        }

        self._f1_metric = None

        initializer(self)
Example #7
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 feedforward: Optional[FeedForward] = None,
                 include_start_end_transitions: bool = True,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = 'labels'
        self.num_tags = self.vocab.get_vocab_size(self.label_namespace)

        # encode text
        self.text_field_embedder = text_field_embedder
        self.encoder = encoder
        self.dropout = torch.nn.Dropout(dropout) if dropout else None
        self.feedforward = feedforward

        # crf
        output_dim = self.encoder.get_output_dim() if feedforward is None else feedforward.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim, self.num_tags))
        self.crf = ConditionalRandomField(self.num_tags, constraints=None, include_start_end_transitions=include_start_end_transitions)

        initializer(self)

        self.metrics = {}

        # Add F1 score for individual labels to metrics 
        for index, label in self.vocab.get_index_to_token_vocabulary(self.label_namespace).items():
            self.metrics[label] = F1Measure(positive_label=index)
Example #8
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.tag_form = config.tag_form

        self.crf = ConditionalRandomField(
            num_tags=config.tag_vocab_size,
            include_start_end_transitions=False,
        )
        del self.crf.transitions  # must del parameter before assigning a tensor
        self.crf.transitions = None
        del self.crf._constraint_mask
        num_tags = config.tag_vocab_size
        constraint_mask = torch.Tensor(num_tags + 2,
                                       num_tags + 2).fill_(1.).to(config.device)
        self.crf._constraint_mask = constraint_mask  # torch.nn.Parameter(constraint_mask, requires_grad=False)

        if self.tag_form == 'iobes':
            M = 4
        elif self.tag_form == 'iob2':
            M = 2
        else:
            raise Exception(f'unsupported tag form: {self.tag_form}')

        N = config.tag_vocab_size
        E = (config.tag_vocab_size - 1) // M

        self.N, self.M, self.E = N, M, E
        self.p_in = nn.Parameter(torch.randn([M, M], dtype=torch.float32))
        self.p_cross = nn.Parameter(torch.randn([M, M], dtype=torch.float32))
        self.p_out = nn.Parameter(torch.randn(1, dtype=torch.float32))
        self.p_to_out = nn.Parameter(torch.randn(M, dtype=torch.float32))
        self.p_from_out = nn.Parameter(torch.randn(M, dtype=torch.float32))

        self.need_update = True
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_tags))

        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None

        self.crf = ConditionalRandomField(self.num_tags, constraints)

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace=label_namespace,
                                              label_encoding=constraint_type
                                              or "BIO")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)
Example #10
 def __init__(self,
              gen_emb,
              domain_emb,
              num_head=8,
              num_classes=3,
              dropout=0.5,
              crf=False):
     super(Model_att, self).__init__()
     self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0],
                                             gen_emb.shape[1])
     self.gen_embedding.weight = torch.nn.Parameter(
         torch.from_numpy(gen_emb), requires_grad=False)
     self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0],
                                                domain_emb.shape[1])
     self.domain_embedding.weight = torch.nn.Parameter(
         torch.from_numpy(domain_emb), requires_grad=False)
     #####
     self.dropout = torch.nn.Dropout(dropout)
     self.conva = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1],
                                  256,
                                  1,
                                  padding=0)
     self.sattb = layers.BertSelfAttention(
         256,
         num_attention_heads=num_head,
         attention_probs_dropout_prob=dropout,
         output_attentions=True)
     self.convb = torch.nn.Conv1d(256, 256, 1, padding=0)
     self.linear_ae = torch.nn.Linear(256, num_classes)
     self.crf_flag = crf
     if self.crf_flag:
         from allennlp.modules import ConditionalRandomField
         self.crf = ConditionalRandomField(num_classes)
Example #11
    def __init__(self,
                 vocab: Vocabulary,
                 embedding_dim: int,
                 use_crf: bool = False,
                 label_namespace: str = "xpos_tags"):
        super().__init__(vocab)
        self.label_namespace = label_namespace
        self.labels = vocab.get_index_to_token_vocabulary(label_namespace)
        num_labels = vocab.get_vocab_size(label_namespace)

        if use_crf:
            self.crf = ConditionalRandomField(
                num_labels, include_start_end_transitions=True)
            self.label_projection_layer = TimeDistributed(
                torch.nn.Linear(embedding_dim, num_labels))
            self.decoder = None
        else:
            self.crf = None
            self.decoder = GruSeq2SeqEncoder(input_size=embedding_dim,
                                             hidden_size=embedding_dim,
                                             num_layers=1,
                                             bidirectional=True)
            self.label_projection_layer = TimeDistributed(
                torch.nn.Linear(self.decoder.get_output_dim(), num_labels))

        from allennlp.training.metrics import CategoricalAccuracy

        self.metrics = {"accuracy": CategoricalAccuracy()}
Example #12
    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_path = crf.viterbi_tags(self.logits, mask)

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = [x for x, y in viterbi_path]

        # Now the tags should respect the constraints
        assert viterbi_tags == [
                [2, 3, 3],
                [2, 3]
        ]
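The test above hand-writes the allowed (from_tag, to_tag) index pairs, where indices num_tags and num_tags + 1 stand for the implicit START and END tags. In practice the pairs are usually derived from a label encoding; a minimal sketch, assuming AllenNLP's allowed_transitions helper and a hypothetical BIO label vocabulary:

from allennlp.modules.conditional_random_field import (
    ConditionalRandomField, allowed_transitions)

# Hypothetical index-to-label mapping; any BIO-consistent vocabulary works.
labels = {0: "O", 1: "B-PER", 2: "I-PER", 3: "B-LOC", 4: "I-LOC"}
constraints = allowed_transitions("BIO", labels)  # list of (from_idx, to_idx)
crf = ConditionalRandomField(num_tags=len(labels), constraints=constraints)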
Example #13
    def __init__(self,
                 gen_emb,
                 domain_emb,
                 num_classes=3,
                 dropout=0.5,
                 crf=False):
        super(Model, self).__init__()
        self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0],
                                                gen_emb.shape[1])
        self.gen_embedding.weight = torch.nn.Parameter(
            torch.from_numpy(gen_emb), requires_grad=False)
        self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0],
                                                   domain_emb.shape[1])
        self.domain_embedding.weight = torch.nn.Parameter(
            torch.from_numpy(domain_emb), requires_grad=False)

        self.conv1 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1],
                                     128,
                                     5,
                                     padding=2)
        self.conv2 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1],
                                     128,
                                     3,
                                     padding=1)
        self.dropout = torch.nn.Dropout(dropout)

        self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.linear_ae = torch.nn.Linear(256, num_classes)
        self.crf_flag = crf
        if self.crf_flag:
            from allennlp.modules import ConditionalRandomField
            self.crf = ConditionalRandomField(num_classes)
Example #14
    def __init__(
        self,
        vocab: Vocabulary,
        embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder = None,
        dropout: float = 0.5,
        label_namespace: str = "entity_tags",
    ) -> None:
        super().__init__(vocab)
        self.vocab = vocab
        self.embedder = embedder
        self.encoder = encoder
        self.dropout = Dropout(dropout)

        self.label_namespace = label_namespace
        self.labels = vocab.get_index_to_token_vocabulary(label_namespace)
        num_labels = vocab.get_vocab_size(label_namespace)

        self.label_projection_layer = TimeDistributed(
            torch.nn.Linear(
                embedder.get_output_dim()
                if encoder is None else encoder.get_output_dim(), num_labels))
        self.crf = ConditionalRandomField(num_labels,
                                          include_start_end_transitions=True)

        self.metrics = {
            "span_f1":
            SpanBasedF1Measure(vocab,
                               tag_namespace=label_namespace,
                               label_encoding="BIO"),
            "accuracy":
            CategoricalAccuracy(),
        }
Example #15
    def setUp(self):
        super().setUp()
        self.logits = torch.Tensor([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
                [2, 3, 4],
                [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)
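With the CRF fixed as in this setUp, training and decoding pair as in the following minimal sketch: the module's forward call returns the log-likelihood summed over the batch, so the loss is its negation, and viterbi_tags (in the API version used by Example #12) returns (tags, score) pairs. These examples use the older AllenNLP API, in which the mask is an integer tensor; newer releases expect a BoolTensor.

import torch
from allennlp.modules import ConditionalRandomField

crf = ConditionalRandomField(num_tags=5)
logits = torch.randn(2, 3, 5)                    # (batch, seq_len, num_tags)
tags = torch.LongTensor([[2, 3, 4], [3, 2, 2]])
mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])  # 0 marks padding

log_likelihood = crf(logits, tags, mask)     # summed over the batch
loss = -log_likelihood                       # negative log-likelihood to minimize
best_paths = crf.viterbi_tags(logits, mask)  # [(tag_list, viterbi_score), ...]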
Example #16
 def __init__(self, config):
     super().__init__()
     self.config = config
     self.crf = ConditionalRandomField(
         num_tags=config.tag_vocab_size,
         include_start_end_transitions=False,
     )
Example #17
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_smoothing: float = None) -> None:
        super(DiscourseClaimCrfClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.sentence_encoder = sentence_encoder
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()
        self.label_projection_layer_discourse = TimeDistributed(
            Linear(self.sentence_encoder.get_output_dim(), 5))
        self.label_projection_layer_claim = TimeDistributed(
            Linear(self.sentence_encoder.get_output_dim(), 2))

        constraints = None
        self.crf_discourse = ConditionalRandomField(
            5, constraints, include_start_end_transitions=False)
        self.crf_claim = ConditionalRandomField(
            2, constraints, include_start_end_transitions=False)
        initializer(self)
Example #18
 def __init__(
     self,
     vocab: Vocabulary,
     text_field_embedder: TextFieldEmbedder,
     embedding_dropout: float,
     seq2seq_encoder: Seq2SeqEncoder,
     initializer: InitializerApplicator = InitializerApplicator(),
     loss_weights: Optional[List] = [],
     regularizer: Optional[RegularizerApplicator] = None,
 ) -> None:
     super(SequenceLabeler, self).__init__(vocab, regularizer)
     self.text_field_embedder = text_field_embedder
     self._embedding_dropout = nn.Dropout(embedding_dropout)
     self.num_classes = self.vocab.get_vocab_size("labels")
     self.seq2seq_encoder = seq2seq_encoder
     self.self_attentive_pooling_projection = nn.Linear(
         seq2seq_encoder.get_output_dim(), 1)
     self._classifier = nn.Linear(
         in_features=seq2seq_encoder.get_output_dim(),
         out_features=vocab.get_vocab_size("labels"),
     )
     self._crf = ConditionalRandomField(vocab.get_vocab_size("labels"))
     self.loss = torch.nn.CrossEntropyLoss()
     self._f1 = SpanBasedF1Measure(vocab, "labels")
     self.metrics = {
         "accuracy": CategoricalAccuracy(),
     }
     initializer(self)
Example #19
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 dropout: Optional[float] = 0,
                 label_encoding: Optional[str] = 'BIO',
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super(CharBertCrfModel, self).__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size('labels')

        self._labels_predictor = Linear(
            self._text_field_embedder.get_output_dim(), self.num_tags)
        self.dropout = torch.nn.Dropout(dropout)
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace='labels',
                                             label_encoding=label_encoding)
        labels = self.vocab.get_index_to_token_vocabulary('labels')
        constraints = allowed_transitions(label_encoding, labels)
        self.label_to_index = self.vocab.get_token_to_index_vocabulary(
            'labels')
        self.crf = ConditionalRandomField(self.num_tags,
                                          constraints,
                                          include_start_end_transitions=False)
        # self.loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 sentence_encoder: Seq2VecEncoder,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 dropout: Optional[float] = None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(DiscourseCrfClassifier, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.sentence_encoder = sentence_encoder
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.loss = torch.nn.CrossEntropyLoss()
        self.label_projection_layer = TimeDistributed(
            Linear(self.sentence_encoder.get_output_dim(), self.num_classes))

        constraints = None  # allowed_transitions(label_encoding, labels)
        self.crf = ConditionalRandomField(self.num_classes,
                                          constraints,
                                          include_start_end_transitions=False)
        initializer(self)
Example #21
 def __init__(self, args):
     super(BertGate, self).__init__()
     # args
     self.args = args
     self.bert_model = BertModel.from_pretrained(self.args.bert_model)
     # # pos_tag embedding
     # self.pos_tag_embedding = nn.Embedding(len(args.pos_tag_vocab), args.word_dim)
     # gate
     self.gate = nn.Linear(2 * self.bert_model.config.hidden_size,
                           self.args.gate_class)
     # column pointer network
     self.column_pointer_network = GlobalAttention(
         args=self.args,
         dim=self.bert_model.config.hidden_size,
         is_transform_out=False,
         attn_type="mlp")
     if self.args.crf:
         # todo: set num for baseline
         if self.args.model == 'gate':
             if self.args.cell_info:
                 self.crf = ConditionalRandomField(
                     1 + self.args.bert_columns_split_marker_max_len - 1 +
                     self.args.bert_cells_split_marker_max_len - 1)
             else:
                 self.crf = ConditionalRandomField(
                     1 + self.args.bert_columns_split_marker_max_len - 1 +
                     1)
         else:
             raise NotImplementedError
Example #22
    def __init__(self,
                 vocab: Vocabulary,
                 source_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 max_decoding_steps: int,
                 target_namespace: str = "target_tags",
                 target_embedding_dim: int = None,
                 attention_function: SimilarityFunction = None,
                 scheduled_sampling_ratio: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SimpleSeq2SeqCrf, self).__init__(vocab, regularizer)
        self._source_embedder = source_embedder
        self._encoder = encoder
        self._max_decoding_steps = max_decoding_steps
        self._target_namespace = target_namespace
        self._attention_function = attention_function
        self._scheduled_sampling_ratio = scheduled_sampling_ratio
        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        num_classes = self.vocab.get_vocab_size(self._target_namespace)
        self._crf = ConditionalRandomField(num_classes)
        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
        # we're using attention with ``DotProductSimilarity``, this is needed.
        self._decoder_output_dim = self._encoder.get_output_dim()
        target_embedding_dim = (target_embedding_dim
                                or self._source_embedder.get_output_dim())
        self._target_embedder = Embedding(num_classes, target_embedding_dim)
        if self._attention_function:
            self._decoder_attention = Attention(self._attention_function)
            # The output of attention, a weighted average over encoder outputs, will be
            # concatenated to the input vector of the decoder at each time step.
            self._decoder_input_dim = (self._encoder.get_output_dim() +
                                       target_embedding_dim)
        else:
            self._decoder_input_dim = target_embedding_dim
        # TODO (pradeep): Do not hardcode decoder cell type.
        self._decoder_cell = LSTMCell(self._decoder_input_dim,
                                      self._decoder_output_dim)
        # self._decoder_cell = GRUCell(self._decoder_input_dim, self._decoder_output_dim, bias=False)
        self._output_projection_layer = Linear(self._decoder_output_dim,
                                               num_classes)
        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.span_metric = SpanBasedF1Measure(
            vocab,
            tag_namespace=target_namespace,
            ignore_classes=[START_SYMBOL[2:], END_SYMBOL[2:]])
        initializer(self)

Example #23
 def __init__(self, args):
     self.args = args
     super(BERTNER, self).__init__()
     self.emission = AutoModelForTokenClassification.from_pretrained(args.model_name_or_path, \
         cache_dir=args.pretrained_cache_dir, num_labels=len(NER_ID2LABEL))
     if self.args.use_crf:
         from allennlp.modules import ConditionalRandomField
         self.crf = ConditionalRandomField(len(NER_ID2LABEL), include_start_end_transitions=False)
Example #24
 def __init__(self, kwargs):
     super(CrfTagger, self).__init__()
     self.gpu = kwargs.pop("use_gpu", False)
     if kwargs.pop("use_lstm", False):
         self.lstm = LstmTagger(**kwargs)
     self.crf = ConditionalRandomField(
         kwargs["tagset_size"], include_start_end_transitions=True
     )
Example #25
    def __init__(self,
                 vocab,
                 text_field_embedder,
                 encoder,
                 label_namespace=u"labels",
                 constraint_type=None,
                 feedforward=None,
                 include_start_end_transitions=True,
                 dropout=None,
                 verbose_metrics=False,
                 initializer=InitializerApplicator(),
                 regularizer=None):
        super(CrfTagger, self).__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))

        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None

        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace=label_namespace,
                                              label_encoding=constraint_type
                                              or u"BIO")

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               u"text field embedding dim",
                               u"encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(),
                                   feedforward.get_input_dim(),
                                   u"encoder output dim",
                                   u"feedforward input dim")
        initializer(self)
Example #26
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 entity_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 label_namespace: str = "logical_form",
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.num_tags = self.vocab.get_vocab_size("logical_form")
        self.encoder = encoder

        self.text_field_embedder = text_field_embedder
        self.entity_embedder = entity_embedder

        self.BOW_embedder_question = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_description = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())
        self.BOW_embedder_detail = BagOfWordCountsTokenEmbedder(
            vocab, "tokens", projection_dim=self.encoder.get_output_dim())

        # using crf as the estimator for sequential tags
        self.crf = ConditionalRandomField(self.num_tags,
                                          include_start_end_transitions=False)

        self.crf_for_both = ConditionalRandomField(
            self.num_tags, include_start_end_transitions=False)

        self.softmax_layer = Softmax()
        self.ce_loss = CrossEntropyLoss()

        self.matched = 0
        self.all_pred = 0

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None

        output_dim = self.encoder.get_output_dim()

        self.pred_layer = Linear(4 * output_dim, 3 * self.num_tags)
        self.load_pretrained_weights()

        self.pred_layer_both = Linear(8 * output_dim, 3 * self.num_tags)
        # if constrain_crf_decoding and calculate_span_f1 are not
        # provided (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.

        self.metrics = {}
        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)
Example #27
    def __init__(
        self,
        task: str,
        vocab: Vocabulary,
        input_dim: int,
        loss_weight: float = 1.0,
        label_encoding: Optional[str] = 'BIO',
        include_start_end_transitions: bool = True,
        constrain_crf_decoding: bool = True,
        calculate_span_f1: bool = None,
        verbose_metrics: bool = False,
        metric: str = 'span_f1',
        top_k: int = 1,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.task = task
        self.input_dim = input_dim
        self.loss_weight = loss_weight
        self.num_tags = self.vocab.get_vocab_size(task)
        self.top_k = top_k
        self._verbose_metrics = verbose_metrics

        self.tag_projection_layer = TimeDistributed(
            Linear(input_dim, self.num_tags))

        # if constrain_crf_decoding and calculate_span_f1 are not
        # provided (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but no label_encoding was specified."
                )
            labels = self.vocab.get_index_to_token_vocabulary(task)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)
        self.metrics = {
            "span_f1":
            SpanBasedF1Measure(self.vocab,
                               tag_namespace=self.task,
                               label_encoding="BIO")
        }
Example #28
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 binary_feature_dim: int,
                 embedding_dropout: float = 0.0,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 label_smoothing: float = None,
                 label_namespace: str = "labels",
                 ignore_span_metric: bool = False,
                 label_encoding: Optional[str] = 'BIO',
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = True) -> None:
        super(OieLabelerCRF, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")

        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace="labels",
                                              ignore_classes=["V"])
        self.label_namespace = label_namespace
        self.encoder = encoder
        # There are exactly 2 binary features for the verb predicate embedding.
        self.binary_feature_embedding = Embedding(2, binary_feature_dim)
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_classes))
        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric
        self.include_start_end_transitions = include_start_end_transitions
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if constrain_crf_decoding:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            print(labels)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None
        self.crf = ConditionalRandomField(
            self.num_classes,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        check_dimensions_match(
            text_field_embedder.get_output_dim() + binary_feature_dim,
            encoder.get_input_dim(),
            "text embedding dim + verb indicator embedding dim",
            "encoder input dim")
        initializer(self)
Example #29
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 feedforward: Optional[FeedForward] = None,
                 dropout: Optional[float] = None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super().__init__(vocab, regularizer)
        label_namespace = 'labels'
        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder

        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None

        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        # if constrain_crf_decoding and calculate_span_f1 are not
        # provided (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.

        self.label_encoding = 'BIOUL'
        labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
        constraints = allowed_transitions(self.label_encoding, labels)

        self.include_start_end_transitions = True
        self.crf = ConditionalRandomField(
            self.num_tags, constraints,
            include_start_end_transitions=self.include_start_end_transitions
        )

        self.metrics = {
            "accuracy": CategoricalAccuracy(),
            "accuracy3": CategoricalAccuracy(top_k=3)
        }

        self._f1_metric = SpanBasedF1Measure(vocab,
                                             tag_namespace=label_namespace,
                                             label_encoding=self.label_encoding)
Example #30
    def __init__(self,
                 vocab: Vocabulary,
                 word_embedding: Dict[str, Any],
                 depsawr: torch.nn.Module = None,
                 transform_dim: int = 0,
                 pos_dim: int = 50,
                 indicator_dim: int = 50,
                 encoder: Dict[str, Any] = None,
                 dropout: float = 0.33,
                 label_namespace: str = "labels",
                 top_k: int = 1,
                 **kwargs) -> None:
        super().__init__()
        self.word_embedding = build_word_embedding(
            num_embeddings=len(vocab['words']),
            vocab=vocab,
            dropout=dropout,
            **word_embedding)
        feat_dim: int = self.word_embedding.output_dim

        if transform_dim > 0:
            self.word_transform = NonLinear(feat_dim, transform_dim)
            feat_dim: int = transform_dim
        else:
            self.word_transform = None

        if depsawr:
            dep_dim = kwargs.pop('dep_dim', 300)
            self.depsawr_forward = depsawr.forward
            self.projections = ModuleList(
                [NonLinear(i, dep_dim) for i in depsawr.dims])
            self.depsawr_mix = ScalarMix(len(depsawr.dims), True)
            feat_dim += dep_dim
        else:
            self.depsawr_forward = None

        self.pos_embedding = Embedding(len(vocab['upostag']), pos_dim, 0)
        self.indicator_embedding = Embedding(2, indicator_dim)
        feat_dim += (pos_dim + indicator_dim)

        if encoder is not None:
            self.encoder = build_encoder(feat_dim, dropout=dropout, **encoder)
            feat_dim = self.encoder.output_dim
        else:
            self.encoder = None

        self.tag_projection_layer = torch.nn.Linear(
            feat_dim, len(vocab[label_namespace]))
        self.word_dropout = WordDropout(dropout)
        self.crf = ConditionalRandomField(len(vocab[label_namespace]),
                                          include_start_end_transitions=False)
        self.top_k = top_k
        self.metric = SRLMetric(vocab[label_namespace]['_'])
Example #31
    def __init__(self, config, num_labels, word_pool_type='mean'):

        super(BertForSequenceLabeling, self).__init__(config)
        if word_pool_type.lower() not in {'first', 'mean', 'sum'}:
            raise ValueError('Unsupported pooling method: {}'.format(word_pool_type))
        if word_pool_type.lower() == 'sum':
            self.layer_norm = BertLayerNorm(config)
        self.word_pool_type = word_pool_type
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
        self.crf = ConditionalRandomField(num_labels)
        self.apply(self.init_bert_weights)
Example #32
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                           self.num_tags))

        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None

        self.crf = ConditionalRandomField(self.num_tags, constraints)

        self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)
Example #33
    def setUp(self):
        super().setUp()
        self.logits = torch.Tensor([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
                [2, 3, 4],
                [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)
Example #34
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 feedforward: FeedForward = None,
                 include_start_end_transitions: bool = True,
                 dropout: float = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None

        self.crf = ConditionalRandomField(
                self.num_tags, constraints,
                include_start_end_transitions=include_start_end_transitions
        )

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace=label_namespace,
                                              label_encoding=constraint_type or "BIO")


        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)
Example #35
    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_tags = crf.viterbi_tags(self.logits, mask)

        # Now the tags should respect the constraints
        assert viterbi_tags == [
                [2, 3, 3],
                [2, 3]
        ]
Example #36
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 constraint_type: Optional[str] = None,
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        if constraint_type is not None:
            warnings.warn("'constraint_type' was removed and replaced with"
                          "'label_encoding', 'constrain_crf_decoding', and "
                          "'calculate_span_f1' in version 0.6.1. It will be "
                          "removed in version 0.8.", DeprecationWarning)
            label_encoding = constraint_type

        # if constrain_crf_decoding and calculate_span_f1 are not
        # provided (i.e., they're None), set them to True
        # if label_encoding is provided and False if it isn't.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
                self.num_tags, constraints,
                include_start_end_transitions=include_start_end_transitions
        )

        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
        elif constraint_type is not None:
            # Maintain deprecated behavior if constraint_type is provided
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=constraint_type)

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)
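A minimal sketch, assuming a tiny hypothetical BIO label vocabulary, of how ``label_encoding`` plus ``constrain_crf_decoding`` turn into the ``constraints`` built above.

from allennlp.modules.conditional_random_field import allowed_transitions

bio_labels = {0: "O", 1: "B-PER", 2: "I-PER"}
constraints = allowed_transitions("BIO", bio_labels)
# (0, 2), i.e. "O" -> "I-PER", is absent from `constraints`, so a
# ConditionalRandomField built with them can never decode that transition.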
Example No. 37
class CrfTagger(Model):
    """
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    feedforward : ``FeedForward``, optional (default = ``None``)
        An optional feedforward layer to apply after the encoder.
    label_encoding : ``str``, optional (default=``None``)
        Label encoding to use when calculating span f1 and constraining
        the CRF at decoding time. Valid options are "BIO", "BIOUL", "IOB1", "BMES".
        Required if ``calculate_span_f1`` or ``constrain_crf_decoding`` is true.
    constraint_type : ``str``, optional (default=``None``)
        If provided, the CRF will be constrained at decoding time
        to produce valid labels based on the specified type
        (e.g. "BIO", or "BIOUL").

        .. deprecated:: 0.6.1
           ``constraint_type`` was deprecated and replaced with
           ``label_encoding``, ``constrain_crf_decoding``, and
           ``calculate_span_f1`` in version 0.6.1. It will be removed
           in version 0.8.

    include_start_end_transitions : ``bool``, optional (default=``True``)
        Whether to include start and end transition parameters in the CRF.
    constrain_crf_decoding : ``bool``, optional (default=``None``)
        If ``True``, the CRF is constrained at decoding time to
        produce valid sequences of tags. If this is ``True``, then
        ``label_encoding`` is required. If ``None`` and
        label_encoding is specified, this is set to ``True``.
        If ``None`` and label_encoding is not specified, it defaults
        to ``False``.
    calculate_span_f1 : ``bool``, optional (default=``None``)
        Calculate span-level F1 metrics during training. If this is ``True``, then
        ``label_encoding`` is required. If ``None`` and
        label_encoding is specified, this is set to ``True``.
        If ``None`` and label_encoding is not specified, it defaults
        to ``False``.
    dropout : ``float``, optional (default=``None``)
        Dropout probability, applied to both the embedded text and the encoder output.
    verbose_metrics : ``bool``, optional (default = False)
        If true, metrics will be returned per label class in addition
        to the overall statistics.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 feedforward: Optional[FeedForward] = None,
                 label_encoding: Optional[str] = None,
                 constraint_type: Optional[str] = None,
                 include_start_end_transitions: bool = True,
                 constrain_crf_decoding: bool = None,
                 calculate_span_f1: bool = None,
                 dropout: Optional[float] = None,
                 verbose_metrics: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self._verbose_metrics = verbose_metrics
        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.encoder.get_output_dim()
        self.tag_projection_layer = TimeDistributed(Linear(output_dim,
                                                           self.num_tags))

        if constraint_type is not None:
            warnings.warn("'constraint_type' was removed and replaced with"
                          "'label_encoding', 'constrain_crf_decoding', and "
                          "'calculate_span_f1' in version 0.6.1. It will be "
                          "removed in version 0.8.", DeprecationWarning)
            label_encoding = constraint_type

        # If constrain_crf_decoding and calculate_span_f1 are not provided
        # (i.e., they're None), set them to True if label_encoding is
        # provided, and to False if it isn't.
        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError("constrain_crf_decoding is True, but "
                                         "no label_encoding was specified.")
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(label_encoding, labels)
        else:
            constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
                self.num_tags, constraints,
                include_start_end_transitions=include_start_end_transitions
        )

        self.metrics = {
                "accuracy": CategoricalAccuracy(),
                "accuracy3": CategoricalAccuracy(top_k=3)
        }
        self.calculate_span_f1 = calculate_span_f1
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError("calculate_span_f1 is True, but "
                                         "no label_encoding was specified.")
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
        elif constraint_type is not None:
            # Maintain deprecated behavior if constraint_type is provided
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=constraint_type)

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        if feedforward is not None:
            check_dimensions_match(encoder.get_output_dim(), feedforward.get_input_dim(),
                                   "encoder output dim", "feedforward input dim")
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None,
                # pylint: disable=unused-argument
                **kwargs) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : ``Dict[str, torch.LongTensor]``, required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        tags : ``torch.LongTensor``, optional (default = ``None``)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional (default = ``None``)
            Metadata containing the original words in the sentence to be tagged, under a 'words' key.

        Returns
        -------
        An output dictionary consisting of:

        logits : ``torch.FloatTensor``
            The logits that are the output of the ``tag_projection_layer``
        mask : ``torch.LongTensor``
            The text field mask for the input tokens
        tags : ``List[List[int]]``
            The predicted tags using the Viterbi algorithm.
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised. Only computed if gold label ``tags`` are provided.
        """
        embedded_text_input = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)

        if self.dropout:
            embedded_text_input = self.dropout(embedded_text_input)

        encoded_text = self.encoder(embedded_text_input, mask)

        if self.dropout:
            encoded_text = self.dropout(encoded_text)

        if self._feedforward is not None:
            encoded_text = self._feedforward(encoded_text)

        logits = self.tag_projection_layer(encoded_text)
        best_paths = self.crf.viterbi_tags(logits, mask)

        # Just get the tags and ignore the score.
        predicted_tags = [x for x, y in best_paths]

        output = {"logits": logits, "mask": mask, "tags": predicted_tags}

        if tags is not None:
            # Add negative log-likelihood as loss
            log_likelihood = self.crf(logits, tags, mask)
            output["loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the metrics
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1

            for metric in self.metrics.values():
                metric(class_probabilities, tags, mask.float())
            if self.calculate_span_f1:
                self._f1_metric(class_probabilities, tags, mask.float())
        if metadata is not None:
            output["words"] = [x["words"] for x in metadata]
        return output

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Converts the tag ids to the actual tags.
        ``output_dict["tags"]`` is a list of lists of tag_ids,
        so we use an ugly nested list comprehension.
        """
        output_dict["tags"] = [
                [self.vocab.get_token_from_index(tag, namespace=self.label_namespace)
                 for tag in instance_tags]
                for instance_tags in output_dict["tags"]
        ]

        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics_to_return = {metric_name: metric.get_metric(reset) for
                             metric_name, metric in self.metrics.items()}

        if self.calculate_span_f1:
            f1_dict = self._f1_metric.get_metric(reset=reset)
            if self._verbose_metrics:
                metrics_to_return.update(f1_dict)
            else:
                metrics_to_return.update({
                        x: y for x, y in f1_dict.items() if
                        "overall" in x})
        return metrics_to_return
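A minimal usage sketch for the ``CrfTagger`` above. The vocabulary contents, dimensions, and module choices below are illustrative assumptions, not part of the original example.

import torch
from allennlp.data import Vocabulary
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding

# Hypothetical three-word, three-tag setup.
vocab = Vocabulary()
for token in ["john", "lives", "here"]:
    vocab.add_token_to_namespace(token, namespace="tokens")
for tag in ["O", "B-PER", "I-PER"]:
    vocab.add_token_to_namespace(tag, namespace="labels")

embedder = BasicTextFieldEmbedder(
        {"tokens": Embedding(num_embeddings=vocab.get_vocab_size("tokens"),
                             embedding_dim=8)})
encoder = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(8, 16, batch_first=True, bidirectional=True))

# label_encoding="BIO" switches on both constrained decoding and span F1.
tagger = CrfTagger(vocab, embedder, encoder, label_encoding="BIO")

# Indices 2, 3, 4 map to the three words (0 and 1 are padding/unknown).
tokens = {"tokens": torch.LongTensor([[2, 3, 4]])}
output = tagger(tokens)  # output["tags"] holds the Viterbi tag ids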
Example No. 38
class CrfTagger(Model):
    """
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    constraint_type : ``str``, optional (default=``None``)
        If provided, the CRF will be constrained at decoding time
        to produce valid labels based on the specified type (e.g. "BIO", or "BIOUL").
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """

    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 constraint_type: str = None,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(Linear(self.encoder.get_output_dim(),
                                                           self.num_tags))

        if constraint_type is not None:
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            constraints = allowed_transitions(constraint_type, labels)
        else:
            constraints = None

        self.crf = ConditionalRandomField(self.num_tags, constraints)

        self.span_metric = SpanBasedF1Measure(vocab, tag_namespace=label_namespace)

        check_dimensions_match(text_field_embedder.get_output_dim(), encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : ``Dict[str, torch.LongTensor]``, required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        tags : ``torch.LongTensor``, optional (default = ``None``)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.

        Returns
        -------
        An output dictionary consisting of:

        logits : ``torch.FloatTensor``
            The logits that are the output of the ``tag_projection_layer``
        mask : ``torch.LongTensor``
            The text field mask for the input tokens
        tags : ``List[List[int]]``
            The predicted tag ids from the Viterbi algorithm; ``decode`` converts them to tag strings.
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised. Only computed if gold label ``tags`` are provided.
        """
        embedded_text_input = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)

        logits = self.tag_projection_layer(encoded_text)
        predicted_tags = self.crf.viterbi_tags(logits, mask)

        output = {"logits": logits, "mask": mask, "tags": predicted_tags}

        if tags is not None:
            # Add negative log-likelihood as loss
            log_likelihood = self.crf(logits, tags, mask)
            output["loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the `span_metric`
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1

            self.span_metric(class_probabilities, tags, mask)

        return output

    @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Converts the tag ids to the actual tags.
        ``output_dict["tags"]`` is a list of lists of tag_ids,
        so we use an ugly nested list comprehension.
        """
        output_dict["tags"] = [
                [self.vocab.get_token_from_index(tag, namespace=self.label_namespace)
                 for tag in instance_tags]
                for instance_tags in output_dict["tags"]
        ]

        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metric_dict = self.span_metric.get_metric(reset=reset)
        return {x: y for x, y in metric_dict.items() if "overall" in x}

    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'CrfTagger':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params)
        encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
        label_namespace = params.pop("label_namespace", "labels")
        constraint_type = params.pop("constraint_type", None)
        initializer = InitializerApplicator.from_params(params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(params.pop('regularizer', []))

        params.assert_empty(cls.__name__)

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   encoder=encoder,
                   label_namespace=label_namespace,
                   constraint_type=constraint_type,
                   initializer=initializer,
                   regularizer=regularizer)
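For this older variant, ``from_params`` would presumably be driven by a configuration like the sketch below; the embedder and encoder keys are assumptions in the style of AllenNLP 0.x configs, not taken from the original example.

from allennlp.common import Params

params = Params({
        "text_field_embedder": {
                "tokens": {"type": "embedding", "embedding_dim": 50}
        },
        "encoder": {"type": "lstm", "input_size": 50,
                    "hidden_size": 25, "bidirectional": True},
        "constraint_type": "BIO"
})
# Given a prepared `vocab`:
# tagger = CrfTagger.from_params(vocab, params)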
Example No. 39
import itertools
import math

import torch
from pytest import approx, raises

from allennlp.common.checks import ConfigurationError
from allennlp.common.testing import AllenNlpTestCase
from allennlp.modules import ConditionalRandomField
from allennlp.modules.conditional_random_field import allowed_transitions


class TestConditionalRandomField(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.logits = torch.Tensor([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ])
        self.tags = torch.LongTensor([
                [2, 3, 4],
                [3, 2, 2]
        ])

        self.transitions = torch.Tensor([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    def score(self, logits, tags):
        """
        Computes the likelihood score for the given sequence of tags,
        given the provided logits (and the transition weights in the CRF model)
        """
        # Start with transitions from START and to END
        total = self.transitions_from_start[tags[0]] + self.transitions_to_end[tags[-1]]
        # Add in all the intermediate transitions
        for tag, next_tag in zip(tags, tags[1:]):
            total += self.transitions[tag, next_tag]
        # Add in the logits for the observed tags
        for logit, tag in zip(logits, tags):
            total += logit[tag]
        return total
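    # As a concrete check of `score` against the fixed transitions above: for
    # a length-2 sequence with tags [2, 3], the score would be
    #   start->2 (0.3) + 2->3 (3.4) + 3->end (-0.4) + logits[0][2] + logits[1][3].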

    def test_forward_works_without_mask(self):
        log_likelihood = self.crf(self.logits, self.tags).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        # (which is just the score for the logits and actual tags)
        # and the denominator
        # (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i in zip(self.logits, self.tags):
            numerator = self.score(logits_i.detach(), tags_i.detach())
            all_scores = [self.score(logits_i.detach(), tags_j)
                          for tags_j in itertools.product(range(5), repeat=3)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)


    def test_forward_works_with_mask(self):
        # Use a non-trivial mask
        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        log_likelihood = self.crf(self.logits, self.tags, mask).item()

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        #   (which is just the score for the logits and actual tags)
        # and the denominator
        #   (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i, mask_i in zip(self.logits, self.tags, mask):
            # Find the sequence length for this input and only look at that much of each sequence.
            sequence_length = torch.sum(mask_i.detach())
            logits_i = logits_i.data[:sequence_length]
            tags_i = tags_i.data[:sequence_length]

            numerator = self.score(logits_i, tags_i)
            all_scores = [self.score(logits_i, tags_j)
                          for tags_j in itertools.product(range(5), repeat=sequence_length)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood.item() == approx(log_likelihood)


    def test_viterbi_tags(self):
        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_path = self.crf.viterbi_tags(self.logits, mask)

        # Separate the tags and scores.
        viterbi_tags = [x for x, y in viterbi_path]
        viterbi_scores = [y for x, y in viterbi_path]

        # Check that the viterbi tags are what I think they should be.
        assert viterbi_tags == [
                [2, 4, 3],
                [4, 2]
        ]

        # We can also iterate over all possible tag sequences and use self.score
        # to check the likelihood of each. The most likely sequence should be the
        # same as what we get from viterbi_tags.
        most_likely_tags = []
        best_scores = []

        for logits_i, mask_i in zip(self.logits, mask):
            sequence_length = torch.sum(mask_i.detach())
            most_likely, best_score = None, -float('inf')
            for tags in itertools.product(range(5), repeat=sequence_length):
                score = self.score(logits_i.data, tags)
                if score > best_score:
                    most_likely, best_score = tags, score
            # Convert tuple to list; otherwise == complains.
            most_likely_tags.append(list(most_likely))
            best_scores.append(best_score)

        assert viterbi_tags == most_likely_tags
        assert viterbi_scores == best_scores

    def test_constrained_viterbi_tags(self):
        constraints = {(0, 0), (0, 1),
                       (1, 1), (1, 2),
                       (2, 2), (2, 3),
                       (3, 3), (3, 4),
                       (4, 4), (4, 0)}
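        # These pairs allow each tag i only to repeat or advance to i + 1
        # (wrapping 4 -> 0), so the decoder cannot reproduce arbitrary paths.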

        # Add the transitions to the end tag
        # and from the start tag.
        for i in range(5):
            constraints.add((5, i))
            constraints.add((i, 6))

        crf = ConditionalRandomField(num_tags=5, constraints=constraints)
        crf.transitions = torch.nn.Parameter(self.transitions)
        crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

        mask = torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ])

        viterbi_path = crf.viterbi_tags(self.logits, mask)

        # Get just the tags from each tuple of (tags, score).
        viterbi_tags = [x for x, y in viterbi_path]

        # Now the tags should respect the constraints
        assert viterbi_tags == [
                [2, 3, 3],
                [2, 3]
        ]
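        # Compare with test_viterbi_tags: the unconstrained best paths were
        # [2, 4, 3] and [4, 2], but 2 -> 4 and 4 -> 2 are not allowed here,
        # so decoding falls back to [2, 3, 3] and [2, 3].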

    def test_allowed_transitions(self):
        # pylint: disable=bad-whitespace,bad-continuation
        bio_labels = ['O', 'B-X', 'I-X', 'B-Y', 'I-Y'] # start tag, end tag
        #              0     1      2      3      4         5          6
        allowed = allowed_transitions("BIO", dict(enumerate(bio_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                         # Extra column for end tag.
            (0, 0), (0, 1),         (0, 3),              (0, 6),
            (1, 0), (1, 1), (1, 2), (1, 3),              (1, 6),
            (2, 0), (2, 1), (2, 2), (2, 3),              (2, 6),
            (3, 0), (3, 1),         (3, 3), (3, 4),      (3, 6),
            (4, 0), (4, 1),         (4, 3), (4, 4),      (4, 6),
            (5, 0), (5, 1),         (5, 3)                      # Extra row for start tag
        }

        bioul_labels = ['O', 'B-X', 'I-X', 'L-X', 'U-X', 'B-Y', 'I-Y', 'L-Y', 'U-Y'] # start tag, end tag
        #                0     1      2      3      4      5      6      7      8          9        10
        allowed = allowed_transitions("BIOUL", dict(enumerate(bioul_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                                                   # Extra column for end tag.
            (0, 0), (0, 1),                 (0, 4), (0, 5),                 (0, 8),       (0, 10),
                            (1, 2), (1, 3),
                            (2, 2), (2, 3),
            (3, 0), (3, 1),                 (3, 4), (3, 5),                 (3, 8),       (3, 10),
            (4, 0), (4, 1),                 (4, 4), (4, 5),                 (4, 8),       (4, 10),
                                                            (5, 6), (5, 7),
                                                            (6, 6), (6, 7),
            (7, 0), (7, 1),                 (7, 4), (7, 5),                 (7, 8),       (7, 10),
            (8, 0), (8, 1),                 (8, 4), (8, 5),                 (8, 8),       (8, 10),
            # Extra row for start tag.
            (9, 0), (9, 1),                 (9, 4), (9, 5),                 (9, 8)
        }

        iob1_labels = ['O', 'B-X', 'I-X', 'B-Y', 'I-Y'] # start tag, end tag
        #              0     1      2      3      4         5          6
        allowed = allowed_transitions("IOB1", dict(enumerate(iob1_labels)))

        # The empty spaces in this matrix indicate disallowed transitions.
        assert set(allowed) == {                            # Extra column for end tag.
            (0, 0),         (0, 2),         (0, 4),         (0, 6),
            (1, 0), (1, 1), (1, 2),         (1, 4),         (1, 6),
            (2, 0), (2, 1), (2, 2),         (2, 4),         (2, 6),
            (3, 0),         (3, 2), (3, 3), (3, 4),         (3, 6),
            (4, 0),         (4, 2), (4, 3), (4, 4),         (4, 6),
            (5, 0),         (5, 2),         (5, 4),                # Extra row for start tag
        }
        with raises(ConfigurationError):
            allowed_transitions("allennlp", {})

        bmes_labels = ['B-X', 'M-X', 'E-X', 'S-X', 'B-Y', 'M-Y', 'E-Y', 'S-Y'] # start tag, end tag
        #               0      1      2      3      4      5      6      7       8          9
        allowed = allowed_transitions("BMES", dict(enumerate(bmes_labels)))
        assert set(allowed) == {
                    (0, 1), (0, 2),
                            (1, 2),                                         # Extra column for end tag.
            (2, 0),                 (2, 3), (2, 4),                 (2, 7), (2, 9),
            (3, 0),                 (3, 3), (3, 4),                 (3, 7), (3, 9),
                                                    (4, 5), (4, 6),
                                                            (5, 6),
            (6, 0),                 (6, 3), (6, 4),                 (6, 7), (6, 9),
            (7, 0),                 (7, 3), (7, 4),                 (7, 7), (7, 9),
            (8, 0),                 (8, 3), (8, 4),                 (8, 7),  # Extra row for start tag
        }