Example #1
class InferenceLayer(nn.Module):
    def __init__(self, input_dim, n_classes, use_crf):
        super(InferenceLayer, self).__init__()

        self.use_crf = use_crf
        self.input_dim = input_dim
        self.output_dim = n_classes

        self.proj = nn.Linear(input_dim, n_classes)

        if self.use_crf:
            self.crf = ConditionalRandomField(
                n_classes,
                constraints=None,
                include_start_end_transitions=True)
        else:
            self.xent = nn.CrossEntropyLoss(reduction='mean')

    def crf_forward(self, logits, mask, target):
        mask = mask.long()
        loss = -self.crf.forward(logits, target,
                                 mask)  # neg log-likelihood loss
        loss = loss / torch.sum(mask)

        return loss, logits

    def fc_forward(self, logits, mask, target):
        if mask is not None:
            mask = mask.long()
            mask = mask.view(-1) == 1  # boolean selector over the flattened (batch * seq_len) positions

            logits_ = logits.view(-1, logits.size(-1))[mask]  # keep only non-padded positions
            target_ = target.view(-1)[mask]
            loss = self.xent(logits_, target_)
        else:
            loss = self.xent(logits.view(-1, logits.size(-1)), target.view(-1))

        return loss, logits

    def forward(self, vectors, mask, targets):
        logits = self.proj(vectors)

        if self.use_crf:
            loss, logits = self.crf_forward(logits, mask, targets)
        else:
            loss, logits = self.fc_forward(logits, mask, targets)

        return loss, logits
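
The example above omits its imports; it relies on ``torch``, ``torch.nn as nn``, and AllenNLP's ``ConditionalRandomField``. A minimal usage sketch with illustrative toy shapes (everything below the class is made up for the illustration and assumes an AllenNLP version that accepts a long mask):

import torch
import torch.nn as nn
from allennlp.modules import ConditionalRandomField

batch_size, seq_len, input_dim, n_classes = 2, 7, 16, 5

layer = InferenceLayer(input_dim, n_classes, use_crf=True)
vectors = torch.randn(batch_size, seq_len, input_dim)         # e.g. encoder outputs
mask = torch.ones(batch_size, seq_len, dtype=torch.long)      # 1 = real token, 0 = padding
targets = torch.randint(0, n_classes, (batch_size, seq_len))  # gold tag indices

loss, logits = layer(vectors, mask, targets)
loss.backward()  # negative CRF log-likelihood, averaged over unmasked tokens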
Example #2
class CrfTagger(Model):
    """
    The ``CrfTagger`` encodes a sequence of text with a ``Seq2SeqEncoder``,
    then uses a Conditional Random Field model to predict a tag for each token in the sequence.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the tokens ``TextField`` we get as input to the model.
    encoder : ``Seq2SeqEncoder``
        The encoder that we will use in between embedding tokens and predicting output tags.
    label_namespace : ``str``, optional (default=``labels``)
        This is needed to compute the SpanBasedF1Measure metric.
        Unless you did something unusual, the default value should be what you want.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.label_namespace = label_namespace
        self.text_field_embedder = text_field_embedder
        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.encoder = encoder
        self.tag_projection_layer = TimeDistributed(
            Linear(self.encoder.get_output_dim(), self.num_tags))
        self.crf = ConditionalRandomField(self.num_tags)

        self.span_metric = SpanBasedF1Measure(vocab,
                                              tag_namespace=label_namespace)

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError(
                "The output dimension of the text_field_embedder must match the "
                "input dimension of the phrase_encoder. Found {} and {}, "
                "respectively.".format(text_field_embedder.get_output_dim(),
                                       encoder.get_input_dim()))
        initializer(self)

    @overrides
    def forward(
            self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            tags: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : ``Dict[str, torch.LongTensor]``, required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        tags : ``torch.LongTensor``, optional (default = ``None``)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.

        Returns
        -------
        An output dictionary consisting of:

        logits : ``torch.FloatTensor``
            The logits that are the output of the ``tag_projection_layer``
        mask : ``torch.LongTensor``
            The text field mask for the input tokens
        tags : ``List[List[int]]``
            The predicted tag indices, decoded with the Viterbi algorithm.
        loss : ``torch.FloatTensor``, optional
            A scalar loss to be optimised. Only computed if gold label ``tags`` are provided.
        """
        embedded_text_input = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)

        logits = self.tag_projection_layer(encoded_text)
        predicted_tags = self.crf.viterbi_tags(logits, mask)

        output = {"logits": logits, "mask": mask, "tags": predicted_tags}

        if tags is not None:
            # Add negative log-likelihood as loss
            log_likelihood = self.crf.forward(logits, tags, mask)
            output["loss"] = -log_likelihood

            # Represent viterbi tags as "class probabilities" that we can
            # feed into the `span_metric`
            class_probabilities = logits * 0.
            for i, instance_tags in enumerate(predicted_tags):
                for j, tag_id in enumerate(instance_tags):
                    class_probabilities[i, j, tag_id] = 1

            self.span_metric(class_probabilities, tags, mask)

        return output

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metric_dict = self.span_metric.get_metric(reset=reset)
        return {x: y for x, y in metric_dict.items() if "overall" in x}

    @classmethod
    def from_params(cls, vocab: Vocabulary, params: Params) -> 'CrfTagger':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(
            vocab, embedder_params)
        encoder = Seq2SeqEncoder.from_params(params.pop("encoder"))
        label_namespace = params.pop("label_namespace", "labels")
        initializer = InitializerApplicator.from_params(
            params.pop('initializer', []))
        regularizer = RegularizerApplicator.from_params(
            params.pop('regularizer', []))

        params.assert_empty(cls.__name__)

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   encoder=encoder,
                   label_namespace=label_namespace,
                   initializer=initializer,
                   regularizer=regularizer)
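
The CRF-specific part of ``CrfTagger.forward`` can be exercised on its own. A small, self-contained sketch with toy shapes (note that the exact return format of ``viterbi_tags`` differs between AllenNLP versions):

import torch
from allennlp.modules import ConditionalRandomField

num_tags, batch_size, seq_len = 4, 2, 6
crf = ConditionalRandomField(num_tags)

logits = torch.randn(batch_size, seq_len, num_tags)       # tag_projection_layer output
tags = torch.randint(0, num_tags, (batch_size, seq_len))  # gold tag indices
mask = torch.ones(batch_size, seq_len, dtype=torch.long)  # text field mask

log_likelihood = crf(logits, tags, mask)     # summed over the batch
loss = -log_likelihood                       # what the model stores as output["loss"]
best_paths = crf.viterbi_tags(logits, mask)  # per-instance Viterbi decodes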
Example #3
class BiLSTMCRFSequenceTagger(Model):
    def __init__(self, vocab, text_field_embedder, hidden_size=128, num_layers=2, dropout=0.5,
                 tag_namespace='tags', initializer=None, metric=None):
        if initializer is None:
            initializer = InitializerApplicator()
        if metric is None:
            metric = SpanBasedF1Measure(vocab, tag_namespace=tag_namespace)

        super().__init__(vocab)
        self.text_field_embedder = text_field_embedder
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.tag_namespace = tag_namespace
        self.initializer = initializer
        self.metric = metric
        self.seq2seq_encoder = Seq2SeqEncoder.from_params(Params({
            'type': 'lstm',
            'input_size': text_field_embedder.get_output_dim(),
            'hidden_size': hidden_size,
            'num_layers': num_layers,
            'dropout': dropout,
            'bidirectional': True,
        }))
        self.num_tags = vocab.get_vocab_size(tag_namespace)
        self.tags_projection_layer = TimeDistributed(
            Linear(self.seq2seq_encoder.get_output_dim(), self.num_tags))
        self.crf = CRF(self.num_tags)
        self.initializer(self)

    def forward(self, sentence, tags=None):
        """Forward computation.

        Arguments
        ---------
        sentence : Dict[str, Variable[torch.LongTensor]]
            Mapping from indexer name to a tensor of indices. The indices tensor can
            have shape like ``(batch_size, num_tokens)`` if indexed by tokens or
            ``(batch_size, num_tokens, num_chars)`` if indexed by characters.
        tags : Variable[torch.LongTensor]
            Tag indices for this batch. This should have a shape ``(batch_size, num_tokens)``.

        Returns
        -------
        output : Dict[str, Variable]
            Output dictionary with keys ``logits``, ``mask``, and ``loss``.
        """
        mask = get_text_field_mask(sentence)
        embedded = self.text_field_embedder(sentence)  # (bsize, n_tokens, emb_dim)
        encoded = self.seq2seq_encoder(embedded, mask)  # (bsize, n_tokens, out_dim)
        logits = self.tags_projection_layer(encoded)  # (bsize, n_tokens, n_tags)
        output = {'logits': logits, 'mask': mask}
        if tags is not None:
            llh = self.crf.forward(logits, tags, mask=mask)
            output['loss'] = -llh
            self.metric(logits, tags, mask=mask)
        return output

    def decode(self, output):
        """Compute best tag sequence.

        Arguments
        ---------
        output : Dict[str, Variable]
            Output dictionary returned by ``.forward()``.

        Returns
        -------
        output : Dict[str, Variable]
            The same dictionary given as input but updated with keys ``predicted_tags``
            and ``prediction_probs``.
        """
        predicted_tags = self.crf.viterbi_tags(output['logits'], output['mask'])
        prediction_probs = output['logits'] * 0.
        for i, sentence_tags in enumerate(predicted_tags):
            for j, tag_id in enumerate(sentence_tags):
                prediction_probs[i, j, tag_id] = 1.
        output.update({'predicted_tags': predicted_tags, 'prediction_probs': prediction_probs})
        return output

    def get_metrics(self, reset=False):
        return self.metric.get_metric(reset)

    @classmethod
    def from_params(cls, vocab, params):
        text_field_embedder = TextFieldEmbedder.from_params(
            vocab, params.pop('text_field_embedder'))
        hidden_size = params.pop('hidden_size', 128)
        num_layers = params.pop('num_layers', 2)
        dropout = params.pop('dropout', 0.5)
        tag_namespace = params.pop('tag_namespace', 'tags')
        initializer = None
        initializer_params = params.pop('initializer', None)
        if initializer_params is not None:
            initializer = Initializer.from_params(initializer_params)
        metric = None
        metric_params = params.pop('metric', None)
        if metric_params is not None:
            metric = Metric.from_params(metric_params)
        params.assert_empty(cls.__name__)
        return cls(vocab, text_field_embedder, hidden_size=hidden_size, num_layers=num_layers,
                   dropout=dropout, tag_namespace=tag_namespace, initializer=initializer,
                   metric=metric)
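
How ``forward``, ``decode``, and ``get_metrics`` fit together at evaluation time, as a hedged sketch only: ``tagger`` and ``batch`` stand in for a constructed ``BiLSTMCRFSequenceTagger`` and a batch from an AllenNLP data iterator, neither of which is built here.

# Hypothetical names: `tagger` is an instantiated BiLSTMCRFSequenceTagger,
# `batch` supplies the "sentence" and "tags" fields from the dataset reader.
output = tagger(batch["sentence"], tags=batch["tags"])
loss = output["loss"]                      # negative CRF log-likelihood
output = tagger.decode(output)             # adds "predicted_tags" and "prediction_probs"
predicted = output["predicted_tags"]       # best tag sequence per sentence (tag indices)
metrics = tagger.get_metrics(reset=True)   # span-based F1 accumulated by self.metric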
Example #4
class InferenceLayer(nn.Module):
    def __init__(self, input_dim, n_classes, use_crf):
        super(InferenceLayer, self).__init__()

        self.use_crf = use_crf
        self.input_dim = input_dim
        self.output_dim = n_classes

        self.proj = nn.Linear(input_dim, n_classes)

        if self.use_crf:
            self.crf = ConditionalRandomField(
                n_classes,
                constraints=None,
                include_start_end_transitions=True)
        else:
            self.xent = nn.CrossEntropyLoss(reduction='mean')

    def crf_forward(self, logits, mask, target):
        mask = mask.long()
        best_paths = self.crf.viterbi_tags(logits, mask)
        tags, viterbi_scores = zip(*best_paths)
        loss = -self.crf.forward(logits, target,
                                 mask)  # neg log-likelihood loss
        loss = loss / torch.sum(mask)

        return {
            'loss': loss,
            'logits': logits,
            'tags': tags,
            'path_scores': viterbi_scores
        }

    def fc_forward(self, logits, mask, target):
        assert len(logits.size()) == 3

        if mask is not None:
            mask = mask.long()
            tags = torch.softmax(logits, dim=2).max(-1)  # (values, indices) over the class dimension
            tags = tags[1].cpu().tolist()                # argmax tag indices as Python lists

            for i in range(len(tags)):
                tags[i] = tags[i][:mask[i].sum().item()]  # truncate to the unpadded length

            mask = mask.view(-1) == 1

            logits_ = logits.view(-1, logits.size(-1))
            target_ = target.view(-1)

            loss = self.xent(logits_[mask], target_[mask])
        else:
            tags = torch.softmax(logits, dim=2).max(-1)
            tags = tags[1].cpu().tolist()

            for i in range(len(tags)):
                tags[i] = tags[i][:]  # no mask: keep the full-length sequences

            logits_ = logits.view(-1, logits.size(-1))
            target_ = target.view(-1)

            loss = self.xent(logits_, target_)

        return {'loss': loss, 'logits': logits, 'tags': tags}

    def forward(self, vectors, mask, targets):
        logits = self.proj(vectors)

        if self.use_crf:
            results = self.crf_forward(logits, mask, targets)
        else:
            results = self.fc_forward(logits, mask, targets)

        results['mask'] = mask.data if mask is not None else None

        return results
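
Compared with Example #1, this variant always returns a dictionary and, in CRF mode, also decodes Viterbi paths. A short consumption sketch under the same assumptions as before (toy shapes, imports already in scope):

layer = InferenceLayer(input_dim=16, n_classes=5, use_crf=True)
vectors = torch.randn(2, 7, 16)
mask = torch.ones(2, 7, dtype=torch.long)
targets = torch.randint(0, 5, (2, 7))

results = layer(vectors, mask, targets)
results['loss'].backward()      # mean negative CRF log-likelihood
print(results['tags'])          # one Viterbi tag sequence per instance
print(results['path_scores'])   # Viterbi path scores returned alongside the tags
print(results['mask'])          # the mask data, echoed back by forward()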
Example #5
class TestConditionalRandomField(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.logits = Variable(torch.Tensor([
                [[0, 0, .5, .5, .2], [0, 0, .3, .3, .1], [0, 0, .9, 10, 1]],
                [[0, 0, .2, .5, .2], [0, 0, 3, .3, .1], [0, 0, .9, 1, 1]],
        ]))
        self.tags = Variable(torch.LongTensor([
                [2, 3, 4],
                [3, 2, 2]
        ]))

        self.transitions = torch.Tensor([
                [0.1, 0.2, 0.3, 0.4, 0.5],
                [0.8, 0.3, 0.1, 0.7, 0.9],
                [-0.3, 2.1, -5.6, 3.4, 4.0],
                [0.2, 0.4, 0.6, -0.3, -0.4],
                [1.0, 1.0, 1.0, 1.0, 1.0]
        ])

        self.transitions_from_start = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.6])
        self.transitions_to_end = torch.Tensor([-0.1, -0.2, 0.3, -0.4, -0.4])

        # Use the CRF Module with fixed transitions to compute the log_likelihood
        self.crf = ConditionalRandomField(5)
        self.crf.transitions = torch.nn.Parameter(self.transitions)
        self.crf.start_transitions = torch.nn.Parameter(self.transitions_from_start)
        self.crf.end_transitions = torch.nn.Parameter(self.transitions_to_end)

    def score(self, logits, tags):
        """
        Computes the likelihood score for the given sequence of tags,
        given the provided logits (and the transition weights in the CRF model)
        """
        # Start with transitions from START and to END
        total = self.transitions_from_start[tags[0]] + self.transitions_to_end[tags[-1]]
        # Add in all the intermediate transitions
        for tag, next_tag in zip(tags, tags[1:]):
            total += self.transitions[tag, next_tag]
        # Add in the logits for the observed tags
        for logit, tag in zip(logits, tags):
            total += logit[tag]
        return total

    def test_forward_works_without_mask(self):
        log_likelihood = self.crf.forward(self.logits, self.tags).data[0]

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        # (which is just the score for the logits and actual tags)
        # and the denominator
        # (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i in zip(self.logits, self.tags):
            numerator = self.score(logits_i.data, tags_i.data)
            all_scores = [self.score(logits_i.data, tags_j) for tags_j in itertools.product(range(5), repeat=3)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood == approx(log_likelihood)

    def test_forward_works_with_mask(self):
        # Use a non-trivial mask
        mask = Variable(torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ]))

        log_likelihood = self.crf.forward(self.logits, self.tags, mask).data[0]

        # Now compute the log-likelihood manually
        manual_log_likelihood = 0.0

        # For each instance, manually compute the numerator
        #   (which is just the score for the logits and actual tags)
        # and the denominator
        #   (which is the log-sum-exp of the scores for the logits across all possible tags)
        for logits_i, tags_i, mask_i in zip(self.logits, self.tags, mask):
            # Find the sequence length for this input and only look at that much of each sequence.
            sequence_length = torch.sum(mask_i.data)
            logits_i = logits_i.data[:sequence_length]
            tags_i = tags_i.data[:sequence_length]

            numerator = self.score(logits_i, tags_i)
            all_scores = [self.score(logits_i, tags_j)
                          for tags_j in itertools.product(range(5), repeat=sequence_length)]
            denominator = math.log(sum(math.exp(score) for score in all_scores))
            # And include them in the manual calculation.
            manual_log_likelihood += numerator - denominator

        # The manually computed log likelihood should equal the result of crf.forward.
        assert manual_log_likelihood == approx(log_likelihood)

    def test_viterbi_tags(self):
        mask = Variable(torch.LongTensor([
                [1, 1, 1],
                [1, 1, 0]
        ]))

        viterbi_tags = self.crf.viterbi_tags(self.logits, mask)

        # Check that the viterbi tags are what I think they should be.
        assert viterbi_tags == [
                [2, 4, 3],
                [4, 2]
        ]

        # We can also iterate over all possible tag sequences and use self.score
        # to check the likelihood of each. The most likely sequence should be the
        # same as what we get from viterbi_tags.
        most_likely_tags = []

        for logit, mas in zip(self.logits, mask):
            sequence_length = torch.sum(mas.data)
            most_likely, most_likelihood = None, -float('inf')
            for tags in itertools.product(range(5), repeat=sequence_length):
                score = self.score(logit.data, tags)
                if score > most_likelihood:
                    most_likely, most_likelihood = tags, score
            # Convert tuple to list; otherwise == complains.
            most_likely_tags.append(list(most_likely))

        assert viterbi_tags == most_likely_tags
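
For reference, the quantity these tests verify by brute force is the standard linear-chain CRF log-likelihood. In the notation of ``score`` above, with logits x, tag sequence y of length n, transition matrix T, and start/end transition vectors s and e:

\log p(y \mid x) = \Big( s_{y_1} + \sum_{t=1}^{n-1} T_{y_t,\,y_{t+1}} + \sum_{t=1}^{n} x_{t,y_t} + e_{y_n} \Big) - \log \sum_{y'} \exp\Big( s_{y'_1} + \sum_{t=1}^{n-1} T_{y'_t,\,y'_{t+1}} + \sum_{t=1}^{n} x_{t,y'_t} + e_{y'_n} \Big)

The first bracket is the ``numerator`` computed by ``score``, and the log-sum-exp over all 5**n candidate tag sequences is the ``denominator`` the tests enumerate with ``itertools.product``.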