コード例 #1
0
ファイル: crf_test.py プロジェクト: SamuelMarks/addons
def test_crf_log_likelihood(dtype):
    """Check that the CRF likelihoods of all possible taggings sum to one.

    A single example is scored against every tag sequence that can fill its
    unpadded positions. The log-sum-exp of the resulting log-likelihoods is
    compared with 0.0, i.e. the probabilities must add up to 1.

    Args:
        dtype: NumPy dtype used to build the emission scores and the
            transition matrix.
    """
    inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype)
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    sequence_lengths = np.array(3, dtype=np.int32)
    num_words, num_tags = inputs.shape

    def batched(path):
        """Give every per-example argument a leading batch axis of size one."""
        return {
            "inputs": tf.expand_dims(inputs, 0),
            "tag_indices": tf.expand_dims(path, 0),
            "sequence_lengths": tf.expand_dims(sequence_lengths, 0),
        }

    # Positions beyond the true sequence length are filled with tag 0.
    padding = [0] * (num_words - sequence_lengths)

    all_sequence_log_likelihoods = []
    for prefix in itertools.product(range(num_tags), repeat=sequence_lengths):
        tag_indices = [*prefix, *padding]
        log_likelihood, _ = text.crf_log_likelihood(
            **batched(tag_indices),
            transition_params=tf.constant(transition_params),
        )
        all_sequence_log_likelihoods.append(log_likelihood)

    # Probabilities summing to 1 is equivalent to a log-sum-exp of 0.
    test_utils.assert_allclose_according_to_type(
        tf.reduce_logsumexp(all_sequence_log_likelihoods),
        0.0,
        rtol=1e-6,
        atol=1e-6,
        half_rtol=2e-3,
        half_atol=2e-3,
    )

    # Smoke check: repeat the call for the last tagging without
    # `transition_params`. Nothing catches an exception here, so the test
    # only passes when this call completes without raising.
    text.crf_log_likelihood(**batched(tag_indices))
コード例 #2
0
ファイル: crf_test.py プロジェクト: tian-ux543/addons
    def testCrfLogLikelihood(self):
        """Verify that the CRF likelihoods of every tagging sum to one.

        Each tag sequence that can fill the unpadded positions of one example
        is scored; the log-sum-exp of those scores must be close to 0.0.
        """
        emission_scores = np.array(
            [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
        transitions = np.array(
            [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
        true_length = np.array(3, dtype=np.int32)
        # TODO: https://github.com/PyCQA/pylint/issues/3139
        # pylint: disable=E1136
        max_length = emission_scores.shape[0]
        tag_count = emission_scores.shape[1]
        # pylint: enable=E1136
        pad_count = max_length - true_length

        per_path_log_likelihoods = []
        # Enumerate every tagging of the unpadded positions.
        for head in itertools.product(range(tag_count), repeat=true_length):
            tag_indices = list(head)
            # Padded positions receive tag 0.
            tag_indices.extend([0] * pad_count)
            path_log_likelihood, _ = text.crf_log_likelihood(
                inputs=tf.expand_dims(emission_scores, 0),
                tag_indices=tf.expand_dims(tag_indices, 0),
                sequence_lengths=tf.expand_dims(true_length, 0),
                transition_params=tf.constant(transitions))
            per_path_log_likelihoods.append(path_log_likelihood)

        # Probabilities summing to 1 is equivalent to a log-sum-exp of 0.
        log_partition_check = self.evaluate(
            tf.reduce_logsumexp(per_path_log_likelihoods))
        self.assertAllClose(log_partition_check, 0.0)

        # Smoke check: call again for the last tagging without
        # `transition_params`. No exception is asserted, so the test passes
        # only if this call completes without raising.
        batched_scores = tf.expand_dims(emission_scores, 0)
        batched_tags = tf.expand_dims(tag_indices, 0)
        batched_length = tf.expand_dims(true_length, 0)
        text.crf_log_likelihood(inputs=batched_scores,
                                tag_indices=batched_tags,
                                sequence_lengths=batched_length)
コード例 #3
0
ファイル: crf_test.py プロジェクト: xsxustc/tensorflow-addons
    def testCrfLogLikelihood(self):
        """Verify that the CRF likelihoods of every tagging sum to one."""
        inputs = np.array(
            [[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=np.float32)
        transition_params = np.array(
            [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
        sequence_lengths = np.array(3, dtype=np.int32)
        num_words, num_tags = inputs.shape

        def log_likelihood_of(path):
            """Score one tagging, padded with tag 0 up to `num_words`."""
            padded_path = list(path)
            padded_path.extend([0] * (num_words - sequence_lengths))
            score, _ = text.crf_log_likelihood(
                inputs=tf.expand_dims(inputs, 0),
                tag_indices=tf.expand_dims(padded_path, 0),
                sequence_lengths=tf.expand_dims(sequence_lengths, 0),
                transition_params=tf.constant(transition_params))
            return score

        # One log-likelihood per possible tagging of the unpadded positions.
        all_sequence_log_likelihoods = [
            log_likelihood_of(path)
            for path in itertools.product(range(num_tags),
                                          repeat=sequence_lengths)
        ]

        # Probabilities summing to 1 is equivalent to a log-sum-exp of 0.
        total_log_likelihood = tf.reduce_logsumexp(
            all_sequence_log_likelihoods)
        self.assertAllClose(self.evaluate(total_log_likelihood), 0.0)
コード例 #4
0
    def call(self, inputs, training=False):
        """Return the highest scoring tag sequence for each example.

        In training mode, the mean of the negated CRF log-likelihood is also
        registered on the layer through `add_loss`.

        NOTE(review): the previous docstring called this loss "length
        normalized"; no division by sequence length is visible in this
        method, so confirm whether that still applies.

        Args:
          inputs: A sequence of three tensors. The first is a [batch_size,
            max_seq_len, num_tags] tensor of logits. The second is a
            [batch_size] vector of true sequence lengths. The third is a
            [batch_size, max_seq_len] tensor of expected ids, which is only
            used in training mode.
          training: Whether the layer runs in training mode.

        Returns:
          decode_tags: A [batch_size, max_seq_len] matrix with dtype
          `tf.int32` containing the highest scoring tag indices.
        """
        logits, sequence_length, labels = inputs

        best_path, _ = tfa_text.crf_decode(
            logits, self._transition_matrix, sequence_length)
        decode_tags = tf.cast(best_path, tf.int32)

        if not training:
            return decode_tags

        # Right-padding is encoded as -1, which messes with the loss
        # calculation; clamp it to 0.
        clipped_labels = tf.maximum(labels, 0)
        log_likelihood, _ = tfa_text.crf_log_likelihood(
            logits, clipped_labels, sequence_length, self._transition_matrix)
        negative_log_likelihood = -log_likelihood
        self.add_loss(tf.reduce_mean(negative_log_likelihood))
        return decode_tags
コード例 #5
0
ファイル: crf_test.py プロジェクト: tian-ux543/addons
    def testDifferentDtype(self):
        """Call crf_log_likelihood with float32 scores and int64 tags/lengths.

        The test only requires that the call completes and returns a pair;
        no values are checked.
        """
        batch_size, max_seq_len, num_tags = 16, 20, 5
        inputs = np.ones([batch_size, max_seq_len, num_tags],
                         dtype=np.float32)
        tags = tf.convert_to_tensor(
            np.ones([batch_size, max_seq_len], dtype=np.int64))
        # Every sequence spans the full length.
        seq_lens = np.full([batch_size], max_seq_len, dtype=np.int64)

        _, _ = text.crf_log_likelihood(
            inputs=inputs, tag_indices=tags, sequence_lengths=seq_lens)
コード例 #6
0
 def crf_loss(y_true, y_pred):
     """Return the mean negative CRF log-likelihood of `y_true` given `y_pred`.

     Side effect: `self.transitions` is overwritten with the transition
     parameters returned by `crf_log_likelihood`.

     Args:
         y_true: Target tags. They are reduced with `K.argmax` and cast to
             int32 when `self.sparse_target` is truthy, and passed through
             unchanged otherwise.
         y_pred: Predicted scores, converted to a tensor of `self.dtype`.

     Returns:
         The mean over all elements of the negated log-likelihood.
     """
     potentials = tf.convert_to_tensor(y_pred, dtype=self.dtype)

     # NOTE(review): argmax is applied when `sparse_target` is set, which
     # looks inverted for a flag with that name. Confirm the intended meaning.
     if self.sparse_target:
         tag_indices = tf.cast(K.argmax(y_true), dtype=tf.int32)
     else:
         tag_indices = y_true

     log_likelihood, new_transitions = crf_log_likelihood(
         potentials,
         tag_indices,
         self.sequence_lengths,
         transition_params=self.transitions,
     )
     self.transitions = new_transitions
     return tf.reduce_mean(-log_likelihood)
コード例 #7
0
    def compute_crf_loss(
            self, potentials, sequence_length, kernel, y, sample_weight=None):
        """Compute the mean negative CRF log-likelihood, optionally weighted.

        Args:
            potentials: First positional argument to `crf_log_likelihood`.
            sequence_length: Third positional argument to
                `crf_log_likelihood`.
            kernel: Fourth positional argument to `crf_log_likelihood`;
                presumably the transition parameters, confirm against the
                caller.
            y: Second positional argument to `crf_log_likelihood`; presumably
                the target tag indices, confirm against the caller.
            sample_weight: Optional factor multiplied into the negated
                log-likelihood before averaging. Skipped when None.

        Returns:
            The mean over all elements of the (optionally weighted) negated
            log-likelihood.
        """
        log_likelihood, _ = crf_log_likelihood(
            potentials, y, sequence_length, kernel)

        # The loss is the negated likelihood.
        per_example_loss = -1 * log_likelihood
        if sample_weight is not None:
            per_example_loss = per_example_loss * sample_weight

        return tf.reduce_mean(per_example_loss)
コード例 #8
0
    def call(self, inputs: list[tf.Tensor], mask: tf.Tensor) -> tf.RaggedTensor:
        """Decode tags, and record the CRF loss and a tag-accuracy metric.

        Args:
            inputs: A pair `(emissions, tag_ids)`. Both are passed to
                `crf_log_likelihood`; `tag_ids` is also compared with the
                decoded tags.
            mask: Per-position mask. It is cast to int32 and summed along
                axis 1 to obtain the sequence lengths.

        Returns:
            The decoded tag ids as a ragged tensor that keeps only the
            positions where the mask is set.
        """
        emissions, tag_ids = inputs
        int_mask = tf.cast(mask, tf.int32)
        sequence_lengths = tf.math.reduce_sum(int_mask, axis=1)

        # Loss: negated mean log-likelihood of the provided tags.
        likelihoods, _ = crf_log_likelihood(
            emissions, tag_ids, sequence_lengths, self.transition_weight)
        mean_likelihood = tf.math.reduce_mean(likelihoods)
        self.add_loss(tf.math.negative(mean_likelihood))

        decoded_tag_ids, _ = crf_decode(
            emissions, self.transition_weight, sequence_lengths)

        # Metric: share of masked-in positions where the decoded tag equals
        # the provided tag.
        matches = tf.cast(tf.equal(tag_ids, decoded_tag_ids), tf.int32)
        correct_count = tf.reduce_sum(matches * int_mask)
        position_count = tf.reduce_sum(int_mask)
        self.add_metric(correct_count / position_count, name="tag_accuracy")

        keep_positions = tf.cast(int_mask, tf.bool)
        return tf.ragged.boolean_mask(decoded_tag_ids, keep_positions)