Ejemplo n.º 1
0
  def build_export_output(self, model):  # pylint: disable=no-self-use
    """
    Attach the export-time prediction and score tensors to `model`.

    Reads `model.transitions`, `model.logits` (an
    `(intent_logits, slots_logits)` pair) and `model.input_x_len`, then sets:
      * `model.preds`: `(intent_preds, slots_preds)`
      * `model.score`: `(intent_score, slots_score)`
      * `model.output_dict`: the same four tensors keyed by their op names.
    """
    crf_transitions = model.transitions
    intent_logits, slots_logits = model.logits

    # Intent head: class probabilities plus the arg-max class id.
    intent_probs = tf.nn.softmax(intent_logits, name="intent_score")
    intent_ids = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    # Slot head: CRF decoding of the slot logits.
    decoded_slots = crf_decode(slots_logits, crf_transitions,
                               model.input_x_len)

    # Give the CRF outputs stable, explicit op names.
    slot_ids = tf.identity(decoded_slots[0], name="slots_preds")
    slot_scores = tf.identity(decoded_slots[1], name="slots_score")

    model.preds = intent_ids, slot_ids
    model.score = intent_probs, slot_scores
    model.output_dict = dict(
        slots_score=slot_scores,
        slots_preds=slot_ids,
        intent_score=intent_probs,
        intent_preds=intent_ids,
    )
    logging.info("Model built.")
Ejemplo n.º 2
0
    def call(self, inputs, training=False):
        """Decode the highest scoring tag sequence and, when training, add the loss.

        In training mode the mean negative CRF log-likelihood over the batch is
        registered through `self.add_loss`. No per-sequence length
        normalization is applied in this method.

        Args:
          inputs: A sequence of three tensors `(logits, sequence_length,
            labels)`: a [batch_size, max_seq_len, num_tags] tensor of logits,
            a [batch_size] vector of true sequence lengths, and a
            [batch_size, max_seq_len] tensor of expected ids (only used in
            training mode).
          training: Whether it runs in training mode.

        Returns:
          A [batch_size, max_seq_len] matrix of dtype `tf.int32` holding the
          highest scoring tag indices.
        """
        logits, sequence_length, labels = inputs

        best_paths = tfa_text.crf_decode(logits, self._transition_matrix,
                                         sequence_length)[0]
        best_paths = tf.cast(best_paths, tf.int32)

        if not training:
            return best_paths

        # Right-padding is encoded as -1, which is not a valid tag id for the
        # likelihood computation, so clip it up to 0.
        clipped_labels = tf.maximum(labels, 0)
        log_likelihood = tfa_text.crf_log_likelihood(
            logits, clipped_labels, sequence_length,
            self._transition_matrix)[0]
        mean_nll = tf.reduce_mean(-log_likelihood)
        self.add_loss(mean_nll)

        return best_paths
Ejemplo n.º 3
0
    def testCrfDecode(self):
        """Check `crf_decode` against an exhaustive brute-force search.

        For each fixture every possible tag sequence of the given length is
        scored with `crf_sequence_score`; the best-scoring one must match the
        sequence and score returned by `crf_decode`.
        """
        transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]],
                                     dtype=np.float32)
        # The transition tensor is identical for every fixture and candidate.
        transitions = tf.constant(transition_params)
        # Test both the length-1 and regular cases.
        sequence_lengths_list = [
            np.array(3, dtype=np.int32),
            np.array(1, dtype=np.int64)
        ]
        inputs_list = [
            np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
                     dtype=np.float32),
            np.array([[-1, 2, 1]], dtype=np.float32),
        ]

        for sequence_lengths, inputs in zip(sequence_lengths_list,
                                            inputs_list):
            # Plain int for `itertools.product`, list arithmetic and slicing.
            seq_len = int(sequence_lengths)
            num_words, num_tags = inputs.shape
            padding = [0] * (num_words - seq_len)

            # Loop-invariant batched tensors, built once per fixture.
            batched_inputs = tf.expand_dims(inputs, 0)
            batched_lengths = tf.expand_dims(sequence_lengths, 0)

            # Compare the dynamic program with brute force computation.
            all_sequences = [
                list(candidate) + padding for candidate in itertools.product(
                    range(num_tags), repeat=seq_len)
            ]
            all_sequence_scores = []
            for candidate in all_sequences:
                sequence_score = text.crf_sequence_score(
                    inputs=batched_inputs,
                    tag_indices=tf.expand_dims(candidate, 0),
                    sequence_lengths=batched_lengths,
                    transition_params=transitions)
                all_sequence_scores.append(tf.squeeze(sequence_score, [0]))

            tf_all_sequence_scores = self.evaluate(all_sequence_scores)

            expected_max_sequence_index = np.argmax(tf_all_sequence_scores)
            expected_max_sequence = all_sequences[expected_max_sequence_index]
            expected_max_score = tf_all_sequence_scores[
                expected_max_sequence_index]

            actual_max_sequence, actual_max_score = text.crf_decode(
                batched_inputs, transitions, batched_lengths)
            actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
            actual_max_score = tf.squeeze(actual_max_score, [0])
            tf_actual_max_sequence, tf_actual_max_score = self.evaluate(
                [actual_max_sequence, actual_max_score])

            self.assertAllClose(tf_actual_max_score, expected_max_score)
            # Only the first `seq_len` positions are meaningful.
            self.assertEqual(
                list(tf_actual_max_sequence[:seq_len]),
                expected_max_sequence[:seq_len])
Ejemplo n.º 4
0
def test_crf_decode_zero_seq_length():
    """Check that `crf_decode` runs when every sequence length is zero.

    Only the ranks of the outputs are asserted: tags must be rank 2 and
    scores rank 1.
    """
    batch_size, max_seq_len, num_tags = 2, 10, 5

    potentials = tf.constant(
        np.ones([batch_size, max_seq_len, num_tags], dtype=np.float32))
    transitions = tf.constant(np.ones([num_tags, num_tags], dtype=np.float32))
    zero_lengths = tf.constant(np.zeros([batch_size], dtype=np.int32))

    decoded = text.crf_decode(potentials, transitions, zero_lengths)
    tags, scores = decoded

    assert len(tags.shape) == 2
    assert len(scores.shape) == 1
Ejemplo n.º 5
0
 def call(self, emissions: tf.Tensor, mask: tf.Tensor) -> tf.Tensor:
     """Decode the best tag ids and keep only the unmasked positions.

     Sequence lengths are derived by summing `mask` along axis 1. The decoded
     ids are then filtered with `tf.ragged.boolean_mask`, whose result is
     returned.

     Args:
         emissions: Unary scores handed to `crf_decode`.
         mask: Mask over `emissions`; cast to int32 to compute lengths and to
             bool to filter the decoded ids.
     """
     int_mask = tf.cast(mask, tf.int32)
     lengths = tf.math.reduce_sum(int_mask, axis=1)

     best_tag_ids = crf_decode(emissions, self.transition_weight, lengths)[0]

     keep = tf.cast(mask, tf.bool)
     return tf.ragged.boolean_mask(best_tag_ids, keep)
Ejemplo n.º 6
0
def test_crf_constrained_decode(dtype):
    """Check `crf_constrained_decode` against `crf_decode` on filtered inputs.

    For each fixture the expected result is obtained by first applying
    `crf_filtered_inputs` with the tag bitmap and then running the plain
    `crf_decode`; `crf_constrained_decode` must return the same score and the
    same tag sequence over the valid prefix.

    Args:
        dtype: Floating point dtype used for the potentials and transitions.
    """
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]],
                                 dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32)
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    # The builtin `bool` is used because the `np.bool` alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    tag_bitmap_list = [
        np.array(
            [
                [True, False, False],
                [False, True, True],
                [False, True, True],
                [False, True, True],
            ],
            dtype=bool,
        ),
        np.array([[False, True, True]], dtype=bool),
    ]
    for sequence_lengths, inputs, tag_bitmap in zip(sequence_lengths_list,
                                                    inputs_list,
                                                    tag_bitmap_list):
        filtered_inputs = text.crf_filtered_inputs(
            inputs=tf.expand_dims(inputs, 0),
            tag_bitmap=tf.expand_dims(tag_bitmap, 0))

        # Reference result: unconstrained decode over the filtered potentials.
        expected_max_sequence, expected_max_score = text.crf_decode(
            filtered_inputs,
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )

        expected_max_sequence = tf.squeeze(expected_max_sequence, [0])
        expected_max_score = tf.squeeze(expected_max_score, [0])

        actual_max_sequence, actual_max_score = text.crf_constrained_decode(
            tf.expand_dims(inputs, 0),
            tf.expand_dims(tag_bitmap, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )

        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(actual_max_score,
                                                     expected_max_score, 1e-6,
                                                     1e-6)
        # Only the first `sequence_lengths` positions are compared.
        assert list(actual_max_sequence[:sequence_lengths]) == list(
            expected_max_sequence[:sequence_lengths])
Ejemplo n.º 7
0
 def testCrfDecodeZeroSeqLength(self):
     """Check that `crf_decode` runs when every sequence length is zero.

     Only the ranks of the evaluated outputs are asserted: tags must be rank
     2 and scores rank 1.
     """
     batch_size, max_seq_len, num_tags = 2, 10, 5

     potentials = tf.constant(
         np.ones([batch_size, max_seq_len, num_tags], dtype=np.float32))
     transitions = tf.constant(
         np.ones([num_tags, num_tags], dtype=np.float32))
     zero_lengths = tf.constant(np.zeros([batch_size], dtype=np.int32))

     decoded = text.crf_decode(potentials, transitions, zero_lengths)
     tf_tags, tf_scores = self.evaluate(list(decoded))

     self.assertEqual(len(tf_tags.shape), 2)
     self.assertEqual(len(tf_scores.shape), 1)
Ejemplo n.º 8
0
    def build_export_output(self, model):  # pylint: disable=no-self-use
        """
        Attach the CRF-decoded outputs to `model`.

        Runs `crf_decode` on `model.logits` with `model.transitions` and
        `model.input_x_len`, then sets:
          * `model.preds`: the decoded tags,
          * `model.score`: the decode score wrapped in an op named "score",
          * `model.output_dict`: both tensors under "score" and "preds".
        """
        decoded_tags, decode_score = crf_decode(model.logits,
                                                model.transitions,
                                                model.input_x_len)
        model.preds = decoded_tags

        # `tf.identity` gives the score tensor a stable, explicit op name.
        model.score = tf.identity(decode_score, name="score")
        model.output_dict = {"score": model.score, "preds": model.preds}
Ejemplo n.º 9
0
 def viterbi_accuracy(y_true, y_pred):
     """Return the masked accuracy of the Viterbi-decoded tags.

     `y_pred` is decoded with `crf_decode` using `self.transitions` from the
     enclosing scope, treating every sequence as full length. Matches with
     `y_true` are averaged over the positions kept by the mask.
     """
     # A timestep counts only if all of its scores exceed -1e10
     # (presumably masked steps carry very negative scores; confirm against
     # the layer that produces `y_pred`).
     step_is_valid = K.all(K.greater(y_pred, -1e10), axis=2)
     mask = K.cast(step_is_valid, K.floatx())

     # Decode every sequence over its full time dimension.
     pred_shape = tf.shape(y_pred)
     full_lengths = tf.ones(pred_shape[0], dtype=tf.int32) * (pred_shape[1])
     decoded = crf_decode(y_pred, self.transitions, full_lengths)[0]

     if self.sparse_target:
         y_true = K.argmax(y_true, 2)

     decoded = K.cast(decoded, 'int32')
     y_true = K.cast(y_true, 'int32')
     hits = K.cast(K.equal(y_true, decoded), K.floatx())
     return K.sum(hits * mask) / K.sum(mask)
Ejemplo n.º 10
0
def test_crf_decode_save_load(tmpdir):
    """Check that a Keras model using `crf_decode` survives a save/load cycle.

    The model is fitted and used for prediction, written with
    `tf.saved_model.save`, reloaded with `tf.keras.models.load_model`, and
    then fitted and used for prediction again.
    """
    tf.keras.backend.clear_session()
    input_tensor = tf.keras.Input(shape=(10, 3),
                                  dtype=tf.float32,
                                  name="input_tensor")
    seq_len = tf.keras.Input(shape=(), dtype=tf.int32, name="seq_len")
    transition = tf.constant([[1, 1, 0], [0, 1, 1], [1, 0, 1]],
                             dtype=tf.float32)

    output = tf.multiply(input_tensor, tf.constant(1.0))
    decoded = text.crf_decode(input_tensor, transition, seq_len)[0]

    model = tf.keras.Model(inputs=[input_tensor, seq_len],
                           outputs=[output, decoded],
                           name="example_model")
    model.compile(optimizer="Adam")

    features = np.random.random_sample((5, 10, 3)).astype(dtype=np.float32)
    x_data = {
        "input_tensor": features,
        "seq_len": np.array([10] * 5, dtype=np.int32),
    }

    # The name of the multiply output depends on the TensorFlow version.
    if LooseVersion(tf.__version__) >= "2.5.0":
        tensor_name = "tf.math.multiply"
    else:
        tensor_name = "tf_op_layer_Mul"
    y_data = {tensor_name: np.random.randint(0, 3, (5, 10))}

    def _fit_then_predict(keras_model):
        """Fit on the full batch, then predict on the first example only."""
        keras_model.fit(x_data, y_data)
        keras_model.predict({
            "input_tensor": tf.expand_dims(x_data["input_tensor"][0], 0),
            "seq_len": np.array([10]),
        })

    _fit_then_predict(model)

    temp_dir = str(tmpdir.mkdir("model"))
    tf.saved_model.save(model, temp_dir)

    tf.keras.backend.clear_session()
    custom_objects = {
        "CrfDecodeForwardRnnCell": text.crf.CrfDecodeForwardRnnCell
    }
    model = tf.keras.models.load_model(temp_dir,
                                       custom_objects=custom_objects)
    _fit_then_predict(model)
Ejemplo n.º 11
0
	def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
		"""Decode `inputs` with the CRF and return a phase-dependent output.

		`self.sequence_lengths` is set as a side effect: either the flattened
		`sequence_lengths` argument (which must be rank 2, int32 and have a
		second dimension of 1), or full-length values derived from the shape
		of `inputs` when no lengths are given.

		Returns:
			The result of `K.in_train_phase`: the converted input tensor in
			the training phase, otherwise the one-hot encoding (depth
			`self.output_dim`) of the decoded tag sequence.
		"""
		sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)

		if sequence_lengths is None:
			# No explicit lengths: treat every sequence as full length.
			input_shape = tf.shape(inputs)
			self.sequence_lengths = tf.ones(input_shape[0], dtype=tf.int32) * (input_shape[1])
		else:
			assert len(sequence_lengths.shape) == 2
			lengths_tensor = tf.convert_to_tensor(sequence_lengths)
			assert lengths_tensor.dtype == 'int32'
			assert lengths_tensor.get_shape().as_list()[1] == 1
			self.sequence_lengths = K.flatten(sequence_lengths)

		best_path = crf_decode(sequences, self.transitions, self.sequence_lengths)[0]
		one_hot_path = K.one_hot(best_path, self.output_dim)
		return K.in_train_phase(sequences, one_hot_path)
    def build_output(self, model):  # pylint: disable=no-self-use
        """
        Attach the CRF-decoded outputs and the ground truth to `model`.

        Sets `model.preds` (decoded tags), `model.score` (decode score wrapped
        in an op named "score") and `model.y_ground_truth` (taken from
        `model.input_y`). When `model.use_pretrained_model` is truthy, the
        pretrained model variables are also initialized.
        """
        decoded_tags, decode_score = crf_decode(model.logits,
                                                model.transitions,
                                                model.input_x_len)
        model.preds = decoded_tags

        # `tf.identity` gives the score tensor a stable, explicit op name.
        model.score = tf.identity(decode_score, name="score")
        model.y_ground_truth = model.input_y

        if not model.use_pretrained_model:
            return

        logging.info("initialize_pretrained_model_variables")
        self.initialize_pretrained_model_variables(
            model.pretrained_model_path, model.pretrained_model_mode)
Ejemplo n.º 13
0
    def viterbi_decode(self, potentials, sequence_length):
        """Return the highest scoring tag sequence for each batch entry.

        Operates on tensors: decoding is delegated to `crf_decode` with
        `self.transition_params`, and the accompanying best score is
        discarded.

        Args:
            potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of unary potentials.
            sequence_length: A [batch_size] tensor, containing sequence lengths.

        Returns:
            decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
                         Contains the highest scoring tag indices.
        """
        decode_tags, _ = crf_decode(
            potentials,
            self.transition_params,
            sequence_length,
        )
        return decode_tags
Ejemplo n.º 14
0
def test_crf_decode(dtype):
    """Check `crf_decode` against the `brute_force_decode` reference helper.

    Args:
        dtype: Floating point dtype used for the potentials and transitions.
    """
    transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int64),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[-1, 2, 1]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([2], dtype=np.int32),
    ]

    fixtures = zip(sequence_lengths_list, inputs_list, tag_indices_list)
    # The third fixture element is iterated but never asserted on.
    for sequence_lengths, inputs, _tag_indices in fixtures:
        reference = brute_force_decode(sequence_lengths, inputs, transition_params)
        expected_max_sequence, expected_max_score = reference

        decoded = text.crf_decode(
            tf.expand_dims(inputs, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        # Drop the batch dimension that was added for the call.
        actual_max_sequence = tf.squeeze(decoded[0], [0])
        actual_max_score = tf.squeeze(decoded[1], [0])

        test_utils.assert_allclose_according_to_type(
            actual_max_score, expected_max_score, 1e-6, 1e-6
        )
        # Only the first `sequence_lengths` positions are compared.
        actual_prefix = list(actual_max_sequence[:sequence_lengths])
        expected_prefix = expected_max_sequence[:sequence_lengths]
        assert actual_prefix == expected_prefix
Ejemplo n.º 15
0
    def call(self, inputs: list[tf.Tensor], mask: tf.Tensor) -> tf.RaggedTensor:
        """Register the CRF loss and tag accuracy, then return the decoded tags.

        The loss added via `self.add_loss` is the negated mean of the CRF
        log-likelihoods. The "tag_accuracy" metric is the number of unmasked
        positions where the decoded tag equals the target, divided by the
        number of unmasked positions.

        Args:
            inputs: Pair `(emissions, tag_ids)`.
            mask: Mask over the sequence positions; summed along axis 1 to
                obtain the sequence lengths.

        Returns:
            The decoded tag ids filtered by the mask with
            `tf.ragged.boolean_mask`.
        """
        emissions, tag_ids = inputs
        int_mask = tf.cast(mask, tf.int32)
        sequence_lengths = tf.math.reduce_sum(int_mask, axis=1)

        log_likelihoods = crf_log_likelihood(
            emissions,
            tag_ids,
            sequence_lengths,
            self.transition_weight,
        )[0]
        nll = tf.math.negative(tf.math.reduce_mean(log_likelihoods))
        self.add_loss(nll)

        best_tag_ids = crf_decode(
            emissions,
            self.transition_weight,
            sequence_lengths,
        )[0]

        # Count matches on unmasked positions only.
        matches = tf.cast(tf.equal(tag_ids, best_tag_ids), tf.int32)
        num_correct = tf.reduce_sum(matches * int_mask)
        num_valid = tf.reduce_sum(int_mask)
        self.add_metric(num_correct / num_valid, name="tag_accuracy")

        keep = tf.cast(int_mask, tf.bool)
        return tf.ragged.boolean_mask(best_tag_ids, keep)
Ejemplo n.º 16
0
 def call(self, potentials, transition_params, sequence_length):
     """Forward the arguments to `text.crf_decode` and return its result unchanged."""
     decoded = text.crf_decode(
         potentials,
         transition_params,
         sequence_length,
     )
     return decoded