def build_export_output(self, model):  # pylint: disable=no-self-use
    """
    Build the output of the model for export.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    transitions = model.transitions
    intent_logits, slots_logits = model.logits

    intent_score = tf.nn.softmax(intent_logits, name="intent_score")
    intent_preds = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    slots_preds, slots_score = crf_decode(slots_logits, transitions, model.input_x_len)
    slots_preds = tf.identity(slots_preds, name="slots_preds")
    slots_score = tf.identity(slots_score, name="slots_score")

    model.preds = intent_preds, slots_preds
    model.score = intent_score, slots_score
    model.output_dict = {
        "slots_score": slots_score,
        "slots_preds": slots_preds,
        "intent_score": intent_score,
        "intent_preds": intent_preds
    }
    logging.info("Model built.")
def call(self, inputs, training=False):
    """Decodes the highest scoring sequence of tags.

    If training, calculates and records the CRF log-likelihood loss
    (length normalized).

    Args:
      inputs: A list with three tensors. The first tensor is a
        [batch_size, max_seq_len, num_tags] tensor of logits. The second
        tensor is a [batch_size] vector of true sequence lengths. The third
        tensor is a [batch_size, max_seq_len] tensor of expected ids (only
        used in training mode).
      training: Whether it runs in training mode.

    Returns:
      decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
        Contains the highest scoring tag indices.
    """
    logits, sequence_length, labels = inputs
    decode_tags, _ = tfa_text.crf_decode(
        logits, self._transition_matrix, sequence_length)
    decode_tags = tf.cast(decode_tags, tf.int32)

    if training:
        # Clip right-padding, which equals -1 and messes with the loss calculation.
        labels = tf.maximum(labels, 0)
        log_likelihood, _ = tfa_text.crf_log_likelihood(
            logits, labels, sequence_length, self._transition_matrix)
        self.add_loss(tf.reduce_mean(-log_likelihood))

    return decode_tags
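# Hedged sketch (not part of the snippet above): the same decode/loss pairing with
# standalone tensors, assuming TensorFlow Addons is importable as `tfa_text` and
# that the shapes and values below are purely illustrative.
import tensorflow as tf
import tensorflow_addons.text as tfa_text

num_tags = 3
logits = tf.random.normal([2, 5, num_tags])             # [batch_size, max_seq_len, num_tags]
sequence_length = tf.constant([5, 3], dtype=tf.int32)   # true length of each example
labels = tf.random.uniform([2, 5], maxval=num_tags, dtype=tf.int32)
transition_matrix = tf.random.normal([num_tags, num_tags])

decode_tags, _ = tfa_text.crf_decode(logits, transition_matrix, sequence_length)
log_likelihood, _ = tfa_text.crf_log_likelihood(
    logits, labels, sequence_length, transition_matrix)
loss = tf.reduce_mean(-log_likelihood)  # the quantity recorded via add_loss above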
def testCrfDecode(self):
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int64)
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]],
                 dtype=np.float32),
        np.array([[-1, 2, 1]], dtype=np.float32),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([2], dtype=np.int32)
    ]
    for sequence_lengths, inputs, tag_indices in zip(
            sequence_lengths_list, inputs_list, tag_indices_list):
        num_words = inputs.shape[0]
        num_tags = inputs.shape[1]
        all_sequence_scores = []
        all_sequences = []

        # Compare the dynamic program with brute force computation.
        for tag_indices in itertools.product(
                range(num_tags), repeat=sequence_lengths):
            tag_indices = list(tag_indices)
            tag_indices.extend([0] * (num_words - sequence_lengths))
            all_sequences.append(tag_indices)
            sequence_score = text.crf_sequence_score(
                inputs=tf.expand_dims(inputs, 0),
                tag_indices=tf.expand_dims(tag_indices, 0),
                sequence_lengths=tf.expand_dims(sequence_lengths, 0),
                transition_params=tf.constant(transition_params))
            sequence_score = tf.squeeze(sequence_score, [0])
            all_sequence_scores.append(sequence_score)

        tf_all_sequence_scores = self.evaluate(all_sequence_scores)

        expected_max_sequence_index = np.argmax(tf_all_sequence_scores)
        expected_max_sequence = all_sequences[expected_max_sequence_index]
        expected_max_score = tf_all_sequence_scores[expected_max_sequence_index]

        actual_max_sequence, actual_max_score = text.crf_decode(
            tf.expand_dims(inputs, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0))
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])
        tf_actual_max_sequence, tf_actual_max_score = self.evaluate(
            [actual_max_sequence, actual_max_score])

        self.assertAllClose(tf_actual_max_score, expected_max_score)
        self.assertEqual(
            list(tf_actual_max_sequence[:sequence_lengths]),
            expected_max_sequence[:sequence_lengths])
def test_crf_decode_zero_seq_length():
    """Test that crf_decode works when sequence_length contains one or more zeros."""
    inputs = tf.constant(np.ones([2, 10, 5], dtype=np.float32))
    transition_params = tf.constant(np.ones([5, 5], dtype=np.float32))
    sequence_lengths = tf.constant(np.zeros([2], dtype=np.int32))
    tags, scores = text.crf_decode(inputs, transition_params, sequence_lengths)
    assert len(tags.shape) == 2
    assert len(scores.shape) == 1
def call(self, emissions: tf.Tensor, mask: tf.Tensor) -> tf.RaggedTensor:
    # Derive true sequence lengths from the padding mask.
    sequence_lengths = tf.math.reduce_sum(tf.cast(mask, tf.int32), axis=1)
    decoded_tag_ids, _ = crf_decode(
        emissions,
        self.transition_weight,
        sequence_lengths,
    )
    # Drop padded positions so each example keeps only its real tags.
    boolean_mask = tf.cast(mask, tf.bool)
    return tf.ragged.boolean_mask(decoded_tag_ids, boolean_mask)
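# Hedged sketch (illustrative values; assumes `from tensorflow_addons.text import crf_decode`):
# shows how the padding mask above turns crf_decode's dense output into a ragged
# result that keeps only real tokens.
import tensorflow as tf
from tensorflow_addons.text import crf_decode

emissions = tf.random.normal([2, 4, 3])                   # [batch, max_len, num_tags]
mask = tf.constant([[1, 1, 1, 0], [1, 1, 0, 0]])          # 1 = token, 0 = padding
lengths = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)  # [3, 2]
transitions = tf.zeros([3, 3])

tags, _ = crf_decode(emissions, transitions, lengths)     # dense [2, 4]; padded entries are arbitrary
ragged_tags = tf.ragged.boolean_mask(tags, tf.cast(mask, tf.bool))
# ragged_tags.row_lengths() == [3, 2]; the padded positions have been dropped.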
def test_crf_constrained_decode(dtype):
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int32)
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[4, 5, -3]], dtype=dtype),
    ]
    # Note: built with `bool` rather than the removed `np.bool` alias.
    tag_bitmap_list = [
        np.array(
            [
                [True, False, False],
                [False, True, True],
                [False, True, True],
                [False, True, True],
            ],
            dtype=bool,
        ),
        np.array([[False, True, True]], dtype=bool),
    ]
    for sequence_lengths, inputs, tag_bitmap in zip(
            sequence_lengths_list, inputs_list, tag_bitmap_list):
        filtered_inputs = text.crf_filtered_inputs(
            inputs=tf.expand_dims(inputs, 0),
            tag_bitmap=tf.expand_dims(tag_bitmap, 0))

        expected_max_sequence, expected_max_score = text.crf_decode(
            filtered_inputs,
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        expected_max_sequence = tf.squeeze(expected_max_sequence, [0])
        expected_max_score = tf.squeeze(expected_max_score, [0])

        actual_max_sequence, actual_max_score = text.crf_constrained_decode(
            tf.expand_dims(inputs, 0),
            tf.expand_dims(tag_bitmap, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(
            actual_max_score, expected_max_score, 1e-6, 1e-6)
        assert list(actual_max_sequence[:sequence_lengths]) == list(
            expected_max_sequence[:sequence_lengths])
def testCrfDecodeZeroSeqLength(self):
    """Test that crf_decode works when sequence_length contains one or more zeros."""
    inputs = tf.constant(np.ones([2, 10, 5], dtype=np.float32))
    transition_params = tf.constant(np.ones([5, 5], dtype=np.float32))
    sequence_lengths = tf.constant(np.zeros([2], dtype=np.int32))
    tags, scores = text.crf_decode(inputs, transition_params, sequence_lengths)
    tf_tags, tf_scores = self.evaluate([tags, scores])
    self.assertEqual(len(tf_tags.shape), 2)
    self.assertEqual(len(tf_scores.shape), 1)
def build_export_output(self, model):  # pylint: disable=no-self-use
    """
    Build the output of the model.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    model.preds, score = crf_decode(model.logits, model.transitions, model.input_x_len)
    model.score = tf.identity(score, name="score")
    model.output_dict = {"score": model.score, "preds": model.preds}
def viterbi_accuracy(y_true, y_pred):
    # Note: relies on `self` from the enclosing CRF layer scope
    # (`self.transitions`, `self.sparse_target`).
    # -1e10 to avoid zero at sum(mask)
    mask = K.cast(K.all(K.greater(y_pred, -1e10), axis=2), K.floatx())
    shape = tf.shape(y_pred)
    sequence_lengths = tf.ones(shape[0], dtype=tf.int32) * (shape[1])
    y_pred, _ = crf_decode(y_pred, self.transitions, sequence_lengths)
    if self.sparse_target:
        y_true = K.argmax(y_true, 2)
    y_pred = K.cast(y_pred, 'int32')
    y_true = K.cast(y_true, 'int32')
    corrects = K.cast(K.equal(y_true, y_pred), K.floatx())
    return K.sum(corrects * mask) / K.sum(mask)
def test_crf_decode_save_load(tmpdir):
    tf.keras.backend.clear_session()
    input_tensor = tf.keras.Input(shape=(10, 3), dtype=tf.float32, name="input_tensor")
    seq_len = tf.keras.Input(shape=(), dtype=tf.int32, name="seq_len")
    transition = tf.constant([[1, 1, 0], [0, 1, 1], [1, 0, 1]], dtype=tf.float32)

    output = tf.multiply(input_tensor, tf.constant(1.0))
    decoded, _ = text.crf_decode(input_tensor, transition, seq_len)

    model = tf.keras.Model(
        inputs=[input_tensor, seq_len],
        outputs=[output, decoded],
        name="example_model")
    model.compile(optimizer="Adam")

    x_data = {
        "input_tensor": np.random.random_sample((5, 10, 3)).astype(dtype=np.float32),
        "seq_len": np.array([10] * 5, dtype=np.int32),
    }
    tensor_name = (
        "tf.math.multiply"
        if LooseVersion(tf.__version__) >= "2.5.0"
        else "tf_op_layer_Mul")
    y_data = {tensor_name: np.random.randint(0, 3, (5, 10))}

    model.fit(x_data, y_data)
    model.predict({
        "input_tensor": tf.expand_dims(x_data["input_tensor"][0], 0),
        "seq_len": np.array([10]),
    })

    temp_dir = str(tmpdir.mkdir("model"))
    tf.saved_model.save(model, temp_dir)

    tf.keras.backend.clear_session()
    model = tf.keras.models.load_model(
        temp_dir,
        custom_objects={
            "CrfDecodeForwardRnnCell": text.crf.CrfDecodeForwardRnnCell
        },
    )
    model.fit(x_data, y_data)
    model.predict({
        "input_tensor": tf.expand_dims(x_data["input_tensor"][0], 0),
        "seq_len": np.array([10]),
    })
def call(self, inputs, sequence_lengths=None, training=None, **kwargs):
    sequences = tf.convert_to_tensor(inputs, dtype=self.dtype)
    if sequence_lengths is not None:
        assert len(sequence_lengths.shape) == 2
        assert tf.convert_to_tensor(sequence_lengths).dtype == 'int32'
        seq_len_shape = tf.convert_to_tensor(sequence_lengths).get_shape().as_list()
        assert seq_len_shape[1] == 1
        self.sequence_lengths = K.flatten(sequence_lengths)
    else:
        self.sequence_lengths = tf.ones(
            tf.shape(inputs)[0], dtype=tf.int32) * (tf.shape(inputs)[1])

    viterbi_sequence, _ = crf_decode(
        sequences, self.transitions, self.sequence_lengths)
    output = K.one_hot(viterbi_sequence, self.output_dim)
    return K.in_train_phase(sequences, output)
def build_output(self, model):  # pylint: disable=no-self-use
    """
    Build the output of the model.
    `score` and `input_y` are for loss calculation.
    `preds` and `y_ground_truth` are for metric calculation.
    """
    model.preds, score = crf_decode(model.logits, model.transitions, model.input_x_len)
    model.score = tf.identity(score, name="score")
    model.y_ground_truth = model.input_y
    if model.use_pretrained_model:
        logging.info("initialize_pretrained_model_variables")
        self.initialize_pretrained_model_variables(
            model.pretrained_model_path, model.pretrained_model_mode)
def viterbi_decode(self, potentials, sequence_length):
    """Decode the highest scoring sequence of tags in TensorFlow.

    This function operates on tensors.

    Args:
      potentials: A [batch_size, max_seq_len, num_tags] tensor, matrix of
        unary potentials.
      sequence_length: A [batch_size] tensor, containing sequence lengths.

    Returns:
      decode_tags: A [batch_size, max_seq_len] tensor, with dtype tf.int32.
        Contains the highest scoring tag indices.
    """
    decode_tags, best_score = crf_decode(
        potentials, self.transition_params, sequence_length)
    return decode_tags
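# Hedged sketch (values borrowed from the decode tests above; assumes
# `from tensorflow_addons.text import crf_decode`): contrasts per-step argmax with
# Viterbi decoding, which also accounts for the transition scores.
import tensorflow as tf
from tensorflow_addons.text import crf_decode

potentials = tf.constant([[[4.0, 5.0, -3.0],
                           [3.0, -1.0, 3.0],
                           [-1.0, 2.0, 1.0]]])             # [1, 3, 3]
transition_params = tf.constant([[-3.0, 5.0, -2.0],
                                 [3.0, 4.0, 1.0],
                                 [1.0, 2.0, 1.0]])
sequence_length = tf.constant([3], dtype=tf.int32)

greedy_tags = tf.argmax(potentials, axis=-1)               # ignores transitions
viterbi_tags, best_score = crf_decode(potentials, transition_params, sequence_length)
# viterbi_tags maximizes the joint emission + transition score over the whole
# sequence, so it can differ from greedy_tags.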
def test_crf_decode(dtype):
    transition_params = np.array(
        [[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype)
    # Test both the length-1 and regular cases.
    sequence_lengths_list = [
        np.array(3, dtype=np.int32),
        np.array(1, dtype=np.int64),
    ]
    inputs_list = [
        np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype),
        np.array([[-1, 2, 1]], dtype=dtype),
    ]
    tag_indices_list = [
        np.array([1, 2, 1, 0], dtype=np.int32),
        np.array([2], dtype=np.int32),
    ]
    for sequence_lengths, inputs, tag_indices in zip(
        sequence_lengths_list, inputs_list, tag_indices_list
    ):
        expected_max_sequence, expected_max_score = brute_force_decode(
            sequence_lengths, inputs, transition_params
        )

        actual_max_sequence, actual_max_score = text.crf_decode(
            tf.expand_dims(inputs, 0),
            tf.constant(transition_params),
            tf.expand_dims(sequence_lengths, 0),
        )
        actual_max_sequence = tf.squeeze(actual_max_sequence, [0])
        actual_max_score = tf.squeeze(actual_max_score, [0])

        test_utils.assert_allclose_according_to_type(
            actual_max_score, expected_max_score, 1e-6, 1e-6
        )
        assert (
            list(actual_max_sequence[:sequence_lengths])
            == expected_max_sequence[:sequence_lengths]
        )
def call(self, inputs: list[tf.Tensor], mask: tf.Tensor) -> tf.RaggedTensor:
    emissions, tag_ids = inputs
    mask = tf.cast(mask, tf.int32)
    sequence_lengths = tf.math.reduce_sum(mask, axis=1)

    # CRF negative log-likelihood, averaged over the batch, recorded as a layer loss.
    likelihoods, _ = crf_log_likelihood(
        emissions,
        tag_ids,
        sequence_lengths,
        self.transition_weight,
    )
    loss = tf.math.negative(tf.math.reduce_mean(likelihoods))
    self.add_loss(loss)

    decoded_tag_ids, _ = crf_decode(
        emissions,
        self.transition_weight,
        sequence_lengths,
    )
    # Token-level accuracy over non-padding positions, recorded as a layer metric.
    is_equal = tf.cast(tf.equal(tag_ids, decoded_tag_ids), tf.int32)
    tag_accuracy = tf.reduce_sum(is_equal * mask) / tf.reduce_sum(mask)
    self.add_metric(tag_accuracy, name="tag_accuracy")

    boolean_mask = tf.cast(mask, tf.bool)
    return tf.ragged.boolean_mask(decoded_tag_ids, boolean_mask)
def call(self, potentials, transition_params, sequence_length):
    return text.crf_decode(potentials, transition_params, sequence_length)