def testDenseSequencesToSparse(self):
    """Round-trip dense labels through `ctc_ops.dense_labels_to_sparse`.

    First converts a batch of dense label rows to sparse form and back and
    checks that the densified result equals the input. Then repeats the
    conversion with three extra zero columns appended to every row (same
    lengths) and checks that the densified result matches the one obtained
    from the narrower rows.
    """
    lengths = [4, 5, 5]
    dense_rows = [
        [1, 3, 3, 3, 0],
        [1, 4, 4, 4, 0],
        [4, 2, 2, 9, 4],
    ]
    restored = sparse_ops.sparse_tensor_to_dense(
        ctc_ops.dense_labels_to_sparse(dense_rows, lengths))
    self.assertAllEqual(dense_rows, restored)

    # Same rows, each widened with three trailing zeros.
    widened_rows = [row + [0, 0, 0] for row in dense_rows]
    restored_from_widened = sparse_ops.sparse_tensor_to_dense(
        ctc_ops.dense_labels_to_sparse(widened_rows, lengths))
    self.assertAllEqual(restored_from_widened, restored)
def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
    """Compare `ctc_loss_dense` with `blank_index=2` against `ctc_loss`.

    The reference loss is `ctc_ops.ctc_loss` on the unshifted logits with
    sparse labels. The tested loss is `ctc_ops.ctc_loss_dense` on logits whose
    last channel has been moved to position `blank_index`, with every label
    greater than or equal to `blank_index` incremented by one. Loss values and
    gradients with respect to `logits` are compared 32 times.
    """
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12

    logits_shape = [num_frames, batch_size, num_labels]
    logits = random_ops.random_uniform(logits_shape)
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=0,
        maxval=num_labels - 1,
        dtype=dtypes.int64)
    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    # Multiply the labels by the per-example sequence mask.
    valid_positions = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= valid_positions
    logit_lengths = [num_frames] * batch_size

    # Reference path: sparse labels fed to ctc_loss on the original logits.
    reference_labels = ctc_ops.dense_labels_to_sparse(
        math_ops.cast(labels, dtypes.int32), label_lengths)
    reference_loss = ctc_ops.ctc_loss(
        labels=reference_labels,
        inputs=logits,
        sequence_length=logit_lengths,
        time_major=True)
    reference_grads = gradients_impl.gradients(reference_loss, [logits])[0]

    # Shift the blank logits/labels to be somewhere in the middle.
    blank_index = 2
    channels_before = logits[:, :, :blank_index]
    last_channel = logits[:, :, -1:]
    channels_after = logits[:, :, blank_index:-1]
    shifted_logits = array_ops.concat(
        [channels_before, last_channel, channels_after], axis=2)
    below_blank = labels < blank_index
    shifted_labels = array_ops.where(below_blank, labels, labels + 1)

    dense_loss = ctc_ops.ctc_loss_dense(
        labels=shifted_labels,
        logits=shifted_logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        blank_index=blank_index)
    dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

    with self.cached_session():
        for _ in range(32):
            dense_value, reference_value = self.evaluate(
                [dense_loss, reference_loss])
            self.assertAllClose(dense_value, reference_value)

            dense_grad_value, reference_grad_value = self.evaluate(
                [dense_grads, reference_grads])
            self.assertAllClose(
                dense_grad_value,
                reference_grad_value,
                rtol=2e-06,
                atol=2e-06)
def testCtcLossDenseIsSameAsCtcLoss(self):
    """Compare `ctc_loss_dense` (default blank index) against `ctc_loss`.

    Runs on "/GPU:0" when `test.is_gpu_available()` is true, otherwise on
    "/CPU:0". Each example `i` gets a logit length of
    `minimum_logits_length + i`. The reference path subtracts one from the
    labels and moves logit channel 0 to the end before calling
    `ctc_ops.ctc_loss`. Loss values and gradients with respect to `logits`
    are compared 32 times.
    """
    if test.is_gpu_available():
        device_name = "/GPU:0"
    else:
        device_name = "/CPU:0"

    with ops.device(device_name):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        minimum_logits_length = 10
        num_frames = minimum_logits_length + batch_size

        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform(
            [batch_size, label_length],
            minval=1,
            maxval=num_labels,
            dtype=dtypes.int64)
        label_lengths = random_ops.random_uniform(
            [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
        # Multiply the labels by the per-example sequence mask.
        valid_positions = array_ops.sequence_mask(
            label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
        labels *= valid_positions

        # One distinct logit length per batch entry.
        logit_lengths = math_ops.range(batch_size) + minimum_logits_length

        dense_loss = ctc_ops.ctc_loss_dense(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths)
        dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

        # Shift labels down by one (move blank from 0 to num_labels -1)
        reference_labels = math_ops.cast(labels, dtypes.int32) - 1
        non_blank_channels = logits[:, :, 1:]
        blank_channel = logits[:, :, 0:1]
        reference_logits = array_ops.concat(
            [non_blank_channels, blank_channel], axis=2)

        reference_labels = ctc_ops.dense_labels_to_sparse(
            reference_labels, label_lengths)
        reference_loss = ctc_ops.ctc_loss(
            labels=reference_labels,
            inputs=reference_logits,
            sequence_length=logit_lengths,
            time_major=True)
        reference_grads = gradients_impl.gradients(
            reference_loss, [logits])[0]

        with self.cached_session():
            for _ in range(32):
                dense_value, reference_value = self.evaluate(
                    [dense_loss, reference_loss])
                self.assertAllClose(dense_value, reference_value)

                dense_grad_value, reference_grad_value = self.evaluate(
                    [dense_grads, reference_grads])
                self.assertAllClose(
                    dense_grad_value,
                    reference_grad_value,
                    rtol=4e-06,
                    atol=4e-06)
def testDenseSequencesToSparse(self):
    """Check dense -> sparse -> dense conversion of CTC label batches.

    The first assertion verifies that the round trip reproduces the input
    rows. The second verifies that the same rows with extra trailing zero
    columns (and unchanged lengths) densify to the same result as the
    narrower input.
    """

    def _round_trip(dense_labels, lengths):
        # Convert to a sparse tensor, then straight back to dense.
        as_sparse = ctc_ops.dense_labels_to_sparse(dense_labels, lengths)
        return sparse_ops.sparse_tensor_to_dense(as_sparse)

    labels = [[1, 3, 3, 3, 0],
              [1, 4, 4, 4, 0],
              [4, 2, 2, 9, 4]]
    new_dense = _round_trip(labels, [4, 5, 5])
    self.assertAllEqual(labels, new_dense)

    padded_labels = [[1, 3, 3, 3, 0, 0, 0, 0],
                     [1, 4, 4, 4, 0, 0, 0, 0],
                     [4, 2, 2, 9, 4, 0, 0, 0]]
    padded_dense = _round_trip(padded_labels, [4, 5, 5])
    self.assertAllEqual(padded_dense, new_dense)
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
    """Compare `ctc_loss_dense` called with `unique=` against `ctc_loss`.

    The tested loss is `ctc_ops.ctc_loss_dense` with
    `unique=ctc_ops.ctc_unique_labels(labels)`. The reference path subtracts
    one from the labels and moves logit channel 0 to the end before calling
    `ctc_ops.ctc_loss`. Loss values and gradients with respect to `logits`
    are compared 32 times.

    Tensors are fetched through `self.evaluate` rather than a raw
    `Session.run` call, so the test does not depend on holding a session
    handle.
    """
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12
    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=1,
        maxval=num_labels,
        dtype=dtypes.int64)

    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    # Multiply the labels by the per-example sequence mask.
    label_mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= label_mask

    logit_lengths = [num_frames] * batch_size

    ctc_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        unique=ctc_ops.ctc_unique_labels(labels))
    ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

    # Shift labels down by one (move blank from 0 to num_labels -1)
    tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
    tf_nn_ctc_logits = array_ops.concat([
        logits[:, :, 1:],
        logits[:, :, 0:1],
    ], axis=2)

    tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
        tf_ctc_loss_labels, label_lengths)

    tf_nn_ctc_loss = ctc_ops.ctc_loss(
        labels=tf_ctc_loss_labels,
        inputs=tf_nn_ctc_logits,
        sequence_length=logit_lengths,
        time_major=True)
    tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

    # The session handle itself is not needed; `self.evaluate` does the
    # fetching inside the session context.
    with self.cached_session():
        for _ in range(32):
            self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
            self.assertAllClose(
                *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                rtol=2e-06,
                atol=2e-06)
def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
    """Verify `ctc_loss_dense(blank_index=2)` agrees with `ctc_loss`.

    `ctc_ops.ctc_loss` is evaluated on the original logits with sparse labels.
    `ctc_ops.ctc_loss_dense` is evaluated on logits whose final channel was
    relocated to index `blank_index`, with labels at or above `blank_index`
    bumped by one. Both the losses and their gradients with respect to
    `logits` must agree on each of 32 evaluations.
    """
    random_seed.set_random_seed(5)

    batch_size, num_labels, label_length, num_frames = 8, 6, 5, 12

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=0,
        maxval=num_labels - 1,
        dtype=dtypes.int64)
    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels = labels * mask

    logit_lengths = [num_frames] * batch_size

    # Reference loss/gradients from the sparse-label API.
    int32_labels = math_ops.cast(labels, dtypes.int32)
    sparse_labels = ctc_ops.dense_labels_to_sparse(int32_labels, label_lengths)
    expected_loss = ctc_ops.ctc_loss(
        labels=sparse_labels,
        inputs=logits,
        sequence_length=logit_lengths,
        time_major=True)
    expected_grads = gradients_impl.gradients(expected_loss, [logits])[0]

    # Shift the blank logits/labels to be somewhere in the middle.
    blank_index = 2
    logit_pieces = [
        logits[:, :, :blank_index],
        logits[:, :, -1:],
        logits[:, :, blank_index:-1],
    ]
    shifted_logits = array_ops.concat(logit_pieces, axis=2)
    shifted_labels = array_ops.where(labels < blank_index, labels, labels + 1)

    actual_loss = ctc_ops.ctc_loss_dense(
        labels=shifted_labels,
        logits=shifted_logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        blank_index=blank_index)
    actual_grads = gradients_impl.gradients(actual_loss, [logits])[0]

    # Each entry: tensors to fetch together, and the tolerance overrides for
    # comparing them (an empty dict keeps assertAllClose's defaults).
    comparisons = (
        ([actual_loss, expected_loss], {}),
        ([actual_grads, expected_grads], {"rtol": 2e-06, "atol": 2e-06}),
    )

    with self.cached_session():
        for _ in range(32):
            for fetches, tolerances in comparisons:
                self.assertAllClose(*self.evaluate(fetches), **tolerances)
def _ctc_loss_v3(labels, logits, label_length, logit_length, use_gpu):
    """Compute `ctc_ops.ctc_loss_v3` and its gradient with respect to `logits`.

    The dense `labels` are first converted to sparse form with
    `ctc_ops.dense_labels_to_sparse`, and the loss is computed with
    `blank_index=0` under a `GradientTape` that watches `logits`. All work is
    done inside `test_util.device(use_gpu=use_gpu)`.

    Args:
      labels: Dense labels passed to `dense_labels_to_sparse`.
      logits: Logits passed to `ctc_loss_v3`; the gradient is taken with
        respect to this value.
      label_length: Label lengths, used both for the sparse conversion and as
        the `label_length` argument of the loss.
      logit_length: Passed through as the `logit_length` argument of the loss.
      use_gpu: Forwarded to `test_util.device`.

    Returns:
      A `(loss, grads)` tuple, where `grads` is the result of
      `tape.gradient(loss, [logits])`.
    """
    with test_util.device(use_gpu=use_gpu):
        labels_as_sparse = ctc_ops.dense_labels_to_sparse(labels, label_length)
        loss_kwargs = dict(
            labels=labels_as_sparse,
            logits=logits,
            label_length=label_length,
            logit_length=logit_length,
            blank_index=0)
        with backprop.GradientTape() as tape:
            tape.watch(logits)
            loss = ctc_ops.ctc_loss_v3(**loss_kwargs)
        grads = tape.gradient(loss, [logits])
        return loss, grads
def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
    """Compare `ctc_loss_dense(blank_index=-1)` against `ctc_loss`.

    Runs on "/GPU:0" when `test.is_gpu_available()` is true, otherwise on
    "/CPU:0". Both losses receive the same logits and the same labels (dense
    for `ctc_loss_dense`, sparse int32 for `ctc_loss`). Loss values and
    gradients with respect to `logits` are compared 32 times.
    """
    if test.is_gpu_available():
        device_name = "/GPU:0"
    else:
        device_name = "/CPU:0"

    with ops.device(device_name):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12

        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform(
            [batch_size, label_length],
            minval=0,
            maxval=num_labels - 1,
            dtype=dtypes.int64)
        label_lengths = random_ops.random_uniform(
            [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
        # Multiply the labels by the per-example sequence mask.
        valid_positions = array_ops.sequence_mask(
            label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
        labels *= valid_positions
        logit_lengths = [num_frames] * batch_size

        dense_loss = ctc_ops.ctc_loss_dense(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            blank_index=-1)
        dense_grads = gradients_impl.gradients(dense_loss, [logits])[0]

        # Reference path: same labels in sparse int32 form, same logits.
        reference_labels = ctc_ops.dense_labels_to_sparse(
            math_ops.cast(labels, dtypes.int32), label_lengths)
        reference_loss = ctc_ops.ctc_loss(
            labels=reference_labels,
            inputs=logits,
            sequence_length=logit_lengths,
            time_major=True)
        reference_grads = gradients_impl.gradients(
            reference_loss, [logits])[0]

        with self.cached_session():
            for _ in range(32):
                dense_value, reference_value = self.evaluate(
                    [dense_loss, reference_loss])
                self.assertAllClose(dense_value, reference_value)

                dense_grad_value, reference_grad_value = self.evaluate(
                    [dense_grads, reference_grads])
                self.assertAllClose(
                    dense_grad_value,
                    reference_grad_value,
                    rtol=2e-06,
                    atol=2e-06)
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
    """Verify `ctc_loss_dense` with precomputed `unique` matches `ctc_loss`.

    `ctc_ops.ctc_loss_dense` is given `unique=ctc_ops.ctc_unique_labels(labels)`.
    The reference `ctc_ops.ctc_loss` call uses labels decremented by one and
    logits with channel 0 moved to the end. Losses and gradients with respect
    to `logits` must agree on each of 32 evaluations.
    """
    random_seed.set_random_seed(5)

    batch_size, num_labels, label_length, num_frames = 8, 6, 5, 12

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length],
        minval=1,
        maxval=num_labels,
        dtype=dtypes.int64)
    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels = labels * mask

    logit_lengths = [num_frames] * batch_size

    unique_labels = ctc_ops.ctc_unique_labels(labels)
    actual_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        unique=unique_labels)
    actual_grads = gradients_impl.gradients(actual_loss, [logits])[0]

    # Shift labels down by one (move blank from 0 to num_labels -1)
    shifted_labels = math_ops.cast(labels, dtypes.int32) - 1
    logit_pieces = [logits[:, :, 1:], logits[:, :, 0:1]]
    rotated_logits = array_ops.concat(logit_pieces, axis=2)

    sparse_labels = ctc_ops.dense_labels_to_sparse(
        shifted_labels, label_lengths)
    expected_loss = ctc_ops.ctc_loss(
        labels=sparse_labels,
        inputs=rotated_logits,
        sequence_length=logit_lengths,
        time_major=True)
    expected_grads = gradients_impl.gradients(expected_loss, [logits])[0]

    with self.cached_session():
        for _ in range(32):
            actual_value, expected_value = self.evaluate(
                [actual_loss, expected_loss])
            self.assertAllClose(actual_value, expected_value)

            actual_grad_value, expected_grad_value = self.evaluate(
                [actual_grads, expected_grads])
            self.assertAllClose(
                actual_grad_value,
                expected_grad_value,
                rtol=2e-06,
                atol=2e-06)
def testCtcLossV2(self):
    """Check `ctc_loss_v2` with sparse labels against the dense-label result.

    A reference loss and gradient are computed from dense labels under a
    `GradientTape`. The loss computed from the sparse form of the same labels
    (with `blank_index=0`) is then compared against that reference. The
    comparison helper returns immediately when executing eagerly, so the
    assertions only run in graph mode.
    """
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    max_label_length = 5
    num_frames = 12

    labels = random_ops.random_uniform(
        [batch_size, max_label_length],
        minval=1,
        maxval=num_labels,
        dtype=dtypes.int64)
    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])

    label_length = random_ops.random_uniform(
        [batch_size], minval=2, maxval=max_label_length, dtype=dtypes.int64)
    # Multiply the labels by the per-example sequence mask.
    valid_positions = array_ops.sequence_mask(
        label_length, maxlen=max_label_length, dtype=label_length.dtype)
    labels *= valid_positions
    logit_length = [num_frames] * batch_size

    with backprop.GradientTape() as tape:
        tape.watch(logits)
        ref_loss = ctc_ops.ctc_loss_v2(
            labels=labels,
            logits=logits,
            label_length=label_length,
            logit_length=logit_length)
    ref_grad = tape.gradient(ref_loss, [logits])

    sparse_labels = ctc_ops.dense_labels_to_sparse(labels, label_length)

    def assert_same_loss_and_grads(loss):
        # Nothing is checked in eager mode.
        if context.executing_eagerly():
            return
        with self.cached_session():
            loss_value, ref_loss_value = self.evaluate([loss, ref_loss])
            self.assertAllClose(loss_value, ref_loss_value)

            grad = gradients_impl.gradients(loss, [logits])
            grad_value, ref_grad_value = self.evaluate([grad, ref_grad])
            self.assertAllClose(
                grad_value, ref_grad_value, rtol=2e-06, atol=2e-06)

    sparse_loss = ctc_ops.ctc_loss_v2(
        labels=sparse_labels,
        logits=logits,
        label_length=label_length,
        logit_length=logit_length,
        blank_index=0)
    assert_same_loss_and_grads(sparse_loss)
def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
    """Verify `ctc_loss_dense` with `blank_index=-1` agrees with `ctc_loss`.

    The device is "/GPU:0" if `test.is_gpu_available()` reports a GPU and
    "/CPU:0" otherwise. Logits are passed unchanged to both losses; labels are
    passed dense to `ctc_loss_dense` and as a sparse int32 tensor to
    `ctc_loss`. Losses and gradients with respect to `logits` must agree on
    each of 32 evaluations.
    """
    target_device = "/GPU:0" if test.is_gpu_available() else "/CPU:0"
    with ops.device(target_device):
        random_seed.set_random_seed(5)

        batch_size, num_labels, label_length, num_frames = 8, 6, 5, 12

        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform(
            [batch_size, label_length],
            minval=0,
            maxval=num_labels - 1,
            dtype=dtypes.int64)
        label_lengths = random_ops.random_uniform(
            [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
        mask = array_ops.sequence_mask(
            label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
        labels = labels * mask

        logit_lengths = [num_frames] * batch_size

        actual_loss = ctc_ops.ctc_loss_dense(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            blank_index=-1)
        actual_grads = gradients_impl.gradients(actual_loss, [logits])[0]

        int32_labels = math_ops.cast(labels, dtypes.int32)
        sparse_labels = ctc_ops.dense_labels_to_sparse(
            int32_labels, label_lengths)
        expected_loss = ctc_ops.ctc_loss(
            labels=sparse_labels,
            inputs=logits,
            sequence_length=logit_lengths,
            time_major=True)
        expected_grads = gradients_impl.gradients(expected_loss, [logits])[0]

        # Each entry: tensors to fetch together, and the tolerance overrides
        # for comparing them (an empty dict keeps assertAllClose's defaults).
        comparisons = (
            ([actual_loss, expected_loss], {}),
            ([actual_grads, expected_grads], {"rtol": 2e-06, "atol": 2e-06}),
        )

        with self.cached_session():
            for _ in range(32):
                for fetches, tolerances in comparisons:
                    self.assertAllClose(*self.evaluate(fetches), **tolerances)
def testCtcLossV2(self):
    """Compare sparse-label `ctc_loss_v2` output with the dense-label output.

    The dense-label loss and its gradient with respect to `logits` (taken via
    `GradientTape`) serve as the reference. The same labels are converted with
    `ctc_ops.dense_labels_to_sparse` and fed to `ctc_loss_v2` with
    `blank_index=0`; in graph mode the resulting loss and gradient are
    asserted close to the reference. In eager mode no assertion is made.
    """
    random_seed.set_random_seed(5)

    batch_size, num_labels, max_label_length, num_frames = 8, 6, 5, 12

    labels = random_ops.random_uniform(
        [batch_size, max_label_length],
        minval=1,
        maxval=num_labels,
        dtype=dtypes.int64)
    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])

    label_length = random_ops.random_uniform(
        [batch_size], minval=2, maxval=max_label_length, dtype=dtypes.int64)
    mask = array_ops.sequence_mask(
        label_length, maxlen=max_label_length, dtype=label_length.dtype)
    labels = labels * mask
    logit_length = [num_frames] * batch_size

    with backprop.GradientTape() as t:
        t.watch(logits)
        expected_loss = ctc_ops.ctc_loss_v2(
            labels=labels,
            logits=logits,
            label_length=label_length,
            logit_length=logit_length)
    expected_grad = t.gradient(expected_loss, [logits])

    sparse_labels = ctc_ops.dense_labels_to_sparse(labels, label_length)

    def _check_against_reference(candidate_loss):
        # Graph mode only: the body is skipped entirely when running eagerly.
        if not context.executing_eagerly():
            with self.cached_session():
                self.assertAllClose(
                    *self.evaluate([candidate_loss, expected_loss]))
                candidate_grad = gradients_impl.gradients(
                    candidate_loss, [logits])
                self.assertAllClose(
                    *self.evaluate([candidate_grad, expected_grad]),
                    rtol=2e-06,
                    atol=2e-06)

    _check_against_reference(
        ctc_ops.ctc_loss_v2(
            labels=sparse_labels,
            logits=logits,
            label_length=label_length,
            logit_length=logit_length,
            blank_index=0))
def _genInputParams(self,
                    num_classes=10,
                    batch_size=32,
                    max_label_sequence_length=50,
                    num_frames=100,
                    logits_time_major=True,
                    sparse_labels=True):
    """Build random labels, logits and length tensors for CTC tests.

    Args:
      num_classes: Size of the last logits dimension; labels are drawn from
        `[1, num_classes)`.
      batch_size: Number of examples.
      max_label_sequence_length: Width of the dense label matrix. Label
        lengths are drawn from `[1, max_label_sequence_length)`.
      num_frames: Number of logit frames; must be at least
        `max_label_sequence_length`.
      logits_time_major: If true, logits have shape
        `(num_frames, batch_size, num_classes)`; otherwise
        `(batch_size, num_frames, num_classes)`.
      sparse_labels: If true, labels are converted with
        `ctc_ops.dense_labels_to_sparse`; otherwise the masked dense array is
        returned.

    Returns:
      A `(labels, logits, labels_lengths, logits_lengths)` tuple. The two
      length entries are produced by `constant_op.constant`; every logit
      length equals `num_frames`.
    """
    assert num_frames >= max_label_sequence_length

    # Zero-pad the labels. Zero is the default blank index in the TF2 API.
    # num_classes includes the blank class
    raw_labels = np.random.randint(
        1,
        num_classes,
        size=(batch_size, max_label_sequence_length),
        dtype=np.int32)
    lengths_np = np.random.randint(
        1, high=max_label_sequence_length, size=batch_size, dtype=np.int32)

    # keep[i, j] is 1 where position j lies before example i's length, else 0.
    positions = np.arange(max_label_sequence_length)
    keep = (positions < lengths_np.reshape(batch_size, 1)).astype(np.int32)
    labels = raw_labels * keep
    if sparse_labels:
        labels = ctc_ops.dense_labels_to_sparse(labels, lengths_np)

    logits_shape = (
        (num_frames, batch_size, num_classes)
        if logits_time_major
        else (batch_size, num_frames, num_classes))
    logits = self._randomFloats(logits_shape)

    labels_lengths = constant_op.constant(lengths_np)
    logits_lengths = constant_op.constant([num_frames] * batch_size)

    return labels, logits, labels_lengths, logits_lengths