def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12

  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length], minval=0, maxval=num_labels - 1,
      dtype=dtypes.int64)

  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
  label_mask = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels *= label_mask

  logit_lengths = [num_frames] * batch_size

  tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
  tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
      tf_ctc_loss_labels, label_lengths)

  tf_nn_ctc_loss = ctc_ops.ctc_loss(
      labels=tf_ctc_loss_labels,
      inputs=logits,
      sequence_length=logit_lengths,
      time_major=True)
  tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

  # Shift the blank logits/labels to be somewhere in the middle.
  blank_index = 2
  shifted_logits = array_ops.concat([
      logits[:, :, :blank_index],
      logits[:, :, -1:],
      logits[:, :, blank_index:-1],
  ], axis=2)
  shifted_labels = array_ops.where(labels < blank_index, labels, labels + 1)

  ctc_loss = ctc_ops.ctc_loss_dense(
      labels=shifted_labels,
      logits=shifted_logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      blank_index=blank_index)
  ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

  with self.cached_session():
    for _ in range(32):
      self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
      self.assertAllClose(
          *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
          rtol=2e-06,
          atol=2e-06)
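# A minimal standalone sketch (NumPy only; `shift_blank` is a hypothetical
# helper, not part of TensorFlow) of the remapping the test above performs:
# the blank channel moves from the last position to `blank_index`, and every
# label value >= blank_index is bumped up by one to make room for it.
import numpy as np

def shift_blank(logits, labels, blank_index):
  """Relocate the blank logit channel from the last position to blank_index."""
  shifted_logits = np.concatenate([
      logits[..., :blank_index],    # class channels below the new blank slot
      logits[..., -1:],             # the blank channel itself
      logits[..., blank_index:-1],  # remaining class channels, shifted up
  ], axis=-1)
  shifted_labels = np.where(labels < blank_index, labels, labels + 1)
  return shifted_logits, shifted_labels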
def testCtcLossDenseIsSameAsCtcLoss(self):
  with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    minimum_logits_length = 10
    num_frames = minimum_logits_length + batch_size

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length], minval=1, maxval=num_labels,
        dtype=dtypes.int64)

    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    label_mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= label_mask

    logit_lengths = math_ops.range(batch_size) + minimum_logits_length

    ctc_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths)
    ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

    # Shift labels down by one (move blank from 0 to num_labels - 1).
    tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
    tf_nn_ctc_logits = array_ops.concat([
        logits[:, :, 1:],
        logits[:, :, 0:1],
    ], axis=2)

    tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
        tf_ctc_loss_labels, label_lengths)

    tf_nn_ctc_loss = ctc_ops.ctc_loss(
        labels=tf_ctc_loss_labels,
        inputs=tf_nn_ctc_logits,
        sequence_length=logit_lengths,
        time_major=True)
    tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

    with self.cached_session():
      for _ in range(32):
        self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
        self.assertAllClose(
            *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
            rtol=4e-06,
            atol=4e-06)
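# Hedged sketch of the convention bridge used in the test above:
# ctc_loss_dense defaults to blank_index=0, while tf.nn.ctc_loss reserves the
# *last* channel for blank. Rotating channel 0 to the end and decrementing the
# labels maps the first convention onto the second
# (`blank_first_to_blank_last` is a hypothetical helper for illustration).
import numpy as np

def blank_first_to_blank_last(logits, labels):
  rotated_logits = np.concatenate(
      [logits[..., 1:], logits[..., 0:1]], axis=-1)  # blank moves to the end
  return rotated_logits, labels - 1  # labels 1..N-1 become 0..N-2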
def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
  random_seed.set_random_seed(5)

  batch_size = 8
  num_labels = 6
  label_length = 5
  num_frames = 12

  logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
  labels = random_ops.random_uniform(
      [batch_size, label_length], minval=1, maxval=num_labels,
      dtype=dtypes.int64)

  label_lengths = random_ops.random_uniform(
      [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
  label_mask = array_ops.sequence_mask(
      label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
  labels *= label_mask

  logit_lengths = [num_frames] * batch_size

  ctc_loss = ctc_ops.ctc_loss_dense(
      labels=labels,
      logits=logits,
      label_length=label_lengths,
      logit_length=logit_lengths,
      unique=ctc_ops.ctc_unique_labels(labels))
  ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

  # Shift labels down by one (move blank from 0 to num_labels - 1).
  tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
  tf_nn_ctc_logits = array_ops.concat([
      logits[:, :, 1:],
      logits[:, :, 0:1],
  ], axis=2)

  tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
      tf_ctc_loss_labels, label_lengths)

  tf_nn_ctc_loss = ctc_ops.ctc_loss(
      labels=tf_ctc_loss_labels,
      inputs=tf_nn_ctc_logits,
      sequence_length=logit_lengths,
      time_major=True)
  tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

  with self.cached_session():
    for _ in range(32):
      self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
      self.assertAllClose(
          *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
          rtol=2e-06,
          atol=2e-06)
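# ctc_unique_labels precomputes, per batch element, the unique label values
# (padded with 0) plus indices mapping each position back into that unique
# list; passing this pair as `unique=` lets ctc_loss_dense take its fast
# path. A small sketch of the documented behavior (eager execution assumed):
import tensorflow as tf

labels = tf.constant([[3, 4, 4, 3]], dtype=tf.int64)
unique_vals, unique_idx = tf.nn.ctc_unique_labels(labels)
# unique_vals -> [[3, 4, 0, 0]], unique_idx -> [[0, 1, 1, 0]]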
def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
  with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
    random_seed.set_random_seed(5)

    batch_size = 8
    num_labels = 6
    label_length = 5
    num_frames = 12

    logits = random_ops.random_uniform([num_frames, batch_size, num_labels])
    labels = random_ops.random_uniform(
        [batch_size, label_length], minval=0, maxval=num_labels - 1,
        dtype=dtypes.int64)

    label_lengths = random_ops.random_uniform(
        [batch_size], minval=2, maxval=label_length, dtype=dtypes.int64)
    label_mask = array_ops.sequence_mask(
        label_lengths, maxlen=label_length, dtype=label_lengths.dtype)
    labels *= label_mask

    logit_lengths = [num_frames] * batch_size

    ctc_loss = ctc_ops.ctc_loss_dense(
        labels=labels,
        logits=logits,
        label_length=label_lengths,
        logit_length=logit_lengths,
        blank_index=-1)
    ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

    tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
    tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
        tf_ctc_loss_labels, label_lengths)

    tf_nn_ctc_loss = ctc_ops.ctc_loss(
        labels=tf_ctc_loss_labels,
        inputs=logits,
        sequence_length=logit_lengths,
        time_major=True)
    tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

    with self.cached_session():
      for _ in range(32):
        self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
        self.assertAllClose(
            *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
            rtol=2e-06,
            atol=2e-06)
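# Note on the test above: a negative blank_index is resolved against the
# number of classes, so blank_index=-1 with num_labels=6 selects channel 5 --
# the same "blank last" convention tf.nn.ctc_loss uses. That is why, unlike
# the other tests, no logit or label shuffling is needed before comparing
# the two losses.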
def ctc_estimator(tokens, token_lengths, logits, glogits,
                  sequence_mask, sequence_length_ctc,
                  vocab, run_config, params, mode, model_scope,
                  training_hooks=None):
  # `sparsify`, `CTCHook`, `make_transform_grads_fn`, `create_train_op`,
  # `ctc_loss_dense`, and `EstimatorSpec` are assumed to be imported
  # alongside this function (project-local helpers plus TF symbols).
  training_hooks = training_hooks or []
  with tf.name_scope(model_scope + "/"):
    # Shift tokens up by one so index 0 is free for the CTC blank label.
    tok_1 = tokens + 1
    ctc_labels_sparse = sparsify(tf.cast(tok_1, tf.int32), sequence_mask)
    ctc_labels = tf.sparse_tensor_to_dense(ctc_labels_sparse, default_value=-1)
    print("Labels: {}".format(ctc_labels))
    print("logits: {}".format(logits))
    print("glogits: {}".format(glogits))
    print("CTC: {}, {}, {}".format(ctc_labels, logits, sequence_length_ctc))

    if tf.flags.FLAGS.gpu_ctc:
      ctc_loss_raw = ctc_loss_dense(
          labels=tok_1,
          label_length=token_lengths,
          logits=logits,
          logit_length=sequence_length_ctc)
    else:
      # Pin the loss to CPU when the GPU kernel is not requested.
      with tf.device("/cpu:0"):
        ctc_loss_raw = ctc_loss_dense(
            labels=tok_1,
            label_length=token_lengths,
            logits=logits,
            logit_length=sequence_length_ctc)

    ctc_loss = tf.reduce_mean(ctc_loss_raw, name='ctc_loss')
    tf.losses.add_loss(ctc_loss)
    losses = tf.losses.get_losses(scope=model_scope)
    print("Estimator losses: {}".format(losses))
    losses += tf.losses.get_regularization_losses(scope=model_scope)
    total_loss = tf.add_n(losses)
    updates = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS, scope=model_scope)

    evaluation_hooks = []
    if logits is not None:
      autoencode_hook = CTCHook(
          logits=logits,
          lengths=sequence_length_ctc,
          vocab=vocab,
          path=os.path.join(run_config.model_dir, "autoencoded",
                            "autoencoded-{:08d}.csv"),
          true=ctc_labels,
          name="Autoencoded",
          merge_repeated=True)
      evaluation_hooks.append(autoencode_hook)
    if glogits is not None:
      generate_hook = CTCHook(
          logits=glogits,
          lengths=sequence_length_ctc,
          vocab=vocab,
          path=os.path.join(run_config.model_dir, "generated",
                            "generated-{:08d}.csv"),
          true=ctc_labels,
          name="Generated",
          merge_repeated=True)
      evaluation_hooks.append(generate_hook)

    tf.summary.scalar('ctc_loss', ctc_loss)
    tf.summary.scalar('total_loss', total_loss)

    # Train
    optimizer = tf.train.AdamOptimizer(params.lr)
    variables = tf.trainable_variables(scope=model_scope)
    transform_grads_fn = make_transform_grads_fn(params=params)
    train_op = create_train_op(
        total_loss=total_loss,
        optimizer=optimizer,
        update_ops=updates,
        variables_to_train=variables,
        transform_grads_fn=transform_grads_fn,
        summarize_gradients=False,
        aggregation_method=None,
        check_numerics=True)

    eval_metric_ops = {
        'ctc_loss_eval': tf.metrics.mean(ctc_loss_raw),
        'token_lengths_eval': tf.metrics.mean(token_lengths)
    }
    return EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metric_ops,
        evaluation_hooks=evaluation_hooks,
        training_hooks=training_hooks,
        train_op=train_op)