    def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform([batch_size, label_length],
                                           minval=0,
                                           maxval=num_labels - 1,
                                           dtype=dtypes.int64)

        label_lengths = random_ops.random_uniform([batch_size],
                                                  minval=2,
                                                  maxval=label_length,
                                                  dtype=dtypes.int64)
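        # Zero out label positions past each row's length; padded slots become 0.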
        label_mask = array_ops.sequence_mask(label_lengths,
                                             maxlen=label_length,
                                             dtype=label_lengths.dtype)
        labels *= label_mask

        logit_lengths = [num_frames] * batch_size

        tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
        tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
            tf_ctc_loss_labels, label_lengths)

        tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                          inputs=logits,
                                          sequence_length=logit_lengths,
                                          time_major=True)
        tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

        # Shift the blank logits/labels to be somewhere in the middle.
        blank_index = 2
        shifted_logits = array_ops.concat([
            logits[:, :, :blank_index],
            logits[:, :, -1:],
            logits[:, :, blank_index:-1],
        ], axis=2)
        shifted_labels = array_ops.where(labels < blank_index, labels,
                                         labels + 1)

        ctc_loss = ctc_ops.ctc_loss_dense(labels=shifted_labels,
                                          logits=shifted_logits,
                                          label_length=label_lengths,
                                          logit_length=logit_lengths,
                                          blank_index=blank_index)
        ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

        with self.cached_session() as sess:
            for _ in range(32):
                self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                self.assertAllClose(
                    *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                    rtol=2e-06,
                    atol=2e-06)
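The test above exercises the blank_index argument of ctc_loss_dense: the classic
tf.nn.ctc_loss reserves the last class for the blank, so the comparison rotates
the last logit channel into position blank_index and bumps every label at or
above blank_index up by one. A minimal standalone NumPy sketch of that shuffle
(illustrative names only, not part of the TensorFlow API):

import numpy as np

blank_index = 2
logits = np.arange(5.0)  # one frame of class scores; the blank is logits[-1]
shifted = np.concatenate(
    [logits[:blank_index], logits[-1:], logits[blank_index:-1]])
print(shifted)  # [0. 1. 4. 2. 3.] -> the old blank (4.) now sits at index 2

labels = np.array([0, 1, 2, 3])
shifted_labels = np.where(labels < blank_index, labels, labels + 1)
print(shifted_labels)  # [0 1 3 4] -> labels at or above 2 step over the blank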
Example #2
    def testCtcLossDenseIsSameAsCtcLoss(self):
        with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
            random_seed.set_random_seed(5)

            batch_size = 8
            num_labels = 6
            label_length = 5
            minimum_logits_length = 10
            num_frames = minimum_logits_length + batch_size
            logits = random_ops.random_uniform(
                [num_frames, batch_size, num_labels])
            labels = random_ops.random_uniform([batch_size, label_length],
                                               minval=1,
                                               maxval=num_labels,
                                               dtype=dtypes.int64)

            label_lengths = random_ops.random_uniform([batch_size],
                                                      minval=2,
                                                      maxval=label_length,
                                                      dtype=dtypes.int64)
            label_mask = array_ops.sequence_mask(label_lengths,
                                                 maxlen=label_length,
                                                 dtype=label_lengths.dtype)
            labels *= label_mask

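            # Give each batch element a different frame count to exercise
            # per-sequence length handling.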
            logit_lengths = math_ops.range(batch_size) + minimum_logits_length

            ctc_loss = ctc_ops.ctc_loss_dense(labels=labels,
                                              logits=logits,
                                              label_length=label_lengths,
                                              logit_length=logit_lengths)
            ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

            # Shift labels down by one (move blank from 0 to num_labels - 1).
            tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
            tf_nn_ctc_logits = array_ops.concat([
                logits[:, :, 1:],
                logits[:, :, 0:1],
            ], axis=2)

            tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
                tf_ctc_loss_labels, label_lengths)

            tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                              inputs=tf_nn_ctc_logits,
                                              sequence_length=logit_lengths,
                                              time_major=True)
            tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss,
                                                       [logits])[0]

            with self.cached_session() as sess:
                for _ in range(32):
                    self.assertAllClose(
                        *self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                    self.assertAllClose(
                        *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                        rtol=4e-06,
                        atol=4e-06)
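Two blank conventions meet in this test: ctc_loss_dense treats class 0 as the
blank by default, while tf.nn.ctc_loss expects the blank at num_labels - 1.
That is why the labels are shifted down by one and the logit channels rotated
before the two losses and their gradients are compared.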
Example #3
    def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform([batch_size, label_length],
                                           minval=1,
                                           maxval=num_labels,
                                           dtype=dtypes.int64)

        label_lengths = random_ops.random_uniform([batch_size],
                                                  minval=2,
                                                  maxval=label_length,
                                                  dtype=dtypes.int64)
        label_mask = array_ops.sequence_mask(label_lengths,
                                             maxlen=label_length,
                                             dtype=label_lengths.dtype)
        labels *= label_mask

        logit_lengths = [num_frames] * batch_size

        ctc_loss = ctc_ops.ctc_loss_dense(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            unique=ctc_ops.ctc_unique_labels(labels))
        ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

        # Shift labels down by one (move blank from 0 to num_labels - 1).
        tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
        tf_nn_ctc_logits = array_ops.concat([
            logits[:, :, 1:],
            logits[:, :, 0:1],
        ], axis=2)

        tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
            tf_ctc_loss_labels, label_lengths)

        tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                          inputs=tf_nn_ctc_logits,
                                          sequence_length=logit_lengths,
                                          time_major=True)
        tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

        with self.cached_session() as sess:
            for _ in range(32):
                self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
                self.assertAllClose(
                    *sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
                    rtol=2e-06,
                    atol=2e-06)
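The unique= argument lets ctc_loss_dense skip redundant work by precomputing,
per batch row, the distinct labels and where each original label falls within
them. Roughly what ctc_unique_labels returns, going by its docstring example
(worth verifying against your TensorFlow version):

import tensorflow as tf
from tensorflow.python.ops import ctc_ops

labels = tf.constant([[3, 4, 4, 3]], dtype=tf.int64)
unique, idx = ctc_ops.ctc_unique_labels(labels)
# unique -> [[3, 4, 0, 0]]  distinct labels per row, zero-padded
# idx    -> [[0, 1, 1, 0]]  index of each original label within `unique`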
    def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
        with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
            random_seed.set_random_seed(5)

            batch_size = 8
            num_labels = 6
            label_length = 5
            num_frames = 12
            logits = random_ops.random_uniform(
                [num_frames, batch_size, num_labels])
            labels = random_ops.random_uniform([batch_size, label_length],
                                               minval=0,
                                               maxval=num_labels - 1,
                                               dtype=dtypes.int64)

            label_lengths = random_ops.random_uniform([batch_size],
                                                      minval=2,
                                                      maxval=label_length,
                                                      dtype=dtypes.int64)
            label_mask = array_ops.sequence_mask(label_lengths,
                                                 maxlen=label_length,
                                                 dtype=label_lengths.dtype)
            labels *= label_mask

            logit_lengths = [num_frames] * batch_size

            ctc_loss = ctc_ops.ctc_loss_dense(labels=labels,
                                              logits=logits,
                                              label_length=label_lengths,
                                              logit_length=logit_lengths,
                                              blank_index=-1)
            ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

            tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
            tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
                tf_ctc_loss_labels, label_lengths)

            tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                              inputs=logits,
                                              sequence_length=logit_lengths,
                                              time_major=True)
            tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss,
                                                       [logits])[0]

            with self.cached_session() as sess:
                for _ in range(32):
                    self.assertAllClose(
                        *self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                    self.assertAllClose(
                        *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                        rtol=2e-06,
                        atol=2e-06)
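With blank_index=-1 the blank resolves to the last class, which is exactly the
convention tf.nn.ctc_loss uses, so the two losses and gradients can be compared
without any logit rotation or label renumbering.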
Example #8
def ctc_estimator(tokens,
                  token_lengths,
                  logits,
                  glogits,
                  sequence_mask,
                  sequence_length_ctc,
                  vocab,
                  run_config,
                  params,
                  mode,
                  model_scope,
                  training_hooks=None):
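    """Builds an EstimatorSpec around a dense CTC loss.

    Computes ctc_loss_dense over `logits` (pinned to CPU unless the gpu_ctc
    flag is set), registers it with tf.losses, attaches CTCHook decoders for
    the autoencoded and generated outputs, and wires an Adam train op using
    the gradient transforms derived from `params`.
    """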
    with tf.name_scope(model_scope + "/"):
        tok_1 = tokens + 1
        ctc_labels_sparse = sparsify(tf.cast(tok_1, tf.int32), sequence_mask)
        ctc_labels = tf.sparse_tensor_to_dense(ctc_labels_sparse,
                                               default_value=-1)
        # ctc_labels = tf.sparse_transpose(ctc_labels, (1,0))
        print("Labels: {}".format(ctc_labels))
        print("logits: {}".format(logits))
        print("glogits: {}".format(glogits))
        # tf.tile(tf.pow([2], depth), (n,))
        print("CTC: {}, {}, {}".format(ctc_labels, logits,
                                       sequence_length_ctc))
        if tf.flags.FLAGS.gpu_ctc:
            ctc_loss_raw = ctc_loss_dense(labels=tok_1,
                                          label_length=token_lengths,
                                          logits=logits,
                                          logit_length=sequence_length_ctc)
        else:
            with tf.device("/cpu:0"):
                ctc_loss_raw = ctc_loss_dense(labels=tok_1,
                                              label_length=token_lengths,
                                              logits=logits,
                                              logit_length=sequence_length_ctc)
            # blank_index=-1
            # sequence_length=tf.shape(logits)[0],
            # ctc_merge_repeated=True,
            # preprocess_collapse_repeated=False,
            # ctc_merge_repeated=True,
            # ignore_longer_outputs_than_inputs=False,
            # time_major=True
        ctc_loss = tf.reduce_mean(ctc_loss_raw, name='ctc_loss')
        tf.losses.add_loss(ctc_loss)

    losses = tf.losses.get_losses(scope=model_scope)
    print("Estimator losses: {}".format(losses))
    losses += tf.losses.get_regularization_losses(scope=model_scope)
    total_loss = tf.add_n(losses)
    updates = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS, scope=model_scope)

    evaluation_hooks = []
    if logits is not None:
        autoencode_hook = CTCHook(logits=logits,
                                  lengths=sequence_length_ctc,
                                  vocab=vocab,
                                  path=os.path.join(run_config.model_dir,
                                                    "autoencoded",
                                                    "autoencoded-{:08d}.csv"),
                                  true=ctc_labels,
                                  name="Autoencoded",
                                  merge_repeated=True)
        evaluation_hooks.append(autoencode_hook)
    if glogits is not None:
        generate_hook = CTCHook(logits=glogits,
                                lengths=sequence_length_ctc,
                                vocab=vocab,
                                path=os.path.join(run_config.model_dir,
                                                  "generated",
                                                  "generated-{:08d}.csv"),
                                true=ctc_labels,
                                name="Generated",
                                merge_repeated=True)
        evaluation_hooks.append(generate_hook)

    tf.summary.scalar('ctc_loss', ctc_loss)
    tf.summary.scalar('total_loss', total_loss)

    # Train
    optimizer = tf.train.AdamOptimizer(params.lr)
    variables = tf.trainable_variables(scope=model_scope)
    transform_grads_fn = make_transform_grads_fn(params=params)

    train_op = create_train_op(total_loss=total_loss,
                               optimizer=optimizer,
                               update_ops=updates,
                               variables_to_train=variables,
                               transform_grads_fn=transform_grads_fn,
                               summarize_gradients=False,
                               aggregation_method=None,
                               check_numerics=True)
    eval_metric_ops = {
        'ctc_loss_eval': tf.metrics.mean(ctc_loss_raw),
        'token_lengths_eval': tf.metrics.mean(token_lengths)
    }

    return EstimatorSpec(mode=mode,
                         loss=total_loss,
                         eval_metric_ops=eval_metric_ops,
                         evaluation_hooks=evaluation_hooks,
                         training_hooks=training_hooks,
                         train_op=train_op)
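A sketch of how ctc_estimator might be wired into an Estimator model_fn;
build_network, my_vocab, and the feature keys are hypothetical stand-ins for
this codebase's real pieces, not names confirmed by the source:

import tensorflow as tf

def model_fn(features, labels, mode, params, config):
    # Hypothetical encoder producing time-major logits of shape [T, B, V + 1].
    logits, glogits, sequence_length = build_network(features["inputs"], params)
    return ctc_estimator(
        tokens=features["tokens"],
        token_lengths=features["token_lengths"],
        logits=logits,
        glogits=glogits,
        sequence_mask=tf.sequence_mask(features["token_lengths"]),
        sequence_length_ctc=sequence_length,
        vocab=my_vocab,
        run_config=config,
        params=params,
        mode=mode,
        model_scope="ctc_model")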