Example #1
    def testDenseSequencesToSparse(self):
        labels = [[1, 3, 3, 3, 0], [1, 4, 4, 4, 0], [4, 2, 2, 9, 4]]
        length = [4, 5, 5]
        sparse = ctc_ops.dense_labels_to_sparse(labels, length)
        new_dense = sparse_ops.sparse_tensor_to_dense(sparse)

        self.assertAllEqual(labels, new_dense)

        padded_labels = [[1, 3, 3, 3, 0, 0, 0, 0], [1, 4, 4, 4, 0, 0, 0, 0],
                         [4, 2, 2, 9, 4, 0, 0, 0]]
        length = [4, 5, 5]
        sparse = ctc_ops.dense_labels_to_sparse(padded_labels, length)
        padded_dense = sparse_ops.sparse_tensor_to_dense(sparse)

        self.assertAllEqual(padded_dense, new_dense)
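A note on the round trip above: dense_labels_to_sparse drops padded positions
past each row's length, so converting back with sparse_tensor_to_dense
recovers the original (unpadded) labels. Below is a minimal re-implementation
sketch using only public TF2 ops; the helper name and the dense-shape handling
are assumptions for illustration, not the library's internals:

    import tensorflow as tf

    def dense_labels_to_sparse_sketch(labels, lengths):
        # Keep only the first lengths[i] entries of row i, packed into a
        # tf.SparseTensor in row-major order.
        labels = tf.convert_to_tensor(labels, dtype=tf.int64)
        mask = tf.sequence_mask(lengths, maxlen=tf.shape(labels)[1])
        indices = tf.where(mask)                # [nnz, 2]: (batch, position)
        values = tf.boolean_mask(labels, mask)  # the valid label values
        dense_shape = tf.shape(labels, out_type=tf.int64)
        return tf.SparseTensor(indices, values, dense_shape)

    sparse = dense_labels_to_sparse_sketch(
        [[1, 3, 3, 3, 0], [1, 4, 4, 4, 0], [4, 2, 2, 9, 4]], [4, 5, 5])
    print(tf.sparse.to_dense(sparse).numpy())  # recovers the labels above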
Example #2
    def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform([batch_size, label_length],
                                           minval=0,
                                           maxval=num_labels - 1,
                                           dtype=dtypes.int64)

        label_lengths = random_ops.random_uniform([batch_size],
                                                  minval=2,
                                                  maxval=label_length,
                                                  dtype=dtypes.int64)
        label_mask = array_ops.sequence_mask(label_lengths,
                                             maxlen=label_length,
                                             dtype=label_lengths.dtype)
        labels *= label_mask

        logit_lengths = [num_frames] * batch_size

        tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
        tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
            tf_ctc_loss_labels, label_lengths)

        tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                          inputs=logits,
                                          sequence_length=logit_lengths,
                                          time_major=True)
        tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

        # Shift the blank logits/labels to be somewhere in the middle.
        blank_index = 2
        shifted_logits = array_ops.concat([
            logits[:, :, :blank_index],
            logits[:, :, -1:],
            logits[:, :, blank_index:-1],
        ], axis=2)
        shifted_labels = array_ops.where(labels < blank_index, labels,
                                         labels + 1)

        ctc_loss = ctc_ops.ctc_loss_dense(labels=shifted_labels,
                                          logits=shifted_logits,
                                          label_length=label_lengths,
                                          logit_length=logit_lengths,
                                          blank_index=blank_index)
        ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

        with self.cached_session():
            for _ in range(32):
                self.assertAllClose(*self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                self.assertAllClose(
                    *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                    rtol=2e-06,
                    atol=2e-06)
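In the public TF2 API the same comparison needs no manual shifting, because
tf.nn.ctc_loss accepts blank_index directly; the shuffling above exists only
to match the V1 op, whose blank is fixed at num_labels - 1. A minimal sketch
with illustrative shapes (not part of the test above):

    import tensorflow as tf

    batch_size, num_frames, num_labels = 2, 12, 6
    logits = tf.random.uniform([num_frames, batch_size, num_labels])
    labels = tf.constant([[1, 3, 3], [4, 5, 0]], dtype=tf.int64)  # 0 pads

    loss = tf.nn.ctc_loss(
        labels=labels,
        logits=logits,
        label_length=tf.constant([3, 2]),
        logit_length=tf.fill([batch_size], num_frames),
        logits_time_major=True,
        blank_index=0)  # blank may be any class index, e.g. 2 as above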
Example #3
    def testCtcLossDenseIsSameAsCtcLoss(self):
        with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
            random_seed.set_random_seed(5)

            batch_size = 8
            num_labels = 6
            label_length = 5
            minimum_logits_length = 10
            num_frames = minimum_logits_length + batch_size
            logits = random_ops.random_uniform(
                [num_frames, batch_size, num_labels])
            labels = random_ops.random_uniform([batch_size, label_length],
                                               minval=1,
                                               maxval=num_labels,
                                               dtype=dtypes.int64)

            label_lengths = random_ops.random_uniform([batch_size],
                                                      minval=2,
                                                      maxval=label_length,
                                                      dtype=dtypes.int64)
            label_mask = array_ops.sequence_mask(label_lengths,
                                                 maxlen=label_length,
                                                 dtype=label_lengths.dtype)
            labels *= label_mask

            logit_lengths = math_ops.range(batch_size) + minimum_logits_length

            ctc_loss = ctc_ops.ctc_loss_dense(labels=labels,
                                              logits=logits,
                                              label_length=label_lengths,
                                              logit_length=logit_lengths)
            ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

            # Shift labels down by one (move blank from 0 to num_labels - 1).
            tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
            tf_nn_ctc_logits = array_ops.concat([
                logits[:, :, 1:],
                logits[:, :, 0:1],
            ], axis=2)

            tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
                tf_ctc_loss_labels, label_lengths)

            tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                              inputs=tf_nn_ctc_logits,
                                              sequence_length=logit_lengths,
                                              time_major=True)
            tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss,
                                                       [logits])[0]

            with self.cached_session():
                for _ in range(32):
                    self.assertAllClose(
                        *self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                    self.assertAllClose(
                        *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                        rtol=4e-06,
                        atol=4e-06)
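The sequence_mask step shared by these tests, shown in isolation: positions at
or past each row's length become zero, which the dense tests then treat as
label padding:

    import tensorflow as tf

    lengths = tf.constant([2, 4])
    print(tf.sequence_mask(lengths, maxlen=5, dtype=tf.int64).numpy())
    # [[1 1 0 0 0]
    #  [1 1 1 1 0]]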
Example #4
    def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        label_length = 5
        num_frames = 12
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])
        labels = random_ops.random_uniform([batch_size, label_length],
                                           minval=1,
                                           maxval=num_labels,
                                           dtype=dtypes.int64)

        label_lengths = random_ops.random_uniform([batch_size],
                                                  minval=2,
                                                  maxval=label_length,
                                                  dtype=dtypes.int64)
        label_mask = array_ops.sequence_mask(label_lengths,
                                             maxlen=label_length,
                                             dtype=label_lengths.dtype)
        labels *= label_mask

        logit_lengths = [num_frames] * batch_size

        ctc_loss = ctc_ops.ctc_loss_dense(
            labels=labels,
            logits=logits,
            label_length=label_lengths,
            logit_length=logit_lengths,
            unique=ctc_ops.ctc_unique_labels(labels))
        ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

        # Shift labels down by one (move blank from 0 to num_labels - 1).
        tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32) - 1
        tf_nn_ctc_logits = array_ops.concat([
            logits[:, :, 1:],
            logits[:, :, 0:1],
        ], axis=2)

        tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
            tf_ctc_loss_labels, label_lengths)

        tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                          inputs=tf_nn_ctc_logits,
                                          sequence_length=logit_lengths,
                                          time_major=True)
        tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss, [logits])[0]

        with self.cached_session() as sess:
            for _ in range(32):
                self.assertAllClose(*sess.run([ctc_loss, tf_nn_ctc_loss]))
                self.assertAllClose(
                    *sess.run([ctc_loss_grads, tf_nn_ctc_grads]),
                    rtol=2e-06,
                    atol=2e-06)
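What the unique argument precomputes, in isolation, via the public alias
tf.nn.ctc_unique_labels: for each row, the distinct labels (zero-padded) plus
an index mapping every original position to its slot in that list:

    import tensorflow as tf

    unique, idx = tf.nn.ctc_unique_labels(tf.constant([[3, 4, 4, 3]]))
    print(unique.numpy())  # [[3 4 0 0]]
    print(idx.numpy())     # [[0 1 1 0]]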
Example #5
def _ctc_loss_v3(labels, logits, label_length, logit_length, use_gpu):
    with test_util.device(use_gpu=use_gpu):
        sparse_labels = ctc_ops.dense_labels_to_sparse(labels, label_length)
        with backprop.GradientTape() as t:
            t.watch(logits)
            ref_loss = ctc_ops.ctc_loss_v3(labels=sparse_labels,
                                           logits=logits,
                                           label_length=label_length,
                                           logit_length=logit_length,
                                           blank_index=0)
        ref_grad = t.gradient(ref_loss, [logits])
        return ref_loss, ref_grad
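The same tape pattern with public TF2 symbols, usable eagerly; the shapes
below are illustrative rather than taken from the helper:

    import tensorflow as tf

    logits = tf.random.uniform([12, 8, 6])  # [frames, batch, classes]
    labels = tf.random.uniform([8, 5], minval=1, maxval=6, dtype=tf.int64)
    with tf.GradientTape() as tape:
        tape.watch(logits)
        loss = tf.nn.ctc_loss(
            labels=labels,
            logits=logits,
            label_length=tf.fill([8], 5),
            logit_length=tf.fill([8], 12),
            logits_time_major=True,
            blank_index=0)
    grad = tape.gradient(loss, logits)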
Example #6
    def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
        with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
            random_seed.set_random_seed(5)

            batch_size = 8
            num_labels = 6
            label_length = 5
            num_frames = 12
            logits = random_ops.random_uniform(
                [num_frames, batch_size, num_labels])
            labels = random_ops.random_uniform([batch_size, label_length],
                                               minval=0,
                                               maxval=num_labels - 1,
                                               dtype=dtypes.int64)

            label_lengths = random_ops.random_uniform([batch_size],
                                                      minval=2,
                                                      maxval=label_length,
                                                      dtype=dtypes.int64)
            label_mask = array_ops.sequence_mask(label_lengths,
                                                 maxlen=label_length,
                                                 dtype=label_lengths.dtype)
            labels *= label_mask

            logit_lengths = [num_frames] * batch_size

            ctc_loss = ctc_ops.ctc_loss_dense(labels=labels,
                                              logits=logits,
                                              label_length=label_lengths,
                                              logit_length=logit_lengths,
                                              blank_index=-1)
            ctc_loss_grads = gradients_impl.gradients(ctc_loss, [logits])[0]

            tf_ctc_loss_labels = math_ops.cast(labels, dtypes.int32)
            tf_ctc_loss_labels = ctc_ops.dense_labels_to_sparse(
                tf_ctc_loss_labels, label_lengths)

            tf_nn_ctc_loss = ctc_ops.ctc_loss(labels=tf_ctc_loss_labels,
                                              inputs=logits,
                                              sequence_length=logit_lengths,
                                              time_major=True)
            tf_nn_ctc_grads = gradients_impl.gradients(tf_nn_ctc_loss,
                                                       [logits])[0]

            with self.cached_session():
                for _ in range(32):
                    self.assertAllClose(
                        *self.evaluate([ctc_loss, tf_nn_ctc_loss]))
                    self.assertAllClose(
                        *self.evaluate([ctc_loss_grads, tf_nn_ctc_grads]),
                        rtol=2e-06,
                        atol=2e-06)
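The negative index follows Python-style wrapping: blank_index=-1 resolves to
num_labels - 1, which is exactly the fixed blank position of the V1
ctc_ops.ctc_loss op, so this test can compare the two losses without any
logit or label shifting.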
Example #7
    def testCtcLossV2(self):
        random_seed.set_random_seed(5)

        batch_size = 8
        num_labels = 6
        max_label_length = 5
        num_frames = 12

        labels = random_ops.random_uniform([batch_size, max_label_length],
                                           minval=1,
                                           maxval=num_labels,
                                           dtype=dtypes.int64)
        logits = random_ops.random_uniform(
            [num_frames, batch_size, num_labels])

        label_length = random_ops.random_uniform([batch_size],
                                                 minval=2,
                                                 maxval=max_label_length,
                                                 dtype=dtypes.int64)
        label_mask = array_ops.sequence_mask(label_length,
                                             maxlen=max_label_length,
                                             dtype=label_length.dtype)
        labels *= label_mask
        logit_length = [num_frames] * batch_size

        with backprop.GradientTape() as t:
            t.watch(logits)
            ref_loss = ctc_ops.ctc_loss_v2(labels=labels,
                                           logits=logits,
                                           label_length=label_length,
                                           logit_length=logit_length)
        ref_grad = t.gradient(ref_loss, [logits])

        sparse_labels = ctc_ops.dense_labels_to_sparse(labels, label_length)

        def assert_same_loss_and_grads(loss):
            if context.executing_eagerly():
                return
            with self.cached_session():
                self.assertAllClose(*self.evaluate([loss, ref_loss]))
                grad = gradients_impl.gradients(loss, [logits])
                self.assertAllClose(*self.evaluate([grad, ref_grad]),
                                    rtol=2e-06,
                                    atol=2e-06)

        assert_same_loss_and_grads(
            ctc_ops.ctc_loss_v2(labels=sparse_labels,
                                logits=logits,
                                label_length=label_length,
                                logit_length=logit_length,
                                blank_index=0))
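Note that assert_same_loss_and_grads returns early under eager execution:
gradients_impl.gradients only works while building a graph, which is why the
eager reference gradient is taken with GradientTape above instead.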
Example #8
    def _genInputParams(self,
                        num_classes=10,
                        batch_size=32,
                        max_label_sequence_length=50,
                        num_frames=100,
                        logits_time_major=True,
                        sparse_labels=True):
        assert num_frames >= max_label_sequence_length

        labels_shape = (batch_size, max_label_sequence_length)
        # Zero-pad the labels. Zero is the default blank index in the TF2 API.
        # num_classes includes the blank class.
        unmasked_labels = np.random.randint(1,
                                            num_classes,
                                            size=labels_shape,
                                            dtype=np.int32)
        labels_lengths = np.random.randint(1,
                                           high=max_label_sequence_length,
                                           size=batch_size,
                                           dtype=np.int32)
        labels_masks = (np.arange(max_label_sequence_length) <
                        labels_lengths.reshape(batch_size, 1)).astype(np.int32)
        labels = unmasked_labels * labels_masks
        if sparse_labels:
            labels = ctc_ops.dense_labels_to_sparse(labels, labels_lengths)

        if logits_time_major:
            logits_shape = (num_frames, batch_size, num_classes)
        else:
            logits_shape = (batch_size, num_frames, num_classes)
        logits = self._randomFloats(logits_shape)

        labels_lengths = constant_op.constant(labels_lengths)

        logits_lengths = [num_frames] * batch_size
        logits_lengths = constant_op.constant(logits_lengths)

        return labels, logits, labels_lengths, logits_lengths
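A hypothetical call site for the generator above (inside the same test class;
the parameter values are illustrative):

    labels, logits, labels_lengths, logits_lengths = self._genInputParams(
        num_classes=8, batch_size=4, sparse_labels=True)
    loss = ctc_ops.ctc_loss_v3(
        labels=labels,
        logits=logits,
        label_length=labels_lengths,
        logit_length=logits_lengths,
        blank_index=0)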