def testWithUnknownShape(self):
        # Every scalar `x` is mapped to `array_ops.fill([x, x], x)` and the
        # results are sparse-batched four at a time with row shape
        # `[5, None]`. Each batch is compared against indices, values and a
        # dense shape that are recomputed here in plain Python.
        batch_size = 4
        sizes = np.random.randint(5, size=(40, )).astype(np.int32)
        squares = dataset_ops.Dataset.from_tensor_slices(sizes).map(
            lambda x: array_ops.fill([x, x], x))
        dataset = squares.apply(
            batching.dense_to_sparse_batch(batch_size, [5, None]))

        get_next = self.getNext(dataset)

        for offset in range(0, len(sizes), batch_size):
            window = sizes[offset:offset + batch_size]
            batch = self.evaluate(get_next())

            # One [row, col, depth] index and one value per cell of each
            # `side x side` element in the current window.
            expected_indices = []
            expected_values = []
            for row, side in enumerate(window):
                for col in range(side):
                    for depth in range(side):
                        expected_indices.append([row, col, depth])
                        expected_values.append(side)

            # The last batch may be short; the final dimension is expected
            # to equal the largest element in the window.
            expected_shape = [
                min(batch_size, len(sizes) - offset), 5,
                np.max(window)
            ]

            self.assertAllEqual(expected_indices, batch.indices)
            self.assertAllEqual(expected_values, batch.values)
            self.assertAllEqual(expected_shape, batch.dense_shape)

        # All 40 inputs have been consumed, so one more pull must fail.
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(get_next())
  def testDenseToSparseBatchDatasetWithUnknownShape(self):
    # Sparse-batches `array_ops.fill([x, x], x)` tensors four at a time with
    # row shape `[5, None]`, then compares every produced batch with indices,
    # values and a dense shape recomputed in plain Python.
    sizes = np.random.randint(5, size=(40,)).astype(np.int32)
    squares = dataset_ops.Dataset.from_tensor_slices(sizes).map(
        lambda x: array_ops.fill([x, x], x))
    batched = squares.apply(batching.dense_to_sparse_batch(4, [5, None]))
    iterator = batched.make_initializable_iterator()
    init_op = iterator.initializer
    get_next = iterator.get_next()

    def expected_batch(window, remaining):
      # Builds the (indices, values, dense_shape) expected for one batch made
      # from `window`, where `remaining` inputs were still unread.
      indices = []
      values = []
      for row, side in enumerate(window):
        for col in range(side):
          for depth in range(side):
            indices.append([row, col, depth])
            values.append(side)
      return indices, values, [min(4, remaining), 5, np.max(window)]

    with self.cached_session() as sess:
      self.evaluate(init_op)

      for offset in range(0, len(sizes), 4):
        batch = self.evaluate(get_next)
        indices, values, dense_shape = expected_batch(
            sizes[offset:offset + 4], len(sizes) - offset)
        self.assertAllEqual(indices, batch.indices)
        self.assertAllEqual(values, batch.values)
        self.assertAllEqual(dense_shape, batch.dense_shape)

      # Every input has been read, so the next pull must be out of range.
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)
예제 #3
0
    def testDenseToSparseBatchDatasetWithUnknownShape(self):
        # Runs `dense_to_sparse_batch(4, [5, None])` over elements built with
        # `array_ops.fill([x, x], x)` and checks the indices, values and dense
        # shape of each batch against values derived from the raw inputs.
        batch_size = 4
        row_shape = [5, None]
        raw = np.random.randint(5, size=(40, )).astype(np.int32)

        filled = dataset_ops.Dataset.from_tensor_slices(raw).map(
            lambda x: array_ops.fill([x, x], x))
        iterator = filled.apply(
            batching.dense_to_sparse_batch(
                batch_size, row_shape)).make_initializable_iterator()
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            self.evaluate(init_op)

            for begin in range(0, len(raw), batch_size):
                chunk = raw[begin:begin + batch_size]
                sparse = self.evaluate(get_next)

                want_indices = [[row, col, depth]
                                for row, side in enumerate(chunk)
                                for col in range(side)
                                for depth in range(side)]
                # Each element contributes `side * side` copies of `side`.
                want_values = [
                    side for side in chunk for _ in range(side)
                    for _ in range(side)
                ]
                # The final batch may hold fewer than `batch_size` rows.
                want_shape = [
                    min(batch_size, len(raw) - begin), 5,
                    np.max(chunk)
                ]

                self.assertAllEqual(want_indices, sparse.indices)
                self.assertAllEqual(want_values, sparse.values)
                self.assertAllEqual(want_shape, sparse.dense_shape)

            # Nothing is left to read after the loop.
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)
예제 #4
0
def dense_to_sparse_batch(batch_size, row_shape):
    """Returns a transformation that batches ragged elements sparsely.

    This function forwards both arguments unchanged to
    `batching.dense_to_sparse_batch` and returns its result.

    In the same spirit as `Dataset.padded_batch()`, the transformation
    merges several consecutive dataset elements, whose shapes may differ,
    into one element. That element consists of three components
    (`indices`, `values`, and `dense_shape`) which together form a
    `tf.SparseTensor` holding the same data. `row_shape` is the dense shape
    of a single row of that `tf.SparseTensor`; the effective batch size is
    prepended to it. For example:

    ```python
    # NOTE: The following examples use `{ ... }` to represent the
    # contents of a dataset.
    a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }

    a.apply(tf.contrib.data.dense_to_sparse_batch(batch_size=2, row_shape=[6])) ==
    {
        ([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],  # indices
         ['a', 'b', 'c', 'a', 'b'],                 # values
         [2, 6]),                                   # dense_shape
        ([[0, 0], [0, 1], [0, 2], [0, 3]],
         ['a', 'b', 'c', 'd'],
         [1, 6])
    }
    ```

    Args:
      batch_size: A `tf.int64` scalar `tf.Tensor` giving how many
        consecutive elements of the dataset go into a single batch.
      row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like object
        giving the equivalent dense shape of one row of the resulting
        `tf.SparseTensor`. Every dataset element must have the same rank as
        `row_shape` and be no larger than `row_shape` in each dimension.

    Returns:
      A `Dataset` transformation function, suitable for passing to
      `tf.data.Dataset.apply`.
    """
    transformation = batching.dense_to_sparse_batch(batch_size, row_shape)
    return transformation
예제 #5
0
def dense_to_sparse_batch(batch_size, row_shape):
  """Builds a transformation that packs ragged elements into sparse batches.

  The call is delegated as-is to `batching.dense_to_sparse_batch`.

  Similar to `Dataset.padded_batch()`, the resulting transformation takes
  multiple consecutive elements of the dataset, possibly of different
  shapes, and combines them into a single element. The combined element is
  made of three components (`indices`, `values`, and `dense_shape`) that
  describe a `tf.SparseTensor` representing the same data. The dense shape
  of each row in that `tf.SparseTensor` is given by `row_shape`, with the
  effective batch size prepended. For example:

  ```python
  # NOTE: The following examples use `{ ... }` to represent the
  # contents of a dataset.
  a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] }

  a.apply(tf.contrib.data.dense_to_sparse_batch(batch_size=2, row_shape=[6])) ==
  {
      ([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],  # indices
       ['a', 'b', 'c', 'a', 'b'],                 # values
       [2, 6]),                                   # dense_shape
      ([[0, 0], [0, 1], [0, 2], [0, 3]],
       ['a', 'b', 'c', 'd'],
       [1, 6])
  }
  ```

  Args:
    batch_size: A `tf.int64` scalar `tf.Tensor`: the number of consecutive
      dataset elements to combine into one batch.
    row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like object:
      the equivalent dense shape of a row in the resulting
      `tf.SparseTensor`. Each dataset element must match the rank of
      `row_shape` and must not exceed `row_shape` in any dimension.

  Returns:
    A `Dataset` transformation function that can be handed to
    `tf.data.Dataset.apply`.
  """
  sparse_batch_fn = batching.dense_to_sparse_batch(batch_size, row_shape)
  return sparse_batch_fn
예제 #6
0
    def testDenseToSparseBatchDatasetShapeErrors(self):
        # Feeds inputs that do not fit `row_shape=[12]` and checks that
        # pulling from the iterator reports the expected error each time.
        #
        # `assertRaisesRegex` is used instead of `assertRaisesRegexp`: the
        # latter is a deprecated unittest alias removed in Python 3.12.
        input_tensor = array_ops.placeholder(dtypes.int32)
        iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply(
            batching.dense_to_sparse_batch(
                4, [12])).make_initializable_iterator())
        init_op = iterator.initializer
        get_next = iterator.get_next()

        with self.cached_session() as sess:
            # Initialize with an input tensor of incompatible rank.
            sess.run(init_op, feed_dict={input_tensor: [[1]]})
            with self.assertRaisesRegex(errors.InvalidArgumentError,
                                        "incompatible with the row shape"):
                sess.run(get_next)

            # Initialize with an input tensor that is larger than `row_shape`
            # (13 values against a row shape of 12).
            sess.run(init_op, feed_dict={input_tensor: range(13)})
            with self.assertRaisesRegex(errors.DataLossError,
                                        "larger than the row shape"):
                sess.run(get_next)
  def testDenseToSparseBatchDatasetShapeErrors(self):
    # Feeds inputs that do not fit `row_shape=[12]` and checks that pulling
    # from the iterator reports the expected error each time.
    #
    # `assertRaisesRegex` is used instead of `assertRaisesRegexp`: the latter
    # is a deprecated unittest alias removed in Python 3.12.
    input_tensor = array_ops.placeholder(dtypes.int32)
    iterator = (
        dataset_ops.Dataset.from_tensors(input_tensor).apply(
            batching.dense_to_sparse_batch(4, [12]))
        .make_initializable_iterator())
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      # Initialize with an input tensor of incompatible rank.
      sess.run(init_op, feed_dict={input_tensor: [[1]]})
      with self.assertRaisesRegex(errors.InvalidArgumentError,
                                  "incompatible with the row shape"):
        sess.run(get_next)

      # Initialize with an input tensor that is larger than `row_shape`
      # (13 values against a row shape of 12).
      sess.run(init_op, feed_dict={input_tensor: range(13)})
      with self.assertRaisesRegex(errors.DataLossError,
                                  "larger than the row shape"):
        sess.run(get_next)
  def testDenseToSparseBatchDataset(self):
    # Each scalar `x` becomes `array_ops.fill([x], x)`; the vectors are then
    # sparse-batched four at a time with row shape `[12]` and every batch is
    # compared with values recomputed from the raw inputs.
    lengths = np.random.randint(12, size=(100,)).astype(np.int32)
    vectors = dataset_ops.Dataset.from_tensor_slices(lengths).map(
        lambda x: array_ops.fill([x], x))
    dataset = vectors.apply(batching.dense_to_sparse_batch(4, [12]))
    get_next = self.getNext(dataset)

    for offset in range(0, len(lengths), 4):
      window = lengths[offset:offset + 4]
      batch = self.evaluate(get_next())

      # One [row, col] index and one value per entry of each vector.
      expected_indices = []
      expected_values = []
      for row, length in enumerate(window):
        for col in range(length):
          expected_indices.append([row, col])
          expected_values.append(length)

      self.assertAllEqual(expected_indices, batch.indices)
      self.assertAllEqual(expected_values, batch.values)
      # The final batch may contain fewer than four rows.
      self.assertAllEqual([min(4, len(lengths) - offset), 12],
                          batch.dense_shape)

    # All inputs have been read, so another pull must be out of range.
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
 def testDenseToSparseBatchDatasetWithInvalidShape(self):
   # A negative row-shape dimension is expected to be rejected with a
   # ValueError while the dataset/iterator is being constructed.
   #
   # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp` alias,
   # which was removed from unittest in Python 3.12.
   input_tensor = array_ops.constant([[1]])
   with self.assertRaisesRegex(ValueError, "Dimension -2 must be >= 0"):
     dataset_ops.Dataset.from_tensors(input_tensor).apply(
         batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
예제 #10
0
 def _build_dataset_dense_to_sparse(self, components):
     # Slice `components`, map each slice `x` to `array_ops.fill([x], x)`,
     # then sparse-batch four elements at a time with row shape `[12]`.
     filled = dataset_ops.Dataset.from_tensor_slices(components).map(
         lambda x: array_ops.fill([x], x))
     sparse_batch = batching.dense_to_sparse_batch(4, [12])
     return filled.apply(sparse_batch)
 def dataset_fn(input_tensor):
     # Build a dataset from `input_tensor` with `from_tensors`, then apply
     # sparse batching with batch size 4 and row shape `[12]`.
     source = dataset_ops.Dataset.from_tensors(input_tensor)
     sparse_batch = batching.dense_to_sparse_batch(4, [12])
     return source.apply(sparse_batch)
 def testWithInvalidShape(self):
     # A negative row-shape dimension is expected to raise a ValueError when
     # the transformation is built or applied.
     input_tensor = array_ops.constant([[1]])
     with self.assertRaisesRegex(ValueError, "Dimension -2 must be >= 0"):
         source = dataset_ops.Dataset.from_tensors(input_tensor)
         source.apply(batching.dense_to_sparse_batch(4, [-2]))
예제 #13
0
 def testDenseToSparseBatchDatasetWithInvalidShape(self):
     # A negative row-shape dimension is expected to be rejected with a
     # ValueError while the dataset/iterator is being constructed.
     #
     # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
     # alias, which was removed from unittest in Python 3.12.
     input_tensor = array_ops.constant([[1]])
     with self.assertRaisesRegex(ValueError, "Dimension -2 must be >= 0"):
         dataset_ops.Dataset.from_tensors(input_tensor).apply(
             batching.dense_to_sparse_batch(
                 4, [-2])).make_initializable_iterator()
 def _build_dataset_dense_to_sparse(self, components):
   # Turn every slice `x` of `components` into `array_ops.fill([x], x)` and
   # sparse-batch the results in groups of four with row shape `[12]`.
   per_element = dataset_ops.Dataset.from_tensor_slices(components).map(
       lambda x: array_ops.fill([x], x))
   to_sparse = batching.dense_to_sparse_batch(4, [12])
   return per_element.apply(to_sparse)
 def dataset_fn(input_tensor):
   # Create a dataset from `input_tensor` via `from_tensors` and sparse-batch
   # it with batch size 4 and row shape `[12]`.
   base = dataset_ops.Dataset.from_tensors(input_tensor)
   to_sparse = batching.dense_to_sparse_batch(4, [12])
   return base.apply(to_sparse)