def testWithUnknownShape(self): components = np.random.randint(5, size=(40, )).astype(np.int32) dataset = dataset_ops.Dataset.from_tensor_slices(components).map( lambda x: array_ops.fill([x, x], x)).apply( batching.dense_to_sparse_batch(4, [5, None])) get_next = self.getNext(dataset) for start in range(0, len(components), 4): results = self.evaluate(get_next()) self.assertAllEqual( [[i, j, z] for i, c in enumerate(components[start:start + 4]) for j in range(c) for z in range(c)], results.indices) self.assertAllEqual([ c for c in components[start:start + 4] for _ in range(c) for _ in range(c) ], results.values) self.assertAllEqual([ min(4, len(components) - start), 5, np.max(components[start:start + 4]) ], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): self.evaluate(get_next())
def testDenseToSparseBatchDatasetWithUnknownShape(self): components = np.random.randint(5, size=(40,)).astype(np.int32) iterator = ( dataset_ops.Dataset.from_tensor_slices(components) .map(lambda x: array_ops.fill([x, x], x)).apply( batching.dense_to_sparse_batch( 4, [5, None])).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() with self.cached_session() as sess: self.evaluate(init_op) for start in range(0, len(components), 4): results = self.evaluate(get_next) self.assertAllEqual([[i, j, z] for i, c in enumerate(components[start:start + 4]) for j in range(c) for z in range(c)], results.indices) self.assertAllEqual([ c for c in components[start:start + 4] for _ in range(c) for _ in range(c) ], results.values) self.assertAllEqual([ min(4, len(components) - start), 5, np.max(components[start:start + 4]) ], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next)
def testDenseToSparseBatchDatasetWithUnknownShape(self): components = np.random.randint(5, size=(40, )).astype(np.int32) iterator = (dataset_ops.Dataset.from_tensor_slices(components).map( lambda x: array_ops.fill([x, x], x)).apply( batching.dense_to_sparse_batch( 4, [5, None])).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() with self.cached_session() as sess: self.evaluate(init_op) for start in range(0, len(components), 4): results = self.evaluate(get_next) self.assertAllEqual( [[i, j, z] for i, c in enumerate(components[start:start + 4]) for j in range(c) for z in range(c)], results.indices) self.assertAllEqual([ c for c in components[start:start + 4] for _ in range(c) for _ in range(c) ], results.values) self.assertAllEqual([ min(4, len(components) - start), 5, np.max(components[start:start + 4]) ], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): sess.run(get_next)
def dense_to_sparse_batch(batch_size, row_shape): """A transformation that batches ragged elements into `tf.SparseTensor`s. Like `Dataset.padded_batch()`, this transformation combines multiple consecutive elements of the dataset, which might have different shapes, into a single element. The resulting element has three components (`indices`, `values`, and `dense_shape`), which comprise a `tf.SparseTensor` that represents the same data. The `row_shape` represents the dense shape of each row in the resulting `tf.SparseTensor`, to which the effective batch size is prepended. For example: ```python # NOTE: The following examples use `{ ... }` to represent the # contents of a dataset. a = { ['a', 'b', 'c'], ['a', 'b'], ['a', 'b', 'c', 'd'] } a.apply(tf.contrib.data.dense_to_sparse_batch(batch_size=2, row_shape=[6])) == { ([[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]], # indices ['a', 'b', 'c', 'a', 'b'], # values [2, 6]), # dense_shape ([[0, 0], [0, 1], [0, 2], [0, 3]], ['a', 'b', 'c', 'd'], [1, 6]) } ``` Args: batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of consecutive elements of this dataset to combine in a single batch. row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like object representing the equivalent dense shape of a row in the resulting `tf.SparseTensor`. Each element of this dataset must have the same rank as `row_shape`, and must have size less than or equal to `row_shape` in each dimension. Returns: A `Dataset` transformation function, which can be passed to `tf.data.Dataset.apply`. """ return batching.dense_to_sparse_batch(batch_size, row_shape)
def testDenseToSparseBatchDatasetShapeErrors(self): input_tensor = array_ops.placeholder(dtypes.int32) iterator = (dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch( 4, [12])).make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() with self.cached_session() as sess: # Initialize with an input tensor of incompatible rank. sess.run(init_op, feed_dict={input_tensor: [[1]]}) with self.assertRaisesRegexp(errors.InvalidArgumentError, "incompatible with the row shape"): sess.run(get_next) # Initialize with an input tensor that is larger than `row_shape`. sess.run(init_op, feed_dict={input_tensor: range(13)}) with self.assertRaisesRegexp(errors.DataLossError, "larger than the row shape"): sess.run(get_next)
def testDenseToSparseBatchDatasetShapeErrors(self): input_tensor = array_ops.placeholder(dtypes.int32) iterator = ( dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch(4, [12])) .make_initializable_iterator()) init_op = iterator.initializer get_next = iterator.get_next() with self.cached_session() as sess: # Initialize with an input tensor of incompatible rank. sess.run(init_op, feed_dict={input_tensor: [[1]]}) with self.assertRaisesRegexp(errors.InvalidArgumentError, "incompatible with the row shape"): sess.run(get_next) # Initialize with an input tensor that is larger than `row_shape`. sess.run(init_op, feed_dict={input_tensor: range(13)}) with self.assertRaisesRegexp(errors.DataLossError, "larger than the row shape"): sess.run(get_next)
def testDenseToSparseBatchDataset(self): components = np.random.randint(12, size=(100,)).astype(np.int32) dataset = dataset_ops.Dataset.from_tensor_slices( components).map(lambda x: array_ops.fill([x], x)).apply( batching.dense_to_sparse_batch(4, [12])) get_next = self.getNext(dataset) for start in range(0, len(components), 4): results = self.evaluate(get_next()) self.assertAllEqual([[i, j] for i, c in enumerate(components[start:start + 4]) for j in range(c)], results.indices) self.assertAllEqual( [c for c in components[start:start + 4] for _ in range(c)], results.values) self.assertAllEqual([min(4, len(components) - start), 12], results.dense_shape) with self.assertRaises(errors.OutOfRangeError): self.evaluate(get_next())
def testDenseToSparseBatchDatasetWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"): dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch(4, [-2])).make_initializable_iterator()
def _build_dataset_dense_to_sparse(self, components): return dataset_ops.Dataset.from_tensor_slices(components).map( lambda x: array_ops.fill([x], x)).apply( batching.dense_to_sparse_batch(4, [12]))
def dataset_fn(input_tensor): return dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch(4, [12]))
def testWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) with self.assertRaisesRegex(ValueError, "Dimension -2 must be >= 0"): dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch(4, [-2]))
def testDenseToSparseBatchDatasetWithInvalidShape(self): input_tensor = array_ops.constant([[1]]) with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"): dataset_ops.Dataset.from_tensors(input_tensor).apply( batching.dense_to_sparse_batch( 4, [-2])).make_initializable_iterator()