def _batch_dense_window(dataset):
  """Batches a window of dense tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    return array_ops.shape(first_element)

  def shape_reduce_fn(state, value):
    check_ops.assert_equal(state, array_ops.shape(value))
    return state

  def finalize_fn(state):
    return state

  if dataset.output_shapes.is_fully_defined():
    shape = dataset.output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    batch_shape = array_ops.concat([[0], shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
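# A minimal sketch of the same reducer-based batching pattern using the public
# tf.data API; the helper name `batch_window` and its explicit `element_shape`
# argument are illustrative assumptions, not part of this module.
import numpy as np
import tensorflow as tf


def batch_window(dataset, element_shape):
  """Concatenates every element of `dataset` into a single batch tensor."""

  def key_fn(_):
    return np.int64(0)  # Route every element to a single group.

  def init_fn(_):
    # Start from an empty batch of shape [0] + element_shape.
    return tf.zeros([0] + list(element_shape),
                    dtype=dataset.element_spec.dtype)

  def reduce_fn(state, value):
    # Append `value` as one new row of the growing batch.
    return tf.concat([state, [value]], 0)

  reducer = tf.data.experimental.Reducer(init_fn, reduce_fn, lambda s: s)
  return tf.data.experimental.get_single_element(
      dataset.apply(tf.data.experimental.group_by_reducer(key_fn, reducer)))


# batch_window(tf.data.Dataset.from_tensor_slices([[1, 2], [3, 4]]), [2])
# => tf.Tensor([[1 2] [3 4]], shape=(2, 2), dtype=int32)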
def _padded_batch_sparse_window(dataset, padded_shape):
  """Batches a window of sparse tensors with padding."""

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return convert.partial_shape_to_tensor(padded_shape)

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(value.dense_shape, padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ", value.dense_shape,
        padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, value.dense_shape)

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
                                     0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat(
            [np.array([0], dtype=np.int64), padded_shape], 0))

  def batch_reduce_fn(state, value):
    padded_value = sparse_tensor.SparseTensor(
        indices=value.indices, values=value.values, dense_shape=padded_shape)
    reshaped_value = sparse_ops.sparse_reshape(
        padded_value,
        array_ops.concat(
            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
    return sparse_ops.sparse_concat(0, [state, reshaped_value])

  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
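# A small standalone sketch of the check in max_reduce_fn above, written with
# public ops: every dimension of a value's shape must fit inside padded_shape
# (where -1 means "unconstrained"), and the elementwise running maximum becomes
# the final shape to pad to. The concrete shapes here are illustrative.
import tensorflow as tf

padded_shape = tf.constant([-1, 4], dtype=tf.int64)
value_shape = tf.constant([2, 3], dtype=tf.int64)
condition = tf.reduce_all(
    tf.logical_or(
        tf.less_equal(value_shape, padded_shape),
        tf.equal(padded_shape, -1)))
assert_op = tf.debugging.Assert(
    condition, ["Actual shape greater than padded shape: ", value_shape])
with tf.control_dependencies([assert_op]):
  running_max = tf.maximum(padded_shape, value_shape)  # => [2, 4]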
def _batch_sparse_window(dataset):
  """Batches a window of sparse tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    return first_element.dense_shape

  def shape_reduce_fn(state, value):
    check_ops.assert_equal(state, value.dense_shape)
    return state

  def finalize_fn(state):
    return state

  dataset_output_shapes = dataset_ops.get_legacy_output_shapes(dataset)
  if dataset_output_shapes.is_fully_defined():
    shape = dataset_output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat(
            [np.array([0], dtype=np.int64),
             math_ops.cast(shape, dtypes.int64)], 0))

  def batch_reduce_fn(state, value):
    return sparse_ops.sparse_concat(0, [state, value])

  def reshape_fn(value):
    return sparse_ops.sparse_reshape(
        value,
        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape],
                         0))

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(reshape_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
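# The reshape-then-concat trick used by reshape_fn/batch_reduce_fn above, as a
# standalone public-API sketch: each sparse element gains a leading batch
# dimension of size 1, and the batch is built by concatenating along axis 0.
# The example tensors are illustrative.
import numpy as np
import tensorflow as tf

a = tf.sparse.SparseTensor(indices=[[0]], values=[1], dense_shape=[3])
b = tf.sparse.SparseTensor(indices=[[2]], values=[2], dense_shape=[3])


def expand(sp):
  # [3] -> [1, 3]: prepend a batch dimension of size 1.
  return tf.sparse.reshape(
      sp, tf.concat([np.array([1], dtype=np.int64), sp.dense_shape], 0))


batched = tf.sparse.concat(0, [expand(a), expand(b)])
# tf.sparse.to_dense(batched) => [[1, 0, 0], [0, 0, 2]]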
def testGetSingleElement(self, skip, take, error=None, error_msg=None):
  skip_t = array_ops.placeholder(dtypes.int64, shape=[])
  take_t = array_ops.placeholder(dtypes.int64, shape=[])

  def make_sparse(x):
    x_1d = array_ops.reshape(x, [1])
    x_2d = array_ops.reshape(x, [1, 1])
    return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)

  dataset = dataset_ops.Dataset.range(100).skip(skip_t).map(
      lambda x: (x * x, make_sparse(x))).take(take_t)
  element = get_single_element.get_single_element(dataset)

  with self.cached_session() as sess:
    if error is None:
      dense_val, sparse_val = sess.run(
          element, feed_dict={skip_t: skip, take_t: take})
      self.assertEqual(skip * skip, dense_val)
      self.assertAllEqual([[skip]], sparse_val.indices)
      self.assertAllEqual([skip], sparse_val.values)
      self.assertAllEqual([skip], sparse_val.dense_shape)
    else:
      with self.assertRaisesRegexp(error, error_msg):
        sess.run(element, feed_dict={skip_t: skip, take_t: take})
def testGetSingleElement(self, skip, take, error=None, error_msg=None):

  def make_sparse(x):
    x_1d = array_ops.reshape(x, [1])
    x_2d = array_ops.reshape(x, [1, 1])
    return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)

  dataset = dataset_ops.Dataset.range(100).skip(skip).map(
      lambda x: (x * x, make_sparse(x))).take(take)

  if error is None:
    dense_val, sparse_val = self.evaluate(
        get_single_element.get_single_element(dataset))
    self.assertEqual(skip * skip, dense_val)
    self.assertAllEqual([[skip]], sparse_val.indices)
    self.assertAllEqual([skip], sparse_val.values)
    self.assertAllEqual([skip], sparse_val.dense_shape)
  else:
    with self.assertRaisesRegexp(error, error_msg):
      self.evaluate(get_single_element.get_single_element(dataset))
def make_dataset():
  batched = dataset_ops.Dataset.from_tensors(tensor).repeat(num_rows).batch(
      num_rows)  # pylint: disable=cell-var-from-loop
  batched_tensor = get_single_element.get_single_element(batched)
  dataset = dataset_ops.Dataset.from_tensors(batched_tensor).repeat()
  return SingleThreadedFlatMapDataset(
      dataset, dataset_ops.Dataset.from_tensor_slices)
def benchmark_slice_repeat_sparse(self):
  non_zeros_per_row_values = [0, 1, 5, 10, 100]
  num_rows_values = [32, 64, 128, 1024]

  for non_zeros_per_row in non_zeros_per_row_values:
    tensor = sparse_tensor.SparseTensor(
        indices=np.arange(non_zeros_per_row, dtype=np.int64)[:, np.newaxis],
        values=np.arange(non_zeros_per_row, dtype=np.int64),
        dense_shape=[1000])
    for num_rows in num_rows_values:
      batched = dataset_ops.Dataset.from_tensors(tensor).repeat(
          num_rows).batch(num_rows)
      batched_tensor = get_single_element.get_single_element(batched)
      dataset = dataset_ops.Dataset.from_tensors(batched_tensor).flat_map(
          dataset_ops.Dataset.from_tensor_slices).repeat()
      self.run_and_report_benchmark(
          dataset,
          num_elements=100000,
          iters=5,
          name="slice_repeat_sparse_elements_per_row_%d_num_rows_%d" %
          (non_zeros_per_row, num_rows))
def get_single_element(dataset):
  """Returns the single element in `dataset` as a nested structure of tensors.

  This function enables you to use a `tf.data.Dataset` in a stateless
  "tensor-in tensor-out" expression, without creating a
  `tf.compat.v1.data.Iterator`. This can be useful when your preprocessing
  transformations are expressed as a `Dataset`, and you want to use the
  transformation at serving time. For example:

  ```python
  input_batch = tf.compat.v1.placeholder(tf.string, shape=[BATCH_SIZE])

  def preprocessing_fn(input_str):
    # ...
    return image, label

  dataset = (tf.data.Dataset.from_tensor_slices(input_batch)
             .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
             .batch(BATCH_SIZE))

  image_batch, label_batch = tf.data.experimental.get_single_element(dataset)
  ```

  Args:
    dataset: A `tf.data.Dataset` object containing a single element.

  Returns:
    A nested structure of `tf.Tensor` objects, corresponding to the single
    element of `dataset`.

  Raises:
    TypeError: if `dataset` is not a `tf.data.Dataset` object.
    InvalidArgumentError (at runtime): if `dataset` does not contain exactly
      one element.
  """
  return experimental_get_single_element.get_single_element(dataset)
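# A short eager-mode usage sketch of the API documented above (the dataset
# contents are illustrative): the dataset must first be combined down to
# exactly one element, e.g. via batch().
import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3]).batch(3)
element = tf.data.experimental.get_single_element(dataset)
# => tf.Tensor([1 2 3], shape=(3,), dtype=int32)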
def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
  """Batches a window of dense tensors with padding."""

  padded_shape = math_ops.cast(
      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return padded_shape

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(array_ops.shape(value), padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ",
        array_ops.shape(value), padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, array_ops.shape(value))

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  if padding_value is None:
    if dataset.output_types == dtypes.string:
      padding_value = ""
    elif dataset.output_types == dtypes.bool:
      padding_value = False
    elif dataset.output_types == dtypes.variant:
      raise TypeError("Unable to create padding for field of type 'variant'")
    else:
      padding_value = 0

  def batch_init_fn(_):
    batch_shape = array_ops.concat(
        [np.array([0], dtype=np.int32), padded_shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  def pad_fn(value):
    shape = array_ops.shape(value)
    left = array_ops.zeros_like(shape)
    right = padded_shape - shape
    return array_ops.pad(
        value, array_ops.stack([left, right], 1),
        constant_values=padding_value)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(pad_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
  """Batches a window of dense tensors with padding."""

  padded_shape = math_ops.cast(
      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return padded_shape

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(array_ops.shape(value), padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ",
        array_ops.shape(value), padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, array_ops.shape(value))

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  dataset_output_types = dataset_ops.get_legacy_output_types(dataset)
  if padding_value is None:
    if dataset_output_types == dtypes.string:
      padding_value = ""
    elif dataset_output_types == dtypes.bool:
      padding_value = False
    elif dataset_output_types == dtypes.variant:
      raise TypeError("Unable to create padding for field of type 'variant'")
    else:
      padding_value = 0

  def batch_init_fn(_):
    batch_shape = array_ops.concat(
        [np.array([0], dtype=np.int32), padded_shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset_output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  def pad_fn(value):
    shape = array_ops.shape(value)
    left = array_ops.zeros_like(shape)
    right = padded_shape - shape
    return array_ops.pad(
        value, array_ops.stack([left, right], 1),
        constant_values=padding_value)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(pad_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
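# What pad_fn above computes, as a standalone sketch with concrete
# (illustrative) values: pad only on the "right" of every dimension, up to
# padded_shape.
import tensorflow as tf

value = tf.constant([[1, 2], [3, 4]])
padded_shape = tf.constant([3, 4], dtype=tf.int32)
shape = tf.shape(value)
left = tf.zeros_like(shape)   # no padding before each dimension
right = padded_shape - shape  # pad after, per dimension
padded = tf.pad(value, tf.stack([left, right], 1), constant_values=0)
# => [[1, 2, 0, 0],
#     [3, 4, 0, 0],
#     [0, 0, 0, 0]]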
def flat_map_func(ds):
  batched = ds.batch(2)
  element = get_single_element.get_single_element(batched)
  return dataset_ops.Dataset.from_tensors(element)
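# A runnable public-API analogue of flat_map_func above, shown with
# Dataset.window (the window size of 2 matches ds.batch(2); the input dataset
# and helper name are illustrative): each 2-element window is collapsed into a
# single batched element.
import tensorflow as tf


def _collapse_window(ds):
  batched = ds.batch(2)
  return tf.data.Dataset.from_tensors(
      tf.data.experimental.get_single_element(batched))


windows = tf.data.Dataset.range(4).window(2).flat_map(_collapse_window)
# Yields tensors [0, 1] and [2, 3].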
def fn():
  _ = get_single_element.get_single_element(dataset_fn())
  return "hello"