Example #1
def _batch_dense_window(dataset):
  """Batches a window of dense tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    # `first_element` is assigned in the `else` branch below before this
    # reducer is built.
    return array_ops.shape(first_element)

  def shape_reduce_fn(state, value):
    # Make the returned state depend on the assertion; a bare `assert_equal`
    # op has no consumers and would never run in graph mode.
    with ops.control_dependencies(
        [check_ops.assert_equal(state, array_ops.shape(value))]):
      return array_ops.identity(state)

  def finalize_fn(state):
    return state

  if dataset.output_shapes.is_fully_defined():
    shape = dataset.output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    batch_shape = array_ops.concat([[0], shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, batch_reducer)))
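For orientation, the same "stack a whole window into one batch" behavior can be sketched with the public API. This is a minimal illustration (names are ours, TF 2.x eager execution assumed), not the implementation above:

```python
import tensorflow as tf

# A finite "window" of scalars; batching it by its own size yields a
# dataset containing exactly one element, which get_single_element extracts.
window = tf.data.Dataset.range(4)
batch = tf.data.experimental.get_single_element(window.batch(4))
print(batch.numpy())  # [0 1 2 3]
```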
Example #5
def _padded_batch_sparse_window(dataset, padded_shape):
  """Batches a window of sparse tensors with padding."""

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return convert.partial_shape_to_tensor(padded_shape)

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(value.dense_shape, padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ", value.dense_shape,
        padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, value.dense_shape)

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat([[0], [array_ops.size(padded_shape) + 1]],
                                     0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat(
            [np.array([0], dtype=np.int64), padded_shape], 0))

  def batch_reduce_fn(state, value):
    padded_value = sparse_tensor.SparseTensor(
        indices=value.indices, values=value.values, dense_shape=padded_shape)
    reshaped_value = sparse_ops.sparse_reshape(
        padded_value,
        array_ops.concat(
            [np.array([1], dtype=np.int64), padded_value.dense_shape], 0))
    return sparse_ops.sparse_concat(0, [state, reshaped_value])

  reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, reducer)))
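All of these helpers lean on one idiom: `group_by_reducer` with a constant key folds the entire dataset into a single element, and `get_single_element` pulls it out. A minimal sketch of that idiom with the public API, assuming a TF 2.x release that still ships `tf.data.experimental.group_by_reducer`:

```python
import numpy as np
import tensorflow as tf

ds = tf.data.Dataset.range(10)
sum_reducer = tf.data.experimental.Reducer(
    init_func=lambda _: np.int64(0),      # initial state for the (only) key
    reduce_func=lambda state, value: state + value,
    finalize_func=lambda state: state)
total = tf.data.experimental.get_single_element(
    ds.apply(tf.data.experimental.group_by_reducer(
        lambda _: np.int64(0),            # constant key: everything in one group
        sum_reducer)))
print(total.numpy())  # 45
```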
Example #6
def _batch_sparse_window(dataset):
  """Batches a window of sparse tensors."""

  def key_fn(_):
    return np.int64(0)

  def shape_init_fn(_):
    # `first_element` is assigned in the `else` branch below before this
    # reducer is built.
    return first_element.dense_shape

  def shape_reduce_fn(state, value):
    # Make the returned state depend on the assertion; a bare `assert_equal`
    # op has no consumers and would never run in graph mode.
    with ops.control_dependencies(
        [check_ops.assert_equal(state, value.dense_shape)]):
      return array_ops.identity(state)

  def finalize_fn(state):
    return state

  dataset_output_shapes = dataset_ops.get_legacy_output_shapes(dataset)
  if dataset_output_shapes.is_fully_defined():
    shape = dataset_output_shapes
  else:
    first_element = get_single_element.get_single_element(dataset.take(1))
    shape_reducer = grouping.Reducer(shape_init_fn, shape_reduce_fn,
                                     finalize_fn)
    shape = get_single_element.get_single_element(
        dataset.apply(grouping.group_by_reducer(key_fn, shape_reducer)))

  def batch_init_fn(_):
    indices_shape = array_ops.concat([[0], [array_ops.size(shape) + 1]], 0)
    return sparse_tensor.SparseTensor(
        indices=gen_array_ops.empty(indices_shape, dtype=dtypes.int64),
        values=constant_op.constant(
            [], shape=[0], dtype=dataset_ops.get_legacy_output_types(dataset)),
        dense_shape=array_ops.concat(
            [np.array([0], dtype=np.int64),
             math_ops.cast(shape, dtypes.int64)], 0))

  def batch_reduce_fn(state, value):
    return sparse_ops.sparse_concat(0, [state, value])

  def reshape_fn(value):
    return sparse_ops.sparse_reshape(
        value,
        array_ops.concat([np.array([1], dtype=np.int64), value.dense_shape], 0))

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(reshape_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
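`reshape_fn` above prepends a unit batch dimension so that the per-window sparse tensors can be stacked with `sparse_concat` along axis 0. The trick in isolation, using the public sparse ops (TF 2.x assumed):

```python
import tensorflow as tf

st = tf.sparse.SparseTensor(indices=[[0], [2]], values=[1, 2], dense_shape=[3])
# Prepend a leading 1 so two such tensors can be concatenated on axis 0.
new_shape = tf.concat([tf.constant([1], tf.int64), st.dense_shape], axis=0)
expanded = tf.sparse.reshape(st, new_shape)        # dense_shape [1, 3]
batch = tf.sparse.concat(0, [expanded, expanded])  # dense_shape [2, 3]
```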
  def testGetSingleElement(self, skip, take, error=None, error_msg=None):
    skip_t = array_ops.placeholder(dtypes.int64, shape=[])
    take_t = array_ops.placeholder(dtypes.int64, shape=[])

    def make_sparse(x):
      x_1d = array_ops.reshape(x, [1])
      x_2d = array_ops.reshape(x, [1, 1])
      return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)

    dataset = dataset_ops.Dataset.range(100).skip(skip_t).map(
        lambda x: (x * x, make_sparse(x))).take(take_t)
    element = get_single_element.get_single_element(dataset)

    with self.cached_session() as sess:
      if error is None:
        dense_val, sparse_val = sess.run(
            element, feed_dict={
                skip_t: skip,
                take_t: take
            })
        self.assertEqual(skip * skip, dense_val)
        self.assertAllEqual([[skip]], sparse_val.indices)
        self.assertAllEqual([skip], sparse_val.values)
        self.assertAllEqual([skip], sparse_val.dense_shape)
      else:
        with self.assertRaisesRegexp(error, error_msg):
          sess.run(element, feed_dict={skip_t: skip, take_t: take})
  def testGetSingleElement(self, skip, take, error=None, error_msg=None):

    def make_sparse(x):
      x_1d = array_ops.reshape(x, [1])
      x_2d = array_ops.reshape(x, [1, 1])
      return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)

    dataset = dataset_ops.Dataset.range(100).skip(
        skip).map(lambda x: (x * x, make_sparse(x))).take(take)
    if error is None:
      dense_val, sparse_val = self.evaluate(
          get_single_element.get_single_element(dataset))
      self.assertEqual(skip * skip, dense_val)
      self.assertAllEqual([[skip]], sparse_val.indices)
      self.assertAllEqual([skip], sparse_val.values)
      self.assertAllEqual([skip], sparse_val.dense_shape)
    else:
      with self.assertRaisesRegexp(error, error_msg):
        self.evaluate(get_single_element.get_single_element(dataset))
  def benchmark_slice_repeat_sparse(self):
    non_zeros_per_row_values = [0, 1, 5, 10, 100]
    num_rows_values = [32, 64, 128, 1024]

    for non_zeros_per_row in non_zeros_per_row_values:
      tensor = sparse_tensor.SparseTensor(
          indices=np.arange(non_zeros_per_row, dtype=np.int64)[:, np.newaxis],
          values=np.arange(non_zeros_per_row, dtype=np.int64),
          dense_shape=[1000])

      for num_rows in num_rows_values:
        batched = dataset_ops.Dataset.from_tensors(
            tensor).repeat(num_rows).batch(num_rows)
        batched_tensor = get_single_element.get_single_element(batched)

        dataset = dataset_ops.Dataset.from_tensors(batched_tensor).flat_map(
            dataset_ops.Dataset.from_tensor_slices).repeat()
        self.run_and_report_benchmark(
            dataset,
            num_elements=100000,
            iters=5,
            name="slice_repeat_sparse_elements_per_row_%d_num_rows_%d" % (
                non_zeros_per_row, num_rows))
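The benchmark's slice-repeat pattern materializes the batch once via `get_single_element` and then re-slices the precomputed tensor on every pass, instead of re-running the input pipeline. A dense analogue of the pattern (toy sizes, TF 2.x assumed):

```python
import tensorflow as tf

tensor = tf.zeros([8])
num_rows = 4
# Materialize num_rows copies as one batched tensor, exactly once.
batched = tf.data.Dataset.from_tensors(tensor).repeat(num_rows).batch(num_rows)
batched_tensor = tf.data.experimental.get_single_element(batched)
# Re-emit the precomputed rows forever without recomputing them.
ds = tf.data.Dataset.from_tensors(batched_tensor).flat_map(
    tf.data.Dataset.from_tensor_slices).repeat()
```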
Example #13
def get_single_element(dataset):
  """Returns the single element in `dataset` as a nested structure of tensors.

  This function enables you to use a `tf.data.Dataset` in a stateless
  "tensor-in tensor-out" expression, without creating a
  `tf.compat.v1.data.Iterator`.
  This can be useful when your preprocessing transformations are expressed
  as a `Dataset`, and you want to use the transformation at serving time.
  For example:

  ```python
  input_batch = tf.compat.v1.placeholder(tf.string, shape=[BATCH_SIZE])

  def preprocessing_fn(input_str):
    # ...
    return image, label

  dataset = (tf.data.Dataset.from_tensor_slices(input_batch)
             .map(preprocessing_fn, num_parallel_calls=BATCH_SIZE)
             .batch(BATCH_SIZE))

  image_batch, label_batch = tf.data.experimental.get_single_element(dataset)
  ```

  Args:
    dataset: A `tf.data.Dataset` object containing a single element.

  Returns:
    A nested structure of `tf.Tensor` objects, corresponding to the single
    element of `dataset`.

  Raises:
    TypeError: if `dataset` is not a `tf.data.Dataset` object.
    InvalidArgumentError (at runtime): if `dataset` does not contain exactly
      one element.
  """
  return experimental_get_single_element.get_single_element(dataset)
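A minimal eager-mode use of the wrapper above, showing the "nested structure" return described in the docstring (TF 2.x assumed):

```python
import tensorflow as tf

ds = tf.data.Dataset.from_tensors((tf.constant(1), tf.constant("a")))
num, label = tf.data.experimental.get_single_element(ds)  # -> 1, b"a"
```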
Example #15
def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
  """Batches a window of dense tensors with padding."""

  padded_shape = math_ops.cast(
      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return padded_shape

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(array_ops.shape(value), padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ",
        array_ops.shape(value), padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, array_ops.shape(value))

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  if padding_value is None:
    if dataset.output_types == dtypes.string:
      padding_value = ""
    elif dataset.output_types == dtypes.bool:
      padding_value = False
    elif dataset.output_types == dtypes.variant:
      raise TypeError("Unable to create padding for field of type 'variant'")
    else:
      padding_value = 0

  def batch_init_fn(_):
    batch_shape = array_ops.concat(
        [np.array([0], dtype=np.int32), padded_shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset.output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  def pad_fn(value):
    shape = array_ops.shape(value)
    left = array_ops.zeros_like(shape)
    right = padded_shape - shape
    return array_ops.pad(
        value, array_ops.stack([left, right], 1), constant_values=padding_value)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(pad_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
Example #16
def _padded_batch_dense_window(dataset, padded_shape, padding_value=None):
  """Batches a window of dense tensors with padding."""

  padded_shape = math_ops.cast(
      convert.partial_shape_to_tensor(padded_shape), dtypes.int32)

  def key_fn(_):
    return np.int64(0)

  def max_init_fn(_):
    return padded_shape

  def max_reduce_fn(state, value):
    """Computes the maximum shape to pad to."""
    condition = math_ops.reduce_all(
        math_ops.logical_or(
            math_ops.less_equal(array_ops.shape(value), padded_shape),
            math_ops.equal(padded_shape, -1)))
    assert_op = control_flow_ops.Assert(condition, [
        "Actual shape greater than padded shape: ",
        array_ops.shape(value), padded_shape
    ])
    with ops.control_dependencies([assert_op]):
      return math_ops.maximum(state, array_ops.shape(value))

  def finalize_fn(state):
    return state

  # Compute the padded shape.
  max_reducer = grouping.Reducer(max_init_fn, max_reduce_fn, finalize_fn)
  padded_shape = get_single_element.get_single_element(
      dataset.apply(grouping.group_by_reducer(key_fn, max_reducer)))

  dataset_output_types = dataset_ops.get_legacy_output_types(dataset)
  if padding_value is None:
    if dataset_output_types == dtypes.string:
      padding_value = ""
    elif dataset_output_types == dtypes.bool:
      padding_value = False
    elif dataset_output_types == dtypes.variant:
      raise TypeError("Unable to create padding for field of type 'variant'")
    else:
      padding_value = 0

  def batch_init_fn(_):
    batch_shape = array_ops.concat(
        [np.array([0], dtype=np.int32), padded_shape], 0)
    return gen_array_ops.empty(batch_shape, dtype=dataset_output_types)

  def batch_reduce_fn(state, value):
    return array_ops.concat([state, [value]], 0)

  def pad_fn(value):
    shape = array_ops.shape(value)
    left = array_ops.zeros_like(shape)
    right = padded_shape - shape
    return array_ops.pad(
        value, array_ops.stack([left, right], 1), constant_values=padding_value)

  batch_reducer = grouping.Reducer(batch_init_fn, batch_reduce_fn, finalize_fn)
  return get_single_element.get_single_element(
      dataset.map(pad_fn).apply(
          grouping.group_by_reducer(key_fn, batch_reducer)))
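`pad_fn` pads each element on the right of every dimension up to `padded_shape`. The same paddings construction with public ops, on a toy tensor:

```python
import tensorflow as tf

value = tf.constant([[1, 2], [3, 4]])
padded_shape = tf.constant([3, 4], dtype=tf.int32)
shape = tf.shape(value)
# Column 0: pad nothing before; column 1: pad up to padded_shape after.
paddings = tf.stack([tf.zeros_like(shape), padded_shape - shape], axis=1)
padded = tf.pad(value, paddings, constant_values=0)  # shape [3, 4]
```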
Example #19
def flat_map_func(ds):
  batched = ds.batch(2)
  element = get_single_element.get_single_element(batched)
  return dataset_ops.Dataset.from_tensors(element)
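`flat_map_func` expects a nested dataset, such as the windows produced by `Dataset.window`; a hedged usage sketch with the public API (TF 2.x assumed):

```python
import tensorflow as tf

def flat_map_func(ds):
  batched = ds.batch(2)
  element = tf.data.experimental.get_single_element(batched)
  return tf.data.Dataset.from_tensors(element)

pairs = tf.data.Dataset.range(6).window(2).flat_map(flat_map_func)
for p in pairs:
  print(p.numpy())  # [0 1], [2 3], [4 5]
```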
Example #20
def fn():
  _ = get_single_element.get_single_element(dataset_fn())
  return "hello"