def ragged_op(*args, **kwargs):
        """Ragged version of `op`."""
        args = list(args)

        # Collect all of the elementwise arguments, and put them in a single
        # dict whose values are the (potentially ragged) tensors that need to
        # be broadcast to a common shape.  The keys of this dict are tuples
        # (argkey, index), where argkey is an int for positional args or a string
        # for keyword args; and index is None for non-list args and the index of the
        # tensor for list args.
        elementwise_args = {}
        for (name, position, is_list) in elementwise_arg_infos.values():
            if position < len(args):
                if is_list:
                    args[position] = list(args[position])
                    for (index, arg) in enumerate(args[position]):
                        elementwise_args[position, index] = arg
                else:
                    elementwise_args[position, None] = args[position]
            elif name in kwargs:
                if is_list:
                    kwargs[name] = list(kwargs[name])
                    for (i, arg) in enumerate(kwargs[name]):
                        elementwise_args[name, i] = arg
                else:
                    elementwise_args[name, None] = kwargs[name]

        with ops.name_scope(None, op.__name__, elementwise_args.values()):
            # Convert all inputs to tensors or ragged tensors.
            for ((key, index), tensor) in elementwise_args.items():
                argname = elementwise_arg_infos[key].name
                converted = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
                    tensor, name=argname)
                elementwise_args[key, index] = converted

            # Broadcast tensors to have compatible shapes.
            broadcast_args, result_splits, broadcast_check_ops = \
                _broadcast_elementwise_args(elementwise_args)

            # Replace tensor arguments with their dense values.
            for ((key, index), tensor) in broadcast_args.items():
                if ragged_tensor.is_ragged(tensor):
                    if isinstance(key, int) and index is None:
                        args[key] = tensor.inner_values
                    elif isinstance(key, int) and index is not None:
                        args[key][index] = tensor.inner_values
                    elif isinstance(key, str) and index is None:
                        kwargs[key] = tensor.inner_values
                    else:
                        assert isinstance(key, str) and index is not None
                        kwargs[key][index] = tensor.inner_values

            # Call the elementwise op on the broadcasted dense values.
            with ops.control_dependencies(broadcast_check_ops):
                result_values = op(*args, **kwargs)

            # Restore any ragged dimensions that we stripped off, and return the
            # result.
            return ragged_factory_ops.from_nested_row_splits(
                result_values, result_splits)
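# --- Illustrative sketch (not part of the original source) ---
# The wrapper above follows a "strip the row splits, run the dense op on the
# flat values, re-wrap" pattern.  The sketch below applies that pattern by
# hand for a single ragged input, using the same helpers the wrapper calls
# (it assumes the same module-level imports as the snippet above).
def _elementwise_on_ragged_sketch(rt, dense_op):
  """Applies `dense_op` elementwise to a single (potentially ragged) input."""
  rt = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(rt)
  if not ragged_tensor.is_ragged(rt):
    return dense_op(rt)
  # Run the op on the flat inner values, then restore the ragged structure.
  flat_result = dense_op(rt.inner_values)
  return ragged_factory_ops.from_nested_row_splits(flat_result,
                                                   rt.nested_row_splits)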
def to_sparse(rt_input, name=None):
    """Converts a `RaggedTensor` into a sparse tensor.

  Example:

  ```python
  >>> rt = ragged.constant([[1, 2, 3], [4], [], [5, 6]])
  >>> ragged.to_sparse(rt).eval()
  SparseTensorValue(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]],
                    values=[1, 2, 3, 4, 5, 6],
                    dense_shape=[4, 3])
  ```

  Args:
    rt_input: The input `RaggedTensor`.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A SparseTensor with the same values as `rt_input`.
  """
    if not ragged_tensor.is_ragged(rt_input):
        raise TypeError('Expected RaggedTensor, got %s' %
                        type(rt_input).__name__)
    with ops.name_scope(name, 'RaggedToSparse', [rt_input]):
        rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            rt_input, name='rt_input')
        result = gen_ragged_conversion_ops.ragged_tensor_to_sparse(
            rt_input.nested_row_splits, rt_input.inner_values, name=name)
        return sparse_tensor.SparseTensor(result.sparse_indices,
                                          result.sparse_values,
                                          result.sparse_dense_shape)
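# --- Usage sketch (not part of the original source) ---
# Runs the docstring example above as TF 1.x graph-mode code, calling the
# `to_sparse` defined here directly.  `ragged_factory_ops.constant` is
# assumed to be the factory behind the `ragged.constant` calls in the
# docstrings.
def _example_to_sparse():
  import tensorflow as tf
  rt = ragged_factory_ops.constant([[1, 2, 3], [4], [], [5, 6]])
  sp = to_sparse(rt)
  with tf.Session() as sess:
    # Expected (per the docstring): values [1 2 3 4 5 6], dense_shape [4 3].
    print(sess.run(sp))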
def broadcast_to(rt_input, shape, broadcast_inner_dimensions=True):
    """Broadcasts a potentially ragged tensor to a ragged shape.

  Tiles `rt_input` as necessary to match the given shape.

  Behavior is undefined if `rt_input` is not broadcast-compatible with `shape`.

  Args:
    rt_input: The potentially ragged tensor to broadcast.
    shape: A `RaggedTensorDynamicShape`
    broadcast_inner_dimensions: If false, then inner dimensions will not be
      tiled.

  Returns:
    A potentially ragged tensor whose values are taken from
    `rt_input`, and whose shape matches `shape`.
  """
    if not isinstance(shape, RaggedTensorDynamicShape):
        raise TypeError('shape must be a RaggedTensorDynamicShape')
    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(rt_input)

    # Broadcasting to a uniform shape.
    if shape.num_partitioned_dimensions == 0:
        return _broadcast_to_uniform_shape(rt_input, shape,
                                           broadcast_inner_dimensions)
    else:
        return _broadcast_to_ragged_shape(rt_input, shape,
                                          broadcast_inner_dimensions)
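# --- Usage sketch (not part of the original source) ---
# Broadcasts a dense [3, 1] tensor against the dynamic shape of a ragged
# tensor, which tiles one value across each ragged row.  The target shape is
# built with the `RaggedTensorDynamicShape.from_tensor` classmethod shown
# further down; `ragged_factory_ops.constant` is an assumed factory.
def _example_broadcast_to():
  import tensorflow as tf
  rt = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6]])
  target_shape = RaggedTensorDynamicShape.from_tensor(rt)
  tiled = broadcast_to(tf.constant([[10], [20], [30]]), target_shape)
  # Expected rows (under the usual broadcasting rules):
  #   [10, 10, 10], [20], [30, 30]
  return tiled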
def _replace_ragged_with_inner_values(value, nested_splits_lists):
    """Replace RaggedTensors with their inner_values, and record their splits.

  Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their
  `inner_values` tensor.  Looks inside lists, tuples, and dicts.

  Appends each `RaggedTensor`'s `nested_splits` to `nested_splits_lists`.

  Args:
    value: The value that should be transformed by replacing `RaggedTensors`.
    nested_splits_lists: An output parameter used to record the `nested_splits`
      for any `RaggedTensors` that were replaced.

  Returns:
    A copy of `value` with nested `RaggedTensors` replaced by their `values`.
  """
    # Base case
    if ragged_tensor.is_ragged(value):
        value = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(value)
        nested_splits_lists.append(value.nested_row_splits)
        return value.inner_values

    # Recursion cases
    def recurse(v):
        return _replace_ragged_with_inner_values(v, nested_splits_lists)

    if isinstance(value, list):
        return [recurse(v) for v in value]
    elif isinstance(value, tuple):
        return tuple(recurse(v) for v in value)
    elif isinstance(value, dict):
        return dict((k, recurse(v)) for (k, v) in value.items())
    else:
        return value
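# --- Illustrative sketch (not part of the original source) ---
# Exercises the contract described in the docstring above: ragged values in
# a nested structure are swapped for their flat `inner_values`, while their
# `nested_row_splits` are appended to the output-parameter list.
def _example_replace_ragged():
  import tensorflow as tf
  rt = ragged_factory_ops.constant([[1, 2], [3]])  # assumed factory
  splits_lists = []
  replaced = _replace_ragged_with_inner_values(
      {'ragged': rt, 'dense': tf.constant([4, 5])}, splits_lists)
  # `replaced['ragged']` is now the flat values tensor [1, 2, 3], and
  # `splits_lists` holds one entry: rt's nested row splits.
  return replaced, splits_lists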
def from_tensor(cls, rt_input):
  """Constructs a ragged shape for a potentially ragged tensor."""
  with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor', [rt_input]):
    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(rt_input)
    if not ragged_tensor.is_ragged(rt_input):
      return cls([], array_ops.shape(rt_input))
    else:
      partitioned_dim_sizes = ((ragged_array_ops.nrows(rt_input),) +
                               ragged_array_ops.nested_row_lengths(rt_input))
      return RaggedTensorDynamicShape(
          partitioned_dim_sizes,
          array_ops.shape(rt_input.inner_values)[1:])
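# --- Illustrative sketch (not part of the original source) ---
# For a dense tensor, `from_tensor` produces a shape with no partitioned
# dimensions; for a ragged tensor, the outer dimensions become partitioned
# dimension sizes (nrows plus the nested row lengths).
def _example_dynamic_shape_from_tensor():
  import tensorflow as tf
  dense_shape = RaggedTensorDynamicShape.from_tensor(tf.zeros([4, 3]))
  ragged_shape = RaggedTensorDynamicShape.from_tensor(
      ragged_factory_ops.constant([[1, 2, 3], [4]]))  # assumed factory
  # dense_shape has num_partitioned_dimensions == 0, so broadcast_to above
  # takes the uniform-shape path; ragged_shape records nrows (2) and the
  # per-row lengths (3 and 1) as partitioned dimensions.
  return dense_shape, ragged_shape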
def unicode_encode(input,
                   output_encoding,
                   errors="replace",
                   replacement_char=65533,
                   name=None):
    r"""Encodes each sequence of Unicode code points in `input` into a string.

  `result[i1...iN]` is the string formed by concatenating the Unicode
  codepoints `input[i1...iN, :]`, encoded using `output_encoding`.

  Args:
    input: An `N+1` dimensional potentially ragged integer tensor with
        shape `[D1...DN, num_chars]`.
    output_encoding: Unicode encoding that should be used to encode each
      codepoint sequence.  Can be `"UTF-8"`, `"UTF-16-BE"`, or `"UTF-32-BE"`.
    errors: Specifies the response when an invalid codepoint is encountered
      (optional). One of:
            * `'replace'`: Replace invalid codepoint with the
              `replacement_char`. (default)
            * `'ignore'`: Skip invalid codepoints.
            * `'strict'`: Raise an exception for any invalid codepoint.
    replacement_char: The replacement character codepoint to be used in place of
      any invalid input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default Unicode replacement character,
      U+FFFD (decimal 65533).
    name: A name for the operation (optional).

  Returns:
    An `N` dimensional `string` tensor with shape `[D1...DN]`.

  #### Example:
    ```python
      >>> input = [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
      >>> unicode_encode(input, 'UTF-8')
      ['G\xc3\xb6\xc3\xb6dnight', '\xf0\x9f\x98\x8a']
    ```
  """
    with ops.name_scope(name, "UnicodeEncode", [input]):
        input_tensor = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            input)
        if input_tensor.shape.ndims is None:
            raise ValueError("Rank of input_tensor must be statically known.")
        if ragged_tensor.is_ragged(input_tensor):
            if input_tensor.inner_values.shape.ndims > 1:
                # If the inner_values of our ragged tensor is multi-dimensional, we can
                # process it separately and our output will have the same nested splits
                # as our input.
                return input_tensor.with_inner_values(
                    unicode_encode(input_tensor.inner_values, output_encoding,
                                   errors, replacement_char))
            elif input_tensor.ragged_rank > 1:
                # Recursively process the values of the ragged tensor.
                return input_tensor.with_values(
                    unicode_encode(input_tensor.values, output_encoding,
                                   errors, replacement_char))
            else:
                # Our ragged tensor is of the correct shape (rank 1 inner_values tensor
                # with ragged_rank of 1) so we can process it as normal.
                return gen_string_ops.unicode_encode(
                    input_values=input_tensor.values,
                    input_splits=input_tensor.row_splits,
                    output_encoding=output_encoding,
                    errors=errors,
                    replacement_char=replacement_char)
        else:
            if input_tensor.shape.ndims == 2:
                # The input tensor is of the correct 2-D shape, it's just not ragged.
                return unicode_encode(
                    ragged_conversion_ops.from_tensor(input_tensor),
                    output_encoding, errors, replacement_char)
            elif input_tensor.shape.ndims > 2:
                # We need to initially flatten the input tensor to 2-D, and then can
                # reshape the output of our processed flattened tensor.
                flat_input_tensor = array_ops.reshape(
                    input_tensor,
                    array_ops.stack([-1, array_ops.shape(input_tensor)[-1]]))
                flat_output_tensor = unicode_encode(flat_input_tensor,
                                                    output_encoding, errors,
                                                    replacement_char)
                return array_ops.reshape(flat_output_tensor,
                                         input_tensor.shape[:-1])
            elif input_tensor.shape.ndims == 0:
                raise ValueError("input_tensor's rank must be at least 1.")
            else:
                # Our input tensor is rank 1, so we create a ragged tensor with an added
                # dimension to create the correct input shape & type, and then remove
                # the additional dimension from the output and return the string scalar.
                ragged_input_tensor = ragged_factory_ops.from_row_splits(
                    input_tensor,
                    array_ops.stack([
                        0,
                        array_ops.shape(input_tensor, out_type=dtypes.int64)[0]
                    ]))
                output_tensor = unicode_encode(ragged_input_tensor,
                                               output_encoding, errors,
                                               replacement_char)
                return array_ops.reshape(output_tensor, [])
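# --- Usage sketch (not part of the original source) ---
# Applies unicode_encode to a ragged batch of codepoint sequences, the case
# handled by the ragged branch above.  `ragged_factory_ops.constant` is an
# assumed factory for building the ragged input.
def _example_unicode_encode_ragged():
  codepoints = ragged_factory_ops.constant([[72, 105], [128522]])  # "Hi" and one emoji
  encoded = unicode_encode(codepoints, "UTF-8")
  # Expected: a rank-1 string tensor, e.g. [b'Hi', b'\xf0\x9f\x98\x8a'].
  return encoded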
def map_fn(fn,
           elems,
           dtype=None,
           parallel_iterations=None,
           back_prop=True,
           swap_memory=False,
           infer_shape=True,
           name=None):
    """map on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `map_fn` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the
  tensors unpacked from `elems`. `dtype` is the data type of the return
  value of `fn`. Users must provide `dtype` if it is different from
  the data type of `elems`.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Furthermore, `fn` may emit a different structure than its input.  For example,
  `fn` may look like: `fn = lambda t1: (t1 + 1, t1 - 1)`.  In this case,
  the `dtype` parameter is not optional: `dtype` must be a type or (possibly
  nested) tuple of types matching the output of `fn`.

  To apply a functional operation to the nonzero elements of a SparseTensor
  one of the following methods is recommended. First, if the function is
  expressible as TensorFlow ops, use

  ```python
    result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
  ```

  If, however, the function is not expressible as a TensorFlow op, then use

  ```python
  result = SparseTensor(
    input.indices, map_fn(fn, input.values), input.dense_shape)
  ```

  instead.

  When executing eagerly, map_fn does not execute in parallel even if
  `parallel_iterations` is set to a value > 1. You can still get the
  performance benefits of running a function in parallel by using the
  `tf.contrib.eager.defun` decorator,

  ```python
  # Assume the function being used in map_fn is fn.
  # To ensure map_fn calls fn in parallel, use the defun decorator.
  @tf.contrib.eager.defun
  def func(tensor):
    return tf.map_fn(fn, tensor)
  ```

  Note that if you use the defun decorator, any non-TensorFlow Python code
  that you may have written in your function won't get executed. See
  `tf.contrib.eager.defun` for more details. The recommendation would be to
  debug without defun but switch to defun to get performance benefits of
  running map_fn in parallel.

  Args:
    fn: The callable to be performed.  It accepts one argument, which will have
      the same (possibly nested) structure as `elems`.  Its output must have the
      same structure as `dtype` if one is provided, otherwise it must have the
      same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which will
      be unpacked along their first dimension.  The nested sequence of the
      resulting slices will be applied to `fn`.
    dtype: (optional) The output type(s) of `fn`.  If `fn` returns a structure
      of Tensors differing from the structure of `elems`, then `dtype` is not
      optional and must have the same structure as the output of `fn`. Use
      `RaggedTensorType` to declare an output of type `RaggedTensor`.
    parallel_iterations: (optional) The number of iterations allowed to run in
      parallel. When graph building, the default value is 10. While executing
      eagerly, the default value is set to 1.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A possibly nested sequence of potentially ragged tensors.  Each
    tensor packs the results of applying `fn` to tensors unpacked from `elems`
    along the first dimension, from first to last.

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `dtype` do not match, or if elems is a SparseTensor.
    ValueError: if the lengths of the output of `fn` and `dtype` do not match.

  #### Examples:

    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    squares = map_fn(lambda x: x * x, elems)
    # squares == [1, 4, 9, 16, 25, 36]
    ```

    ```python
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
    # alternate == [-1, 2, -3]
    ```

    ```python
    elems = np.array([1, 2, 3])
    alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
    # alternates[0] == [1, 2, 3]
    # alternates[1] == [-1, -2, -3]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]])
    mean = map_fn(tf.reduce_mean, elems)
    # mean == [2, 4, 6]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]], dtype=tf.int64)
    out = map_fn(fn=lambda x: x + 1, elems=elems,
      dtype=ragged.RaggedTensorType(type=tf.int64, ragged_rank=0))
    # out = ragged.constant([[2, 3, 4], [5, 6], [7, 8]])
    ```
  """
    if not callable(fn):
        raise TypeError("fn must be callable.")

    if isinstance(elems, sparse_tensor.SparseTensor):
        raise TypeError(
            "To perform a map on the values of a sparse tensor use either "
            " SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
            " SparseTensor(input.indices, map_fn(fn, input.values), "
            "input.dense_shape)")

    in_graph_mode = not context.executing_eagerly()
    # Set the default number of parallel_iterations depending on graph/eager mode.
    if in_graph_mode and not parallel_iterations:
        parallel_iterations = 10
    elif not in_graph_mode and not parallel_iterations:
        parallel_iterations = 1

    if not in_graph_mode and parallel_iterations > 1:
        logging.log_first_n(
            logging.WARN, "Setting parallel_iterations > 1 has no "
            "effect when executing eagerly. Consider calling map_fn"
            " with tf.contrib.eager.defun to execute fn in "
            "parallel.", 1)
        parallel_iterations = 1

    input_is_sequence = nest.is_sequence(elems)
    input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]

    def input_pack(x):
        return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

    elems_flat = input_flatten(elems)

    with ops.name_scope(name, "map", elems_flat):
        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode:
            # Any get_variable calls in fn will cache the first call locally
            # and not issue repeated network I/O requests for each iteration.
            varscope = vs.get_variable_scope()
            varscope_caching_device_was_none = False
            if varscope.caching_device is None:
                # TODO(ebrevdo): Change to using colocate_with here and in other
                # methods.
                varscope.set_caching_device(lambda op: op.device)
                varscope_caching_device_was_none = True

        elems_flat = [
            ragged_factory_ops.convert_to_tensor_or_ragged_tensor(elem,
                                                                  name="elem")
            for elem in elems_flat
        ]

        # We can either infer the output, or we can assume that it will be the same
        # as the input structure.
        dtype = dtype or input_pack([elem.dtype for elem in elems_flat])

        # Find the number of iterations, n may be known statically.
        if isinstance(elems_flat[0], ragged_tensor.RaggedTensor):
            n = ragged_array_ops.nrows(elems_flat[0], out_type=dtypes.int32)
        else:
            static_shape = elems_flat[0].shape
            if static_shape.ndims is not None and static_shape.ndims < 1:
                if len(elems_flat) == 1:
                    raise ValueError(
                        "elems must be a 1+ dimensional Tensor, not a scalar")
                else:
                    raise ValueError(
                        "elements in elems must be 1+ dimensional Tensors, not scalars"
                    )
            n = static_shape[0].value or array_ops.shape(elems_flat[0])[0]

        # Create a flat list of TAs.

        # Flatten the dtype structure to a list.
        dtype_flat = nest.flatten(dtype)

        # decompose to components
        dtype_components = [_maybe_decompose_dtype(d) for d in dtype_flat]
        dtype_components_flat = nest.flatten(dtype_components)

        # Create TensorArrays.
        accs_ta = [
            tensor_array_ops.TensorArray(dtype=t,
                                         dynamic_size=False,
                                         infer_shape=infer_shape,
                                         size=n) for t in dtype_components_flat
        ]

        i = constant_op.constant(0)

        def compute(i, tas):
            """The loop body of map_fn.

      Args:
        i: the loop counter
        tas: the flat TensorArray accumulator list

      Returns:
        (i + 1, tas): the updated counter + updated TensorArrays

      Raises:
        TypeError: if dtype and packed_fn_values structure do not match
        ValueError: if dtype and packed_fn_values lengths do not match
      """
            # Get Tensors or RaggedTensors sliced at i, then pack it back to the
            # original structure.
            packed_values = input_pack(
                [elem_flat[i] for elem_flat in elems_flat])
            packed_fn_values = fn(packed_values)

            # Check that the structure of the output matches what was declared or
            # inferred.
            # nest.assert_same_structure(dtype or elems, packed_fn_values)

            # Flatten and decompose to a list of Tensors
            flat_fn_values = nest.flatten(packed_fn_values)

            # If we declared that we are expecting a RaggedTensor output but we
            # got a Tensor output, try to convert it to a RaggedTensor.
            flat_fn_composite_tensors = list(
                _convert_declared(flat_fn_values, dtype_flat))

            flat_fn_components = [
                _maybe_decompose_tensor(t) for t in flat_fn_composite_tensors
            ]
            flat_fn_tensors = nest.flatten(flat_fn_components)

            # Write to TAs.
            tas = [
                ta.write(i, value)
                for (ta, value) in zip(tas, flat_fn_tensors)
            ]

            return (i + 1, tas)

        _, r_a = control_flow_ops.while_loop(
            lambda i, _: i < n,
            compute, (i, accs_ta),
            parallel_iterations=parallel_iterations,
            back_prop=back_prop,
            swap_memory=swap_memory,
            maximum_iterations=n)

        # TODO(akshayka): Remove the in_graph_mode check once caching devices are
        # supported in Eager
        if in_graph_mode and varscope_caching_device_was_none:
            varscope.set_caching_device(None)

        # Pack back into a list of components
        results_as_components = nest.pack_sequence_as(dtype_components, r_a)

        # Stack TensorArrays for Tensor outputs, and concat RaggedTensor outputs.
        def _stack_or_concat(e):
            if isinstance(e, _RaggedTensorComponents):
                return _concat_ragged_tensor_components(e)
            else:
                result = e.stack()
                return result

        results_flat_components = [
            _stack_or_concat(e) for e in results_as_components
        ]

        results_packed = [
            _maybe_recompose_tensor(c) for c in results_flat_components
        ]
        results_packed = nest.pack_sequence_as(dtype, results_packed)
        return results_packed
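# --- Usage sketch (not part of the original source) ---
# Demonstrates the case discussed in the docstring where `fn` emits a
# different structure than `elems`, so `dtype` must spell out the output
# structure: each ragged row is mapped to a (min, max) pair of scalars.
# `ragged_factory_ops.constant` is an assumed factory for the input.
def _example_map_fn_multi_output():
  import tensorflow as tf
  elems = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]],
                                      dtype=tf.int64)
  out = map_fn(
      fn=lambda row: (tf.reduce_min(row), tf.reduce_max(row)),
      elems=elems,
      dtype=(tf.int64, tf.int64))
  # Expected: out[0] == [1, 4, 6] and out[1] == [3, 5, 7].
  return out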
def to_tensor(rt_input, default_value=None, name=None):
    """Converts a `RaggedTensor` into a `Tensor`.

  Example:

  ```python
  >>> rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
  >>> print(ragged.to_tensor(rt).eval())
  [[9 8 7]
   [0 0 0]
   [6 5 0]
   [4 0 0]]
  ```

  Args:
    rt_input: The input `RaggedTensor`.
    default_value: Value to set for indices not specified in `rt_input`.
      Defaults to zero.  `default_value.shape` must be equal to
      `rt_input.shape[rt_input.ragged_rank + 1:]`.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `Tensor` with shape `ragged.bounding_shape(rt_input)` and the
    values specified by the non-empty values in `rt_input`.  Empty values are
    assigned `default_value`.
  """
    with ops.name_scope(name, 'RaggedToTensor', [rt_input, default_value]):
        rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            rt_input, name='rt_input')
        if not ragged_tensor.is_ragged(rt_input):
            return rt_input  # already dense

        # If ragged_rank > 1, then recursively convert the ragged values into a
        # `Tensor` before we proceed.
        values = rt_input.values
        if ragged_tensor.is_ragged(values):
            values = to_tensor(values, default_value)

        # Get the expected dense shape ([nrows, ncols] + value_shape).
        rt_row_lengths = [rt_input.row_splits[1:] - rt_input.row_splits[:-1]]
        nrows = array_ops.shape(rt_input.row_splits,
                                out_type=dtypes.int64)[0] - 1
        ncols = math_ops.maximum(math_ops.reduce_max(rt_row_lengths), 0)
        values_shape = array_ops.shape(values, out_type=dtypes.int64)
        value_shape = values_shape[1:]
        nvals = values_shape[0]

        # Build a default value if none was supplied.
        if default_value is None:
            default_value = array_ops.zeros(value_shape, dtype=values.dtype)
        else:
            default_value = ops.convert_to_tensor(default_value,
                                                  name='default_value',
                                                  dtype=values.dtype)
        default_value.shape.assert_is_compatible_with(values.shape[1:])
        default_value.set_shape(values.shape[1:])

        # Get the row start indices, and expand to shape=[nrows, 1].
        starts = array_ops.expand_dims(rt_input.row_splits[:-1], 1)

        # Get the row limit indices, and expand to shape=[nrows, 1].
        limits = array_ops.expand_dims(rt_input.row_splits[1:], 1)

        # Get the column indices, and expand to shape=[1, ncols].
        columns = array_ops.expand_dims(math_ops.range(0, ncols), 0)

        # Build a list containing the values plus the default value.  We will use
        # tf.gather to collect values from this list for the `Tensor` (using
        # nvals as the index for the default value).
        values_and_default = array_ops.concat(
            [values, array_ops.stack([default_value])], axis=0)

        # Construct a matrix "indices" pointing into values_and_default.  I.e.,
        # output[r, c] = values_and_default[indices[r, c]].
        nondefault_index = starts + columns
        has_value = nondefault_index < limits
        default_index = array_ops.fill(array_ops.stack([nrows, ncols]), nvals)
        indices = array_ops.where(has_value, nondefault_index, default_index)

        # Gather the results into a `Tensor`.
        return array_ops.gather(values_and_default, indices)
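# --- Usage sketch (not part of the original source) ---
# Same as the docstring example above, but padding with an explicit
# `default_value` instead of zeros.  TF 1.x graph mode is assumed, as is
# the `ragged_factory_ops.constant` factory.
def _example_to_tensor_default_value():
  import tensorflow as tf
  rt = ragged_factory_ops.constant([[9, 8, 7], [], [6, 5], [4]])
  dense = to_tensor(rt, default_value=-1)
  with tf.Session() as sess:
    # Expected:
    # [[ 9  8  7]
    #  [-1 -1 -1]
    #  [ 6  5 -1]
    #  [ 4 -1 -1]]
    print(sess.run(dense))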
def _ragged_reduce_aggregate(reduce_op,
                             unsorted_segment_op,
                             rt_input,
                             axis,
                             keepdims,
                             name=None):
    """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce a
      given set of axes), or a `Tensor` with a constant value.  Must be in the
      range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `data`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.
  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
    if not ragged_tensor.is_ragged(rt_input):
        return reduce_op(rt_input, axis, name=name)

    if keepdims:
        raise ValueError('keepdims=True is not supported for RaggedTensors.')

    if isinstance(axis, ops.Tensor):
        axis = tensor_util.constant_value(axis)
        if axis is None:
            raise ValueError('axis must be known at graph construction time.')

    # When reducing all axes, just ignore splits & reduce the inner values.
    if axis is None:
        return reduce_op(rt_input.inner_values, None, name=name)

    with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
        if isinstance(axis, (tuple, list)):
            if not axis:
                return rt_input
            elif len(axis) == 1:
                axis = axis[0]
            else:
                # When reducing multiple axes, just reduce one at a time.  This is less
                # efficient, and only works for associative ops.  (In particular, it
                # does not work for reduce_mean.)  However, reducing multiple axes at
                # once will probably require a nontrivial c++ op.
                axis = sorted(axis)
                inner_reduced = _ragged_reduce_aggregate(
                    reduce_op, unsorted_segment_op, rt_input, axis[-1],
                    keepdims)
                return _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                inner_reduced, axis[:-1],
                                                keepdims)

        axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)

        rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            rt_input, name='rt_input')

        if axis == 0:
            # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
            row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
            num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths),
                                            0)
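            # NOTE: `range` here is not the Python builtin (its result has a
            # `.values` attribute); it refers to the ragged range op from the
            # surrounding module.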
            segment_ids = range(row_lengths).values
            return _ragged_segment_aggregate(unsorted_segment_op,
                                             rt_input.values, segment_ids,
                                             num_segments)
        elif axis == 1:
            # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
            num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
            segment_ids = segment_id_ops.row_splits_to_segment_ids(
                rt_input.row_splits)
            return _ragged_segment_aggregate(unsorted_segment_op,
                                             rt_input.values, segment_ids,
                                             num_segments)
        else:
            # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
            #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
            return rt_input.with_values(
                _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                         rt_input.values, axis - 1, keepdims))
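# --- Illustrative sketch (not part of the original source) ---
# Shows how a concrete ragged reduction can be expressed with the helper
# above by pairing a dense reduce op with its segment-op counterpart; the
# pairing of reduce_sum with unsorted_segment_sum is an assumption about
# how the library wires this helper into its public reduce ops.
def _example_ragged_reduce_sum_rows():
  rt = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6]])  # assumed factory
  row_sums = _ragged_reduce_aggregate(
      reduce_op=math_ops.reduce_sum,
      unsorted_segment_op=math_ops.unsorted_segment_sum,
      rt_input=rt,
      axis=1,
      keepdims=False)
  # Expected: [6, 4, 11], one sum per ragged row.
  return row_sums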
def to_tensor(rt_input, default_value=None, name=None):
  """Converts a `RaggedTensor` into a `Tensor`.

  Example:

  ```python
  >>> rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
  >>> print ragged.to_tensor(rt).eval()
  [[9 8 7]
   [0 0 0]
   [6 5 0]
   [4 0 0]]
  ```

  Args:
    rt_input: The input `RaggedTensor`.
    default_value: Value to set for indices not specified in `rt_input`.
      Defaults to zero.  `default_value.shape` must be equal to
      `rt_input.shape[rt_input.ragged_rank + 1:]`.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `Tensor` with shape `ragged.bounding_shape(rt_input)` and the
    values specified by the non-empty values in `rt_input`.  Empty values are
    assigned `default_value`.
  """
  with ops.name_scope(name, 'RaggedToTensor', [rt_input, default_value]):
    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')
    if not ragged_tensor.is_ragged(rt_input):
      return rt_input  # already dense

    # If ragged_rank > 1, then recursively convert the ragged values into a
    # `Tensor` before we proceed.
    values = rt_input.values
    if ragged_tensor.is_ragged(values):
      values = to_tensor(values, default_value)

    # Get the expected dense shape ([nrows, ncols] + value_shape).
    rt_row_lengths = [rt_input.row_splits[1:] - rt_input.row_splits[:-1]]
    nrows = array_ops.shape(rt_input.row_splits, out_type=dtypes.int64)[0] - 1
    ncols = math_ops.maximum(math_ops.reduce_max(rt_row_lengths), 0)
    values_shape = array_ops.shape(values, out_type=dtypes.int64)
    value_shape = values_shape[1:]
    nvals = values_shape[0]

    # Build a default value if none was supplied.
    if default_value is None:
      default_value = array_ops.zeros(value_shape, dtype=values.dtype)
    else:
      default_value = ops.convert_to_tensor(
          default_value, name='default_value', dtype=values.dtype)
    default_value.shape.assert_is_compatible_with(values.shape[1:])
    default_value.set_shape(values.shape[1:])

    # Get the row start indices, and expand to shape=[nrows, 1].
    starts = array_ops.expand_dims(rt_input.row_splits[:-1], 1)

    # Get the row limit indices, and expand to shape=[nrows, 1].
    limits = array_ops.expand_dims(rt_input.row_splits[1:], 1)

    # Get the column indices, and expand to shape=[1, ncols].
    columns = array_ops.expand_dims(math_ops.range(0, ncols), 0)

    # Build a list containing the values plus the default value.  We will use
    # tf.gather to collect values from this list for the `Tensor` (using
    # nvals as the index for the default value).
    values_and_default = array_ops.concat(
        [values, array_ops.stack([default_value])], axis=0)

    # Construct a matrix "indices" pointing into values_and_default.  I.e.,
    # output[r, c] = values_and_default[indices[r, c]].
    nondefault_index = starts + columns
    has_value = nondefault_index < limits
    default_index = array_ops.fill(array_ops.stack([nrows, ncols]), nvals)
    indices = array_ops.where(has_value, nondefault_index, default_index)

    # Gather the results into a `Tensor`.
    return array_ops.gather(values_and_default, indices)
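The padding step in `to_tensor` boils down to a single gather: append `default_value` after the flat values, then build an index matrix whose out-of-row cells all point at that trailing slot. Here is a standalone NumPy sketch of the same trick (illustrative only, not the library code), using the ragged tensor from the docstring example:

```python
import numpy as np

values = np.array([9, 8, 7, 6, 5, 4])       # flattened values of the ragged rows
row_splits = np.array([0, 3, 3, 5, 6])      # rows: [9, 8, 7], [], [6, 5], [4]
default_value = 0

starts = row_splits[:-1][:, None]           # row start indices, shape [nrows, 1]
limits = row_splits[1:][:, None]            # row limit indices, shape [nrows, 1]
ncols = int((limits - starts).max())        # width of the dense result
columns = np.arange(ncols)[None, :]         # column indices, shape [1, ncols]

# Index len(values) (one past the last real value) selects the default value.
values_and_default = np.concatenate([values, [default_value]])
nondefault_index = starts + columns
indices = np.where(nondefault_index < limits, nondefault_index, len(values))

print(values_and_default[indices])
# [[9 8 7]
#  [0 0 0]
#  [6 5 0]
#  [4 0 0]]
```

Placing the default at index `nvals` is what lets a single `gather` fill both real and padded cells in one pass.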
def _ragged_segment_aggregate(unsorted_segment_op, data, segment_ids,
                              num_segments, name=None):
  """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
  if not (ragged_tensor.is_ragged(data) or
          ragged_tensor.is_ragged(segment_ids)):
    return unsorted_segment_op(data, segment_ids, num_segments, name)

  with ops.name_scope(name, 'RaggedSegment',
                      [data, segment_ids, num_segments]) as name:
    data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        data, name='data')
    segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        segment_ids, name='segment_ids')

    if ragged_tensor.is_ragged(segment_ids):
      if not ragged_tensor.is_ragged(data):
        raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                         'but segment_ids is ragged and data is not.')
      check_splits = check_ops.assert_equal(
          segment_ids.row_splits,
          data.row_splits,
          message='segment_ids.shape must be a prefix of data.shape')
      with ops.control_dependencies([check_splits]):
        return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                         segment_ids.values, num_segments, name)

    segment_ids = math_ops.cast(segment_ids, dtypes.int64)

    # Find the length of each row in data.  (dtype=int64, shape=[data_nrows])
    data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

    # Find the length that each output row will have.  The length of the row
    # corresponding to segment `id` is `max(data_row_lengths[i])` where
    # `segment_ids[i]=id`.  (dtype=int64, shape=[output_nrows])
    output_row_lengths = math_ops.maximum(
        math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                      num_segments), 0)
    assert output_row_lengths.dtype == dtypes.int64

    # Build the splits tensor for the output RaggedTensor.
    output_splits = array_ops.concat(
        [
            array_ops.zeros([1], dtypes.int64),
            math_ops.cumsum(output_row_lengths)
        ],
        axis=0)

    # For each row in `data`, find the start & limit position where that row's
    # values will be aggregated in output.values.
    data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids)
    data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

    # For each value in `data.values`, find the position where it will be
    # aggregated in `output.values`.
    # Get the target output values index for each data values index.
    data_val_to_out_val_index = range(data_row_to_out_row_start,
                                      data_row_to_out_row_limit).values

    # Recursively aggregate the values.
    output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                              data_val_to_out_val_index,
                                              output_splits[-1])
    return ragged_factory_ops.from_row_splits(output_values, output_splits)
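The core of `_ragged_segment_aggregate` is the splits arithmetic: every output row must be long enough to hold the longest input row assigned to its segment, and each input value is then routed to a slot inside its segment's output row. A small NumPy sketch of that bookkeeping (hypothetical inputs, illustrative only):

```python
import numpy as np

# Ragged data rows [[1, 2, 3], [4], [5, 6]] aggregated into two segments.
data_row_splits = np.array([0, 3, 4, 6])
segment_ids = np.array([0, 0, 1])
num_segments = 2

data_row_lengths = data_row_splits[1:] - data_row_splits[:-1]        # [3, 1, 2]

# Each output row is as long as the longest data row mapped to that segment
# (and stays 0 for segments that receive no rows).
output_row_lengths = np.zeros(num_segments, dtype=np.int64)
np.maximum.at(output_row_lengths, segment_ids, data_row_lengths)      # [3, 2]

output_splits = np.concatenate([[0], np.cumsum(output_row_lengths)])  # [0, 3, 5]

# Where each data row's values land in output.values.
out_row_start = output_splits[segment_ids]                            # [0, 0, 3]
val_index = np.concatenate(
    [np.arange(s, s + n) for s, n in zip(out_row_start, data_row_lengths)])
print(output_splits, val_index)                                       # [0 3 5] [0 1 2 0 3 4]
```

With those target indices in hand, the real implementation recurses on `data.values`, letting `unsorted_segment_op` combine the values that collide at the same output position.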
def _ragged_reduce_aggregate(reduce_op, unsorted_segment_op, rt_input, axis,
                             name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce a
      given set of axes), or a `Tensor` with a constant value.  Must be in the
      range `[0, rt_input.rank)`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `rt_input`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting the number of ragged
    dimensions specified in `axis` from `rt_input.ragged_rank`.
  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  if not ragged_tensor.is_ragged(rt_input):
    return reduce_op(rt_input, axis, name=name)

  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    return reduce_op(rt_input.inner_values, None, name=name)

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        inner_reduced = _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                 rt_input, axis[-1])
        return _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                        inner_reduced, axis[:-1])

    axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)

    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      segment_ids = range(row_lengths).values
      return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                       segment_ids, num_segments)
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                       segment_ids, num_segments)
    else:
      # out[i_0, ..., i_[axis-1], i_[axis+1], ..., i_N] =
      #     sum_{j} rt_input[i_0, ..., i_[axis-1], j, i_[axis+1], ..., i_N]
      return rt_input.with_values(
          _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                   rt_input.values, axis - 1))
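To make the three axis cases above concrete, here is a pure-Python sketch (illustrative only, not the library implementation) of what reducing a two-dimensional ragged tensor along axis 0 versus axis 1 yields, with sum as the combiner:

```python
# rt represents the ragged tensor [[3, 1, 4], [], [5, 9], [2]].
rows = [[3, 1, 4], [], [5, 9], [2]]

# axis=1: combine the values within each row (one result per row).
axis1 = [sum(row) for row in rows]                                    # [8, 0, 14, 2]

# axis=0: combine down each ragged "column"; column j only draws from rows
# that actually have a j-th element.
ncols = max((len(row) for row in rows), default=0)
axis0 = [sum(row[j] for row in rows if len(row) > j) for j in range(ncols)]

print(axis0, axis1)                                                   # [10, 10, 4] [8, 0, 14, 2]
```

This is exactly the distinction the segment-based branches implement: axis 0 groups values by column position within their rows, while axis 1 groups values by the row they came from.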
Example #19
def _ragged_segment_aggregate(unsorted_segment_op,
                              data,
                              segment_ids,
                              num_segments,
                              name=None):
    """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
    if not (ragged_tensor.is_ragged(data)
            or ragged_tensor.is_ragged(segment_ids)):
        return unsorted_segment_op(data, segment_ids, num_segments, name)

    with ops.name_scope(name, 'RaggedSegment',
                        [data, segment_ids, num_segments]) as name:
        data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            data, name='data')
        segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
            segment_ids, name='segment_ids')

        if ragged_tensor.is_ragged(segment_ids):
            if not ragged_tensor.is_ragged(data):
                raise ValueError(
                    'segment_ids.shape must be a prefix of data.shape, '
                    'but segment_ids is ragged and data is not.')
            check_splits = check_ops.assert_equal(
                segment_ids.row_splits,
                data.row_splits,
                message='segment_ids.shape must be a prefix of data.shape')
            with ops.control_dependencies([check_splits]):
                return _ragged_segment_aggregate(unsorted_segment_op,
                                                 data.values,
                                                 segment_ids.values,
                                                 num_segments, name)

        segment_ids = math_ops.cast(segment_ids, dtypes.int64)

        # Find the length of each row in data.  (dtype=int64, shape=[data_nrows])
        data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

        # Find the length that each output row will have.  The length of the row
        # corresponding to segment `id` is `max(data_row_lengths[i])` where
        # `segment_ids[i]=id`.  (dtype=int64, shape=[output_nrows])
        output_row_lengths = math_ops.maximum(
            math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                          num_segments), 0)
        assert output_row_lengths.dtype == dtypes.int64

        # Build the splits tensor for the output RaggedTensor.
        output_splits = array_ops.concat(
            [array_ops.zeros([1], dtypes.int64),
             math_ops.cumsum(output_row_lengths)],
            axis=0)

        # For each row in `data`, find the start & limit position where that row's
        # values will be aggregated in output.values.
        data_row_to_out_row_start = array_ops.gather(output_splits,
                                                     segment_ids)
        data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

        # For each value in `data.values`, find the position where it will be
        # aggregated in `output.values`.
        # Get the target output values index for each data values index.
        data_val_to_out_val_index = range(data_row_to_out_row_start,
                                          data_row_to_out_row_limit).values

        # Recursively aggregate the values.
        output_values = _ragged_segment_aggregate(unsorted_segment_op,
                                                  data.values,
                                                  data_val_to_out_val_index,
                                                  output_splits[-1])
        return ragged_factory_ops.from_row_splits(output_values, output_splits)