Example #1
  def testBinaryElementwiseOp(self, x, y, op=math_ops.add, **extra_args):
    use_kwargs = extra_args.pop('use_kwargs', ())
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y)
    if 'x' in use_kwargs and 'y' in use_kwargs:
      result = op(x=x, y=y, **extra_args)
    elif 'y' in use_kwargs:
      result = op(x, y=y, **extra_args)
    else:
      result = op(x, y, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    dense_x = x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
    dense_y = y.flat_values if isinstance(y, ragged_tensor.RaggedTensor) else y
    expected_flat_values = array_ops.reshape(
        op(dense_x, dense_y, **extra_args), [-1])

    # Check that the result has the expected shape.
    self.assertSameShape(y, result)

    # Check that the result has the expected (flattened) values.
    if isinstance(result, ragged_tensor.RaggedTensor):
      result_flat_values = array_ops.reshape(result.flat_values, [-1])
    else:
      result_flat_values = array_ops.reshape(result, [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
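The pattern exercised here generalizes: binary elementwise ops on RaggedTensors operate on the flat values and preserve the row partitions. A minimal sketch of the behavior under test, using public TF 2.x APIs (not part of the test itself):

```python
import tensorflow as tf

x = tf.ragged.constant([[1, 2], [3]])
y = tf.ragged.constant([[10, 20], [30]])
print(tf.add(x, y))  # <tf.RaggedTensor [[11, 22], [33]]>
print(x + 1)         # scalar operand broadcasts: <tf.RaggedTensor [[2, 3], [4]]>
```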
Example #2
 def testConvertNumpyArrayError(self,
                                value,
                                message,
                                dtype=None,
                                preferred_dtype=None):
   with self.assertRaisesRegexp(ValueError, message):
     ragged_tensor.convert_to_tensor_or_ragged_tensor(value, dtype,
                                                      preferred_dtype)
Example #3
 def testConvertTensorError(self,
                            pylist,
                            message,
                            dtype=None,
                            preferred_dtype=None):
   tensor = constant_op.constant(pylist)
   with self.assertRaisesRegexp(ValueError, message):
     ragged_tensor.convert_to_tensor_or_ragged_tensor(tensor, dtype,
                                                      preferred_dtype)
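These error tests exercise `convert_to_tensor_or_ragged_tensor` directly. A hedged sketch of its contract (this is an internal TF API, so the import path may vary across versions):

```python
import tensorflow as tf
from tensorflow.python.ops.ragged import ragged_tensor

rt = tf.ragged.constant([[1, 2], [3]])
# RaggedTensor inputs pass through unchanged (after an optional dtype check).
converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt)
# Anything else (lists, numpy arrays, Tensors) becomes a dense Tensor.
dense = ragged_tensor.convert_to_tensor_or_ragged_tensor([[1, 2], [3, 4]])
# An incompatible `dtype` raises ValueError, which is what these tests assert:
# ragged_tensor.convert_to_tensor_or_ragged_tensor(rt, dtype=tf.string)
```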
Example #4
 def testRaggedAddWithBroadcasting(self, x, y, expected, doc):
   expected_rrank = getattr(expected, 'ragged_rank', 0)
   x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
   y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
   result = x + y
   result_rrank = getattr(result, 'ragged_rank', 0)
   self.assertEqual(expected_rrank, result_rrank)
   if hasattr(expected, 'tolist'):
     expected = expected.tolist()
   self.assertRaggedEqual(result, expected)
Example #5
  def testConvertRaggedTensorError(self,
                                   pylist,
                                   message,
                                   dtype=None,
                                   preferred_dtype=None):
    rt = ragged_factory_ops.constant(pylist)

    with self.assertRaisesRegexp(ValueError, message):
      ragged_tensor.convert_to_tensor_or_ragged_tensor(rt, dtype,
                                                       preferred_dtype)
Example #6
  def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
                                  **extra_args):
    use_kwargs = extra_args.pop('use_kwargs', False)
    inputs = [
        ragged_tensor.convert_to_tensor_or_ragged_tensor(x) for x in inputs
    ]
    if use_kwargs:
      result = op(inputs=inputs, **extra_args)
    else:
      result = op(inputs, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    dense_inputs = [
        x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
        for x in inputs
    ]
    expected_flat_values = array_ops.reshape(
        op(dense_inputs, **extra_args), [-1])

    # Check that the result has the expected shape.
    self.assertSameShape(inputs[0], result)

    # Check that the result has the expected (flattened) values.
    if isinstance(result, ragged_tensor.RaggedTensor):
      result_flat_values = array_ops.reshape(result.flat_values, [-1])
    else:
      result_flat_values = array_ops.reshape(result, [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
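A hedged usage sketch of the list-valued case, assuming TF 2.x ragged dispatch for `tf.math.add_n` (the row splits of all inputs must match):

```python
import tensorflow as tf

a = tf.ragged.constant([[1, 2], [3]])
b = tf.ragged.constant([[10, 20], [30]])
print(tf.math.add_n([a, b]))  # <tf.RaggedTensor [[11, 22], [33]]>
```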
Example #7
def string_split_v2(input, sep=None, maxsplit=-1, name=None):  # pylint: disable=redefined-builtin
  """Split elements of `input` based on `sep` into a `RaggedTensor`.

  Let N be the size of `input` (typically N will be the batch size). Split each
  element of `input` based on `sep` and return a `RaggedTensor` containing the
  split tokens. Empty tokens are ignored.

  Example:

  ```python
  >>> tf.strings.split('hello world')
  <Tensor ['hello', 'world']>
  >>> tf.strings.split(['hello world', 'a b c'])
  <tf.RaggedTensor [['hello', 'world'], ['a', 'b', 'c']]>
  ```

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, `input` of `"1<>2<><>3"` and
  `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace characters are regarded as a single separator, and the
  result will contain no empty strings at the start or end if the string has
  leading or trailing whitespace.

  Note that the above-mentioned behavior matches Python's `str.split`.

  Args:
    input: A string `Tensor` of rank `N`, the strings to split.  If
      `rank(input)` is not known statically, then it is assumed to be `1`.
    sep: `0-D` string `Tensor`, the delimiter string.
    maxsplit: An `int`. If `maxsplit > 0`, limits the number of splits.
    name: A name for the operation (optional).

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `RaggedTensor` of rank `N+1`, the strings split according to the
    delimiter.
  """
  with ops.name_scope(name, "StringSplit", [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, dtype=dtypes.string, name="input")
    if isinstance(input, ragged_tensor.RaggedTensor):
      return input.with_flat_values(
          string_split_v2(input.flat_values, sep, maxsplit))

    rank = input.shape.ndims
    if rank == 0:
      return string_split_v2(array_ops.stack([input]), sep, maxsplit)[0]
    elif rank == 1 or rank is None:
      sparse_result = string_ops.string_split_v2(
          input, sep=sep, maxsplit=maxsplit)
      return ragged_tensor.RaggedTensor.from_value_rowids(
          values=sparse_result.values,
          value_rowids=sparse_result.indices[:, 0],
          nrows=sparse_result.dense_shape[0],
          validate=False)
    else:
      return string_split_v2(
          ragged_tensor.RaggedTensor.from_tensor(input), sep, maxsplit)
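A hedged usage sketch of the public entry point (TF 2.x; output reprs are indicative):

```python
import tensorflow as tf

print(tf.strings.split(['hello world', 'a b c']))
# <tf.RaggedTensor [[b'hello', b'world'], [b'a', b'b', b'c']]>
print(tf.strings.split(['1<>2<><>3'], sep='<>'))
# Consecutive delimiters delimit empty strings:
# <tf.RaggedTensor [[b'1', b'2', b'', b'3']]>
```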
Example #8
def broadcast_to(rt_input, shape, broadcast_inner_dimensions=True):
  """Broadcasts a potentially ragged tensor to a ragged shape.

  Tiles `rt_input` as necessary to match the given shape.

  Behavior is undefined if `rt_input` is not broadcast-compatible with `shape`.

  Args:
    rt_input: The potentially ragged tensor to broadcast.
    shape: A `RaggedTensorDynamicShape`.
    broadcast_inner_dimensions: If false, then inner dimensions will not be
      tiled.

  Returns:
    A potentially ragged tensor whose values are taken from
    `rt_input`, and whose shape matches `shape`.
  """
  if not isinstance(shape, RaggedTensorDynamicShape):
    raise TypeError('shape must be a RaggedTensorDynamicShape')
  rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)

  # Broadcasting to a uniform shape.
  if shape.num_partitioned_dimensions == 0:
    return _broadcast_to_uniform_shape(rt_input, shape,
                                       broadcast_inner_dimensions)
  else:
    return _broadcast_to_ragged_shape(rt_input, shape,
                                      broadcast_inner_dimensions)
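A hedged sketch of the ragged broadcasting that `broadcast_to` implements under the hood for elementwise ops (public TF 2.x API):

```python
import tensorflow as tf

x = tf.ragged.constant([[1, 2], [3]])  # shape [2, (ragged)]
y = tf.constant([[10], [20]])          # shape [2, 1]
# y's trailing dimension of size 1 is tiled across each ragged row:
print(x + y)  # <tf.RaggedTensor [[11, 12], [23]]>
```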
Example #9
def normalize_tensors(tensors):
  """Converts a nested structure of tensor-like objects to tensors.

  * `SparseTensor`-like inputs are converted to `SparseTensor`.
  * `TensorArray` inputs are passed through.
  * Everything else is converted to a dense `Tensor`.

  Args:
    tensors: A nested structure of tensor-like objects, lists, `SparseTensor`,
      `SparseTensorValue`, or `TensorArray` objects.

  Returns:
    A nested structure of tensor, `SparseTensor`, or `TensorArray` objects.
  """
  flat_tensors = nest.flatten(tensors)
  prepared = []
  with ops.name_scope("normalize_tensors"):
    for i, t in enumerate(flat_tensors):
      if sparse_tensor_lib.is_sparse(t):
        prepared.append(sparse_tensor_lib.SparseTensor.from_value(t))
      elif ragged_tensor.is_ragged(t):
        prepared.append(
            ragged_tensor.convert_to_tensor_or_ragged_tensor(
                t, name="component_%d" % i))
      elif isinstance(t, tensor_array_ops.TensorArray):
        prepared.append(t)
      else:
        prepared.append(ops.convert_to_tensor(t, name="component_%d" % i))
  return nest.pack_sequence_as(tensors, prepared)
Example #10
def _replace_ragged_with_flat_values(value, nested_splits_lists):
  """Replace RaggedTensors with their flat_values, and record their splits.

  Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their
  `flat_values` tensor.  Looks inside lists, tuples, and dicts.

  Appends each `RaggedTensor`'s `nested_splits` to `nested_splits_lists`.

  Args:
    value: The value that should be transformed by replacing `RaggedTensors`.
    nested_splits_lists: An output parameter used to record the `nested_splits`
      for any `RaggedTensors` that were replaced.

  Returns:
    A copy of `value` with nested `RaggedTensors` replaced by their `flat_values`.
  """
  # Base case
  if ragged_tensor.is_ragged(value):
    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
    nested_splits_lists.append(value.nested_row_splits)
    return value.flat_values

  # Recursion cases
  def recurse(v):
    return _replace_ragged_with_flat_values(v, nested_splits_lists)

  if isinstance(value, list):
    return [recurse(v) for v in value]
  elif isinstance(value, tuple):
    return tuple(recurse(v) for v in value)
  elif isinstance(value, dict):
    return dict((k, recurse(v)) for (k, v) in value.items())
  else:
    return value
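A hedged sketch of the flat_values / nested_row_splits round trip this helper relies on (public TF 2.x API):

```python
import tensorflow as tf

rt = tf.ragged.constant([[[1, 2], [3]], [[4]]])
flat = rt.flat_values           # dense Tensor: [1 2 3 4]
splits = rt.nested_row_splits   # one row_splits vector per ragged dimension
# Transform the flat values, then reattach the recorded partitions:
rebuilt = tf.RaggedTensor.from_nested_row_splits(flat * 10, splits)
print(rebuilt)  # <tf.RaggedTensor [[[10, 20], [30]], [[40]]]>
```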
Example #11
def _unicode_decode(input, input_encoding, errors, replacement_char,
                    replace_control_characters, with_offsets):
  """Decodes each string into a sequence of codepoints."""
  input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input, name="input")
  input_ndims = input.shape.ndims
  if input_ndims is None:
    raise ValueError("Rank of `input` must be statically known.")

  if input_ndims > 1:
    # Convert to a ragged tensor with ragged_rank = input_ndims - 1.
    if not ragged_tensor.is_ragged(input):
      input = ragged_tensor.RaggedTensor.from_tensor(
          input, ragged_rank=input_ndims - 1)
    elif input.ragged_rank < input_ndims - 1:
      input = input.with_flat_values(
          ragged_tensor.RaggedTensor.from_tensor(
              input.flat_values,
              ragged_rank=input_ndims - input.ragged_rank - 1))

  # Reshape the input to a flat vector, and apply the gen_string_ops op.
  if ragged_tensor.is_ragged(input):
    flat_input = array_ops.reshape(input.flat_values, [-1])
  else:
    flat_input = array_ops.reshape(input, [-1])

  if with_offsets:
    decode_op = gen_string_ops.unicode_decode_with_offsets
  else:
    decode_op = gen_string_ops.unicode_decode
  flat_result = decode_op(
      input=flat_input,
      input_encoding=input_encoding,
      errors=errors,
      replacement_char=replacement_char,
      replace_control_characters=replace_control_characters)

  if input_ndims == 0:
    codepoints = flat_result.char_values
    if with_offsets:
      offsets = flat_result.char_to_byte_starts
  else:
    codepoints = ragged_tensor.RaggedTensor.from_row_splits(
        flat_result.char_values, flat_result.row_splits, validate=False)
    if input_ndims > 1:
      codepoints = input.with_flat_values(codepoints)
    if with_offsets:
      offsets = ragged_tensor.RaggedTensor.from_row_splits(
          flat_result.char_to_byte_starts, flat_result.row_splits,
          validate=False)
      if input_ndims > 1:
        offsets = input.with_flat_values(offsets)

  if with_offsets:
    return codepoints, offsets
  else:
    return codepoints
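A hedged usage sketch of the public wrappers built on this helper (TF 2.x; offsets are byte positions in the UTF-8 encoded string):

```python
import tensorflow as tf

print(tf.strings.unicode_decode(['héllo'], 'UTF-8'))
# <tf.RaggedTensor [[104, 233, 108, 108, 111]]>
codepoints, offsets = tf.strings.unicode_decode_with_offsets(['héllo'], 'UTF-8')
print(offsets)  # 'é' occupies two bytes, so offset 2 is skipped: [[0, 1, 3, 4, 5]]
```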
Example #12
 def testConvertNumpyArray(self,
                           value,
                           dtype=None,
                           preferred_dtype=None,
                           expected_dtype=None):
   if expected_dtype is None:
     expected_dtype = value.dtype if dtype is None else dtype
   converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
       value, dtype, preferred_dtype)
   self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
   self.assertAllEqual(value, converted)
Example #13
 def testConvertRaggedTensorValue(self,
                                  value,
                                  dtype=None,
                                  preferred_dtype=None,
                                  expected_dtype=None):
   if expected_dtype is None:
     expected_dtype = value.dtype if dtype is None else dtype
   converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
       value, dtype, preferred_dtype)
   self.assertEqual(value.ragged_rank, converted.ragged_rank)
   self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
   self.assertEqual(value.to_list(), self.eval_to_list(converted))
Example #14
def dropout_v2(x: ragged_tensor.Ragged,
               rate,
               noise_shape=None,
               seed=None,
               name=None):
  """Ragged dispatch target for tf.nn.dropout."""
  if noise_shape is not None:
    raise ValueError('noise_shape is not supported yet for RaggedTensor x')
  with ops.name_scope(name, 'RaggedNNDropout', [x, rate]):
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
    return x.with_flat_values(
        nn_ops.dropout_v2(x.flat_values, rate=rate, seed=seed))
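A hedged sketch of the dispatch in action: `tf.nn.dropout` applied to a RaggedTensor zeroes entries in the flat values while leaving the row partitions intact (public TF 2.x API):

```python
import tensorflow as tf

rt = tf.ragged.constant([[1.0, 2.0], [3.0]])
out = tf.nn.dropout(rt, rate=0.5)
print(out.row_splits.numpy())  # unchanged partitions: [0 2 3]
```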
Example #15
 def from_tensor(cls, rt_input):
   """Constructs a ragged shape for a potentially ragged tensor."""
   with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor', [rt_input]):
     rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
     if not ragged_tensor.is_ragged(rt_input):
       return cls([], array_ops.shape(rt_input))
     else:
       partitioned_dim_sizes = (
           (rt_input.nrows(),) + rt_input.nested_row_lengths())
       return RaggedTensorDynamicShape(
           partitioned_dim_sizes,
           array_ops.shape(rt_input.flat_values)[1:])
Example #16
  def test_Bidirectional_ragged_input(self, merge_mode):
    np.random.seed(100)
    rnn = keras.layers.LSTM
    units = 3
    x = ragged_factory_ops.constant(
        [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1]],
         [[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
         [[1, 1, 1], [1, 1, 1], [1, 1, 1]]],
        ragged_rank=1)
    x = math_ops.cast(x, 'float32')

    # pylint: disable=g-long-lambda
    with self.cached_session():
      if merge_mode == 'ave':
        merge_func = lambda y, y_rev: (y + y_rev) / 2
      elif merge_mode == 'concat':
        merge_func = lambda y, y_rev: ragged_concat_ops.concat(
            (y, y_rev), axis=-1)
      elif merge_mode == 'mul':
        merge_func = lambda y, y_rev: (y * y_rev)
        # pylint: enable=g-long-lambda

      inputs = keras.Input(
          shape=(None, 3), batch_size=4, dtype='float32', ragged=True)
      layer = keras.layers.Bidirectional(
          rnn(units, return_sequences=True), merge_mode=merge_mode)
      f_merged = keras.backend.function([inputs], layer(inputs))
      f_forward = keras.backend.function([inputs],
                                         layer.forward_layer(inputs))
      f_backward = keras.backend.function(
          [inputs],
          array_ops.reverse(layer.backward_layer(inputs), axis=[1]))

      y_merged = f_merged(x)
      y_expected = merge_func(
          ragged_tensor.convert_to_tensor_or_ragged_tensor(f_forward(x)),
          ragged_tensor.convert_to_tensor_or_ragged_tensor(f_backward(x)))

      y_merged = ragged_tensor.convert_to_tensor_or_ragged_tensor(y_merged)
      self.assertAllClose(y_merged.flat_values, y_expected.flat_values)
Example #17
  def tokenize_with_offsets(self, input):  # pylint: disable=redefined-builtin
    """Tokenizes a tensor of UTF-8 strings on whitespaces.

    The strings are split on ICU defined whitespace characters. These
    whitespace characters are dropped.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.

    Returns:
      A tuple `(tokens, start_offsets, limit_offsets)` where:

        * `tokens`: A `RaggedTensor` of tokenized text.
        * `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset.
        * `limit_offsets`: A `RaggedTensor` of the tokens' ending byte offset.
    """
    name = None
    with ops.name_scope(name, "WhitespaceTokenize", [input]):
      input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
      if input_tensor.shape.ndims is None:
        raise ValueError("Rank of input_tensor must be statically known.")
      if ragged_tensor.is_ragged(input_tensor):
        if input_tensor.flat_values.shape.ndims > 1:
          # If the flat_values of our ragged tensor is multi-dimensional, we can
          # process it separately and our output will have the same nested
          # splits as our input.
          (tokens, starts,
           limits) = self.tokenize_with_offsets(input_tensor.flat_values)
          return (input_tensor.with_flat_values(tokens),
                  input_tensor.with_flat_values(starts),
                  input_tensor.with_flat_values(limits))
        else:
          # Recursively process the values of the ragged tensor.
          (tokens, starts,
           limits) = self.tokenize_with_offsets(input_tensor.values)
          return (input_tensor.with_values(tokens),
                  input_tensor.with_values(starts),
                  input_tensor.with_values(limits))
      else:
        if input_tensor.shape.ndims > 1:
          # Convert the input tensor to ragged and process it.
          return self.tokenize_with_offsets(
              ragged_conversion_ops.from_tensor(input_tensor))
        elif input_tensor.shape.ndims == 0:
          (tokens, starts, limits) = self.tokenize_with_offsets(
              array_ops.stack([input_tensor]))
          return tokens.values, starts.values, limits.values
        else:
          # Our rank 1 tensor is the correct shape, so we can process it as
          # normal.
          return self._whitespace_tokenize_with_offsets_encode_decode_wrapper(
              input_tensor)
Example #18
 def from_tensor(cls, rt_input, dim_size_dtype=None):
   """Constructs a ragged shape for a potentially ragged tensor."""
   with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor', [rt_input]):
     rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
     if not ragged_tensor.is_ragged(rt_input):
       return cls([], array_ops.shape(rt_input))
     else:
       partitioned_dim_sizes = (
           (rt_input.nrows(),) + rt_input.nested_row_lengths())
       return RaggedTensorDynamicShape(
           partitioned_dim_sizes,
           array_ops.shape(rt_input.flat_values)[1:],
           dim_size_dtype=dim_size_dtype)
Example #19
def _convert_to_structured_field_value(value):
    """Converts `value` to a Tensor, RaggedTensor, or StructuredTensor."""
    if isinstance(value,
                  (ops.Tensor, ragged_tensor.RaggedTensor, StructuredTensor)):
        return value
    elif ragged_tensor.is_ragged(value):
        return ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
    else:
        try:
            return ops.convert_to_tensor(value)
        except (ValueError, TypeError):
            raise TypeError('Unexpected type for value in `fields`: %r' %
                            value)
Example #20
def bitcast(
    input: ragged_tensor.RaggedOrDense,  # pylint: disable=redefined-builtin
    type,  # pylint: disable=redefined-builtin
    name=None) -> ragged_tensor.RaggedOrDense:
  """RaggedTensor dispatch override for tf.bitcast."""
  type = dtypes.as_dtype(type)
  with ops.name_scope(name, 'Bitcast', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')
    if (input.dtype.size < type.size and input.flat_values.shape.rank < 2):
      raise ValueError('`input.flat_values` is required to have rank >= 2 when '
                       'input.dtype.size < type.size. Actual rank: '
                       f'{input.flat_values.shape.rank}')
    return input.with_flat_values(array_ops.bitcast(input.flat_values, type))
Example #21
def ragged_binary_elementwise_op(op, x, y):
    """Binary elementwise api handler for RaggedTensors."""
    x_is_ragged = ragged_tensor.is_ragged(x)
    y_is_ragged = ragged_tensor.is_ragged(y)

    # Convert args to tensors.
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        x, preferred_dtype=(y.dtype if y_is_ragged else None))
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        y, preferred_dtype=x.dtype)

    if x_is_ragged and y_is_ragged:
        x, y = ragged_tensor.match_row_splits_dtypes(x, y)

    # Perform broadcasting, when appropriate.
    if ((x_is_ragged and y_is_ragged)
            or (x_is_ragged and x.flat_values.shape.ndims <= y.shape.ndims)
            or (y_is_ragged and y.flat_values.shape.ndims <= x.shape.ndims)):
        bcast_shape = ragged_tensor_shape.broadcast_dynamic_shape(
            ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(x),
            ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(y))
        x = ragged_tensor_shape.broadcast_to(x,
                                             bcast_shape,
                                             broadcast_inner_dimensions=False)
        y = ragged_tensor_shape.broadcast_to(y,
                                             bcast_shape,
                                             broadcast_inner_dimensions=False)

    x_values = x.flat_values if ragged_tensor.is_ragged(x) else x
    y_values = y.flat_values if ragged_tensor.is_ragged(y) else y
    mapped_values = op(x_values, y_values)
    if isinstance(mapped_values, bool):
        return mapped_values  # Special case for tensor_equals.
    if ragged_tensor.is_ragged(x):
        return x.with_flat_values(mapped_values)
    else:
        return y.with_flat_values(mapped_values)
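A hedged sketch of what this handler produces for mixed ragged/dense operands (public TF 2.x behavior):

```python
import tensorflow as tf

x = tf.ragged.constant([[1, 2], [3]])
print(tf.multiply(x, 10))                       # scalar: no broadcasting pass needed
print(tf.multiply(x, tf.constant([[2], [4]])))  # dense [2, 1] operand broadcasts
# per row: <tf.RaggedTensor [[2, 4], [12]]>
```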
Example #22
def string_bytes_split(input, name=None):  # pylint: disable=redefined-builtin
    """Split string elements of `input` into bytes.

  Examples:

  ```python
  >>> tf.strings.bytes_split('hello')
  ['h', 'e', 'l', 'l', 'o']
  >>> tf.strings.bytes_split(['hello', '123'])
  <RaggedTensor [['h', 'e', 'l', 'l', 'o'], ['1', '2', '3']]>
  ```

  Note that this op splits strings into bytes, not unicode characters.  To
  split strings into unicode characters, use `tf.strings.unicode_split`.

  See also: `tf.io.decode_raw`, `tf.strings.split`, `tf.strings.unicode_split`.

  Args:
    input: A string `Tensor` or `RaggedTensor`: the strings to split.  Must
      have a statically known rank (`N`).
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` of rank `N+1`: the bytes that make up the source strings.
  """
    with ops.name_scope(name, "StringsByteSplit", [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input,
                                                                 name="input")
        if isinstance(input, ragged_tensor.RaggedTensor):
            return input.with_flat_values(string_bytes_split(
                input.flat_values))

        rank = input.shape.ndims
        if rank is None:
            raise ValueError("input must have a statically-known rank.")

        if rank == 0:
            return string_bytes_split(array_ops.stack([input]))[0]
        elif rank == 1:
            indices, values, shape = gen_string_ops.string_split(
                input, delimiter="", skip_empty=False)
            return ragged_tensor.RaggedTensor.from_value_rowids(
                values=values,
                value_rowids=indices[:, 0],
                nrows=shape[0],
                validate=False)
        else:
            return string_bytes_split(
                ragged_tensor.RaggedTensor.from_tensor(input))
Example #23
def reverse(tensor: ragged_tensor.Ragged, axis, name=None):
  """Reverses a RaggedTensor along the specified axes.

  #### Example:

  >>> data = tf.ragged.constant([
  ...   [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10], [11, 12]]])
  >>> tf.reverse(data, axis=[0, 2])
  <tf.RaggedTensor [[[8, 7], [10, 9], [12, 11]], [[6, 5]], [[2, 1], [4, 3]]]>

  Args:
    tensor: A 'RaggedTensor' to reverse.
    axis: A list or tuple of 'int' or a constant 1D 'tf.Tensor'. The indices of
      the axes to reverse.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A 'RaggedTensor'.
  """
  type_error_msg = ('`axis` must be a list of int or a constant tensor '
                    'when reversing axes in a ragged tensor')

  with ops.name_scope(name, 'Reverse', [tensor, axis]):
    if isinstance(axis, ops.Tensor):
      axis = tensor_util.constant_value(axis)
      if axis is None:
        raise TypeError(type_error_msg)
    elif not (isinstance(axis, (list, tuple)) and
              all(isinstance(dim, int) for dim in axis)):
      raise TypeError(type_error_msg)

    tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        tensor, name='tensor')

    # Allow usage of negative values to specify innermost axes.
    axis = [
        array_ops.get_positive_axis(dim, tensor.shape.rank, 'axis[%d]' % i,
                                    'rank(tensor)')
        for i, dim in enumerate(axis)
    ]

    # We only need to slice up to the max axis. If the axis list
    # is empty, it should be 0.
    slices = [slice(None)] * (max(axis) + 1 if axis else 0)

    for dim in axis:
      slices[dim] = slice(None, None, -1)

    return tensor[tuple(slices)]
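Since the implementation reduces `tf.reverse` to strided slicing, the two forms below should agree; a hedged sketch (public TF 2.x API):

```python
import tensorflow as tf

data = tf.ragged.constant([[1, 2, 3], [4, 5]])
print(tf.reverse(data, axis=[1]))  # <tf.RaggedTensor [[3, 2, 1], [5, 4]]>
print(data[:, ::-1])               # same result via the slicing trick used above
```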
Example #24
  def __init__(self, shape, fields):
    """Creates a `StructuredTensor` from a dictionary of fields.

    Args:
      shape: A `TensorShape`: static information about the shape of the
        `StructuredTensor`.  Must have a known `rank`.
      fields: A dictionary mapping from string to `Tensor`, `RaggedTensor`, or
        `StructuredTensor`, providing the values for individual fields in each
        structure.  If `ndims > 0`, then every tensor in `fields` must have the
        same shape in the first `shape.rank` dimensions; and that shape must be
        compatible with `shape`.

    Returns:
      A `StructuredTensor`.
    """
    shape = tensor_shape.as_shape(shape)
    if shape.rank is None:
      raise ValueError("StructuredTensor's shape must have known rank.")
    if not isinstance(fields, dict):
      raise TypeError('fields must be a dictionary, got %s' %
                      type(fields).__name__)
    self._fields = {}
    with ops.name_scope(None, 'StructuredTensor', fields.values()):
      for (key, value) in fields.items():
        if not isinstance(key, str):
          raise TypeError('Unexpected type for key in `fields`: %r' % key)
        if not _FIELD_NAME_RE.match(key):
          raise ValueError('Field name %r is not currently allowed.' % key)
        if not isinstance(
            value, (ops.Tensor, ragged_tensor.RaggedTensor, StructuredTensor)):
          if ragged_tensor.is_ragged(value):
            value = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
          else:
            try:
              value = ops.convert_to_tensor(value)
            except (ValueError, TypeError):
              raise TypeError('Unexpected type for value in `fields`: %r' %
                              value)
        self._fields[key] = value

    # Check the static TensorShape for this StructuredTensor.
    shape = tensor_shape.as_shape(shape)
    rank = shape.ndims
    if rank is None:
      raise ValueError("StructuredTensor's shape must have known rank.")
    self._static_shape = shape
    if rank > 0:
      for value in self._fields.values():
        self._static_shape = self._static_shape.merge_with(value.shape[:rank])
Example #25
def _ragged_nn_dropout_v1(x,
                          keep_prob=None,
                          noise_shape=None,
                          seed=None,
                          name=None,
                          rate=None):
    if noise_shape is not None:
        raise ValueError('noise_shape is not supported yet for RaggedTensor x')
    with ops.name_scope(name, 'RaggedNNDropout', [x, rate]):
        x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
        return x.with_flat_values(
            nn_ops.dropout(x.flat_values,
                           keep_prob=keep_prob,
                           seed=seed,
                           rate=rate))
Example #26
  def detokenize(self, input, name=None):  # pylint: disable=redefined-builtin
    """Detokenizes input codepoints (integers) to UTF-8 strings.

    Args:
      input: A `RaggedTensor` or `Tensor` of codepoints (ints) with a rank of at
        least 1.
      name: The name argument that is passed to the op function.

    Returns:
      An N-1 dimensional string tensor of the detokenized text.
    """
    name = None
    with ops.name_scope(name, "UnicodeCharTokenize", [input, self]):
      input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
      return ragged_string_ops.unicode_encode(input_tensor, "UTF-8")
Example #27
    def tokenize(self, input, name=None):  # pylint: disable=redefined-builtin
        """Tokenizes a tensor of UTF-8 strings.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.
      name: The name argument that is passed to the op function.

    Returns:
      A `RaggedTensor` of tokenized text. The returned shape is the shape of the
      input tensor with an added ragged dimension for tokens of each string.
    """
        with ops.name_scope(name, "SentenceTokenizer", [input, self]):
            input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                input)
            if input_tensor.shape.ndims is None:
                raise ValueError(
                    "Rank of input_tensor must be statically known.")
            if ragged_tensor.is_ragged(input_tensor):
                # Recursively process the values of the ragged tensor.
                tokens = self.tokenize(input_tensor.flat_values)
                return input_tensor.with_flat_values(tokens)
            else:
                if input_tensor.shape.ndims > 1:
                    # Convert the input tensor to ragged and process it.
                    return self.tokenize(
                        ragged_conversion_ops.from_tensor(input_tensor))
                elif input_tensor.shape.ndims == 0:
                    tokens = self.tokenize(array_ops.stack([input_tensor]))
                    return tokens.values
                else:
                    # Our rank 1 tensor is the correct shape, so we can process it as
                    # normal.
                    (output_values, row_splits) = (
                        gen_sentencepiece_tokenizer.sentencepiece_tokenize_op(
                            self._model_resource.resource_handle,
                            input_tensor,
                            self.nbest_size,
                            self.alpha,
                            self.add_bos,
                            self.add_eos,
                            self.reverse,
                            self.out_type,
                            return_nbest=self.return_nbest))
                    tokens = RaggedTensor.from_nested_row_splits(
                        flat_values=output_values,
                        nested_row_splits=[row_splits],
                        validate=False)
                    return tokens
Example #28
def normalize_element(element):
    """Normalizes a nested structure of element components.

  * Components matching `SparseTensorSpec` are converted to `SparseTensor`.
  * Components matching `RaggedTensorSpec` are converted to `RaggedTensor`.
  * Components matching `DatasetSpec` or `TensorArraySpec` are passed through.
  * `CompositeTensor` components are passed through.
  * All other components are converted to `Tensor`.

  Args:
    element: A nested structure of individual components.

  Returns:
    A nested structure of `Tensor`, `Dataset`, `SparseTensor`, `RaggedTensor`,
    or `TensorArray` objects.
  """
    components = nest.flatten(element)
    normalized_components = []
    with ops.name_scope("normalize_element"):
        # Imported here to avoid circular dependency.
        from tensorflow.python.data.ops import dataset_ops  # pylint: disable=g-import-not-at-top
        for i, t in enumerate(components):
            try:
                spec = type_spec_from_value(t, use_fallback=False)
            except TypeError:
                # TypeError indicates it was not possible to compute a `TypeSpec` for
                # the value. As a fallback try converting the value to a tensor.
                normalized_components.append(
                    ops.convert_to_tensor(t, name="component_%d" % i))
            else:
                if isinstance(spec, sparse_tensor.SparseTensorSpec):
                    normalized_components.append(
                        sparse_tensor.SparseTensor.from_value(t))
                elif isinstance(spec, ragged_tensor.RaggedTensorSpec):
                    normalized_components.append(
                        ragged_tensor.convert_to_tensor_or_ragged_tensor(
                            t, name="component_%d" % i))
                elif isinstance(spec, (tensor_array_ops.TensorArraySpec,
                                       dataset_ops.DatasetSpec)):
                    normalized_components.append(t)
                elif isinstance(spec, NoneTensorSpec):
                    normalized_components.append(NoneTensor())
                elif isinstance(t, composite_tensor.CompositeTensor):
                    normalized_components.append(t)
                else:
                    normalized_components.append(
                        ops.convert_to_tensor(t, name="component_%d" % i))
    return nest.pack_sequence_as(element, normalized_components)
Example #29
  def lookup(self, inputs):
    """Perform a table lookup."""
    # Sparse tensors don't play nicely with tensor conversion, so we handle
    # them before attempting to convert lists or arrays to tensors.
    if isinstance(
        inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      return self._sparse_lookup(inputs)

    # Try to convert lists/arrays to tensors or RaggedTensors.
    inputs = ragged_tensor.convert_to_tensor_or_ragged_tensor(inputs)

    # Run the lookup operation on the converted tensor.
    if ragged_tensor.is_ragged(inputs):
      return self._ragged_lookup(inputs)
    else:
      return self._tensor_lookup(inputs)
Example #30
    def tokenize_with_offsets(self, input_strs):
        """Tokenizes a tensor of UTF-8 strings into words with [start,end) offsets.

    Args:
      input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings.

    Returns:
      A tuple `(tokens, start_offsets, limit_offsets)` where:
        * `tokens` is a `RaggedTensor` of strings where `tokens[i1...iN, j]` is
          the string content of the `j-th` token in `input_strs[i1...iN]`
        * `start_offsets` is a `RaggedTensor` of int64s where
          `start_offsets[i1...iN, j]` is the byte offset for the start of the
          `j-th` token in `input_strs[i1...iN]`.
        * `limit_offsets` is a `RaggedTensor` of int64s where
          `limit_offsets[i1...iN, j]` is the byte offset immediately after the
          end of the `j-th` token in `input_strs[i1...iN]`.
    """
        input_strs = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input_strs)
        rank = input_strs.shape.ndims
        if rank is None:
            raise ValueError('input must have a known rank.')

        # Currently, the hub_module accepts only rank 1 input tensors, and outputs
        # rank 2 tokens/starts/ends.  To handle input of different ranks (0, 2, 3,
        # etc), we first convert the input into a rank 1 tensor, then run the
        # module, and finally convert the output back to the expected shape.
        if rank == 0:
            # Build a rank 1 input batch with one string.
            input_batch = array_ops.stack([input_strs])
            # [1, (number codepoints)]
            tokens, starts, ends = self._predict_tokens(input_batch)
            return tokens.flat_values, starts.flat_values, ends.flat_values
        elif rank == 1:
            return self._predict_tokens(input_strs)
        else:
            if not ragged_tensor.is_ragged(input_strs):
                input_strs = ragged_tensor.RaggedTensor.from_tensor(
                    input_strs, ragged_rank=rank - 1)

            # [number strings, (number codepoints)]
            tokens, starts, limits = self._predict_tokens(
                input_strs.flat_values)
            tokens = input_strs.with_flat_values(tokens)
            starts = input_strs.with_flat_values(starts)
            limits = input_strs.with_flat_values(limits)
        return tokens, starts, limits
Example #31
  def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
    result = op(x, **extra_args)

    # Run the wrapped op on the dense values, for comparison.
    dense_x = x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
    expected_flat_values = array_ops.reshape(op(dense_x, **extra_args), [-1])

    # Check that the result has the expected shape.
    self.assertSameShape(x, result)

    # Check that the result has the expected (flattened) values.
    if isinstance(result, ragged_tensor.RaggedTensor):
      result_flat_values = array_ops.reshape(result.flat_values, [-1])
    else:
      result_flat_values = array_ops.reshape(result, [-1])
    self.assertAllEqual(expected_flat_values, result_flat_values)
Example #32
def down_sample(source, freq_vocab, replacement='', threshold=1e-3, min_freq=0, seed=None, name=None):
    """Randomly down-sample high frequency tokens in `source` with `replacement` value.

    Args:
        source: string `Tensor` or `RaggedTensor` or `SparseTensor` of any shape, items to be sampled.
        freq_vocab: `Counter` with frequencies vocabulary.
        replacement: `string`, value to substitute for down-sampled items.
        threshold: `float`, occurrence threshold for down-sampling items.
        min_freq: `int`, items below this frequency will be treated as unique.
        seed: `int`, used to create a random seed (optional).
            See @{tf.random.set_seed} for behavior.
        name: `string`, a name for the operation (optional).

    Returns:
      A `Tensor`, `RaggedTensor` or `SparseTensor` of the same shape as
      `source`, with down-sampled items replaced by `replacement`.
    """
    with tf.name_scope(name or 'down_sample'):
        if isinstance(source, (sparse_tensor.SparseTensorValue,
                               sparse_tensor.SparseTensor)):
            source = sparse_tensor.convert_to_tensor_or_sparse_tensor(
                source, dtype=tf.string, name=name)
        else:
            source = ragged_tensor.convert_to_tensor_or_ragged_tensor(source, dtype=tf.string, name=name)

        if not tf.string.is_compatible_with(source.dtype):
            raise RuntimeError('"Source" must have dtype compatible with "string". '
                               'Actual: {}'.format(source.dtype))

        if isinstance(source, tf.SparseTensor):
            return tf.SparseTensor(
                values=down_sample(source.values, freq_vocab, replacement, threshold, min_freq, seed),
                indices=source.indices,
                dense_shape=source.dense_shape
            )
        elif isinstance(source, tf.RaggedTensor):
            return source.with_flat_values(
                down_sample(source.flat_values, freq_vocab, replacement, threshold, min_freq, seed)
            )

        keep = sample_mask(
            source=source,
            freq_vocab=freq_vocab,
            threshold=threshold,
            min_freq=min_freq,
            seed=seed,
        )

        return tf.where(keep, source, replacement)
Example #33
def ragged_tensor_to_string(rt, summarize=None):
    """Returns a scalar string tensor with the contents of a RaggedTensor.

  Requires that `rt.shape.rank` is not `None`.

  Note: this converts the entire `RaggedTensor` into a single string scalar.
  If you want to convert individual elements, use `tf.strings.as_string(rt)`.

  >>> rt1 = tf.ragged.constant([[1, 2, 3], [4, 5]])
  >>> ragged_tensor_to_string(rt1).numpy()
  b'[[1, 2, 3], [4, 5]]'

  >>> rt2 = tf.ragged.constant([[['a'], ['b', 'c']], [['d', 'e', 'f'], []]])
  >>> ragged_tensor_to_string(rt2).numpy()
  b"[[['a'], ['b', 'c']], [['d', 'e', 'f'], []]]"

  >>> rt3 = tf.ragged.constant([[1], [2, 3, 4, 5, 6], [], [], [7], [8, 9]])
  >>> ragged_tensor_to_string(rt3, summarize=2).numpy()
  b'[[1], [2, 3, ..., 5, 6], ..., [7], [8, 9]]'

  Args:
    rt: The RaggedTensor that should be converted to a string.
    summarize: If specified, then only the first and last `summarize` elements
      within each dimension are included in the string. If `-1` or `None`, then
      all elements are included.
  """
    if (summarize is not None and summarize != -1
            and not (isinstance(summarize, int) and summarize > 0)):
        raise ValueError(
            "Expected summarize to be -1 or a positive int, got %r" %
            summarize)
    with ops.name_scope(None, "AsString", [rt]):
        rt = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt)
        if rt.shape.rank is None:
            raise ValueError(
                "RaggedTensor to_string requires that rt.shape.rank "
                "is not None.")
        # Convert all elements of `rt` to strings.
        if rt.dtype == dtypes.string:
            escaped = string_ops.regex_replace(rt.flat_values, r"(['\\])",
                                               r"\\\1")
            str_t = rt.with_flat_values("'" + escaped + "'")
        else:
            str_t = rt.with_flat_values(string_ops.as_string(rt.flat_values))

        return _ragged_tensor_to_string(str_t, summarize)
Example #35
def string_bytes_split(input, name=None):  # pylint: disable=redefined-builtin
  """Split string elements of `input` into bytes.

  Examples:

  ```python
  >>> tf.strings.to_bytes('hello')
  ['h', 'e', 'l', 'l', 'o']
  >>> tf.strings.to_bytes(['hello', '123'])
  <RaggedTensor [['h', 'e', 'l', 'l', 'o'], ['1', '2', '3']]>
  ```

  Note that this op splits strings into bytes, not unicode characters.  To
  split strings into unicode characters, use `tf.strings.unicode_split`.

  See also: `tf.io.decode_raw`, `tf.strings.split`, `tf.strings.unicode_split`.

  Args:
    input: A string `Tensor` or `RaggedTensor`: the strings to split.  Must
      have a statically known rank (`N`).
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` of rank `N+1`: the bytes that make up the source strings.
  """
  with ops.name_scope(name, "StringsByteSplit", [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input,
                                                             name="input")
    if isinstance(input, ragged_tensor.RaggedTensor):
      return input.with_flat_values(string_bytes_split(input.flat_values))

    rank = input.shape.ndims
    if rank is None:
      raise ValueError("input must have a statically-known rank.")

    if rank == 0:
      return string_bytes_split(array_ops.stack([input]))[0]
    elif rank == 1:
      indices, values, shape = gen_string_ops.string_split(
          input, delimiter="", skip_empty=False)
      return ragged_tensor.RaggedTensor.from_value_rowids(
          values=values, value_rowids=indices[:, 0], nrows=shape[0],
          validate=False)
    else:
      return string_bytes_split(ragged_tensor.RaggedTensor.from_tensor(input))
Example #36
  def func(data):
    with ops.name_scope(name, 'NGrams', [data, width]):
      data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
      slices = []
      for start in range(width):
        stop = None if start - width + 1 == 0 else start - width + 1
        if axis >= 0:
          idx = [slice(None)] * axis + [slice(start, stop)]
        else:
          idx = [Ellipsis, slice(start, stop)] + [slice(None)] * (-axis - 1)
        slices.append(data[idx])

      # Stack the slices.
      stack_axis = axis + 1 if axis >= 0 else axis
      windowed_data = array_ops.stack(slices, stack_axis)

      return string_ops.reduce_join(
          windowed_data, axis=axis, separator=string_separator)
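This fragment belongs to an ngrams implementation: it stacks `width` shifted slices of `data` and joins them along the stacking axis. A hedged sketch of the same sliding-window join written out for `width=2` on a plain 1-D tensor:

```python
import tensorflow as tf

data = tf.constant(['a', 'b', 'c', 'd'])
# Two shifted views: data[:-1] pairs with data[1:].
windowed = tf.stack([data[:-1], data[1:]], axis=-1)  # [['a','b'], ['b','c'], ['c','d']]
print(tf.strings.reduce_join(windowed, axis=-1, separator=' '))
# [b'a b', b'b c', b'c d']
```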
Example #37
def cont_bow(source, window, seed=None, name=None):
    """Generates `Continuous bag-of-words` target and context pairs from batched list of tokens.

    Args:
        source: `2-D` string `Tensor` or `RaggedTensor`, batched lists of tokens [sentences, tokens].
        window: `int`, size of context before and after target token, must be > 0.
        seed: `int`, used to create a random seed (optional).
            See @{tf.random.set_seed} for behavior.
        name: `string`, a name for the operation (optional).

    Returns:
        `1-D` string `Tensor`: target tokens.
        `2-D` string `RaggedTensor`: context tokens.
        `2-D` int32 `RaggedTensor`: context positions.
    """
    with tf.name_scope(name or 'cont_bow'):
        source = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            source, name='source')

        if source.shape.rank != 2:
            raise ValueError('Rank of `source` must equal 2')

        if not ragged_tensor.is_ragged(source):
            source = ragged_tensor.RaggedTensor.from_tensor(source,
                                                            ragged_rank=1)

        if source.ragged_rank != 1:
            raise ValueError('Ragged rank of `source` must equal 1')

        seed1, seed2 = random_seed.get_seed(seed)

        target, context_values, context_splits, context_positions = tfmiss_ops.miss_cont_bow(
            source_values=source.values,
            source_splits=source.row_splits,
            window=window,
            seed=seed1,
            seed2=seed2)

        context = tf.RaggedTensor.from_row_splits(context_values,
                                                  context_splits)
        position = tf.RaggedTensor.from_row_splits(context_positions,
                                                   context_splits)

        return target, context, position
Example #38
def reduce_variance(input_tensor: ragged_tensor.Ragged,
                    axis=None,
                    keepdims=False,
                    name=None):
  """For docs, see: _RAGGED_REDUCE_DOCSTRING."""
  with ops.name_scope(name, 'RaggedReduceVariance', [input_tensor, axis]):
    input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input_tensor, name='input_tensor')
    if input_tensor.dtype.is_complex:
      raise ValueError(
          'reduce_variance is not supported for RaggedTensors with complex dtypes.'
      )
    square_of_input = math_ops.square(input_tensor)
    mean_of_square = reduce_mean(square_of_input, axis=axis, keepdims=keepdims)
    mean = reduce_mean(input_tensor, axis=axis, keepdims=keepdims)
    square_of_mean = math_ops.square(mean)
    # Note: the above method of computing variance is not numerically stable,
    # and can result in negative variances.  Here we clip to >= 0.
    return math_ops.maximum(mean_of_square - square_of_mean, 0)
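A hedged sketch of the identity used above, Var(x) = E[x**2] - E[x]**2, computed per row of a RaggedTensor (`tf.reduce_mean` supports ragged inputs in TF 2.x):

```python
import tensorflow as tf

rt = tf.ragged.constant([[1.0, 3.0], [2.0, 2.0, 2.0]])
mean_of_square = tf.reduce_mean(tf.square(rt), axis=1)   # E[x**2] per row: [5.0, 4.0]
square_of_mean = tf.square(tf.reduce_mean(rt, axis=1))   # E[x]**2 per row: [4.0, 4.0]
print(tf.maximum(mean_of_square - square_of_mean, 0.0))  # variances: [1.0, 0.0]
```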
Example #39
    def detokenize(self, input, name=None):  # pylint: disable=redefined-builtin
        """Detokenizes tokens into preprocessed text.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 string tokens with a rank of
        at least 1.
      name: The name argument that is passed to the op function.

    Returns:
      An N-1 dimensional string Tensor or RaggedTensor of the detokenized text.
    """
        with ops.name_scope(name, "SentenceTokenizer", [input, self]):
            input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                input)
            if input_tensor.shape.ndims is None:
                raise ValueError(
                    "Rank of input_tensor must be statically known.")
            if input_tensor.shape.ndims == 0:
                raise ValueError("Rank of input_tensor must be at least 1.")
            if ragged_tensor.is_ragged(input_tensor):
                if input_tensor.flat_values.shape.ndims > 1:
                    # If the flat_values of our ragged tensor is multi-dimensional, we can
                    # process it separately and our output will have the same nested
                    # splits as our input.
                    tokens = self.detokenize(input_tensor.flat_values)
                    return input_tensor.with_flat_values(tokens)
                elif input_tensor.ragged_rank > 1:
                    # Recursively process the values of the ragged tensor.
                    tokens = self.detokenize(input_tensor.values)
                    return input_tensor.with_values(tokens)
                else:
                    return gen_sentencepiece_tokenizer.sentencepiece_detokenize_op(
                        self._model_resource.resource_handle,
                        input_tensor.flat_values, input_tensor.row_splits,
                        self.add_bos, self.add_eos, self.reverse)
            else:
                if input_tensor.shape.ndims > 1:
                    # Convert the input tensor to ragged and process it.
                    return self.detokenize(
                        ragged_conversion_ops.from_tensor(input_tensor))
                else:
                    tokens = self.detokenize(array_ops.stack([input_tensor]))
                    return array_ops.reshape(tokens, [])
Example #40
def ragged_one_hot(indices,
                   depth,
                   on_value=None,
                   off_value=None,
                   axis=None,
                   dtype=None,
                   name=None):
    """Applies tf.one_hot along the values of a RaggedTensor."""
    with ops.name_scope(name, 'RaggedOneHot', [indices]):
        indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            indices, name='indices')
        if axis is not None:
            axis = ragged_util.get_positive_axis(axis, indices.shape.ndims)
            if axis < indices.ragged_rank:
                raise ValueError(
                    'axis may not be less than indices.ragged_rank.')
        return indices.with_flat_values(
            array_ops.one_hot(indices.flat_values, depth, on_value, off_value,
                              axis, dtype, name))
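A hedged sketch of the `with_flat_values` pattern used above (public TF 2.x API): the one-hot expansion is applied to the flat values, adding a new innermost dense dimension:

```python
import tensorflow as tf

rt = tf.ragged.constant([[0, 2], [1]])
one_hot = rt.with_flat_values(tf.one_hot(rt.flat_values, depth=3))
print(one_hot)
# <tf.RaggedTensor [[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],
#                   [[0.0, 1.0, 0.0]]]>
```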
Example #41
def _replace_ragged_with_flat_values(value, partition_lists,
                                     flat_values_nrows):
    """Replace RaggedTensors with their flat_values, and record their partitions.

  Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their
  `flat_values` tensor.  Looks inside lists, tuples, and dicts.

  Appends each `RaggedTensor`'s `RowPartition`s to `partition_lists`.

  Args:
    value: The value that should be transformed by replacing `RaggedTensors`.
    partition_lists: An output parameter used to record the row partitions
      for any `RaggedTensors` that were replaced.
    flat_values_nrows: An output parameter used to record the outer dimension
      size for each replacement `flat_values` (when known).  Contains a list of
      int.

  Returns:
    A copy of `value` with nested `RaggedTensors` replaced by their `flat_values`.
  """
    # Base case
    if ragged_tensor.is_ragged(value):
        value = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
        partition_lists.append(value._nested_row_partitions)  # pylint: disable=protected-access
        nrows = tensor_shape.dimension_at_index(value.flat_values.shape,
                                                0).value
        if nrows is not None:
            flat_values_nrows.append(nrows)
        return value.flat_values

    # Recursion cases
    def recurse(v):
        return _replace_ragged_with_flat_values(v, partition_lists,
                                                flat_values_nrows)

    if isinstance(value, list):
        return [recurse(v) for v in value]
    elif isinstance(value, tuple):
        return tuple(recurse(v) for v in value)
    elif isinstance(value, dict):
        return dict((k, recurse(v)) for (k, v) in value.items())
    else:
        return value
Example #42
    def test_merge_with_ragged_input(self, layer):
        ragged_data = tf.ragged.constant(
            [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
        dense_data = ragged_data.to_tensor()
        input1 = keras.Input(shape=(None, ), ragged=True)
        input2 = keras.Input(shape=(None, ), ragged=True)
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_ragged = model.predict([ragged_data, ragged_data], steps=1)
        out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            out_ragged).to_tensor()

        input1 = keras.Input(shape=(None, ))
        input2 = keras.Input(shape=(None, ))
        out = keras.layers.Add()([input1, input2])
        model = keras.models.Model(inputs=[input1, input2], outputs=out)
        out_dense = model.predict([dense_data, dense_data], steps=1)

        self.assertAllEqual(out_dense, out_ragged)
Example #43
def gather(params,
           indices,
           validate_indices=None,
           name=None,
           axis=None,
           batch_dims=0):
  """tf.gather for structured tensors.

  Does not yet support checks on illegal axis values, etc.

  Indices must be a ragged or dense tensor.

  Args:
    params: a structured tensor to be gathered
    indices: a ragged tensor or tensor to gather by.
    validate_indices: whether to validate the indices
    name: the name of the op(s).
    axis: the axis in params to gather on.
    batch_dims: the number of batch dimensions.

  Returns:
    the params reorganized according to indices.
  """
  if name is None:
    name = 'gather'
  with ops.name_scope(name):
    if axis is None:
      axis = batch_dims
    params_rank = params.shape.rank
    axis = array_ops.get_positive_axis(axis, params_rank)
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')

    def leaf_op(p):
      return array_ops.gather(
          p,
          indices,
          validate_indices=validate_indices,
          axis=axis,
          batch_dims=batch_dims,
          name=None)

    return _extend_op_single(params, leaf_op)
Example #44
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
  """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

  The values of `input` are replicated `multiples[i]` times along the
  `i`th dimension (for each dimension `i`).  For every dimension `axis` in
  `input`, the length of each output element in that dimension is the
  length of corresponding input element multiplied by `multiples[axis]`.

  Args:
    input: A `RaggedTensor`.
    multiples: A 1-D integer `Tensor`.  Length must be the same as the number of
      dimensions in `input`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> ragged.tile(rt, [3, 2])
    [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
    ```
  """
  with ops.name_scope(name, 'RaggedTile', [input, multiples]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')
    if not ragged_tensor.is_ragged(input):
      return array_ops.tile(input, multiples, name)
    multiples = ragged_util.convert_to_int_tensor(
        multiples, name='multiples', dtype=input.row_splits.dtype)
    multiples.shape.assert_has_rank(1)

    # If the constant value of `multiples` is available, then we can use it
    # to skip tiling dimensions where `multiples=1`.
    const_multiples = tensor_util.constant_value(multiples)

    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        _tile_ragged_values(input, multiples, const_multiples),
        _tile_ragged_splits(input, multiples, const_multiples),
        validate=False)
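
In TF 2.x the public `tf.tile` dispatches to this ragged implementation when handed a `RaggedTensor`, so the docstring example can be checked directly (a sketch, assuming ragged dispatch is available):

```python
import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])
print(tf.tile(rt, [3, 2]))
# <tf.RaggedTensor [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3],
#                   [1, 2, 1, 2], [3, 3]]>
```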
Ejemplo n.º 46
0
def char_ngrams(source, minn, maxn, itself, skip=None, name=None):
    """Split unicode strings into character ngrams.

    Args:
        source: `Tensor` or `RaggedTensor` of any shape, strings to split
        minn: Minimum length of character ngram
        maxn: Maximum length of character ngram
        itself: Strategy for preserving the source word.
            One of `"asis"`, `"never"`, `"always"`, `"alone"`.
        skip: A list of strings to pass through unchanged, or None.
        name: A name for the operation (optional).

    Returns:
        `Tensor` if rank(source) is 0, `RaggedTensor` with an additional
        dimension otherwise.
    """
    with tf.name_scope(name or 'char_ngrams'):
        source = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            source, name='source', dtype=tf.string)
        if source.shape.rank is None:
            raise ValueError('Rank of `source` must be statically known.')

        if not isinstance(source, tf.RaggedTensor) and source.shape.rank > 1:
            source = ragged_tensor.RaggedTensor.from_tensor(
                source, ragged_rank=source.shape.rank - 1)

        if isinstance(source, tf.RaggedTensor):
            return source.with_flat_values(
                char_ngrams(source.flat_values, minn, maxn, itself, skip))

        result_values, result_splits = tfmiss_ops.miss_char_ngrams(
            source=source,
            minn=minn,
            maxn=maxn,
            itself=itself.upper(),
            skip=skip or [],
        )

        if source.shape.rank == 0:
            return result_values

        return tf.RaggedTensor.from_row_splits(result_values, result_splits)
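
The heavy lifting happens inside the custom `tfmiss_ops.miss_char_ngrams` kernel, so the snippet is not runnable without that library. A pure-Python sketch of the per-word expansion it performs (ignoring the `itself` strategy and the `skip` list; the kernel's exact output ordering may differ):

```python
def char_ngrams_py(word, minn, maxn):
    # All character n-grams of length minn..maxn, grouped by length.
    return [word[i:i + n]
            for n in range(minn, maxn + 1)
            for i in range(len(word) - n + 1)]

assert char_ngrams_py('abc', 2, 3) == ['ab', 'bc', 'abc']
```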
Ejemplo n.º 47
0
def normalize_utf8_with_offsets_map(input,
                                    normalization_form="NFKC",
                                    name=None):
  """Normalizes each UTF-8 string in the input tensor using the specified rule.

  Returns normalized strings and an offset map used by another operation to map
  post-normalized string offsets to pre-normalized string offsets.

  See http://unicode.org/reports/tr15/

  Args:
    input: A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.)
    normalization_form: One of the following string values ('NFC', 'NFKC',
      'NFD', 'NFKD'). Default is 'NFKC'. NOTE: `NFD` and `NFKD` for
      `normalize_utf8_with_offsets_map` will not be available until the tf.text
      release with ICU 69 (scheduled after 4/2021).
    name: The name for this op (optional).

  Returns:
    A tuple of (results, offsets_map) where:

    results: A `Tensor` or `RaggedTensor` of type string, with normalized
      contents.
    offsets_map: A `Tensor` or `RaggedTensor` of type `variant`, used to map
      the post-normalized string offsets to pre-normalized string offsets. It
      has the same shape as the results tensor. offsets_map is an input to
      `find_source_offsets` op.
  """
  with ops.name_scope(name, "NormalizeUTF8WithOffsets", [input]):
    input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, dtype=dtypes.string)
    if ragged_tensor.is_ragged(input_tensor):
      result, offsets_map = gen_normalize_ops.normalize_utf8_with_offsets_map(
          input_tensor.flat_values, normalization_form)
      return input_tensor.with_flat_values(
          result), input_tensor.with_flat_values(offsets_map)
    else:
      return gen_normalize_ops.normalize_utf8_with_offsets_map(
          input_tensor, normalization_form)
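
Typical usage pairs this op with `find_source_offsets`, which consumes the returned `offsets_map`. A sketch, assuming the `tensorflow_text` package exposes both ops (output shapes depend on the version, so the values here are illustrative):

```python
import tensorflow_text as tf_text

# NFKC folds fullwidth characters, so byte offsets shift.
results, offsets_map = tf_text.normalize_utf8_with_offsets_map(
    ['ＫＡＤＯＫＡＷＡ'], 'NFKC')  # results: ['KADOKAWA']
# Map offsets 0, 1, 2 in the normalized string back to the original bytes.
source_offsets = tf_text.find_source_offsets(offsets_map, [[0, 1, 2]])
```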
Ejemplo n.º 48
0
def _ragged_segment_aggregate(unsorted_segment_op,
                              data,
                              segment_ids,
                              num_segments,
                              name=None):
  """Aggregates along segments of a RaggedTensor using `unsorted_segment_op`.

  Returns a RaggedTensor `output` with `num_segments` rows, where the row
  `output[i]` is formed by combining all rows of `data` whose corresponding
  `segment_id` is `i`.  The values in each row are combined using
  `unsorted_segment_op`.

  The length of the row `output[i]` will be the maximum of the lengths of
  all rows of `data` whose corresponding `segment_id` is `i`.  If no `data`
  rows correspond to a given segment ID, then the output row for that segment
  ID will be empty.

  Args:
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in each row.  Must have the same signature and basic behavior as
      `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    data: A `RaggedTensor` containing the values to be combined.
    segment_ids: A `Tensor` or `RaggedTensor`.  Must have type `int64` or
      `int32`.  `segment_ids.shape` must be a prefix of `data.shape`.
      `segment_ids` is not required to be sorted.
    num_segments: An `int32` or `int64` scalar.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the aggregated values.  The returned tensor
    has the same dtype as `data`, and its shape is
    `[num_segments] + data.shape[segment_ids.rank:]`.
  Raises:
    ValueError: If segment_ids.shape is not a prefix of data.shape.
  """
  if not (ragged_tensor.is_ragged(data) or
          ragged_tensor.is_ragged(segment_ids)):
    return unsorted_segment_op(data, segment_ids, num_segments, name)

  with ops.name_scope(name, 'RaggedSegment',
                      [data, segment_ids, num_segments]) as name:
    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
    segment_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        segment_ids, name='segment_ids')

    if ragged_tensor.is_ragged(segment_ids):
      if not ragged_tensor.is_ragged(data):
        raise ValueError('segment_ids.shape must be a prefix of data.shape, '
                         'but segment_ids is ragged and data is not.')
      check_splits = check_ops.assert_equal(
          segment_ids.row_splits,
          data.row_splits,
          message='segment_ids.shape must be a prefix of data.shape')
      with ops.control_dependencies([check_splits]):
        return _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                         segment_ids.values, num_segments, name)

    segment_ids = math_ops.cast(segment_ids, dtypes.int64)

    # Find the length of each row in data.  (dtype=int64, shape=[data_nrows])
    data_row_lengths = data.row_splits[1:] - data.row_splits[:-1]

    # Find the length that each output row will have.  The length of the row
    # corresponding to segment `id` is `max(data_row_lengths[i])` where
    # `segment_ids[i]=id`.  (dtype=int64, shape=[output_nrows])
    output_row_lengths = math_ops.maximum(
        math_ops.unsorted_segment_max(data_row_lengths, segment_ids,
                                      num_segments), 0)
    assert output_row_lengths.dtype == dtypes.int64

    # Build the splits tensor for the output RaggedTensor.
    output_splits = array_ops.concat([
        array_ops.zeros([1], dtypes.int64),
        math_ops.cumsum(output_row_lengths)
    ],
                                     axis=0)

    # For each row in `data`, find the start & limit position where that row's
    # values will be aggregated in output.values.
    data_row_to_out_row_start = array_ops.gather(output_splits, segment_ids)
    data_row_to_out_row_limit = data_row_to_out_row_start + data_row_lengths

    # For each value in `data.values`, find the position where it will be
    # aggregated in `output.values`.
    data_val_to_out_val_index = range(data_row_to_out_row_start,
                                      data_row_to_out_row_limit).values

    # Recursively aggregate the values.
    output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                              data_val_to_out_val_index,
                                              output_splits[-1])
    return ragged_tensor.RaggedTensor.from_row_splits(output_values,
                                                      output_splits)
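
Public entry points reach this helper through ragged dispatch for the unsorted-segment ops. A sketch, assuming that dispatch is registered (as in recent TF releases): rows sharing a segment id are combined elementwise, and each output row takes the maximum length of its inputs:

```python
import tensorflow as tf

data = tf.ragged.constant([[1, 2, 3], [4], [5, 6]])
segment_ids = tf.constant([0, 0, 1])
print(tf.math.unsorted_segment_sum(data, segment_ids, num_segments=2))
# <tf.RaggedTensor [[5, 2, 3], [5, 6]]>   (rows 0 and 1 combine into segment 0)
```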
Ejemplo n.º 49
0
def unicode_encode(input,
                   output_encoding,
                   errors="replace",
                   replacement_char=65533,
                   name=None):
  r"""Encodes each sequence of Unicode code points in `input` into a string.

  `result[i1...iN]` is the string formed by concatenating the Unicode
  codepoints `input[i1...iN, :]`, encoded using `output_encoding`.

  Args:
    input: An `N+1` dimensional potentially ragged integer tensor with shape
      `[D1...DN, num_chars]`.
    output_encoding: Unicode encoding that should be used to encode each
      codepoint sequence.  Can be `"UTF-8"`, `"UTF-16-BE"`, or `"UTF-32-BE"`.
    errors: Specifies the response when an invalid codepoint is encountered
      (optional). One of:
            * `'replace'`: Replace invalid codepoint with the
              `replacement_char`. (default)
            * `'ignore'`: Skip invalid codepoints.
            * `'strict'`: Raise an exception for any invalid codepoint.
    replacement_char: The replacement character codepoint to be used in place of
      any invalid input when `errors='replace'`. Any valid unicode codepoint may
      be used. The default value is the default unicode replacement character
      which is 65533 (U+FFFD).
    name: A name for the operation (optional).

  Returns:
    An `N` dimensional `string` tensor with shape `[D1...DN]`.

  #### Example:
    ```python
      >>> input = [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
      >>> unicode_encode(input, 'UTF-8')
      ['G\xc3\xb6\xc3\xb6dnight', '\xf0\x9f\x98\x8a']
    ```
  """
  with ops.name_scope(name, "UnicodeEncode", [input]):
    input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    if input_tensor.shape.ndims is None:
      raise ValueError("Rank of input_tensor must be statically known.")
    if ragged_tensor.is_ragged(input_tensor):
      if input_tensor.flat_values.shape.ndims > 1:
        # If the flat_values of our ragged tensor is multi-dimensional, we can
        # process it separately and our output will have the same nested splits
        # as our input.
        return input_tensor.with_flat_values(
            unicode_encode(input_tensor.flat_values, output_encoding, errors,
                           replacement_char))
      elif input_tensor.ragged_rank > 1:
        # Recursively process the values of the ragged tensor.
        return input_tensor.with_values(
            unicode_encode(input_tensor.values, output_encoding, errors,
                           replacement_char))
      else:
        # Our ragged tensor is of the correct shape (rank 1 flat_values tensor
        # with ragged_rank of 1) so we can process it as normal.
        return gen_string_ops.unicode_encode(
            input_values=input_tensor.values,
            input_splits=input_tensor.row_splits,
            output_encoding=output_encoding,
            errors=errors,
            replacement_char=replacement_char)
    else:
      if input_tensor.shape.ndims == 2:
        # The input tensor is of the correct 2-D shape, it's just not ragged.
        return unicode_encode(
            ragged_tensor.RaggedTensor.from_tensor(input_tensor),
            output_encoding, errors, replacement_char)
      elif input_tensor.shape.ndims > 2:
        # We need to initially flatten the input tensor to 2-D, and then can
        # reshape the output of our processed flattened tensor.
        flat_input_tensor = array_ops.reshape(
            input_tensor,
            array_ops.stack([-1, array_ops.shape(input_tensor)[-1]]))
        flat_output_tensor = unicode_encode(flat_input_tensor, output_encoding,
                                            errors, replacement_char)
        return array_ops.reshape(flat_output_tensor, input_tensor.shape[:-1])
      elif input_tensor.shape.ndims == 0:
        raise ValueError("input_tensor's rank must be at least 1.")
      else:
        # Our input tensor is rank 1, so we create a ragged tensor with an added
        # dimension to create the correct input shape & type, and then remove
        # the additional dimension from the output and return the string scalar.
        ragged_input_tensor = ragged_tensor.RaggedTensor.from_row_splits(
            input_tensor,
            array_ops.stack(
                [0, array_ops.shape(input_tensor, out_type=dtypes.int32)[0]]),
            validate=False)
        output_tensor = unicode_encode(ragged_input_tensor, output_encoding,
                                       errors, replacement_char)
        return array_ops.reshape(output_tensor, [])
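
The public entry point is `tf.strings.unicode_encode`; a quick check of the rank-2 ragged case (TF 2.x):

```python
import tensorflow as tf

codepoints = tf.ragged.constant([[71, 246, 246, 100], [128522]])
print(tf.strings.unicode_encode(codepoints, 'UTF-8'))
# [b'G\xc3\xb6\xc3\xb6d', b'\xf0\x9f\x98\x8a']   ('Gööd' and the emoji U+1F60A)
```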
Ejemplo n.º 50
0
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
  """Ragged compatible squeeze.

  If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

  If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
  where `N` is the number of elements in the squeezed dimensions.

  Args:
    input: A potentially ragged tensor. The input to squeeze.
    axis: An optional list of ints. Defaults to `None`. If the `input` is
      ragged, it only squeezes the dimensions listed. It fails if `input` is
      ragged and axis is []. If `input` is not ragged it calls tf.squeeze. Note
      that it is an error to squeeze a dimension that is not 1. It must be in
      the range of [-rank(input), rank(input)).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor. Contains the same data as input,
    but has one or more dimensions of size 1 removed.
  """
  with ops.name_scope(name, 'RaggedSqueeze', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    if isinstance(input, ops.Tensor):
      return array_ops.squeeze(input, axis, name)

    if axis is None:
      raise ValueError('Ragged.squeeze must have an axis argument.')
    if isinstance(axis, int):
      axis = [axis]
    elif ((not isinstance(axis, (list, tuple))) or
          (not all(isinstance(d, int) for d in axis))):
      raise TypeError('Axis must be a list or tuple of integers.')

    dense_dims = []
    ragged_dims = []
    # Normalize all the dims in axis to be positive
    axis = [ragged_util.get_positive_axis(d, input.shape.ndims) for d in axis]
    for dim in axis:
      if dim > input.ragged_rank:
        dense_dims.append(dim - input.ragged_rank)
      else:
        ragged_dims.append(dim)

    # Make sure the specified ragged dimensions are squeezable.
    assertion_list = []
    scalar_tensor_one = constant_op.constant(1, dtype=input.row_splits.dtype)
    for i, r in enumerate(input.nested_row_lengths()):
      if i + 1 in ragged_dims:
        assertion_list.append(
            control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(r, scalar_tensor_one)),
                ['the given axis (axis = %d) is not squeezable!' % (i + 1)]))
    if 0 in ragged_dims:
      scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
      assertion_list.append(
          control_flow_ops.Assert(
              math_ops.equal(
                  array_ops.size(input.row_splits), scalar_tensor_two),
              ['the given axis (axis = 0) is not squeezable!']))

    # At this point, the ragged dimensions are known to be squeezable.
    squeezed_rt = control_flow_ops.with_dependencies(assertion_list,
                                                     input.flat_values)

    if dense_dims:
      # Gives error if the dense dimension is not squeezable.
      squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

    remaining_row_splits = []
    for i, row_split in enumerate(input.nested_row_splits):
      # Each row_splits tensor corresponds to dimension i + 1.
      if (i + 1) not in ragged_dims:
        remaining_row_splits.append(row_split)
    # Take care of the outermost dimension if it is to be squeezed.
    if remaining_row_splits and 0 in ragged_dims:
      remaining_row_splits.pop(0)

    squeezed_rt = RaggedTensor.from_nested_row_splits(squeezed_rt,
                                                      remaining_row_splits)

    # Corner case: when removing all the ragged dimensions and the output is
    # a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
    if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
      squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

    return squeezed_rt
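
Through ragged dispatch, `tf.squeeze` routes here for ragged inputs, with `axis` required. A sketch, assuming that dispatch (note that squeezing away every ragged dimension can leave a dense `Tensor`):

```python
import tensorflow as tf

rt = tf.ragged.constant([[[1], [2]]], ragged_rank=1)  # shape [1, None, 1]
print(tf.squeeze(rt, axis=[0, 2]))
# tf.Tensor([1 2], ...)   -- both size-1 dimensions removed
```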
Ejemplo n.º 51
0
def _ragged_reduce_aggregate(reduce_op,
                             unsorted_segment_op,
                             rt_input,
                             axis,
                             keepdims,
                             name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce a
      given set of axes), or a `Tensor` with a constant value.  Must be in the
      range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `rt_input`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.
  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  if not ragged_tensor.is_ragged(rt_input):
    return reduce_op(rt_input, axis, name=name)

  if keepdims:
    raise ValueError('keepdims=True is not supported for RaggedTensors.')

  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    return reduce_op(rt_input.flat_values, None, name=name)

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        inner_reduced = _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                 rt_input, axis[-1], keepdims)
        return _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                        inner_reduced, axis[:-1], keepdims)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      segment_ids = range(row_lengths).values
      return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                       segment_ids, num_segments)
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      return _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                       segment_ids, num_segments)
    else:
      # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
      #     sum_{j} rt_input [i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
      return rt_input.with_values(
          _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                   rt_input.values, axis - 1, keepdims))
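
Public reductions such as `tf.reduce_sum` route ragged inputs through this helper, one axis at a time. A quick sketch of both single-axis cases:

```python
import tensorflow as tf

rt = tf.ragged.constant([[3, 1, 4], [1, 5], [9]])
print(tf.reduce_sum(rt, axis=0))  # [13  6  4]  (column-wise across ragged rows)
print(tf.reduce_sum(rt, axis=1))  # [8 6 9]     (row-wise)
```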
Ejemplo n.º 52
0
def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
  """Helper function to concatenate or stack ragged tensors.

  Args:
    rt_inputs: A list of RaggedTensors or Tensors to combine.
    axis: The axis along which to concatenate or stack.
    stack_values: A boolean -- if true, then stack values; otherwise,
      concatenate them.

  Returns:
    A RaggedTensor.
  Raises:
    ValueError: If rt_inputs is empty, or if axis is out of range.
  """
  # Validate parameters.
  if not rt_inputs:
    raise ValueError('rt_inputs may not be empty.')

  # Convert input tensors.
  rt_inputs = [
      ragged_tensor.convert_to_tensor_or_ragged_tensor(
          rt_input, name='rt_input') for rt_input in rt_inputs
  ]
  row_splits_dtype, rt_inputs = ragged_tensor.match_row_splits_dtypes(
      *rt_inputs, return_dtype=True)
  rt_inputs = list(rt_inputs)

  # Special case: if there's only one input, then return it as-is.
  if len(rt_inputs) == 1:
    if stack_values:
      return ragged_array_ops.expand_dims(rt_inputs[0], axis=axis)
    else:
      return rt_inputs[0]

  # Check the rank (number of dimensions) of the input tensors.
  ndims = None
  for rt in rt_inputs:
    if ndims is None:
      ndims = rt.shape.ndims
    else:
      rt.shape.assert_has_rank(ndims)

  out_ndims = ndims if (ndims is None or not stack_values) else ndims + 1
  axis = ragged_util.get_positive_axis(axis, out_ndims)

  # If all the inputs are Tensors, and we're combining the final dimension,
  # then we can delegate to the tf.stack/tf.concat operation, and return a
  # Tensor.
  if all(not ragged_tensor.is_ragged(rt) for rt in rt_inputs):
    if ndims is not None and (axis == out_ndims - 1 or axis == ndims - 1):
      if stack_values:
        return array_ops.stack(rt_inputs, axis)
      else:
        return array_ops.concat(rt_inputs, axis)

  # Convert any Tensor inputs to RaggedTensors.  This makes it
  # possible to concatenate Tensors and RaggedTensors together.
  for i in range(len(rt_inputs)):
    if not ragged_tensor.is_ragged(rt_inputs[i]):
      rt_inputs[i] = ragged_tensor.RaggedTensor.from_tensor(
          rt_inputs[i], ragged_rank=1, row_splits_dtype=row_splits_dtype)

  # Convert the input tensors to all have the same ragged_rank.
  ragged_rank = max(max(rt.ragged_rank for rt in rt_inputs), 1)
  rt_inputs = [_increase_ragged_rank_to(rt, ragged_rank, row_splits_dtype)
               for rt in rt_inputs]

  if axis == 0:
    return _ragged_stack_concat_axis_0(rt_inputs, stack_values)
  elif axis == 1:
    return _ragged_stack_concat_axis_1(rt_inputs, stack_values)
  else:  # axis > 1: recurse.
    values = [rt.values for rt in rt_inputs]
    splits = [[rt_input.row_splits] for rt_input in rt_inputs]
    with ops.control_dependencies(ragged_util.assert_splits_match(splits)):
      return ragged_tensor.RaggedTensor.from_row_splits(
          _ragged_stack_concat_helper(values, axis - 1, stack_values),
          splits[0][0], validate=False)
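
Both `tf.concat` and `tf.stack` accept `RaggedTensor` inputs and route through this helper. A sketch of the axis-0 and axis-1 cases (assuming TF 2.x ragged dispatch):

```python
import tensorflow as tf

t1 = tf.ragged.constant([[1, 2], [3]])
t2 = tf.ragged.constant([[4], [5, 6]])
print(tf.concat([t1, t2], axis=0))  # [[1, 2], [3], [4], [5, 6]]
print(tf.concat([t1, t2], axis=1))  # [[1, 2, 4], [3, 5, 6]]
print(tf.stack([t1, t2], axis=0))   # [[[1, 2], [3]], [[4], [5, 6]]]
```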
Ejemplo n.º 53
0
def where(condition, x=None, y=None, name=None):
  """Return the elements, either from `x` or `y`, depending on the `condition`.

  : If both `x` and `y` are `None`:
    Returns the coordinates of true elements of `condition`. The coordinates
    are returned in a 2-D tensor with shape
    `[num_true_values, dim_size(condition)]`, where `result[i]` is the
    coordinates of the `i`th true value (in row-major order).

  : If both `x` and `y` are non-`None`:
    Returns a tensor formed by selecting values from `x` where condition is
    true, and from `y` when condition is false.  In particular:

    : If `condition`, `x`, and `y` all have the same shape:

      * `result[i1...iN] = x[i1...iN]` if `condition[i1...iN]` is true.
      * `result[i1...iN] = y[i1...iN]` if `condition[i1...iN]` is false.

    : Otherwise:

      * `condition` must be a vector.
      * `x` and `y` must have the same number of dimensions.
      * The outermost dimensions of `condition`, `x`, and `y` must all have the
        same size.
      * `result[i] = x[i]` if `condition[i]` is true.
      * `result[i] = y[i]` if `condition[i]` is false.

  Args:
    condition: A potentially ragged tensor of type `bool`
    x: A potentially ragged tensor (optional).
    y: A potentially ragged tensor (optional).  Must be specified if `x` is
      specified.  Must have the same rank and type as `x`.
    name: A name of the operation (optional)

  Returns:
    : If both `x` and `y` are `None`:
      A `Tensor` with shape `(num_true, dim_size(condition))`.
    : Otherwise:
      A potentially ragged tensor with the same type, rank, and outermost
      dimension size as `x` and `y`.
      `result.ragged_rank = max(x.ragged_rank, y.ragged_rank)`.

  Raises:
    ValueError: When exactly one of `x` or `y` is non-`None`; or when
      `condition`, `x`, and `y` have incompatible shapes.

  #### Examples:
    ```python
    >>> # Coordinates where condition is true.
    >>> condition = tf.ragged.constant_value(
    ...     [[True, False, True], [False, True]])
    >>> ragged.where(condition)
    [[0, 0], [0, 2], [1, 1]]

    >>> # Elementwise selection between x and y, based on condition.
    >>> condition = tf.ragged.constant_value(
    ...     [[True, False, True], [False, True]])
    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
    >>> ragged.where(condition, x, y)
    [['A', 'b', 'C'], ['d', 'E']]

    >>> # Row selection between x and y, based on condition.
    >>> condition = [True, False]
    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
    >>> ragged.where(condition, x, y)
    [['A', 'B', 'C'], ['d', 'e']]
    ```
  """
  if (x is None) != (y is None):
    raise ValueError('x and y must be either both None or both non-None')
  with ops.name_scope('RaggedWhere', name, [condition, x, y]):
    condition = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        condition, name='condition')
    if x is None:
      return _coordinate_where(condition)
    else:
      x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
      y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, name='y')
      return _elementwise_where(condition, x, y)
Ejemplo n.º 54
0
def gather(params, indices, validate_indices=None, axis=0, batch_dims=0,
           name=None):
  """Gathers ragged slices from `params` axis `0` according to `indices`.

  Returns `RaggedTensor` output, such that:

  ```python
  output.shape = indices.shape + params.shape[1:]
  output.ragged_rank = indices.shape.ndims + params.ragged_rank
  output[i...j, d0...dn] = params[indices[i...j], d0...dn]
  ```

  `params` may be ragged.  `indices` may be ragged.
  `indices` must have dtype `int32` or `int64`. If any index is out of bounds,
  then an error is returned.

  Examples:

  ```python
  >>> params = tf.constant(['a', 'b', 'c', 'd', 'e'])
  >>> indices = tf.constant([3, 1, 2, 1, 0])
  >>> ragged_params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
  >>> ragged_indices = tf.ragged.constant([[3, 1, 2], [1], [], [0]])

  >>> print(ragged.gather(params, ragged_indices))
  [['d', 'b', 'c'], ['b'], [], ['a']]

  >>> print(ragged.gather(ragged_params, indices))
  [['e'], ['d'], [], ['d'], ['a', 'b', 'c']]

  >>> print(ragged.gather(ragged_params, ragged_indices))
  [[['e'], ['d'], []], [['d']], [], [['a', 'b', 'c']]]
  ```

  Args:
    params: The potentially ragged tensor from which to gather values. Must be
      at least rank 1.
    indices: The potentially ragged tensor indicating which values to gather.
      Must have dtype `int32` or `int64`.  Values must be in the range
      `[0, params.shape[0])`.
    validate_indices: Ignored.
    axis: Must be zero.
    batch_dims: Must be zero.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor`, where `output.dtype=params.dtype` and
    `output.shape=indices.shape + params.shape[1:]` and
    `output.ragged_rank=indices.shape.ndims + params.ragged_rank`.

  Raises:
    ValueError: If indices.shape.ndims is not known statically.
  """
  del validate_indices
  if not isinstance(axis, int) or axis != 0:
    raise ValueError('axis != 0 is not supported for ragged gather yet.')
  if not isinstance(batch_dims, int) or batch_dims != 0:
    raise ValueError('batch_dims != 0 is not supported for ragged gather yet.')
  with ops.name_scope(name, 'RaggedGather', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)

    if ragged_tensor.is_ragged(indices):
      return indices.with_values(gather(params, indices.values))

    if not ragged_tensor.is_ragged(params):
      return array_ops.gather(params, indices)

    indices = ops.convert_to_tensor(indices)
    if indices.shape.ndims is None:
      raise ValueError('indices.shape.ndims must be known statically')

    result = gen_ragged_array_ops.ragged_gather(
        indices=indices,
        params_dense_values=params.flat_values,
        params_nested_splits=params.nested_row_splits,
        OUTPUT_RAGGED_RANK=indices.shape.ndims + len(params.nested_row_splits) -
        1)

    # Compose the RaggedTensor from splits & values.
    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        result.output_dense_values, result.output_nested_splits, validate=False)
Ejemplo n.º 55
0
def map_fn(fn,
           elems,
           dtype=None,
           parallel_iterations=None,
           back_prop=True,
           swap_memory=False,
           infer_shape=True,
           name=None):
  """map on the list of tensors unpacked from `elems` on dimension 0.

  The simplest version of `map_fn` repeatedly applies the callable `fn` to a
  sequence of elements from first to last. The elements are made of the
  tensors unpacked from `elems`. `dtype` is the data type of the return
  value of `fn`. Users must provide `dtype` if it is different from
  the data type of `elems`.

  Suppose that `elems` is unpacked into `values`, a list of tensors. The shape
  of the result tensor is `[values.shape[0]] + fn(values[0]).shape`.

  This method also allows multi-arity `elems` and output of `fn`.  If `elems`
  is a (possibly nested) list or tuple of tensors, then each of these tensors
  must have a matching first (unpack) dimension.  The signature of `fn` may
  match the structure of `elems`.  That is, if `elems` is
  `(t1, [t2, t3, [t4, t5]])`, then an appropriate signature for `fn` is:
  `fn = lambda (t1, [t2, t3, [t4, t5]]):`.

  Furthermore, `fn` may emit a different structure than its input.  For example,
  `fn` may look like: `fn = lambda t1: (t1 + 1, t1 - 1)`.  In this case,
  the `dtype` parameter is not optional: `dtype` must be a type or (possibly
  nested) tuple of types matching the output of `fn`.

  To apply a functional operation to the nonzero elements of a SparseTensor
  one of the following methods is recommended. First, if the function is
  expressible as TensorFlow ops, use

  ```python
    result = SparseTensor(input.indices, fn(input.values), input.dense_shape)
  ```

  If, however, the function is not expressible as a TensorFlow op, then use

  ```python
  result = SparseTensor(
    input.indices, map_fn(fn, input.values), input.dense_shape)
  ```

  instead.

  When executing eagerly, map_fn does not execute in parallel even if
  `parallel_iterations` is set to a value > 1. You can still get the
  performance benefits of running a function in parallel by using the
  `tf.contrib.eager.defun` decorator,

  ```python
  # Assume the function being used in map_fn is fn.
  # To ensure map_fn calls fn in parallel, use the defun decorator.
  @tf.contrib.eager.defun
  def func(tensor):
    return tf.map_fn(fn, tensor)
  ```

  Note that if you use the defun decorator, any non-TensorFlow Python code
  that you may have written in your function won't get executed. See
  `tf.contrib.eager.defun` for more details. The recommendation would be to
  debug without defun but switch to defun to get performance benefits of
  running map_fn in parallel.

  Args:
    fn: The callable to be performed.  It accepts one argument, which will have
      the same (possibly nested) structure as `elems`.  Its output must have the
      same structure as `dtype` if one is provided, otherwise it must have the
      same structure as `elems`.
    elems: A tensor or (possibly nested) sequence of tensors, each of which will
      be unpacked along their first dimension.  The nested sequence of the
      resulting slices will be applied to `fn`.
    dtype: (optional) The output type(s) of `fn`.  If `fn` returns a structure
      of Tensors differing from the structure of `elems`, then `dtype` is not
      optional and must have the same structure as the output of `fn`. Use
      `RaggedTensorType` to declare an output of type `RaggedTensor`.
    parallel_iterations: (optional) The number of iterations allowed to run in
      parallel. When graph building, the default value is 10. While executing
      eagerly, the default value is set to 1.
    back_prop: (optional) True enables support for back propagation.
    swap_memory: (optional) True enables GPU-CPU memory swapping.
    infer_shape: (optional) False disables tests for consistent output shapes.
    name: (optional) Name prefix for the returned tensors.

  Returns:
    A possibly nested sequence of potentially ragged tensors.  Each
    tensor packs the results of applying `fn` to tensors unpacked from `elems`
    along the first dimension, from first to last.

  Raises:
    TypeError: if `fn` is not callable or the structure of the output of
      `fn` and `dtype` do not match, or if elems is a SparseTensor.
    ValueError: if the lengths of the output of `fn` and `dtype` do not match.

  #### Examples:

    ```python
    elems = np.array([1, 2, 3, 4, 5, 6])
    squares = map_fn(lambda x: x * x, elems)
    # squares == [1, 4, 9, 16, 25, 36]
    ```

    ```python
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    alternate = map_fn(lambda x: x[0] * x[1], elems, dtype=tf.int64)
    # alternate == [-1, 2, -3]
    ```

    ```python
    elems = np.array([1, 2, 3])
    alternates = map_fn(lambda x: (x, -x), elems, dtype=(tf.int64, tf.int64))
    # alternates[0] == [1, 2, 3]
    # alternates[1] == [-1, -2, -3]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]])
    mean = map_fn(tf.reduce_mean, elems)
    # mean == [2, 4, 6]
    ```

    ```python
    elems=ragged.constant([[1, 2, 3], [4, 5], [6, 7]], dtype=tf.int64)
    out = map_fn(fn=lambda x: x+1, elems,
      dtype=ragged.RaggedTensorType(type=tf.int64, ragged_rank=0))
    # out = ragged.constant([[2, 3, 4], [5, 6], [7, 8]])
    ```
  """
  if not callable(fn):
    raise TypeError("fn must be callable.")

  if isinstance(elems, sparse_tensor.SparseTensor):
    raise TypeError(
        "To perform a map on the values of a sparse tensor use either "
        " SparseTensor(input.indices, fn(input.values), input.dense_shape) or "
        " SparseTensor(input.indices, map_fn(fn, input.values), "
        "input.dense_shape)")

  in_graph_mode = not context.executing_eagerly()
  # Set the default number of parallel_iterations depending on graph/eager mode.
  if in_graph_mode and not parallel_iterations:
    parallel_iterations = 10
  elif not in_graph_mode and not parallel_iterations:
    parallel_iterations = 1

  if not in_graph_mode and parallel_iterations > 1:
    logging.log_first_n(logging.WARN, "Setting parallel_iterations > 1 has no "
                        "effect when executing eagerly. Consider calling map_fn"
                        " with tf.contrib.eager.defun to execute fn in "
                        "parallel.", 1)
    parallel_iterations = 1

  input_is_sequence = nest.is_sequence(elems)
  input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x]

  def input_pack(x):
    return nest.pack_sequence_as(elems, x) if input_is_sequence else x[0]

  elems_flat = input_flatten(elems)

  with ops.name_scope(name, "map", elems_flat):
    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode:
      # Any get_variable calls in fn will cache the first call locally
      # and not issue repeated network I/O requests for each iteration.
      varscope = vs.get_variable_scope()
      varscope_caching_device_was_none = False
      if varscope.caching_device is None:
        # TODO(ebrevdo): Change to using colocate_with here and in other
        # methods.
        varscope.set_caching_device(lambda op: op.device)
        varscope_caching_device_was_none = True

    elems_flat = [
        ragged_tensor.convert_to_tensor_or_ragged_tensor(elem, name="elem")
        for elem in elems_flat
    ]

    # We can either infer the output, or we can assume that it will be the same
    # as the input structure.
    dtype = dtype or input_pack([elem.dtype for elem in elems_flat])

    # Find the number of iterations; n may be known statically.
    if isinstance(elems_flat[0], ragged_tensor.RaggedTensor):
      n = elems_flat[0].nrows(out_type=dtypes.int32)
    else:
      static_shape = elems_flat[0].shape
      if static_shape.ndims is not None and static_shape.ndims < 1:
        if len(elems_flat) == 1:
          raise ValueError(
              "elems must be a 1+ dimensional Tensor, not a scalar")
        else:
          raise ValueError(
              "elements in elems must be 1+ dimensional Tensors, not scalars")
      n = (tensor_shape.dimension_value(static_shape[0]) or
           array_ops.shape(elems_flat[0])[0])

    n = math_ops.cast(n, dtype=dtypes.int32)

    # Flatten the dtype structure to a list.
    dtype_flat = nest.flatten(dtype)

    # decompose to components
    dtype_components = [_maybe_decompose_dtype(d) for d in dtype_flat]
    dtype_components_flat = nest.flatten(dtype_components)

    # Create TensorArrays.
    accs_ta = [
        tensor_array_ops.TensorArray(
            dtype=t, dynamic_size=False, infer_shape=infer_shape, size=n)
        for t in dtype_components_flat
    ]

    i = constant_op.constant(0, dtype=dtypes.int32)

    def compute(i, tas):
      """The loop body of map_fn.

      Args:
        i: the loop counter
        tas: the flat TensorArray accumulator list

      Returns:
        (i + 1, tas): the updated counter + updated TensorArrays

      Raises:
        TypeError: if dtype and packed_fn_values structure do not match
        ValueError: if dtype and packed_fn_values lengths do not match
      """
      # Get Tensors or RaggedTensors sliced at i, then pack it back to the
      # original structure.
      packed_values = input_pack([elem_flat[i] for elem_flat in elems_flat])
      packed_fn_values = fn(packed_values)

      # Check that the structure of the output matches what was declared or
      # inferred.
      # nest.assert_same_structure(dtype or elems, packed_fn_values)

      # Flatten and decompose to a list of Tensors
      flat_fn_values = nest.flatten(packed_fn_values)

      # If we declared that we expect a RaggedTensor output but `fn` returned
      # a Tensor, try to convert it to a RaggedTensor.
      flat_fn_composite_tensors = list(
          _convert_declared(flat_fn_values, dtype_flat))

      flat_fn_components = [
          _maybe_decompose_tensor(t) for t in flat_fn_composite_tensors
      ]
      flat_fn_tensors = nest.flatten(flat_fn_components)

      # Write to TAs.
      tas = [ta.write(i, value) for (ta, value) in zip(tas, flat_fn_tensors)]

      return (i + 1, tas)

    _, r_a = control_flow_ops.while_loop(
        lambda i, _: i < n, compute, (i, accs_ta),
        parallel_iterations=parallel_iterations,
        back_prop=back_prop,
        swap_memory=swap_memory,
        maximum_iterations=n)

    # TODO(akshayka): Remove the in_graph_mode check once caching devices are
    # supported in Eager
    if in_graph_mode and varscope_caching_device_was_none:
      varscope.set_caching_device(None)

    # Pack back into a list of components
    results_as_components = nest.pack_sequence_as(dtype_components, r_a)

    # Stack TensorArrays for Tensor outputs, and concat RaggedTensor outputs.
    def _stack_or_concat(e):
      if isinstance(e, _RaggedTensorComponents):
        return _concat_ragged_tensor_components(e)
      else:
        result = e.stack()
        return result

    results_flat_components = [
        _stack_or_concat(e) for e in results_as_components
    ]

    results_packed = [
        _maybe_recompose_tensor(c) for c in results_flat_components
    ]
    results_packed = nest.pack_sequence_as(dtype, results_packed)
    return results_packed
Ejemplo n.º 56
0
def gather_nd(params, indices, batch_dims=0, name=None):
  """Gather slices from `params` using `n`-dimensional indices.

  This operation is similar to `gather`, but it uses the innermost dimension
  of `indices` to define a slice into `params`.  In particular, if:

  * `indices` has shape `[A1...AN, I]`
  * `params` has shape `[B1...BM]`

  Then:

  * `result` has shape `[A1...AN, B_{I+1}...BM]`.
  * `result[a1...aN] = params[indices[a1...aN, :]]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BM]`.
    indices: A potentially ragged tensor with shape `[A1...AN, I]`.
    batch_dims: Must be zero.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[A1...AN, B_{I+1}...BM]`.

  #### Examples:
    ```python
    >>> params = tf.compat.v1.ragged.constant_value(
    ...     [ [ ['000', '001'], ['010'              ]          ],
    ...       [ ['100'       ], ['110', '111', '112'], ['120'] ],
    ...       [ [            ], ['210'              ]          ] ])

    >>> # Gather 2D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2], [0]])
    [ [ [            ], ['210'] ]
      [ ['000', '001'], ['010'] ] ]

    >>> # Gather 1D slices from a 3D tensor
    >>> ragged.gather_nd(params, [[2, 1], [0, 0]])
    [['210'], ['000', '001']]

    >>> # Gather scalars from a 3D tensor
    >>> ragged.gather_nd(params, [[0, 0, 1], [1, 1, 2]])
    ['001', '112']
    ```
  """
  if not isinstance(batch_dims, int) or batch_dims != 0:
    raise ValueError('batch_dims != 0 is not supported for ragged gather yet.')
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.gather_nd(params, indices, name)

  with ops.name_scope(name, 'RaggedGatherNd', [params, indices]):

    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)
    indices_shape = indices.shape
    indices_ndims = indices_shape.ndims
    if indices_ndims is None:
      raise ValueError('indices.rank must be statically known.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')
    if (ragged_tensor.is_ragged(indices) and
        indices_ndims == indices.ragged_rank + 1):
      raise ValueError('The innermost dimension of indices may not be ragged')

    # `index_size` is the "n" in "gather_nd" -- i.e., the number of dimensions
    # that each index slices into.
    index_size = tensor_shape.dimension_value(indices_shape[-1])
    if index_size is None:
      raise ValueError('indices.shape[-1] must be statically known.')

    # If `indices` has more than 2 dimensions, then recurse.  If `indices` is
    # dense, then we convert it to ragged before recursing, and then convert
    # the result back to `dense` if appropriate.
    if indices_ndims > 2:
      indices_is_dense = not ragged_tensor.is_ragged(indices)
      if indices_is_dense:
        indices = ragged_tensor.RaggedTensor.from_tensor(
            indices, ragged_rank=indices_ndims - 2,
            row_splits_dtype=params.row_splits.dtype)
      result = indices.with_flat_values(gather_nd(params, indices.flat_values))
      if (indices_is_dense and ragged_tensor.is_ragged(result) and
          result.ragged_rank == indices_ndims - 2):
        result = ragged_tensor.RaggedTensor.to_tensor(result)
      return result

    # indices_ndims <= 2, and the innermost dimension of indices may not be
    # ragged, so `indices` must not be ragged.
    assert not ragged_tensor.is_ragged(indices)
    assert ragged_tensor.is_ragged(params)

    # Handle corner case: An empty index tuple selects the entire `params`
    # value.  So if `index_size` is zero, then tile `params`.
    if index_size == 0:
      params_ndims = params.ragged_rank + array_ops.rank(params.flat_values)
      for _ in range(indices_ndims - 1):
        params = ragged_array_ops.expand_dims(params, axis=0)
      multiples = array_ops.concat([
          array_ops.shape(indices)[:-1],
          array_ops.ones([params_ndims], dtypes.int32)
      ],
                                   axis=0)
      return ragged_array_ops.tile(params, multiples)

    # When index_size=1, we can just flatten the index tuples and use gather.
    elif index_size == 1:
      flattened_index_tuples = array_ops.reshape(indices, [-1])
      return gather(params, flattened_index_tuples)

    # Otherwise, params is a RaggedTensor, and indices is a 1D or 2D Tensor.
    # Flatten both the index tuples and the params, such that the flattened
    # index tuples point to the correct values in the flattened params; and
    # then use ragged.gather on the flattened index tuples & params.
    else:
      indices = math_ops.cast(indices, params.row_splits.dtype)

      # Flatten the outermost 2 dimensions of the index tuples & params.
      flattened_index_tuples = array_ops.gather(params.row_splits,
                                                indices[..., 0])
      flattened_index_tuples += indices[..., 1]
      flattened_params = params.values

      # Flatten any remaining dimensions.
      for dim in range(2, index_size):
        if not ragged_tensor.is_ragged(flattened_params):
          flattened_index_tuples = array_ops.expand_dims(
              flattened_index_tuples, axis=1)
          flattened_index_tuples = array_ops.concat(
              [flattened_index_tuples, indices[..., dim:]], axis=1)
          return array_ops.gather_nd(flattened_params, flattened_index_tuples)

        flattened_index_tuples = array_ops.gather(
            flattened_params.row_starts(), flattened_index_tuples)
        flattened_index_tuples += indices[..., dim]
        flattened_params = flattened_params.values

      # Gather using the flattened index tuples and params.
      return gather(flattened_params, flattened_index_tuples)
Ejemplo n.º 57
0
def strings_split_v1(input=None, sep=None, maxsplit=-1,  # pylint: disable=redefined-builtin
                     result_type="SparseTensor", source=None, name=None):
  """Split elements of `input` based on `sep`.

  Let N be the size of `input` (typically N will be the batch size). Split each
  element of `input` based on `sep` and return a `SparseTensor` or
  `RaggedTensor` containing the split tokens. Empty tokens are ignored.

  Examples:

  ```python
  >>> tf.strings.split(['hello world', 'a b c'])
  tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]],
                  values=['hello', 'world', 'a', 'b', 'c'],
                  dense_shape=[2, 3])

  >>> tf.strings.split(['hello world', 'a b c'], result_type="RaggedTensor")
  <tf.RaggedTensor [['hello', 'world'], ['a', 'b', 'c']]>
  ```

  If `sep` is given, consecutive delimiters are not grouped together and are
  deemed to delimit empty strings. For example, `input` of `"1<>2<><>3"` and
  `sep` of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty
  string, consecutive whitespace are regarded as a single separator, and the
  result will contain no empty strings at the start or end if the string has
  leading or trailing whitespace.

  Note that the above-mentioned behavior matches Python's `str.split`.

  Args:
    input: A string `Tensor` of rank `N`, the strings to split.  If
      `rank(input)` is not known statically, then it is assumed to be `1`.
    sep: `0-D` string `Tensor`, the delimiter character.
    maxsplit: An `int`. If `maxsplit > 0`, limits the number of splits in the
      result.
    result_type: The tensor type for the result: one of `"RaggedTensor"` or
      `"SparseTensor"`.
    source: alias for "input" argument.
    name: A name for the operation (optional).

  Raises:
    ValueError: If sep is not a string.

  Returns:
    A `SparseTensor` or `RaggedTensor` of rank `N+1`, the strings split
    according to the delimiter.
  """
  input = deprecation.deprecated_argument_lookup(
      "input", input, "source", source)
  with ops.name_scope(name, "StringSplit", [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, dtype=dtypes.string, name="input")
    if result_type == "SparseTensor" and input.shape.rank == 1:
      return string_ops.string_split_v2(input, sep=sep, maxsplit=maxsplit)

    ragged_result = string_split_v2(input, sep=sep, maxsplit=maxsplit)
    if result_type == "SparseTensor":
      return ragged_result.to_sparse()
    elif result_type == "RaggedTensor":
      return ragged_result
    else:
      raise ValueError("result_type must be 'RaggedTensor' or 'SparseTensor'.")
Ejemplo n.º 58
0
def batch_gather(params, indices, name=None):
  """Gathers slices from `params` according to `indices` with batch dims.

  This operation is similar to `gather`, but it assumes that the leading `N`
  dimensions of `indices` and `params` are batch dimensions, and performs a
  gather within each batch.  In particular, when using this operation with `N`
  batch dimensions `B1...BN`:

  * `indices` has shape `[B1...BN, I]`
  * `params` has shape `[B1...BN, P1...PM]`.
  * `result` has shape `[B1...BN, I, P2...PM]`.
  * `result[b1...bN, i, p2...pM] =
    params[b1...bN, indices[b1...bN, i], p2...pM]`

  Args:
    params: A potentially ragged tensor with shape `[B1...BN, P1...PM]` (`N>=0`,
      `M>0`).
    indices: A potentially ragged tensor with shape `[B1...BN, I]` (`N>=0`).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[B1...BN, I, P2...PM]`.
    `result.ragged_rank = max(indices.ragged_rank, params.ragged_rank)`.

  #### Example:
    ```python
    >>> params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
    >>> indices = tf.ragged.constant([[1, 2, 0], [], [], [0, 0]])
    >>> tf.compat.v1.batch_gather(params, indices)
    [['b', 'c', 'a'], [], [], ['e', 'e']]
    ```
  """
  if not (ragged_tensor.is_ragged(params) or ragged_tensor.is_ragged(indices)):
    return array_ops.batch_gather(params, indices, name)

  with ops.name_scope(name, 'RaggedBatchGather', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)
    indices_ndims = indices.shape.ndims
    if indices_ndims is None:
      raise ValueError(
          'batch_gather does not allow indices with unknown shape.')
    if indices_ndims == 0:
      raise ValueError('indices.rank must be at least 1.')

    if ragged_tensor.is_ragged(indices):
      # If the outermost ragged dimension is a batch dimension, recurse.
      if indices_ndims > 2:
        if not ragged_tensor.is_ragged(params):
          raise ValueError('batch shape from indices does '
                           'not match params shape')
        checks = [check_ops.assert_equal(params.row_splits, indices.row_splits)]
        with ops.control_dependencies(checks):
          return ragged_tensor.RaggedTensor.from_row_splits(
              batch_gather(params.values, indices.values), indices.row_splits,
              validate=False)

      # Otherwise, indices is a 2D ragged tensor with 1 ragged dimension.
      else:
        # Ensure that `params` is ragged and has at least 2 dimensions.
        if not ragged_tensor.is_ragged(params):
          if params.shape.ndims is not None and params.shape.ndims < 2:
            raise ValueError('batch shape from indices does '
                             'not match params shape')
          params = ragged_tensor.RaggedTensor.from_tensor(
              params, ragged_rank=1,
              row_splits_dtype=indices.row_splits.dtype)

        # Adjust indices from within-batch to global (in params.values), and
        # then use ragged.gather to gather them.
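        # For example (illustrative): if params has row lengths [3, 1]
        # (row_starts [0, 3]) and indices is [[1, 2], [0]], the flattened
        # index values [1, 2, 0] get adjustments [0, 0, 3], giving global
        # positions [1, 2, 3] into params.values.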
        num_indices = indices.row_lengths()
        params_starts = params.row_starts()
        adjustments = ragged_util.repeat(params_starts, num_indices, axis=0)
        adjusted_index_values = (
            math_ops.cast(indices.values, adjustments.dtype) + adjustments)
        return ragged_tensor.RaggedTensor.from_row_splits(
            ragged_gather_ops.gather(params.values, adjusted_index_values),
            indices.row_splits, validate=False)

    else:  # params is a RaggedTensor and indices is a Tensor.
      if indices_ndims == 1:
        return ragged_gather_ops.gather(params, indices)
      elif indices_ndims == 2:
        # Adjust indices from batch-local to global (in params.values)
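        # For example (illustrative): row_starts [0, 3] become adjustments
        # [[0], [3]], so indices [[1, 2], [0, 0]] map to global positions
        # [[1, 2], [3, 3]] into params.values.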
        adjustments = array_ops.expand_dims(params.row_starts(), 1)
        adjusted_indices = (
            math_ops.cast(indices, adjustments.dtype) + adjustments)
        return ragged_gather_ops.gather(params.values, adjusted_indices)
      else:
        raise ValueError('batch shape from indices does not match params shape')
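A worked example of the ragged path above (illustrative; the output is shown as the equivalent nested Python lists):

```python
import tensorflow as tf

params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
indices = tf.ragged.constant([[2, 0], [0], [], [0]])
# Within-batch index values [2, 0, 0, 0] are shifted by the repeated row
# starts [0, 0, 3, 4] of params, then gathered from params.flat_values.
result = tf.compat.v1.batch_gather(params, indices)
# result: [['c', 'a'], ['d'], [], ['e']]
```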
Example No. 59
  def testElementwiseOpBroadcast(self, x, y, expected):
    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
    result = x + y
    self.assertRaggedEqual(result, expected)
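The test above exercises elementwise broadcasting over ragged tensors; a minimal standalone illustration:

```python
import tensorflow as tf

x = tf.ragged.constant([[1, 2], [3]])
# A scalar broadcasts across every element of a ragged tensor.
print(x + 10)  # <tf.RaggedTensor [[11, 12], [13]]>
```
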
def batch_gather_with_default(params,
                              indices,
                              default_value='',
                              name=None):
  """Same as `batch_gather` but inserts `default_value` for invalid indices.

  This operation is similar to `batch_gather`, except that out-of-bounds
  values in `indices` yield `default_value` in the result instead of an
  error.  See `batch_gather` for more details.

  Args:
    params: A potentially ragged tensor with shape `[B1...BN, P1...PM]` (`N>=0`,
      `M>0`).
    indices: A potentially ragged tensor with shape `[B1...BN, I]` (`N>=0`).
    default_value: A value to be inserted in places where `indices` are out of
      bounds. Must be the same dtype as params and either a scalar or rank 1.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor with shape `[B1...BN, I, P2...PM]`.
    `result.ragged_rank = max(indices.ragged_rank, params.ragged_rank)`.

  #### Example:
    ```python
    >>> params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
    >>> indices = tf.ragged.constant([[1, 2, -1], [], [], [0, 10]])
    >>> batch_gather_with_default(params, indices, 'FOO')
    [['b', 'c', 'FOO'], [], [], ['e', 'FOO']]
    ```
  """
  with ops.name_scope(name, 'RaggedBatchGatherWithDefault'):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params',
    )
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices',
    )
    default_value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        default_value, name='default_value',
    )
    # TODO(hterry): lift this restriction and support default_values
    #               of rank > 1.
    if default_value.shape.ndims not in (0, 1):
      raise ValueError('"default_value" must be a scalar or vector')
    upper_bounds = None
    if indices.shape.ndims is None:
      raise ValueError('Indices must have a known rank.')
    if params.shape.ndims is None:
      raise ValueError('Params must have a known rank.')

    num_batch_dimensions = indices.shape.ndims - 1
    pad = None
    # The logic for this works as follows:
    # - create a padded params, where:
    #    padded_params[b1...bn, 0] = default_value
    #    padded_params[b1...bn, i] = params[b1...bn, i-1] (i>0)
    # - create an `upper_bounds` Tensor that contains the number of elements
    #   in each innermost rank. Broadcast `upper_bounds` to be the same shape
    #   as `indices`.
    # - check to see which index in `indices` are out of bounds and substitute
    #   it with the index containing `default_value` (the first).
    # - call batch_gather with the indices adjusted.
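    # For example (illustrative): with params=[['a', 'b', 'c'], ['d']] and
    # default_value='FOO', padded_params=[['FOO', 'a', 'b', 'c'],
    # ['FOO', 'd']]; a valid index i is shifted to i+1, while an out-of-bound
    # index is redirected to 0, the slot holding the default.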
    with ops.control_dependencies([
        check_ops.assert_greater_equal(array_ops.rank(params),
                                       array_ops.rank(indices))]):
      if ragged_tensor.is_ragged(params):
        row_lengths = ragged_array_ops.expand_dims(
            params.row_lengths(axis=num_batch_dimensions),
            axis=-1)
        upper_bounds = math_ops.cast(row_lengths, indices.dtype)

        pad_shape = _get_pad_shape(params, indices)

        pad = ragged_tensor_shape.broadcast_to(
            default_value, pad_shape)
      else:
        params_shape = array_ops.shape(params)
        pad_shape = array_ops.concat([
            params_shape[:num_batch_dimensions],
            [1],
            params_shape[num_batch_dimensions + 1:params.shape.ndims]
        ], 0)
        upper_bounds = params_shape[num_batch_dimensions]
        pad = array_ops.broadcast_to(default_value, pad_shape)

      # Add `default_value` as the first value in the innermost (ragged) rank.
      pad = math_ops.cast(pad, params.dtype)
      padded_params = array_ops.concat(
          [pad, params], axis=num_batch_dimensions)

      # Adjust the indices by substituting out-of-bound indices with the
      # default-value index (which is the first element).  Valid indices lie
      # in [0, upper_bounds), so indices at or above upper_bounds are invalid.
      shifted_indices = indices + 1
      is_out_of_bounds = (indices < 0) | (indices >= upper_bounds)
      adjusted_indices = ragged_where_op.where(
          is_out_of_bounds,
          x=array_ops.zeros_like(indices), y=shifted_indices,
      )
      return array_ops.batch_gather(
          params=padded_params, indices=adjusted_indices, name=name)
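
A usage sketch mirroring the docstring example above (illustrative; it assumes `batch_gather_with_default` is imported from this module, with the output shown as the equivalent nested Python lists):

```python
import tensorflow as tf

params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
indices = tf.ragged.constant([[1, 2, -1], [], [], [0, 10]])
# -1 and 10 are out of bounds for their rows, so they yield the default.
result = batch_gather_with_default(params, indices, 'FOO')
# result: [['b', 'c', 'FOO'], [], [], ['e', 'FOO']]
```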