Example #1
def _get_row_lengths(segments, axis=-1):
    """Gets the row lengths relative to a desired axis."""
    axis = array_ops.get_positive_axis(axis, segments.shape.ndims) - 1
    row_lengths = ragged_tensor.RaggedTensor.from_nested_row_lengths(
        segments.nested_row_lengths()[axis],
        segments.nested_row_lengths()[:axis])
    for _ in range(axis):
        row_lengths = math_ops.reduce_sum(row_lengths, -1)
    return row_lengths
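Every example in this collection routes its `axis` argument through `array_ops.get_positive_axis`. For reference, the sketch below is a minimal pure-Python stand-in for that helper's contract (illustrative only, not TensorFlow's implementation): a possibly negative axis is mapped to its non-negative equivalent and validated against the rank.

def get_positive_axis_sketch(axis, ndims, axis_name='axis', ndims_name='ndims'):
    """Illustrative stand-in for array_ops.get_positive_axis."""
    if not isinstance(axis, int):
        raise TypeError(f'{axis_name} must be an int; got {type(axis).__name__}')
    if ndims is not None:
        if 0 <= axis < ndims:
            return axis              # already non-negative and in range
        if -ndims <= axis < 0:
            return axis + ndims      # count back from the last dimension
        raise ValueError(f'{axis_name}={axis} out of bounds: expected '
                         f'{-ndims} <= {axis_name} < {ndims}')
    if axis < 0:
        # Without a statically known rank, a negative axis cannot be resolved.
        raise ValueError(f'{axis_name} may only be negative if {ndims_name} '
                         'is statically known.')
    return axis

assert get_positive_axis_sketch(-1, 3) == 2
assert get_positive_axis_sketch(1, 3) == 1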
Example #2
def _get_row_lengths_merged_to_axis(segments, axis=-1):
  """Get the row lengths relative to a desired axis."""
  axis = array_ops.get_positive_axis(axis, segments.shape.ndims) - 1
  row_lengths = ragged_tensor.RaggedTensor.from_nested_row_lengths(
      segments.nested_row_lengths()[axis],
      segments.nested_row_lengths()[:axis])
  for _ in range(axis):
    row_lengths = math_ops.reduce_sum(row_lengths, -1)
  return row_lengths
Example #3
    def merge_dims(self, outer_axis, inner_axis):
        """Merges outer_axis...inner_axis into a single dimension.

    Returns a copy of this RaggedTensor with the specified range of dimensions
    flattened into a single dimension, with elements in row-major order.

    >>> st = StructuredTensor.from_pyval(
    ...     [[{'foo': 12}, {'foo': 33}], [], [{'foo': 99}]])
    >>> st.merge_dims(0, 1)
    <StructuredTensor(
      fields={
        "foo": tf.Tensor([12 33 99], shape=(3,), dtype=int32)},
      shape=(3,))>

    Args:
      outer_axis: `int`: The first dimension in the range of dimensions to
        merge. May be negative (to index from the last dimension).
      inner_axis: `int`: The last dimension in the range of dimensions to merge.
        May be negative (to index from the last dimension).

    Returns:
      A copy of this tensor, with the specified dimensions merged into a
      single dimension.  The shape of the returned tensor will be
      `self.shape[:outer_axis] + [N] + self.shape[inner_axis + 1:]`, where `N`
      is the total number of slices in the merged dimensions.
    """
        outer_axis = array_ops.get_positive_axis(outer_axis,
                                                 self.shape.rank,
                                                 axis_name='outer_axis',
                                                 ndims_name='rank(self)')
        inner_axis = array_ops.get_positive_axis(inner_axis,
                                                 self.shape.rank,
                                                 axis_name='inner_axis',
                                                 ndims_name='rank(self)')
        if not outer_axis < inner_axis:
            raise ValueError('Expected outer_axis (%d) to be less than '
                             'inner_axis (%d)' % (outer_axis, inner_axis))
        return _merge_dims(self, outer_axis, inner_axis)
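A small usage sketch for negative axes, reusing the `StructuredTensor` class from the doctest above; `get_positive_axis` normalizes both arguments before the `outer_axis < inner_axis` check runs:

st = StructuredTensor.from_pyval(
    [[{'foo': 12}, {'foo': 33}], [], [{'foo': 99}]])

# For a rank-2 tensor, merge_dims(-2, -1) normalizes to merge_dims(0, 1).
merged = st.merge_dims(-2, -1)
# merged.shape == (3,)

# merge_dims(1, 0) normalizes to (1, 0) and raises ValueError, since
# outer_axis must be strictly less than inner_axis.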
Example #4
def reverse(tensor: ragged_tensor.Ragged, axis, name=None):
  """Reverses a RaggedTensor along the specified axes.

  #### Example:

  >>> data = tf.ragged.constant([
  ...   [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10], [11, 12]]])
  >>> tf.reverse(data, axis=[0, 2])
  <tf.RaggedTensor [[[8, 7], [10, 9], [12, 11]], [[6, 5]], [[2, 1], [4, 3]]]>

  Args:
    tensor: A `RaggedTensor` to reverse.
    axis: A list or tuple of `int` or a constant 1D `tf.Tensor`. The indices of
      the axes to reverse.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor`.
  """
  type_error_msg = ('`axis` must be a list of int or a constant tensor '
                    'when reversing axes in a ragged tensor')

  with ops.name_scope(name, 'Reverse', [tensor, axis]):
    if isinstance(axis, ops.Tensor):
      axis = tensor_util.constant_value(axis)
      if axis is None:
        raise TypeError(type_error_msg)
    elif not (isinstance(axis, (list, tuple)) and
              all(isinstance(dim, int) for dim in axis)):
      raise TypeError(type_error_msg)

    tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        tensor, name='tensor')

    # Allow usage of negative values to specify innermost axes.
    axis = [
        array_ops.get_positive_axis(dim, tensor.shape.rank, 'axis[%d]' % i,
                                    'rank(tensor)')
        for i, dim in enumerate(axis)
    ]

    # We only need to slice up to the max axis. If the axis list
    # is empty, it should be 0.
    slices = [slice(None)] * (max(axis) + 1 if axis else 0)

    for dim in axis:
      slices[dim] = slice(None, None, -1)

    return tensor[tuple(slices)]
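A brief sketch of the negative-axis behavior, assuming a TF 2.x environment where `tf.reverse` dispatches to this ragged implementation (as the doctest above indicates):

import tensorflow as tf

data = tf.ragged.constant(
    [[[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10], [11, 12]]])

# axis=[-1] is normalized to axis=[2] by get_positive_axis.
assert tf.reverse(data, axis=[-1]).to_list() == \
    tf.reverse(data, axis=[2]).to_list()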
Example #5
def ragged_cumsum(x: ragged_tensor.Ragged,
                  axis: int = 0,
                  exclusive: bool = False,
                  reverse: bool = False,
                  name: typing.Optional[str] = None):
  """Calculate math_ops.cumsum for a RaggedTensor.

  Given a ragged tensor `x`, the `result` is a ragged tensor with the same
  shape. One can calculate the value of `result[i_1...i_k]` as follows:
  ```
  dense_result = tf.math.cumsum(x.to_tensor(), axis=axis, exclusive=exclusive,
                                reverse=reverse)
  result[i_1...i_k] = dense_result[i_1...i_k]
  ```

  Args:
    x: the original ragged tensor to sum.
    axis: the axis along which to sum; must be in the range
      `-rank <= axis < rank`.
    exclusive: is the sum exclusive or inclusive? If True, then
      `result[0] = 0`. If False, then `result[0] = x[0]`.
    reverse: If True, sum from back to front.
    name: the name of the op.
  Returns:
    the cumulative sum.
  """
  with ops.name_scope(name, 'RaggedCumSum', [x, axis, exclusive, reverse]):
    axis = array_ops.get_positive_axis(axis, x.shape.rank, ndims_name='rank')
    if axis == x.ragged_rank:
      last_rp = x._nested_row_partitions[-1]  # pylint: disable=protected-access
      return x.with_flat_values(
          _cumsum_flat_values_at_ragged_rank(last_rp, x.flat_values,
                                             exclusive=exclusive,
                                             reverse=reverse))
    elif axis > x.ragged_rank:
      new_axis = axis - x.ragged_rank
      cumsum_bound = functools.partial(
          math_ops.cumsum, axis=new_axis, exclusive=exclusive, reverse=reverse)
      return ragged_functional_ops.map_flat_values(cumsum_bound, x)
    else:
      dense_version = x.to_tensor()
      result = math_ops.cumsum(
          dense_version, axis, exclusive=exclusive, reverse=reverse, name=name)
      return ragged_tensor.RaggedTensor.from_tensor(
          result, lengths=x.nested_row_lengths())
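A usage sketch, assuming `tf.math.cumsum` dispatches to `ragged_cumsum` for ragged inputs (the `ragged_tensor.Ragged` annotation above suggests the function is registered through the dispatch mechanism):

import tensorflow as tf

x = tf.ragged.constant([[1, 2, 3], [4], [5, 6]])

# axis=1 is the ragged dimension, so the flat-values path above is taken.
print(tf.math.cumsum(x, axis=1))                   # [[1, 3, 6], [4], [5, 11]]

# axis=-1 is normalized to 1 by get_positive_axis for this rank-2 input.
print(tf.math.cumsum(x, axis=-1, exclusive=True))  # [[0, 1, 3], [0], [0, 5]]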
Example #6
def _expand_dims_impl(st, axis, name=None):  # pylint: disable=redefined-builtin
    """Creates a StructuredTensor with a length 1 axis inserted at index `axis`.

    This is an implementation of tf.expand_dims for StructuredTensor. Note
    that the `axis` must be less than or equal to rank.

    >>> st = StructuredTensor.from_pyval([[{"x": 1}, {"x": 2}], [{"x": 3}]])
    >>> tf.expand_dims(st, 0).to_pyval()
    [[[{'x': 1}, {'x': 2}], [{'x': 3}]]]
    >>> tf.expand_dims(st, 1).to_pyval()
    [[[{'x': 1}, {'x': 2}]], [[{'x': 3}]]]
    >>> tf.expand_dims(st, 2).to_pyval()
    [[[{'x': 1}], [{'x': 2}]], [[{'x': 3}]]]
    >>> tf.expand_dims(st, -1).to_pyval()  # -1 is the same as 2
    [[[{'x': 1}], [{'x': 2}]], [[{'x': 3}]]]

    Args:
      st: the original StructuredTensor.
      axis: the axis to insert the dimension: `-(rank + 1) <= axis <= rank`
      name: the name of the op.

    Returns:
      a new structured tensor with larger rank.

    Raises:
      an error if `axis < -(rank + 1)` or `rank < axis`.
    """
    axis = array_ops.get_positive_axis(axis,
                                       st.rank + 1,
                                       axis_name='axis',
                                       ndims_name='rank(st)')
    with ops.name_scope(name, 'ExpandDims', [st, axis]):
        new_fields = {
            k: array_ops.expand_dims(v, axis)
            for (k, v) in st._fields.items()
        }
        new_shape = st.shape[:axis] + (1, ) + st.shape[axis:]
        new_row_partitions = _expand_st_row_partitions(st, axis)
        new_nrows = st.nrows() if (axis > 0) else 1
        return StructuredTensor.from_fields(new_fields,
                                            shape=new_shape,
                                            row_partitions=new_row_partitions,
                                            nrows=new_nrows)
Example #7
    def get_selectable(self, input_ids, axis):
        """See `get_selectable()` in superclass."""
        selectable = super(FirstNItemSelector,
                           self).get_selectable(input_ids, axis)
        axis = array_ops.get_positive_axis(
            axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank)
        # Create a positions RT and mask out positions that are not selectable
        positions_flat = math_ops.range(array_ops.size(input_ids.flat_values))
        positions = input_ids.with_flat_values(positions_flat)
        selectable_positions = ragged_array_ops.boolean_mask(
            positions, selectable)

        # merge to the desired axis
        selectable_positions = selectable_positions.merge_dims(
            1, axis) if axis > 1 else selectable_positions

        # Get a selection mask based on how many items are desired for
        # selection. After the merge above, the axis to select on is always 1.
        merged_axis = axis - (axis - 1)
        selection_mask = _get_selection_mask(selectable_positions,
                                             self._num_to_select, merged_axis)
        # Mask out positions that were not selected.
        selected_positions = ragged_array_ops.boolean_mask(
            selectable_positions, selection_mask)

        # Now that we have all the positions which were chosen, we recreate a mask
        # (matching the original input's shape) where the value is True if it was
        # selected. We do this by creating a "all false" RT and scattering true
        # values to the positions chosen for selection.
        all_true = selected_positions.with_flat_values(
            array_ops.ones_like(selected_positions.flat_values))
        all_false = math_ops.cast(
            array_ops.zeros(array_ops.shape(input_ids.flat_values)),
            dtypes.int32)
        results_flat = array_ops.tensor_scatter_update(
            all_false, array_ops.expand_dims(selected_positions.flat_values,
                                             -1), all_true.flat_values)
        results = input_ids.with_flat_values(results_flat)
        results = math_ops.cast(results, dtypes.bool)

        # Reduce until input.shape[:axis]
        for _ in range(input_ids.shape.ndims - axis - 1):
            results = math_ops.reduce_all(results, -1)
        return results
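A hedged usage sketch, assuming the `tensorflow_text` package, where `FirstNItemSelector` is exported; the expected mask values are worked out by hand from the logic above:

import tensorflow as tf
import tensorflow_text as tf_text  # assumption: FirstNItemSelector ships here

selector = tf_text.FirstNItemSelector(2)
ids = tf.ragged.constant([[1, 2, 3, 4], [5, 6]])
mask = selector.get_selectable(ids, axis=1)
# Expected: the first two items of each row marked True, i.e.
# [[True, True, False, False], [True, True]]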
Example #8
def gather(params,
           indices,
           validate_indices=None,
           name=None,
           axis=None,
           batch_dims=0):
  """tf.gather for structured tensors.

  Does not yet support checks on illegal axis values, etc.

  Indices must be a ragged or dense tensor.

  Args:
    params: a structured tensor to be gathered
    indices: a ragged tensor or tensor to gather by.
    validate_indices: whether to validate the indices
    name: the name of the op(s).
    axis: the axis in params to gather on.
    batch_dims: the number of batch dimensions.

  Returns:
    the params reorganized according to indices.
  """
  if name is None:
    name = 'gather'
  with ops.name_scope(name):
    if axis is None:
      axis = batch_dims
    rank = params.shape.rank
    axis = array_ops.get_positive_axis(axis, rank)
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')

    def leaf_op(p):
      return array_ops.gather(
          p,
          indices,
          validate_indices=validate_indices,
          axis=axis,
          batch_dims=batch_dims,
          name=None)

    return _extend_op_single(params, leaf_op)
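A hypothetical sketch calling the helper above directly (whether `tf.gather` dispatches here for `StructuredTensor` inputs depends on the TF version):

st = StructuredTensor.from_pyval([{'x': 1}, {'x': 2}, {'x': 3}])

picked = gather(st, [2, 0])  # each leaf field is gathered with tf.gather
# picked.to_pyval() == [{'x': 3}, {'x': 1}]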
Example #9
def ragged_one_hot(indices,
                   depth,
                   on_value=None,
                   off_value=None,
                   axis=None,
                   dtype=None,
                   name=None):
    """Applies tf.one_hot along the values of a RaggedTensor."""
    with ops.name_scope(name, 'RaggedOneHot', [indices]):
        indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            indices, name='indices')
        if axis is not None:
            axis = array_ops.get_positive_axis(axis,
                                               indices.shape.ndims,
                                               ndims_name='rank(indices)')
            if axis < indices.ragged_rank:
                raise ValueError(
                    'axis may not be less than indices.ragged_rank.')
        return indices.with_flat_values(
            array_ops.one_hot(indices.flat_values, depth, on_value, off_value,
                              axis, dtype, name))
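A short usage sketch, assuming `tf.one_hot` dispatches to this ragged implementation. The one-hot depth becomes a new innermost uniform dimension, which is why `axis` may not point inside the ragged dimensions:

import tensorflow as tf

indices = tf.ragged.constant([[0, 2], [1]])
print(tf.one_hot(indices, depth=3))
# Roughly: <tf.RaggedTensor [[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],
#                            [[0.0, 1.0, 0.0]]]>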
Example #10
  def get_selectable(self, input_ids, axis):
    """Return a boolean mask of items that can be chosen for selection.

    Args:
      input_ids: a `RaggedTensor`.
      axis: axis to apply selection on.

    Returns:
      a `RaggedTensor` with dtype of bool and with shape
      `input_ids.shape[:axis]`. Its values are True if the
      corresponding item (or broadcasted subitems) should be considered for
      masking. In the default implementation, all `input_ids` items that are not
      listed in `unselectable_ids` (from the class arg) are considered
      selectable.
    """
    # merge to the desired axis
    input_ids = input_ids.merge_dims(1, axis) if axis > 1 else input_ids

    all_selectable_flats = [
        ragged_functional_ops.map_flat_values(math_ops.not_equal, input_ids,
                                              i).flat_values
        for i in self._unselectable_ids
    ]

    # If there are unselectable ids, AND together their "not equal" masks;
    # otherwise mark everything as selectable.
    if all_selectable_flats:
      reduce_flat = math_ops.reduce_all(all_selectable_flats, axis=0)
    else:
      reduce_flat = array_ops.ones_like(
          input_ids.flat_values, dtype=dtypes.bool)

    # reduce to the requested axis and broadcast to match original shape
    axis = array_ops.get_positive_axis(
        axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank)
    results = input_ids.with_flat_values(reduce_flat)
    if axis < input_ids.ragged_rank:
      reduce_axis = list(range(input_ids.ragged_rank, axis, -1))
      results = math_ops.reduce_all(results, reduce_axis)

    return results
Example #11
def concat(values, axis, name: str = 'concat'):
  """tf.concat for structured tensors.

  Does not yet support checks on illegal axis values, etc.

  Args:
    values: a sequence of StructuredTensors.
    axis: an axis to concatenate upon.
    name: the name of the op(s).

  Returns:
    the input StructuredTensors concatenated along `axis`.
  """
  if name is None:
    name = 'concat'
  _assert_concat_compatible_structured_tensors(values)
  def leaf_op(values):
    return array_ops.concat(values, axis)
  # TODO(martinz): handle axis when it is a tensor.
  axis = array_ops.get_positive_axis(axis, values[0].rank)
  with ops.name_scope(name, 'StructuredConcat', values):
    return _extend_op(values, leaf_op)
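A minimal sketch calling the helper above directly (a public `tf.concat` dispatch for `StructuredTensor` may or may not be registered in a given TF version):

a = StructuredTensor.from_pyval([{'x': 1}])
b = StructuredTensor.from_pyval([{'x': 2}, {'x': 3}])

joined = concat([a, b], axis=0)
# joined.to_pyval() == [{'x': 1}, {'x': 2}, {'x': 3}]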
Example #12
def split(value: ragged_tensor.Ragged,
          num_or_size_splits,
          axis=0,
          num=None,
          name=None):
  """Splits a RaggedTensor `value` into a list of sub RaggedTensors.

  If `num_or_size_splits` is an `int`,  then it splits `value` along the
  dimension `axis` into `num_or_size_splits` smaller RaggedTensors. This
  requires that `value.shape[axis]` is divisible by `num_or_size_splits`.

  If `num_or_size_splits` is a 1-D Tensor (or list), then `value` is split into
  `len(num_or_size_splits)` elements. The shape of the `i`-th element has the
  same size as the `value` except along dimension `axis` where the size is
  `num_or_size_splits[i]`.

  Splitting along a ragged dimension is not allowed.

  For example:

  >>> rt = tf.RaggedTensor.from_row_lengths(
  ...      np.arange(6 * 3).reshape(6, 3), row_lengths=[1, 2, 2, 1])
  >>> rt.shape
  TensorShape([4, None, 3])
  >>>
  >>> rt1, rt2 = tf.split(rt, 2)  # uniform splits
  >>> rt1.shape
  TensorShape([2, None, 3])
  >>> rt2.shape
  TensorShape([2, None, 3])
  >>>
  >>> rt3, rt4, rt5 = tf.split(rt, [1, 2, 1])  # ragged splits
  >>> rt3.shape
  TensorShape([1, None, 3])
  >>> rt4.shape
  TensorShape([2, None, 3])
  >>> rt5.shape
  TensorShape([1, None, 3])
  >>>
  >>> rt6, rt7 = tf.split(rt, [1, 2], axis=2)  # splits along axis 2
  >>> rt6.shape
  TensorShape([4, None, 1])
  >>> rt7.shape
  TensorShape([4, None, 2])

  Args:
    value: The `RaggedTensor` to split.
    num_or_size_splits: Either an `int` indicating the number of splits
      along `axis` or a 1-D integer `Tensor` or Python list containing the sizes
      of each output tensor along `axis`. If a Python int, then it must evenly
      divide `value.shape[axis]`; otherwise the sum of sizes along the split
      axis must match that of the `value`.
    axis: An `int` or scalar `int32` `Tensor`. The dimension along which
      to split. Must be in the range `[-rank(value), rank(value))`. Defaults to
      0.
    num: An `int` used to specify the number of outputs when
      `num_or_size_splits` is a 1-D list or `Tensor` and its length is
      statically unknown, e.g., when specifying `tf.TensorSpec(None)` with
      the `input_signature` argument of `tf.function` (optional).
    name: A name for the operation (optional).

  Returns:
    If `num_or_size_splits` is an `int`, returns a list of `num_or_size_splits`
    `RaggedTensor` objects; if `num_or_size_splits` is a 1-D Tensor, returns
    `num_or_size_splits.get_shape()[0]` `RaggedTensor` objects resulting from
    splitting `value`.

  Raises:
    ValueError: If the dimension `axis` of `value` is a ragged dimension.
    ValueError: If `num` is unspecified and cannot be inferred.
    ValueError: If `num` is specified but doesn't match the length of
      `num_or_size_splits`.
    ValueError: If `num_or_size_splits` is an `int` and less than 1.
    TypeError: If `num_or_size_splits` is not an `int` or 1-D
      list or 1-D `Tensor`.
    InvalidArgumentError: If the `axis` dimension of `value` cannot be split
      evenly by `num_or_size_splits`.
    InvalidArgumentError: If `num_or_size_splits` contains negative integers.
    InvalidArgumentError: If `num_or_size_splits`'s static shape is unknown and
      its dynamic shape is inconsistent with `num`.
    InvalidArgumentError: If `num_or_size_splits`'s static rank is unknown and
      `axis` is a negative integer.
  """
  with ops.name_scope(name, 'RaggedSplit'):
    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        value, name='value')
    if isinstance(num_or_size_splits, int) and num_or_size_splits == 1:
      return [value]

    # static assert
    check_ops.assert_integer_v2(
        num_or_size_splits,
        message=('`num_or_size_splits` must be an `int` or 1-D list or '
                 '`Tensor` of integers.'))
    value_shape = ragged_shape.RaggedShape.from_tensor(value)
    axis = array_ops.get_positive_axis(axis, value_shape.rank)
    try:
      dim_size = value_shape[axis]
    except ValueError:
      raise ValueError('Cannot split a ragged dimension. Got `value` with '
                       f'shape {value_shape} and `axis` {axis}.')
    if isinstance(num_or_size_splits, int):
      # Uniform split
      num_splits = num_or_size_splits
      if num_splits < 1:
        raise ValueError('`num_or_size_splits` must be >=1 if it is an `int`. '
                         f'Received {num_or_size_splits}.')
      split_length = math_ops.floordiv(dim_size, num_splits)
      split_lengths = array_ops.repeat(split_length, num_splits)
    else:
      # Ragged split
      num_splits = None
      split_lengths = ops.convert_to_tensor(num_or_size_splits)
      if split_lengths.shape.ndims is not None:
        if split_lengths.shape.ndims != 1:
          raise TypeError('`num_or_size_splits` must be an `int` or 1-D list '
                          f'or `Tensor`. Received {num_or_size_splits}.')
        num_splits = tensor_shape.dimension_value(split_lengths.shape[0])

      if num_splits is None:
        if num is None:
          raise ValueError('`num` must be specified as an `int` when the '
                           'size of `num_or_size_splits` is statically '
                           f'unknown. Received `num`: {num} and '
                           f'`num_or_size_splits`: {num_or_size_splits}.')
        num_splits = num
      else:
        if num is not None and num != num_splits:
          raise ValueError('`num` does not match the size of '
                           f'`num_or_size_splits`. Received `num`: {num} and '
                           f'size of `num_or_size_splits`: {num_splits}.')

    splits = array_ops.concat([[0], math_ops.cumsum(split_lengths)], axis=0)
    checks = []
    checks.append(
        check_ops.assert_non_negative_v2(
            num_or_size_splits,
            message='`num_or_size_splits` must be non-negative.'))
    checks.append(
        check_ops.assert_equal_v2(
            num_splits,
            array_ops.shape(split_lengths)[0],
            message='`num` is inconsistent with `num_or_size_splits.shape[0]`.'))
    checks.append(
        check_ops.assert_equal_v2(
            math_ops.cast(dim_size, splits.dtype),
            splits[-1],
            message=('Cannot exactly split the `axis` dimension of `value` '
                     'with the given `num_or_size_splits`.')))
    splits = control_flow_ops.with_dependencies(checks, splits)
    split_rts = []
    slices = [slice(None)] * (axis + 1)
    for i in range(num_splits):
      slices[-1] = slice(splits[i], splits[i + 1])
      split_rts.append(value[tuple(slices)])
    return split_rts
Example #13
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
  """Ragged compatible squeeze.

  If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

  If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
  where `N` is the number of elements in the squeezed dimensions.

  Args:
    input: A potentially ragged tensor. The input to squeeze.
    axis: An optional list of ints. Defaults to `None`. If `input` is ragged,
      it only squeezes the dimensions listed. It fails if `input` is ragged
      and `axis` is `[]`. If `input` is not ragged, it calls `tf.squeeze`.
      Note that it is an error to squeeze a dimension that is not 1. Each
      axis must be in the range `[-rank(input), rank(input))`.
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor. Contains the same data as input,
    but has one or more dimensions of size 1 removed.
  """
  with ops.name_scope(name, 'RaggedSqueeze', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    if isinstance(input, ops.Tensor):
      return array_ops.squeeze(input, axis, name)

    if axis is None:
      raise ValueError('Ragged.squeeze must have an axis argument.')
    if isinstance(axis, int):
      axis = [axis]
    elif ((not isinstance(axis, (list, tuple))) or
          (not all(isinstance(d, int) for d in axis))):
      raise TypeError('Axis must be a list or tuple of integers.')

    dense_dims = []
    ragged_dims = []
    # Normalize all the dims in axis to be positive
    axis = [
        array_ops.get_positive_axis(d, input.shape.ndims, 'axis[%d]' % i,
                                    'rank(input)') for i, d in enumerate(axis)
    ]
    for dim in axis:
      if dim > input.ragged_rank:
        dense_dims.append(dim - input.ragged_rank)
      else:
        ragged_dims.append(dim)

    # Make sure the specified ragged dimensions are squeezable.
    assertion_list = []
    scalar_tensor_one = constant_op.constant(1, dtype=input.row_splits.dtype)
    for i, r in enumerate(input.nested_row_lengths()):
      if i + 1 in ragged_dims:
        assertion_list.append(
            control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(r, scalar_tensor_one)),
                ['the given axis (axis = %d) is not squeezable!' % (i + 1)]))
    if 0 in ragged_dims:
      scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
      assertion_list.append(
          control_flow_ops.Assert(
              math_ops.equal(
                  array_ops.size(input.row_splits), scalar_tensor_two),
              ['the given axis (axis = 0) is not squeezable!']))

    # At this point, the ragged dimensions are known to be squeezable.
    squeezed_rt = control_flow_ops.with_dependencies(assertion_list,
                                                     input.flat_values)

    if dense_dims:
      # Gives error if the dense dimension is not squeezable.
      squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

    remaining_row_splits = []
    for i, row_split in enumerate(input.nested_row_splits):
      # each row_splits tensor is for dimension #(i+1) .
      if (i + 1) not in ragged_dims:
        remaining_row_splits.append(row_split)
    # Handle the outermost dimension if it is to be squeezed.
    if remaining_row_splits and 0 in ragged_dims:
      remaining_row_splits.pop(0)

    squeezed_rt = RaggedTensor.from_nested_row_splits(squeezed_rt,
                                                      remaining_row_splits)

    # Corner case: when removing all the ragged dimensions and the output is
    # a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
    if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
      squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

    return squeezed_rt
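A usage sketch, assuming `tf.squeeze` dispatches to this ragged implementation:

import tensorflow as tf

rt = tf.ragged.constant([[[1], [2]], [[3]]])  # shape (2, None, 1)

# axis=[-1] is normalized to axis=[2] before the squeezability checks run.
print(tf.squeeze(rt, axis=[-1]))              # <tf.RaggedTensor [[1, 2], [3]]>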
Example #14
def expand_dims(input: ragged_tensor.Ragged, axis, name=None):  # pylint: disable=redefined-builtin
  """Inserts a dimension with shape 1 into a potentially ragged tensor's shape.

  Given a potentially ragged tensor `input`, this operation inserts a
  dimension with size 1 at the dimension `axis` of `input`'s shape.

  The following table gives some examples showing how `ragged.expand_dims`
  impacts the shapes of different input tensors.  Ragged dimensions are
  indicated by enclosing them in parentheses.

  input.shape             | axis | result.shape
  ----------------------- | ---- | -----------------------------
  `[D1, D2]`              |  `0` | `[1, D1, D2]`
  `[D1, D2]`              |  `1` | `[D1, 1, D2]`
  `[D1, D2]`              |  `2` | `[D1, D2, 1]`
  `[D1, (D2), (D3), D4]`  |  `0` | `[1, D1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `1` | `[D1, 1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), 1, (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
  `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

  Args:
    input: The potentially ragged tensor that should be expanded with a new
      dimension.
    axis: An integer constant indicating where the new dimension should be
      inserted.
    name: A name for the operation (optional).

  Returns:
    A tensor with the same values as `input`, with an added dimension of
    size 1 at `axis`.

  #### Examples:

  >>> rt = tf.ragged.constant([[1, 2], [3]])
  >>> print(rt.shape)
  (2, None)

  >>> expanded = tf.expand_dims(rt, axis=0)
  >>> print(expanded.shape, expanded)
  (1, 2, None) <tf.RaggedTensor [[[1, 2], [3]]]>

  >>> expanded = tf.expand_dims(rt, axis=1)
  >>> print(expanded.shape, expanded)
  (2, 1, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

  >>> expanded = tf.expand_dims(rt, axis=2)
  >>> print(expanded.shape, expanded)
  (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
  """
  with ops.name_scope(name, 'RaggedExpandDims', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    if not ragged_tensor.is_ragged(input):
      return array_ops.expand_dims(input, axis)

    ndims = None if input.shape.ndims is None else input.shape.ndims + 1
    axis = array_ops.get_positive_axis(axis, ndims, ndims_name='rank(input)')

    if axis == 0:
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=input.nrows(), nrows=1, validate=False)
    elif axis == 1:
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=1, nrows=input.nrows(), validate=False)
    else:
      if ragged_tensor.is_ragged(input.values):
        return input.with_values(expand_dims(input.values, axis - 1))
      else:
        return input.with_values(array_ops.expand_dims(input.values, axis - 1))
Example #15
def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
  """Helper function to concatenate or stack ragged tensors.

  Args:
    rt_inputs: A list of RaggedTensors or Tensors to combine.
    axis: The axis along which to concatenate or stack.
    stack_values: A boolean -- if true, then stack values; otherwise,
      concatenate them.

  Returns:
    A RaggedTensor.
  Raises:
    ValueError: If rt_inputs is empty, or if axis is out of range.
  """
  # Validate parameters.
  if not rt_inputs:
    raise ValueError('rt_inputs may not be empty.')

  # Convert input tensors.
  rt_inputs = [
      ragged_tensor.convert_to_tensor_or_ragged_tensor(
          rt_input, name='rt_input') for rt_input in rt_inputs
  ]
  row_splits_dtype, rt_inputs = ragged_tensor.match_row_splits_dtypes(
      *rt_inputs, return_dtype=True)
  rt_inputs = list(rt_inputs)

  # Special case: if there's only one input, then return it as-is.
  if len(rt_inputs) == 1:
    if stack_values:
      return ragged_array_ops.expand_dims(rt_inputs[0], axis=axis)
    else:
      return rt_inputs[0]

  # Check the rank (number of dimensions) of the input tensors.
  ndims = None
  for rt in rt_inputs:
    if ndims is None:
      ndims = rt.shape.ndims
    else:
      rt.shape.assert_has_rank(ndims)

  out_ndims = ndims if (ndims is None or not stack_values) else ndims + 1
  axis = array_ops.get_positive_axis(axis, out_ndims)

  if stack_values and ndims == 1 and axis == 0:
    return ragged_tensor.RaggedTensor.from_row_lengths(
        values=array_ops.concat(rt_inputs, axis=0),
        row_lengths=array_ops.concat([array_ops.shape(r) for r in rt_inputs],
                                     axis=0))

  # If all the inputs are Tensors, and we're combining the final dimension,
  # then we can delegate to the tf.stack/tf.concat operation, and return a
  # Tensor.
  if all(not ragged_tensor.is_ragged(rt) for rt in rt_inputs):
    if ndims is not None and (axis == out_ndims - 1 or axis == ndims - 1):
      if stack_values:
        return array_ops.stack(rt_inputs, axis)
      else:
        return array_ops.concat(rt_inputs, axis)

  # Convert any Tensor inputs to RaggedTensors.  This makes it
  # possible to concatenate Tensors and RaggedTensors together.
  for i in range(len(rt_inputs)):
    if not ragged_tensor.is_ragged(rt_inputs[i]):
      rt_inputs[i] = ragged_tensor.RaggedTensor.from_tensor(
          rt_inputs[i], ragged_rank=1, row_splits_dtype=row_splits_dtype)

  # Convert the input tensors to all have the same ragged_rank.
  ragged_rank = max(max(rt.ragged_rank for rt in rt_inputs), 1)
  rt_inputs = [_increase_ragged_rank_to(rt, ragged_rank, row_splits_dtype)
               for rt in rt_inputs]

  if axis == 0:
    return _ragged_stack_concat_axis_0(rt_inputs, stack_values)
  elif axis == 1:
    return _ragged_stack_concat_axis_1(rt_inputs, stack_values)
  else:  # axis > 1: recurse.
    values = [rt.values for rt in rt_inputs]
    splits = [[rt_input.row_splits] for rt_input in rt_inputs]
    with ops.control_dependencies(ragged_util.assert_splits_match(splits)):
      return ragged_tensor.RaggedTensor.from_row_splits(
          _ragged_stack_concat_helper(values, axis - 1, stack_values),
          splits[0][0], validate=False)
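This helper backs the ragged versions of `tf.concat` and `tf.ragged.stack`. A small sketch with the expected values worked out by hand:

import tensorflow as tf

a = tf.ragged.constant([[1, 2], [3]])
b = tf.ragged.constant([[4], [5, 6]])

print(tf.concat([a, b], axis=1))        # [[1, 2, 4], [3, 5, 6]]
print(tf.ragged.stack([a, b], axis=0))  # [[[1, 2], [3]], [[4], [5, 6]]]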
Example #16
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`.  The rank of the
  tensor is reduced by 1 for each entry in `axis`.  If `axis` is not specified,
  then all dimensions are reduced, and a scalar value is returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are associative;
  if not, then reducing multiple axes will return incorrect results.  (In
  particular, reducing multiple axes is currently implemented by reducing the
  axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions.  Must have the same signature and basic behavior as
      `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions.  Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be reduced.
    axis: The axis or axes to reduce.  May be `None` (to reduce all axes), an
      `int` (to reduce a single axis), a `list` or `tuple` of `int` (to reduce a
      given set of axes), or a `Tensor` with a constant value.  Must be in the
      range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: An optional string. Defaults to None. The separator to use when
      joining. The separator must not be set for non-string data types. (i.e. if
      separator is not None then it uses string ops)
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values.  The returned tensor
    has the same dtype as `rt_input`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`.  The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.
  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  if not ragged_tensor.is_ragged(rt_input):
    if separator is None:
      return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
    else:
      # When separator is not None, we infer that the dtype is string and
      # reduce_join will be called.
      return reduce_op(
          rt_input, axis, keepdims=keepdims, name=name, separator=separator)

  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    result = reduce_op(rt_input.flat_values, None, keepdims=keepdims, name=name)
    if keepdims:
      # Expand the result to the input number of dimensions.
      for _ in rt_input.shape[1:]:
        result = array_ops.expand_dims(result, axis=0)
    return result

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, as we reduce one at a time (see below),
        # negative axes must be converted to positive up front, since sorting
        # a mix of negative and positive axes would give the wrong order.
        # See GitHub issue 27497.
        axis = [
            array_ops.get_positive_axis(a, rt_input.shape.ndims, 'axis[%s]' % i,
                                        'rank(input_tensor)')
            for i, a in enumerate(axis)
        ]
        # When reducing multiple axes, just reduce one at a time.  This is less
        # efficient, and only works for associative ops.  (In particular, it
        # does not work for reduce_mean.)  However, reducing multiple axes at
        # once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        inner_reduced = ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                                rt_input, axis[-1], keepdims,
                                                separator)
        return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                       inner_reduced, axis[:-1], keepdims,
                                       separator)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = array_ops.get_positive_axis(
        axis, rt_input.shape.ndims, ndims_name='rank(input_tensor)')

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
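      # Note: `range` below is the module-level ragged range op (equivalent
      # to tf.ragged.range), not the Python builtin; its flattened `.values`
      # give each element's position within its row, which serves as the
      # segment id for this axis-0 reduction.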
      segment_ids = range(row_lengths).values
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=0)
      return result
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      result = _ragged_segment_aggregate(unsorted_segment_op, rt_input.values,
                                         segment_ids, num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=1)
      return result
    else:
      # out[i_0, ..., i_{axis-1}, i_{axis+1}, ..., i_N] =
      #     sum_{j} rt_input[i_0, ..., i_{axis-1}, j, i_{axis+1}, ..., i_N]
      return rt_input.with_values(
          ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                  rt_input.values, axis - 1, keepdims,
                                  separator))
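A short sketch of how this surfaces through the public reductions, which support ragged inputs (e.g. `tf.reduce_sum`):

import tensorflow as tf

rt = tf.ragged.constant([[3, 1, 4], [1, 5]])

print(tf.reduce_sum(rt, axis=1))  # tf.Tensor([8 6], shape=(2,), dtype=int32)
print(tf.reduce_sum(rt, axis=0))  # tf.Tensor([4 6 4], shape=(3,), dtype=int32)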
Example #17
def gather(params,
           indices,
           validate_indices=None,
           axis=None,
           batch_dims=0,
           name=None):
    """Gathers ragged slices from `params` axis `0` according to `indices`.

  See `tf.gather` for full documentation.  (This version has the same API
  as `tf.gather`, but supports ragged `params` and `indices`.)

  Examples:

  >>> params = tf.constant(['a', 'b', 'c', 'd', 'e'])
  >>> indices = tf.constant([3, 1, 2, 1, 0])
  >>> ragged_params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
  >>> ragged_indices = tf.ragged.constant([[3, 1, 2], [1], [], [0]])

  >>> tf.gather(params, ragged_indices)
  <tf.RaggedTensor [[b'd', b'b', b'c'], [b'b'], [], [b'a']]>

  >>> tf.gather(ragged_params, indices)
  <tf.RaggedTensor [[b'e'], [b'd'], [], [b'd'], [b'a', b'b', b'c']]>

  >>> tf.gather(ragged_params, ragged_indices)
  <tf.RaggedTensor [[[b'e'], [b'd'], []], [[b'd']], [], [[b'a', b'b', b'c']]]>

  Args:
    params: The potentially ragged tensor from which to gather values. Must be
      at least rank 1.
    indices: The potentially ragged tensor indicating which values to gather.
      Must have dtype `int32` or `int64`.  Values must be in the range `[0,
      params.shape[0]]`.
    validate_indices: Ignored.
    axis: The axis in `params` to gather `indices` from.
    batch_dims: The number of batch dimensions.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor`, where `output.dtype=params.dtype` and
    `output.shape=indices.shape + params.shape[1:]` and
    `output.ragged_rank=indices.shape.ndims + params.ragged_rank`.

  Raises:
    ValueError: If indices.shape.ndims is not known statically.
  """
    del validate_indices

    with ops.name_scope(name, 'RaggedGather', [params, indices]):
        params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            params, name='params')
        indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            indices, name='indices')
        params, indices = ragged_tensor.match_row_splits_dtypes(
            params, indices)

        if batch_dims != indices.shape.rank:
            batch_dims = array_ops.get_positive_axis(
                batch_dims,
                indices.shape.rank,
                axis_name='batch_dims',
                ndims_name='rank(indices)')
        if params.shape.rank is not None and batch_dims >= params.shape.rank:
            raise ValueError('batch_dims must be less than rank(params)')
        if axis is None:
            axis = batch_dims
        axis = array_ops.get_positive_axis(axis,
                                           params.shape.rank,
                                           ndims_name='rank(params)')
        if axis < batch_dims:
            raise ValueError(
                'axis must be greater than or equal to batch_dims')
        if indices.shape.rank is not None:
            if not 0 <= batch_dims <= indices.shape.rank:
                raise ValueError(
                    'batch_dims=%s must be between 0 and rank(indices)=%s' %
                    (batch_dims, indices.shape.rank))

        return _gather(params, indices, axis, batch_dims)
Example #18
def regex_split_with_offsets(input,
                             delim_regex_pattern,
                             keep_delim_regex_pattern="",
                             name=None):
    r"""Split `input` by delimiters that match a regex pattern; returns offsets.

    `regex_split_with_offsets` will split `input` using delimiters that match a
    regex pattern in `delim_regex_pattern`. Here is an example:

    ```
    text_input=["hello there"]
    # split by whitespace
    result, begin, end = regex_split_with_offsets(text_input, "\s")
    # result = [["hello", "there"]]
    # begin = [[0, 7]]
    # end = [[5, 11]]
    ```

    By default, delimiters are not included in the split string results.
    Delimiters may be included by specifying a regex pattern
    `keep_delim_regex_pattern`. For example:

    ```
    text_input=["hello there"]
    # split by whitespace
    result, begin, end = regex_split_with_offsets(text_input, "\s", "\s")
    # result = [["hello", " ", "there"]]
    # begin = [[0, 5, 7]]
    # end = [[5, 6, 11]]
    ```

    If there are multiple delimiters in a row, no empty splits are emitted.
    For example:

    ```
    text_input=["hello  there"]  # two consecutive whitespace characters
    # split by whitespace
    result, begin, end = regex_split_with_offsets(text_input, "\s")
    # result = [["hello", "there"]]
    ```

    See https://github.com/google/re2/wiki/Syntax for the full list of supported
    expressions.

    Args:
      input: A Tensor or RaggedTensor of string input.
      delim_regex_pattern: A string containing the regex pattern of a delimiter.
      keep_delim_regex_pattern: (optional) Regex pattern of delimiters that
        should be kept in the result.
      name: (optional) Name of the op.

    Returns:
      A tuple of RaggedTensors containing:
        (split_results, begin_offsets, end_offsets)
      where `split_results` is of type string, and `begin_offsets` and
      `end_offsets` are of type int64.
    """
    delim_regex_pattern = b"".join(
        [b"(", delim_regex_pattern.encode("utf-8"), b")"])
    keep_delim_regex_pattern = b"".join(
        [b"(", keep_delim_regex_pattern.encode("utf-8"), b")"])

    # Convert input to ragged or tensor
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, dtype=dtypes.string)

    if ragged_tensor.is_ragged(input):
        # send flat_values to regex_split op.
        tokens, begin_offsets, end_offsets, row_splits = (
            gen_regex_split_ops.regex_split_with_offsets(
                input.flat_values,
                delim_regex_pattern,
                keep_delim_regex_pattern,
                name=name))

        # Pack back into original ragged tensor
        tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
            tokens, row_splits)
        tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
            tokens_rt, input.row_splits)
        begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            begin_offsets, row_splits)
        begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            begin_offsets_rt, input.row_splits)
        end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            end_offsets, row_splits)
        end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            end_offsets_rt, input.row_splits)
        return tokens_rt, begin_offsets_rt, end_offsets_rt

    else:
        # reshape to a flat Tensor (if not already)
        input_shape = math_ops.cast(array_ops.shape(input), dtypes.int64)
        input_reshaped = array_ops.reshape(input, [-1])

        # send flat_values to regex_split op.
        tokens, begin_offsets, end_offsets, row_splits = (
            gen_regex_split_ops.regex_split_with_offsets(
                input_reshaped, delim_regex_pattern, keep_delim_regex_pattern))
        # Pack back into ragged tensors
        tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
            tokens, row_splits=row_splits)
        begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            begin_offsets, row_splits=row_splits)
        end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
            end_offsets, row_splits=row_splits)

        # If the original input was a multi-dimensional Tensor, add back the
        # dimensions
        static_rank = input.get_shape().ndims
        if static_rank is not None and static_rank > 1:
            for i in range(
                    array_ops.get_positive_axis(-1, static_rank), 0, -1):
                tokens_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
                    values=tokens_rt, uniform_row_length=input_shape[i])
                begin_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
                    values=begin_offsets_rt, uniform_row_length=input_shape[i])
                end_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
                    values=end_offsets_rt, uniform_row_length=input_shape[i])
        return tokens_rt, begin_offsets_rt, end_offsets_rt
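A usage sketch through the public `tensorflow_text` API (assuming the package is installed):

import tensorflow_text as tf_text

tokens, begin, end = tf_text.regex_split_with_offsets(
    ["hello there"], delim_regex_pattern=r"\s")
# tokens = [["hello", "there"]], begin = [[0, 7]], end = [[5, 11]]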