def _get_row_lengths_merged_to_axis(segments, axis=-1):
  """Get the row lengths relative to a desired axis."""
  axis = array_ops.get_positive_axis(axis, segments.shape.ndims) - 1
  row_lengths = ragged_tensor.RaggedTensor.from_nested_row_lengths(
      segments.nested_row_lengths()[axis],
      segments.nested_row_lengths()[:axis])
  for _ in range(axis):
    row_lengths = math_ops.reduce_sum(row_lengths, -1)
  return row_lengths
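# Illustrative note (not from the original module): for a ragged input like
#   segments = tf.ragged.constant([[[1, 2], [3]], [[4]]])  # shape [2, None, None]
# _get_row_lengths_merged_to_axis(segments, axis=-1) evaluates to [3, 1]:
# the total number of innermost items under each outermost row.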
def merge_dims(self, outer_axis, inner_axis):
  """Merges outer_axis...inner_axis into a single dimension.

  Returns a copy of this StructuredTensor with the specified range of
  dimensions flattened into a single dimension, with elements in row-major
  order.

  >>> st = StructuredTensor.from_pyval(
  ...     [[{'foo': 12}, {'foo': 33}], [], [{'foo': 99}]])
  >>> st.merge_dims(0, 1)
  <StructuredTensor(
    fields={
      "foo": tf.Tensor([12 33 99], shape=(3,), dtype=int32)},
    shape=(3,))>

  Args:
    outer_axis: `int`: The first dimension in the range of dimensions to
      merge. May be negative (to index from the last dimension).
    inner_axis: `int`: The last dimension in the range of dimensions to
      merge. May be negative (to index from the last dimension).

  Returns:
    A copy of this tensor, with the specified dimensions merged into a
    single dimension.  The shape of the returned tensor will be
    `self.shape[:outer_axis] + [N] + self.shape[inner_axis + 1:]`, where `N`
    is the total number of slices in the merged dimensions.
  """
  outer_axis = array_ops.get_positive_axis(
      outer_axis,
      self.shape.rank,
      axis_name='outer_axis',
      ndims_name='rank(self)')
  inner_axis = array_ops.get_positive_axis(
      inner_axis,
      self.shape.rank,
      axis_name='inner_axis',
      ndims_name='rank(self)')
  if not outer_axis < inner_axis:
    raise ValueError('Expected outer_axis (%d) to be less than '
                     'inner_axis (%d)' % (outer_axis, inner_axis))
  return _merge_dims(self, outer_axis, inner_axis)
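# Usage sketch (illustrative, not part of the original source): merging the
# two inner dimensions instead of the two outer ones, assuming the same
# StructuredTensor API as in the docstring above:
#
#   >>> st = StructuredTensor.from_pyval(
#   ...     [[[{'x': 1}], [{'x': 2}]], [[{'x': 3}]]])
#   >>> st.merge_dims(1, 2).to_pyval()
#   [[{'x': 1}, {'x': 2}], [{'x': 3}]]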
def reverse(tensor: ragged_tensor.Ragged, axis, name=None):
  """Reverses a RaggedTensor along the specified axes.

  #### Example:

  >>> data = tf.ragged.constant([
  ...     [[1, 2], [3, 4]], [[5, 6]], [[7, 8], [9, 10], [11, 12]]])
  >>> tf.reverse(data, axis=[0, 2])
  <tf.RaggedTensor [[[8, 7], [10, 9], [12, 11]], [[6, 5]], [[2, 1], [4, 3]]]>

  Args:
    tensor: A `RaggedTensor` to reverse.
    axis: A list or tuple of `int` or a constant 1D `tf.Tensor`. The indices
      of the axes to reverse.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor`.
  """
  type_error_msg = ('`axis` must be a list of int or a constant tensor '
                    'when reversing axes in a ragged tensor')

  with ops.name_scope(name, 'Reverse', [tensor, axis]):
    if isinstance(axis, ops.Tensor):
      axis = tensor_util.constant_value(axis)
      if axis is None:
        raise TypeError(type_error_msg)
    elif not (isinstance(axis, (list, tuple)) and
              all(isinstance(dim, int) for dim in axis)):
      raise TypeError(type_error_msg)

    tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        tensor, name='tensor')

    # Allow usage of negative values to specify innermost axes.
    axis = [
        array_ops.get_positive_axis(dim, tensor.shape.rank, 'axis[%d]' % i,
                                    'rank(tensor)')
        for i, dim in enumerate(axis)
    ]

    # We only need to slice up to the max axis. If the axis list is empty,
    # it should be 0.
    slices = [slice(None)] * (max(axis) + 1 if axis else 0)

    for dim in axis:
      slices[dim] = slice(None, None, -1)

    return tensor[tuple(slices)]
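# Usage sketch (illustrative): reversing only the innermost axis of a 2-D
# ragged tensor, assuming this op is registered as the ragged dispatch for
# `tf.reverse`:
#
#   >>> data = tf.ragged.constant([[1, 2, 3], [4, 5]])
#   >>> tf.reverse(data, axis=[1])
#   <tf.RaggedTensor [[3, 2, 1], [5, 4]]>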
def ragged_cumsum(x: ragged_tensor.Ragged,
                  axis: int = 0,
                  exclusive: bool = False,
                  reverse: bool = False,
                  name: typing.Optional[str] = None):
  """Calculate math_ops.cumsum for a RaggedTensor.

  Given a ragged tensor `x`, the `result` is a ragged tensor with the same
  shape. One can calculate the value of `result[i_1...i_k]` as follows:
  ```
  dense_result = tf.math.cumsum(x.to_tensor(), axis=axis,
                                exclusive=exclusive, reverse=reverse)
  result[i_1...i_k] = dense_result[i_1...i_k]
  ```

  Args:
    x: the original ragged tensor to sum.
    axis: the axis along which to sum, can range -rank<=axis<rank.
    exclusive: is the sum exclusive or inclusive? If True, then
      result[0]=0. If False, then result[0]=x[0].
    reverse: If True, sum from back to front.
    name: the name of the op.

  Returns:
    the cumulative sum.
  """
  with ops.name_scope(name, 'RaggedCumSum', [x, axis, exclusive, reverse]):
    axis = array_ops.get_positive_axis(axis, x.shape.rank, ndims_name='rank')
    if axis == x.ragged_rank:
      last_rp = x._nested_row_partitions[-1]  # pylint: disable=protected-access
      return x.with_flat_values(
          _cumsum_flat_values_at_ragged_rank(
              last_rp, x.flat_values, exclusive=exclusive, reverse=reverse))
    elif axis > x.ragged_rank:
      new_axis = axis - x.ragged_rank
      cumsum_bound = functools.partial(
          math_ops.cumsum, axis=new_axis, exclusive=exclusive,
          reverse=reverse)
      return ragged_functional_ops.map_flat_values(cumsum_bound, x)
    else:
      dense_version = x.to_tensor()
      result = math_ops.cumsum(
          dense_version, axis, exclusive=exclusive, reverse=reverse,
          name=name)
      return ragged_tensor.RaggedTensor.from_tensor(
          result, lengths=x.nested_row_lengths())
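# Usage sketch (illustrative), assuming this op is registered as the ragged
# dispatch for `tf.math.cumsum`:
#
#   >>> x = tf.ragged.constant([[1, 2, 3], [4, 5]])
#   >>> tf.math.cumsum(x, axis=1)
#   <tf.RaggedTensor [[1, 3, 6], [4, 9]]>
#   >>> tf.math.cumsum(x, axis=1, exclusive=True)
#   <tf.RaggedTensor [[0, 1, 3], [0, 4]]>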
def _expand_dims_impl(st, axis, name=None):  # pylint: disable=redefined-builtin
  """Creates a StructuredTensor with a length 1 axis inserted at index `axis`.

  This is an implementation of tf.expand_dims for StructuredTensor. Note
  that the `axis` must be less than or equal to rank.

  >>> st = StructuredTensor.from_pyval([[{"x": 1}, {"x": 2}], [{"x": 3}]])
  >>> tf.expand_dims(st, 0).to_pyval()
  [[[{'x': 1}, {'x': 2}], [{'x': 3}]]]
  >>> tf.expand_dims(st, 1).to_pyval()
  [[[{'x': 1}, {'x': 2}]], [[{'x': 3}]]]
  >>> tf.expand_dims(st, 2).to_pyval()
  [[[{'x': 1}], [{'x': 2}]], [[{'x': 3}]]]
  >>> tf.expand_dims(st, -1).to_pyval()  # -1 is the same as 2
  [[[{'x': 1}], [{'x': 2}]], [[{'x': 3}]]]

  Args:
    st: the original StructuredTensor.
    axis: the axis to insert the dimension: `-(rank + 1) <= axis <= rank`
    name: the name of the op.

  Returns:
    a new structured tensor with larger rank.

  Raises:
    an error if `axis < -(rank + 1)` or `rank < axis`.
  """
  axis = array_ops.get_positive_axis(
      axis, st.rank + 1, axis_name='axis', ndims_name='rank(st)')
  with ops.name_scope(name, 'ExpandDims', [st, axis]):
    new_fields = {
        k: array_ops.expand_dims(v, axis) for (k, v) in st._fields.items()
    }
    new_shape = st.shape[:axis] + (1,) + st.shape[axis:]
    new_row_partitions = _expand_st_row_partitions(st, axis)
    new_nrows = st.nrows() if (axis > 0) else 1
    return StructuredTensor.from_fields(
        new_fields,
        shape=new_shape,
        row_partitions=new_row_partitions,
        nrows=new_nrows)
def get_selectable(self, input_ids, axis):
  """See `get_selectable()` in superclass."""
  selectable = super(FirstNItemSelector, self).get_selectable(input_ids, axis)
  axis = array_ops.get_positive_axis(
      axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank)
  # Create a positions RT and mask out positions that are not selectable.
  positions_flat = math_ops.range(array_ops.size(input_ids.flat_values))
  positions = input_ids.with_flat_values(positions_flat)
  selectable_positions = ragged_array_ops.boolean_mask(positions, selectable)

  # Merge to the desired axis.
  selectable_positions = selectable_positions.merge_dims(
      1, axis) if axis > 1 else selectable_positions

  # Get a selection mask based on how many items are desired for selection.
  # After the merge above, the positions always live at axis 1.
  merged_axis = axis - (axis - 1)
  selection_mask = _get_selection_mask(selectable_positions,
                                       self._num_to_select, merged_axis)
  # Mask out positions that were not selected.
  selected_positions = ragged_array_ops.boolean_mask(selectable_positions,
                                                     selection_mask)

  # Now that we have all the positions which were chosen, we recreate a mask
  # (matching the original input's shape) where the value is True if it was
  # selected. We do this by creating an "all false" RT and scattering true
  # values to the positions chosen for selection.
  all_true = selected_positions.with_flat_values(
      array_ops.ones_like(selected_positions.flat_values))
  all_false = math_ops.cast(
      array_ops.zeros(array_ops.shape(input_ids.flat_values)), dtypes.int32)
  results_flat = array_ops.tensor_scatter_update(
      all_false, array_ops.expand_dims(selected_positions.flat_values, -1),
      all_true.flat_values)
  results = input_ids.with_flat_values(results_flat)
  results = math_ops.cast(results, dtypes.bool)

  # Reduce until input.shape[:axis].
  for _ in range(input_ids.shape.ndims - axis - 1):
    results = math_ops.reduce_all(results, -1)
  return results
def gather(params,
           indices,
           validate_indices=None,
           name=None,
           axis=None,
           batch_dims=0):
  """tf.gather for structured tensors.

  Does not support (yet) checks on illegal axis values, et cetera.

  Indices must be a ragged or dense tensor.

  Args:
    params: a structured tensor to be gathered.
    indices: a ragged tensor or tensor to gather by.
    validate_indices: whether to validate the indices.
    name: the name of the op(s).
    axis: the axis in params to gather on.
    batch_dims: the number of batch dimensions.

  Returns:
    the params reorganized according to indices.
  """
  if name is None:
    name = 'gather'
  with ops.name_scope(name):
    if axis is None:
      axis = batch_dims
    ndims = params.shape.rank
    axis = array_ops.get_positive_axis(axis, ndims)
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')

    def leaf_op(p):
      return array_ops.gather(
          p,
          indices,
          validate_indices=validate_indices,
          axis=axis,
          batch_dims=batch_dims,
          name=None)

    return _extend_op_single(params, leaf_op)
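# Usage sketch (illustrative), assuming this op is registered as the
# StructuredTensor dispatch for `tf.gather`:
#
#   >>> st = StructuredTensor.from_pyval([{'a': 1}, {'a': 2}, {'a': 3}])
#   >>> tf.gather(st, [2, 0]).to_pyval()
#   [{'a': 3}, {'a': 1}]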
def ragged_one_hot(indices,
                   depth,
                   on_value=None,
                   off_value=None,
                   axis=None,
                   dtype=None,
                   name=None):
  """Applies tf.one_hot along the values of a RaggedTensor."""
  with ops.name_scope(name, 'RaggedOneHot', [indices]):
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    if axis is not None:
      axis = array_ops.get_positive_axis(
          axis, indices.shape.ndims, ndims_name='rank(indices)')
      if axis < indices.ragged_rank:
        raise ValueError('axis may not be less than indices.ragged_rank.')
    return indices.with_flat_values(
        array_ops.one_hot(indices.flat_values, depth, on_value, off_value,
                          axis, dtype, name))
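# Usage sketch (illustrative), assuming this op is registered as the ragged
# dispatch for `tf.one_hot`. The one-hot expansion is applied to the flat
# values, so the new depth axis is always innermost:
#
#   >>> indices = tf.ragged.constant([[0, 2], [1]])
#   >>> tf.one_hot(indices, depth=3)
#   <tf.RaggedTensor [[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], [[0.0, 1.0, 0.0]]]>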
def get_selectable(self, input_ids, axis):
  """Return a boolean mask of items that can be chosen for selection.

  Args:
    input_ids: a `RaggedTensor`.
    axis: axis to apply selection on.

  Returns:
    a `RaggedTensor` with dtype of bool and with shape
    `input_ids.shape[:axis]`. Its values are True if the corresponding item
    (or broadcasted subitems) should be considered for masking. In the
    default implementation, all `input_ids` items that are not listed in
    `unselectable_ids` (from the class arg) are considered selectable.
  """
  # Merge to the desired axis.
  input_ids = input_ids.merge_dims(1, axis) if axis > 1 else input_ids

  all_selectable_flats = [
      ragged_functional_ops.map_flat_values(math_ops.not_equal, input_ids,
                                            i).flat_values
      for i in self._unselectable_ids
  ]

  if all_selectable_flats:
    # An item is selectable only if it differs from every unselectable id.
    reduce_flat = math_ops.reduce_all(all_selectable_flats, axis=0)
  else:
    # If there are no unselectable ids, mark everything as selectable.
    reduce_flat = array_ops.ones_like(
        input_ids.flat_values, dtype=dtypes.bool)

  # Reduce to the requested axis and broadcast to match original shape.
  axis = array_ops.get_positive_axis(
      axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank)
  results = input_ids.with_flat_values(reduce_flat)
  if axis < input_ids.ragged_rank:
    reduce_axis = list(range(input_ids.ragged_rank, axis, -1))
    results = math_ops.reduce_all(results, reduce_axis)
  return results
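# Illustrative example (not from the original source): with
# unselectable_ids=[0] and input_ids = tf.ragged.constant([[0, 5], [6]]),
# get_selectable(input_ids, axis=1) yields [[False, True], [True]]: the
# zero is masked out, and everything else is selectable.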
def concat(values, axis, name: str = 'concat'):
  """tf.concat for structured tensors.

  Does not support (yet) checks on illegal axis values, et cetera.

  Args:
    values: a sequence of StructuredTensors.
    axis: an axis to concatenate upon.
    name: the name of the op(s).

  Returns:
    the StructuredTensors in `values` concatenated along `axis`.
  """
  if name is None:
    name = 'concat'
  _assert_concat_compatible_structured_tensors(values)

  def leaf_op(values):
    return array_ops.concat(values, axis)

  # TODO(martinz): handle axis when it is a tensor.
  axis = array_ops.get_positive_axis(axis, values[0].rank)
  with ops.name_scope(name, 'StructuredConcat', values):
    return _extend_op(values, leaf_op)
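# Usage sketch (illustrative), assuming this op is registered as the
# StructuredTensor dispatch for `tf.concat`:
#
#   >>> a = StructuredTensor.from_pyval([{'x': 1}])
#   >>> b = StructuredTensor.from_pyval([{'x': 2}, {'x': 3}])
#   >>> tf.concat([a, b], axis=0).to_pyval()
#   [{'x': 1}, {'x': 2}, {'x': 3}]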
def split(value: ragged_tensor.Ragged,
          num_or_size_splits,
          axis=0,
          num=None,
          name=None):
  """Splits a RaggedTensor `value` into a list of sub RaggedTensors.

  If `num_or_size_splits` is an `int`, then it splits `value` along the
  dimension `axis` into `num_or_size_splits` smaller RaggedTensors. This
  requires that `value.shape[axis]` is divisible by `num_or_size_splits`.

  If `num_or_size_splits` is a 1-D Tensor (or list), then `value` is split
  into `len(num_or_size_splits)` elements. The shape of the `i`-th element
  has the same size as the `value` except along dimension `axis` where the
  size is `num_or_size_splits[i]`.

  Splitting along a ragged dimension is not allowed.

  For example:

  >>> rt = tf.RaggedTensor.from_row_lengths(
  ...     np.arange(6 * 3).reshape(6, 3), row_lengths=[1, 2, 2, 1])
  >>> rt.shape
  TensorShape([4, None, 3])
  >>>
  >>> rt1, rt2 = tf.split(rt, 2)  # uniform splits
  >>> rt1.shape
  TensorShape([2, None, 3])
  >>> rt2.shape
  TensorShape([2, None, 3])
  >>>
  >>> rt3, rt4, rt5 = tf.split(rt, [1, 2, 1])  # ragged splits
  >>> rt3.shape
  TensorShape([1, None, 3])
  >>> rt4.shape
  TensorShape([2, None, 3])
  >>> rt5.shape
  TensorShape([1, None, 3])
  >>>
  >>> rt6, rt7 = tf.split(rt, [1, 2], axis=2)  # splits along axis 2
  >>> rt6.shape
  TensorShape([4, None, 1])
  >>> rt7.shape
  TensorShape([4, None, 2])

  Args:
    value: The `RaggedTensor` to split.
    num_or_size_splits: Either an `int` indicating the number of splits
      along `axis` or a 1-D integer `Tensor` or Python list containing the
      sizes of each output tensor along `axis`. If a Python int, then it
      must evenly divide `value.shape[axis]`; otherwise the sum of sizes
      along the split axis must match that of the `value`.
    axis: An `int` or scalar `int32` `Tensor`. The dimension along which to
      split. Must be in the range `[-rank(value), rank(value))`. Defaults
      to 0.
    num: An `int` used to specify the number of outputs when
      `num_or_size_splits` is a 1-D list or `Tensor` and its length is
      statically unknown, e.g., when specifying `tf.TensorSpec(None)` with
      the `input_signature` argument of `tf.function` (optional).
    name: A name for the operation (optional).

  Returns:
    if `num_or_size_splits` is an `int` returns a list of
    `num_or_size_splits` `RaggedTensor` objects; if `num_or_size_splits` is
    a 1-D Tensor returns `num_or_size_splits.get_shape()[0]` `RaggedTensor`
    objects resulting from splitting `value`.

  Raises:
    ValueError: If the dimension `axis` of `value` is a ragged dimension.
    ValueError: If `num` is unspecified and cannot be inferred.
    ValueError: If `num` is specified but doesn't match the length of
      `num_or_size_splits`.
    ValueError: If `num_or_size_splits` is an `int` and less than 1.
    TypeError: If `num_or_size_splits` is not an `int` or 1-D list or 1-D
      `Tensor`.
    InvalidArgumentError: If the `axis` dimension of `value` cannot be
      exactly split by `num_or_size_splits`.
    InvalidArgumentError: If `num_or_size_splits` contains negative
      integers.
    InvalidArgumentError: If `num_or_size_splits`'s static shape is unknown
      and its dynamic shape is inconsistent with `num`.
    InvalidArgumentError: If `num_or_size_splits`'s static rank is unknown
      and `axis` is a negative integer.
  """
  with ops.name_scope(name, 'RaggedSplit'):
    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        value, name='value')
    if isinstance(num_or_size_splits, int) and num_or_size_splits == 1:
      return [value]

    # static assert
    check_ops.assert_integer_v2(
        num_or_size_splits,
        message=('`num_or_size_splits` must be an `int` or 1-D list or '
                 '`Tensor` of integers.'))
    value_shape = ragged_shape.RaggedShape.from_tensor(value)
    axis = array_ops.get_positive_axis(axis, value_shape.rank)
    try:
      dim_size = value_shape[axis]
    except ValueError:
      raise ValueError('Cannot split a ragged dimension. Got `value` with '
                       f'shape {value_shape} and `axis` {axis}.')
    if isinstance(num_or_size_splits, int):
      # Uniform split
      num_splits = num_or_size_splits
      if num_splits < 1:
        raise ValueError('`num_or_size_splits` must be >=1 if it is an '
                         f'`int`. Received {num_or_size_splits}.')
      split_length = math_ops.floordiv(dim_size, num_splits)
      split_lengths = array_ops.repeat(split_length, num_splits)
    else:
      # Ragged split
      num_splits = None
      split_lengths = ops.convert_to_tensor(num_or_size_splits)
      if split_lengths.shape.ndims is not None:
        if split_lengths.shape.ndims != 1:
          raise TypeError('`num_or_size_splits` must be an `int` or 1-D '
                          f'list or `Tensor`. Received {num_or_size_splits}.')
        num_splits = tensor_shape.dimension_value(split_lengths.shape[0])

      if num_splits is None:
        if num is None:
          raise ValueError('`num` must be specified as an `int` when the '
                           'size of `num_or_size_split` is statically '
                           f'unknown. Received `num`: {num} and '
                           f'`num_or_size_split`: {num_or_size_splits}.')
        num_splits = num
      else:
        if num is not None and num != num_splits:
          raise ValueError('`num` does not match the size of '
                           f'`num_or_size_split`. Received `num`: {num} and '
                           f'size of `num_or_size_split`: {num_splits}.')

    splits = array_ops.concat([[0], math_ops.cumsum(split_lengths)], axis=0)
    checks = []
    checks.append(
        check_ops.assert_non_negative_v2(
            num_or_size_splits,
            message='`num_or_size_splits` must be non-negative.'))
    checks.append(
        check_ops.assert_equal_v2(
            num_splits,
            array_ops.shape(split_lengths)[0],
            message='`num` is inconsistent with `num_or_size_split.shape[0]`.'))
    checks.append(
        check_ops.assert_equal_v2(
            math_ops.cast(dim_size, splits.dtype),
            splits[-1],
            message=('Cannot exactly split the `axis` dimension of `value` '
                     'with the given `num_or_size_split`.')))
    splits = control_flow_ops.with_dependencies(checks, splits)

    split_rts = []
    slices = [slice(None)] * (axis + 1)
    for i in range(num_splits):
      slices[-1] = slice(splits[i], splits[i + 1])
      split_rts.append(value[tuple(slices)])
    return split_rts
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
  """Ragged compatible squeeze.

  If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

  If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
  where `N` is the number of elements in the squeezed dimensions.

  Args:
    input: A potentially ragged tensor. The input to squeeze.
    axis: An optional list of ints. Defaults to `None`. If the `input` is
      ragged, it only squeezes the dimensions listed. It fails if `input`
      is ragged and axis is []. If `input` is not ragged it calls
      tf.squeeze. Note that it is an error to squeeze a dimension that is
      not 1. It must be in the range of [-rank(input), rank(input)).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor. Contains the same data as input, but has
    one or more dimensions of size 1 removed.
  """
  with ops.name_scope(name, 'RaggedSqueeze', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    if isinstance(input, ops.Tensor):
      return array_ops.squeeze(input, axis, name)

    if axis is None:
      raise ValueError('Ragged.squeeze must have an axis argument.')
    if isinstance(axis, int):
      axis = [axis]
    elif ((not isinstance(axis, (list, tuple))) or
          (not all(isinstance(d, int) for d in axis))):
      raise TypeError('Axis must be a list or tuple of integers.')

    dense_dims = []
    ragged_dims = []
    # Normalize all the dims in axis to be positive.
    axis = [
        array_ops.get_positive_axis(d, input.shape.ndims, 'axis[%d]' % i,
                                    'rank(input)') for i, d in enumerate(axis)
    ]
    for dim in axis:
      if dim > input.ragged_rank:
        dense_dims.append(dim - input.ragged_rank)
      else:
        ragged_dims.append(dim)

    # Make sure the specified ragged dimensions are squeezable.
    assertion_list = []
    scalar_tensor_one = constant_op.constant(1, dtype=input.row_splits.dtype)
    for i, r in enumerate(input.nested_row_lengths()):
      if i + 1 in ragged_dims:
        assertion_list.append(
            control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(r, scalar_tensor_one)),
                ['the given axis (axis = %d) is not squeezable!' % (i + 1)]))
    if 0 in ragged_dims:
      scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
      assertion_list.append(
          control_flow_ops.Assert(
              math_ops.equal(
                  array_ops.size(input.row_splits), scalar_tensor_two),
              ['the given axis (axis = 0) is not squeezable!']))

    # At this point, the ragged dimensions are known to be squeezable.
    squeezed_rt = control_flow_ops.with_dependencies(assertion_list,
                                                     input.flat_values)

    if dense_dims:
      # Gives an error if the dense dimension is not squeezable.
      squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

    remaining_row_splits = []
    for i, row_split in enumerate(input.nested_row_splits):
      # Each row_splits tensor is for dimension #(i + 1).
      if (i + 1) not in ragged_dims:
        remaining_row_splits.append(row_split)
    # Take care of the first dimension if it is to be squeezed.
    if remaining_row_splits and 0 in ragged_dims:
      remaining_row_splits.pop(0)

    squeezed_rt = RaggedTensor.from_nested_row_splits(squeezed_rt,
                                                      remaining_row_splits)

    # Corner case: when removing all the ragged dimensions and the output is
    # a scalar tensor, e.g. ragged.squeeze(ragged.constant([[[1]]])).
    if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
      squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

    return squeezed_rt
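# Usage sketch (illustrative): squeezing a ragged dimension whose rows all
# have length 1, via the `tf.squeeze` dispatch:
#
#   >>> rt = tf.ragged.constant([[[1], [2]], [[3]]])  # shape (2, None, None)
#   >>> tf.squeeze(rt, axis=[2])
#   <tf.RaggedTensor [[1, 2], [3]]>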
def expand_dims(input: ragged_tensor.Ragged, axis, name=None):  # pylint: disable=redefined-builtin
  """Inserts a dimension with shape 1 into a potentially ragged tensor's shape.

  Given a potentially ragged tensor `input`, this operation inserts a
  dimension with size 1 at the dimension `axis` of `input`'s shape.

  The following table gives some examples showing how `ragged.expand_dims`
  impacts the shapes of different input tensors. Ragged dimensions are
  indicated by enclosing them in parentheses.

  input.shape             | axis | result.shape
  ----------------------- | ---- | -----------------------------
  `[D1, D2]`              |  `0` | `[1, D1, D2]`
  `[D1, D2]`              |  `1` | `[D1, 1, D2]`
  `[D1, D2]`              |  `2` | `[D1, D2, 1]`
  `[D1, (D2), (D3), D4]`  |  `0` | `[1, D1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `1` | `[D1, 1, (D2), (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `2` | `[D1, (D2), 1, (D3), D4]`
  `[D1, (D2), (D3), D4]`  |  `3` | `[D1, (D2), (D3), 1, D4]`
  `[D1, (D2), (D3), D4]`  |  `4` | `[D1, (D2), (D3), D4, 1]`

  Args:
    input: The potentially ragged tensor that should be expanded with a new
      dimension.
    axis: An integer constant indicating where the new dimension should be
      inserted.
    name: A name for the operation (optional).

  Returns:
    A tensor with the same values as `input`, with an added dimension of
    size 1 at `axis`.

  #### Examples:

  >>> rt = tf.ragged.constant([[1, 2], [3]])
  >>> print(rt.shape)
  (2, None)

  >>> expanded = tf.expand_dims(rt, axis=0)
  >>> print(expanded.shape, expanded)
  (1, 2, None) <tf.RaggedTensor [[[1, 2], [3]]]>

  >>> expanded = tf.expand_dims(rt, axis=1)
  >>> print(expanded.shape, expanded)
  (2, 1, None) <tf.RaggedTensor [[[1, 2]], [[3]]]>

  >>> expanded = tf.expand_dims(rt, axis=2)
  >>> print(expanded.shape, expanded)
  (2, None, 1) <tf.RaggedTensor [[[1], [2]], [[3]]]>
  """
  with ops.name_scope(name, 'RaggedExpandDims', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')

    if not ragged_tensor.is_ragged(input):
      return array_ops.expand_dims(input, axis)

    ndims = None if input.shape.ndims is None else input.shape.ndims + 1
    axis = array_ops.get_positive_axis(axis, ndims, ndims_name='rank(input)')

    if axis == 0:
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=input.nrows(), nrows=1, validate=False)
    elif axis == 1:
      return ragged_tensor.RaggedTensor.from_uniform_row_length(
          input, uniform_row_length=1, nrows=input.nrows(), validate=False)
    else:
      if ragged_tensor.is_ragged(input.values):
        return input.with_values(expand_dims(input.values, axis - 1))
      else:
        return input.with_values(
            array_ops.expand_dims(input.values, axis - 1))
def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
  """Helper function to concatenate or stack ragged tensors.

  Args:
    rt_inputs: A list of RaggedTensors or Tensors to combine.
    axis: The axis along which to concatenate or stack.
    stack_values: A boolean -- if true, then stack values; otherwise,
      concatenate them.

  Returns:
    A RaggedTensor.

  Raises:
    ValueError: If rt_inputs is empty, or if axis is out of range.
  """
  # Validate parameters.
  if not rt_inputs:
    raise ValueError('rt_inputs may not be empty.')

  # Convert input tensors.
  rt_inputs = [
      ragged_tensor.convert_to_tensor_or_ragged_tensor(
          rt_input, name='rt_input') for rt_input in rt_inputs
  ]
  row_splits_dtype, rt_inputs = ragged_tensor.match_row_splits_dtypes(
      *rt_inputs, return_dtype=True)
  rt_inputs = list(rt_inputs)

  # Special case: with a single input, concat is a no-op and stack is just
  # an expand_dims.
  if len(rt_inputs) == 1:
    if stack_values:
      return ragged_array_ops.expand_dims(rt_inputs[0], axis=axis)
    else:
      return rt_inputs[0]

  # Check the rank (number of dimensions) of the input tensors.
  ndims = None
  for rt in rt_inputs:
    if ndims is None:
      ndims = rt.shape.ndims
    else:
      rt.shape.assert_has_rank(ndims)

  out_ndims = ndims if (ndims is None or not stack_values) else ndims + 1
  axis = array_ops.get_positive_axis(axis, out_ndims)

  if stack_values and ndims == 1 and axis == 0:
    return ragged_tensor.RaggedTensor.from_row_lengths(
        values=array_ops.concat(rt_inputs, axis=0),
        row_lengths=array_ops.concat([array_ops.shape(r) for r in rt_inputs],
                                     axis=0))

  # If all the inputs are Tensors, and we're combining the final dimension,
  # then we can delegate to the tf.stack/tf.concat operation, and return a
  # Tensor.
  if all(not ragged_tensor.is_ragged(rt) for rt in rt_inputs):
    if ndims is not None and (axis == out_ndims - 1 or axis == ndims - 1):
      if stack_values:
        return array_ops.stack(rt_inputs, axis)
      else:
        return array_ops.concat(rt_inputs, axis)

  # Convert any Tensor inputs to RaggedTensors. This makes it possible to
  # concatenate Tensors and RaggedTensors together.
  for i in range(len(rt_inputs)):
    if not ragged_tensor.is_ragged(rt_inputs[i]):
      rt_inputs[i] = ragged_tensor.RaggedTensor.from_tensor(
          rt_inputs[i], ragged_rank=1, row_splits_dtype=row_splits_dtype)

  # Convert the input tensors to all have the same ragged_rank.
  ragged_rank = max(max(rt.ragged_rank for rt in rt_inputs), 1)
  rt_inputs = [
      _increase_ragged_rank_to(rt, ragged_rank, row_splits_dtype)
      for rt in rt_inputs
  ]

  if axis == 0:
    return _ragged_stack_concat_axis_0(rt_inputs, stack_values)
  elif axis == 1:
    return _ragged_stack_concat_axis_1(rt_inputs, stack_values)
  else:  # axis > 1: recurse.
    values = [rt.values for rt in rt_inputs]
    splits = [[rt_input.row_splits] for rt_input in rt_inputs]
    with ops.control_dependencies(ragged_util.assert_splits_match(splits)):
      return ragged_tensor.RaggedTensor.from_row_splits(
          _ragged_stack_concat_helper(values, axis - 1, stack_values),
          splits[0][0], validate=False)
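# Usage sketch (illustrative): this helper backs both the ragged `tf.concat`
# and `tf.stack` dispatches (stack_values=False and True, respectively):
#
#   >>> t1 = tf.ragged.constant([[1, 2], [3]])
#   >>> t2 = tf.ragged.constant([[4], [5, 6]])
#   >>> tf.concat([t1, t2], axis=0)
#   <tf.RaggedTensor [[1, 2], [3], [4], [5, 6]]>
#   >>> tf.concat([t1, t2], axis=1)
#   <tf.RaggedTensor [[1, 2, 4], [3, 5, 6]]>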
def ragged_reduce_aggregate(reduce_op,
                            unsorted_segment_op,
                            rt_input,
                            axis,
                            keepdims,
                            separator=None,
                            name=None):
  """Aggregates across axes of a RaggedTensor using the given `Tensor` ops.

  Reduces `rt_input` along the dimensions given in `axis`. The rank of the
  tensor is reduced by 1 for each entry in `axis`. If `axis` is not
  specified, then all dimensions are reduced, and a scalar value is
  returned.

  This op assumes that `reduce_op` and `unsorted_segment_op` are
  associative; if not, then reducing multiple axes will return incorrect
  results. (In particular, reducing multiple axes is currently implemented
  by reducing the axes one at a time.)

  Args:
    reduce_op: The tensorflow `op` that should be used to reduce values in
      uniform dimensions. Must have the same signature and basic behavior
      as `reduce_sum`, `reduce_max`, etc.
    unsorted_segment_op: The tensorflow `op` that should be used to combine
      values in ragged dimensions. Must have the same signature and basic
      behavior as `unsorted_segment_sum`, `unsorted_segment_max`, etc.
    rt_input: A `Tensor` or `RaggedTensor` containing the values to be
      reduced.
    axis: The axis or axes to reduce. May be `None` (to reduce all axes),
      an `int` (to reduce a single axis), a `list` or `tuple` of `int` (to
      reduce a given set of axes), or a `Tensor` with a constant value.
      Must be in the range `[0, rt_input.rank)`.
    keepdims: If true, retains reduced dimensions with length 1.
    separator: An optional string. Defaults to None. The separator to use
      when joining. The separator must not be set for non-string data
      types. (I.e., if separator is not None, then string ops are used.)
    name: A name prefix for the returned tensor (optional).

  Returns:
    A `RaggedTensor` containing the reduced values. The returned tensor has
    the same dtype as `rt_input`, and its shape is given by removing the
    dimensions specified in `axis` from `rt_input.shape`. The `ragged_rank`
    of the returned tensor is given by subtracting any ragged dimensions
    specified in `axis` from `rt_input.ragged_rank`.

  Raises:
    ValueError: If `axis` contains a `Tensor` whose value is not constant.
  """
  if not ragged_tensor.is_ragged(rt_input):
    if separator is None:
      return reduce_op(rt_input, axis, keepdims=keepdims, name=name)
    else:
      # When separator is not None, we infer that dtype is string and
      # reduce_join will be called.
      return reduce_op(
          rt_input, axis, keepdims=keepdims, name=name, separator=separator)

  if isinstance(axis, ops.Tensor):
    axis = tensor_util.constant_value(axis)
    if axis is None:
      raise ValueError('axis must be known at graph construction time.')
    if isinstance(axis, np.ndarray):
      axis = axis.tolist()

  # When reducing all axes, just ignore splits & reduce the inner values.
  if axis is None:
    result = reduce_op(rt_input.flat_values, None, keepdims=keepdims,
                       name=name)
    if keepdims:
      # Expand the result to the input number of dimensions.
      for _ in rt_input.shape[1:]:
        result = array_ops.expand_dims(result, axis=0)
    return result

  with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
    if isinstance(axis, (tuple, list)):
      if not axis:
        return rt_input
      elif len(axis) == 1:
        axis = axis[0]
      else:
        # When reducing multiple axes, as we reduce one at a time (see
        # below), the negative axis has to be converted to positive at the
        # first run, as the sort with negative axis will have different
        # orders. See GitHub issue 27497.
        axis = [
            array_ops.get_positive_axis(a, rt_input.shape.ndims,
                                        'axis[%s]' % i, 'rank(input_tensor)')
            for i, a in enumerate(axis)
        ]
        # When reducing multiple axes, just reduce one at a time. This is
        # less efficient, and only works for associative ops. (In
        # particular, it does not work for reduce_mean.) However, reducing
        # multiple axes at once will probably require a nontrivial c++ op.
        axis = sorted(axis)
        inner_reduced = ragged_reduce_aggregate(reduce_op,
                                                unsorted_segment_op, rt_input,
                                                axis[-1], keepdims, separator)
        return ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                       inner_reduced, axis[:-1], keepdims,
                                       separator)

    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        rt_input, name='rt_input')

    axis = array_ops.get_positive_axis(
        axis, rt_input.shape.ndims, ndims_name='rank(input_tensor)')

    if axis == 0:
      # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
      row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
      num_segments = math_ops.maximum(math_ops.reduce_max(row_lengths), 0)
      # `range` here is the ragged range op, not the Python builtin.
      segment_ids = range(row_lengths).values
      result = _ragged_segment_aggregate(unsorted_segment_op,
                                         rt_input.values, segment_ids,
                                         num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=0)
      return result
    elif axis == 1:
      # out[i_0, i_1, i_2, ..., i_N] = sum_{j} rt_input[i_0, j, i_2, ..., i_N]
      num_segments = array_ops.shape(rt_input.row_splits)[0] - 1
      segment_ids = segment_id_ops.row_splits_to_segment_ids(
          rt_input.row_splits)
      result = _ragged_segment_aggregate(unsorted_segment_op,
                                         rt_input.values, segment_ids,
                                         num_segments, separator)
      if keepdims:
        result = array_ops.expand_dims(result, axis=1)
      return result
    else:
      # out[i_0, ..., i_[axis-1], i_axis+1], ..., i_N] =
      #     sum_{j} rt_input[i_0, ..., i_[axis-1], j, i_axis+1], ..., i_N]
      return rt_input.with_values(
          ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                  rt_input.values, axis - 1, keepdims,
                                  separator))
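# Usage sketch (illustrative): the ragged `tf.reduce_sum` is backed by this
# aggregate with reduce_op=math_ops.reduce_sum and
# unsorted_segment_op=math_ops.unsorted_segment_sum:
#
#   >>> rt = tf.ragged.constant([[3, 1, 4], [1, 5]])
#   >>> tf.reduce_sum(rt, axis=1)  # reduce within each row
#   <tf.Tensor: shape=(2,), dtype=int32, numpy=array([8, 6], dtype=int32)>
#   >>> tf.reduce_sum(rt, axis=0)  # reduce across rows, column-wise
#   <tf.Tensor: shape=(3,), dtype=int32, numpy=array([4, 6, 4], dtype=int32)>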
def gather(params,
           indices,
           validate_indices=None,
           axis=None,
           batch_dims=0,
           name=None):
  """Gathers ragged slices from `params` axis `0` according to `indices`.

  See `tf.gather` for full documentation. (This version has the same API
  as `tf.gather`, but supports ragged `params` and `indices`.)

  Examples:

  >>> params = tf.constant(['a', 'b', 'c', 'd', 'e'])
  >>> indices = tf.constant([3, 1, 2, 1, 0])
  >>> ragged_params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
  >>> ragged_indices = tf.ragged.constant([[3, 1, 2], [1], [], [0]])

  >>> tf.gather(params, ragged_indices)
  <tf.RaggedTensor [[b'd', b'b', b'c'], [b'b'], [], [b'a']]>

  >>> tf.gather(ragged_params, indices)
  <tf.RaggedTensor [[b'e'], [b'd'], [], [b'd'], [b'a', b'b', b'c']]>

  >>> tf.gather(ragged_params, ragged_indices)
  <tf.RaggedTensor [[[b'e'], [b'd'], []], [[b'd']], [], [[b'a', b'b', b'c']]]>

  Args:
    params: The potentially ragged tensor from which to gather values. Must
      be at least rank 1.
    indices: The potentially ragged tensor indicating which values to
      gather. Must have dtype `int32` or `int64`. Values must be in the
      range `[0, params.shape[0])`.
    validate_indices: Ignored.
    axis: The axis in `params` to gather `indices` from.
    batch_dims: The number of batch dimensions.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor`, where `output.dtype=params.dtype` and
    `output.shape=indices.shape + params.shape[1:]` and
    `output.ragged_rank=indices.shape.ndims + params.ragged_rank`.

  Raises:
    ValueError: If indices.shape.ndims is not known statically.
  """
  del validate_indices
  with ops.name_scope(name, 'RaggedGather', [params, indices]):
    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        params, name='params')
    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        indices, name='indices')
    params, indices = ragged_tensor.match_row_splits_dtypes(params, indices)

    if batch_dims != indices.shape.rank:
      batch_dims = array_ops.get_positive_axis(
          batch_dims,
          indices.shape.rank,
          axis_name='batch_dims',
          ndims_name='rank(indices)')
    if params.shape.rank is not None and batch_dims >= params.shape.rank:
      raise ValueError('batch_dims must be less than rank(params)')

    if axis is None:
      axis = batch_dims
    axis = array_ops.get_positive_axis(
        axis, params.shape.rank, ndims_name='rank(params)')
    if axis < batch_dims:
      raise ValueError('axis must be greater than or equal to batch_dims')
    if indices.shape.rank is not None:
      if not 0 <= batch_dims <= indices.shape.rank:
        raise ValueError(
            'batch_dims=%s must be between 0 and rank(indices)=%s' %
            (batch_dims, indices.shape.rank))

    return _gather(params, indices, axis, batch_dims)
def regex_split_with_offsets(input,
                             delim_regex_pattern,
                             keep_delim_regex_pattern="",
                             name=None):
  r"""Split `input` by delimiters that match a regex pattern; returns offsets.

  `regex_split_with_offsets` will split `input` using delimiters that match
  a regex pattern in `delim_regex_pattern`. Here is an example:

  ```
  text_input=["hello there"]
  # split by whitespace
  result, begin, end = regex_split_with_offsets(text_input, "\s")
  # result = [["hello", "there"]]
  # begin = [[0, 7]]
  # end = [[5, 11]]
  ```

  By default, delimiters are not included in the split string results.
  Delimiters may be included by specifying a regex pattern
  `keep_delim_regex_pattern`. For example:

  ```
  text_input=["hello there"]
  # split by whitespace
  result, begin, end = regex_split_with_offsets(text_input, "\s", "\s")
  # result = [["hello", " ", "there"]]
  # begin = [[0, 5, 7]]
  # end = [[5, 6, 11]]
  ```

  If there are multiple delimiters in a row, no empty split results are
  emitted. For example:

  ```
  text_input=["hello  there"]  # two continuous whitespace characters
  # split by whitespace
  result, begin, end = regex_split_with_offsets(text_input, "\s")
  # result = [["hello", "there"]]
  ```

  See https://github.com/google/re2/wiki/Syntax for the full list of
  supported expressions.

  Args:
    input: A Tensor or RaggedTensor of string input.
    delim_regex_pattern: A string containing the regex pattern of a
      delimiter.
    keep_delim_regex_pattern: (optional) Regex pattern of delimiters that
      should be kept in the result.
    name: (optional) Name of the op.

  Returns:
    A tuple of RaggedTensors containing:
      (split_results, begin_offsets, end_offsets)
    where `split_results` is of type string, and `begin_offsets` and
    `end_offsets` are of type int64.
  """
  delim_regex_pattern = b"".join(
      [b"(", delim_regex_pattern.encode("utf-8"), b")"])
  keep_delim_regex_pattern = b"".join(
      [b"(", keep_delim_regex_pattern.encode("utf-8"), b")"])

  # Convert input to ragged or tensor.
  input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
      input, dtype=dtypes.string)

  if ragged_tensor.is_ragged(input):
    # Send flat_values to the regex_split op.
    tokens, begin_offsets, end_offsets, row_splits = (
        gen_regex_split_ops.regex_split_with_offsets(
            input.flat_values, delim_regex_pattern, keep_delim_regex_pattern,
            name=name))
    # Pack back into the original ragged tensor.
    tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
        tokens, row_splits)
    tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
        tokens_rt, input.row_splits)
    begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        begin_offsets, row_splits)
    begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        begin_offsets_rt, input.row_splits)
    end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        end_offsets, row_splits)
    end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        end_offsets_rt, input.row_splits)
    return tokens_rt, begin_offsets_rt, end_offsets_rt
  else:
    # Reshape to a flat Tensor (if not already).
    input_shape = math_ops.cast(array_ops.shape(input), dtypes.int64)
    input_reshaped = array_ops.reshape(input, [-1])

    # Send the flattened input to the regex_split op.
    tokens, begin_offsets, end_offsets, row_splits = (
        gen_regex_split_ops.regex_split_with_offsets(
            input_reshaped, delim_regex_pattern, keep_delim_regex_pattern))

    # Pack back into ragged tensors.
    tokens_rt = ragged_tensor.RaggedTensor.from_row_splits(
        tokens, row_splits=row_splits)
    begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        begin_offsets, row_splits=row_splits)
    end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits(
        end_offsets, row_splits=row_splits)

    # If the original input was a multi-dimensional Tensor, add back the
    # dimensions.
    static_rank = input.get_shape().ndims
    if static_rank is not None and static_rank > 1:
      last_axis = array_ops.get_positive_axis(-1, static_rank)
      for i in range(last_axis, 0, -1):
        tokens_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
            values=tokens_rt, uniform_row_length=input_shape[i])
        begin_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
            values=begin_offsets_rt, uniform_row_length=input_shape[i])
        end_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length(
            values=end_offsets_rt, uniform_row_length=input_shape[i])
    return tokens_rt, begin_offsets_rt, end_offsets_rt
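# Usage sketch (illustrative): with a ragged input, each result gains one
# ragged dimension relative to the input:
#
#   >>> inp = tf.ragged.constant([["a b", "c"], ["d e"]])
#   >>> tokens, begin, end = regex_split_with_offsets(inp, " ")
#   >>> tokens
#   <tf.RaggedTensor [[[b'a', b'b'], [b'c']], [[b'd', b'e']]]>
#   >>> begin
#   <tf.RaggedTensor [[[0, 2], [0]], [[0, 2]]]>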