def _broadcast_uniform_partitioned_dimension(self, axis, lengths): """Broadcasts the partitioned dimension `axis` to match `lengths`.""" axis_dim_size = self.dimension_size(axis) partitioned_sizes = list(self._partitioned_dim_sizes[:axis]) if lengths.shape.ndims == 0: lengths = array_ops.where( math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size) repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1) splits = array_ops.stack([0, self.num_slices_in_dimension(axis)]) else: splits = math_ops.range( array_ops.size(lengths, out_type=self.dim_size_dtype) + 1) repeats = lengths partitioned_sizes.append(lengths) for dim_size in self._partitioned_dim_sizes[axis + 1:]: if dim_size.shape.ndims == 0: partitioned_sizes.append(dim_size) splits *= dim_size else: partitioned_sizes.append( ragged_util.repeat_ranges(dim_size, splits, repeats)) splits = array_ops.gather( ragged_util.lengths_to_splits(dim_size), splits) inner_sizes = self._inner_dim_sizes return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def _ragged_stack_concat_axis_0(rt_inputs, stack_values): """Helper function to concatenate or stack ragged tensors along axis 0. Args: rt_inputs: A list of RaggedTensors, all with the same rank and ragged_rank. stack_values: Boolean. If true, then stack values; otherwise, concatenate them. Returns: A RaggedTensor. """ # Concatenate the inner values together. flat_values = [rt.flat_values for rt in rt_inputs] concatenated_flat_values = array_ops.concat(flat_values, axis=0) # Concatenate the splits together for each ragged dimension (adjusting # split offsets as necessary). nested_splits = [rt.nested_row_splits for rt in rt_inputs] ragged_rank = rt_inputs[0].ragged_rank concatenated_nested_splits = [ _concat_ragged_splits([ns[dim] for ns in nested_splits]) for dim in range(ragged_rank) ] # If we are performing a stack operation, then add another splits. if stack_values: stack_lengths = array_ops.stack([rt.nrows() for rt in rt_inputs]) stack_splits = ragged_util.lengths_to_splits(stack_lengths) concatenated_nested_splits.insert(0, stack_splits) return ragged_tensor.RaggedTensor.from_nested_row_splits( concatenated_flat_values, concatenated_nested_splits, validate=False)
def _broadcast_uniform_partitioned_dimension(self, axis, lengths): """Broadcasts the partitioned dimension `axis` to match `lengths`.""" axis_dim_size = self.dimension_size(axis) partitioned_sizes = list(self._partitioned_dim_sizes[:axis]) if lengths.shape.ndims == 0: lengths = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, axis_dim_size) repeats = array_ops.where(math_ops.equal(axis_dim_size, 1), lengths, 1) splits = array_ops.stack([0, self.num_slices_in_dimension(axis)]) else: splits = math_ops.range( array_ops.size(lengths, out_type=self.dim_size_dtype) + 1) repeats = lengths partitioned_sizes.append(lengths) for dim_size in self._partitioned_dim_sizes[axis + 1:]: if dim_size.shape.ndims == 0: partitioned_sizes.append(dim_size) splits *= dim_size else: partitioned_sizes.append( ragged_util.repeat_ranges(dim_size, splits, repeats)) splits = array_ops.gather( ragged_util.lengths_to_splits(dim_size), splits) inner_sizes = self._inner_dim_sizes return RaggedTensorDynamicShape(partitioned_sizes, inner_sizes)
def boolean_mask(data, mask, name=None): """Applies a boolean mask to `data` without flattening the mask dimensions. Returns a potentially ragged tensor that is formed by retaining the elements in `data` where the corresponding value in `mask` is `True`. * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]` Where `j` is the `i`th `True` entry of `mask[a1...aA]`. Note that `output` preserves the mask dimensions `a1...aA`; this differs from `tf.boolean_mask`, which flattens those dimensions. Args: data: A potentially ragged tensor. mask: A potentially ragged boolean tensor. `mask`'s shape must be a prefix of `data`'s shape. `rank(mask)` must be known statically. name: A name prefix for the returned tensor (optional). Returns: A potentially ragged tensor that is formed by retaining the elements in `data` where the corresponding value in `mask` is `True`. * `rank(output) = rank(data)`. * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`. Raises: ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is not a prefix of `data.shape`. #### Examples: >>> # Aliases for True & False so data and mask line up. >>> T, F = (True, False) >>> tf.ragged.boolean_mask( # Mask a 2D Tensor. ... data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ... mask=[[T, F, T], [F, F, F], [T, F, F]]).to_list() [[1, 3], [], [7]] >>> tf.ragged.boolean_mask( # Mask a 2D RaggedTensor. ... tf.ragged.constant([[1, 2, 3], [4], [5, 6]]), ... tf.ragged.constant([[F, F, T], [F], [T, T]])).to_list() [[3], [], [5, 6]] >>> tf.ragged.boolean_mask( # Mask rows of a 2D RaggedTensor. ... tf.ragged.constant([[1, 2, 3], [4], [5, 6]]), ... tf.ragged.constant([True, False, True])).to_list() [[1, 2, 3], [5, 6]] """ with ops.name_scope(name, 'RaggedMask', [data, mask]): # Convert inputs to tensors. data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data') mask = ragged_tensor.convert_to_tensor_or_ragged_tensor( mask, dtypes.bool, name='mask') row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes( data, mask, return_dtype=True) # Get static rank of mask. if mask.shape.ndims is None: raise ValueError('mask.shape.ndims must be known statically.') elif mask.shape.ndims == 0: raise ValueError('mask cannot be scalar.') # If mask is ragged, then recurse with a non-ragged mask. if ragged_tensor.is_ragged(mask): if not ragged_tensor.is_ragged(data): data = ragged_tensor.RaggedTensor.from_tensor( data, ragged_rank=mask.ragged_rank, row_splits_dtype=mask.row_splits.dtype) # Check that mask.nested_row_splits is a prefix of # data.nested_row_splits. splits_list = [ mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank] ] with ops.control_dependencies( ragged_util.assert_splits_match(splits_list)): # Strip off ragged `splits` until `mask` is non-ragged. Keep the splits # that we strip off in `splits`, so we can add them back on after # we recursively mask the non-ragged data. splits = [] while ragged_tensor.is_ragged(mask): if mask.shape.ndims > 2: splits.append(mask.row_splits) else: # Count the number of True mask values in each row to find the # lengths of the filtered rows; then convert to splits. int_mask = ragged_functional_ops.map_flat_values( math_ops.cast, mask, dtype=row_splits_dtype) masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1) splits.append(ragged_util.lengths_to_splits(masked_row_lengths)) mask = mask.values data = data.values # Recursively apply the nested non-ragged mask to the nested data. masked_values = boolean_mask(data, mask) # Add the ragged `splits` back to the result. masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits( masked_values, splits, validate=False) return masked_values # If mask is non-ragged and has rank 1, and data is ragged, then build a # ragged tensor with the indicated rows. elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1: # Get the masked splits: first get the length of each row, then filter # out the rows that we are deleting, and convert that filtered set of # masks back to a splits tensor. lengths = data.row_lengths() masked_lengths = array_ops.boolean_mask(lengths, mask) masked_splits = ragged_util.lengths_to_splits(masked_lengths) # Get the masked values: first get row ids corresponding to each # value, then use tf.gather to build a boolean mask that's false for # values that come from rows that we are deleting, and use that mask to # construct the masked values tensor. segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits) segment_mask = array_ops.gather(mask, segment_ids) masked_values = boolean_mask(data.values, segment_mask) return ragged_tensor.RaggedTensor.from_row_splits( masked_values, masked_splits, validate=False) # If mask is non-ragged and has rank>1, then convert it to be ragged, # with a ragged rank matching data. if ragged_tensor.is_ragged(data): mask = ragged_tensor.RaggedTensor.from_tensor( mask, ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1), row_splits_dtype=data.row_splits.dtype) return boolean_mask(data, mask) # Otherwise, data and mask are both `Tensor`s. else: # Apply `boolean_mask` to get the masked values. masked_values = array_ops.boolean_mask(data, mask) if mask.shape.ndims >= 2: # Add the innermost ragged dimension. For each innermost cell, get the # number of values it contains. Then flatten that to get a list of # cell lengths, and convert it to splits. Finally, combine the splits # and values to get the innermost ragged tensor. masked_lengths = math_ops.count_nonzero( mask, axis=-1, dtype=row_splits_dtype) flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1]) masked_values = ragged_tensor.RaggedTensor.from_row_lengths( masked_values, flattened_masked_lengths, validate=False) # Wrap remaining ragged dimensions. if mask.shape.ndims > 2: mask_shape = array_ops.shape(mask, out_type=row_splits_dtype) split_size = math_ops.cumprod(mask_shape) + 1 for dim in range(mask.shape.ndims - 3, -1, -1): elt_size = mask_shape[dim + 1] masked_splits = math_ops.range(split_size[dim]) * elt_size masked_values = ragged_tensor.RaggedTensor.from_row_splits( masked_values, masked_splits, validate=False) return masked_values
def _tile_ragged_splits(rt_input, multiples, const_multiples=None): """Builds nested_split tensors for a tiled `RaggedTensor`. Returns a list of split tensors that can be used to construct the `RaggedTensor` that tiles `rt_input` as specified by `multiples`. Args: rt_input: The `RaggedTensor` that is being tiled. multiples: A 1-D integer `tensor`, indicating how many times each dimension should be repeated. const_multiples: Optional constant value for multiples. Used to skip tiling dimensions where `multiples=1`. Returns: A list of 1-D integer `Tensor`s (one for each ragged dimension in `rt_input`). #### Example: >>> rt = tf.ragged.constant([[1, 2], [3]]) >>> _tile_ragged_splits(rt, [3, 2]) [<tf.Tensor: shape=(7,), dtype=int64, numpy=array([ 0, 4, 6, 10, 12, 16, 18])>] """ ragged_rank = rt_input.ragged_rank nested_splits = rt_input.nested_row_splits # projected_splits[src_axis, dst_axis] contains the split points that divide # the rows from src_axis in the list of dst_axis values. E.g., # projected_splits[i, i] = nested_splits[i], and # projected_splits[i, i+1] = gather(nested_splits[i+1], nested_splits[i]). projected_splits = [{i: nested_splits[i]} for i in range(ragged_rank)] for src_axis in range(ragged_rank): for dst_axis in range(src_axis + 1, ragged_rank - 1): projected_splits[src_axis][dst_axis] = array_ops.gather( nested_splits[dst_axis], projected_splits[src_axis][dst_axis - 1]) # For each ragged dimension: nested_splits[axis] -> result_splits[axis]. result_splits = [] for axis in range(ragged_rank): # Get the length of each row for the input tensor for this dimension. input_lengths = nested_splits[axis][1:] - nested_splits[axis][:-1] # Multiply those lengths by the `multiples` of dimension axis+1, since # each value will be repeated that number of times. output_lengths = input_lengths * multiples[axis + 1] # Repeat ranges of the row lengths as necessary for them to be tiled in # each ragged dimension `d < axis`. (Start with dimension d=axis-1, and # work our way up to dimension d=0.) repeats = 1 for d in range(axis - 1, -1, -1): if const_multiples is None or const_multiples[d + 1] != 1: splits = projected_splits[d][axis - 1] * repeats output_lengths = ragged_util.repeat_ranges(output_lengths, splits, multiples[d + 1]) repeats *= multiples[d + 1] # Tile splits for the outermost (uniform) dimension. output_lengths = array_ops.tile(output_lengths, multiples[:1]) # Convert to splits. result_splits.append(ragged_util.lengths_to_splits(output_lengths)) return result_splits
def boolean_mask(data, mask, keepdims=False, name=None): """Applies a boolean mask to `data`. Returns a potentially ragged tensor that is formed by retaining the elements in `data` where the corresponding value in `mask` is `True`. If `keepdims` is true then outer dimensions (corresponding to the `mask` dimensions) are preserved, and: * `output[a1...aA, i, b1...bB] = data[a1...aA, j, b1...bB]` Where `j` is the `i`th `True` entry of `mask[a1...aA]`. If `keepdims` is false, then the outer dimensions are collapsed (similar to the behavior of `tf.boolean_mask`), and: * `output[i, b1...bB] = data[a1...aA, b1...bB]` Where `(a1...aA)` is the `i`th `True` entry of `mask` (in row-major order). Args: data: A potentially ragged tensor. mask: A potentially ragged boolean tensor. `mask`'s shape must be a prefix of `data`'s shape. `rank(mask)` must be known statically. keepdims: Whether to preserve the outer dimensions (`keepdims=True`) or flatten them (`keepdims=False`). name: A name prefix for the returned tensor (optional). Returns: A potentially ragged tensor that is formed by retaining the elements in `data` where the corresponding value in `mask` is `True`. If `keepdims` is false: * `rank(output) = rank(data) - rank(mask) + 1`. * `output.ragged_rank = max(data.ragged_rank - rank(mask) + 1, 0)`. If `keepdims` is true: * `rank(output) = rank(data)`. * `output.ragged_rank = max(data.ragged_rank, rank(mask) - 1)`. Raises: ValueError: if `rank(mask)` is not known statically; or if `mask.shape` is not a prefix of `data.shape`. #### Examples: ```python >>> # Aliases for True & False so data and mask line up. >>> T, F = (True, False) >>> tf.ragged.boolean_mask( # Mask a 2D Tensor. Flatten outer dims. ... data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ... mask=[[T, F, T], [F, F, F], [T, F, F]], ... keepdims=False).tolist() [1, 3, 7] >>> tf.ragged.boolean_mask( # Mask a 2D Tensor. Preserve outer dims. ... data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ... mask=[[T, F, T], [F, F, F], [T, F, F]], ... keepdims=True).tolist() [[1, 3], [], [7]] >>> tf.ragged.boolean_mask( # Mask a 2D RaggedTensor. Flatten outer dims. ... tf.ragged.constant([[1, 2, 3], [4], [5, 6]]), ... tf.ragged.constant([[F, F, T], [F], [T, T]]), ... keepdims=False).tolist() [3, 5, 6] >>> tf.ragged.boolean_mask( # Mask a 2D RaggedTensor. Preserve outer dims. ... tf.ragged.constant([[1, 2, 3], [4], [5, 6]]), ... tf.ragged.constant([[F, F, T], [F], [T, T]]), ... keepdims=True).tolist() [[3], [], [5, 6]] >>> tf.ragged.boolean_mask( # Mask rows of a 2D RaggedTensor. ... tf.ragged.constant([[1, 2, 3], [4], [5, 6]]), ... tf.ragged.constant([True, False, True]), ... keepdims=True).tolist() [[1, 2, 3], [5, 6]] ``` """ with ops.name_scope(name, 'RaggedMask', [data, mask]): # Convert inputs to tensors. data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data') mask = ragged_tensor.convert_to_tensor_or_ragged_tensor( mask, dtypes.bool, name='mask') row_splits_dtype, (data, mask) = ragged_tensor.match_row_splits_dtypes( data, mask, return_dtype=True) # Get static rank of mask. if mask.shape.ndims is None: raise ValueError('mask.shape.ndims must be known statically.') elif mask.shape.ndims == 0: raise ValueError('mask cannot be scalar.') # If mask is ragged, then recurse with a non-ragged mask. if ragged_tensor.is_ragged(mask): if not ragged_tensor.is_ragged(data): data = ragged_tensor.RaggedTensor.from_tensor( data, ragged_rank=mask.ragged_rank, row_splits_dtype=mask.row_splits.dtype) # Check that mask.nested_row_splits is a prefix of # data.nested_row_splits. splits_list = [ mask.nested_row_splits, data.nested_row_splits[:mask.ragged_rank] ] with ops.control_dependencies( ragged_util.assert_splits_match(splits_list)): # Strip off ragged `splits` until `mask` is non-ragged. Keep the splits # that we strip off in `splits`, so we can add them back on after # we recursively mask the non-ragged data. splits = [] while ragged_tensor.is_ragged(mask): if mask.shape.ndims > 2: splits.append(mask.row_splits) else: # Count the number of True mask values in each row to find the # lengths of the filtered rows; then convert to splits. int_mask = ragged_functional_ops.map_flat_values( math_ops.cast, mask, dtype=row_splits_dtype) masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1) splits.append(ragged_util.lengths_to_splits(masked_row_lengths)) mask = mask.values data = data.values # Recursively apply the nested non-ragged mask to the nested data. masked_values = boolean_mask(data, mask, keepdims) # Add the ragged `splits` back to the result. if keepdims: masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits( masked_values, splits, validate=False) return masked_values # If mask is non-ragged and has rank 1, and data is ragged, then build a # ragged tensor with the indicated rows. elif ragged_tensor.is_ragged(data) and mask.shape.ndims == 1: # Get the masked splits: first get the length of each row, then filter # out the rows that we are deleting, and convert that filtered set of # masks back to a splits tensor. lengths = data.row_lengths() masked_lengths = array_ops.boolean_mask(lengths, mask) masked_splits = ragged_util.lengths_to_splits(masked_lengths) # Get the masked values: first get row ids corresponding to each # value, then use tf.gather to build a boolean mask that's false for # values that come from rows that we are deleting, and use that mask to # construct the masked values tensor. segment_ids = segment_id_ops.row_splits_to_segment_ids(data.row_splits) segment_mask = array_ops.gather(mask, segment_ids) masked_values = boolean_mask(data.values, segment_mask, keepdims=False) return ragged_tensor.RaggedTensor.from_row_splits(masked_values, masked_splits, validate=False) # If mask is non-ragged and has rank>1, then convert it to be ragged, # with a ragged rank matching data. if ragged_tensor.is_ragged(data): mask = ragged_tensor.RaggedTensor.from_tensor( mask, ragged_rank=min(data.ragged_rank, mask.shape.ndims - 1), row_splits_dtype=data.row_splits.dtype) return boolean_mask(data, mask, keepdims) # Otherwise, data and mask are both `Tensor`s. else: # Apply `boolean_mask` to get the masked values. masked_values = array_ops.boolean_mask(data, mask) if mask.shape.ndims >= 2 and keepdims: # Add the innermost ragged dimension. For each innermost cell, get the # number of values it contains. Then flatten that to get a list of # cell lengths, and convert it to splits. Finally, combine the splits # and values to get the innermost ragged tensor. masked_lengths = math_ops.count_nonzero(mask, axis=-1, dtype=row_splits_dtype) flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1]) masked_values = ragged_tensor.RaggedTensor.from_row_lengths( masked_values, flattened_masked_lengths, validate=False) # Wrap remaining ragged dimensions. if mask.shape.ndims > 2 and keepdims: mask_shape = array_ops.shape(mask, out_type=row_splits_dtype) split_size = math_ops.cumprod(mask_shape) + 1 for dim in range(mask.shape.ndims - 3, -1, -1): elt_size = mask_shape[dim + 1] masked_splits = math_ops.range(split_size[dim]) * elt_size masked_values = ragged_tensor.RaggedTensor.from_row_splits( masked_values, masked_splits, validate=False) return masked_values
def _tile_ragged_splits(rt_input, multiples, const_multiples=None): """Builds nested_split tensors for a tiled `RaggedTensor`. Returns a list of split tensors that can be used to construct the `RaggedTensor` that tiles `rt_input` as specified by `multiples`. Args: rt_input: The `RaggedTensor` that is being tiled. multiples: A 1-D integer `tensor`, indicating how many times each dimension should be repeated. const_multiples: Optional constant value for multiples. Used to skip tiling dimensions where `multiples=1`. Returns: A list of 1-D integer `Tensor`s (one for each ragged dimension in `rt_input`). #### Example: ```python >>> rt = tf.ragged.constant([[1, 2], [3]]) >>> _tile_ragged_splits(rt, [3, 2]) [0, 4, 6, 10, 12, 16, 18] ``` """ ragged_rank = rt_input.ragged_rank nested_splits = rt_input.nested_row_splits # projected_splits[src_axis, dst_axis] contains the split points that divide # the rows from src_axis in the list of dst_axis values. E.g., # projected_splits[i, i] = nested_splits[i], and # projected_splits[i, i+1] = gather(nested_splits[i+1], nested_splits[i]). projected_splits = [{i: nested_splits[i]} for i in range(ragged_rank)] for src_axis in range(ragged_rank): for dst_axis in range(src_axis + 1, ragged_rank - 1): projected_splits[src_axis][dst_axis] = array_ops.gather( nested_splits[dst_axis], projected_splits[src_axis][dst_axis - 1]) # For each ragged dimension: nested_splits[axis] -> result_splits[axis]. result_splits = [] for axis in range(ragged_rank): # Get the length of each row for the input tensor for this dimension. input_lengths = nested_splits[axis][1:] - nested_splits[axis][:-1] # Multiply those lengths by the `multiples` of dimension axis+1, since # each value will be repeated that number of times. output_lengths = input_lengths * multiples[axis + 1] # Repeat ranges of the row lengths as necessary for them to be tiled in # each ragged dimension `d < axis`. (Start with dimension d=axis-1, and # work our way up to dimension d=0.) repeats = 1 for d in range(axis - 1, -1, -1): if const_multiples is None or const_multiples[d + 1] != 1: splits = projected_splits[d][axis - 1] * repeats output_lengths = ragged_util.repeat_ranges(output_lengths, splits, multiples[d + 1]) repeats *= multiples[d + 1] # Tile splits for the outermost (uniform) dimension. output_lengths = array_ops.tile(output_lengths, multiples[:1]) # Convert to splits. result_splits.append(ragged_util.lengths_to_splits(output_lengths)) return result_splits