Example #1
def segment_ids_to_row_splits(segment_ids,
                              num_segments=None,
                              out_type=None,
                              name=None):
    """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  >>> print(tf.ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]))
  tf.Tensor([0 3 3 5 6 9], shape=(6,), dtype=int64)

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`,
      or `tf.int64` if `segment_ids` does not have a dtype.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
    if out_type is None:
        if isinstance(segment_ids, ops.Tensor):
            out_type = segment_ids.dtype
        elif isinstance(num_segments, ops.Tensor):
            out_type = num_segments.dtype
        else:
            out_type = dtypes.int64
    else:
        out_type = dtypes.as_dtype(out_type)
    with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                        [segment_ids]) as name:
        # Note: we cast int64 tensors to int32, since bincount currently only
        # supports int32 inputs.
        segment_ids = ragged_util.convert_to_int_tensor(segment_ids,
                                                        "segment_ids",
                                                        dtype=dtypes.int32)
        segment_ids.shape.assert_has_rank(1)
        if num_segments is not None:
            num_segments = ragged_util.convert_to_int_tensor(
                num_segments, "num_segments", dtype=dtypes.int32)
            num_segments.shape.assert_has_rank(0)

        row_lengths = math_ops.bincount(segment_ids,
                                        minlength=num_segments,
                                        maxlength=num_segments,
                                        dtype=out_type)
        splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

        # Update shape information, if possible.
        if num_segments is not None:
            const_num_segments = tensor_util.constant_value(num_segments)
            if const_num_segments is not None:
                splits.set_shape(
                    tensor_shape.TensorShape([const_num_segments + 1]))

        return splits
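The recurrence in the docstring can be mirrored in a few lines of plain Python. Below is a minimal sketch (a hypothetical helper, not part of TensorFlow) that computes the same `splits` vector by direct counting:

```python
def row_splits_from_segment_ids(segment_ids, num_segments=None):
    """Pure-Python sketch of the splits recurrence above (hypothetical helper)."""
    if num_segments is None:
        num_segments = (max(segment_ids) + 1) if segment_ids else 0
    splits = [0]
    for i in range(num_segments):
        # splits[i+1] = splits[i] + count(segment_ids == i)
        splits.append(splits[-1] + segment_ids.count(i))
    return splits

print(row_splits_from_segment_ids([0, 0, 0, 2, 2, 3, 4, 4, 4]))
# [0, 3, 3, 5, 6, 9]
```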
Example #2
def segment_ids_to_row_splits(segment_ids, num_segments=None,
                              out_type=None, name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  Defaults to `segment_ids.dtype`,
      or `tf.int64` if `segment_ids` does not have a dtype.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
  if out_type is None:
    if isinstance(segment_ids, ops.Tensor):
      out_type = segment_ids.dtype
    elif isinstance(num_segments, ops.Tensor):
      out_type = num_segments.dtype
    else:
      out_type = dtypes.int64
  else:
    out_type = dtypes.as_dtype(out_type)
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    # Note: we cast int64 tensors to int32, since bincount currently only
    # supports int32 inputs.
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids",
                                                    dtype=dtypes.int32)
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(num_segments,
                                                       "num_segments",
                                                       dtype=dtypes.int32)
      num_segments.shape.assert_has_rank(0)

    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=out_type)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
Example #3
    def from_dim_sizes(dim_sizes):
        """Constructs a ragged shape from a list of dimension sizes.

    This list contains a single tensor for each dimension, where the tensor
    is a scalar if the dimension is uniform, or a vector if the dimension is
    ragged.

    Args:
      dim_sizes: List of int64 scalars or vectors.

    Returns:
      A RaggedTensorDynamicShape.
    """
        with ops.name_scope(None, 'RaggedTensorDynamicShapeFromDimensionSizes',
                            [dim_sizes]):
            dim_sizes = tuple(
                ragged_util.convert_to_int_tensor(
                    size, dtype=dtypes.int64, name='dim_sizes')
                for size in dim_sizes)
            # Split the dimensions into partitioned & inner dimensions.
            inner_split = 0
            for dim, dim_size in enumerate(dim_sizes):
                if dim_size.shape.ndims == 1:
                    inner_split = dim + 1
                elif dim_size.shape.ndims != 0:
                    raise ValueError(
                        'Each dim_size must be a scalar or a vector')
            return RaggedTensorDynamicShape(dim_sizes[:inner_split],
                                            dim_sizes[inner_split:])
Example #4
  def from_dim_sizes(dim_sizes):
    """Constructs a ragged shape from a list of dimension sizes.

    This list contains a single tensor for each dimension, where the tensor
    is a scalar if the dimension is uniform, or a vector if the dimension is
    ragged.

    Args:
      dim_sizes: List of int64 scalars or vectors.

    Returns:
      A RaggedTensorDynamicShape.
    """
    with ops.name_scope(None, 'RaggedTensorDynamicShapeFromDimensionSizes',
                        [dim_sizes]):
      dim_sizes = tuple(
          ragged_util.convert_to_int_tensor(
              size, dtype=dtypes.int64, name='dim_sizes') for size in dim_sizes)
      # Split the dimensions into partitioned & inner dimensions.
      inner_split = 0
      for dim, dim_size in enumerate(dim_sizes):
        if dim_size.shape.ndims == 1:
          inner_split = dim + 1
        elif dim_size.shape.ndims != 0:
          raise ValueError('Each dim_size must be a scalar or a vector')
      return RaggedTensorDynamicShape(dim_sizes[:inner_split],
                                      dim_sizes[inner_split:])
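To see what the loop above does, here is a standalone sketch of the partitioned/inner split, using plain ints for uniform dimensions and lists for ragged ones (validation omitted; `split_dim_sizes` is a hypothetical name):

```python
def split_dim_sizes(dim_sizes):
    # A scalar size (int) means a uniform dimension; a vector (list) means ragged.
    inner_split = 0
    for dim, dim_size in enumerate(dim_sizes):
        if isinstance(dim_size, list):  # rank 1: ragged dimension
            inner_split = dim + 1       # everything up to here is partitioned
    return dim_sizes[:inner_split], dim_sizes[inner_split:]

# Shape [3, (3, 2, 4), 2]: dims 0-1 are partitioned (dim 1 is ragged), dim 2 is inner.
partitioned, inner = split_dim_sizes([3, [3, 2, 4], 2])
print(partitioned)  # [3, [3, 2, 4]]
print(inner)        # [2]
```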
Example #5
    def __init__(self, partitioned_dim_sizes, inner_dim_sizes):
        """Creates a RaggedTensorDynamicShape.

    Args:
      partitioned_dim_sizes: A `list` of 0-D or 1-D integer `Tensor`, one for
        each partitioned dimension.  If dimension `d` is uniform, then
        `partitioned_dim_sizes[d]` must be an integer scalar, specifying the
        size of all slices across dimension `d`.  If dimension `d` is ragged,
        then `partitioned_dim_sizes[d]` must be an integer vector, specifying
        the size of each slice across dimension `d`.
      inner_dim_sizes: A 1-D integer `Tensor`, whose length is equal to the
        number of inner dimensions.  `inner_dim_sizes[n]` is the size of all
        slices across the `n`th inner dimension (which is the
        `(len(partitioned_dim_sizes)+n)`th dimension in the overall tensor).
    """
        assert isinstance(partitioned_dim_sizes, (list, tuple))
        with ops.name_scope(None, 'RaggedTensorDynamicShape',
                            (partitioned_dim_sizes, inner_dim_sizes)):
            partitioned_dim_sizes = tuple(
                ragged_util.convert_to_int_tensor(
                    size,
                    dtype=dtypes.int64,
                    name='partitioned_dimension_size')
                for size in partitioned_dim_sizes)
            inner_dim_sizes = ragged_util.convert_to_int_tensor(
                inner_dim_sizes, dtype=dtypes.int64, name='inner_dim_sizes')

            # Validate shapes.
            if partitioned_dim_sizes:
                for axis, dimension_size in enumerate(partitioned_dim_sizes):
                    if dimension_size.shape.ndims is None:
                        raise ValueError(
                            'rank of partitioned_dim_sizes[%d] is unknown' %
                            axis)
                    dimension_size.shape.with_rank_at_most(1)
                if partitioned_dim_sizes[0].shape.ndims == 1:
                    raise ValueError(
                        'outermost partitioned dimension must be uniform')
                if partitioned_dim_sizes[-1].shape.ndims == 0:
                    raise ValueError(
                        'innermost partitioned dimension must be ragged')
            inner_dim_sizes.shape.assert_has_rank(1)

            self._partitioned_dim_sizes = partitioned_dim_sizes
            self._inner_dim_sizes = inner_dim_sizes
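As a usage sketch, the dynamic shape of `tf.ragged.constant([[1, 2], [3]])` could be constructed as below, assuming this class is importable; note the validation above requires the outermost partitioned dimension to be uniform and the innermost partitioned dimension to be ragged:

```python
# 2 rows (uniform outer dimension), row lengths [2, 1] (ragged dimension),
# and no trailing uniform (inner) dimensions.
shape = RaggedTensorDynamicShape(
    partitioned_dim_sizes=[2, [2, 1]],
    inner_dim_sizes=[])

# These would fail the validation above:
# RaggedTensorDynamicShape([[2, 1], 2], [])  # outermost dimension is ragged
# RaggedTensorDynamicShape([2], [])          # innermost partitioned dim is uniform
```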
Example #6
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
    """Generates the RaggedTensor `splits` vector corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
    with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                        [segment_ids]) as name:
        segment_ids = ragged_util.convert_to_int_tensor(
            segment_ids, "segment_ids")
        segment_ids.shape.assert_has_rank(1)
        if num_segments is not None:
            num_segments = ragged_util.convert_to_int_tensor(
                num_segments, "num_segments")
            num_segments.shape.assert_has_rank(0)

        row_lengths = math_ops.bincount(segment_ids,
                                        minlength=num_segments,
                                        maxlength=num_segments,
                                        dtype=dtypes.int64)
        splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

        # Update shape information, if possible.
        if num_segments is not None:
            const_num_segments = tensor_util.constant_value(num_segments)
            if const_num_segments is not None:
                splits.set_shape(
                    tensor_shape.TensorShape([const_num_segments + 1]))

        return splits
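A small NumPy sketch of why bincount-plus-cumsum produces the splits, including how `num_segments` pads the result (np.bincount has no `maxlength`, so this assumes all ids are less than `num_segments`):

```python
import numpy as np

segment_ids = np.array([0, 0, 2])
num_segments = 5

row_lengths = np.bincount(segment_ids, minlength=num_segments)  # [2, 0, 1, 0, 0]
splits = np.concatenate([[0], np.cumsum(row_lengths)])
print(splits)  # [0 2 2 3 3 3] -- shape [num_segments + 1]
```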
Example #7
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.

  Returns an integer vector `splits`, where `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer indicating the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    segment_ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    segment_ids.shape.assert_has_rank(1)
    if num_segments is not None:
      num_segments = ragged_util.convert_to_int_tensor(num_segments,
                                                       "num_segments")
      num_segments.shape.assert_has_rank(0)

    row_lengths = math_ops.bincount(
        segment_ids,
        minlength=num_segments,
        maxlength=num_segments,
        dtype=dtypes.int64)
    splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)

    # Update shape information, if possible.
    if num_segments is not None:
      const_num_segments = tensor_util.constant_value(num_segments)
      if const_num_segments is not None:
        splits.set_shape(tensor_shape.TensorShape([const_num_segments + 1]))

    return splits
Example #8
  def __init__(self, partitioned_dim_sizes, inner_dim_sizes):
    """Creates a RaggedTensorDynamicShape.

    Args:
      partitioned_dim_sizes: A `list` of 0-D or 1-D integer `Tensor`, one for
        each partitioned dimension.  If dimension `d` is uniform, then
        `partitioned_dim_sizes[d]` must be an integer scalar, specifying the
        size of all slices across dimension `d`.  If dimension `d` is ragged,
        then `partitioned_dim_sizes[d]` must be an integer vector, specifying
        the size of each slice across dimension `d`.
      inner_dim_sizes: A 1-D integer `Tensor`, whose length is equal to the
        number of inner dimensions.  `inner_dim_sizes[n]` is the size of all
        slices across the `n`th inner dimension (which is the
        `(len(partitioned_dim_sizes)+n)`th dimension in the overall tensor).
    """
    assert isinstance(partitioned_dim_sizes, (list, tuple))
    with ops.name_scope(None, 'RaggedTensorDynamicShape',
                        (partitioned_dim_sizes, inner_dim_sizes)):
      partitioned_dim_sizes = tuple(
          ragged_util.convert_to_int_tensor(
              size, dtype=dtypes.int64, name='partitioned_dimension_size')
          for size in partitioned_dim_sizes)
      inner_dim_sizes = ragged_util.convert_to_int_tensor(
          inner_dim_sizes, dtype=dtypes.int64, name='inner_dim_sizes')

      # Validate shapes.
      if partitioned_dim_sizes:
        for axis, dimension_size in enumerate(partitioned_dim_sizes):
          if dimension_size.shape.ndims is None:
            raise ValueError(
                'rank of partitioned_dim_sizes[%d] is unknown' % axis)
          dimension_size.shape.with_rank_at_most(1)
        if partitioned_dim_sizes[0].shape.ndims == 1:
          raise ValueError('outermost partitioned dimension must be uniform')
        if partitioned_dim_sizes[-1].shape.ndims == 0:
          raise ValueError('innermost partitioned dimension must be ragged')
      inner_dim_sizes.shape.assert_has_rank(1)

      self._partitioned_dim_sizes = partitioned_dim_sizes
      self._inner_dim_sizes = inner_dim_sizes
Example #9
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
    """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

  The values of `input` are replicated `multiples[i]` times along the
  `i`th dimension (for each dimension `i`).  For every dimension `axis` in
  `input`, the length of each output element in that dimension is the
  length of the corresponding input element multiplied by `multiples[axis]`.

  Args:
    input: A `RaggedTensor`.
    multiples: A 1-D integer `Tensor`.  Length must be the same as the number of
      dimensions in `input`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> ragged.tile(rt, [3, 2])
    [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
    ```
  """
    with ops.name_scope(name, 'RaggedTile', [input, multiples]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input,
                                                                 name='input')
        if not ragged_tensor.is_ragged(input):
            return array_ops.tile(input, multiples, name)
        multiples = ragged_util.convert_to_int_tensor(
            multiples, name='multiples', dtype=input.row_splits.dtype)
        multiples.shape.assert_has_rank(1)

        # If the constant value of `multiples` is available, then we can use it
        # to skip tiling dimensions where `multiples=1`.
        const_multiples = tensor_util.constant_value(multiples)

        return ragged_tensor.RaggedTensor.from_nested_row_splits(
            _tile_ragged_values(input, multiples, const_multiples),
            _tile_ragged_splits(input, multiples, const_multiples),
            validate=False)
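As a usage sketch: in TF 2.x the public entry point is `tf.tile`, which dispatches to the ragged implementation when handed a `RaggedTensor`; the output below follows the docstring example:

```python
import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])
tiled = tf.tile(rt, multiples=[3, 2])
print(tiled.to_list())
# [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
```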
Example #10
def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
  """Constructs a `RaggedTensor` by tiling a given `RaggedTensor`.

  The values of `input` are replicated `multiples[i]` times along the
  `i`th dimension (for each dimension `i`).  For every dimension `axis` in
  `input`, the length of each output element in that dimension is the
  length of the corresponding input element multiplied by `multiples[axis]`.

  Args:
    input: A `RaggedTensor`.
    multiples: A 1-D integer `Tensor`.  Length must be the same as the number of
      dimensions in `input`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` with the same type, rank, and ragged_rank as `input`.

  #### Example:
    ```python
    >>> rt = tf.ragged.constant([[1, 2], [3]])
    >>> ragged.tile(rt, [3, 2])
    [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
    ```
  """
  with ops.name_scope(name, 'RaggedTile', [input, multiples]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        input, name='input')
    if not ragged_tensor.is_ragged(input):
      return array_ops.tile(input, multiples, name)
    multiples = ragged_util.convert_to_int_tensor(
        multiples, name='multiples', dtype=input.row_splits.dtype)
    multiples.shape.assert_has_rank(1)

    # If the constant value of `multiples` is available, then we can use it
    # to skip tiling dimensions where `multiples=1`.
    const_multiples = tensor_util.constant_value(multiples)

    return ragged_tensor.RaggedTensor.from_nested_row_splits(
        _tile_ragged_values(input, multiples, const_multiples),
        _tile_ragged_splits(input, multiples, const_multiples),
        validate=False)
Example #11
  def broadcast_dimension(self, axis, lengths):
    """Returns a shape that is broadcast-compatible with self & lengths.

    * If dimension[axis] is uniform and lengths is a scalar, then check
      that either lengths==1 or dimension[axis]==1 or dimension[axis]==lengths,
      and tile dimension[axis] with tf.where(lengths==dimension[axis], 1,
      lengths) repeats.

    * If dimension[axis] is uniform and lengths is a vector, then check
      that dimension[axis]==1, and raggedly tile dimension[axis] with
      lengths repeats.  (We could skip tiling if we statically know that
      lengths == 1.)

    * If dimension[axis] is ragged and lengths is a scalar, then check
      that lengths==1.

    * If dimension[axis] is ragged and lengths is a vector, then check
      that self.dimension_size(axis) == lengths.

    Args:
      axis: `int`.  The dimension to broadcast.
      lengths: 0-D or 1-D integer `Tensor`.

    Returns:
      A `RaggedTensorDynamicShape`.
    """
    lengths = ragged_util.convert_to_int_tensor(
        lengths, name='lengths', dtype=self.dim_size_dtype)
    # Check whether lengths is a scalar (for uniform dimensions) or
    # vector (for ragged dimensions).
    if lengths.shape.ndims is None:
      raise ValueError('lengths must have a known rank.')
    elif lengths.shape.ndims > 1:
      raise ValueError('lengths must be a scalar or vector')
    else:
      lengths_is_scalar = (lengths.shape.ndims == 0)

    # Verify that the shapes are compatible.
    if self.is_ragged(axis):
      if lengths_is_scalar:
        condition = math_ops.equal(lengths, 1)
      else:
        condition = math_ops.reduce_all(
            math_ops.equal(lengths, self.dimension_size(axis)))
    else:
      axis_dim_size = self.dimension_size(axis)
      if lengths_is_scalar:
        condition = (
            math_ops.equal(lengths, 1) | math_ops.equal(axis_dim_size, 1)
            | math_ops.equal(axis_dim_size, lengths))
      else:
        condition = math_ops.equal(axis_dim_size, 1)
    broadcast_err = [
        'Unable to broadcast: dimension size mismatch in dimension', axis,
        'lengths=', lengths, 'dim_size=',
        self.dimension_size(axis)
    ]
    broadcast_check = control_flow_ops.Assert(
        condition, data=broadcast_err, summarize=10)

    with ops.control_dependencies([broadcast_check]):
      # Partitioned dimensions:
      if axis < self.num_partitioned_dimensions:
        if self.is_ragged(axis):
          # Use an identity op to make sure the check actually gets run.
          return RaggedTensorDynamicShape(
              self._partitioned_dim_sizes,
              array_ops.identity(self.inner_dim_sizes))
        else:
          return self._broadcast_uniform_partitioned_dimension(axis, lengths)

      # Inner dimensions:
      else:
        if lengths_is_scalar:
          return self._broadcast_inner_dimension_to_uniform(axis, lengths)
        else:
          if axis == 0:
            raise ValueError('Unable to broadcast: '
                             'outermost dimension must be uniform.')
          return self._broadcast_inner_dimension_to_ragged(axis, lengths)
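The four docstring cases reduce to a single compatibility predicate; here is a pure-Python sketch (a hypothetical `is_broadcast_compatible` helper, with ragged dimensions represented as lists of slice sizes and uniform ones as ints):

```python
def is_broadcast_compatible(dim_is_ragged, dim_size, lengths):
    """dim_size: int (uniform) or list of per-slice sizes (ragged).
    lengths: int (scalar) or list (vector)."""
    lengths_is_scalar = isinstance(lengths, int)
    if dim_is_ragged:
        if lengths_is_scalar:
            return lengths == 1                    # ragged dim only matches scalar 1
        return list(lengths) == list(dim_size)     # must match slice-for-slice
    else:
        if lengths_is_scalar:
            return lengths == 1 or dim_size == 1 or dim_size == lengths
        return dim_size == 1                       # uniform dim can be raggedly tiled

print(is_broadcast_compatible(False, 3, 3))          # True
print(is_broadcast_compatible(False, 1, [2, 0, 1]))  # True: tile raggedly
print(is_broadcast_compatible(True, [2, 1], [2, 1])) # True
print(is_broadcast_compatible(True, [2, 1], 2))      # False
```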
Example #12
    def broadcast_dimension(self, axis, lengths):
        """Returns a shape that is broadcast-compatible with self & lengths.

    * If dimension[axis] is uniform and lengths is a scalar, then check
      that either lengths==1 or dimension[axis]==1 or dimension[axis]==lengths,
      and tile dimension[axis] with tf.where(lengths==dimension[axis], 1,
      lengths) repeats.

    * If dimension[axis] is uniform and lengths is a vector, then check
      that dimension[axis]==1, and raggedly tile dimension[axis] with
      lengths repeats.  (We could skip tiling if we statically know that
      lengths == 1.)

    * If dimension[axis] is ragged and lengths is a scalar, then check
      that lengths==1.

    * If dimension[axis] is ragged and lengths is a vector, then check
      that self.dimension_size(axis) == lengths.

    Args:
      axis: `int`.  The dimension to broadcast.
      lengths: 0-D or 1-D integer `Tensor`.

    Returns:
      A `RaggedTensorDynamicShape`.
    """
        lengths = ragged_util.convert_to_int_tensor(lengths,
                                                    name='lengths',
                                                    dtype=self.dim_size_dtype)
        # Check whether lengths is a scalar (for uniform dimensions) or
        # vector (for ragged dimensions).
        if lengths.shape.ndims is None:
            raise ValueError('lengths must have a known rank.')
        elif lengths.shape.ndims > 1:
            raise ValueError('lengths must be a scalar or vector')
        else:
            lengths_is_scalar = (lengths.shape.ndims == 0)

        # Verify that the shapes are compatible.
        if self.is_ragged(axis):
            if lengths_is_scalar:
                condition = math_ops.equal(lengths, 1)
            else:
                condition = math_ops.reduce_all(
                    math_ops.equal(lengths, self.dimension_size(axis)))
        else:
            axis_dim_size = self.dimension_size(axis)
            if lengths_is_scalar:
                condition = (math_ops.equal(lengths, 1)
                             | math_ops.equal(axis_dim_size, 1)
                             | math_ops.equal(axis_dim_size, lengths))
            else:
                condition = math_ops.equal(axis_dim_size, 1)
        broadcast_err = [
            'Unable to broadcast: dimension size mismatch in dimension', axis,
            'lengths=', lengths, 'dim_size=',
            self.dimension_size(axis)
        ]
        broadcast_check = control_flow_ops.Assert(condition,
                                                  data=broadcast_err,
                                                  summarize=10)

        with ops.control_dependencies([broadcast_check]):
            # Partitioned dimensions:
            if axis < self.num_partitioned_dimensions:
                if self.is_ragged(axis):
                    # Use an identity op to make sure the check actually gets run.
                    return RaggedTensorDynamicShape(
                        self._partitioned_dim_sizes,
                        array_ops.identity(self.inner_dim_sizes))
                else:
                    return self._broadcast_uniform_partitioned_dimension(
                        axis, lengths)

            # Inner dimensions:
            else:
                if lengths_is_scalar:
                    return self._broadcast_inner_dimension_to_uniform(
                        axis, lengths)
                else:
                    if axis == 0:
                        raise ValueError(
                            'Unable to broadcast: '
                            'outermost dimension must be uniform.')
                    return self._broadcast_inner_dimension_to_ragged(
                        axis, lengths)
Example #13
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
    """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
      `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.

  Raises:
    ValueError: If both `lengths` and `padding` are specified.
  """
    if lengths is not None and padding is not None:
        raise ValueError('Specify lengths or padding, but not both')
    if not isinstance(ragged_rank, int):
        raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
    if ragged_rank <= 0:
        raise ValueError('ragged_rank must be greater than 0; got %s' %
                         ragged_rank)

    with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
        tensor = ops.convert_to_tensor(tensor, name='tensor')
        tensor.shape.with_rank_at_least(ragged_rank + 1)
        input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
        ncols = input_shape[1]

        # Handle ragged_rank>1 via recursion:
        # If the output should have multiple ragged dimensions, then first
        # flatten the tensor to eliminate all but the last ragged dimension,
        # and recursively convert that flattened tensor.  Then add on the splits
        # for the dimensions that we flattened out.
        if ragged_rank > 1:
            # Flatten `tensor` to eliminate all but the last ragged dimension.
            new_shape = array_ops.concat([
                constant_op.constant([-1], dtypes.int64),
                input_shape[ragged_rank:]
            ],
                                         axis=0)
            flattened = array_ops.reshape(tensor, new_shape)
            # Recursively convert the flattened tensor.
            values = from_tensor(flattened, lengths, padding)
            # The total number of elements in each dimension.  E.g., if
            # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
            dim_size = math_ops.cumprod(input_shape)
            # Construct splits tensors for the dimensions that were flattened.
            new_splits = [
                math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
                for dim in range(1, ragged_rank)
            ]
            return ragged_factory_ops.from_nested_row_splits(
                values, new_splits)

        # If padding was specified, then use it to find row lengths.
        if padding is not None:
            padding = ops.convert_to_tensor(padding,
                                            name='padding',
                                            dtype=tensor.dtype)
            padding.shape.assert_is_compatible_with(tensor.shape[2:])

            # Find places where the padding is equal to the tensor.  (This will
            # broadcast `padding` across the outermost 2 dimensions of `tensor`,
            # so `has_default_value.shape = tensor.shape`.)
            has_default_value = math_ops.equal(padding, tensor)

            # If the padding isn't a scalar, then require that all values in the
            # padding match each item in the tensor.  After this block of code,
            # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
            # use reduce_all for both cases, because when you pass an empty `axis`
            # list to reduce_all, it reduces all axes; but we want it to reduce no
            # axes -- i.e., to be a no-op.)
            tensor_rank = array_ops.rank(tensor)
            reduce_axis = math_ops.range(2, tensor_rank)
            has_default = control_flow_ops.cond(
                tensor_rank > 2, lambda: math_ops.reduce_all(has_default_value,
                                                             axis=reduce_axis),
                lambda: has_default_value)
            has_default.set_shape(tensor_shape.TensorShape([None, None]))
            has_default.set_shape(tensor.shape[:2])

            # Use has_default to find the length of each row: for each non-default
            # item in a row, calculate the length that the row needs to have to
            # include that item; and then take the max of those values (across each
            # row).
            has_nondefault = math_ops.logical_not(has_default)
            has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
            length_for_nondefault_value = (
                has_nondefault *
                array_ops.expand_dims(math_ops.range(1, ncols + 1), 0))
            lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)

        # If we have lengths (either directly supplied, or computed from paddings),
        # then use those to construct splits; and then use masking to get the
        # corresponding values.
        if lengths is not None:
            lengths = ragged_util.convert_to_int_tensor(
                lengths, 'lengths', dtypes.int64)
            lengths.shape.assert_has_rank(1)
            lengths = math_ops.minimum(lengths, ncols)
            lengths = math_ops.maximum(lengths, 0)
            limits = math_ops.cumsum(lengths)
            splits = array_ops.concat(
                [array_ops.zeros([1], dtypes.int64), limits], axis=0)
            mask = array_ops.sequence_mask(lengths, maxlen=ncols)
            values = array_ops.boolean_mask(tensor, mask)
            return ragged_factory_ops.from_row_splits(values, splits)

        # If neither padding nor lengths were specified, then create a splits
        # vector that contains no default values, and reshape the input tensor
        # to form the values for the RaggedTensor.
        nrows = input_shape[0]
        nvals = nrows * ncols
        splits = math_ops.range(nrows + 1) * ncols
        values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
        values = array_ops.reshape(tensor, values_shape)
        return ragged_factory_ops.from_row_splits(values, splits)
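A usage sketch against the modern public API, assuming TF 2.x where this conversion is exposed as `tf.RaggedTensor.from_tensor`; the outputs follow the docstring examples:

```python
import tensorflow as tf

dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])

print(tf.RaggedTensor.from_tensor(dt).to_list())
# [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
print(tf.RaggedTensor.from_tensor(dt, lengths=[2, 0, 3]).to_list())
# [[5, 7], [], [6, 0, 0]]
print(tf.RaggedTensor.from_tensor(dt, padding=0).to_list())
# [[5, 7], [0, 3], [6]]
```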
Example #14
def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
  """Converts a `Tensor` into a `RaggedTensor`.

  The set of absent/default values may be specified using a vector of lengths
  or a padding value (but not both).  If `lengths` is specified, then the
  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
  If `padding` is specified, then any row *suffix* consisting entirely of
  `padding` will be excluded from the returned `RaggedTensor`.  If neither
  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
  have no absent/default values.

  Examples:

  ```python
  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
  >>> ragged.from_tensor(dt).eval().tolist()
  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
  [[5, 7], [], [6, 0, 0]]
  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
  [[5, 7], [0, 3], [6]]
  ```

  Args:
    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
      higher.
    lengths: An optional set of row lengths, specified using a 1-D integer
      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
      `tensor`).  If specified, then `output[row]` will contain
      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
    padding: An optional padding value.  If specified, then any row suffix
      consisting entirely of `padding` will be excluded from the returned
      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
      and with `shape=tensor.shape[ragged_rank + 1:]`.
    ragged_rank: Integer specifying the ragged rank for the returned
      `RaggedTensor`.  Must be greater than zero.
    name: A name prefix for the returned tensors (optional).

  Returns:
    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
    returned ragged tensor is compatible with the shape of `tensor`.

  Raises:
    ValueError: If both `lengths` and `padding` are specified.
  """
  if lengths is not None and padding is not None:
    raise ValueError('Specify lengths or padding, but not both')
  if not isinstance(ragged_rank, int):
    raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
  if ragged_rank <= 0:
    raise ValueError('ragged_rank must be greater than 0; got %s' % ragged_rank)

  with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
    tensor = ops.convert_to_tensor(tensor, name='tensor')
    tensor.shape.with_rank_at_least(ragged_rank + 1)
    input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
    ncols = input_shape[1]

    # Handle ragged_rank>1 via recursion:
    # If the output should have multiple ragged dimensions, then first
    # flatten the tensor to eliminate all but the last ragged dimension,
    # and recursively convert that flattened tensor.  Then add on the splits
    # for the dimensions that we flattened out.
    if ragged_rank > 1:
      # Flatten `tensor` to eliminate all but the last ragged dimension.
      new_shape = array_ops.concat(
          [constant_op.constant([-1], dtypes.int64), input_shape[ragged_rank:]],
          axis=0)
      flattened = array_ops.reshape(tensor, new_shape)
      # Recursively convert the flattened tensor.
      values = from_tensor(flattened, lengths, padding)
      # The total number of elements in each dimension.  E.g., if
      # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
      dim_size = math_ops.cumprod(input_shape)
      # Construct splits tensors for the dimensions that were flattened.
      new_splits = [
          math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
          for dim in range(1, ragged_rank)
      ]
      return ragged_factory_ops.from_nested_row_splits(values, new_splits)

    # If padding was specified, then use it to find row lengths.
    if padding is not None:
      padding = ops.convert_to_tensor(
          padding, name='padding', dtype=tensor.dtype)
      padding.shape.assert_is_compatible_with(tensor.shape[2:])

      # Find places where the padding is equal to the tensor.  (This will
      # broadcast `padding` across the outermost 2 dimensions of `tensor`,
      # so `has_default_value.shape = tensor.shape`.)
      has_default_value = math_ops.equal(padding, tensor)

      # If the padding isn't a scalar, then require that all values in the
      # padding match each item in the tensor.  After this block of code,
      # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
      # use reduce_all for both cases, because when you pass an empty `axis`
      # list to reduce_all, it reduces all axes; but we want it to reduce no
      # axes -- i.e., to be a no-op.)
      tensor_rank = array_ops.rank(tensor)
      reduce_axis = math_ops.range(2, tensor_rank)
      has_default = control_flow_ops.cond(
          tensor_rank > 2,
          lambda: math_ops.reduce_all(has_default_value, axis=reduce_axis),
          lambda: has_default_value)
      has_default.set_shape(tensor_shape.TensorShape([None, None]))
      has_default.set_shape(tensor.shape[:2])

      # Use has_default to find the length of each row: for each non-default
      # item in a row, calculate the length that the row needs to have to
      # include that item; and then take the max of those values (across each
      # row).
      has_nondefault = math_ops.logical_not(has_default)
      has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
      length_for_nondefault_value = (
          has_nondefault * array_ops.expand_dims(
              math_ops.range(1, ncols + 1), 0))
      lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)

    # If we have lengths (either directly supplied, or computed from paddings),
    # then use those to construct splits; and then use masking to get the
    # corresponding values.
    if lengths is not None:
      lengths = ragged_util.convert_to_int_tensor(lengths, 'lengths',
                                                  dtypes.int64)
      lengths.shape.assert_has_rank(1)
      lengths = math_ops.minimum(lengths, ncols)
      lengths = math_ops.maximum(lengths, 0)
      limits = math_ops.cumsum(lengths)
      splits = array_ops.concat(
          [array_ops.zeros([1], dtypes.int64), limits], axis=0)
      mask = array_ops.sequence_mask(lengths, maxlen=ncols)
      values = array_ops.boolean_mask(tensor, mask)
      return ragged_factory_ops.from_row_splits(values, splits)

    # If neither padding nor lengths were specified, then create a splits
    # vector that contains no default values, and reshape the input tensor
    # to form the values for the RaggedTensor.
    nrows = input_shape[0]
    nvals = nrows * ncols
    splits = math_ops.range(nrows + 1) * ncols
    values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
    values = array_ops.reshape(tensor, values_shape)
    return ragged_factory_ops.from_row_splits(values, splits)