def test_maxlength(self):
     """Checks the histograms bincount returns when `maxlength=3` is given."""
     # Each case pairs the input values with the expected histogram.  The
     # first case expects the value 5 to be left out of the three bins.
     cases = [
         ([5], [0, 0, 0]),
         ([1], [0, 1]),
         ([], []),
     ]
     with self.test_session(use_gpu=True):
         for values, expected in cases:
             histogram = math_ops.bincount(values, maxlength=3)
             self.assertAllEqual(histogram.eval(), expected)
 def test_maxlength(self):
   """Checks the histograms bincount returns when `maxlength=3` is given."""
   # (input values, expected histogram) pairs, checked in order.
   cases = (
       ([5], [0, 0, 0]),
       ([1], [0, 1]),
       ([], []),
   )
   with self.session(use_gpu=True):
     for values, expected in cases:
       histogram = self.evaluate(math_ops.bincount(values, maxlength=3))
       self.assertAllEqual(histogram, expected)
 def test_empty(self):
   """Checks bincount on empty input for several `minlength` values/dtypes."""
   with self.session(use_gpu=True):
     # Empty input: the expected histogram is `minlength` zeros.
     for minlength, expected in [(5, [0, 0, 0, 0, 0]), (1, [0]), (0, [])]:
       histogram = math_ops.bincount([], minlength=minlength)
       self.assertAllEqual(histogram.eval(), expected)

     # The requested dtype must show up on the evaluated result.
     for minlength, dtype in [(0, np.float32), (3, np.float64)]:
       histogram = math_ops.bincount([], minlength=minlength, dtype=dtype)
       self.assertEqual(histogram.eval().dtype, dtype)
 def test_empty(self):
     """Checks bincount on empty input for several `minlength` values/dtypes."""
     # (minlength, expected histogram) for the value checks.
     value_cases = [(5, [0, 0, 0, 0, 0]), (1, [0]), (0, [])]
     # (minlength, requested dtype) for the dtype checks.
     dtype_cases = [(0, np.float32), (3, np.float64)]
     with self.test_session(use_gpu=True):
         for minlength, expected in value_cases:
             result = math_ops.bincount([], minlength=minlength).eval()
             self.assertAllEqual(result, expected)
         for minlength, dtype in dtype_cases:
             result = math_ops.bincount(
                 [], minlength=minlength, dtype=dtype).eval()
             self.assertEqual(result.dtype, dtype)
Exemple #5
0
def segment_ids_to_row_splits(segment_ids,
                              num_segments=None,
                              out_type=None,
                              name=None):
    """Builds the RaggedTensor `row_splits` for a vector of segment ids.

    The result `splits` satisfies `splits[0] = 0` and
    `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

    >>> print(tf.ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]))
    tf.Tensor([0 3 3 5 6 9], shape=(6,), dtype=int64)

    Args:
      segment_ids: A 1-D integer Tensor.
      num_segments: A scalar integer giving the number of segments.  Defaults
        to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
      out_type: The dtype for the return value.  When omitted, the dtype of
        `segment_ids` is used if it is a Tensor, otherwise the dtype of
        `num_segments` if that is a Tensor, otherwise `tf.int64`.
      name: A name prefix for the returned tensor (optional).

    Returns:
      A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
    """
    # Resolve the output dtype: an explicit request wins, then the dtype of
    # whichever argument is already a Tensor, then int64.
    if out_type is not None:
        out_type = dtypes.as_dtype(out_type)
    elif isinstance(segment_ids, ops.Tensor):
        out_type = segment_ids.dtype
    elif isinstance(num_segments, ops.Tensor):
        out_type = num_segments.dtype
    else:
        out_type = dtypes.int64

    with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                        [segment_ids]) as name:
        # int64 ids are cast down to int32 because bincount currently only
        # supports int32 inputs.
        ids = ragged_util.convert_to_int_tensor(
            segment_ids, "segment_ids", dtype=dtypes.int32)
        ids.shape.assert_has_rank(1)

        has_num_segments = num_segments is not None
        if has_num_segments:
            num_segments = ragged_util.convert_to_int_tensor(
                num_segments, "num_segments", dtype=dtypes.int32)
            num_segments.shape.assert_has_rank(0)

        # Per-segment counts, then a running total with a leading zero.
        counts = math_ops.bincount(
            ids,
            minlength=num_segments,
            maxlength=num_segments,
            dtype=out_type)
        row_splits = array_ops.concat([[0], math_ops.cumsum(counts)], axis=0)

        # Attach a static shape when the segment count is known statically.
        static_num_segments = None
        if has_num_segments:
            static_num_segments = tensor_util.constant_value(num_segments)
        if static_num_segments is not None:
            row_splits.set_shape(
                tensor_shape.TensorShape([static_num_segments + 1]))

        return row_splits
def segment_ids_to_row_splits(segment_ids, num_segments=None,
                              out_type=None, name=None):
  """Builds the RaggedTensor `row_splits` for a vector of segment ids.

  The result `splits` satisfies `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer giving the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    out_type: The dtype for the return value.  When omitted, the dtype of
      `segment_ids` is used if it is a Tensor, otherwise the dtype of
      `num_segments` if that is a Tensor, otherwise `tf.int64`.
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D integer Tensor, with `shape=[num_segments + 1]`.
  """
  # Resolve the output dtype: an explicit request wins, then the dtype of
  # whichever argument is already a Tensor, then int64.
  if out_type is not None:
    out_type = dtypes.as_dtype(out_type)
  elif isinstance(segment_ids, ops.Tensor):
    out_type = segment_ids.dtype
  elif isinstance(num_segments, ops.Tensor):
    out_type = num_segments.dtype
  else:
    out_type = dtypes.int64

  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    # int64 ids are cast down to int32 because bincount currently only
    # supports int32 inputs.
    ids = ragged_util.convert_to_int_tensor(
        segment_ids, "segment_ids", dtype=dtypes.int32)
    ids.shape.assert_has_rank(1)

    has_num_segments = num_segments is not None
    if has_num_segments:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments", dtype=dtypes.int32)
      num_segments.shape.assert_has_rank(0)

    # Per-segment counts, then a running total with a leading zero.
    counts = math_ops.bincount(
        ids, minlength=num_segments, maxlength=num_segments, dtype=out_type)
    row_splits = array_ops.concat([[0], math_ops.cumsum(counts)], axis=0)

    # Attach a static shape when the segment count is known statically.
    static_num_segments = None
    if has_num_segments:
      static_num_segments = tensor_util.constant_value(num_segments)
    if static_num_segments is not None:
      row_splits.set_shape(tensor_shape.TensorShape([static_num_segments + 1]))

    return row_splits
 def test_random_without_weights(self):
   """Compares unweighted bincount with `np.bincount` using unit weights."""
   sample_count = 10000
   with self.session(use_gpu=True):
     np.random.seed(42)
     for dtype in (np.int32, np.float32):
       samples = np.random.randint(0, 1000, sample_count)
       unit_weights = np.ones(sample_count).astype(dtype)
       actual = math_ops.bincount(samples, None).eval()
       expected = np.bincount(samples, unit_weights)
       self.assertAllClose(actual, expected)
 def test_random_without_weights(self):
     """Compares unweighted bincount with `np.bincount` using unit weights."""
     sample_count = 10000
     with self.test_session(use_gpu=True):
         np.random.seed(42)
         for dtype in (np.int32, np.float32):
             samples = np.random.randint(0, 1000, sample_count)
             unit_weights = np.ones(sample_count).astype(dtype)
             actual = math_ops.bincount(samples, None).eval()
             expected = np.bincount(samples, unit_weights)
             self.assertAllClose(actual, expected)
 def test_random_with_weights(self):
   """Compares weighted bincount with `np.bincount` on seeded random data."""
   sample_count = 10000
   integer_dtypes = (dtypes.int32, dtypes.int64)
   with self.session(use_gpu=True):
     np.random.seed(42)
     for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
       samples = np.random.randint(0, 1000, sample_count)
       # Integer dtypes get signed integer weights; the float dtypes get
       # weights drawn from np.random.random.
       if dtype in integer_dtypes:
         sample_weights = np.random.randint(-100, 100, sample_count)
       else:
         sample_weights = np.random.random(sample_count)
       actual = math_ops.bincount(samples, sample_weights).eval()
       expected = np.bincount(samples, sample_weights)
       self.assertAllClose(actual, expected)
 def test_random_with_weights(self):
   """Compares weighted bincount with `np.bincount` on seeded random data."""
   sample_count = 10000
   with self.session(use_gpu=True):
     np.random.seed(42)
     for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
       samples = np.random.randint(0, 1000, sample_count)
       # Integer dtypes get signed integer weights; the float dtypes get
       # weights drawn from np.random.random.
       wants_int_weights = dtype == dtypes.int32 or dtype == dtypes.int64
       sample_weights = (
           np.random.randint(-100, 100, sample_count)
           if wants_int_weights else np.random.random(sample_count))
       actual = self.evaluate(math_ops.bincount(samples, sample_weights))
       expected = np.bincount(samples, sample_weights)
       self.assertAllClose(actual, expected)
  def testReadmeExample(self):
    """Round-trips random data through range_encode and range_decode."""
    data = random_ops.random_uniform((128, 128), 0, 10, dtype=dtypes.int32)

    # Build the CDF from the data itself: histogram -> inclusive cumulative
    # sum -> one padding element in front -> reshape to [1, 1, -1].
    counts = math_ops.bincount(data, minlength=10, maxlength=10)
    cumulative = math_ops.cumsum(counts, exclusive=False)
    padded = array_ops.pad(cumulative, [[1, 0]])
    cdf = array_ops.reshape(padded, [1, 1, -1])

    data = math_ops.cast(data, dtypes.int16)
    encoded = coder_ops.range_encode(data, cdf, precision=14)
    decoded = coder_ops.range_decode(
        encoded, array_ops.shape(data), cdf, precision=14)

    with self.test_session() as sess:
      # Fetch both tensors in a single run so they come from the same sample.
      original, roundtrip = sess.run((data, decoded))
      self.assertAllEqual(original, roundtrip)
  def testReadmeExample(self):
    """Round-trips random data through range_encode and range_decode."""
    data = random_ops.random_uniform((128, 128), 0, 10, dtype=dtypes.int32)

    # Build the CDF from the data itself: histogram -> inclusive cumulative
    # sum -> one padding element in front -> reshape to [1, 1, -1].
    counts = math_ops.bincount(data, minlength=10, maxlength=10)
    cumulative = math_ops.cumsum(counts, exclusive=False)
    padded = array_ops.pad(cumulative, [[1, 0]])
    cdf = array_ops.reshape(padded, [1, 1, -1])

    data = math_ops.cast(data, dtypes.int16)
    encoded = coder_ops.range_encode(data, cdf, precision=14)
    decoded = coder_ops.range_decode(
        encoded, array_ops.shape(data), cdf, precision=14)

    with self.test_session() as sess:
      # Fetch both tensors in a single run so they come from the same sample.
      original, roundtrip = sess.run((data, decoded))
      self.assertAllEqual(original, roundtrip)
Exemple #13
0
 def test_random_with_weights(self):
     """Compares weighted bincount with `np.bincount` on seeded random data.

     For each dtype in the list, 10000 random indices in [0, 1000) are
     drawn along with matching weights: signed integers for the integer
     dtypes and values from `np.random.random` for the float dtypes.
     """
     num_samples = 10000
     with self.test_session():
         np.random.seed(42)
         for dtype in [
                 dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64
         ]:
             arr = np.random.randint(0, 1000, num_samples)
             if dtype == dtypes.int32 or dtype == dtypes.int64:
                 weights = np.random.randint(-100, 100, num_samples)
             else:
                 weights = np.random.random(num_samples)
             # The float weights are summed per bin, so the two
             # implementations may differ by rounding; compare with a
             # tolerance rather than exact equality.
             self.assertAllClose(
                 math_ops.bincount(arr, weights=weights).eval(),
                 np.bincount(arr, weights))
  def test_values(self):
    """Checks bincount output against hand-computed histograms."""
    staircase = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
    # (input values, expected histogram) pairs, checked in order.
    cases = [
        ([1, 1, 1, 2, 2, 3], [0, 3, 2, 1]),
        (staircase, [0, 5, 4, 3, 2, 1]),
        (staircase + [0, 0, 0, 0, 0, 0], [6, 5, 4, 3, 2, 1]),
        ([], []),
        ([0, 0, 0], [3]),
        ([5], [0, 0, 0, 0, 0, 1]),
        (np.arange(10000), np.ones(10000)),
    ]
    with self.session(use_gpu=True):
      for values, expected in cases:
        self.assertAllEqual(math_ops.bincount(values).eval(), expected)
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
    """Builds the RaggedTensor `splits` vector for a vector of segment ids.

    The result `splits` satisfies `splits[0] = 0` and
    `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

    ```python
    >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
    [ 0 3 3 5 6 9 ]
    ```

    Args:
      segment_ids: A 1-D integer Tensor.
      num_segments: A scalar integer giving the number of segments.  Defaults
        to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
      name: A name prefix for the returned tensor (optional).

    Returns:
      A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
    """
    with ops.name_scope(name, "SegmentIdsToRaggedSplits",
                        [segment_ids]) as name:
        ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
        ids.shape.assert_has_rank(1)

        has_num_segments = num_segments is not None
        if has_num_segments:
            num_segments = ragged_util.convert_to_int_tensor(
                num_segments, "num_segments")
            num_segments.shape.assert_has_rank(0)

        # Per-segment counts, then a running total with a leading zero.
        counts = math_ops.bincount(
            ids,
            minlength=num_segments,
            maxlength=num_segments,
            dtype=dtypes.int64)
        row_splits = array_ops.concat([[0], math_ops.cumsum(counts)], axis=0)

        # Attach a static shape when the segment count is known statically.
        static_num_segments = None
        if has_num_segments:
            static_num_segments = tensor_util.constant_value(num_segments)
        if static_num_segments is not None:
            row_splits.set_shape(
                tensor_shape.TensorShape([static_num_segments + 1]))

        return row_splits
    def test_values(self):
        """Checks bincount output against hand-computed histograms."""
        staircase = [1, 1, 2, 1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
        # (input values, expected histogram) pairs, checked in order.
        cases = [
            ([1, 1, 1, 2, 2, 3], [0, 3, 2, 1]),
            (staircase, [0, 5, 4, 3, 2, 1]),
            (staircase + [0, 0, 0, 0, 0, 0], [6, 5, 4, 3, 2, 1]),
            ([], []),
            ([0, 0, 0], [3]),
            ([5], [0, 0, 0, 0, 0, 1]),
            (np.arange(10000), np.ones(10000)),
        ]
        with self.test_session(use_gpu=True):
            for values, expected in cases:
                histogram = math_ops.bincount(values).eval()
                self.assertAllEqual(histogram, expected)
Exemple #17
0
def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
  """Builds the RaggedTensor `row_splits` for a vector of segment ids.

  The result `splits` satisfies `splits[0] = 0` and
  `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:

  ```python
  >>> ragged.segment_ids_to_row_splits([0, 0, 0, 2, 2, 3, 4, 4, 4]).eval()
  [ 0 3 3 5 6 9 ]
  ```

  Args:
    segment_ids: A 1-D integer Tensor.
    num_segments: A scalar integer giving the number of segments.  Defaults
      to `max(segment_ids) + 1` (or zero if `segment_ids` is empty).
    name: A name prefix for the returned tensor (optional).

  Returns:
    A sorted 1-D int64 Tensor, with `shape=[num_segments + 1]`.
  """
  with ops.name_scope(name, "SegmentIdsToRaggedSplits", [segment_ids]) as name:
    ids = ragged_util.convert_to_int_tensor(segment_ids, "segment_ids")
    ids.shape.assert_has_rank(1)

    has_num_segments = num_segments is not None
    if has_num_segments:
      num_segments = ragged_util.convert_to_int_tensor(
          num_segments, "num_segments")
      num_segments.shape.assert_has_rank(0)

    # Per-segment counts, then a running total with a leading zero.
    counts = math_ops.bincount(
        ids, minlength=num_segments, maxlength=num_segments,
        dtype=dtypes.int64)
    row_splits = array_ops.concat([[0], math_ops.cumsum(counts)], axis=0)

    # Attach a static shape when the segment count is known statically.
    static_num_segments = None
    if has_num_segments:
      static_num_segments = tensor_util.constant_value(num_segments)
    if static_num_segments is not None:
      row_splits.set_shape(tensor_shape.TensorShape([static_num_segments + 1]))

    return row_splits
 def test_zero_weights(self):
     """Checks that all-zero weights give an all-zero histogram."""
     indices = np.arange(1000)
     zero_weights = np.zeros(1000)
     expected = np.zeros(1000)
     with self.test_session(use_gpu=True):
         histogram = math_ops.bincount(indices, zero_weights).eval()
         self.assertAllEqual(histogram, expected)
 def test_zero_weights(self):
   """Checks that all-zero weights give an all-zero histogram."""
   indices = np.arange(1000)
   zero_weights = np.zeros(1000)
   expected = np.zeros(1000)
   with self.test_session():
     histogram = math_ops.bincount(indices, weights=zero_weights).eval()
     self.assertAllEqual(histogram, expected)
 def test_maxlength(self):
   """Checks the histograms bincount returns when `maxlength=3` is given."""
   # (input values, expected histogram) pairs, checked in order.
   cases = [
       ([5], [0, 0, 0]),
       ([1], [0, 1]),
       ([], []),
   ]
   with self.test_session():
     for values, expected in cases:
       histogram = math_ops.bincount(values, maxlength=3).eval()
       self.assertAllEqual(histogram, expected)
Exemple #21
0
def from_value_rowids(values, value_rowids, nrows=None, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
            for row in range(nrows)]
  ```

  Warning: currently, this needs to cast value_rowids to int32 before
  converting, since `tf.bincount` only supports `int32`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
      one-to-one with `values`, and specifies each value's row index.  Must be
      nonnegative, and must be sorted in ascending order.
    nrows: An int64 scalar specifying the number of rows.  This should be
      specified if the `RaggedTensor` may contain empty trailing rows.  Must
      be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
      Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty).
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `nrows` is incompatible with `value_rowids`.

  #### Example:
    ```python
    >>> rt = ragged.from_value_rowids(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=5)
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    with ops.name_scope(name, 'RaggedFromValueRowIds',
                        [values, value_rowids, nrows]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        value_rowids = ops.convert_to_tensor(value_rowids,
                                             dtypes.int64,
                                             name='value_rowids')
        if nrows is None:
            # Infer nrows from the last row id, statically when possible.
            const_rowids = tensor_util.constant_value(value_rowids)
            if const_rowids is None:
                # Appending -1 makes the expression evaluate to 0 when
                # value_rowids is empty (the slice is empty, so [0] picks -1).
                nrows = array_ops.concat([value_rowids[-1:], [-1]],
                                         axis=0)[0] + 1
                const_nrows = None
            else:
                const_nrows = const_rowids[
                    -1] + 1 if const_rowids.size > 0 else 0
                nrows = ops.convert_to_tensor(const_nrows,
                                              dtypes.int64,
                                              name='nrows')
        else:
            nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
            const_nrows = tensor_util.constant_value(nrows)
            # Static validation: only possible when nrows (and, for the second
            # check, value_rowids) are known at graph-construction time.
            if const_nrows is not None:
                if const_nrows < 0:
                    raise ValueError('Expected nrows >= 0; got %d' %
                                     const_nrows)
                const_rowids = tensor_util.constant_value(value_rowids)
                if const_rowids is not None and const_rowids.size > 0:
                    if not const_nrows >= const_rowids[-1] + 1:
                        raise ValueError(
                            'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
                            'value_rowids[-1]=%d' %
                            (const_nrows, const_rowids[-1]))

        # Static shape checks: rowids is a vector, nrows a scalar, and the
        # outer dimension of values matches the number of row ids.
        value_rowids.shape.assert_has_rank(1)
        nrows.shape.assert_has_rank(0)
        values.shape[:1].assert_is_compatible_with(value_rowids.shape)

        # Convert value_rowids & nrows to row_splits.
        # Note: we don't use segment_ids_to_row_splits() here because we want
        # to save the intermediate value `row_lengths`, so we can cache it.
        # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast
        # (Remove the warning in the docstring when we do.)
        value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
        nrows_int32 = math_ops.cast(nrows, dtypes.int32)
        # minlength == maxlength == nrows yields exactly one count per row.
        row_lengths = math_ops.bincount(value_rowids_int32,
                                        minlength=nrows_int32,
                                        maxlength=nrows_int32,
                                        dtype=dtypes.int64)
        # row_splits is the running total of row_lengths with a leading 0.
        row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)],
                                      axis=0)
        if const_nrows is not None:
            row_lengths.set_shape([const_nrows])
            row_splits.set_shape([const_nrows + 1])

        # Pass along the intermediate tensors so they can be cached.
        return ragged_tensor.RaggedTensor(values,
                                          row_splits,
                                          cached_row_lengths=row_lengths,
                                          cached_value_rowids=value_rowids,
                                          cached_nrows=nrows,
                                          internal=True)
 def test_negative(self):
   """Checks that a negative input value raises InvalidArgumentError."""
   values_with_negative = [1, 2, 3, -1, 6, 8]
   # unsorted_segment_sum will only report InvalidArgumentError on CPU
   with self.cached_session(), self.assertRaises(
       errors.InvalidArgumentError):
     math_ops.bincount(values_with_negative).eval()
 def test_negative(self):
   """Checks that a negative input value raises InvalidArgumentError."""
   values_with_negative = [1, 2, 3, -1, 6, 8]
   with self.test_session(), self.assertRaises(errors.InvalidArgumentError):
     math_ops.bincount(values_with_negative).eval()
 def test_negative(self):
     """Checks that a negative input value raises InvalidArgumentError."""
     values_with_negative = [1, 2, 3, -1, 6, 8]
     # unsorted_segment_sum will only report InvalidArgumentError on CPU
     with self.cached_session(), self.assertRaises(
             errors.InvalidArgumentError):
         math_ops.bincount(values_with_negative).eval()
Exemple #25
0
  def from_value_rowids(cls,
                        value_rowids,
                        nrows=None,
                        validate=True,
                        preferred_dtype=None):
    """Creates a `RowPartition` with rows partitioned by `value_rowids`.

    This `RowPartition` divides a sequence `values` into rows by specifying
    which row each value should be added to:

    ```python
    rows = [[] for _ in range(nrows)]
    for (value, rowid) in zip(values, value_rowids):
      rows[rowid].append(value)
    ```

    Args:
      value_rowids: A 1-D integer tensor with shape `[nvals]`, which corresponds
        one-to-one with `values`, and specifies each value's row index.  Must be
        nonnegative, and must be sorted in ascending order.
      nrows: An integer scalar specifying the number of rows.  This should be
        specified if the `RowPartition` may contain empty trailing rows. Must
        be greater than `value_rowids[-1]` (or greater than or equal to zero if
        `value_rowids` is empty). Defaults to `value_rowids[-1] + 1` (or zero if
        `value_rowids` is empty).
      validate: If true, then use assertions to check that the arguments form a
        valid `RowPartition`.
      preferred_dtype: The dtype to encode value_rowids if it doesn't already
        have one. The default is tf.int64.

    Returns:
      A `RowPartition`.

    Raises:
      TypeError: If `validate` is not a `bool`.
      ValueError: If `nrows` is incompatible with `value_rowids`.

    #### Example:

    >>> print(RowPartition.from_value_rowids(
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=4))
    tf.RowPartition(row_splits=tf.Tensor([0 4 4 7 8], shape=(5,), dtype=int64))
    """
    if not isinstance(validate, bool):
      raise TypeError("validate must have type bool")
    with ops.name_scope(None, "RowPartitionFromValueRowIds",
                        [value_rowids, nrows]):
      # NOTE(review): _convert_row_partition is defined elsewhere; presumably
      # it converts value_rowids to a tensor using preferred_dtype -- confirm.
      value_rowids = cls._convert_row_partition(value_rowids, "value_rowids",
                                                preferred_dtype)
      if nrows is None:
        # Infer nrows from the last row id, statically when possible.
        const_rowids = tensor_util.constant_value(value_rowids)
        if const_rowids is None:
          # Appending -1 makes the expression evaluate to 0 when value_rowids
          # is empty (the slice is empty, so [0] picks the -1).
          nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
          const_nrows = None
        else:
          const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
          nrows = ops.convert_to_tensor(
              const_nrows, value_rowids.dtype, name="nrows")
      else:
        nrows = ops.convert_to_tensor(nrows, value_rowids.dtype, "nrows")
        const_nrows = tensor_util.constant_value(nrows)
        # Static validation: only possible when nrows (and, for the second
        # check, value_rowids) are known at graph-construction time.
        if const_nrows is not None:
          if const_nrows < 0:
            raise ValueError("Expected nrows >= 0; got %d" % const_nrows)
          const_rowids = tensor_util.constant_value(value_rowids)
          if const_rowids is not None and const_rowids.size > 0:
            if not const_nrows >= const_rowids[-1] + 1:
              raise ValueError(
                  "Expected nrows >= value_rowids[-1] + 1; got nrows=%d, "
                  "value_rowids[-1]=%d" % (const_nrows, const_rowids[-1]))

      # Static shape checks: rowids is a vector and nrows is a scalar.
      value_rowids.shape.assert_has_rank(1)
      nrows.shape.assert_has_rank(0)

      if validate:
        # Checks attached to value_rowids as control dependencies: ranks,
        # first row id non-negative, the _assert_monotonic_increasing check
        # (defined elsewhere), and last row id < nrows.
        msg = ("Arguments to from_value_rowids do not form a valid "
               "RowPartition")
        checks = [
            check_ops.assert_rank(value_rowids, 1, message=msg),
            check_ops.assert_rank(nrows, 0, message=msg),
            check_ops.assert_non_negative(value_rowids[:1], message=msg),
            _assert_monotonic_increasing(value_rowids, message=msg),
            check_ops.assert_less(value_rowids[-1:], nrows, message=msg),
        ]
        value_rowids = control_flow_ops.with_dependencies(checks, value_rowids)

      # Convert value_rowids & nrows to row_splits.
      # Note: we don't use segment_ids_to_row_splits() here because we want
      # to save the intermediate value `row_lengths`, so we can cache it.
      # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the
      # cast.
      value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
      nrows_int32 = math_ops.cast(nrows, dtypes.int32)
      # minlength == maxlength == nrows yields exactly one count per row.
      row_lengths = math_ops.bincount(
          value_rowids_int32,
          minlength=nrows_int32,
          maxlength=nrows_int32,
          dtype=value_rowids.dtype)
      # row_splits is the running total of row_lengths with a leading 0.
      row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
      if const_nrows is not None:
        row_lengths.set_shape([const_nrows])
        row_splits.set_shape([const_nrows + 1])

      # Hand every computed encoding to the constructor.
      return cls(
          row_splits=row_splits,
          row_lengths=row_lengths,
          value_rowids=value_rowids,
          nrows=nrows,
          internal=_row_partition_factory_key)
 def test_zero_weights(self):
   """Checks that all-zero weights give an all-zero histogram."""
   indices = np.arange(1000)
   zero_weights = np.zeros(1000)
   expected = np.zeros(1000)
   with self.session(use_gpu=True):
     histogram = math_ops.bincount(indices, zero_weights).eval()
     self.assertAllEqual(histogram, expected)
def from_value_rowids(values, value_rowids, nrows=None, name=None):
  """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
            for row in range(nrows)]
  ```

  Warning: currently, this needs to cast value_rowids to int32 before
  converting, since `tf.bincount` only supports `int32`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
      one-to-one with `values`, and specifies each value's row index.  Must be
      nonnegative, and must be sorted in ascending order.
    nrows: An int64 scalar specifying the number of rows.  This should be
      specified if the `RaggedTensor` may contain empty trailing rows.  Must
      be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
      Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty).
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `nrows` is incompatible with `value_rowids`.

  #### Example:
    ```python
    >>> rt = ragged.from_value_rowids(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=5)
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
  with ops.name_scope(name, 'RaggedFromValueRowIds',
                      [values, value_rowids, nrows]):
    values = convert_to_tensor_or_ragged_tensor(values, name='values')
    value_rowids = ops.convert_to_tensor(
        value_rowids, dtypes.int64, name='value_rowids')
    if nrows is None:
      # Infer nrows from the last row id, statically when possible.
      const_rowids = tensor_util.constant_value(value_rowids)
      if const_rowids is None:
        # Appending -1 makes the expression evaluate to 0 when value_rowids
        # is empty (the slice is empty, so [0] picks the -1).
        nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
        const_nrows = None
      else:
        const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
        nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name='nrows')
    else:
      nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
      const_nrows = tensor_util.constant_value(nrows)
      # Static validation: only possible when nrows (and, for the second
      # check, value_rowids) are known at graph-construction time.
      if const_nrows is not None:
        if const_nrows < 0:
          raise ValueError('Expected nrows >= 0; got %d' % const_nrows)
        const_rowids = tensor_util.constant_value(value_rowids)
        if const_rowids is not None and const_rowids.size > 0:
          if not const_nrows >= const_rowids[-1] + 1:
            raise ValueError(
                'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
                'value_rowids[-1]=%d' % (const_nrows, const_rowids[-1]))

    # Static shape checks: rowids is a vector, nrows a scalar, and the outer
    # dimension of values matches the number of row ids.
    value_rowids.shape.assert_has_rank(1)
    nrows.shape.assert_has_rank(0)
    values.shape[:1].assert_is_compatible_with(value_rowids.shape)

    # Convert value_rowids & nrows to row_splits.
    # Note: we don't use segment_ids_to_row_splits() here because we want
    # to save the intermediate value `row_lengths`, so we can cache it.
    # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast
    # (Remove the warning in the docstring when we do.)
    value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
    nrows_int32 = math_ops.cast(nrows, dtypes.int32)
    # minlength == maxlength == nrows yields exactly one count per row.
    row_lengths = math_ops.bincount(
        value_rowids_int32,
        minlength=nrows_int32,
        maxlength=nrows_int32,
        dtype=dtypes.int64)
    # row_splits is the running total of row_lengths with a leading 0.
    row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
    if const_nrows is not None:
      row_lengths.set_shape([const_nrows])
      row_splits.set_shape([const_nrows + 1])

    # Pass along the intermediate tensors so they can be cached.
    return ragged_tensor.RaggedTensor(
        values,
        row_splits,
        cached_row_lengths=row_lengths,
        cached_value_rowids=value_rowids,
        cached_nrows=nrows,
        internal=True)
Exemple #28
0
 def test_negative(self):
     """Checks that a negative input value raises InvalidArgumentError."""
     values_with_negative = [1, 2, 3, -1, 6, 8]
     with self.test_session(), self.assertRaises(
             errors.InvalidArgumentError):
         math_ops.bincount(values_with_negative).eval()