def _create_high_dimensional_ragged_dataset(self,
                                                strategy,
                                                include_weights=False,
                                                weight=0.5):
        ragged_features = (
            ragged_tensor.RaggedTensor.from_row_lengths(
                row_lengths=self.feature_watched_row_lengths_high_dimensional,
                values=self.feature_watched_values_high_dimensional),
            ragged_tensor.RaggedTensor.from_row_lengths(
                row_lengths=self.feature_favorited_row_lengths_high_dimensional,
                values=self.feature_favorited_values_high_dimensional),
            ragged_tensor.RaggedTensor.from_row_lengths(
                row_lengths=self.feature_friends_row_lengths_high_dimensional,
                values=self.feature_friends_values_high_dimensional))
        if include_weights:
            weights = []
            for ragged in ragged_features:
                values = (
                    array_ops.ones_like(ragged.values, dtype=dtypes.float32) *
                    weight)
                weights.append(
                    ragged_tensor.RaggedTensor.from_row_lengths(
                        row_lengths=ragged.row_lengths(), values=values))
            ragged_features = (ragged_features, tuple(weights))

        dataset = dataset_ops.DatasetV2.from_tensors(ragged_features)
        # Data arrives batched to self.data_batch_size; rebatch it to the
        # global batch size.
        return dataset.unbatch().repeat().batch(self.batch_size *
                                                strategy.num_replicas_in_sync,
                                                drop_remainder=True)
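To see the unbatch/repeat/batch rebatching pattern above in isolation, here is a minimal, self-contained sketch with the public TF 2 API. The toy `watched` feature and the hardcoded global batch size of 2 are hypothetical stand-ins for the `self.feature_*` attributes and `self.batch_size * strategy.num_replicas_in_sync`, and it assumes a recent TF 2.x where `tf.data` can batch ragged elements directly:

```python
import tensorflow as tf

# Hypothetical stand-in for one of the self.feature_* ragged features above.
watched = tf.ragged.constant([[[0], [1, 2]], [[3, 4, 5]]], dtype=tf.int64)
features = (watched,)

global_batch_size = 2  # stands in for batch_size * num_replicas_in_sync
dataset = (tf.data.Dataset.from_tensors(features)
           .unbatch()   # split the pre-batched element into per-example rows
           .repeat()
           .batch(global_batch_size, drop_remainder=True))

for (batch,) in dataset.take(1):
  print(batch)  # <tf.RaggedTensor [[[0], [1, 2]], [[3, 4, 5]]]>
```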
Example #2
def placeholder(dtype, ragged_rank, value_shape=None, name=None):
  """Creates a placeholder for a `tf.RaggedTensor` that will always be fed.

  **Important**: This ragged tensor will produce an error if evaluated.
  Its value must be fed using the `feed_dict` optional argument to
  `Session.run()`, `Tensor.eval()`, or `Operation.run()`.

  @compatibility(eager)
  Placeholders are not compatible with eager execution.
  @end_compatibility

  Args:
    dtype: The data type for the `RaggedTensor`.
    ragged_rank: The ragged rank for the `RaggedTensor`.
    value_shape: The shape for individual flat values in the `RaggedTensor`.
    name: A name for the operation (optional).

  Returns:
    A `RaggedTensor` that may be used as a handle for feeding a value, but
    not evaluated directly.

  Raises:
    RuntimeError: If eager execution is enabled.
  """
  if ragged_rank == 0:
    return array_ops.placeholder(dtype, value_shape, name)

  with ops.name_scope(name, "RaggedPlaceholder", []):
    flat_shape = tensor_shape.TensorShape([None]).concatenate(value_shape)
    result = array_ops.placeholder(dtype, flat_shape, "flat_values")
    for i in reversed(range(ragged_rank)):
      row_splits = array_ops.placeholder(dtypes.int64, [None],
                                         "row_splits_%d" % i)
      result = ragged_tensor.RaggedTensor(result, row_splits, internal=True)
    return result
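As a minimal graph-mode sketch of how such a placeholder can be fed, the example below uses the public `tf.compat.v1.ragged.placeholder` wrapper and feeds the component placeholders directly, which is one way to satisfy the `feed_dict` requirement described above:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

# Ragged placeholder with ragged_rank=1 and scalar flat values.
rt = tf.ragged.placeholder(dtype=tf.float32, ragged_rank=1, value_shape=[])
total = tf.reduce_sum(rt.flat_values)

with tf.Session() as sess:
  # Feed the underlying component placeholders: flat values plus the
  # row_splits describing rows [3.0, 1.0, 4.0] and [1.0, 5.0].
  print(sess.run(total, feed_dict={rt.flat_values: [3.0, 1.0, 4.0, 1.0, 5.0],
                                   rt.row_splits: [0, 3, 5]}))  # 14.0
```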
Example #3
def from_row_limits(values, row_limits, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `row_limits`.

  Equivalent to: `from_row_splits(values, concat([0, row_limits]))`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_limits: A 1-D int64 tensor with shape `[nrows]`.  Must be sorted in
      ascending order.  If `nrows>0`, then `row_limits[-1]` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_limits(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_limits=[4, 4, 7, 8, 8])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    with ops.name_scope(name, 'RaggedFromRowLimits', [values, row_limits]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        row_limits = ops.convert_to_tensor(row_limits, dtypes.int64,
                                           'row_limits')
        row_limits.shape.assert_has_rank(1)
        zero = array_ops.zeros([1], dtypes.int64)
        row_splits = array_ops.concat([zero, row_limits], axis=0)
        return ragged_tensor.RaggedTensor(values=values,
                                          row_splits=row_splits,
                                          internal=True)
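The "Equivalent to" claim in the docstring can be checked directly with the public TF 2 classmethods; a quick sketch:

```python
import tensorflow as tf

values = [3, 1, 4, 1, 5, 9, 2, 6]
row_limits = [4, 4, 7, 8, 8]

a = tf.RaggedTensor.from_row_limits(values=values, row_limits=row_limits)
b = tf.RaggedTensor.from_row_splits(values=values, row_splits=[0] + row_limits)
assert a.to_list() == b.to_list() == [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
```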
Example #4
def field_value(self, field_name):
  """See StructuredTensor.field_value for documentation."""
  if isinstance(field_name, (list, tuple)):
    value = self
    for f in field_name:
      value = value.field_value(f)
    return value
  return ragged_tensor.RaggedTensor(
      values=self._values.field_value(field_name),
      row_splits=self._row_splits,
      cached_row_lengths=self._row_lengths,
      cached_value_rowids=self._value_rowids,
      cached_nrows=self._nrows,
      uniform_row_length=self._uniform_row_length,
      internal=True)
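The method above looks the field up in the flat values and rewraps the result with the original row partition (plus its cached encodings). The public analogue of that rewrapping step is `RaggedTensor.with_values`, shown here on a plain ragged tensor for illustration:

```python
import tensorflow as tf

rt = tf.RaggedTensor.from_row_splits(values=[1, 2, 3, 4, 5],
                                     row_splits=[0, 2, 2, 5])
# Replace the flat values while keeping the existing row partition, which is
# how the field lookup above preserves the ragged structure.
doubled = rt.with_values(rt.values * 2)
print(doubled)  # <tf.RaggedTensor [[2, 4], [], [6, 8, 10]]>
```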
Example #5
def from_row_splits(values, row_splits, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `row_splits`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [values[row_splits[i]:row_splits[i + 1]]
            for i in range(len(row_splits) - 1)]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_splits: A 1-D int64 tensor with shape `[nrows+1]`.  Must not be empty,
      and must be sorted in ascending order.  `row_splits[0]` must be zero and
      `row_splits[-1]` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `row_splits` is an empty list.

  #### Example:
    ```python
    >>> rt = ragged.from_row_splits(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_splits=[0, 4, 4, 7, 8, 8])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    if isinstance(row_splits, (list, tuple)) and not row_splits:
        raise ValueError('row_splits tensor may not be empty.')
    with ops.name_scope(name, 'RaggedFromRowSplits', [values, row_splits]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        row_splits = ops.convert_to_tensor(row_splits, dtypes.int64,
                                           'row_splits')
        row_splits.shape.assert_has_rank(1)
        return ragged_tensor.RaggedTensor(values=values,
                                          row_splits=row_splits,
                                          internal=True)
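The list-comprehension correspondence in the docstring can be verified with the public API; a short sketch:

```python
import tensorflow as tf

values = [3, 1, 4, 1, 5, 9, 2, 6]
row_splits = [0, 4, 4, 7, 8, 8]

rt = tf.RaggedTensor.from_row_splits(values=values, row_splits=row_splits)
expected = [values[row_splits[i]:row_splits[i + 1]]
            for i in range(len(row_splits) - 1)]
assert rt.to_list() == expected  # [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
```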
Example #6
def from_row_lengths(values, row_lengths, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values.pop(0) for i in range(length)]
            for length in row_lengths]
  ```

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative.
      `sum(row_lengths)` must be `nvals`.
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  #### Example:
    ```python
    >>> rt = ragged.from_row_lengths(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     row_lengths=[4, 0, 3, 1, 0])
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64,
                                            'row_lengths')
        row_lengths.shape.assert_has_rank(1)
        row_limits = math_ops.cumsum(row_lengths)
        row_splits = array_ops.concat([[0], row_limits], axis=0)
        return ragged_tensor.RaggedTensor(values=values,
                                          row_splits=row_splits,
                                          cached_row_lengths=row_lengths,
                                          internal=True)
Example #7
def from_value_rowids(values, value_rowids, nrows=None, name=None):
    """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.

  The returned `RaggedTensor` corresponds with the python list defined by:

  ```python
  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
            for row in range(nrows)]
  ```

  Warning: currently, this casts `value_rowids` down to `int32` before
  computing row lengths, since `tf.bincount` only supports `int32`.

  Args:
    values: A potentially ragged tensor with shape `[nvals, ...]`.
    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
      one-to-one with `values`, and specifies each value's row index.  Must be
      nonnegative, and must be sorted in ascending order.
    nrows: An int64 scalar specifying the number of rows.  This should be
      specified if the `RaggedTensor` may contain empty trailing rows.  Must
      be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
      Defaults to `value_rowids[-1] + 1` (or zero if `value_rowids` is empty).
    name: A name prefix for the RaggedTensor (optional).

  Returns:
    A `RaggedTensor`.  `result.rank = values.rank + 1`.
    `result.ragged_rank = values.ragged_rank + 1`.

  Raises:
    ValueError: If `nrows` is incompatible with `value_rowids`.

  #### Example:
    ```python
    >>> rt = ragged.from_value_rowids(
    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
    ...     nrows=5)
    >>> rt.eval().tolist()
    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
    ```
  """
    with ops.name_scope(name, 'RaggedFromValueRowIds',
                        [values, value_rowids, nrows]):
        values = convert_to_tensor_or_ragged_tensor(values, name='values')
        value_rowids = ops.convert_to_tensor(value_rowids,
                                             dtypes.int64,
                                             name='value_rowids')
        if nrows is None:
            const_rowids = tensor_util.constant_value(value_rowids)
            if const_rowids is None:
                nrows = array_ops.concat([value_rowids[-1:], [-1]],
                                         axis=0)[0] + 1
                const_nrows = None
            else:
                const_nrows = const_rowids[
                    -1] + 1 if const_rowids.size > 0 else 0
                nrows = ops.convert_to_tensor(const_nrows,
                                              dtypes.int64,
                                              name='nrows')
        else:
            nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
            const_nrows = tensor_util.constant_value(nrows)
            if const_nrows is not None:
                if const_nrows < 0:
                    raise ValueError('Expected nrows >= 0; got %d' %
                                     const_nrows)
                const_rowids = tensor_util.constant_value(value_rowids)
                if const_rowids is not None and const_rowids.size > 0:
                    if not const_nrows >= const_rowids[-1] + 1:
                        raise ValueError(
                            'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
                            'value_rowids[-1]=%d' %
                            (const_nrows, const_rowids[-1]))

        value_rowids.shape.assert_has_rank(1)
        nrows.shape.assert_has_rank(0)
        values.shape[:1].assert_is_compatible_with(value_rowids.shape)

        # Convert value_rowids & nrows to row_splits.
        # Note: we don't use segment_ids_to_row_splits() here because we want
        # to save the intermediate value `row_lengths`, so we can cache it.
        # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast
        # (Remove the warning in the docstring when we do.)
        value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
        nrows_int32 = math_ops.cast(nrows, dtypes.int32)
        row_lengths = math_ops.bincount(value_rowids_int32,
                                        minlength=nrows_int32,
                                        maxlength=nrows_int32,
                                        dtype=dtypes.int64)
        row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)],
                                      axis=0)
        if const_nrows is not None:
            row_lengths.set_shape([const_nrows])
            row_splits.set_shape([const_nrows + 1])

        return ragged_tensor.RaggedTensor(values,
                                          row_splits,
                                          cached_row_lengths=row_lengths,
                                          cached_value_rowids=value_rowids,
                                          cached_nrows=nrows,
                                          internal=True)
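The bincount-and-cumsum conversion at the end can be reproduced with the public TF 2 ops on the docstring's example data; a minimal sketch:

```python
import tensorflow as tf

value_rowids = tf.constant([0, 0, 0, 0, 2, 2, 2, 3], dtype=tf.int32)
nrows = 5

# bincount yields the per-row lengths, including the empty rows 1 and 4;
# a leading zero plus the cumulative sum gives the row_splits encoding.
row_lengths = tf.math.bincount(value_rowids, minlength=nrows, maxlength=nrows)
row_splits = tf.concat([[0], tf.cumsum(row_lengths)], axis=0)
print(row_lengths.numpy())  # [4 0 3 1 0]
print(row_splits.numpy())   # [0 4 4 7 8 8]
```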