def testRaggedFromTensor(self,
                          tensor,
                          expected,
                          lengths=None,
                          padding=None,
                          ragged_rank=1,
                          use_ragged_rank=True,
                          expected_shape=None):
     dt = constant_op.constant(tensor)
     if use_ragged_rank:
         rt = RaggedTensor.from_tensor(dt, lengths, padding, ragged_rank)
     else:
         rt = RaggedTensor.from_tensor(dt, lengths, padding)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape),
                     '%s is incompatible with %s' % (dt.shape, rt.shape))
     if expected_shape is not None:
         self.assertEqual(rt.shape.as_list(), expected_shape)
     self.assertAllEqual(rt, expected)
     self.assertAllEqual(
         rt,
         RaggedTensor.from_nested_row_splits(rt.flat_values,
                                             rt.nested_row_splits,
                                             validate=True))
    def testNonRaggedSparseTensor(self):
        # "index_suffix" means the value of the innermost dimension of the index
        # (i.e., indices[i][-1]).
        # See comments in _assert_sparse_indices_are_ragged_right() for more
        # details/background.

        # index_suffix of first index is not zero.
        st1 = sparse_tensor.SparseTensor(indices=[[0, 1], [0, 2], [2, 0]],
                                         values=[1, 2, 3],
                                         dense_shape=[3, 3])
        with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                     r'.*SparseTensor is not right-ragged'):
            self.evaluate(RaggedTensor.from_sparse(st1))
        # index_suffix of an index that starts a new row is not zero.
        st2 = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1], [2, 1]],
                                         values=[1, 2, 3],
                                         dense_shape=[3, 3])
        with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                     r'.*SparseTensor is not right-ragged'):
            self.evaluate(RaggedTensor.from_sparse(st2))
        # index_suffix of an index that continues a row skips a cell.
        st3 = sparse_tensor.SparseTensor(indices=[[0, 1], [0, 1], [0, 3]],
                                         values=[1, 2, 3],
                                         dense_shape=[3, 3])
        with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                     r'.*SparseTensor is not right-ragged'):
            self.evaluate(RaggedTensor.from_sparse(st3))
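
For contrast, a minimal sketch (using the public tf.SparseTensor / tf.RaggedTensor API rather than the internal test imports above) of a SparseTensor that is right-ragged, so from_sparse succeeds:

import tensorflow as tf

# Each row's column indices start at 0 and increase by 1 with no gaps,
# so the SparseTensor is right-ragged and converts cleanly.
st = tf.SparseTensor(
    indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
    values=[1, 2, 3, 4, 5],
    dense_shape=[4, 3])
rt = tf.RaggedTensor.from_sparse(st)
print(rt.to_list())  # [[1, 2, 3], [4], [], [5]]
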
Example #3
    def testConstruction(self):
        tensor_values = constant_op.constant(
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
        values = WrappedTensor(tensor_values)

        row_splits = constant_op.constant([0, 2, 2, 5, 6, 8], dtypes.int64)
        rt = RaggedTensor.from_row_splits(values, row_splits)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_splits, row_splits)

        row_starts = constant_op.constant([0, 2, 2, 5, 6], dtypes.int64)
        rt = RaggedTensor.from_row_starts(values, row_starts)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_starts(), row_starts)

        row_limits = constant_op.constant([2, 2, 5, 6, 8], dtypes.int64)
        rt = RaggedTensor.from_row_limits(values, row_limits)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_limits(), row_limits)

        row_lengths = constant_op.constant([2, 0, 3, 1, 2], dtypes.int64)
        rt = RaggedTensor.from_row_lengths(values, row_lengths)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.row_lengths(), row_lengths)

        rt = RaggedTensor.from_uniform_row_length(values, 4)
        self.assertIsInstance(rt.values, WrappedTensor)
        self.assertAllEqual(rt.values.value, tensor_values)
        self.assertAllEqual(rt.uniform_row_length, 4)
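
A minimal sketch (public tf API, plain tensors instead of the WrappedTensor used above) showing that the row-partition factories exercised in this test all describe the same ragged structure:

import tensorflow as tf

values = tf.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
# The same 5-row partition of 8 values, expressed four equivalent ways.
rt1 = tf.RaggedTensor.from_row_splits(values, row_splits=[0, 2, 2, 5, 6, 8])
rt2 = tf.RaggedTensor.from_row_starts(values, row_starts=[0, 2, 2, 5, 6])
rt3 = tf.RaggedTensor.from_row_limits(values, row_limits=[2, 2, 5, 6, 8])
rt4 = tf.RaggedTensor.from_row_lengths(values, row_lengths=[2, 0, 3, 1, 2])
print(rt1.to_list() == rt2.to_list() == rt3.to_list() == rt4.to_list())  # True

# from_uniform_row_length instead partitions the values into equal-sized rows.
rt5 = tf.RaggedTensor.from_uniform_row_length(values, uniform_row_length=4)
print(rt5.to_list())  # [[b'a', b'b', b'c', b'd'], [b'e', b'f', b'g', b'h']]
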
  def _whitespace_tokenize_codepoints_with_offsets(self, codepoints_tensor):
    """Tokenizes a tensor of codepoints with rank of 1.

    Args:
      codepoints_tensor: Single-dimension Tensor of codepoints to tokenize.

    Returns:
      Tuple of tokenized codepoints and their start/limit offsets (relative to
      the input codepoints), each with shape [num_strings, (num_tokens or
      num_offsets)].
    """
    (output_values, output_values_inner_splits, output_offset_starts,
     output_offset_limits, output_outer_splits) = (
         gen_whitespace_tokenizer.whitespace_tokenize_with_offsets(
             input_values=codepoints_tensor.flat_values,
             input_splits=codepoints_tensor.row_splits))
    codepoint_tokens = RaggedTensor.from_nested_row_splits(
        flat_values=output_values,
        nested_row_splits=[output_outer_splits, output_values_inner_splits])
    codepoint_offset_starts = RaggedTensor.from_nested_row_splits(
        flat_values=output_offset_starts,
        nested_row_splits=[output_outer_splits])
    codepoint_offset_limits = RaggedTensor.from_nested_row_splits(
        flat_values=output_offset_limits,
        nested_row_splits=[output_outer_splits])
    return (codepoint_tokens, codepoint_offset_starts, codepoint_offset_limits)
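
A minimal sketch (public tf API, with concrete splits standing in for the generated-op outputs above) of how from_nested_row_splits reassembles the tokenizer results: the outer splits partition strings into tokens and the inner splits partition tokens into codepoints.

import tensorflow as tf

flat_codepoints = tf.constant([104, 105, 98, 121, 101, 33])  # 'h' 'i' 'b' 'y' 'e' '!'
inner_splits = tf.constant([0, 2, 5, 6], dtype=tf.int64)     # token -> codepoint boundaries
outer_splits = tf.constant([0, 2, 3], dtype=tf.int64)        # string -> token boundaries
tokens = tf.RaggedTensor.from_nested_row_splits(
    flat_values=flat_codepoints,
    nested_row_splits=[outer_splits, inner_splits])
print(tokens.to_list())  # [[[104, 105], [98, 121, 101]], [[33]]]
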
    def testPartialShapes(self,
                          tensor,
                          tensor_shape,
                          shape=None,
                          expected=None):
        if expected is None:
            expected = tensor

        if context.executing_eagerly():
            return  # static shapes are always fully defined in eager mode.

        dt = constant_op.constant(tensor)
        for ragged_rank in range(1, len(dt.shape) - 1):
            dt_placeholder = array_ops.placeholder_with_default(
                tensor, tensor_shape)
            rt = RaggedTensor.from_tensor(dt_placeholder,
                                          ragged_rank=ragged_rank)
            self.assertIsInstance(rt, RaggedTensor)
            self.assertEqual(rt.ragged_rank, ragged_rank)
            self.assertTrue(
                dt.shape.is_compatible_with(rt.shape),
                '%s is incompatible with %s' % (dt.shape, rt.shape))
            if shape is not None:
                self.assertEqual(rt.shape.as_list(), shape)
            self.assertAllEqual(rt, expected.tolist())
            self.assertAllEqual(
                rt,
                RaggedTensor.from_nested_row_splits(rt.flat_values,
                                                    rt.nested_row_splits,
                                                    validate=True))
Example #6
    def tokenize_with_offsets(self, input, name=None):  # pylint: disable=redefined-builtin
        """Tokenizes a tensor of UTF-8 strings.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.
      name: The name argument that is passed to the op function.

    Returns:
      A `RaggedTensor` of tokenized text. The returned shape is the shape of the
      input tensor with an added ragged dimension for tokens of each string.
    """
        with ops.name_scope(name, "SentenceTokenizer", [input, self]):
            input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                input)
            if input_tensor.shape.ndims is None:
                raise ValueError(
                    "Rank of input_tensor must be statically known.")
            if ragged_tensor.is_ragged(input_tensor):
                # Recursively process the values of the ragged tensor
                (tokens, starts,
                 limits) = self.tokenize_with_offsets(input_tensor.flat_values)
                tokens = input_tensor.with_flat_values(tokens)
                starts = input_tensor.with_flat_values(starts)
                limits = input_tensor.with_flat_values(limits)
                return (tokens, starts, limits)
            else:
                if input_tensor.shape.ndims > 1:
                    # Convert the input tensor to ragged and process it.
                    return self.tokenize_with_offsets(
                        ragged_conversion_ops.from_tensor(input_tensor))
                elif input_tensor.shape.ndims == 0:
                    (tokens, starts, limits) = self.tokenize_with_offsets(
                        array_ops.stack([input_tensor]))
                    tokens = tokens.values
                    starts = starts.values
                    limits = limits.values
                    return (tokens, starts, limits)
                else:
                    # Our rank 1 tensor is the correct shape, so we can process it as
                    # normal.
                    (output_values, output_splits, output_offset_starts,
                     output_offset_limits) = (
                         gen_sentencepiece_tokenizer.
                         sentencepiece_tokenize_with_offsets_op(
                             self._resource_handle, input_tensor,
                             self.nbest_size, self.alpha, self.add_bos,
                             self.add_eos, self.reverse, self.out_type))
                    tokens = RaggedTensor.from_nested_row_splits(
                        flat_values=output_values,
                        nested_row_splits=[output_splits],
                        validate=False)
                    starts = RaggedTensor.from_nested_row_splits(
                        flat_values=output_offset_starts,
                        nested_row_splits=[output_splits],
                        validate=False)
                    limits = RaggedTensor.from_nested_row_splits(
                        flat_values=output_offset_limits,
                        nested_row_splits=[output_splits],
                        validate=False)
                    return (tokens, starts, limits)
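
The ragged branch above relies on with_flat_values to re-attach the outer ragged structure after tokenizing the flattened strings. A minimal sketch of that pattern with the public tf API, using tf.strings.split as a stand-in for the SentencePiece op:

import tensorflow as tf

# A [2, None] ragged batch of strings.
batch = tf.ragged.constant([['hello world', 'bye'], ['a b c']])
# Tokenize the flattened rank-1 string tensor; the result is [num_strings, None].
flat_tokens = tf.strings.split(batch.flat_values)
# Re-attach the original outer splits; the result gains one ragged dimension.
tokens = batch.with_flat_values(flat_tokens)
print(tokens.to_list())
# [[[b'hello', b'world'], [b'bye']], [[b'a', b'b', b'c']]]
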
Example #7
 def testErrorsWithUniformRowLength(self, slice_spec, expected, message):
   """Test that rt.__getitem__(slice_spec) == expected."""
   rt = RaggedTensor.from_uniform_row_length(
       RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_3D_VALUES,
                                    EXAMPLE_RAGGED_TENSOR_3D_SPLITS),
       EXAMPLE_RAGGED_TENSOR_3D_ROWLEN)
   self.assertAllEqual(rt, EXAMPLE_RAGGED_TENSOR_3D)
   self._TestGetItemException(rt, slice_spec, expected, message)
 def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
   dt = array_ops.zeros(dt_shape)
   for ragged_rank in range(1, len(dt_shape) - 1):
     rt = RaggedTensor.from_tensor(dt, lengths, padding, ragged_rank)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape))
     self.assertAllEqual(rt, expected)
     self.assertAllEqual(rt, RaggedTensor.from_nested_row_splits(
         rt.flat_values, rt.nested_row_splits, validate=True))
  def testDocStringExamples(self):
    # The examples from RaggedTensor.from_tensor.__doc__.
    dt = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
    self.assertRaggedEqual(
        RaggedTensor.from_tensor(dt), [[5, 7, 0], [0, 3, 0], [6, 0, 0]])

    self.assertRaggedEqual(
        RaggedTensor.from_tensor(dt, lengths=[1, 0, 3]), [[5], [], [6, 0, 0]])

    self.assertRaggedEqual(
        RaggedTensor.from_tensor(dt, padding=0), [[5, 7], [0, 3], [6]])
  def testGoodPartialSparseTensorRank(self):
    if not context.executing_eagerly():
      st1 = sparse_tensor.SparseTensor(
          indices=[[0, 0]],
          values=[0],
          dense_shape=array_ops.placeholder(dtypes.int64))
      st2 = sparse_tensor.SparseTensor(
          indices=array_ops.placeholder(dtypes.int64),
          values=[0],
          dense_shape=[4, 3])

      # Shouldn't throw ValueError
      RaggedTensor.from_sparse(st1)
      RaggedTensor.from_sparse(st2)
Example #12
  def testWithUniformRowLength(self, slice_spec, expected, expected_shape):
    """Test that rt.__getitem__(slice_spec) == expected."""
    rt = RaggedTensor.from_uniform_row_length(
        RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_3D_VALUES,
                                     EXAMPLE_RAGGED_TENSOR_3D_SPLITS),
        EXAMPLE_RAGGED_TENSOR_3D_ROWLEN)
    self.assertAllEqual(rt, EXAMPLE_RAGGED_TENSOR_3D)
    self.assertIsNot(rt.uniform_row_length, None)
    self._TestGetItem(rt, slice_spec, expected, expected_shape)

    # If the result is 3D, then check that it still has a uniform row length:
    actual = rt.__getitem__(slice_spec)
    if actual.shape.rank == 3:
      self.assertIsNot(actual.uniform_row_length, None)
      self.assertAllEqual(actual.uniform_row_length, expected_shape[1])
 def testHighDimensions(self):
   # Use distinct prime numbers for all dimension shapes in this test, so
   # we can see any errors that are caused by mixing up dimension sizes.
   dt = array_ops.reshape(
       math_ops.range(3 * 5 * 7 * 11 * 13 * 17), [3, 5, 7, 11, 13, 17])
   for ragged_rank in range(1, 4):
     rt = RaggedTensor.from_tensor(dt, ragged_rank=ragged_rank)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(
         dt.shape.is_compatible_with(rt.shape),
         '%s is incompatible with %s' % (dt.shape, rt.shape))
     self.assertAllEqual(rt, self.evaluate(dt).tolist())
     self.assertAllEqual(rt, RaggedTensor.from_nested_row_splits(
         rt.flat_values, rt.nested_row_splits, validate=True))
Example #14
 def testErrorsWithPlaceholderShapes(self, slice_spec, expected, message):
   """Test that rt.__getitem__(slice_spec) == expected."""
   if not context.executing_eagerly():
     # Intentionally use an unknown shape for `values`.
     values = array_ops.placeholder_with_default([0], None)
     rt = RaggedTensor.from_row_splits(values, [0, 1])
     self._TestGetItemException(rt, slice_spec, expected, message)
Example #15
 def testErrorsWithRaggedRank2(self, slice_spec, expected, message):
   """Test that rt.__getitem__(slice_spec) == expected."""
   rt = RaggedTensor.from_nested_row_splits(
       EXAMPLE_RAGGED_TENSOR_4D_VALUES,
       [EXAMPLE_RAGGED_TENSOR_4D_SPLITS1, EXAMPLE_RAGGED_TENSOR_4D_SPLITS2])
   self.assertAllEqual(rt, EXAMPLE_RAGGED_TENSOR_4D)
   self._TestGetItemException(rt, slice_spec, expected, message)
 def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
   dt = array_ops.zeros(dt_shape)
   rt = RaggedTensor.from_tensor(dt, lengths, padding)
   self.assertEqual(type(rt), RaggedTensor)
   self.assertEqual(rt.ragged_rank, 1)
   self.assertTrue(dt.shape.is_compatible_with(rt.shape))
   self.assertRaggedEqual(rt, expected)
 def test_empty_tensor(self):
     input_data = RaggedTensor.from_value_rowids(
         values=constant_op.constant([], dtype=dtypes.int64),
         value_rowids=constant_op.constant([], dtype=dtypes.int64),
         nrows=constant_op.constant(2, dtype=dtypes.int64),
         validate=True)
     self._compare_to_reference(input_data, [[], []], default_value=3)
Example #18
 def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
     dt = array_ops.zeros(dt_shape)
     rt = RaggedTensor.from_tensor(dt, lengths, padding)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, 1)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape))
     self.assertAllEqual(rt, expected)
Example #19
 def testRaggedFromTensor(self,
                          tensor,
                          expected,
                          lengths=None,
                          padding=None,
                          ragged_rank=1,
                          use_ragged_rank=True):
     dt = constant_op.constant(tensor)
     if use_ragged_rank:
         rt = RaggedTensor.from_tensor(dt, lengths, padding, ragged_rank)
     else:
         rt = RaggedTensor.from_tensor(dt, lengths, padding)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape),
                     '%s is incompatible with %s' % (dt.shape, rt.shape))
     self.assertAllEqual(rt, expected)
    def testEmpty(self):
        st = sparse_tensor.SparseTensor(indices=array_ops.zeros(
            [0, 2], dtype=dtypes.int64),
                                        values=[],
                                        dense_shape=[4, 3])
        rt = RaggedTensor.from_sparse(st)

        self.assertRaggedEqual(rt, [[], [], [], []])
  def testDocStringExample(self):
    st = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
        values=[1, 2, 3, 4, 5],
        dense_shape=[4, 3])
    rt = RaggedTensor.from_sparse(st)

    self.assertRaggedEqual(rt, [[1, 2, 3], [4], [], [5]])
  def testEmpty(self):
    st = sparse_tensor.SparseTensor(
        indices=array_ops.zeros([0, 2], dtype=dtypes.int64),
        values=[],
        dense_shape=[4, 3])
    rt = RaggedTensor.from_sparse(st)

    self.assertRaggedEqual(rt, [[], [], [], []])
Example #23
 def test_empty_tensor_with_shape(self):
     input_data = RaggedTensor.from_value_rowids(
         values=constant_op.constant([], dtype=dtypes.int64),
         value_rowids=constant_op.constant([], dtype=dtypes.int64),
         nrows=constant_op.constant(2, dtype=dtypes.int64),
         validate=True)
     actual = input_data.to_tensor(default_value=3, shape=[2, 3])
     self.assertAllEqual(actual, [[3, 3, 3], [3, 3, 3]])
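
A minimal sketch (public tf API; assumes a TF version whose RaggedTensor.to_tensor accepts the shape argument) of the padding behavior exercised here, on a non-empty ragged tensor:

import tensorflow as tf

rt = tf.ragged.constant([[1, 2], [3]])
# Pad ragged rows with the default value up to the bounding shape.
print(rt.to_tensor(default_value=0).numpy().tolist())                # [[1, 2], [3, 0]]
# An explicit shape pads (or truncates) each dimension to the requested size.
print(rt.to_tensor(default_value=3, shape=[2, 3]).numpy().tolist())  # [[1, 2, 3], [3, 3, 3]]
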
 def test_preserve_shape_roundtrip(self, input_shape, to_tensor_shape,
                                   expected_shape):
     tensor = array_ops.zeros(input_shape)
     ragged_from_tensor = RaggedTensor.from_tensor(tensor, ragged_rank=2)
     recovered_tensor = ragged_from_tensor.to_tensor(shape=to_tensor_shape)
     self.assertAllEqual(tensor.shape.as_list(), expected_shape)
     self.assertAllEqual(ragged_from_tensor.shape.as_list(), expected_shape)
     self.assertAllEqual(recovered_tensor.shape.as_list(), expected_shape)
Example #25
  def testWithRaggedRank1(self, slice_spec, expected):
    """Test that rt.__getitem__(slice_spec) == expected."""
    # Ragged tensor
    rt = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES,
                                      EXAMPLE_RAGGED_TENSOR_2D_SPLITS)

    self.assertAllEqual(rt, EXAMPLE_RAGGED_TENSOR_2D)
    self._TestGetItem(rt, slice_spec, expected)
    def testDocStringExample(self):
        st = sparse_tensor.SparseTensor(indices=[[0, 0], [0, 1], [0, 2],
                                                 [1, 0], [3, 0]],
                                        values=[1, 2, 3, 4, 5],
                                        dense_shape=[4, 3])
        rt = RaggedTensor.from_sparse(st)

        self.assertRaggedEqual(rt, [[1, 2, 3], [4], [], [5]])
  def testDocStringExamples(self):
    # The examples from RaggedTensor.from_tensor.__doc__.
    dt = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
    self.assertAllEqual(
        RaggedTensor.from_tensor(dt), [[5, 7, 0], [0, 3, 0], [6, 0, 0]])

    self.assertAllEqual(
        RaggedTensor.from_tensor(dt, lengths=[1, 0, 3]), [[5], [], [6, 0, 0]])

    self.assertAllEqual(
        RaggedTensor.from_tensor(dt, padding=0), [[5, 7], [0, 3], [6]])

    dt_3d = constant_op.constant([[[5, 0], [7, 0], [0, 0]],
                                  [[0, 0], [3, 0], [0, 0]],
                                  [[6, 0], [0, 0], [0, 0]]])
    self.assertAllEqual(
        RaggedTensor.from_tensor(dt_3d, lengths=([2, 0, 3], [1, 1, 2, 0, 1])),
        [[[5], [7]], [], [[6, 0], [], [0]]])
Example #28
 def testWithPlaceholderShapes(self, slice_spec, expected):
   """Test that rt.__getitem__(slice_spec) == expected."""
   # Intentionally use an unknown shape for `splits`, to force the code path
   # that deals with having nrows unknown at graph construction time.
   splits = constant_op.constant(
       EXAMPLE_RAGGED_TENSOR_2D_SPLITS, dtype=dtypes.int64)
   splits = array_ops.placeholder_with_default(splits, None)
   rt = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES, splits)
   self.assertAllEqual(rt, EXAMPLE_RAGGED_TENSOR_2D)
   self._TestGetItem(rt, slice_spec, expected)
 def test_already_dense_simple(self):
     """This studies a tensor initialized with value_rowids and nrows."""
     input_data = RaggedTensor.from_value_rowids(
         values=constant_op.constant([6, 7, 8, 9, 10, 11],
                                     dtype=dtypes.int64),
         value_rowids=constant_op.constant([0, 0, 0, 1, 1, 1],
                                           dtype=dtypes.int64),
         nrows=constant_op.constant(2, dtype=dtypes.int64),
         validate=True)
     self._compare_to_reference(input_data, [[6, 7, 8], [9, 10, 11]])
 def test_value_transposed(self):
   # Check that transposed data is not an issue.
   my_value = array_ops.transpose(
       constant_op.constant([[0, 1, 2, 3], [4, 5, 6, 7]]))
   input_data = RaggedTensor.from_value_rowids(
       values=my_value,
       value_rowids=constant_op.constant([0, 1, 2, 3], dtype=dtypes.int64),
       nrows=constant_op.constant(4, dtype=dtypes.int64),
       validate=True)
   self.assertAllEqual(input_data, [[[0, 4]], [[1, 5]], [[2, 6]], [[3, 7]]])
 def testHighDimensions(self):
   # Use distinct prime numbers for all dimension shapes in this test, so
   # we can see any errors that are caused by mixing up dimension sizes.
   dt = array_ops.reshape(
       math_ops.range(3 * 5 * 7 * 11 * 13 * 17), [3, 5, 7, 11, 13, 17])
   for ragged_rank in range(1, 4):
     rt = RaggedTensor.from_tensor(dt, ragged_rank=ragged_rank)
     self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(
         dt.shape.is_compatible_with(rt.shape),
         '%s is incompatible with %s' % (dt.shape, rt.shape))
     self.assertRaggedEqual(rt, self.evaluate(dt).tolist())
  def testWithFlatValues(self):
    tensor_values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
    values = WrappedTensor(tensor_values)
    nested_row_splits = [[0, 2, 5], [0, 2, 2, 5, 6, 7]]
    rt = RaggedTensor.from_nested_row_splits(values, nested_row_splits)

    tensor_int = constant_op.constant([1, 2, 3, 4, 5, 6, 7])
    rt_int = rt.with_flat_values(tensor_int)
    self.assertAllEqual(rt_int.flat_values, tensor_int)

    rt_wrapped_int = rt.with_flat_values(WrappedTensor(tensor_int))
    self.assertIsInstance(rt_wrapped_int.flat_values, WrappedTensor)
    self.assertAllEqual(rt_wrapped_int.flat_values.value, tensor_int)
 def test_already_dense_with_string(self):
     """This studies a tensor initialized with value_rowids and nrows."""
     input_data = RaggedTensor.from_value_rowids(
         values=constant_op.constant(
             ['a', 'b', 'c', 'd', 'e', 'antidisestablishmentarianism'],
             dtype=dtypes.string),
         value_rowids=constant_op.constant([0, 0, 0, 1, 1, 1],
                                           dtype=dtypes.int64),
         nrows=constant_op.constant(2, dtype=dtypes.int64),
         validate=True)
     self._compare_to_reference(
         input_data, [[b'a', b'b', b'c'],
                      [b'd', b'e', b'antidisestablishmentarianism']])
 def test_already_dense_with_dense_values(self):
     """This studies a tensor initialized with value_rowids and nrows."""
     input_data = RaggedTensor.from_value_rowids(
         values=constant_op.constant(
             [[6, 7], [8, 9], [10, 11], [12, 13], [14, 15], [16, 17]],
             dtype=dtypes.int64),
         value_rowids=constant_op.constant([0, 0, 0, 1, 1, 1],
                                           dtype=dtypes.int64),
         nrows=constant_op.constant(2, dtype=dtypes.int64),
         validate=True)
     self._compare_to_reference(
         input_data,
         [[[6, 7], [8, 9], [10, 11]], [[12, 13], [14, 15], [16, 17]]])
  def testToList(self):
    with context.eager_mode():
      tensor_values = constant_op.constant(
          ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
      row_splits = constant_op.constant([0, 2, 2, 5, 6, 8], dtypes.int64)
      values = WrappedTensor(tensor_values)
      rt = RaggedTensor.from_row_splits(values, row_splits)
      expected = ragged_factory_ops.constant([['a', 'b'], [], ['c', 'd', 'e'],
                                              ['f'], ['g', 'h']]).to_list()

      with self.subTest('Raise on unsupported'):
        with self.assertRaisesRegex(
            ValueError,
            'values must be convertible to a list',
        ):
          _ = rt.to_list()

      with self.subTest('Value with numpy method'):

        class WrappedTensorWithNumpy(WrappedTensor):

          def numpy(self):
            return self.value.numpy()

        values = WrappedTensorWithNumpy(tensor_values)
        rt = RaggedTensor.from_row_splits(values, row_splits)
        self.assertEqual(rt.to_list(), expected)

      with self.subTest('Value with to_list method'):

        class WrappedTensorWithToList(WrappedTensor):

          def to_list(self):
            return self.value.numpy().tolist()

        values = WrappedTensorWithToList(tensor_values)
        rt = RaggedTensor.from_row_splits(values, row_splits)
        self.assertEqual(rt.to_list(), expected)
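
For reference, a minimal sketch of to_list on an ordinary eager RaggedTensor (public tf API); the subtests above only add a numpy()/to_list() hook so that the same conversion works when the values are a composite wrapper:

import tensorflow as tf

rt = tf.ragged.constant([['a', 'b'], [], ['c', 'd', 'e'], ['f'], ['g', 'h']])
print(rt.to_list())  # [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g', b'h']]
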
 def testRaggedFromTensor(self,
                          tensor,
                          expected,
                          lengths=None,
                          padding=None,
                          ragged_rank=1):
   dt = constant_op.constant(tensor)
   rt = RaggedTensor.from_tensor(dt, lengths, padding, ragged_rank)
   self.assertEqual(type(rt), RaggedTensor)
   self.assertEqual(rt.ragged_rank, ragged_rank)
   self.assertTrue(
       dt.shape.is_compatible_with(rt.shape),
       '%s is incompatible with %s' % (dt.shape, rt.shape))
   self.assertRaggedEqual(rt, expected)
  def testNonRaggedSparseTensor(self):
    # "index_suffix" means the value of the innermost dimension of the index
    # (i.e., indices[i][-1]).
    # See comments in _assert_sparse_indices_are_ragged_right() for more
    # details/background.

    # index_suffix of first index is not zero.
    st1 = sparse_tensor.SparseTensor(
        indices=[[0, 1], [0, 2], [2, 0]], values=[1, 2, 3], dense_shape=[3, 3])
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 r'.*SparseTensor is not right-ragged'):
      self.evaluate(RaggedTensor.from_sparse(st1))
    # index_suffix of an index that starts a new row is not zero.
    st2 = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [2, 1]], values=[1, 2, 3], dense_shape=[3, 3])
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 r'.*SparseTensor is not right-ragged'):
      self.evaluate(RaggedTensor.from_sparse(st2))
    # index_suffix of an index that continues a row skips a cell.
    st3 = sparse_tensor.SparseTensor(
        indices=[[0, 1], [0, 1], [0, 3]], values=[1, 2, 3], dense_shape=[3, 3])
    with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                 r'.*SparseTensor is not right-ragged'):
      self.evaluate(RaggedTensor.from_sparse(st3))
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
  """Ragged compatible squeeze.

  If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

  If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
  where `N` is the number of elements in the squeezed dimensions.

  Args:
    input: A potentially ragged tensor. The input to squeeze.
    axis: An optional list of ints. Defaults to `None`. If the `input` is
      ragged, it only squeezes the dimensions listed. It fails if `input` is
      ragged and axis is []. If `input` is not ragged it calls tf.squeeze. Note
      that it is an error to squeeze a dimension that is not 1. It must be in
      the range of [-rank(input), rank(input)).
    name: A name for the operation (optional).

  Returns:
    A potentially ragged tensor. Contains the same data as input,
    but has one or more dimensions of size 1 removed.
  """
  with ops.name_scope(name, 'RaggedSqueeze', [input]):
    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
    if isinstance(input, ops.Tensor):
      return array_ops.squeeze(input, axis, name)

    if axis is None:
      raise ValueError('Ragged.squeeze must have an axis argument.')
    if isinstance(axis, int):
      axis = [axis]
    elif ((not isinstance(axis, (list, tuple))) or
          (not all(isinstance(d, int) for d in axis))):
      raise TypeError('Axis must be a list or tuple of integers.')

    dense_dims = []
    ragged_dims = []
    # Normalize all the dims in axis to be positive
    axis = [ragged_util.get_positive_axis(d, input.shape.ndims) for d in axis]
    for dim in axis:
      if dim > input.ragged_rank:
        dense_dims.append(dim - input.ragged_rank)
      else:
        ragged_dims.append(dim)

    # Make sure the specified ragged dimensions are squeezable.
    assertion_list = []
    scalar_tensor_one = constant_op.constant(1, dtype=input.row_splits.dtype)
    for i, r in enumerate(input.nested_row_lengths()):
      if i + 1 in ragged_dims:
        assertion_list.append(
            control_flow_ops.Assert(
                math_ops.reduce_all(math_ops.equal(r, scalar_tensor_one)),
                ['the given axis (axis = %d) is not squeezable!' % (i + 1)]))
    if 0 in ragged_dims:
      scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
      assertion_list.append(
          control_flow_ops.Assert(
              math_ops.equal(
                  array_ops.size(input.row_splits), scalar_tensor_two),
              ['the given axis (axis = 0) is not squeezable!']))

    # At this point, the ragged dimensions are known to be squeezable.
    squeezed_rt = control_flow_ops.with_dependencies(assertion_list,
                                                     input.flat_values)

    if dense_dims:
      # Gives error if the dense dimension is not squeezable.
      squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

    remaining_row_splits = []
    for i, row_split in enumerate(input.nested_row_splits):
      # Each row_splits tensor corresponds to dimension #(i+1).
      if (i + 1) not in ragged_dims:
        remaining_row_splits.append(row_split)
    # Take care of the first row if it is to be squeezed.
    if remaining_row_splits and 0 in ragged_dims:
      remaining_row_splits.pop(0)

    squeezed_rt = RaggedTensor.from_nested_row_splits(squeezed_rt,
                                                      remaining_row_splits)

    # Corner case: when removing all the ragged dimensions and the output is
    # a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
    if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
      squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

    return squeezed_rt
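
A minimal usage sketch of the ragged squeeze above (hedged: it assumes the function is importable as ragged_squeeze_op.squeeze from TensorFlow's internal ragged ops, where the matching implementation lives):

import tensorflow as tf
from tensorflow.python.ops.ragged import ragged_squeeze_op

# Dimensions 1 and 3 have size 1 in every row, so both are squeezable.
rt = tf.ragged.constant([[[[1], [2]]], [[[3]]]], ragged_rank=3)  # shape [2, None, None, None]
squeezed = ragged_squeeze_op.squeeze(rt, axis=[1, 3])
print(squeezed.to_list())  # [[1, 2], [3]]
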