def testRaggedFromTensor(self,
                         tensor,
                         expected,
                         lengths=None,
                         padding=None,
                         ragged_rank=1,
                         use_ragged_rank=True,
                         expected_shape=None):
    """Checks `RaggedTensor.from_tensor` against an expected ragged value.

    Args:
      tensor: Dense value that is wrapped in a constant and converted.
      expected: Value the resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
      ragged_rank: Ragged rank the result is required to have.
      use_ragged_rank: If false, `ragged_rank` is not passed to `from_tensor`,
        so that the function's own default is used.
      expected_shape: Optional static shape (as a list) the result must have.
    """
    dense = constant_op.constant(tensor)
    # Only forward `ragged_rank` on request, so the default-argument path of
    # `from_tensor` gets exercised as well.
    optional_args = (ragged_rank,) if use_ragged_rank else ()
    ragged = RaggedTensor.from_tensor(dense, lengths, padding, *optional_args)

    self.assertEqual(type(ragged), RaggedTensor)
    self.assertEqual(ragged.ragged_rank, ragged_rank)
    shapes_agree = dense.shape.is_compatible_with(ragged.shape)
    self.assertTrue(
        shapes_agree,
        '%s is incompatible with %s' % (dense.shape, ragged.shape))
    if expected_shape is not None:
        self.assertEqual(ragged.shape.as_list(), expected_shape)
    self.assertAllEqual(ragged, expected)

    # Rebuilding from the row splits with validation enabled must reproduce
    # the same tensor.
    rebuilt = RaggedTensor.from_nested_row_splits(
        ragged.flat_values, ragged.nested_row_splits, validate=True)
    self.assertAllEqual(ragged, rebuilt)
def testNonRaggedSparseTensor(self):
    """Checks that `from_sparse` rejects indices that are not right-ragged.

    "index_suffix" means the value of the innermost dimension of the index
    (i.e., indices[i][-1]).
    See comments in _assert_sparse_indices_are_ragged_right() for more
    details/background.
    """
    bad_indices = [
        # index_suffix of first index is not zero.
        [[0, 1], [0, 2], [2, 0]],
        # index_suffix of an index that starts a new row is not zero.
        [[0, 0], [0, 1], [2, 1]],
        # index_suffix of an index that continues a row skips a cell.
        # NOTE(review): the first index here is [0, 1], so this case may trip
        # the "first index" check rather than the "skips a cell" check --
        # confirm whether [[0, 0], [0, 1], [0, 3]] was intended.
        [[0, 1], [0, 1], [0, 3]],
    ]
    for indices in bad_indices:
        st = sparse_tensor.SparseTensor(
            indices=indices, values=[1, 2, 3], dense_shape=[3, 3])
        # `assertRaisesRegex` replaces the deprecated `assertRaisesRegexp`
        # alias, which no longer exists in Python 3.12+.
        with self.assertRaisesRegex(errors.InvalidArgumentError,
                                    r'.*SparseTensor is not right-ragged'):
            self.evaluate(RaggedTensor.from_sparse(st))
def testConstruction(self):
    """Builds ragged tensors over a `WrappedTensor` with each row factory.

    Every factory must keep the `WrappedTensor` as `rt.values` and report the
    row partition it was given.
    """
    flat = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
    wrapped = WrappedTensor(flat)

    def check_values(ragged):
        # The wrapper must survive construction and still hold the same data.
        self.assertIsInstance(ragged.values, WrappedTensor)
        self.assertAllEqual(ragged.values.value, flat)

    splits = constant_op.constant([0, 2, 2, 5, 6, 8], dtypes.int64)
    from_splits = RaggedTensor.from_row_splits(wrapped, splits)
    check_values(from_splits)
    self.assertAllEqual(from_splits.row_splits, splits)

    starts = constant_op.constant([0, 2, 2, 5, 6], dtypes.int64)
    from_starts = RaggedTensor.from_row_starts(wrapped, starts)
    check_values(from_starts)
    self.assertAllEqual(from_starts.row_starts(), starts)

    limits = constant_op.constant([2, 2, 5, 6, 8], dtypes.int64)
    from_limits = RaggedTensor.from_row_limits(wrapped, limits)
    check_values(from_limits)
    self.assertAllEqual(from_limits.row_limits(), limits)

    lengths = constant_op.constant([2, 0, 3, 1, 2], dtypes.int64)
    from_lengths = RaggedTensor.from_row_lengths(wrapped, lengths)
    check_values(from_lengths)
    self.assertAllEqual(from_lengths.row_lengths(), lengths)

    from_uniform = RaggedTensor.from_uniform_row_length(wrapped, 4)
    check_values(from_uniform)
    self.assertAllEqual(from_uniform.uniform_row_length, 4)
def _whitespace_tokenize_codepoints_with_offsets(self, codepoints_tensor):
    """Tokenizes codepoints on whitespace and returns token offsets.

    Args:
      codepoints_tensor: Codepoints to tokenize. Its `flat_values` and
        `row_splits` are handed to the tokenizer op, so it must behave like a
        `RaggedTensor` (presumably one row of codepoints per input string --
        confirm against callers).

    Returns:
      A `(tokens, offset_starts, offset_limits)` tuple of `RaggedTensor`s.
      `tokens` is nested under the op's outer and inner row splits, while the
      two offset tensors use the outer row splits only. The offsets are
      relative to the codepoints; per the original documentation the results
      have shape `[num_strings, (num_tokens or num_offsets)]`.
    """
    op_outputs = gen_whitespace_tokenizer.whitespace_tokenize_with_offsets(
        input_values=codepoints_tensor.flat_values,
        input_splits=codepoints_tensor.row_splits)
    (token_values, token_inner_splits, starts_flat, limits_flat,
     outer_splits) = op_outputs

    tokens = RaggedTensor.from_nested_row_splits(
        flat_values=token_values,
        nested_row_splits=[outer_splits, token_inner_splits])
    offset_starts = RaggedTensor.from_nested_row_splits(
        flat_values=starts_flat, nested_row_splits=[outer_splits])
    offset_limits = RaggedTensor.from_nested_row_splits(
        flat_values=limits_flat, nested_row_splits=[outer_splits])
    return (tokens, offset_starts, offset_limits)
def testPartialShapes(self, tensor, tensor_shape, shape=None, expected=None):
    """Checks `from_tensor` on inputs whose static shape is only partly known.

    Args:
      tensor: Dense value to convert; also the default for `expected`.
      tensor_shape: Static shape given to the placeholder that feeds
        `from_tensor`.
      shape: Optional static shape (as a list) the result must have.
      expected: Optional expected value; its `tolist()` is compared with the
        result. Defaults to `tensor`.
    """
    if context.executing_eagerly():
        return  # static shapes are always fully defined in eager mode.
    expected = tensor if expected is None else expected

    dense = constant_op.constant(tensor)
    for rank in range(1, len(dense.shape) - 1):
        partial_input = array_ops.placeholder_with_default(
            tensor, tensor_shape)
        ragged = RaggedTensor.from_tensor(partial_input, ragged_rank=rank)

        self.assertIsInstance(ragged, RaggedTensor)
        self.assertEqual(ragged.ragged_rank, rank)
        shapes_agree = dense.shape.is_compatible_with(ragged.shape)
        self.assertTrue(
            shapes_agree,
            '%s is incompatible with %s' % (dense.shape, ragged.shape))
        if shape is not None:
            self.assertEqual(ragged.shape.as_list(), shape)
        self.assertAllEqual(ragged, expected.tolist())

        # Round-trip through the row splits with validation enabled.
        rebuilt = RaggedTensor.from_nested_row_splits(
            ragged.flat_values, ragged.nested_row_splits, validate=True)
        self.assertAllEqual(ragged, rebuilt)
def tokenize_with_offsets(self, input, name=None):  # pylint: disable=redefined-builtin
    """Tokenizes UTF-8 strings and returns the tokens with their offsets.

    Args:
      input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape.
      name: The name argument that is passed to the op function.

    Returns:
      A `(tokens, starts, limits)` tuple. For non-scalar input each element
      is a `RaggedTensor` shaped like the input with an added ragged
      dimension for the tokens of each string; for scalar input the added
      outer dimension is stripped again by taking `.values`.

    Raises:
      ValueError: If the rank of `input` is not statically known.
    """
    with ops.name_scope(name, "SentenceTokenizer", [input, self]):
        input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            input)
        rank = input_tensor.shape.ndims
        if rank is None:
            raise ValueError(
                "Rank of input_tensor must be statically known.")

        if ragged_tensor.is_ragged(input_tensor):
            # Recursively process the flat values, then re-attach the
            # ragged structure of the input to each result.
            pieces = self.tokenize_with_offsets(input_tensor.flat_values)
            return tuple(
                input_tensor.with_flat_values(piece) for piece in pieces)

        if rank > 1:
            # Convert the dense input to ragged and process it as such.
            return self.tokenize_with_offsets(
                ragged_conversion_ops.from_tensor(input_tensor))

        if rank == 0:
            # Wrap the scalar in a length-1 vector, tokenize, then drop the
            # extra outer dimension again.
            pieces = self.tokenize_with_offsets(
                array_ops.stack([input_tensor]))
            return tuple(piece.values for piece in pieces)

        # A rank-1 tensor is exactly what the op expects.
        (flat_tokens, token_splits, flat_starts, flat_limits) = (
            gen_sentencepiece_tokenizer.sentencepiece_tokenize_with_offsets_op(
                self._resource_handle, input_tensor, self.nbest_size,
                self.alpha, self.add_bos, self.add_eos, self.reverse,
                self.out_type))

        def as_ragged(flat):
            # All three outputs share the same per-string row splits.
            return RaggedTensor.from_nested_row_splits(
                flat_values=flat,
                nested_row_splits=[token_splits],
                validate=False)

        tokens = as_ragged(flat_tokens)
        starts = as_ragged(flat_starts)
        limits = as_ragged(flat_limits)
        return (tokens, starts, limits)
def testErrorsWithUniformRowLength(self, slice_spec, expected, message):
    """Checks the error raised when indexing a uniform-row-length tensor.

    The tensor is built with `from_uniform_row_length` around a ragged inner
    tensor; `slice_spec`, `expected` and `message` are handed unchanged to
    `self._TestGetItemException`.
    """
    inner = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_3D_VALUES,
                                         EXAMPLE_RAGGED_TENSOR_3D_SPLITS)
    ragged = RaggedTensor.from_uniform_row_length(
        inner, EXAMPLE_RAGGED_TENSOR_3D_ROWLEN)
    # Sanity-check the fixture before exercising the failure path.
    self.assertAllEqual(ragged, EXAMPLE_RAGGED_TENSOR_3D)
    self._TestGetItemException(ragged, slice_spec, expected, message)
def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
    """Checks `from_tensor` on an all-zeros tensor for several ragged ranks.

    Args:
      dt_shape: Shape of the zeros tensor to convert.
      expected: Value every resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
    """
    dense = array_ops.zeros(dt_shape)
    for rank in range(1, len(dt_shape) - 1):
        ragged = RaggedTensor.from_tensor(dense, lengths, padding, rank)
        self.assertEqual(type(ragged), RaggedTensor)
        self.assertEqual(ragged.ragged_rank, rank)
        self.assertTrue(dense.shape.is_compatible_with(ragged.shape))
        self.assertAllEqual(ragged, expected)

        # Round-trip through the row splits with validation enabled.
        rebuilt = RaggedTensor.from_nested_row_splits(
            ragged.flat_values, ragged.nested_row_splits, validate=True)
        self.assertAllEqual(ragged, rebuilt)
def testDocStringExamples(self):
    """Replays the examples from `RaggedTensor.from_tensor.__doc__`."""
    dense = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
    # Each case: (keyword arguments for from_tensor, expected ragged value).
    cases = [
        ({}, [[5, 7, 0], [0, 3, 0], [6, 0, 0]]),
        ({'lengths': [1, 0, 3]}, [[5], [], [6, 0, 0]]),
        ({'padding': 0}, [[5, 7], [0, 3], [6]]),
    ]
    for kwargs, want in cases:
        self.assertRaggedEqual(RaggedTensor.from_tensor(dense, **kwargs), want)
def testGoodPartialSparseTensorRank(self):
    """Checks `from_sparse` accepts sparse tensors of partly unknown shape.

    Only runs in graph mode, since the inputs are built from placeholders.
    """
    if context.executing_eagerly():
        return

    unknown_dense_shape = sparse_tensor.SparseTensor(
        indices=[[0, 0]],
        values=[0],
        dense_shape=array_ops.placeholder(dtypes.int64))
    unknown_indices = sparse_tensor.SparseTensor(
        indices=array_ops.placeholder(dtypes.int64),
        values=[0],
        dense_shape=[4, 3])

    # Shouldn't throw ValueError
    RaggedTensor.from_sparse(unknown_dense_shape)
    RaggedTensor.from_sparse(unknown_indices)
def testGoodPartialSparseTensorRank(self):
    """Verifies that partially-specified sparse inputs convert without error.

    The inputs rely on placeholders, so the body is skipped in eager mode.
    """
    if context.executing_eagerly():
        return

    # First input: dense_shape is a placeholder.
    shape_placeholder = array_ops.placeholder(dtypes.int64)
    st_unknown_shape = sparse_tensor.SparseTensor(
        indices=[[0, 0]], values=[0], dense_shape=shape_placeholder)

    # Second input: indices is a placeholder.
    indices_placeholder = array_ops.placeholder(dtypes.int64)
    st_unknown_indices = sparse_tensor.SparseTensor(
        indices=indices_placeholder, values=[0], dense_shape=[4, 3])

    # Shouldn't throw ValueError
    RaggedTensor.from_sparse(st_unknown_shape)
    RaggedTensor.from_sparse(st_unknown_indices)
def testWithUniformRowLength(self, slice_spec, expected, expected_shape):
    """Checks `__getitem__` on a tensor built with a uniform row length.

    `slice_spec`, `expected` and `expected_shape` are handed to
    `self._TestGetItem`. When the sliced result is 3D, it must also keep a
    uniform row length equal to `expected_shape[1]`.
    """
    inner = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_3D_VALUES,
                                         EXAMPLE_RAGGED_TENSOR_3D_SPLITS)
    ragged = RaggedTensor.from_uniform_row_length(
        inner, EXAMPLE_RAGGED_TENSOR_3D_ROWLEN)

    # Sanity-check the fixture.
    self.assertAllEqual(ragged, EXAMPLE_RAGGED_TENSOR_3D)
    self.assertIsNotNone(ragged.uniform_row_length)

    self._TestGetItem(ragged, slice_spec, expected, expected_shape)

    # If the result is 3D, then check that it still has a uniform row length:
    sliced = ragged.__getitem__(slice_spec)
    if sliced.shape.rank == 3:
        self.assertIsNotNone(sliced.uniform_row_length)
        self.assertAllEqual(sliced.uniform_row_length, expected_shape[1])
def testHighDimensions(self):
    """Converts a 6-D tensor with ragged ranks 1-3 and round-trips it."""
    # Use distinct prime numbers for all dimension shapes in this test, so
    # we can see any errors that are caused by mixing up dimension sizes.
    dims = [3, 5, 7, 11, 13, 17]
    dense = array_ops.reshape(
        math_ops.range(3 * 5 * 7 * 11 * 13 * 17), dims)
    for rank in range(1, 4):
        ragged = RaggedTensor.from_tensor(dense, ragged_rank=rank)
        self.assertEqual(type(ragged), RaggedTensor)
        self.assertEqual(ragged.ragged_rank, rank)
        shapes_agree = dense.shape.is_compatible_with(ragged.shape)
        self.assertTrue(
            shapes_agree,
            '%s is incompatible with %s' % (dense.shape, ragged.shape))
        self.assertAllEqual(ragged, self.evaluate(dense).tolist())

        # Round-trip through the row splits with validation enabled.
        rebuilt = RaggedTensor.from_nested_row_splits(
            ragged.flat_values, ragged.nested_row_splits, validate=True)
        self.assertAllEqual(ragged, rebuilt)
def testErrorsWithPlaceholderShapes(self, slice_spec, expected, message):
    """Checks the error raised when indexing values of unknown shape.

    Skipped in eager mode. `slice_spec`, `expected` and `message` are handed
    unchanged to `self._TestGetItemException`.
    """
    if context.executing_eagerly():
        return

    # Intentionally use an unknown shape for `values`.
    unknown_values = array_ops.placeholder_with_default([0], None)
    ragged = RaggedTensor.from_row_splits(unknown_values, [0, 1])
    self._TestGetItemException(ragged, slice_spec, expected, message)
def testErrorsWithRaggedRank2(self, slice_spec, expected, message):
    """Checks the error raised when indexing a tensor with two split levels.

    `slice_spec`, `expected` and `message` are handed unchanged to
    `self._TestGetItemException`.
    """
    splits = [EXAMPLE_RAGGED_TENSOR_4D_SPLITS1,
              EXAMPLE_RAGGED_TENSOR_4D_SPLITS2]
    ragged = RaggedTensor.from_nested_row_splits(
        EXAMPLE_RAGGED_TENSOR_4D_VALUES, splits)
    # Sanity-check the fixture before exercising the failure path.
    self.assertAllEqual(ragged, EXAMPLE_RAGGED_TENSOR_4D)
    self._TestGetItemException(ragged, slice_spec, expected, message)
def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
    """Checks `from_tensor` on an all-zeros tensor of shape `dt_shape`.

    Args:
      dt_shape: Shape of the zeros tensor to convert.
      expected: Value the resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
    """
    dense = array_ops.zeros(dt_shape)
    ragged = RaggedTensor.from_tensor(dense, lengths, padding)

    self.assertEqual(type(ragged), RaggedTensor)
    self.assertEqual(ragged.ragged_rank, 1)
    shapes_agree = dense.shape.is_compatible_with(ragged.shape)
    self.assertTrue(shapes_agree)
    self.assertRaggedEqual(ragged, expected)
def test_empty_tensor(self):
    """Builds a two-row ragged tensor with no values and checks it.

    The tensor is passed to `self._compare_to_reference` with
    `default_value=3`.
    """
    no_values = constant_op.constant([], dtype=dtypes.int64)
    no_rowids = constant_op.constant([], dtype=dtypes.int64)
    row_count = constant_op.constant(2, dtype=dtypes.int64)
    ragged = RaggedTensor.from_value_rowids(
        values=no_values,
        value_rowids=no_rowids,
        nrows=row_count,
        validate=True)
    self._compare_to_reference(ragged, [[], []], default_value=3)
def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
    """Converts a zeros tensor with `from_tensor` and checks the result.

    Args:
      dt_shape: Shape of the zeros tensor to convert.
      expected: Value the resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
    """
    zeros = array_ops.zeros(dt_shape)
    result = RaggedTensor.from_tensor(zeros, lengths, padding)

    # No ragged_rank was requested, so the result must have ragged_rank 1.
    self.assertEqual(type(result), RaggedTensor)
    self.assertEqual(result.ragged_rank, 1)
    self.assertTrue(zeros.shape.is_compatible_with(result.shape))
    self.assertAllEqual(result, expected)
def testRaggedFromTensor(self,
                         tensor,
                         expected,
                         lengths=None,
                         padding=None,
                         ragged_rank=1,
                         use_ragged_rank=True):
    """Checks `RaggedTensor.from_tensor` against an expected ragged value.

    Args:
      tensor: Dense value that is wrapped in a constant and converted.
      expected: Value the resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
      ragged_rank: Ragged rank the result is required to have.
      use_ragged_rank: If false, `ragged_rank` is not passed to `from_tensor`,
        so that the function's own default is used.
    """
    dense = constant_op.constant(tensor)
    positional = [dense, lengths, padding]
    if use_ragged_rank:
        # Pass the rank explicitly; otherwise rely on from_tensor's default.
        positional.append(ragged_rank)
    ragged = RaggedTensor.from_tensor(*positional)

    self.assertEqual(type(ragged), RaggedTensor)
    self.assertEqual(ragged.ragged_rank, ragged_rank)
    self.assertTrue(
        dense.shape.is_compatible_with(ragged.shape),
        '%s is incompatible with %s' % (dense.shape, ragged.shape))
    self.assertAllEqual(ragged, expected)
def testEmpty(self):
    """Converts a sparse tensor with no entries into four empty rows."""
    no_indices = array_ops.zeros([0, 2], dtype=dtypes.int64)
    sparse = sparse_tensor.SparseTensor(
        indices=no_indices, values=[], dense_shape=[4, 3])
    ragged = RaggedTensor.from_sparse(sparse)
    self.assertRaggedEqual(ragged, [[], [], [], []])
def testDocStringExample(self):
    """Converts a small right-ragged sparse tensor and checks its rows."""
    entry_indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]]
    entry_values = [1, 2, 3, 4, 5]
    sparse = sparse_tensor.SparseTensor(
        indices=entry_indices, values=entry_values, dense_shape=[4, 3])
    ragged = RaggedTensor.from_sparse(sparse)
    # Row 2 has no entries, so it comes out empty.
    self.assertRaggedEqual(ragged, [[1, 2, 3], [4], [], [5]])
def testEmpty(self):
    """Checks `from_sparse` on a 4x3 sparse tensor that holds no values."""
    empty_sparse = sparse_tensor.SparseTensor(
        indices=array_ops.zeros([0, 2], dtype=dtypes.int64),
        values=[],
        dense_shape=[4, 3],
    )
    # One empty row is expected per row of the dense shape.
    self.assertRaggedEqual(
        RaggedTensor.from_sparse(empty_sparse), [[], [], [], []])
def test_empty_tensor_with_shape(self):
    """Densifies an empty two-row ragged tensor into a filled 2x3 tensor."""
    no_values = constant_op.constant([], dtype=dtypes.int64)
    no_rowids = constant_op.constant([], dtype=dtypes.int64)
    row_count = constant_op.constant(2, dtype=dtypes.int64)
    ragged = RaggedTensor.from_value_rowids(
        values=no_values,
        value_rowids=no_rowids,
        nrows=row_count,
        validate=True)

    # With no values present, every cell must hold the default value.
    dense = ragged.to_tensor(default_value=3, shape=[2, 3])
    self.assertAllEqual(dense, [[3, 3, 3], [3, 3, 3]])
def test_preserve_shape_roundtrip(self, input_shape, to_tensor_shape,
                                  expected_shape):
    """Checks the static shape survives dense -> ragged -> dense conversion.

    Args:
      input_shape: Shape of the zeros tensor used as the starting point.
      to_tensor_shape: `shape` argument passed to `to_tensor`.
      expected_shape: Static shape (as a list) expected at every stage.
    """
    dense = array_ops.zeros(input_shape)
    ragged = RaggedTensor.from_tensor(dense, ragged_rank=2)
    recovered = ragged.to_tensor(shape=to_tensor_shape)

    # The original, the ragged form and the recovered tensor must all agree.
    for stage in (dense, ragged, recovered):
        self.assertAllEqual(stage.shape.as_list(), expected_shape)
def testWithRaggedRank1(self, slice_spec, expected):
    """Checks `__getitem__` on a tensor built from one level of row splits.

    `slice_spec` and `expected` are handed unchanged to `self._TestGetItem`.
    """
    ragged = RaggedTensor.from_row_splits(
        EXAMPLE_RAGGED_TENSOR_2D_VALUES,
        EXAMPLE_RAGGED_TENSOR_2D_SPLITS,
    )
    # Sanity-check the fixture before slicing it.
    self.assertAllEqual(ragged, EXAMPLE_RAGGED_TENSOR_2D)
    self._TestGetItem(ragged, slice_spec, expected)
def testDocStringExample(self):
    """Checks `from_sparse` on a right-ragged 4x3 sparse tensor."""
    sparse_input = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
        values=[1, 2, 3, 4, 5],
        dense_shape=[4, 3],
    )
    converted = RaggedTensor.from_sparse(sparse_input)
    want = [[1, 2, 3], [4], [], [5]]
    self.assertRaggedEqual(converted, want)
def testDocStringExamples(self):
    """Replays the examples from `RaggedTensor.from_tensor.__doc__`."""
    dense_2d = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])

    unchanged = RaggedTensor.from_tensor(dense_2d)
    self.assertAllEqual(unchanged, [[5, 7, 0], [0, 3, 0], [6, 0, 0]])

    by_lengths = RaggedTensor.from_tensor(dense_2d, lengths=[1, 0, 3])
    self.assertAllEqual(by_lengths, [[5], [], [6, 0, 0]])

    by_padding = RaggedTensor.from_tensor(dense_2d, padding=0)
    self.assertAllEqual(by_padding, [[5, 7], [0, 3], [6]])

    dense_3d = constant_op.constant([[[5, 0], [7, 0], [0, 0]],
                                     [[0, 0], [3, 0], [0, 0]],
                                     [[6, 0], [0, 0], [0, 0]]])
    # `lengths` is a tuple here, one entry per ragged dimension.
    nested_lengths = ([2, 0, 3], [1, 1, 2, 0, 1])
    by_nested_lengths = RaggedTensor.from_tensor(
        dense_3d, lengths=nested_lengths)
    self.assertAllEqual(by_nested_lengths,
                        [[[5], [7]], [], [[6, 0], [], [0]]])
def testWithPlaceholderShapes(self, slice_spec, expected):
    """Checks `__getitem__` when the row splits have no static shape.

    `slice_spec` and `expected` are handed unchanged to `self._TestGetItem`.
    """
    # Intentionally use an unknown shape for `splits`, to force the code path
    # that deals with having nrows unknown at graph construction time.
    known_splits = constant_op.constant(
        EXAMPLE_RAGGED_TENSOR_2D_SPLITS, dtype=dtypes.int64)
    unknown_splits = array_ops.placeholder_with_default(known_splits, None)

    ragged = RaggedTensor.from_row_splits(
        EXAMPLE_RAGGED_TENSOR_2D_VALUES, unknown_splits)
    self.assertAllEqual(ragged, EXAMPLE_RAGGED_TENSOR_2D)
    self._TestGetItem(ragged, slice_spec, expected)
def test_already_dense_simple(self):
    """Checks a tensor built from value_rowids and nrows with equal rows.

    Both rows hold three values; the tensor is passed to
    `self._compare_to_reference`.
    """
    flat = constant_op.constant([6, 7, 8, 9, 10, 11], dtype=dtypes.int64)
    rowids = constant_op.constant([0, 0, 0, 1, 1, 1], dtype=dtypes.int64)
    row_count = constant_op.constant(2, dtype=dtypes.int64)
    ragged = RaggedTensor.from_value_rowids(
        values=flat, value_rowids=rowids, nrows=row_count, validate=True)
    self._compare_to_reference(ragged, [[6, 7, 8], [9, 10, 11]])
def test_value_transposed(self):
    """Builds a ragged tensor whose values come from a transpose op."""
    # Check that transposed data is not an issue.
    untransposed = constant_op.constant([[0, 1, 2, 3], [4, 5, 6, 7]])
    transposed = array_ops.transpose(untransposed)
    rowids = constant_op.constant([0, 1, 2, 3], dtype=dtypes.int64)
    row_count = constant_op.constant(4, dtype=dtypes.int64)

    ragged = RaggedTensor.from_value_rowids(
        values=transposed,
        value_rowids=rowids,
        nrows=row_count,
        validate=True)
    # Each of the four rows holds exactly one transposed pair.
    self.assertAllEqual(ragged, [[[0, 4]], [[1, 5]], [[2, 6]], [[3, 7]]])
def testHighDimensions(self):
    """Converts a 6-D tensor with ragged ranks 1-3 and checks each result."""
    # Use distinct prime numbers for all dimension shapes in this test, so
    # we can see any errors that are caused by mixing up dimension sizes.
    element_count = 3 * 5 * 7 * 11 * 13 * 17
    dense = array_ops.reshape(
        math_ops.range(element_count), [3, 5, 7, 11, 13, 17])
    for rank in range(1, 4):
        ragged = RaggedTensor.from_tensor(dense, ragged_rank=rank)
        self.assertEqual(type(ragged), RaggedTensor)
        self.assertEqual(ragged.ragged_rank, rank)
        shapes_agree = dense.shape.is_compatible_with(ragged.shape)
        self.assertTrue(
            shapes_agree,
            '%s is incompatible with %s' % (dense.shape, ragged.shape))
        self.assertRaggedEqual(ragged, self.evaluate(dense).tolist())
def testWithFlatValues(self):
    """Checks `with_flat_values` with plain and wrapped replacement values."""
    letters = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
    splits = [[0, 2, 5], [0, 2, 2, 5, 6, 7]]
    ragged = RaggedTensor.from_nested_row_splits(
        WrappedTensor(letters), splits)

    numbers = constant_op.constant([1, 2, 3, 4, 5, 6, 7])

    # Replacing the flat values with a plain tensor.
    with_plain = ragged.with_flat_values(numbers)
    self.assertAllEqual(with_plain.flat_values, numbers)

    # Replacing them with a wrapped tensor must keep the wrapper type.
    with_wrapped = ragged.with_flat_values(WrappedTensor(numbers))
    self.assertIsInstance(with_wrapped.flat_values, WrappedTensor)
    self.assertAllEqual(with_wrapped.flat_values.value, numbers)
def test_already_dense_with_string(self):
    """Checks a string tensor built from value_rowids and nrows.

    Both rows hold three strings; the tensor is passed to
    `self._compare_to_reference`, with byte strings as the expected values.
    """
    flat = constant_op.constant(
        ['a', 'b', 'c', 'd', 'e', 'antidisestablishmentarianism'],
        dtype=dtypes.string)
    rowids = constant_op.constant([0, 0, 0, 1, 1, 1], dtype=dtypes.int64)
    row_count = constant_op.constant(2, dtype=dtypes.int64)
    ragged = RaggedTensor.from_value_rowids(
        values=flat, value_rowids=rowids, nrows=row_count, validate=True)

    want = [[b'a', b'b', b'c'],
            [b'd', b'e', b'antidisestablishmentarianism']]
    self._compare_to_reference(ragged, want)
def test_already_dense_with_dense_values(self):
    """Checks a tensor from value_rowids and nrows whose values are 2-D.

    Each value is itself a pair; the tensor is passed to
    `self._compare_to_reference`.
    """
    pairs = constant_op.constant(
        [[6, 7], [8, 9], [10, 11], [12, 13], [14, 15], [16, 17]],
        dtype=dtypes.int64)
    rowids = constant_op.constant([0, 0, 0, 1, 1, 1], dtype=dtypes.int64)
    row_count = constant_op.constant(2, dtype=dtypes.int64)
    ragged = RaggedTensor.from_value_rowids(
        values=pairs, value_rowids=rowids, nrows=row_count, validate=True)

    want = [[[6, 7], [8, 9], [10, 11]],
            [[12, 13], [14, 15], [16, 17]]]
    self._compare_to_reference(ragged, want)
def testToList(self):
    """Checks `to_list` for wrapped values with and without converters.

    A plain `WrappedTensor` must be rejected with a `ValueError`; subclasses
    that add either a `numpy` or a `to_list` method must produce the same
    list as an equivalent ragged constant.
    """
    with context.eager_mode():
        letters = constant_op.constant(
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
        splits = constant_op.constant([0, 2, 2, 5, 6, 8], dtypes.int64)
        plain_rt = RaggedTensor.from_row_splits(
            WrappedTensor(letters), splits)
        reference = ragged_factory_ops.constant(
            [['a', 'b'], [], ['c', 'd', 'e'], ['f'], ['g', 'h']])
        expected = reference.to_list()

        with self.subTest('Raise on unsupported'):
            with self.assertRaisesRegex(
                    ValueError, 'values must be convertible to a list'):
                _ = plain_rt.to_list()

        with self.subTest('Value with numpy method'):

            class WrappedTensorWithNumpy(WrappedTensor):

                def numpy(self):
                    return self.value.numpy()

            numpy_rt = RaggedTensor.from_row_splits(
                WrappedTensorWithNumpy(letters), splits)
            self.assertEqual(numpy_rt.to_list(), expected)

        with self.subTest('Value with to_list method'):

            class WrappedTensorWithToList(WrappedTensor):

                def to_list(self):
                    return self.value.numpy().tolist()

            to_list_rt = RaggedTensor.from_row_splits(
                WrappedTensorWithToList(letters), splits)
            self.assertEqual(to_list_rt.to_list(), expected)
def testRaggedFromTensor(self,
                         tensor,
                         expected,
                         lengths=None,
                         padding=None,
                         ragged_rank=1):
    """Checks `RaggedTensor.from_tensor` against an expected ragged value.

    Args:
      tensor: Dense value that is wrapped in a constant and converted.
      expected: Value the resulting ragged tensor must equal.
      lengths: Forwarded to `from_tensor` as its `lengths` argument.
      padding: Forwarded to `from_tensor` as its `padding` argument.
      ragged_rank: Passed to `from_tensor`; the result must report it back.
    """
    dense = constant_op.constant(tensor)
    ragged = RaggedTensor.from_tensor(dense, lengths, padding, ragged_rank)

    self.assertEqual(type(ragged), RaggedTensor)
    self.assertEqual(ragged.ragged_rank, ragged_rank)
    shapes_agree = dense.shape.is_compatible_with(ragged.shape)
    self.assertTrue(
        shapes_agree,
        '%s is incompatible with %s' % (dense.shape, ragged.shape))
    self.assertRaggedEqual(ragged, expected)
def testNonRaggedSparseTensor(self):
    """Checks that `from_sparse` rejects indices that are not right-ragged.

    "index_suffix" means the value of the innermost dimension of the index
    (i.e., indices[i][-1]).
    See comments in _assert_sparse_indices_are_ragged_right() for more
    details/background.

    `assertRaisesRegex` is used throughout because the older
    `assertRaisesRegexp` alias is deprecated and absent from Python 3.12+.
    """
    error_pattern = r'.*SparseTensor is not right-ragged'

    # index_suffix of first index is not zero.
    st1 = sparse_tensor.SparseTensor(
        indices=[[0, 1], [0, 2], [2, 0]], values=[1, 2, 3],
        dense_shape=[3, 3])
    with self.assertRaisesRegex(errors.InvalidArgumentError, error_pattern):
        self.evaluate(RaggedTensor.from_sparse(st1))

    # index_suffix of an index that starts a new row is not zero.
    st2 = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [2, 1]], values=[1, 2, 3],
        dense_shape=[3, 3])
    with self.assertRaisesRegex(errors.InvalidArgumentError, error_pattern):
        self.evaluate(RaggedTensor.from_sparse(st2))

    # index_suffix of an index that continues a row skips a cell.
    # NOTE(review): the first index here is [0, 1], so this case may trip
    # the "first index" check rather than the "skips a cell" check --
    # confirm whether [[0, 0], [0, 1], [0, 3]] was intended.
    st3 = sparse_tensor.SparseTensor(
        indices=[[0, 1], [0, 1], [0, 3]], values=[1, 2, 3],
        dense_shape=[3, 3])
    with self.assertRaisesRegex(errors.InvalidArgumentError, error_pattern):
        self.evaluate(RaggedTensor.from_sparse(st3))
def squeeze(input, axis=None, name=None):  # pylint: disable=redefined-builtin
    """Ragged compatible squeeze.

    If `input` is a `tf.Tensor`, then this calls `tf.squeeze`.

    If `input` is a `tf.RaggedTensor`, then this operation takes `O(N)` time,
    where `N` is the number of elements in the squeezed dimensions.

    Args:
      input: A potentially ragged tensor. The input to squeeze.
      axis: An optional list of ints. Defaults to `None`. If the `input` is
        ragged, it only squeezes the dimensions listed. It fails if `input` is
        ragged and axis is []. If `input` is not ragged it calls tf.squeeze.
        Note that it is an error to squeeze a dimension that is not 1. It must
        be in the range of [-rank(input), rank(input)).
      name: A name for the operation (optional).

    Returns:
      A potentially ragged tensor. Contains the same data as input,
      but has one or more dimensions of size 1 removed.

    Raises:
      ValueError: If `input` is ragged and `axis` is `None`.
      TypeError: If `input` is ragged and `axis` is neither an int nor a
        list/tuple of ints.
    """
    with ops.name_scope(name, 'RaggedSqueeze', [input]):
        input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
        if isinstance(input, ops.Tensor):
            return array_ops.squeeze(input, axis, name)

        if axis is None:
            raise ValueError('Ragged.squeeze must have an axis argument.')
        if isinstance(axis, int):
            axis = [axis]
        elif ((not isinstance(axis, (list, tuple))) or
              (not all(isinstance(d, int) for d in axis))):
            raise TypeError('Axis must be a list or tuple of integers.')

        dense_dims = []
        ragged_dims = []
        # Normalize all the dims in axis to be positive
        axis = [
            ragged_util.get_positive_axis(d, input.shape.ndims) for d in axis
        ]
        for dim in axis:
            if dim > input.ragged_rank:
                # Dims beyond the ragged ones live in `flat_values`; re-index
                # them relative to that tensor.
                dense_dims.append(dim - input.ragged_rank)
            else:
                ragged_dims.append(dim)

        # Make sure the specified ragged dimensions are squeezable.
        assertion_list = []
        scalar_tensor_one = constant_op.constant(
            1, dtype=input.row_splits.dtype)
        for i, r in enumerate(input.nested_row_lengths()):
            if i + 1 in ragged_dims:
                # Dimension i+1 is squeezable only if every row has length 1.
                assertion_list.append(
                    control_flow_ops.Assert(
                        math_ops.reduce_all(
                            math_ops.equal(r, scalar_tensor_one)),
                        ['the given axis (axis = %d) is not squeezable!' %
                         (i + 1)]))
        if 0 in ragged_dims:
            # The outermost dimension is checked through the number of
            # entries in the outer row_splits, which must be exactly 2.
            scalar_tensor_two = constant_op.constant(2, dtype=dtypes.int32)
            assertion_list.append(
                control_flow_ops.Assert(
                    math_ops.equal(
                        array_ops.size(input.row_splits), scalar_tensor_two),
                    ['the given axis (axis = 0) is not squeezable!']))

        # Till now, we are sure that the ragged dimensions are squeezable.
        squeezed_rt = control_flow_ops.with_dependencies(
            assertion_list, input.flat_values)

        if dense_dims:
            # Gives error if the dense dimension is not squeezable.
            squeezed_rt = array_ops.squeeze(squeezed_rt, dense_dims)

        # Each row_splits tensor is for dimension #(i+1); keep only those
        # whose dimension is not being squeezed.
        remaining_row_splits = [
            row_split
            for i, row_split in enumerate(input.nested_row_splits)
            if (i + 1) not in ragged_dims
        ]

        # Take care of the first row if it is to be squeezed.
        if remaining_row_splits and 0 in ragged_dims:
            remaining_row_splits.pop(0)

        squeezed_rt = RaggedTensor.from_nested_row_splits(
            squeezed_rt, remaining_row_splits)

        # Corner case: when removing all the ragged dimensions and the output
        # is a scalar tensor e.g. ragged.squeeze(ragged.constant([[[1]]])).
        if set(range(0, input.ragged_rank + 1)).issubset(set(ragged_dims)):
            squeezed_rt = array_ops.squeeze(squeezed_rt, [0], name)

        return squeezed_rt