def testFromValueRowIdsWithBadNRows(self):
  """Checks that `from_value_rowids` rejects invalid `nrows` arguments.

  Exercises a negative `nrows`, two `nrows` values that are too small for the
  last row id, and wrong ranks for `value_rowids` and for `nrows`.
  """
  rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
  row_count = constant_op.constant(5, dtypes.int64)

  # Negative nrows; value_rowids is routed through a placeholder.
  with self.assertRaisesRegex(ValueError, r'Expected nrows >= 0; got -2'):
    dynamic_rowids = array_ops.placeholder_with_default(rowids, None)
    RowPartition.from_value_rowids(value_rowids=dynamic_rowids, nrows=-2)

  # nrows smaller than the last row id + 1.
  with self.assertRaisesRegex(
      ValueError,
      r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=2, '
      r'value_rowids\[-1\]=4'):
    RowPartition.from_value_rowids(value_rowids=rowids, nrows=2)

  # nrows equal to the last row id (still one short).
  with self.assertRaisesRegex(
      ValueError,
      r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=4, '
      r'value_rowids\[-1\]=4'):
    RowPartition.from_value_rowids(value_rowids=rowids, nrows=4)

  # value_rowids with an extra dimension.
  with self.assertRaisesRegex(ValueError, r'Shape \(7, 1\) must have rank 1'):
    column_rowids = array_ops.expand_dims(rowids, 1)
    RowPartition.from_value_rowids(value_rowids=column_rowids, nrows=row_count)

  # nrows with an extra dimension.
  with self.assertRaisesRegex(ValueError, r'Shape \(1,\) must have rank 0'):
    vector_row_count = array_ops.expand_dims(row_count, 0)
    RowPartition.from_value_rowids(
        value_rowids=rowids, nrows=vector_row_count)
def testRowPartitionConstructionErrors(self):
  """Checks the errors raised by direct `RowPartition(...)` construction.

  Covers a missing factory key, a non-Tensor partitioning argument, a
  wrong-rank `row_splits`, a non-Tensor cached value, and mismatched dtypes.
  """
  splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
  factory_key = row_partition._row_partition_factory_key

  # No `internal` key supplied.
  with self.assertRaisesRegex(ValueError,
                              'RowPartition constructor is private'):
    RowPartition(row_splits=splits)

  # row_splits given as a plain Python list.
  with self.assertRaisesRegex(
      TypeError, 'Row-partitioning argument must be a Tensor'):
    RowPartition(row_splits=[0, 2, 2, 5, 6, 7], internal=factory_key)

  # row_splits with an extra dimension.
  with self.assertRaisesRegex(ValueError, r'Shape \(6, 1\) must have rank 1'):
    RowPartition(
        row_splits=array_ops.expand_dims(splits, 1), internal=factory_key)

  # row_lengths given as a plain Python list.
  with self.assertRaisesRegex(TypeError,
                              'Cached value must be a Tensor or None.'):
    RowPartition(
        row_splits=splits, row_lengths=[2, 3, 4], internal=factory_key)

  # int64 row_splits combined with int32 nrows.
  with self.assertRaisesRegex(ValueError, 'Inconsistent dtype'):
    RowPartition(
        row_splits=constant_op.constant([0, 3], dtypes.int64),
        nrows=constant_op.constant(1, dtypes.int32),
        internal=factory_key)
def testMergePrecomputedEncodingsFastPaths(self):
  """Checks the cases where merging returns the receiver unchanged."""
  base = RowPartition.from_row_splits([0, 3, 8, 8])

  # Merging a partition with itself hands back the very same object.
  merged_with_self = base.merge_precomputed_encodings(base)
  self.assertIs(merged_with_self, base)

  # A second partition built from the same row_splits tensor object also
  # results in the receiver being returned as-is.
  sibling = RowPartition.from_row_splits(base.row_splits(), validate=False)
  merged_with_sibling = base.merge_precomputed_encodings(sibling)
  self.assertIs(merged_with_sibling, base)
def testMergePrecomputedEncodingsWithMatchingTensors(self):
  """Checks merging when one partition's encodings contain the other's."""
  # The encoding tensors of `superset` are a superset of those of `subset`,
  # and where they overlap they are the same tensor objects.
  superset = RowPartition.from_value_rowids([0, 0, 3, 4, 4, 4])
  subset = RowPartition.from_row_splits(superset.row_splits(), validate=False)

  # Whichever side the merge is called on, `superset` is what comes back.
  forward = superset.merge_precomputed_encodings(subset)
  self.assertIs(forward, superset)
  backward = subset.merge_precomputed_encodings(superset)
  self.assertIs(backward, superset)

  # The two partitions are nevertheless distinct objects.
  self.assertIsNot(superset, subset)
def testTwoDeepStructuredTensor(self):
  """Converts a twice-partitioned StructuredTensor and checks field "r"."""
  ragged = tf.RaggedTensor.from_value_rowids(
      tf.constant([[1, 2], [3, 4], [5, 6]]), [0, 0, 1])
  nested = _make_structured_tensor([2], {"r": ragged})

  # Wrap the structure in two additional outer dimensions, each partitioned
  # by the same row splits.
  for _ in range(2):
    nested = nested.partition_outer_dimension(
        RowPartition.from_row_splits([0, 1, 2]))

  converted = structured_tensor_to_prensor.structured_tensor_to_prensor(nested)
  leaf = converted.get_descendant(path.create_path("data.data.r.data"))
  leaf_node = leaf.node
  self.assertAllEqual(leaf_node.parent_index, [0, 0, 1, 1, 2, 2])
  self.assertAllEqual(leaf_node.values, [1, 2, 3, 4, 5, 6])
def testFromUniformRowLengthBugConvertToTensor(self):
  """Regression test: mixed int32/int64 inputs must not fail construction.

  This originally failed to run because `nrows` was int32 while the other
  arguments were int64. It may be worth reconsidering what the dtype of a
  RowPartition should be when no preferred dtype is specified. Regarding
  convert_to_tensor: `dtype` fixes the output type, whereas
  `preferred_dtype`/`dtype_hint` is only a suggestion (`dtype_hint` being the
  newer name).
  """
  int32_nrows = constant_op.constant(3, dtype=dtypes.int32)
  int64_nvals = constant_op.constant(12, dtype=dtypes.int64)
  int64_row_length = constant_op.constant(4, dtype=dtypes.int64)

  # The test passes as long as this call does not raise.
  RowPartition.from_uniform_row_length(
      int64_row_length, nvals=int64_nvals, nrows=int32_nrows)
def testFromUniformRowLengthWithPlaceholders2(self):
  """Builds a uniform partition from two placeholders and checks nvals."""
  nvals_placeholder = array_ops.placeholder_with_default(6, None)
  row_length_placeholder = array_ops.placeholder_with_default(3, None)

  partition = RowPartition.from_uniform_row_length(
      nvals=nvals_placeholder, uniform_row_length=row_length_placeholder)

  evaluated_nvals = self.evaluate(partition.nvals())
  self.assertEqual(evaluated_nvals, 6)
def testFromRowSplitsWithDifferentSplitTypes(self):
  """Checks the row_splits dtype produced for several kinds of input.

  Inputs are a Python list, int64/int32 numpy arrays, and int64/int32
  constant tensors.
  """
  # Each entry pairs a row_splits input with the dtype the resulting
  # partition's row_splits is expected to have.
  cases = [
      ([0, 2, 2, 5, 6, 7], dtypes.int64),
      (np.array([0, 2, 2, 5, 6, 7], np.int64), dtypes.int64),
      (np.array([0, 2, 2, 5, 6, 7], np.int32), dtypes.int32),
      (constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64), dtypes.int64),
      (constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int32), dtypes.int32),
  ]

  # Build every partition before running any assertion.
  partitions = [RowPartition.from_row_splits(splits) for splits, _ in cases]

  for partition, (_, expected_dtype) in zip(partitions, cases):
    self.assertEqual(partition.row_splits().dtype, expected_dtype)
def _get_specified_row_partition():
  """Returns a RowPartition with row_splits, nrows and nvals all supplied.

  Used by the merge_with_spec tests; normally nvals isn't set.
  """
  splits = constant_op.constant([0, 3, 8], dtype=dtypes.int64)
  row_count = constant_op.constant(2, dtype=dtypes.int64)
  value_count = constant_op.constant(8, dtype=dtypes.int64)
  # Calls the constructor directly, passing the factory key as `internal`.
  return RowPartition(
      row_splits=splits,
      nrows=row_count,
      nvals=value_count,
      internal=row_partition._row_partition_factory_key)
def testFromUniformRowLengthWithPlaceholders1(self):
  """Builds a uniform partition from a placeholder nvals and checks nvals."""
  static_nvals = constant_op.constant(6, dtype=dtypes.int64)
  nvals_placeholder = array_ops.placeholder_with_default(static_nvals, None)

  partition = RowPartition.from_uniform_row_length(
      nvals=nvals_placeholder, uniform_row_length=3)

  evaluated_nvals = self.evaluate(partition.nvals())
  self.assertEqual(evaluated_nvals, 6)
def testFromValue(self):
  """Checks `RowPartitionSpec.from_value` for each factory method."""
  # Built from row_splits.
  from_splits = RowPartition.from_row_splits([0, 2, 8, 8])
  self.assertEqual(
      RowPartitionSpec.from_value(from_splits), RowPartitionSpec(nrows=3))

  # Built from row_lengths.
  from_lengths = RowPartition.from_row_lengths([5, 3, 0, 2])
  self.assertEqual(
      RowPartitionSpec.from_value(from_lengths), RowPartitionSpec(nrows=4))

  # Built from value_rowids.
  from_rowids = RowPartition.from_value_rowids([0, 2, 2, 8])
  self.assertEqual(
      RowPartitionSpec.from_value(from_rowids),
      RowPartitionSpec(nrows=9, nvals=4))

  # Built from a uniform row length.
  from_uniform = RowPartition.from_uniform_row_length(
      nvals=12, uniform_row_length=3)
  self.assertEqual(
      RowPartitionSpec.from_value(from_uniform),
      RowPartitionSpec(nvals=12, uniform_row_length=3))
def testEmpty2DRagged(self):
  """Converts a field-less [2, None] StructuredTensor and checks parents."""
  outer_partition = RowPartition.from_row_splits([0, 3, 5])
  empty_struct = structured_tensor.StructuredTensor.from_fields(
      fields={}, shape=[2, None], row_partitions=[outer_partition])

  converted = structured_tensor_to_prensor.structured_tensor_to_prensor(
      empty_struct)

  data_node = converted.get_child("data").node
  self.assertAllEqual(data_node.parent_index, [0, 0, 0, 1, 1])
def testFromUniformRowPartitionNvalsStaticNoValidate(self):
  """Checks the static properties of an unvalidated uniform partition."""
  partition = RowPartition.from_uniform_row_length(
      3, nrows=4, nvals=12, validate=False)

  self.assertAllEqual(4, partition.static_nrows)
  self.assertAllEqual(3, partition.static_uniform_row_length)
  self.assertAllEqual(12, partition.static_nvals)
def _expand_st_row_partitions(st, axis):
  """Builds the row partitions for `st` after an expand_dims at `axis`.

  Args:
    st: the value being expanded; the code reads its `shape`, `rank`,
      `row_partitions` and `nrows()`.
    axis: the position at which the new dimension is inserted.

  Returns:
    A tuple of row partitions that includes a new uniform-row-length
    partition for the inserted dimension, or `()` when `axis == 0` and `st`
    has rank 0.
  """
  if axis == 0:
    if st.shape.rank == 0:
      return ()
    # New outermost dimension: one row holding all existing rows.
    row_count = st.nrows()
    outermost = RowPartition.from_uniform_row_length(
        row_count, row_count, nrows=1, validate=False)
    return (outermost,) + st.row_partitions

  if axis == st.rank:
    # New innermost dimension: every existing innermost value becomes a row
    # of length one.
    if axis - 2 >= 0:
      value_count = st.row_partitions[axis - 2].nvals()
    else:
      value_count = st.nrows()
    existing = st.row_partitions
    innermost = RowPartition.from_uniform_row_length(
        1, value_count, nrows=value_count, validate=False)
    return existing + (innermost,)

  # New dimension in the middle: splice a length-one partition in at
  # position `axis - 1`.
  if axis - 1 >= 0:
    value_count = st.row_partitions[axis - 1].nrows()
  else:
    value_count = st.nrows()
  before = st.row_partitions[:axis - 1]
  inserted = RowPartition.from_uniform_row_length(
      1, value_count, nrows=value_count, validate=False)
  after = st.row_partitions[axis - 1:]
  return before + (inserted,) + after
def testFromRowSplits(self):
  """Checks dtype, row_splits identity and nrows for `from_row_splits`."""
  splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)

  partition = RowPartition.from_row_splits(splits, validate=False)
  self.assertEqual(partition.dtype, dtypes.int64)

  returned_splits = partition.row_splits()
  returned_nrows = partition.nrows()

  # The very same tensor object that was passed in must be handed back.
  self.assertIs(returned_splits, splits)
  self.assertAllEqual(returned_nrows, 5)
def _child_node_to_structured_tensor(
    node: prensor.ChildNodeTensor,
    fields: Mapping[path.Step, prensor.Prensor],
    nrows: tf.Tensor) -> structured_tensor.StructuredTensor:
  """Builds a StructuredTensor for a child node, grouped by parent index.

  Args:
    node: the child node; its `parent_index` supplies the value row ids.
    fields: the fields passed to `StructuredTensor.from_fields`.
    nrows: the number of rows of the resulting outer dimension.

  Returns:
    A StructuredTensor built from `fields` with shape `[None]`, whose outer
    dimension is then partitioned by `node.parent_index`.
  """
  flat_shape = tf.TensorShape([None])
  flat = structured_tensor.StructuredTensor.from_fields(
      fields=fields, shape=flat_shape)
  by_parent = RowPartition.from_value_rowids(
      value_rowids=node.parent_index, nrows=nrows)
  return flat.partition_outer_dimension(by_parent)
def testRowPartitionStr(self):
  """Checks `repr` and `str` of a partition built from row_splits."""
  partition = RowPartition.from_row_splits([0, 2, 5, 6, 6, 7], validate=False)

  # The expected text differs between eager and graph execution.
  if context.executing_eagerly():
    expected = 'tf.RowPartition(row_splits=[0 2 5 6 6 7])'
  else:
    expected = ('tf.RowPartition(row_splits='
                'Tensor("RowPartitionFromRowSplits/row_splits:0", '
                'shape=(6,), dtype=int64))')

  self.assertEqual(repr(partition), expected)
  self.assertEqual(str(partition), expected)
def testFromRowLengths(self):
  """Checks dtype, row_lengths and nrows for `from_row_lengths`."""
  lengths = constant_op.constant([2, 0, 3, 1, 1], dtypes.int64)

  partition = RowPartition.from_row_lengths(lengths, validate=False)
  self.assertEqual(partition.dtype, dtypes.int64)

  returned_lengths = partition.row_lengths()
  returned_nrows = partition.nrows()

  # The very same tensor object that was passed in must be handed back.
  self.assertIs(returned_lengths, lengths)
  self.assertAllEqual(returned_nrows, 5)
  self.assertAllEqual(returned_lengths, lengths)
def testRaggedTensorConstructionErrors(self):
  """Checks the errors raised by direct `RowPartition(...)` construction.

  Covers a missing `internal` flag, a non-Tensor partitioning argument, a
  wrong-rank `row_splits`, and a non-Tensor cached value.
  """
  row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)

  # `assertRaisesRegex` is used instead of the `assertRaisesRegexp` alias,
  # which is deprecated and no longer exists in Python 3.12+.

  # No `internal` flag supplied.
  with self.assertRaisesRegex(ValueError,
                              'RaggedTensor constructor is private'):
    RowPartition(row_splits=row_splits)

  # row_splits given as a plain Python list.
  with self.assertRaisesRegex(
      TypeError, 'Row-partitioning argument must be a Tensor'):
    RowPartition(row_splits=[0, 2, 2, 5, 6, 7], internal=True)

  # row_splits with an extra dimension.
  with self.assertRaisesRegex(ValueError, r'Shape \(6, 1\) must have rank 1'):
    RowPartition(
        row_splits=array_ops.expand_dims(row_splits, 1), internal=True)

  # Cached row lengths given as a plain Python list.
  with self.assertRaisesRegex(TypeError,
                              'Cached value must be a Tensor or None.'):
    RowPartition(
        row_splits=row_splits, cached_row_lengths=[2, 3, 4], internal=True)
def testRowPartitionStrUniformRowLength(self):
  """Checks `repr` and `str` of a uniform-row-length partition."""
  partition = RowPartition.from_uniform_row_length(5, nvals=10, nrows=2)

  # The expected text differs between eager and graph execution.
  if context.executing_eagerly():
    expected = 'tf.RowPartition(nrows=2, uniform_row_length=5)'
  else:
    expected = (
        'tf.RowPartition(nrows='
        'Tensor("RowPartitionFromUniformRowLength/'
        'nrows:0", shape=(), dtype=int64), '
        'uniform_row_length=Tensor("RowPartitionFromUniformRowLength/'
        'uniform_row_length:0", shape=(), dtype=int64))')

  self.assertEqual(repr(partition), expected)
  self.assertEqual(str(partition), expected)
def testFromValueRowIdsWithExplicitNRows(self):
  """Checks `from_value_rowids` with `nrows` larger than the derived value."""
  rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
  row_count = constant_op.constant(7, dtypes.int64)

  partition = RowPartition.from_value_rowids(
      rowids, row_count, validate=False)

  returned_rowids = partition.value_rowids()
  returned_nrows = partition.nrows()
  returned_splits = partition.row_splits()

  # Both input tensors must be handed back as the same objects.
  self.assertIs(returned_rowids, rowids)
  self.assertIs(returned_nrows, row_count)
  # The trailing repeated 7s correspond to the extra rows requested.
  self.assertAllEqual(returned_splits, [0, 2, 2, 5, 6, 7, 7, 7])
def testFromValueRowIdsWithDerivedNRowsDynamic(self):
  """Checks `from_value_rowids` when nrows is unknown at graph creation."""
  static_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
  # Route the row ids through a placeholder with no static shape.
  rowids = array_ops.placeholder_with_default(static_rowids, shape=None)

  partition = RowPartition.from_value_rowids(rowids, validate=False)

  returned_rowids = partition.value_rowids()
  returned_nrows = partition.nrows()

  # The placeholder tensor must be handed back as the same object.
  self.assertIs(returned_rowids, rowids)
  self.assertAllEqual(returned_rowids, rowids)
  self.assertAllEqual(returned_nrows, 5)
def testRowPartitionStr(self):
  """Checks `repr` and `str` of a partition built from row_splits."""
  partition = RowPartition.from_row_splits([0, 2, 5, 6, 6, 7], validate=False)

  # The expected text differs between eager and graph execution.
  if context.executing_eagerly():
    expected = ('tf.RowPartition(row_splits=tf.Tensor([0 2 5 6 6 7], '
                'shape=(6,), dtype=int64))')
  else:
    splits_dtype_name = 'int64'
    expected = ('tf.RowPartition(row_splits='
                'Tensor("RowPartitionFromRowSplits/row_splits:0", '
                'shape=(6,), dtype={}))').format(splits_dtype_name)

  self.assertEqual(repr(partition), expected)
  self.assertEqual(str(partition), expected)
def testFromRowLimits(self):
  """Checks dtype, nrows, row_limits and row_splits for `from_row_limits`."""
  limits = constant_op.constant([2, 2, 5, 6, 7], dtypes.int64)

  partition = RowPartition.from_row_limits(limits, validate=False)
  self.assertEqual(partition.dtype, dtypes.int64)

  returned_limits = partition.row_limits()
  # NOTE(review): `row_splits` is read as an attribute here, whereas
  # `row_limits` and `nrows` are called as methods -- confirm this matches
  # the RowPartition API this test targets.
  returned_splits = partition.row_splits
  returned_nrows = partition.nrows()

  self.assertAllEqual(returned_nrows, 5)
  self.assertAllEqual(returned_limits, limits)
  self.assertAllEqual(returned_splits, [0, 2, 2, 5, 6, 7])
def _expand_dims(st, axis):
  """Inserts a dimension like tf.expand_dims, also for StructuredTensor.

  Note: for a StructuredTensor of rank >= 1 only non-negative axes 0 and 1
  are implemented; any other axis raises a ValueError.

  Args:
    st: a Tensor, RaggedTensor, or StructuredTensor.
    axis: the axis to insert a dimension before.

  Returns:
    a tensor with one more dimension (see tf.expand_dims).

  Raises:
    ValueError: if the axis is not valid.
  """
  if not isinstance(st, structured_tensor.StructuredTensor):
    return tf.expand_dims(st, axis)

  # The axis is normalized before the scalar check, so an invalid axis is
  # reported even for rank-0 input.
  nn_axis = _expand_dims_nonnegative_axis(axis, st.rank)
  if st.rank == 0:
    return _expand_dims_scalar(st)

  if nn_axis != 0 and nn_axis != 1:
    # Note: this is unreachable in the current code.
    raise ValueError(
        "Unimplemented: non-negative axis > 1 for _expand_dims")

  row_count = st.nrows()
  if nn_axis == 0:
    # Add a dimension of size 1 at the front: a single row that contains
    # every existing row.
    row_length = row_count
  else:
    # Partition the first dimension into vectors of length 1.
    row_length = tf.constant(1, dtype=row_count.dtype)
  return st.partition_outer_dimension(
      RowPartition.from_uniform_row_length(row_length, row_count))
def testFromValueRowIdsWithExplicitNRowsEqualToDefault(self):
  """Checks `from_value_rowids` with `nrows` equal to the derived value."""
  rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
  row_count = constant_op.constant(5, dtypes.int64)

  partition = RowPartition.from_value_rowids(
      rowids, row_count, validate=False)

  returned_rowids = partition.value_rowids()
  returned_nrows = partition.nrows()
  returned_splits = partition.row_splits()

  # Both input tensors must be handed back as the same objects.
  self.assertIs(returned_rowids, rowids)
  self.assertIs(returned_nrows, row_count)

  self.assertAllEqual(returned_rowids, rowids)
  self.assertAllEqual(returned_nrows, row_count)
  self.assertAllEqual(returned_splits, [0, 2, 2, 5, 6, 7])
def testFromValueRowIdsWithDerivedNRows(self):
  """Checks `from_value_rowids` when nrows is known at graph creation."""
  rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)

  # TODO(martinz): add nrows
  partition = RowPartition.from_value_rowids(rowids, validate=False)
  self.assertEqual(partition.dtype, dtypes.int64)

  returned_splits = partition.row_splits()
  returned_rowids = partition.value_rowids()
  returned_nrows = partition.nrows()

  # The input tensor must be handed back as the same object.
  self.assertIs(returned_rowids, rowids)
  self.assertAllEqual(returned_rowids, rowids)
  self.assertAllEqual(returned_nrows, 5)
  self.assertAllEqual(returned_splits, [0, 2, 2, 5, 6, 7])
def _merge_dims(value, outer_axis, inner_axis):
  """Merges `outer_axis...inner_axis` of `value` into a single dimension.

  Args:
    value: a Tensor, RaggedTensor, or StructuredTensor.
    outer_axis: the first axis of the range to merge.
    inner_axis: the last axis of the range to merge (inclusive); must be
      greater than `outer_axis`.

  Returns:
    A value of the same kind as `value` in which the axes from `outer_axis`
    through `inner_axis` have been collapsed into one. For a
    StructuredTensor every field is merged recursively.
  """
  assert outer_axis < inner_axis
  if isinstance(value, (ops.Tensor, ragged_tensor.RaggedTensor)):
    return ragged_tensor.merge_dims(value, outer_axis, inner_axis)

  assert isinstance(value, StructuredTensor)

  # Build the new fields.
  fields = {
      k: _merge_dims(v, outer_axis, inner_axis)
      for (k, v) in value._fields.items()
  }

  # Build the new shape. The merged dimension is the product of the sizes of
  # axes outer_axis..inner_axis *inclusive*, hence the `inner_axis + 1` slice
  # bound (the remaining dimensions start at `inner_axis + 1`).
  value_shape = value.shape
  shape = (
      value_shape[:outer_axis] +
      [value_shape[outer_axis:inner_axis + 1].num_elements()] +
      value_shape[inner_axis + 1:])

  # Build the new row_partitions & nrows
  if outer_axis == 0:
    if inner_axis == value.shape.rank - 1:
      # Every dimension is merged: no partitions remain.
      partitions = ()
      nrows = value.row_partitions[-1].nvals()
    else:
      partitions = value.row_partitions[inner_axis:]
      nrows = partitions[0].nrows()
  else:
    # Use tf.gather to merge row_splits from the merged row partitions.
    merged_splits = value.row_partitions[outer_axis - 1].row_splits()
    for dim in range(outer_axis, inner_axis):
      merged_splits = array_ops.gather(value.row_partitions[dim].row_splits(),
                                       merged_splits)

    partitions = (
        value.row_partitions[:outer_axis - 1] +
        (RowPartition.from_row_splits(merged_splits),) +
        value.row_partitions[inner_axis:])
    nrows = partitions[0].nrows()

  return StructuredTensor(
      fields,
      shape,
      nrows,
      partitions,
      internal=_structured_tensor_factory_key)
def testWithPrecomputedSplits(self):
  """Checks the `with_precomputed_*` / `has_precomputed_*` method pairs."""
  base = RowPartition.from_row_splits([0, 2, 8])

  # row_splits
  with_splits = base.with_precomputed_row_splits()
  self.assertTrue(with_splits.has_precomputed_row_splits())

  # row_lengths: absent on the base partition, present on the derived one.
  self.assertFalse(base.has_precomputed_row_lengths())
  with_lengths = base.with_precomputed_row_lengths()
  self.assertTrue(with_lengths.has_precomputed_row_lengths())

  # value_rowids: absent on the base partition, present on the derived one.
  self.assertFalse(base.has_precomputed_value_rowids())
  with_rowids = base.with_precomputed_value_rowids()
  self.assertTrue(with_rowids.has_precomputed_value_rowids())

  # nrows: absent on the base partition, present on the derived one.
  self.assertFalse(base.has_precomputed_nrows())
  with_nrows = base.with_precomputed_nrows()
  self.assertTrue(with_nrows.has_precomputed_nrows())
def _row_partitions_for_uniform_shape(shape, rank):
  """Returns row partitions for the given shape Tensor.

  Args:
    shape: A vector describing a uniform shape.
    rank: The number of dimensions to generate row partitions for

  Returns:
    A tuple of (rank-1) `RowPartition`s with uniform row length.
  """
  # Running product of the leading `rank` dimension sizes.
  cumulative_sizes = math_ops.cumprod(shape[:rank])

  partitions = []
  for axis in range(rank - 1):
    # The partition for `axis` groups the cumulative_sizes[axis + 1] values
    # into cumulative_sizes[axis] rows of length shape[axis + 1].
    partitions.append(
        RowPartition.from_uniform_row_length(
            uniform_row_length=shape[axis + 1],
            nvals=cumulative_sizes[axis + 1],
            nrows=cumulative_sizes[axis]))
  return tuple(partitions)