def test_add_ragged_fields(self):
    # Input models a nested `Sequence(Sequence(tf.int64))`; the middle row is
    # deliberately empty so a zero row length is exercised.
    nested_values = [
        [1, 2, 3],
        [],
        [4, 5],
    ]
    ragged_info = feature_lib.TensorInfo(
        shape=(None, None),
        dtype=tf.int64,
        sequence_rank=2,
    )

    actual = example_serializer._add_ragged_fields(nested_values, ragged_info)

    # Expected: all leaf values concatenated, plus one row-length vector for
    # the single ragged level.
    expected = {
        'ragged_flat_values': (
            np.array([1, 2, 3, 4, 5]),
            feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
        ),
        'ragged_row_lengths_0': (
            [3, 0, 2],
            feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
        ),
    }
    self.assertRaggedFieldEqual(actual, expected)
def _add_ragged_fields(example_data, tensor_info):
    """Optionally convert the ragged data into flat/row_lengths fields.

    Example:

    ```
    example_data = [
        [1, 2, 3],
        [],
        [4, 5]
    ]
    tensor_info = TensorInfo(shape=(None, None,), sequence_rank=2, ...)
    out = _add_ragged_fields(example_data, tensor_info)
    out == {
        'ragged_flat_values': ([1, 2, 3, 4, 5], TensorInfo(shape=(None,), ...)),
        'ragged_row_lengths_0': ([3, 0, 2], TensorInfo(shape=(None,), ...))
    }
    ```

    If `example_data` isn't ragged (sequence rank below 2), the data and
    `tensor_info` are returned as a tuple instead of a dict. For a sequence
    rank of 1 the data is still passed through `_extract_ragged_attributes`
    first, so the returned data is whatever that helper produced.

    Args:
      example_data: Data to optionally convert to ragged data.
      tensor_info: TensorInfo associated with the given data. Its
        `sequence_rank` may be 0 or `None` for non-sequence data.

    Returns:
      A tuple(example_data, tensor_info) if the tensor isn't ragged, or a dict
      of tuple(example_data, tensor_info) if the tensor is ragged. The dict has
      one `ragged_row_lengths_{i}` entry per nested row-length vector and a
      single `ragged_flat_values` entry.
    """
    # Treat a missing rank like "not a sequence" so the comparison below cannot
    # fail on `None < 2`.
    rank = tensor_info.sequence_rank or 0

    # Step 1: Extract the ragged tensor info.
    if rank:
        # If the input is ragged, extract the nested values.
        # 1-level sequences are converted as numpy and stacked.
        # If the sequence is empty, a np.empty(shape=(0, ...)) array is
        # returned.
        example_data, nested_row_lengths = _extract_ragged_attributes(
            example_data, tensor_info)

    # Step 2: Format the ragged tensor data as dict.
    # No sequence or 1-level sequence, forward the data.
    # Could eventually handle multi-level sequences with static lengths
    # in a smarter way.
    if rank < 2:
        return (example_data, tensor_info)

    # Multiple level sequence: every row-length vector shares the same spec.
    tensor_info_length = feature_lib.TensorInfo(shape=(None,), dtype=tf.int64)
    ragged_attr_dict = {
        "ragged_row_lengths_{}".format(i): (length, tensor_info_length)
        for i, length in enumerate(nested_row_lengths)
    }
    # The flat values keep the non-sequence trailing dimensions, behind a
    # single unknown-length leading dimension.
    tensor_info_flat = feature_lib.TensorInfo(
        shape=(None,) + tensor_info.shape[rank:],
        dtype=tensor_info.dtype,
    )
    ragged_attr_dict["ragged_flat_values"] = (example_data, tensor_info_flat)
    return ragged_attr_dict
def get_tensor_info(self):
    # Describes three tensors: float32 `data` with the full feature shape,
    # float32 `conf` limited to the first three dimensions of that shape, and
    # a scalar int32 `fps`.
    leading_dims = list(self._shape)[:3]
    conf_shape = tuple(leading_dims)

    data_info = feature.TensorInfo(shape=self._shape, dtype=tf.float32)
    conf_info = feature.TensorInfo(shape=conf_shape, dtype=tf.float32)
    fps_info = feature.TensorInfo(shape=(), dtype=tf.int32)
    return {
        "data": data_info,
        "conf": conf_info,
        "fps": fps_info,
    }
def test_ragged_dict_to_tf_example_empty(self):
    # An empty 2-level ragged feature is expected to serialize into two empty
    # int64 lists (flat values and first-level row lengths).
    empty_example = {'input': []}
    ragged_spec = feature_lib.TensorInfo(
        shape=(None, None),
        dtype=tf.int64,
        sequence_rank=2,
    )
    serialized = example_serializer._dict_to_tf_example(
        empty_example, {'input': ragged_spec})

    features = serialized.features.feature
    for key in ('input/ragged_flat_values', 'input/ragged_row_lengths_0'):
        self.assertEqual([], list(features[key].int64_list.value))
def test_item_to_tf_feature_string_check(self):
    # Integer values paired with a tf.string spec are expected to be rejected
    # by _item_to_tf_feature with a ValueError carrying the bytes-list message.
    int_values = [1, 2, 3, 4, 5]
    string_info = feature_lib.TensorInfo(shape=(5,), dtype=tf.string)
    expected_message = (
        'Unsupported value: (.*)\nCould not convert to bytes list.'
    )
    with self.assertRaisesRegex(ValueError, expected_message):
        example_serializer._item_to_tf_feature(int_values, string_info)
def test_dict_to_tf_example_error_reraise(self):
    # Three values against a declared shape of (2,): the failure is expected
    # to be re-raised with the offending feature name in the message.
    mismatched_spec = feature_lib.TensorInfo(shape=(2,), dtype=tf.int64)
    example = {'input': [1, 2, 3]}
    specs = {'input': mismatched_spec}
    expected_prefix = 'Error while serializing feature `input`:'
    with self.assertRaisesRegex(ValueError, expected_prefix):
        example_serializer._dict_to_tf_example(example, specs)
def test_add_ragged_fields_all_empty(self):
    # Edge case: the outermost list itself is empty.
    ragged_info = feature_lib.TensorInfo(
        shape=(None, None),
        dtype=tf.int64,
        sequence_rank=2,
    )

    actual = example_serializer._add_ragged_fields([], ragged_info)

    # Both the flat values and the row lengths should come back as empty
    # int64 vectors.
    expected = {
        'ragged_flat_values': (
            np.zeros(shape=(0,), dtype=np.int64),
            feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
        ),
        'ragged_row_lengths_0': (
            np.zeros(shape=(0,), dtype=np.int64),
            feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
        ),
    }
    self.assertRaggedFieldEqual(actual, expected)
def get_serialized_info(self):
    """See base class for details."""
    if self._encoded_to_bytes:
        # Encoded values are stored as a single bytes string.
        value_spec = feature_lib.TensorInfo(shape=(), dtype=tf.string)
    else:
        value_spec = feature_lib.TensorInfo(
            shape=self._shape,
            dtype=self._dtype,
        )

    # Static shape: the value spec alone is enough.
    if not self._dynamic_shape:
        return value_spec

    # Dynamic shape: store the shape next to the value so it can be restored
    # after de-serialization. One int32 entry per dimension.
    shape_spec = feature_lib.TensorInfo(
        shape=(len(self._shape),),
        dtype=tf.int32,
    )
    return {
        'shape': shape_spec,
        'value': value_spec,
    }
def test_add_ragged_fields_single_level_sequence(self):
    # A sequence rank of 1 is not treated as ragged: the result is expected to
    # be a (data, tensor_info) pair rather than a dict of ragged fields.
    rows = [
        [1, 2],
        [2, 3],
        [4, 5],
    ]
    sequence_info = feature_lib.TensorInfo(
        shape=(None, 2),
        dtype=tf.int64,
        sequence_rank=1,
    )

    out = example_serializer._add_ragged_fields(rows, sequence_info)

    expected_rows = [
        [1, 2],
        [2, 3],
        [4, 5],
    ]
    self.assertAllEqual(out[0], expected_rows)
    self.assertEqual(out[1], sequence_info)
def add_length_dim(tensor_info):
    """Return a TensorInfo whose shape is prefixed with `self._length`."""
    # `self` is not a parameter here; it is resolved from the surrounding scope.
    extended_shape = (self._length,) + tensor_info.shape
    return feature_lib.TensorInfo(
        shape=extended_shape,
        dtype=tensor_info.dtype,
    )
def add_length_dim(tensor_info):
    """Prepend the length dimension to the given serialized info."""
    # `self` is not a parameter here; it is resolved from the surrounding scope.
    shape_with_length = (self._length,) + tensor_info.shape
    dtype = tensor_info.dtype
    return feature_lib.TensorInfo(shape=shape_with_length, dtype=dtype)
def test_item_to_tf_feature_incorrect_shape(self):
    # Five values against a declared shape of (4,): the shape check in
    # _item_to_tf_feature is expected to raise ValueError.
    five_values = [1, 2, 3, 4, 5]
    four_slot_info = feature_lib.TensorInfo(shape=(4,), dtype=tf.int64)
    with self.assertRaises(ValueError):
        example_serializer._item_to_tf_feature(five_values, four_slot_info)
def get_tensor_info(self):
    # Shape and dtype are taken from the instance configuration.
    # NOTE(review): the previous comment described the image as a 3-d uint8
    # tf.Tensor; confirm that matches `self._shape` / `self._dtype`.
    shape = self._shape
    dtype = self._dtype
    return feature.TensorInfo(shape=shape, dtype=dtype)
def get_serialized_info(self):
    # Only the raw image (which includes its size) is stored, so the
    # serialized form is a single scalar string.
    raw_image_spec = feature.TensorInfo(shape=(), dtype=tf.string)
    return raw_image_spec
def get_tensor_info(self) -> feature_lib.TensorInfo:
    """See base class for details."""
    # The spec mirrors the shape and dtype stored on the instance.
    shape = self._shape
    dtype = self._dtype
    return feature_lib.TensorInfo(shape=shape, dtype=dtype)
def get_tensor_info(self):
    # This feature is always described as a scalar int64.
    scalar_int_spec = feature.TensorInfo(shape=(), dtype=tf.int64)
    return scalar_int_spec
def get_tensor_info(self):
    # Without an encoder the value is described as a scalar string.
    if not self.encoder:
        return feature.TensorInfo(shape=(), dtype=tf.string)
    # With an encoder it is described as a variable-length int64 vector.
    return feature.TensorInfo(shape=(None,), dtype=tf.int64)
def test_tensor_info_list_shape():
    """A shape given as a list is exposed as the equivalent tuple."""
    list_shape = [28, 28, 3]
    info = feature.TensorInfo(shape=list_shape, dtype=tf.int64)
    assert info.shape == (28, 28, 3)
def test_tensor_info_tensor_shape_with_none():
    """A partially-unknown tf.TensorShape round-trips through TensorInfo."""
    partial_shape = tf.TensorShape([None, None, 3])
    info = feature.TensorInfo(shape=partial_shape, dtype=tf.int64)

    # The shape is exposed as a plain tuple, unknown dimensions kept as None.
    assert info.shape == (None, None, 3)

    actual_spec = info.to_tensor_spec()
    expected_spec = tf.TensorSpec(shape=partial_shape, dtype=tf.int64)
    assert actual_spec == expected_spec
def test_tensor_info_tensor_shape():
    """A fully-defined tf.TensorShape round-trips through TensorInfo."""
    static_shape = tf.TensorShape([28, 28, 3])
    info = feature.TensorInfo(shape=static_shape, dtype=tf.int64)

    # The shape is exposed as a plain tuple of ints.
    assert info.shape == (28, 28, 3)

    actual_spec = info.to_tensor_spec()
    expected_spec = tf.TensorSpec(shape=static_shape, dtype=tf.int64)
    assert actual_spec == expected_spec
def test_tensor_info_list_shape_with_none():
    """A list shape with unknown dimensions is exposed as a tuple with None."""
    list_shape = [None, None, 3]
    info = feature.TensorInfo(shape=list_shape, dtype=tf.int64)
    assert info.shape == (None, None, 3)