def test_add_ragged_fields(self):
     """Check the ragged fields produced for a 2-level nested sequence."""
     # Nested `Sequence(Sequence(tf.int64))`: rows of length 3, 0 and 2.
     ragged_rows = [[1, 2, 3], [], [4, 5]]
     ragged_info = feature_lib.TensorInfo(
         shape=(None, None),
         dtype=tf.int64,
         sequence_rank=2,
     )

     actual_fields = example_serializer._add_ragged_fields(
         ragged_rows, ragged_info)

     # One entry for the flattened values, one for the per-row lengths.
     expected_fields = {
         'ragged_flat_values': (
             np.array([1, 2, 3, 4, 5]),
             feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
         ),
         'ragged_row_lengths_0': (
             [3, 0, 2],
             feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
         ),
     }
     self.assertRaggedFieldEqual(actual_fields, expected_fields)
Ejemplo n.º 2
0
def _add_ragged_fields(example_data, tensor_info):
    """Split multi-level ragged data into flat values and row lengths.

    Example:

    ```
    example_data = [
        [1, 2, 3],
        [],
        [4, 5]
    ]
    tensor_info = TensorInfo(shape=(None, None,), sequence_rank=2, ...)
    out = _add_ragged_fields(example_data, tensor_info)
    out == {
        'ragged_row_lengths_0': ([3, 0, 2], TensorInfo(shape=(None,), ...)),
        'ragged_flat_values': ([1, 2, 3, 4, 5], TensorInfo(shape=(None,), ...)),
    }
    ```

    When `tensor_info.sequence_rank` is below 2 (no sequence, or a single-level
    sequence), the data and `tensor_info` are returned as a plain tuple.

    Args:
      example_data: Data to optionally convert to ragged data.
      tensor_info: TensorInfo associated with the given data.

    Returns:
      A tuple(example_data, tensor_info) if `sequence_rank` is below 2,
      otherwise a dict mapping `ragged_row_lengths_<i>` and
      `ragged_flat_values` to tuple(data, tensor_info).
    """
    rank = tensor_info.sequence_rank

    # Any sequence (rank >= 1) goes through the ragged extraction first:
    # 1-level sequences are converted to numpy and stacked, and an empty
    # sequence yields an np.empty(shape=(0, ...)) array.
    if rank:
        example_data, nested_row_lengths = _extract_ragged_attributes(
            example_data, tensor_info)

    # No sequence or 1-level sequence: forward the (possibly converted) data.
    # Multi-level sequences with static lengths could eventually be handled
    # in a smarter way.
    if rank < 2:
        return (example_data, tensor_info)

    # Multi-level sequence: one int64 vector of row lengths per nesting level.
    row_lengths_info = feature_lib.TensorInfo(shape=(None,), dtype=tf.int64)
    ragged_fields = {}
    for level, row_lengths in enumerate(nested_row_lengths):
        field_name = "ragged_row_lengths_{}".format(level)
        ragged_fields[field_name] = (row_lengths, row_lengths_info)

    # The flat values keep the non-sequence trailing dimensions, with a single
    # unknown leading dimension replacing all the sequence dimensions.
    flat_shape = (None,) + tensor_info.shape[rank:]
    flat_values_info = feature_lib.TensorInfo(
        shape=flat_shape,
        dtype=tensor_info.dtype,
    )
    ragged_fields["ragged_flat_values"] = (example_data, flat_values_info)
    return ragged_fields
 def get_tensor_info(self):
     """Return the `TensorInfo` of the `data`, `conf` and `fps` entries."""
     # `conf` only keeps the first three dimensions of the full shape.
     conf_shape = tuple(self._shape)[:3]

     tensor_infos = {}
     tensor_infos["data"] = feature.TensorInfo(
         shape=self._shape, dtype=tf.float32)
     tensor_infos["conf"] = feature.TensorInfo(
         shape=conf_shape, dtype=tf.float32)
     tensor_infos["fps"] = feature.TensorInfo(shape=(), dtype=tf.int32)
     return tensor_infos
 def test_ragged_dict_to_tf_example_empty(self):
     """An empty 2-level ragged input serializes to empty int64 lists."""
     ragged_info = feature_lib.TensorInfo(
         shape=(None, None),
         dtype=tf.int64,
         sequence_rank=2,
     )
     serialized = example_serializer._dict_to_tf_example(
         {'input': []},
         {'input': ragged_info},
     )
     proto_features = serialized.features.feature

     # Both the flat values and the row lengths must be present but empty.
     ragged_field_names = (
         'input/ragged_flat_values',
         'input/ragged_row_lengths_0',
     )
     for field_name in ragged_field_names:
         stored_values = list(proto_features[field_name].int64_list.value)
         self.assertEqual([], stored_values)
 def test_item_to_tf_feature_string_check(self):
     """Integer values paired with a string dtype must raise ValueError."""
     int_values = [1, 2, 3, 4, 5]
     string_info = feature_lib.TensorInfo(shape=(5,), dtype=tf.string)
     # Pattern the string check in `_item_to_tf_feature` is expected to emit.
     expected_message = (
         'Unsupported value: (.*)\nCould not convert to bytes list.')
     with self.assertRaisesRegex(ValueError, expected_message):
         example_serializer._item_to_tf_feature(int_values, string_info)
Ejemplo n.º 6
0
 def test_dict_to_tf_example_error_reraise(self):
     """Errors in `_dict_to_tf_example` are re-raised naming the feature."""
     # Three values for a declared shape of (2,).
     mismatched_data = {'input': [1, 2, 3]}
     declared_info = {
         'input': feature_lib.TensorInfo(shape=(2,), dtype=tf.int64),
     }
     expected_prefix = 'Error while serializing feature `input`:'
     with self.assertRaisesRegex(ValueError, expected_prefix):
         example_serializer._dict_to_tf_example(
             mismatched_data, declared_info)
 def test_add_ragged_fields_all_empty(self):
     """An empty list with a rank-2 sequence info yields empty ragged fields."""
     empty_rows = []
     ragged_info = feature_lib.TensorInfo(
         shape=(None, None),
         dtype=tf.int64,
         sequence_rank=2,
     )

     actual_fields = example_serializer._add_ragged_fields(
         empty_rows, ragged_info)

     # Flat values and row lengths are both expected as empty int64 vectors.
     expected_fields = {
         'ragged_flat_values': (
             np.zeros(shape=(0,), dtype=np.int64),
             feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
         ),
         'ragged_row_lengths_0': (
             np.zeros(shape=(0,), dtype=np.int64),
             feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
         ),
     }
     self.assertRaggedFieldEqual(actual_fields, expected_fields)
Ejemplo n.º 8
0
    def get_serialized_info(self):
        """See base class for details."""
        if self._encoded_to_bytes:
            # Encoded values are stored as a single bytes string.
            value_spec = feature_lib.TensorInfo(shape=(), dtype=tf.string)
        else:
            value_spec = feature_lib.TensorInfo(
                shape=self._shape, dtype=self._dtype)

        # Static shape: the value spec alone is enough.
        if not self._dynamic_shape:
            return value_spec

        # Dynamic shape: an extra `shape` field (one int32 per dimension) is
        # stored to restore the shape after de-serialization.
        shape_spec = feature_lib.TensorInfo(
            shape=(len(self._shape),),
            dtype=tf.int32,
        )
        return {'shape': shape_spec, 'value': value_spec}
 def test_add_ragged_fields_single_level_sequence(self):
     """A 1-level sequence comes back as (data, original tensor_info)."""
     sequence_rows = [[1, 2], [2, 3], [4, 5]]
     sequence_info = feature_lib.TensorInfo(
         shape=(None, 2),
         dtype=tf.int64,
         sequence_rank=1,
     )

     result = example_serializer._add_ragged_fields(
         sequence_rows, sequence_info)

     # First item holds the values, second item the untouched TensorInfo.
     expected_rows = [[1, 2], [2, 3], [4, 5]]
     self.assertAllEqual(result[0], expected_rows)
     self.assertEqual(result[1], sequence_info)
Ejemplo n.º 10
0
 def add_length_dim(tensor_info):
     """Return a TensorInfo whose shape is prefixed with `self._length`."""
     # `self` comes from the enclosing scope, not from the arguments.
     extended_shape = (self._length,) + tensor_info.shape
     return feature_lib.TensorInfo(
         shape=extended_shape, dtype=tensor_info.dtype)
Ejemplo n.º 11
0
 def add_length_dim(tensor_info):
     """Prepend the length dimension to the given serialized info."""
     # `self._length` is read from the enclosing scope; the dtype is kept.
     shape_with_length = (self._length,) + tensor_info.shape
     info_with_length = feature_lib.TensorInfo(
         shape=shape_with_length,
         dtype=tensor_info.dtype,
     )
     return info_with_length
Ejemplo n.º 12
0
 def test_item_to_tf_feature_incorrect_shape(self):
     """Five values for a declared shape of (4,) must raise ValueError."""
     five_values = [1, 2, 3, 4, 5]
     four_wide_info = feature_lib.TensorInfo(shape=(4,), dtype=tf.int64)
     with self.assertRaises(ValueError):
         example_serializer._item_to_tf_feature(
             five_values, four_wide_info)
Ejemplo n.º 13
0
 def get_tensor_info(self):
     """Return a `TensorInfo` built from the stored shape and dtype."""
     tensor_info = feature.TensorInfo(
         shape=self._shape,
         dtype=self._dtype,
     )
     return tensor_info
Ejemplo n.º 14
0
 def get_serialized_info(self):
     """Return the serialized spec: a single scalar string."""
     # Only the raw image is stored (it includes the size).
     raw_image_spec = feature.TensorInfo(shape=(), dtype=tf.string)
     return raw_image_spec
Ejemplo n.º 15
0
 def get_tensor_info(self) -> feature_lib.TensorInfo:
     """See base class for details."""
     # The info simply mirrors the shape and dtype stored on the instance.
     tensor_info = feature_lib.TensorInfo(
         shape=self._shape,
         dtype=self._dtype,
     )
     return tensor_info
Ejemplo n.º 16
0
 def get_tensor_info(self):
     """Return the `TensorInfo` of a scalar `tf.int64` value."""
     scalar_int_info = feature.TensorInfo(shape=(), dtype=tf.int64)
     return scalar_int_info
Ejemplo n.º 17
0
 def get_tensor_info(self):
     """Return the `TensorInfo`, which depends on whether an encoder is set."""
     # Without an encoder the value is a scalar string.
     if not self.encoder:
         return feature.TensorInfo(shape=(), dtype=tf.string)
     # With an encoder the value is an int64 vector of unknown length.
     return feature.TensorInfo(shape=(None,), dtype=tf.int64)
Ejemplo n.º 18
0
def test_tensor_info_list_shape():
  """A shape passed as a list is exposed as a tuple."""
  info = feature.TensorInfo(dtype=tf.int64, shape=[28, 28, 3])
  expected_shape = (28, 28, 3)
  assert info.shape == expected_shape
Ejemplo n.º 19
0
def test_tensor_info_tensor_shape_with_none():
  """A `tf.TensorShape` with unknown dims round-trips through TensorInfo."""
  partial_shape = tf.TensorShape([None, None, 3])
  info = feature.TensorInfo(shape=partial_shape, dtype=tf.int64)

  # Unknown dimensions are exposed as `None` in the tuple shape.
  assert info.shape == (None, None, 3)

  expected_spec = tf.TensorSpec(shape=partial_shape, dtype=tf.int64)
  assert info.to_tensor_spec() == expected_spec
Ejemplo n.º 20
0
def test_tensor_info_tensor_shape():
  """A fully defined `tf.TensorShape` round-trips through TensorInfo."""
  static_shape = tf.TensorShape([28, 28, 3])
  info = feature.TensorInfo(shape=static_shape, dtype=tf.int64)

  # The shape is exposed as a plain tuple of ints.
  assert info.shape == (28, 28, 3)

  expected_spec = tf.TensorSpec(shape=static_shape, dtype=tf.int64)
  assert info.to_tensor_spec() == expected_spec
Ejemplo n.º 21
0
def test_tensor_info_list_shape_with_none():
  """A list shape with `None` entries is exposed as a tuple with `None`."""
  info = feature.TensorInfo(dtype=tf.int64, shape=[None, None, 3])
  expected_shape = (None, None, 3)
  assert info.shape == expected_shape