def test_int(self):
    """Checks a length-3 `Sequence` wrapping a single int32 feature.

    Inputs given as a dict of Python lists, a dict of numpy arrays, or a list
    of per-step dicts are all expected to yield a dict of arrays. An input of
    length 4 is expected to raise `ValueError`.
    """
    sequence_feature = feature_lib.Sequence({'int': tf.int32}, length=3)
    serialized_info = {
        'int': feature_lib.TensorInfo(shape=(3,), dtype=tf.int32),
    }

    # Python array
    python_list_case = testing.FeatureExpectationItem(
        value={'int': [1, 2, 3]},
        expected={'int': [1, 2, 3]},
    )
    # Numpy array
    numpy_case = testing.FeatureExpectationItem(
        value={'int': np.ones(shape=(3,), dtype=np.int32)},
        expected={'int': [1, 1, 1]},
    )
    # Array of dict
    list_of_dicts_case = testing.FeatureExpectationItem(
        value=[
            {'int': 1},
            {'int': 10},
            {'int': 100},
        ],
        expected={'int': [1, 10, 100]},
    )
    # Wrong sequence length
    wrong_length_case = testing.FeatureExpectationItem(
        value={'int': np.ones(shape=(4,), dtype=np.int32)},
        raise_cls=ValueError,
        raise_msg='Input sequence length do not match',
    )

    self.assertFeature(
        feature=sequence_feature,
        shape={'int': (3,)},
        dtype={'int': tf.int32},
        serialized_info=serialized_info,
        tests=[
            python_list_case,
            numpy_case,
            list_of_dicts_case,
            wrong_length_case,
        ],
        test_attributes={'_length': 3},
    )
def test_int(self):
    """Checks a `Dataset` feature wrapping a single int32 feature.

    Inputs are given either as a list of per-element dicts or as the numpy
    view of a `tf.data.Dataset`. Datasets of length 3 and 4 are both
    accepted, since no length is enforced.
    """

    def int_dataset(values):
        # Dataset with one `{'int': value}` element per entry of `values`.
        return tf.data.Dataset.from_tensor_slices({'int': values})

    def ones_as_numpy(count):
        # Numpy view of a dataset holding `count` int32 ones.
        return dataset_utils.as_numpy(
            int_dataset(np.ones(shape=(count,), dtype=np.int32)))

    dataset_feature = feature_lib.Dataset({'int': tf.int32})
    serialized_info = {
        'int': feature_lib.TensorInfo(shape=(None,), dtype=tf.int32),
    }

    # Python array
    python_list_case = testing.FeatureExpectationItem(
        value=[{'int': number} for number in (1, 2, 3)],
        expected=int_dataset([1, 2, 3]),
    )
    # Numpy array
    numpy_case = testing.FeatureExpectationItem(
        value=ones_as_numpy(3),
        expected=int_dataset([1, 1, 1]),
    )
    # Dataset length doesn't matter
    other_length_case = testing.FeatureExpectationItem(
        value=ones_as_numpy(4),
        expected=int_dataset([1, 1, 1, 1]),
    )

    self.assertFeatureEagerOnly(
        feature=dataset_feature,
        shape={'int': ()},  # shape of each element of the dataset
        dtype={'int': tf.int32},
        serialized_info=serialized_info,
        tests=[python_list_case, numpy_case, other_length_case],
        test_attributes={'_length': None},
    )
def test_label(self):
    """Checks a `Dataset` feature wrapping a two-class `ClassLabel`.

    The label names 'left' and 'right' are expected to come back as the
    integers 0 and 1, for both a list of dicts and a numpy dataset view.
    """

    def label_dataset(labels):
        # Dataset with one `{'label': value}` element per entry of `labels`.
        return tf.data.Dataset.from_tensor_slices({'label': labels})

    label_feature = feature_lib.Dataset(
        {
            'label': feature_lib.ClassLabel(names=['left', 'right']),
        },
        length=None,
    )
    serialized_info = {
        'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
    }

    list_of_dicts_case = testing.FeatureExpectationItem(
        value=[{'label': name} for name in ('right', 'left', 'left')],
        expected=label_dataset([1, 0, 0]),
    )
    # Variable sequence length
    numpy_dataset_case = testing.FeatureExpectationItem(
        value=dataset_utils.as_numpy(
            label_dataset(['right', 'left', 'right', 'left'])),
        expected=label_dataset([1, 0, 1, 0]),
    )

    self.assertFeatureEagerOnly(
        feature=label_feature,
        shape={'label': ()},
        dtype={'label': tf.int64},
        serialized_info=serialized_info,
        tests=[list_of_dicts_case, numpy_dataset_case],
        test_attributes={'_length': None},
    )
def test_features_sequence(self):
    """Checks serialization of variable-length (sequence) tensor specs.

    Covers a nested string sequence ("a" -> "b") and an int32 sequence of
    pairs ("a/c"), each tested once empty and once populated.
    """

    def bytes_feature(values):
        # `tf.train.Feature` wrapping a bytes list.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

    def int64_feature(values):
        # `tf.train.Feature` wrapping an int64 list.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    specs = {
        "a": {
            "b": features.TensorInfo(shape=(None,), dtype=tf.string),
        },
        "a/c": features.TensorInfo(shape=(None, 2), dtype=tf.int32),
    }
    serialized_info = {
        "a/b": tf.io.FixedLenSequenceFeature(
            shape=(), dtype=tf.string, allow_missing=True),
        "a/c": tf.io.FixedLenSequenceFeature(
            shape=(2,), dtype=tf.int64, allow_missing=True),
    }

    # Raw values
    empty_strings_case = testing.FeatureExpectationItem(
        value={
            "a": {"b": []},
            "a/c": [[1, 2], [3, 4]],
        },
        expected={
            "a": {"b": []},
            "a/c": [[1, 2], [3, 4]],
        },
        expected_serialized={
            "a/b": bytes_feature([]),
            "a/c": int64_feature([1, 2, 3, 4]),
        },
    )
    empty_ints_case = testing.FeatureExpectationItem(
        value={
            "a": {"b": ["abc\n", "", "def "]},
            "a/c": np.empty(shape=(0, 2), dtype=np.int32),
        },
        expected={
            "a": {"b": [b"abc\n", b"", b"def "]},
            "a/c": np.empty(shape=(0, 2), dtype=np.int32),
        },
        expected_serialized={
            "a/b": bytes_feature([b"abc\n", b"", b"def "]),
            "a/c": int64_feature([]),
        },
    )

    self.assertFeature(
        specs=specs,
        serialized_info=serialized_info,
        tests=[empty_strings_case, empty_ints_case],
    )
def test_features(self):
    """Checks serialization of fixed-shape string/int/bool/float specs.

    Valid inputs are tested as raw Python values and as numpy values. Three
    invalid inputs (two wrong shapes, one wrong dtype) are expected to raise
    `ValueError`.
    """

    def bytes_feature(values):
        # `tf.train.Feature` wrapping a bytes list.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

    def int64_feature(values):
        # `tf.train.Feature` wrapping an int64 list.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    def float_feature(values):
        # `tf.train.Feature` wrapping a float list.
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    # Valid example; each error case below overrides exactly one entry.
    default_dict = {
        "str": "some value",
        "int": 1,
        "bool": [True],
        "float": [[2.0, 3.0]],
    }

    specs = {
        "str": features.TensorInfo(shape=(), dtype=tf.string),
        "int": features.TensorInfo(shape=(), dtype=tf.int32),
        "bool": features.TensorInfo(shape=(1,), dtype=tf.bool),
        "float": features.TensorInfo(shape=(1, 2), dtype=tf.float32),
    }
    serialized_info = {
        "str": tf.io.FixedLenFeature(shape=(), dtype=tf.string),
        "int": tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
        "bool": tf.io.FixedLenFeature(shape=(1,), dtype=tf.int64),
        "float": tf.io.FixedLenFeature(shape=(1, 2), dtype=tf.float32),
    }

    # Raw values
    raw_values_case = testing.FeatureExpectationItem(
        value={
            "str": "",
            "int": 1,
            "bool": [True],
            "float": [[2.0, 3.0]],
        },
        expected={
            "str": b"",
            "int": 1,
            "bool": [True],
            "float": [[2.0, 3.0]],
        },
        expected_serialized={
            "str": bytes_feature([b""]),
            "int": int64_feature([1]),
            "bool": int64_feature([1]),
            "float": float_feature([2.0, 3.0]),
        },
    )
    # Test numpy array
    numpy_values_case = testing.FeatureExpectationItem(
        value={
            "str": np.zeros(2, dtype=np.uint8).tobytes(),
            "int": np.array(1),
            "bool": np.array([True]),
            "float": np.array([[2.0, 3.0]]),
        },
        expected={
            "str": b"\000\000",
            "int": 1,
            "bool": np.array([True]),
            "float": np.array([[2.0, 3.0]], dtype=np.float32),
        },
        expected_serialized={
            "str": bytes_feature([b"\000\000"]),
            "int": int64_feature([1]),
            "bool": int64_feature([1]),
            "float": float_feature([2.0, 3.0]),
        },
    )
    # Wrong shape
    transposed_float_case = testing.FeatureExpectationItem(
        value=dict(default_dict, float=[[2.0], [3.0]]),
        raise_cls=ValueError,
        raise_msg="Shapes (2, 1) and (1, 2) are incompatible",
    )
    # Wrong shape
    scalar_bool_case = testing.FeatureExpectationItem(
        value=dict(default_dict, bool=True),
        raise_cls=ValueError,
        raise_msg="Shapes () and (1,) must have the same rank",
    )
    # Wrong dtype
    int_as_str_case = testing.FeatureExpectationItem(
        value=dict(default_dict, str=123),
        raise_cls=ValueError,
        raise_msg="Could not convert to bytes",
    )

    self.assertFeature(
        specs=specs,
        serialized_info=serialized_info,
        tests=[
            raw_values_case,
            numpy_values_case,
            transposed_float_case,
            scalar_bool_case,
            int_as_str_case,
        ],
    )
def get_tensor_info(self):
    """Returns the `TensorInfo` of a scalar float32 tensor."""
    scalar_float_info = features_lib.TensorInfo(shape=(), dtype=tf.float32)
    return scalar_float_info
def get_serialized_info(self):
    """Returns scalar int64 `TensorInfo` entries under keys 'a' and 'b'."""
    return {
        key: features_lib.TensorInfo(shape=(), dtype=tf.int64)
        for key in ('a', 'b')
    }
def get_tensor_info(self):
    """Returns the `TensorInfo` of a scalar int64 tensor.

    With this connector, the way the data is stored on disk ({'a', 'b'}) does
    not match the way it is exposed to the user (int64), so
    FeaturesDict.get_tensor_info is overwritten here.
    """
    user_facing_info = features_lib.TensorInfo(shape=(), dtype=tf.int64)
    return user_facing_info
def test_fdict(self):
    """Checks a `FeaturesDict` mixing custom connectors and a nested dict.

    For input 1 the test expects 'input' to be serialized as
    {'a': 2, 'b': 10} and read back as 12. For output -1 it expects -10.0 on
    disk and -1.0 when read back. The nested 'img' values are expected to
    pass through unchanged, except that the path comes back as bytes.
    """

    def img_tree(height, width, path):
        # Fresh nested dict with the layout of the 'img' sub-feature.
        return {
            'size': {
                'height': height,
                'width': width,
            },
            'metadata/path': path,
        }

    def scalar_info(dtype):
        # `TensorInfo` of a scalar tensor of the given dtype.
        return features_lib.TensorInfo(shape=(), dtype=dtype)

    fdict_feature = features_lib.FeaturesDict({
        'input': AnInputConnector(),
        'output': AnOutputConnector(),
        'img': img_tree(tf.int64, tf.int64, tf.string),
    })
    serialized_info = {
        'input': {
            'a': scalar_info(tf.int64),
            'b': scalar_info(tf.int64),
        },
        'output': scalar_info(tf.float32),
        'img': img_tree(
            scalar_info(tf.int64),
            scalar_info(tf.int64),
            scalar_info(tf.string),
        ),
    }
    dtypes = {
        'input': tf.int64,
        'output': tf.float32,
        'img': img_tree(tf.int64, tf.int64, tf.string),
    }
    shapes = {
        'input': (),
        'output': (),
        'img': img_tree((), (), ()),
    }

    # Np array
    round_trip_case = testing.FeatureExpectationItem(
        value={
            'input': 1,
            'output': -1,
            'img': img_tree(256, 128, 'path/to/xyz.jpg'),
        },
        expected_serialized={
            'input': {
                'a': 2,  # 1 + 1
                'b': 10,  # 1 * 10
            },
            'output': -10.0,  # -1 * 10.0
            'img': img_tree(256, 128, 'path/to/xyz.jpg'),
        },
        expected={
            # a = 1 + 1, b = 1 * 10 => output = a + b = 2 + 10 = 12
            'input': 12,  # 2 + 10
            'output': -1.0,
            'img': img_tree(
                256, 128, tf.compat.as_bytes('path/to/xyz.jpg')),
        },
    )

    self.assertFeature(
        feature=fdict_feature,
        serialized_info=serialized_info,
        dtype=dtypes,
        shape=shapes,
        tests=[round_trip_case],
    )
def get_tensor_info(self):
    """Returns a 1-D `TensorInfo` of length `self._size` and `self._dtype`."""
    vector_shape = (self._size,)
    return features.TensorInfo(shape=vector_shape, dtype=self._dtype)
def get_serialized_info(self):
    """Returns a 1-D, unknown-length `TensorInfo` of the serialized dtype."""
    unknown_length_shape = (None,)
    return features.TensorInfo(
        shape=unknown_length_shape,
        dtype=self._serialized_dtype,
    )
def get_tensor_info(self):
    """Returns a `TensorInfo` built from the stored shape and base dtype.

    The shape comes from `self._shape_tuple` and the dtype from
    `self._base_info.dtype`.
    """
    exposed_shape = self._shape_tuple
    exposed_dtype = self._base_info.dtype
    return features.TensorInfo(shape=exposed_shape, dtype=exposed_dtype)
def get_tensor_info(self):
    """Returns the `TensorInfo` exposed for this feature.

    When `self._padded_shape` is None, every one of the `self._num_dims`
    dimensions is reported as unknown (None). Otherwise the padded shape is
    used as is.
    """
    if self._padded_shape is None:
        shape = (None,) * self._num_dims
    else:
        shape = self._padded_shape
    return features.TensorInfo(shape=shape, dtype=self._dtype)