def test_features_shape_dynamic_multi_none(
    self, encoding: features_lib.Encoding):
  """Checks a Tensor feature whose first two dimensions are `None`.

  Two inputs with different leading dimensions are expected back unchanged;
  an input whose last dimension is 2 instead of 1 must raise `ValueError`.

  Args:
    encoding: Encoding forwarded to `features_lib.Tensor`.
  """
  declared_shape = (None, None, 1)
  sample = np.random.randint(256, size=(2, 3, 1), dtype=np.uint8)
  sample_resized = np.random.randint(256, size=(4, 4, 1), dtype=np.uint8)
  # Last dimension does not match the declared shape.
  sample_bad = np.random.randint(256, size=(2, 3, 2), dtype=np.uint8)

  tensor_feature = features_lib.Tensor(
      shape=declared_shape,
      dtype=tf.uint8,
      encoding=encoding,
  )
  cases = [
      testing.FeatureExpectationItem(value=sample, expected=sample),
      testing.FeatureExpectationItem(
          value=sample_resized, expected=sample_resized),
      testing.FeatureExpectationItem(
          value=sample_bad,
          raise_cls=ValueError,
          raise_msg='are incompatible',
      ),
  ]
  self.assertFeature(
      feature=tensor_feature,
      shape=declared_shape,
      dtype=tf.uint8,
      tests=cases,
  )
def test_translation_variable_languages_nolist(self):
  """Checks TranslationVariableLanguages built without a language list.

  Each input dict maps a language code to one translation (or a list of
  translations). The expected output is a pair of parallel lists under the
  keys `language` and `translation`.
  """
  to_bytes = tf.compat.as_bytes
  fr_shortened = FR_HELLO[0:-1]

  feature = features.TranslationVariableLanguages()
  cases = [
      testing.FeatureExpectationItem(
          value={"en": EN_HELLO, "zh": ZH_HELLO},
          expected={
              "language": [EN_B, ZH_B],
              "translation": [to_bytes(EN_HELLO), to_bytes(ZH_HELLO)],
          },
      ),
      # Input lists fr first; the expected lists put DE, then FR, then ZH.
      testing.FeatureExpectationItem(
          value={"fr": FR_HELLO, "de": DE_HELLO, "zh": ZH_HELLO},
          expected={
              "language": [DE_B, FR_B, ZH_B],
              "translation": [
                  to_bytes(DE_HELLO),
                  to_bytes(FR_HELLO),
                  to_bytes(ZH_HELLO),
              ],
          },
      ),
      # One language with two translations yields two entries for it; the
      # shortened French string is expected before the full one.
      testing.FeatureExpectationItem(
          value={"fr": [FR_HELLO, fr_shortened], "en": EN_HELLO},
          expected={
              "language": [EN_B, FR_B, FR_B],
              "translation": [
                  to_bytes(EN_HELLO),
                  to_bytes(fr_shortened),
                  to_bytes(FR_HELLO),
              ],
          },
      ),
  ]
  self.assertFeature(
      feature=feature,
      shape={"language": (None,), "translation": (None,)},
      dtype={"language": tf.string, "translation": tf.string},
      tests=cases,
  )
def test_shape_dynamic_none_second(self, encoding: features_lib.Encoding):
  """Checks a Tensor feature whose only `None` dimension is the middle one.

  Args:
    encoding: Encoding forwarded to `features_lib.Tensor`.
  """
  # The dynamic dimension is deliberately not at the first position.
  declared_shape = (3, None, 2)
  short_input = np.random.randint(256, size=(3, 2, 2), dtype=np.int32)
  long_input = np.random.randint(256, size=(3, 5, 2), dtype=np.int32)

  tensor_feature = features_lib.Tensor(
      shape=declared_shape,
      dtype=tf.int32,
      encoding=encoding,
  )
  cases = [
      testing.FeatureExpectationItem(
          value=short_input, expected=short_input),
      testing.FeatureExpectationItem(
          value=long_input, expected=long_input),
      # Static dimensions (first and last) differ from the declared ones.
      testing.FeatureExpectationItem(
          value=np.random.randint(256, size=(2, 3, 1), dtype=np.int32),
          raise_cls=ValueError,
          raise_msg='are incompatible',
      ),
  ]
  self.assertFeature(
      feature=tensor_feature,
      dtype=tf.int32,
      shape=declared_shape,
      tests=cases,
  )
def test_label(self):
  """Checks a variable-length Sequence wrapping a dict with a ClassLabel.

  Label names are expected to come back as their index in
  `['left', 'right']`, for sequences of length 3, 4 and 0.
  """
  label_feature = tfds.features.ClassLabel(names=['left', 'right'])
  sequence_feature = tfds.features.Sequence(
      {'label': label_feature}, length=None)

  # (input label names, expected class ids)
  scenarios = [
      (['right', 'left', 'left'], [1, 0, 0]),
      # Variable sequence length
      (['right', 'left', 'right', 'left'], [1, 0, 1, 0]),
      # Empty sequence length
      ([], []),
  ]
  cases = [
      testing.FeatureExpectationItem(
          value={'label': names},
          expected={'label': ids},
      )
      for names, ids in scenarios
  ]
  self.assertFeature(
      feature=sequence_feature,
      shape={'label': (None,)},
      dtype={'label': tf.int64},
      tests=cases,
  )
def test_images_float(self):
  """Checks a float32 Image feature with dynamic height and width.

  Two float32 images of different spatial sizes are expected back unchanged;
  the same image cast to float64 must raise `ValueError`.
  """
  declared_shape = (None, None, 1)
  image = np.random.rand(28, 28, 1).astype(np.float32)
  image_resized = np.random.rand(12, 34, 1).astype(np.float32)

  image_feature = features_lib.Image(shape=declared_shape, dtype=tf.float32)
  cases = [
      # Numpy array
      testing.FeatureExpectationItem(value=image, expected=image),
      # Height and width may vary between examples.
      testing.FeatureExpectationItem(
          value=image_resized, expected=image_resized),
      # Invalid type
      testing.FeatureExpectationItem(
          value=image.astype(np.float64),
          raise_cls=ValueError,
          raise_msg='dtype should be',
      ),
  ]
  expected_attributes = {
      '_encoding_format': None,
      '_use_colormap': False,
  }
  self.assertFeature(
      feature=image_feature,
      shape=declared_shape,
      dtype=tf.float32,
      tests=cases,
      test_attributes=expected_attributes,
  )
def test_image_shaped(self):
  """Checks a PNG Image feature with a fully static shape.

  An image of the declared shape is expected back unchanged; an image that is
  one row shorter must raise `ValueError`.
  """
  declared_shape = (32, 64, 1)
  valid_image = randint(256, size=declared_shape, dtype=np.uint8)

  # Image with statically defined shape
  image_feature = features_lib.Image(
      shape=declared_shape,
      encoding_format='png',
      use_colormap=True,
  )
  cases = [
      testing.FeatureExpectationItem(
          value=valid_image, expected=valid_image),
      # The shape is static, so a 31-row image is rejected.
      testing.FeatureExpectationItem(
          value=randint(256, size=(31, 64, 1), dtype=np.uint8),
          raise_cls=ValueError,
          raise_msg='are incompatible',
      ),
  ]
  expected_attributes = {
      '_encoding_format': 'png',
      '_use_colormap': True,
  }
  self.assertFeature(
      feature=image_feature,
      shape=declared_shape,
      dtype=tf.uint8,
      tests=cases,
      test_attributes=expected_attributes,
  )
def test_features_multi_none_sequence(
    self,
    encoding: features_lib.Encoding,
    shape,
):
  """Checks a Sequence wrapping a Tensor feature of the given shape.

  Args:
    encoding: Encoding forwarded to the inner `features_lib.Tensor`.
    shape: Shape of the inner Tensor. It is concatenated to `(None,)` to form
      the expected sequence shape, so it has to be a tuple.
  """
  batch = np.random.randint(256, size=(3, 2, 3, 1), dtype=np.uint8)
  batch_resized = np.random.randint(256, size=(3, 2, 2, 1), dtype=np.uint8)

  inner_feature = features_lib.Tensor(
      shape=shape,
      dtype=tf.uint8,
      encoding=encoding,
  )
  sequence_feature = features_lib.Sequence(inner_feature)
  cases = [
      testing.FeatureExpectationItem(value=batch, expected=batch),
      testing.FeatureExpectationItem(
          value=batch_resized, expected=batch_resized),
      # TODO(epot): Is there a way to catch if the user try to encode
      # tensors with different shapes ?
  ]
  self.assertFeature(
      feature=sequence_feature,
      shape=(None,) + shape,
      dtype=tf.uint8,
      tests=cases,
  )
def test_bool_flat(self):
  """Checks a scalar bool Tensor feature with numpy and Python bool inputs."""
  scalar_feature = features_lib.Tensor(shape=(), dtype=tf.bool)

  # (input value, expected decoded value)
  scenarios = [
      (np.array(True), True),
      (np.array(False), False),
      (True, True),
      (False, False),
  ]
  cases = [
      testing.FeatureExpectationItem(value=given, expected=wanted)
      for given, wanted in scenarios
  ]
  self.assertFeature(
      feature=scalar_feature,
      dtype=tf.bool,
      shape=(),
      tests=cases,
  )
def test_label(self):
  """Checks a Dataset feature wrapping a dict with a ClassLabel.

  Inputs are given as a list of per-example dicts and as a numpy view of a
  `tf.data.Dataset`; the expected value is a `tf.data.Dataset` of class ids.
  """
  dataset_feature = feature_lib.Dataset(
      {'label': feature_lib.ClassLabel(names=['left', 'right'])},
      length=None,
  )
  serialized_spec = {
      'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
  }

  as_list_case = testing.FeatureExpectationItem(
      value=[{'label': name} for name in ('right', 'left', 'left')],
      expected=tf.data.Dataset.from_tensor_slices({'label': [1, 0, 0]}),
  )
  # Variable sequence length
  name_dataset = tf.data.Dataset.from_tensor_slices(
      {'label': ['right', 'left', 'right', 'left']})
  as_dataset_case = testing.FeatureExpectationItem(
      value=dataset_utils.as_numpy(name_dataset),
      expected=tf.data.Dataset.from_tensor_slices({'label': [1, 0, 1, 0]}),
  )

  self.assertFeatureEagerOnly(
      feature=dataset_feature,
      shape={'label': ()},
      dtype={'label': tf.int64},
      serialized_info=serialized_spec,
      tests=[as_list_case, as_dataset_case],
      test_attributes={'_length': None},
  )
def test_3lvl_sequence(self):
  """Checks three nested Sequences where the middle level has length 3.

  A well-formed nested list is expected back unchanged; a nested list whose
  first middle-level entry has only two elements must raise `ValueError`.
  """
  innermost = feature_lib.Sequence(tf.int32)
  middle = feature_lib.Sequence(innermost, length=3)
  nested_feature = feature_lib.Sequence(middle)

  well_formed = [
      [[1, 2, 3], [], [4, 5]],
      [[10, 11], [12, 13], [14]],
  ]
  well_formed_expected = [
      [[1, 2, 3], [], [4, 5]],
      [[10, 11], [12, 13], [14]],
  ]
  malformed = [
      [[1, 2, 3], [4, 5]],  # < Only 2 instead of 3
      [[10, 11], [12, 13], [14]],
  ]
  cases = [
      testing.FeatureExpectationItem(
          value=well_formed,
          expected=well_formed_expected,
      ),
      testing.FeatureExpectationItem(
          value=malformed,
          raise_cls=ValueError,
          raise_msg='Input sequence length do not match',
      ),
  ]
  self.assertFeature(
      feature=nested_feature,
      shape=(None, 3, None),
      dtype=tf.int32,
      tests=cases,
  )
def test_int(self):
  """Checks an int32 Dataset feature with a fixed length of 3.

  Length-3 inputs are expected back as a `tf.data.Dataset`; a length-4 input
  must raise `ValueError`.
  """
  dataset_feature = feature_lib.Dataset(tf.int32, length=3)

  from_python_list = testing.FeatureExpectationItem(
      value=[1, 2, 3],
      expected=tf.data.Dataset.from_tensor_slices([1, 2, 3]),
  )
  from_numpy_array = testing.FeatureExpectationItem(
      value=np.ones(shape=(3,), dtype=np.int32),
      expected=tf.data.Dataset.from_tensor_slices([1, 1, 1]),
  )
  # Datasets with a different length will fail on encoding.
  wrong_length = testing.FeatureExpectationItem(
      value=np.ones(shape=(4,), dtype=np.int32),
      raise_cls=ValueError,
      raise_msg='Error while serializing feature',
  )

  self.assertFeatureEagerOnly(
      feature=dataset_feature,
      shape=(),
      dtype=tf.int32,
      tests=[from_python_list, from_numpy_array, wrong_length],
  )
def test_image_custom_decode(self):
  """Checks a JPEG Image feature read back through custom decoders.

  Three decoders are exercised: a `decode_lib.Decoder` subclass that crops
  while decoding, the same crop written with `decode_lib.make_decoder`, and
  `decode_lib.SkipDecoding`, for which the raw file bytes are expected.
  """
  # A constant image is used instead of random pixels: JPEG compression is
  # lossy, so random values would not decode back to the same array.
  full_image = np.ones(shape=(30, 60, 3), dtype=np.uint8)
  left, top, width, height = 4, 7, 10, 13
  cropped_image = full_image[top:top + height, left:left + width, :]
  cropped_shape = (height, width, 3)

  class DecodeCrop(decode_lib.Decoder):
    """Simple class on how to customize the decoding."""

    def decode_example(self, serialized_image):
      return tf.image.decode_and_crop_jpeg(
          serialized_image,
          [top, left, height, width],
          channels=self.feature.shape[-1],
      )

  @decode_lib.make_decoder()
  def decode_crop(serialized_image, feature):
    return tf.image.decode_and_crop_jpeg(
        serialized_image,
        [top, left, height, width],
        channels=feature.shape[-1],
    )

  image_path = os.fspath(
      utils.tfds_path('testing/test_data/test_image.jpg'))
  with tf.io.gfile.GFile(image_path, 'rb') as image_file:
    raw_jpeg = image_file.read()

  # Image with statically defined shape
  image_feature = features_lib.Image(
      shape=(30, 60, 3), encoding_format='jpeg')
  cases = [
      testing.FeatureExpectationItem(
          value=full_image,
          expected=cropped_image,
          shape=cropped_shape,  # Shape is cropped
          decoders=DecodeCrop(),
      ),
      testing.FeatureExpectationItem(
          value=full_image,
          expected=cropped_image,
          shape=cropped_shape,  # Shape is cropped
          decoders=decode_crop(),  # pylint: disable=no-value-for-parameter
      ),
      # With decoding skipped, the expected value is the file content itself.
      testing.FeatureExpectationItem(
          value=image_path,
          expected=raw_jpeg,
          shape=(),
          dtype=tf.string,
          decoders=decode_lib.SkipDecoding(),
      ),
  ]
  self.assertFeature(
      feature=image_feature,
      shape=(30, 60, 3),
      dtype=tf.uint8,
      # Output shape is different.
      test_tensor_spec=False,
      tests=cases,
  )
def test_int(self):
  """Checks an int32 Sequence feature with a fixed length of 3.

  Length-3 inputs are expected back as lists; a length-4 input must raise
  `ValueError`.
  """
  sequence_feature = feature_lib.Sequence(tf.int32, length=3)

  from_python_list = testing.FeatureExpectationItem(
      value=[1, 2, 3],
      expected=[1, 2, 3],
  )
  from_numpy_array = testing.FeatureExpectationItem(
      value=np.ones(shape=(3,), dtype=np.int32),
      expected=[1, 1, 1],
  )
  # Wrong sequence length
  wrong_length = testing.FeatureExpectationItem(
      value=np.ones(shape=(4,), dtype=np.int32),
      raise_cls=ValueError,
      raise_msg='Input sequence length do not match',
  )

  self.assertFeature(
      feature=sequence_feature,
      shape=(3,),
      dtype=tf.int32,
      tests=[from_python_list, from_numpy_array, wrong_length],
  )
def test_label(self):
  """Checks a variable-length Sequence of `{'label': ClassLabel}` dicts.

  Besides the encode/decode expectations, the serialized info and the
  feature's `_length` attribute are verified.
  """
  sequence_feature = feature_lib.Sequence(
      {'label': feature_lib.ClassLabel(names=['left', 'right'])},
      length=None,
  )
  serialized_spec = {
      'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
  }

  # (input label names, expected class ids)
  scenarios = [
      (['right', 'left', 'left'], [1, 0, 0]),
      # Variable sequence length
      (['right', 'left', 'right', 'left'], [1, 0, 1, 0]),
      # Empty sequence length
      ([], []),
  ]
  cases = [
      testing.FeatureExpectationItem(
          value={'label': names},
          expected={'label': ids},
      )
      for names, ids in scenarios
  ]
  self.assertFeature(
      feature=sequence_feature,
      shape={'label': (None,)},
      dtype={'label': tf.int64},
      serialized_info=serialized_spec,
      tests=cases,
      test_attributes={'_length': None},
  )
def test_features_sequence(self):
  """Checks serialization of variable-length specs, including empty values.

  The specs mix a nested key (`a` -> `b`) with a flat key (`a/c`); the
  expected serialized form uses the flat names `a/b` and `a/c`.
  """

  def bytes_feature(values):
    # Wraps `values` in a bytes-list `tf.train.Feature`.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def int64_feature(values):
    # Wraps `values` in an int64-list `tf.train.Feature`.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  specs = {
      "a": {
          "b": features.TensorInfo(shape=(None,), dtype=tf.string),
      },
      "a/c": features.TensorInfo(shape=(None, 2), dtype=tf.int32),
  }
  serialized_spec = {
      "a/b": tf.io.FixedLenSequenceFeature(
          shape=(), dtype=tf.string, allow_missing=True),
      "a/c": tf.io.FixedLenSequenceFeature(
          shape=(2,), dtype=tf.int64, allow_missing=True),
  }

  # Raw values: empty string sequence next to a non-empty int matrix.
  empty_strings_case = testing.FeatureExpectationItem(
      value={
          "a": {"b": []},
          "a/c": [[1, 2], [3, 4]],
      },
      expected={
          "a": {"b": []},
          "a/c": [[1, 2], [3, 4]],
      },
      expected_serialized={
          "a/b": bytes_feature([]),
          "a/c": int64_feature([1, 2, 3, 4]),
      },
  )
  # The reverse situation: non-empty strings next to an empty int matrix.
  empty_ints_case = testing.FeatureExpectationItem(
      value={
          "a": {"b": ["abc\n", "", "def "]},
          "a/c": np.empty(shape=(0, 2), dtype=np.int32),
      },
      expected={
          "a": {"b": [b"abc\n", b"", b"def "]},
          "a/c": np.empty(shape=(0, 2), dtype=np.int32),
      },
      expected_serialized={
          "a/b": bytes_feature([b"abc\n", b"", b"def "]),
          "a/c": int64_feature([]),
      },
  )

  self.assertFeature(
      specs=specs,
      serialized_info=serialized_spec,
      tests=[empty_strings_case, empty_ints_case],
  )
def test_label(self):
  """Checks a Sequence that wraps a ClassLabel directly (no dict).

  Label names are expected to come back as their index in
  `['left', 'right']`, for sequences of length 3, 4 and 0.
  """
  sequence_feature = feature_lib.Sequence(
      feature_lib.ClassLabel(names=['left', 'right']))

  # (input label names, expected class ids)
  scenarios = [
      (['right', 'left', 'left'], [1, 0, 0]),
      # Variable sequence length
      (['right', 'left', 'right', 'left'], [1, 0, 1, 0]),
      # Empty sequence length
      ([], []),
  ]
  cases = [
      testing.FeatureExpectationItem(value=names, expected=ids)
      for names, ids in scenarios
  ]
  self.assertFeature(
      feature=sequence_feature,
      shape=(None,),
      dtype=tf.int64,
      tests=cases,
  )
def test_text(self):
  """Checks the Text feature with ASCII, non-ASCII and empty strings.

  Each input string is expected back as `tf.compat.as_bytes` of itself.
  """
  samples = [
      'hello world',  # Non-unicode
      u'你好',  # Unicode
      '',  # Empty string
  ]
  cases = [
      testing.FeatureExpectationItem(
          value=text,
          expected=tf.compat.as_bytes(text),
      )
      for text in samples
  ]
  self.assertFeature(
      feature=features.Text(),
      shape=(),
      dtype=tf.string,
      tests=cases,
  )
def test_feature(self):
  """Checks a ClassLabel feature configured with `num_classes=10`.

  The int 3 and the string '3' are both expected to give 3; the int 10 and
  the string '10' must each raise `ValueError`.
  """
  label_feature = features.ClassLabel(num_classes=10)

  accepted = [
      testing.FeatureExpectationItem(value=3, expected=3),
      testing.FeatureExpectationItem(value='3', expected=3),
  ]
  rejected = [
      testing.FeatureExpectationItem(
          value=10,
          raise_cls=ValueError,
          raise_msg='greater than configured num_classes',
      ),
      testing.FeatureExpectationItem(
          value='10',
          raise_cls=ValueError,
          raise_msg='Invalid',
      ),
  ]
  self.assertFeature(
      feature=label_feature,
      dtype=tf.int64,
      shape=(),
      tests=accepted + rejected,
  )
def test_shape_dynamic(self):
  """Checks a Tensor feature whose first dimension is `None`.

  Inputs with 2 and 5 leading rows are expected back unchanged; an input
  whose last dimension is 1 instead of 2 must raise `ValueError`.
  """
  declared_shape = (None, 3, 2)
  two_rows = np.random.randint(256, size=(2, 3, 2), dtype=np.int32)
  five_rows = np.random.randint(256, size=(5, 3, 2), dtype=np.int32)

  tensor_feature = features_lib.Tensor(shape=declared_shape, dtype=tf.int32)
  cases = [
      testing.FeatureExpectationItem(value=two_rows, expected=two_rows),
      testing.FeatureExpectationItem(value=five_rows, expected=five_rows),
      # Invalid shape
      testing.FeatureExpectationItem(
          value=np.random.randint(256, size=(2, 3, 1), dtype=np.int32),
          raise_cls=ValueError,
          raise_msg='are incompatible',
      ),
  ]
  self.assertFeature(
      feature=tensor_feature,
      dtype=tf.int32,
      shape=declared_shape,
      tests=cases,
  )
def test_feature(self):
  """Checks a ClassLabel feature configured with `num_classes=10`.

  The int 3 and the string '3' are both expected to give 3; the int 10 and
  the string '10' must each raise `ValueError`. The feature's `num_classes`
  and `names` attributes are verified as well.
  """
  label_feature = features.ClassLabel(num_classes=10)

  accepted = [
      testing.FeatureExpectationItem(value=3, expected=3),
      testing.FeatureExpectationItem(value='3', expected=3),
  ]
  rejected = [
      testing.FeatureExpectationItem(
          value=10,
          raise_cls=ValueError,
          raise_msg='greater than configured num_classes',
      ),
      testing.FeatureExpectationItem(
          value='10',
          raise_cls=ValueError,
          raise_msg='Invalid',
      ),
  ]
  expected_attributes = {
      'num_classes': 10,
      'names': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
  }
  self.assertFeature(
      feature=label_feature,
      dtype=tf.int64,
      shape=(),
      tests=accepted + rejected,
      test_attributes=expected_attributes,
  )
def test_video_ffmpeg(self):
  """Checks the Video feature on an `.mkv` file given as path and file object.

  The expected frames are loaded from `video.json` in the test data
  directory.
  """
  clip_shape = (5, 4, 2, 3)
  video_path = os.path.join(self._test_data_path, 'video.mkv')
  video_json_path = os.path.join(self._test_data_path, 'video.json')
  with tf.io.gfile.GFile(video_json_path) as json_file:
    video_array = np.asarray(json.load(json_file))

  def check_decodes_to_reference(source):
    # Asserts that `source` is decoded into the reference frames.
    self.assertFeature(
        feature=features.Video(shape=clip_shape),
        shape=clip_shape,
        dtype=tf.uint8,
        tests=[
            testing.FeatureExpectationItem(
                value=source,
                expected=video_array,
            ),
        ],
    )

  check_decodes_to_reference(video_path)
  # NOTE(review): this second path-based check is identical to the first --
  # confirm whether a different input type was intended here.
  check_decodes_to_reference(video_path)

  class GFileWithSeekOnRead(tf.io.gfile.GFile):
    """GFile that seeks back to the start after every `read()` call."""

    def read(self, *args, **kwargs):
      content = super(GFileWithSeekOnRead, self).read(*args, **kwargs)
      self.seek(0)
      return content

  with GFileWithSeekOnRead(video_path, 'rb') as video_fp:
    check_decodes_to_reference(video_fp)
def test_video_concatenated_frames(self):
  """Checks the Video feature when the input is a list of JPEG frame paths.

  The expected video is the stack of the four frames decoded with
  `tf.image.decode_jpeg`.
  """
  video_shape = (None, 400, 640, 3)
  frames_dir = os.path.join(self._test_data_path, 'lsun_examples')
  frame_paths = [
      os.path.join(frames_dir, '{}.jpg'.format(index))
      for index in (1, 2, 3, 4)
  ]

  def load_frame(path):
    # Reads one JPEG file and decodes it as a 3-channel image.
    with tf.io.gfile.GFile(path, 'rb') as frame_file:
      return tf.image.decode_jpeg(frame_file.read(), channels=3)

  expected_video = tf.stack([load_frame(path) for path in frame_paths])

  self.assertFeature(
      feature=features.Video(shape=video_shape),
      shape=video_shape,
      dtype=tf.uint8,
      tests=[
          # The value is the list of frame paths, not a decoded array.
          testing.FeatureExpectationItem(
              value=frame_paths,
              expected=expected_video,
          ),
      ],
  )
def test_translation_encoded(self):
  """Checks a Translation feature that uses a single ByteTextEncoder.

  Both languages are expected to be integer-encoded byte by byte.
  """
  en_bytes = [104, 101, 108, 108, 111, 32]
  zh_bytes = [228, 189, 160, 229, 165, 189, 32]

  # Unicode integer-encoded by byte
  translation_feature = features.Translation(
      languages=["en", "zh"],
      encoder=text_encoder.ByteTextEncoder(),
  )
  cases = [
      testing.FeatureExpectationItem(
          value={"en": EN_HELLO, "zh": ZH_HELLO},
          expected={
              # Incremented for pad
              "en": [byte + 1 for byte in en_bytes],
              "zh": [byte + 1 for byte in zh_bytes],
          },
      ),
  ]
  self.assertFeature(
      feature=translation_feature,
      shape={"en": (None,), "zh": (None,)},
      dtype={"en": tf.int64, "zh": tf.int64},
      tests=cases,
      skip_feature_tests=True,
  )
def test_file_object(self):
  """Checks that the Audio feature accepts an open file object.

  A WAV file is written to a temporary path, reopened through a GFile wrapper
  that rewinds after each read, and passed as the feature value. The expected
  decoded value is the array the file was written from.

  The temporary file's descriptor is closed immediately and the file itself
  is removed once the assertions are done, so the test leaks neither.
  """
  import os  # Local import: only needed for the temp-file bookkeeping below.

  np_audio = self.create_np_audio()

  # `mkstemp` hands back an already-open OS-level descriptor. Only the path
  # is needed here, so close the descriptor instead of discarding it.
  fd, tmp_file = tempfile.mkstemp()
  os.close(fd)

  try:
    self.write_wave_file(np_audio, tmp_file)

    class GFileWithSeekOnRead(tf.io.gfile.GFile):
      """Wrapper around GFile which is reusable across multiple read() calls.

      This is needed because assertFeature reuses the same
      FeatureExpectationItem several times.
      """

      def read(self, *args, **kwargs):
        data_read = super(GFileWithSeekOnRead, self).read(*args, **kwargs)
        self.seek(0)
        return data_read

    with GFileWithSeekOnRead(tmp_file, "rb") as file_obj:
      self.assertFeature(
          feature=features.Audio(file_format="wav"),
          shape=(None,),
          dtype=tf.int64,
          tests=[
              testing.FeatureExpectationItem(
                  value=file_obj,
                  expected=np_audio,
              ),
          ],
      )
  finally:
    # `mkstemp` never deletes the file on its own.
    os.remove(tmp_file)
def test_translation_multiple_encoders(self):
  """Checks a Translation feature configured with a list of two encoders.

  The expected `en` output is a single token id while `zh` is byte-encoded,
  so the encoders are presumably matched to the languages by position.
  """
  zh_bytes = [228, 189, 160, 229, 165, 189, 32]

  # Unicode integer-encoded by byte
  encoders = [
      text_encoder.TokenTextEncoder(["hello", " "]),
      text_encoder.ByteTextEncoder(),
  ]
  translation_feature = features.Translation(
      languages=["en", "zh"],
      encoder=encoders,
  )
  cases = [
      testing.FeatureExpectationItem(
          value={"en": EN_HELLO, "zh": ZH_HELLO},
          expected={
              "en": [1],
              "zh": [byte + 1 for byte in zh_bytes],
          },
      ),
  ]
  self.assertFeature(
      feature=translation_feature,
      shape={"en": (None,), "zh": (None,)},
      dtype={"en": tf.int64, "zh": tf.int64},
      tests=cases,
      skip_feature_tests=True,
  )
def test_2lvl_sequences_mixed(self):
  """Checks a Sequence of dicts mixing a nested Sequence and a plain int."""
  # Mix of sequence and non-sequence
  mixed_feature = feature_lib.Sequence({
      'a': feature_lib.Sequence(tf.int32),
      'b': tf.int32,
  })
  expected_shapes = {'a': (None, None), 'b': (None,)}
  expected_dtypes = {'a': tf.int32, 'b': tf.int32}

  # 'a' holds ragged inner lists (one of them empty); 'b' is flat.
  example_case = testing.FeatureExpectationItem(
      value={
          'a': [[1, 1, 1], [], [3, 3]],
          'b': [1, 2, 3],
      },
      expected={
          'a': [[1, 1, 1], [], [3, 3]],
          'b': [1, 2, 3],
      },
  )
  self.assertFeature(
      feature=mixed_feature,
      shape=expected_shapes,
      dtype=expected_dtypes,
      tests=[example_case],
  )
def test_bool_array(self):
  """Checks a length-3 bool Tensor feature with numpy and list inputs."""
  declared_shape = (3,)
  bool_feature = features_lib.Tensor(shape=declared_shape, dtype=tf.bool)

  from_numpy_array = testing.FeatureExpectationItem(
      value=np.array([True, True, False]),
      expected=[True, True, False],
  )
  from_python_list = testing.FeatureExpectationItem(
      value=[True, False, True],
      expected=[True, False, True],
  )
  self.assertFeature(
      feature=bool_feature,
      dtype=tf.bool,
      shape=declared_shape,
      tests=[from_numpy_array, from_python_list],
  )
def test_image_nested_empty_len(self):
  """Checks a Sequence of dynamic-shape images, including the empty case.

  For an empty sequence the expected image array has shape `(0, 0, 0, 3)`.
  """
  images = [
      np.random.randint(256, size=(28, 28, 3), dtype=np.uint8)
      for _ in range(2)
  ]
  images_stacked = np.stack(images)

  nested_feature = feature_lib.Sequence({
      'a': feature_lib.Image(shape=(None, None, 3)),
      'b': tf.int32,
  })
  expected_shapes = {'a': (None, None, None, 3), 'b': (None,)}
  expected_dtypes = {'a': tf.uint8, 'b': tf.int32}

  filled_case = testing.FeatureExpectationItem(
      value={'a': images, 'b': [1, 2]},
      expected={'a': images_stacked, 'b': [1, 2]},
  )
  empty_case = testing.FeatureExpectationItem(
      value={'a': [], 'b': []},
      expected={
          'a': np.empty(shape=(0, 0, 0, 3), dtype=np.uint8),
          'b': [],
      },
  )
  self.assertFeature(
      feature=nested_feature,
      shape=expected_shapes,
      dtype=expected_dtypes,
      tests=[filled_case, empty_case],
  )
def test_shape_static(self, encoding: features_lib.Encoding):
  """Checks a float32 Tensor feature with a fully static `(2, 3)` shape.

  Valid numpy and nested-list inputs are expected back unchanged; an int64
  input and a `(2, 4)` input must each raise `ValueError`. The feature's
  `_encoding` attribute is verified as well.

  Args:
    encoding: Encoding forwarded to `features_lib.Tensor` and expected back
      in its `_encoding` attribute.
  """
  declared_shape = (2, 3)
  float_matrix = np.random.rand(2, 3).astype(np.float32)
  nested_list = [
      [1, 2, 3],
      [4, 5, 6],
  ]

  tensor_feature = features_lib.Tensor(
      shape=declared_shape,
      dtype=tf.float32,
      encoding=encoding,
  )
  cases = [
      # Np array
      testing.FeatureExpectationItem(
          value=float_matrix, expected=float_matrix),
      # Python array
      testing.FeatureExpectationItem(
          value=nested_list, expected=nested_list),
      # Invalid dtype
      testing.FeatureExpectationItem(
          # On Windows, np default dtype is `int32`
          value=np.random.randint(256, size=(2, 3), dtype=np.int64),
          raise_cls=ValueError,
          raise_msg='int64 do not match',
      ),
      # Invalid shape
      testing.FeatureExpectationItem(
          value=np.random.rand(2, 4).astype(np.float32),
          raise_cls=ValueError,
          raise_msg='are incompatible',
      ),
  ]
  self.assertFeature(
      feature=tensor_feature,
      dtype=tf.float32,
      shape=declared_shape,
      tests=cases,
      test_attributes={'_encoding': encoding},
  )
def test_image(self):
  """Checks a variable-length Sequence of fixed-shape images.

  The same four images are supplied as a list of per-step dicts, as one
  stacked array, and as a list of arrays; each form is expected to give the
  stacked array. An empty input is covered too.
  """
  frame_shape = (128, 100, 3)
  images = [
      np.random.randint(256, size=frame_shape, dtype=np.uint8)
      for _ in range(4)
  ]
  images_stacked = np.stack(images)

  sequence_feature = feature_lib.Sequence(
      {'image': feature_lib.Image(shape=frame_shape)},
      length=None,
  )
  stacked_result = {'image': images_stacked}
  cases = [
      testing.FeatureExpectationItem(
          value=[{'image': image} for image in images],
          expected=stacked_result,
      ),
      testing.FeatureExpectationItem(
          value={'image': images_stacked},
          expected=stacked_result,
      ),
      testing.FeatureExpectationItem(
          value={'image': images},
          expected=stacked_result,
      ),
      # Empty value
      testing.FeatureExpectationItem(
          value={'image': []},
          # The empty value still has the right shape
          expected={
              'image': np.empty(shape=(0, 128, 100, 3), dtype=np.uint8)
          },
      ),
  ]
  self.assertFeature(
      feature=sequence_feature,
      shape={'image': (None, 128, 100, 3)},
      dtype={'image': tf.uint8},
      tests=cases,
  )