def expectations(self):
    """Build the expectations for raw and byte-encoded `Text` features.

    Returns:
        A list of two `FeatureExpectation` objects: one for a `Text` feature
        without encoder (string dtype) and one for a `Text` feature using
        `ByteTextEncoder` (int64 ids of unknown length).
    """
    ascii_sample = 'hello world'
    unicode_sample = u'你好'

    # Un-encoded text: every sample is expected back as bytes. The three
    # samples cover non-unicode, unicode and empty strings, in that order.
    raw_expectation = test_utils.FeatureExpectation(
        name='text',
        feature=features.Text(),
        shape=(),
        dtype=tf.string,
        tests=[
            test_utils.FeatureExpectationItem(
                value=sample,
                expected=tf.compat.as_bytes(sample),
            )
            for sample in (ascii_sample, unicode_sample, '')
        ],
    )

    # UTF-8 bytes of the unicode sample; the expected ids are these values
    # shifted by one.
    utf8_bytes = [228, 189, 160, 229, 165, 189]
    encoded_expectation = test_utils.FeatureExpectation(
        name='text_unicode_encoded',
        feature=features.Text(encoder=text_encoder.ByteTextEncoder()),
        shape=(None,),
        dtype=tf.int64,
        tests=[
            test_utils.FeatureExpectationItem(
                value=unicode_sample,
                expected=[byte + 1 for byte in utf8_bytes],
            ),
            # Empty string gives an empty id list.
            test_utils.FeatureExpectationItem(value='', expected=[]),
        ],
    )

    return [raw_expectation, encoded_expectation]
def test_bool_array(self):
    """Check a rank-1 boolean `Tensor` feature with numpy and list inputs."""
    bool_vector = features_lib.Tensor(shape=(3,), dtype=tf.bool)

    # Same feature fed once with a numpy array and once with a Python list.
    from_numpy = test_utils.FeatureExpectationItem(
        value=np.array([True, True, False]),
        expected=[True, True, False],
    )
    from_list = test_utils.FeatureExpectationItem(
        value=[True, False, True],
        expected=[True, False, True],
    )

    self.assertFeature(
        feature=bool_vector,
        dtype=tf.bool,
        shape=(3,),
        tests=[from_numpy, from_list],
    )
def test_image(self):
    """Check a variable-length `SequenceDict` holding fixed-shape images."""
    frame_shape = (128, 100, 3)
    # Four random uint8 frames, plus the same frames stacked on a new axis 0.
    frames = [
        np.random.randint(256, size=frame_shape, dtype=np.uint8)
        for _ in range(4)
    ]
    stacked_frames = np.stack(frames)

    image_sequence = tfds.features.SequenceDict(
        {
            'image': tfds.features.Image(shape=frame_shape),
        },
        length=None,
    )

    make_item = test_utils.FeatureExpectationItem
    cases = [
        # List of per-step dicts.
        make_item(
            value=[{'image': frame} for frame in frames],
            expected={'image': stacked_frames},
        ),
        # Dict holding an already stacked array.
        make_item(
            value={'image': stacked_frames},
            expected={'image': stacked_frames},
        ),
        # Dict holding a list of arrays.
        make_item(
            value={'image': frames},
            expected={'image': stacked_frames},
        ),
        # Empty value: the expected output still has the per-frame shape.
        make_item(
            value={'image': []},
            expected={
                'image': np.empty(shape=(0, 128, 100, 3), dtype=np.uint8),
            },
        ),
    ]

    self.assertFeature(
        feature=image_sequence,
        shape={'image': (None, 128, 100, 3)},
        dtype={'image': tf.uint8},
        tests=cases,
    )
def expectations(self):
    """Build the expectations for a ten-class `ClassLabel` feature.

    Returns:
        A single-element list with the `FeatureExpectation` for the label.
    """
    in_range = test_utils.FeatureExpectationItem(
        value=3,
        expected=3,
    )
    # 10 is expected to be rejected by a feature configured with 10 classes.
    out_of_range = test_utils.FeatureExpectationItem(
        value=10,
        raise_cls=ValueError,
        raise_msg='greater than configured num_classes',
    )

    label_expectation = test_utils.FeatureExpectation(
        name='label',
        feature=features.ClassLabel(10),
        dtype=tf.int64,
        shape=(),
        tests=[in_range, out_of_range],
    )
    return [label_expectation]
def test_int(self):
    """Check a fixed-length (3) `SequenceDict` with a single int32 entry."""
    int_sequence = tfds.features.SequenceDict({'int': tf.int32}, length=3)
    make_item = test_utils.FeatureExpectationItem

    cases = [
        # Python list.
        make_item(
            value={'int': [1, 2, 3]},
            expected={'int': [1, 2, 3]},
        ),
        # Numpy array.
        make_item(
            value={'int': np.ones(shape=(3,), dtype=np.int32)},
            expected={'int': [1, 1, 1]},
        ),
        # List of per-step dicts.
        make_item(
            value=[{'int': step} for step in (1, 10, 100)],
            expected={'int': [1, 10, 100]},
        ),
        # Four elements given to a feature declared with length=3.
        make_item(
            value={'int': np.ones(shape=(4,), dtype=np.int32)},
            raise_cls=ValueError,
            raise_msg='Input sequence length do not match',
        ),
    ]

    self.assertFeature(
        feature=int_sequence,
        shape={'int': (3,)},
        dtype={'int': tf.int32},
        tests=cases,
    )
def test_text_encoded(self):
    """Check a `Text` feature whose encoder is a `ByteTextEncoder`."""
    sample = u'你好'
    # UTF-8 bytes of the sample; the expected ids are these values plus one.
    utf8_bytes = [228, 189, 160, 229, 165, 189]

    byte_encoded_text = features.Text(encoder=text_encoder.ByteTextEncoder())
    unicode_case = test_utils.FeatureExpectationItem(
        value=sample,
        expected=[byte + 1 for byte in utf8_bytes],
    )
    # Empty string gives an empty id list.
    empty_case = test_utils.FeatureExpectationItem(
        value='',
        expected=[],
    )

    self.assertFeature(
        feature=byte_encoded_text,
        shape=(None,),
        dtype=tf.int64,
        tests=[unicode_case, empty_case],
    )
def expectations(self):
    """Build the expectations for the `Audio` feature.

    Ten random int64 samples are generated, wrapped into a pydub
    `AudioSegment` and exported to a temporary WAV file. The same samples are
    then expected back both when the feature is fed the numpy array and when
    it is fed the path of the WAV file.

    Returns:
        A list of two `FeatureExpectation` objects (numpy input, WAV input).
    """
    import os  # Local import: only needed to release the mkstemp descriptor.

    np_audio = np.random.randint(-2**10, 2**10, size=(10,), dtype=np.int64)
    audio = pydub.AudioSegment.empty().set_sample_width(2)
    # See documentation for _spawn usage:
    # https://github.com/jiaaro/pydub/blob/master/API.markdown#audiosegmentget_array_of_samples
    audio = audio._spawn(array.array(audio.array_type, np_audio))

    # mkstemp() returns an already-open OS-level descriptor together with the
    # path. Only the path is used, so close the descriptor right away instead
    # of leaking it. The file itself must outlive this call because the
    # returned expectation refers to it by path.
    tmp_fd, tmp_file = tempfile.mkstemp()
    os.close(tmp_fd)
    audio.export(tmp_file, format="wav")

    return [
        # Numpy array
        test_utils.FeatureExpectation(
            name="audio_np",
            feature=features.Audio(),
            shape=(None,),
            dtype=tf.int64,
            tests=[
                test_utils.FeatureExpectationItem(
                    value=np_audio,
                    expected=np_audio,
                ),
            ],
        ),
        # WAV file (distinct name so the two cases can be told apart)
        test_utils.FeatureExpectation(
            name="audio_wav",
            feature=features.Audio(file_format="wav"),
            shape=(None,),
            dtype=tf.int64,
            tests=[
                test_utils.FeatureExpectationItem(
                    value=tmp_file,
                    expected=np_audio,
                ),
            ],
        ),
    ]
def test_labels(self):
    """Check a `ClassLabel` built from names, fed with ints and strings."""
    direction = features.ClassLabel(names=['left', 'right'])

    # (input value, expected class index) pairs.
    value_to_index = [
        (1, 1),
        ('left', 0),
        ('right', 1),
    ]
    cases = [
        test_utils.FeatureExpectationItem(value=given, expected=index)
        for given, index in value_to_index
    ]

    self.assertFeature(
        feature=direction,
        dtype=tf.int64,
        shape=(),
        tests=cases,
    )
def test_image_shaped(self):
    """Check an `Image` feature declared with a fully static shape."""
    static_shape = (32, 64, 3)
    matching_img = randint(256, size=static_shape, dtype=np.uint8)

    # Image with statically defined shape.
    shaped_feature = features_lib.Image(shape=static_shape)

    accepted = test_utils.FeatureExpectationItem(
        value=matching_img,
        expected=matching_img,
    )
    # The shape is static, so an image that is one row short must be refused.
    rejected = test_utils.FeatureExpectationItem(
        value=randint(256, size=(31, 64, 3), dtype=np.uint8),
        raise_cls=ValueError,
        raise_msg='are incompatible',
    )

    self.assertFeature(
        feature=shaped_feature,
        shape=(32, 64, 3),
        dtype=tf.uint8,
        tests=[accepted, rejected],
    )
def test_shape_static(self):
    """Check a float32 `Tensor` feature with a fully static (2, 3) shape.

    Valid numpy and Python-list inputs must round-trip unchanged; an input
    with the wrong dtype or the wrong shape must raise `ValueError`.
    """
    np_input = np.random.rand(2, 3).astype(np.float32)
    array_input = [
        [1, 2, 3],
        [4, 5, 6],
    ]

    self.assertFeature(
        feature=features_lib.Tensor(shape=(2, 3), dtype=tf.float32),
        dtype=tf.float32,
        shape=(2, 3),
        tests=[
            # Np array
            test_utils.FeatureExpectationItem(
                value=np_input,
                expected=np_input,
            ),
            # Python array
            test_utils.FeatureExpectationItem(
                value=array_input,
                expected=array_input,
            ),
            # Invalid dtype. The dtype is pinned to int64 because the
            # asserted message names it, while randint's default integer
            # dtype depends on the platform.
            test_utils.FeatureExpectationItem(
                value=np.random.randint(256, size=(2, 3), dtype=np.int64),
                raise_cls=ValueError,
                raise_msg='int64 do not match',
            ),
            # Invalid shape
            test_utils.FeatureExpectationItem(
                value=np.random.rand(2, 4).astype(np.float32),
                raise_cls=ValueError,
                raise_msg='are incompatible',
            ),
        ],
    )
def test_bool_flat(self):
    """Check a scalar boolean `Tensor` feature with numpy and Python bools."""
    scalar_bool = features_lib.Tensor(shape=(), dtype=tf.bool)

    # (input value, expected output) pairs: 0-d numpy arrays first, then
    # plain Python booleans.
    value_to_expected = [
        (np.array(True), True),
        (np.array(False), False),
        (True, True),
        (False, False),
    ]
    cases = [
        test_utils.FeatureExpectationItem(value=given, expected=wanted)
        for given, wanted in value_to_expected
    ]

    self.assertFeature(
        feature=scalar_bool,
        dtype=tf.bool,
        shape=(),
        tests=cases,
    )
def test_label(self):
    """Check a variable-length `Sequence` of `ClassLabel` values."""
    label_sequence = tfds.features.Sequence(
        tfds.features.ClassLabel(names=['left', 'right']),
    )

    # (label names, expected class indices) pairs. The lengths differ on
    # purpose (3, 4 and 0 elements) to exercise the variable length.
    names_to_indices = [
        (['right', 'left', 'left'], [1, 0, 0]),
        (['right', 'left', 'right', 'left'], [1, 0, 1, 0]),
        ([], []),
    ]
    cases = [
        test_utils.FeatureExpectationItem(value=names, expected=indices)
        for names, indices in names_to_indices
    ]

    self.assertFeature(
        feature=label_sequence,
        shape=(None,),
        dtype=tf.int64,
        tests=cases,
    )
def expectations(self):
    """Build the expectations for `ClassLabel` features.

    Two configurations are covered: a label declared by class count
    (`num_classes=10`) and a label declared by explicit names.

    Returns:
        A list of two `FeatureExpectation` objects.
    """
    make_item = test_utils.FeatureExpectationItem

    numbered_cases = [
        # Valid index, as int and as its string form.
        make_item(value=3, expected=3),
        make_item(value='3', expected=3),
        # 10 with num_classes=10, as int and as string; the two forms are
        # expected to fail with different messages.
        make_item(
            value=10,
            raise_cls=ValueError,
            raise_msg='greater than configured num_classes',
        ),
        make_item(
            value='10',
            raise_cls=ValueError,
            raise_msg='Invalid',
        ),
    ]
    numbered = test_utils.FeatureExpectation(
        name='label',
        feature=features.ClassLabel(num_classes=10),
        dtype=tf.int64,
        shape=(),
        tests=numbered_cases,
    )

    named_cases = [
        make_item(value=1, expected=1),
        make_item(value='left', expected=0),
        make_item(value='right', expected=1),
    ]
    named = test_utils.FeatureExpectation(
        name='directions',
        feature=features.ClassLabel(names=['left', 'right']),
        dtype=tf.int64,
        shape=(),
        tests=named_cases,
    )

    return [numbered, named]
def expectations(self):
    """Build the expectations for `Sequence` features.

    Covers a fixed-length int32 sequence and a variable-length sequence of
    class labels.

    Returns:
        A list of two `FeatureExpectation` objects.
    """
    make_item = test_utils.FeatureExpectationItem

    int_expectation = test_utils.FeatureExpectation(
        name='int',
        feature=tfds.features.Sequence(tf.int32, length=3),
        shape=(3,),
        dtype=tf.int32,
        tests=[
            # Python list.
            make_item(value=[1, 2, 3], expected=[1, 2, 3]),
            # Numpy array.
            make_item(
                value=np.ones(shape=(3,), dtype=np.int32),
                expected=[1, 1, 1],
            ),
            # Four elements given to a feature declared with length=3.
            make_item(
                value=np.ones(shape=(4,), dtype=np.int32),
                raise_cls=ValueError,
                raise_msg='Input sequence length do not match',
            ),
        ],
    )

    label_expectation = test_utils.FeatureExpectation(
        name='label',
        feature=tfds.features.Sequence(
            tfds.features.ClassLabel(names=['left', 'right']),
        ),
        shape=(None,),
        dtype=tf.int64,
        tests=[
            make_item(
                value=['right', 'left', 'left'],
                expected=[1, 0, 0],
            ),
            # A different length is accepted by the same feature.
            make_item(
                value=['right', 'left', 'right', 'left'],
                expected=[1, 0, 1, 0],
            ),
            # Empty sequence.
            make_item(value=[], expected=[]),
        ],
    )

    return [int_expectation, label_expectation]
def test_video(self):
    """Check a `Video` feature with an unknown number of 64x64 RGB frames."""
    clip_shape = (None, 64, 64, 3)
    # 128 random uint8 frames.
    clip = np.random.randint(256, size=(128, 64, 64, 3), dtype=np.uint8)

    video_feature = features.Video(shape=clip_shape)
    numpy_case = test_utils.FeatureExpectationItem(
        value=clip,
        expected=clip,
    )

    self.assertFeature(
        feature=video_feature,
        shape=clip_shape,
        dtype=tf.uint8,
        tests=[
            # Numpy array
            numpy_case,
            # File path (Gif): not covered yet.
            # File path (.mp4): not covered yet.
        ],
    )
def test_images(self):
    """Check an `Image` feature declared without a static shape.

    Valid inputs are numpy arrays of two different sizes and a PNG file
    path. Inputs with the wrong dtype, rank or channel count are expected
    to raise `ValueError`.
    """
    first_img = randint(256, size=(128, 100, 3), dtype=np.uint8)
    second_img = randint(256, size=(64, 200, 3), dtype=np.uint8)

    here = os.path.dirname(__file__)
    png_path = os.path.join(here, '../../testing/test_data/6pixels.png')
    # Pixel content of the PNG above, see tests_data/README.md.
    png_pixels = [
        [[0, 255, 0], [255, 0, 0], [255, 0, 255]],
        [[0, 0, 255], [255, 255, 0], [126, 127, 128]],
    ]

    image_feature = features_lib.Image()
    make_item = test_utils.FeatureExpectationItem

    accepted = [
        # Numpy array.
        make_item(value=first_img, expected=first_img),
        # File path.
        make_item(value=png_path, expected=png_pixels),
        # A second height/width is accepted by the same feature.
        make_item(value=second_img, expected=second_img),
    ]
    rejected = [
        # Invalid type (uint32 instead of uint8).
        make_item(
            value=randint(256, size=(128, 128, 3), dtype=np.uint32),
            raise_cls=ValueError,
            raise_msg='should be uint8',
        ),
        # Invalid number of dimensions.
        make_item(
            value=randint(256, size=(128, 128), dtype=np.uint8),
            raise_cls=ValueError,
            raise_msg='must have the same rank',
        ),
        # Invalid number of channels.
        make_item(
            value=randint(256, size=(128, 128, 1), dtype=np.uint8),
            raise_cls=ValueError,
            raise_msg='are incompatible',
        ),
    ]

    self.assertFeature(
        feature=image_feature,
        shape=(None, None, 3),
        dtype=tf.uint8,
        tests=accepted + rejected,
    )
def test_feature(self):
    """Check that `BBoxFeature` turns a `BBox` into four float32 values."""
    box = features.BBox(
        ymin=0.0,
        xmin=0.25,
        ymax=1.0,
        xmax=0.75,
    )
    # Expected order of the coordinates: ymin, xmin, ymax, xmax.
    box_case = test_utils.FeatureExpectationItem(
        value=box,
        expected=[0.0, 0.25, 1.0, 0.75],
    )

    self.assertFeature(
        feature=features.BBoxFeature(),
        shape=(4,),
        dtype=tf.float32,
        tests=[box_case],
    )
def expectations(self):
    """Build the expectations for a `OneOf` feature selecting 'choice2'.

    Returns:
        A single-element list with the `FeatureExpectation` for the feature.
    """

    def scalar_spec(dtype):
        # Serialized spec of one scalar value of the given dtype.
        return tf.FixedLenFeature(shape=(), dtype=dtype)

    one_of = features_lib.OneOf(
        choice='choice2',
        feature_dict={
            'choice1': tf.float32,
            'choice2': AnInputConnector(),
        },
    )

    # All choices are present in the serialized feature.
    serialized_specs = {
        'choice1': scalar_spec(tf.float32),
        'choice2/a': scalar_spec(tf.int64),
        'choice2/b': scalar_spec(tf.int64),
    }

    round_trip = test_utils.FeatureExpectationItem(
        value={
            'choice1': 0.0,
            'choice2': 1,
        },
        # All choices are serialized.
        expected_serialized={
            'choice1': 0.0,
            'choice2/a': 2,  # 1 + 1
            'choice2/b': 10,  # 1 * 10
        },
        # Only choice 2 is decoded.
        # a = 1 + 1, b = 1 * 10 => output = a + b = 2 + 10 = 12
        expected=12,
    )

    one_of_expectation = test_utils.FeatureExpectation(
        name='oneof',
        feature=one_of,
        serialized_features=serialized_specs,
        # choice2 selected so dtype == AnInputConnector().dtype
        dtype=tf.int64,
        # choice2 selected so shape == AnInputConnector().shape
        shape=(),
        tests=[round_trip],
    )
    return [one_of_expectation]
def expectations(self):
    """Build the expectations for the `BBoxFeature`.

    Returns:
        A single-element list with the `FeatureExpectation` for the feature.
    """
    box = features.BBox(
        ymin=0.0,
        xmin=0.25,
        ymax=1.0,
        xmax=0.75,
    )
    # Expected order of the coordinates: ymin, xmin, ymax, xmax.
    box_case = test_utils.FeatureExpectationItem(
        value=box,
        expected=[0.0, 0.25, 1.0, 0.75],
    )

    bbox_expectation = test_utils.FeatureExpectation(
        name='bbox',
        feature=features.BBoxFeature(),
        shape=(4,),
        dtype=tf.float32,
        tests=[box_case],
    )
    return [bbox_expectation]
def expectations(self):
    """Build the expectations for `SequenceDict` features.

    Four configurations are covered: a fixed-length int sequence, a
    variable-length class-label sequence, a nested dict of sequences and a
    variable-length sequence of fixed-shape images.

    Returns:
        A list of four `FeatureExpectation` objects, in the order above.
    """
    make_item = test_utils.FeatureExpectationItem
    make_expectation = test_utils.FeatureExpectation

    int_expectation = make_expectation(
        name='int',
        feature=tfds.features.SequenceDict({'int': tf.int32}, length=3),
        shape={'int': (3,)},
        dtype={'int': tf.int32},
        tests=[
            # Python list.
            make_item(
                value={'int': [1, 2, 3]},
                expected={'int': [1, 2, 3]},
            ),
            # Numpy array.
            make_item(
                value={'int': np.ones(shape=(3,), dtype=np.int32)},
                expected={'int': [1, 1, 1]},
            ),
            # List of per-step dicts.
            make_item(
                value=[{'int': step} for step in (1, 10, 100)],
                expected={'int': [1, 10, 100]},
            ),
            # Four elements given to a feature declared with length=3.
            make_item(
                value={'int': np.ones(shape=(4,), dtype=np.int32)},
                raise_cls=ValueError,
                raise_msg='Input sequence length do not match',
            ),
        ],
    )

    label_expectation = make_expectation(
        name='label',
        feature=tfds.features.SequenceDict(
            {
                'label': tfds.features.ClassLabel(names=['left', 'right']),
            },
            length=None,
        ),
        shape={'label': (None,)},
        dtype={'label': tf.int64},
        tests=[
            make_item(
                value={'label': ['right', 'left', 'left']},
                expected={'label': [1, 0, 0]},
            ),
            # A different length is accepted by the same feature.
            make_item(
                value={'label': ['right', 'left', 'right', 'left']},
                expected={'label': [1, 0, 1, 0]},
            ),
            # Empty sequence.
            make_item(
                value={'label': []},
                expected={'label': []},
            ),
        ],
    )

    nested_expectation = make_expectation(
        name='nested',
        feature=tfds.features.SequenceDict(
            {
                'a': tf.string,
                'b': {
                    'c': tfds.features.Tensor(shape=(4, 2), dtype=tf.int32),
                    'd': tf.uint8,
                },
            },
            length=None,
        ),
        shape={
            'a': (None,),
            'b': {
                'c': (None, 4, 2),
                'd': (None,),
            },
        },
        dtype={
            'a': tf.string,
            'b': {
                'c': tf.int32,
                'd': tf.uint8,
            },
        },
        tests=[
            # Dict of sequences, three steps each.
            make_item(
                value={
                    'a': ['aa', 'b', 'ccc'],
                    'b': {
                        'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
                        'd': [1, 2, 3],
                    },
                },
                expected={
                    'a': [
                        tf.compat.as_bytes(text)
                        for text in ('aa', 'b', 'ccc')
                    ],
                    'b': {
                        'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
                        'd': [1, 2, 3],
                    },
                },
            ),
            # 'b' given as a list of 100 per-step dicts.
            make_item(
                value={
                    'a': [str(idx) for idx in range(100)],
                    'b': [
                        {
                            'c': np.ones(shape=(4, 2), dtype=np.int32),
                            'd': 5,
                        }
                        for _ in range(100)
                    ],
                },
                expected={
                    'a': [
                        tf.compat.as_bytes(str(idx)) for idx in range(100)
                    ],
                    'b': {
                        'c': np.ones(shape=(100, 4, 2), dtype=np.int32),
                        'd': [5] * 100,
                    },
                },
            ),
            # Inputs with different sequence lengths ('c' has 4 steps, the
            # others 3).
            make_item(
                value={
                    'a': ['aa', 'b', 'ccc'],
                    'b': {
                        'c': np.ones(shape=(4, 4, 2), dtype=np.int32),
                        'd': [1, 2, 3],
                    },
                },
                raise_cls=ValueError,
                raise_msg='length of all elements of one sequence should',
            ),
        ],
    )

    # Four random uint8 frames, plus the same frames stacked on a new axis 0.
    frame_shape = (128, 100, 3)
    frames = [
        np.random.randint(256, size=frame_shape, dtype=np.uint8)
        for _ in range(4)
    ]
    stacked_frames = np.stack(frames)

    image_expectation = make_expectation(
        name='image',
        feature=tfds.features.SequenceDict(
            {
                'image': tfds.features.Image(shape=frame_shape),
            },
            length=None,
        ),
        shape={'image': (None, 128, 100, 3)},
        dtype={'image': tf.uint8},
        tests=[
            # List of per-step dicts.
            make_item(
                value=[{'image': frame} for frame in frames],
                expected={'image': stacked_frames},
            ),
            # Dict holding an already stacked array.
            make_item(
                value={'image': stacked_frames},
                expected={'image': stacked_frames},
            ),
            # Dict holding a list of arrays.
            make_item(
                value={'image': frames},
                expected={'image': stacked_frames},
            ),
            # Empty value: the expected output still has the per-frame shape.
            make_item(
                value={'image': []},
                expected={
                    'image': np.empty(
                        shape=(0, 128, 100, 3), dtype=np.uint8),
                },
            ),
        ],
    )

    return [
        int_expectation,
        label_expectation,
        nested_expectation,
        image_expectation,
    ]
def expectations(self):
    """Build the expectations for a nested `FeaturesDict`.

    The dict mixes two custom connectors ('input', 'output') with a nested
    'img' sub-dict. The expectation checks the flattened serialized keys as
    well as the decoded nested structure.

    Returns:
        A single-element list with the `FeatureExpectation` for the dict.
    """

    def scalar_spec(dtype):
        # Serialized spec of one scalar value of the given dtype.
        return tf.FixedLenFeature(shape=(), dtype=dtype)

    fdict = features_lib.FeaturesDict({
        'input': AnInputConnector(),
        'output': AnOutputConnector(),
        'img': {
            'size': {
                'height': tf.int64,
                'width': tf.int64,
            },
            'metadata/path': tf.string,
        },
    })

    # Nested keys show up joined with '/' in the serialized info.
    serialized_specs = {
        'input/a': scalar_spec(tf.int64),
        'input/b': scalar_spec(tf.int64),
        'output': scalar_spec(tf.float32),
        'img/size/height': scalar_spec(tf.int64),
        'img/size/width': scalar_spec(tf.int64),
        'img/metadata/path': scalar_spec(tf.string),
    }
    dtypes = {
        'input': tf.int64,
        'output': tf.float32,
        'img': {
            'size': {
                'height': tf.int64,
                'width': tf.int64,
            },
            'metadata/path': tf.string,
        },
    }
    shapes = {
        'input': (),
        'output': (),
        'img': {
            'size': {
                'height': (),
                'width': (),
            },
            'metadata/path': (),
        },
    }

    round_trip = test_utils.FeatureExpectationItem(
        value={
            'input': 1,
            'output': -1,
            'img': {
                'size': {
                    'height': 256,
                    'width': 128,
                },
                'metadata/path': 'path/to/xyz.jpg',
            },
        },
        expected_serialized={
            'input/a': 2,  # 1 + 1
            'input/b': 10,  # 1 * 10
            'output': -10.0,  # -1 * 10.0
            'img/size/height': 256,
            'img/size/width': 128,
            'img/metadata/path': 'path/to/xyz.jpg',
        },
        expected={
            # a = 1 + 1, b = 1 * 10 => output = a + b = 2 + 10 = 12
            'input': 12,  # 2 + 10
            'output': -1.0,
            'img': {
                'size': {
                    'height': 256,
                    'width': 128,
                },
                'metadata/path': tf.compat.as_bytes('path/to/xyz.jpg'),
            },
        },
    )

    fdict_expectation = test_utils.FeatureExpectation(
        name='fdict',
        feature=fdict,
        serialized_info=serialized_specs,
        dtype=dtypes,
        shape=shapes,
        tests=[round_trip],
    )
    return [fdict_expectation]
def expectations(self):
    """Build the expectations for `Tensor` features.

    Four configurations are covered: a static float32 shape, a shape with a
    dynamic first dimension, a scalar boolean and a boolean vector.

    Returns:
        A list of four `FeatureExpectation` objects, in the order above.
    """
    np_input = np.random.rand(2, 3).astype(np.float32)
    array_input = [
        [1, 2, 3],
        [4, 5, 6],
    ]

    np_input_dynamic_1 = np.random.randint(
        256, size=(2, 3, 2), dtype=np.int32)
    np_input_dynamic_2 = np.random.randint(
        256, size=(5, 3, 2), dtype=np.int32)

    return [
        test_utils.FeatureExpectation(
            name='shape_static',
            feature=features_lib.Tensor(shape=(2, 3), dtype=tf.float32),
            dtype=tf.float32,
            shape=(2, 3),
            tests=[
                # Np array
                test_utils.FeatureExpectationItem(
                    value=np_input,
                    expected=np_input,
                ),
                # Python array
                test_utils.FeatureExpectationItem(
                    value=array_input,
                    expected=array_input,
                ),
                # Invalid dtype. The dtype is pinned to int64 because the
                # asserted message names it, while randint's default integer
                # dtype depends on the platform.
                test_utils.FeatureExpectationItem(
                    value=np.random.randint(
                        256, size=(2, 3), dtype=np.int64),
                    raise_cls=ValueError,
                    raise_msg='int64 do not match',
                ),
                # Invalid shape
                test_utils.FeatureExpectationItem(
                    value=np.random.rand(2, 4).astype(np.float32),
                    raise_cls=ValueError,
                    raise_msg='are incompatible',
                ),
            ],
        ),
        test_utils.FeatureExpectation(
            name='shape_dynamic',
            feature=features_lib.Tensor(shape=(None, 3, 2), dtype=tf.int32),
            dtype=tf.int32,
            shape=(None, 3, 2),
            tests=[
                # Two inputs differing only in the dynamic first dimension.
                test_utils.FeatureExpectationItem(
                    value=np_input_dynamic_1,
                    expected=np_input_dynamic_1,
                ),
                test_utils.FeatureExpectationItem(
                    value=np_input_dynamic_2,
                    expected=np_input_dynamic_2,
                ),
                # Invalid shape (last dimension is 1 instead of 2)
                test_utils.FeatureExpectationItem(
                    value=np.random.randint(
                        256, size=(2, 3, 1), dtype=np.int32),
                    raise_cls=ValueError,
                    raise_msg='are incompatible',
                ),
            ],
        ),
        test_utils.FeatureExpectation(
            name='bool_flat',
            feature=features_lib.Tensor(shape=(), dtype=tf.bool),
            dtype=tf.bool,
            shape=(),
            tests=[
                # 0-d numpy arrays
                test_utils.FeatureExpectationItem(
                    value=np.array(True),
                    expected=True,
                ),
                test_utils.FeatureExpectationItem(
                    value=np.array(False),
                    expected=False,
                ),
                # Plain Python booleans
                test_utils.FeatureExpectationItem(
                    value=True,
                    expected=True,
                ),
                test_utils.FeatureExpectationItem(
                    value=False,
                    expected=False,
                ),
            ],
        ),
        test_utils.FeatureExpectation(
            name='bool_array',
            feature=features_lib.Tensor(shape=(3,), dtype=tf.bool),
            dtype=tf.bool,
            shape=(3,),
            tests=[
                # Numpy array
                test_utils.FeatureExpectationItem(
                    value=np.array([True, True, False]),
                    expected=[True, True, False],
                ),
                # Python list
                test_utils.FeatureExpectationItem(
                    value=[True, False, True],
                    expected=[True, False, True],
                ),
            ],
        ),
    ]
def test_nested(self):
    """Check a variable-length `SequenceDict` holding a nested dict."""
    nested_sequence = tfds.features.SequenceDict(
        {
            'a': tf.string,
            'b': {
                'c': tfds.features.Tensor(shape=(4, 2), dtype=tf.int32),
                'd': tf.uint8,
            },
        },
        length=None,
    )
    expected_shapes = {
        'a': (None,),
        'b': {
            'c': (None, 4, 2),
            'd': (None,),
        },
    }
    expected_dtypes = {
        'a': tf.string,
        'b': {
            'c': tf.int32,
            'd': tf.uint8,
        },
    }
    make_item = test_utils.FeatureExpectationItem

    # Dict of sequences, three steps each.
    three_steps = make_item(
        value={
            'a': ['aa', 'b', 'ccc'],
            'b': {
                'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
                'd': [1, 2, 3],
            },
        },
        expected={
            'a': [tf.compat.as_bytes(text) for text in ('aa', 'b', 'ccc')],
            'b': {
                'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
                'd': [1, 2, 3],
            },
        },
    )

    # 'b' given as a list of 100 per-step dicts.
    hundred_steps = make_item(
        value={
            'a': [str(idx) for idx in range(100)],
            'b': [{  # pylint: disable=g-complex-comprehension
                'c': np.ones(shape=(4, 2), dtype=np.int32),
                'd': 5,
            } for _ in range(100)],
        },
        expected={
            'a': [tf.compat.as_bytes(str(idx)) for idx in range(100)],
            'b': {
                'c': np.ones(shape=(100, 4, 2), dtype=np.int32),
                'd': [5] * 100,
            },
        },
    )

    # Inputs with different sequence lengths ('c' has 4 steps, the others 3).
    mismatched_lengths = make_item(
        value={
            'a': ['aa', 'b', 'ccc'],
            'b': {
                'c': np.ones(shape=(4, 4, 2), dtype=np.int32),
                'd': [1, 2, 3],
            },
        },
        raise_cls=ValueError,
        raise_msg='length of all elements of one sequence should',
    )

    self.assertFeature(
        feature=nested_sequence,
        shape=expected_shapes,
        dtype=expected_dtypes,
        tests=[three_steps, hundred_steps, mismatched_lengths],
    )