def test_2lvl_sequences_mixed(self):
  """Checks a sequence of dicts mixing a nested sequence and a plain value."""
  # 'a' is itself a sequence, 'b' is a non-sequence int32.
  mixed_feature = feature_lib.Sequence({
      'a': feature_lib.Sequence(tf.int32),
      'b': tf.int32,
  })
  expected_shape = {'a': (None, None), 'b': (None,)}
  expected_dtype = {'a': tf.int32, 'b': tf.int32}

  round_trip_item = testing.FeatureExpectationItem(
      value={
          'a': [[1, 1, 1], [], [3, 3]],
          'b': [1, 2, 3],
      },
      expected={
          'a': [[1, 1, 1], [], [3, 3]],
          'b': [1, 2, 3],
      },
  )

  self.assertFeature(
      feature=mixed_feature,
      shape=expected_shape,
      dtype=expected_dtype,
      tests=[round_trip_item],
  )
def test_3lvl_sequence(self):
  """Checks three nested sequences whose middle level has a fixed length 3."""
  three_levels = feature_lib.Sequence(
      feature_lib.Sequence(
          feature_lib.Sequence(tf.int32),
          length=3,
      ),
  )

  valid_item = testing.FeatureExpectationItem(
      value=[
          [[1, 2, 3], [], [4, 5]],
          [[10, 11], [12, 13], [14]],
      ],
      expected=[
          [[1, 2, 3], [], [4, 5]],
          [[10, 11], [12, 13], [14]],
      ],
  )
  # The first entry holds only 2 inner sequences instead of the required 3.
  wrong_length_item = testing.FeatureExpectationItem(
      value=[
          [[1, 2, 3], [4, 5]],
          [[10, 11], [12, 13], [14]],
      ],
      raise_cls=ValueError,
      raise_msg='Input sequence length do not match',
  )

  self.assertFeature(
      feature=three_levels,
      shape=(None, 3, None),
      dtype=tf.int32,
      tests=[valid_item, wrong_length_item],
  )
def test_getattr(self):
  """Checks `names` of the inner ClassLabel is reachable via the Sequence."""
  # Sequence wrapping a single feature: attribute access on the sequence.
  flat_sequence = feature_lib.Sequence(
      feature_lib.ClassLabel(names=['left', 'right']),
  )
  self.assertEqual(flat_sequence.names, ['left', 'right'])

  # Sequence wrapping a dict: item access, then attribute access.
  dict_sequence = feature_lib.Sequence({
      'label': feature_lib.ClassLabel(names=['left', 'right']),
  })
  label_feature = dict_sequence['label']
  self.assertEqual(label_feature.names, ['left', 'right'])
def test_metadata(self):
  """Checks label names saved by one Sequence are loaded by another."""
  expected_names = ['left', 'right']

  saved = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
  saved.feature.names = ['left', 'right']

  with testing.tmp_dir() as tmp_dir:
    saved.save_metadata(data_dir=tmp_dir, feature_name='test')

    # A fresh feature without names picks them up from the saved metadata.
    restored = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
    restored.load_metadata(data_dir=tmp_dir, feature_name='test')
    self.assertEqual(restored.feature.names, expected_names)
def _info(self):
  """Returns the `DatasetInfo` describing a nested `frames` sequence."""
  # Innermost level: a sequence of int32 tensors of shape (2,).
  coordinates = features.Sequence(
      features.Tensor(shape=(2,), dtype=tf.int32)
  )
  frames = features.Sequence({"coordinates": coordinates})
  feature_spec = features.FeaturesDict({"frames": frames})
  return dataset_info.DatasetInfo(builder=self, features=feature_spec)
def test_feature__repr__(self):
  """Checks `repr` of a nested FeaturesDict against `FEATURE_STR`."""
  class_label = features_lib.ClassLabel(names=['m', 'f'])
  frame_sequence = features_lib.Sequence({
      'frame': features_lib.Image(shape=(32, 32, 3)),
  })
  nested_features = features_lib.FeaturesDict({
      'metadata': frame_sequence,
      'label': features_lib.Sequence(class_label),
  })
  actual_repr = repr(nested_features)
  self.assertEqual(actual_repr, FEATURE_STR)
def _normalize_feature_dict(
    feature: features_lib.FeatureConnector,
    expected_feature: _FeatureSpecs,
) -> _FeatureSpecs:
  """Builds the subset of `feature` selected by `expected_feature`.

  Args:
    feature: Connector to extract from. Only exact `FeaturesDict` and
      `Sequence` instances are handled (subclasses are rejected).
    expected_feature: Requested structure. For a `FeaturesDict` it is iterated
      with `.items()`; entries whose value is `False` are dropped.

  Returns:
    A dict of normalized items for a `FeaturesDict`, or a `Sequence` wrapping
    the normalized inner feature.

  Raises:
    ValueError: If `feature` is neither a `FeaturesDict` nor a `Sequence`.
  """
  feature_cls = type(feature)

  if feature_cls == features_lib.FeaturesDict:  # pylint: disable=unidiomatic-typecheck
    # Drop the entries explicitly disabled with `False` before extracting.
    requested = [
        (key, spec)
        for key, spec in expected_feature.items()
        if spec is not False  # pylint: disable=g-bool-id-comparison
    ]
    normalized = {}
    for key, spec in requested:
      normalized[key] = _extract_feature_item(
          feature=feature,
          expected_key=key,
          expected_value=spec,
          fn=_normalize_feature_item,
      )
    return normalized

  if feature_cls == features_lib.Sequence:  # pylint: disable=unidiomatic-typecheck
    # Normalize the wrapped feature, then wrap the result back in a Sequence.
    return features_lib.Sequence(
        _normalize_feature_dict(
            feature=feature.feature,  # pytype: disable=attribute-error
            expected_feature=expected_feature,
        )
    )

  raise ValueError(
      f'Unexpected structure {expected_feature!r} does not match '
      f'{feature!r}')
def test_features_multi_none_sequence(
    self,
    encoding: features_lib.Encoding,
    shape,
):
  """Checks a Sequence of Tensors round-trips inputs of two array shapes."""
  first_input = np.random.randint(256, size=(3, 2, 3, 1), dtype=np.uint8)
  second_input = np.random.randint(256, size=(3, 2, 2, 1), dtype=np.uint8)

  tensor_sequence = features_lib.Sequence(
      features_lib.Tensor(
          shape=shape,
          dtype=tf.uint8,
          encoding=encoding,
      ),
  )

  # TODO(epot): Is there a way to catch if the user try to encode
  # tensors with different shapes ?
  round_trip_items = [
      testing.FeatureExpectationItem(value=first_input, expected=first_input),
      testing.FeatureExpectationItem(
          value=second_input,
          expected=second_input,
      ),
  ]

  self.assertFeature(
      feature=tensor_sequence,
      shape=(None,) + shape,
      dtype=tf.uint8,
      tests=round_trip_items,
  )
def test_label(self):
  """Checks a Sequence of ClassLabel encodes names into integer ids."""
  label_sequence = feature_lib.Sequence(
      feature_lib.ClassLabel(names=['left', 'right']),
  )

  three_labels = testing.FeatureExpectationItem(
      value=['right', 'left', 'left'],
      expected=[1, 0, 0],
  )
  # Variable sequence length
  four_labels = testing.FeatureExpectationItem(
      value=['right', 'left', 'right', 'left'],
      expected=[1, 0, 1, 0],
  )
  # Empty sequence length
  no_labels = testing.FeatureExpectationItem(
      value=[],
      expected=[],
  )

  self.assertFeature(
      feature=label_sequence,
      shape=(None,),
      dtype=tf.int64,
      tests=[three_labels, four_labels, no_labels],
  )
def test_int(self):
  """Checks a fixed-length (3) int32 Sequence, including a length mismatch."""
  fixed_length_sequence = feature_lib.Sequence(tf.int32, length=3)

  # Python array
  from_list = testing.FeatureExpectationItem(
      value=[1, 2, 3],
      expected=[1, 2, 3],
  )
  # Numpy array
  from_numpy = testing.FeatureExpectationItem(
      value=np.ones(shape=(3,), dtype=np.int32),
      expected=[1, 1, 1],
  )
  # Wrong sequence length
  too_long = testing.FeatureExpectationItem(
      value=np.ones(shape=(4,), dtype=np.int32),
      raise_cls=ValueError,
      raise_msg='Input sequence length do not match',
  )

  self.assertFeature(
      feature=fixed_length_sequence,
      shape=(3,),
      dtype=tf.int32,
      tests=[from_list, from_numpy, too_long],
  )
def test_label(self):
  """Checks a dict Sequence holding a ClassLabel encodes names to ids."""
  label_sequence = feature_lib.Sequence(
      {
          'label': feature_lib.ClassLabel(names=['left', 'right']),
      },
      length=None,
  )
  label_info = feature_lib.TensorInfo(shape=(None,), dtype=tf.int64)

  three_labels = testing.FeatureExpectationItem(
      value={'label': ['right', 'left', 'left']},
      expected={'label': [1, 0, 0]},
  )
  # Variable sequence length
  four_labels = testing.FeatureExpectationItem(
      value={'label': ['right', 'left', 'right', 'left']},
      expected={'label': [1, 0, 1, 0]},
  )
  # Empty sequence length
  no_labels = testing.FeatureExpectationItem(
      value={'label': []},
      expected={'label': []},
  )

  self.assertFeature(
      feature=label_sequence,
      shape={'label': (None,)},
      dtype={'label': tf.int64},
      serialized_info={'label': label_info},
      tests=[three_labels, four_labels, no_labels],
      test_attributes={'_length': None},
  )
def test_repr_tensor(self):
  """Checks when `Tensor` features are printed collapsed versus expanded."""
  # A top-level Tensor is printed expanded.
  scalar_repr = repr(features_lib.Tensor(shape=(), dtype=tf.int32))
  self.assertEqual(scalar_repr, 'Tensor(shape=(), dtype=tf.int32)')

  # Sequences collapse the tensor repr.
  sequence_repr = repr(features_lib.Sequence(tf.int32))
  self.assertEqual(sequence_repr, 'Sequence(tf.int32)')

  class ChildTensor(features_lib.Tensor):
    pass

  nested_features = features_lib.FeaturesDict({
      'colapsed': features_lib.Tensor(shape=(), dtype=tf.int32),
      # Tensors with a defined shape are printed expanded.
      'noncolapsed': features_lib.Tensor(shape=(1,), dtype=tf.int32),
      # Tensor subclasses are printed expanded.
      'child': ChildTensor(shape=(), dtype=tf.int32),
  })
  nested_repr = repr(nested_features)
  expected_repr = textwrap.dedent("""\
      FeaturesDict({
          'child': ChildTensor(shape=(), dtype=tf.int32),
          'colapsed': tf.int32,
          'noncolapsed': Tensor(shape=(1,), dtype=tf.int32),
      })""")
  self.assertEqual(nested_repr, expected_repr)
def test_2lvl_sequences_string(self):
  """Checks a two-level Sequence of strings, including a non-string value."""
  string_sequences = feature_lib.Sequence(feature_lib.Sequence(tf.string))

  # Text inputs are expected back as bytes.
  mixed_lengths = testing.FeatureExpectationItem(
      value=[
          ['abcd', '', 'efg'],
          [],
          ['', ''],
          ['hij'],
      ],
      expected=[
          [b'abcd', b'', b'efg'],
          [],
          [b'', b''],
          [b'hij'],
      ],
  )
  only_empty = testing.FeatureExpectationItem(
      value=[
          [],
          [],
      ],
      expected=[
          [],
          [],
      ],
  )
  # An int among the strings is rejected.
  non_string_value = testing.FeatureExpectationItem(
      value=[
          ['abcd', 'efg', 123],
      ],
      raise_cls=TypeError,
      raise_msg='Expected binary or unicode string',
  )

  self.assertFeature(
      feature=string_sequences,
      shape=(None, None),
      dtype=tf.string,
      tests=[mixed_lengths, only_empty, non_string_value],
  )
def test_feature__repr__(self):
  """Checks `repr` of a FeaturesDict containing two Sequence features."""
  class_label = features_lib.ClassLabel(names=['m', 'f'])
  frame_sequence = features_lib.Sequence({
      'frame': features_lib.Image(shape=(32, 32, 3)),
  })
  nested_features = features_lib.FeaturesDict({
      'metadata': frame_sequence,
      'label': features_lib.Sequence(class_label),
  })
  actual_repr = repr(nested_features)
  expected_repr = textwrap.dedent("""\
      FeaturesDict({
          'label': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=2)),
          'metadata': Sequence({
              'frame': Image(shape=(32, 32, 3), dtype=tf.uint8),
          }),
      })""")
  self.assertEqual(actual_repr, expected_repr)
def test_image_nested_empty_len(self):
  """Checks a dict Sequence with unknown-size images, including empty input."""
  first_img = np.random.randint(256, size=(28, 28, 3), dtype=np.uint8)
  second_img = np.random.randint(256, size=(28, 28, 3), dtype=np.uint8)
  imgs = [first_img, second_img]
  imgs_stacked = np.stack(imgs)

  image_and_int = feature_lib.Sequence({
      'a': feature_lib.Image(shape=(None, None, 3)),
      'b': tf.int32,
  })

  two_steps = testing.FeatureExpectationItem(
      value={
          'a': imgs,
          'b': [1, 2],
      },
      expected={
          'a': imgs_stacked,
          'b': [1, 2],
      },
  )
  # With no steps, the image output is an empty (0, 0, 0, 3) array.
  zero_steps = testing.FeatureExpectationItem(
      value={
          'a': [],
          'b': [],
      },
      expected={
          'a': np.empty(shape=(0, 0, 0, 3), dtype=np.uint8),
          'b': [],
      },
  )

  self.assertFeature(
      feature=image_and_int,
      shape={
          'a': (None, None, None, 3),
          'b': (None,),
      },
      dtype={
          'a': tf.uint8,
          'b': tf.int32,
      },
      tests=[two_steps, zero_steps],
  )
def test_image(self):
  """Checks a Sequence of fixed-size images across several input layouts."""
  imgs = []
  for _ in range(4):
    imgs.append(np.random.randint(256, size=(128, 100, 3), dtype=np.uint8))
  imgs_stacked = np.stack(imgs)

  image_sequence = feature_lib.Sequence(
      {
          'image': feature_lib.Image(shape=(128, 100, 3)),
      },
      length=None,
  )

  # Input given as a list of per-step dicts.
  list_of_dicts = testing.FeatureExpectationItem(
      value=[{'image': img} for img in imgs],
      expected={'image': imgs_stacked},
  )
  # Input given as a dict holding one stacked array.
  dict_of_stacked = testing.FeatureExpectationItem(
      value={'image': imgs_stacked},
      expected={'image': imgs_stacked},
  )
  # Input given as a dict holding a list of arrays.
  dict_of_list = testing.FeatureExpectationItem(
      value={'image': imgs},
      expected={'image': imgs_stacked},
  )
  # Empty value
  empty_input = testing.FeatureExpectationItem(
      value={'image': []},
      # The empty value still has the right shape
      expected={
          'image': np.empty(shape=(0, 128, 100, 3), dtype=np.uint8)
      },
  )

  self.assertFeature(
      feature=image_sequence,
      shape={'image': (None, 128, 100, 3)},
      dtype={'image': tf.uint8},
      tests=[list_of_dicts, dict_of_stacked, dict_of_list, empty_input],
  )
def test_int(self):
  """Checks a fixed-length (3) dict Sequence of int32 values."""
  int_sequence = feature_lib.Sequence({'int': tf.int32}, length=3)
  int_info = feature_lib.TensorInfo(shape=(3,), dtype=tf.int32)

  # Python array
  from_list = testing.FeatureExpectationItem(
      value={'int': [1, 2, 3]},
      expected={'int': [1, 2, 3]},
  )
  # Numpy array
  from_numpy = testing.FeatureExpectationItem(
      value={'int': np.ones(shape=(3,), dtype=np.int32)},
      expected={'int': [1, 1, 1]},
  )
  # Array of dict
  from_dicts = testing.FeatureExpectationItem(
      value=[
          {'int': 1},
          {'int': 10},
          {'int': 100},
      ],
      expected={'int': [1, 10, 100]},
  )
  # Wrong sequence length
  too_long = testing.FeatureExpectationItem(
      value={'int': np.ones(shape=(4,), dtype=np.int32)},
      raise_cls=ValueError,
      raise_msg='Input sequence length do not match',
  )

  self.assertFeature(
      feature=int_sequence,
      shape={'int': (3,)},
      dtype={'int': tf.int32},
      serialized_info={'int': int_info},
      tests=[from_list, from_numpy, from_dicts, too_long],
      test_attributes={'_length': 3},
  )
def _extract_features(
    feature: features_lib.FeatureConnector,
    expected_feature: features_lib.FeatureConnector,
) -> features_lib.FeatureConnector:
  """Recursive implementation of `PartialDecoding.extract_features`.

  Args:
    feature: The full feature to extract from.
    expected_feature: The requested feature; its type must be one that
      `feature` is an instance of.

  Returns:
    A `FeaturesDict` or `Sequence` rebuilt from the requested sub-features, or
    `feature` itself for any other connector whose dtype and shape match.

  Raises:
    TypeError: If `feature` is not an instance of `type(expected_feature)`.
    ValueError: If a leaf's dtype differs or its shape is not compatible.
  """
  # Feature types should match
  if not isinstance(feature, type(expected_feature)):
    raise TypeError(f'Expected: {expected_feature}. Got: {feature}')

  # Recurse into FeaturesDict, Sequence.
  # Exact `type` comparison (not `isinstance`) so that inherited classes are
  # treated as leaves rather than recursed into.
  feature_cls = type(feature)

  if feature_cls == features_lib.FeaturesDict:  # pylint: disable=unidiomatic-typecheck
    expected_feature = typing.cast(features_lib.FeaturesDict, expected_feature)
    subset = {}
    for key, expected_value in expected_feature.items():
      subset[key] = _extract_feature_item(
          feature=feature,
          expected_key=key,
          expected_value=expected_value,
          fn=_extract_features,
      )
    return features_lib.FeaturesDict(subset)

  if feature_cls == features_lib.Sequence:  # pylint: disable=unidiomatic-typecheck
    feature = typing.cast(features_lib.Sequence, feature)
    expected_feature = typing.cast(features_lib.Sequence, expected_feature)
    inner_subset = _extract_features(
        feature=feature.feature,
        expected_feature=expected_feature.feature,
    )
    # The rebuilt sequence keeps the length of the original feature.
    return features_lib.Sequence(inner_subset, length=feature._length)  # pylint: disable=protected-access

  # Leaf feature: assert that the specs match.
  dtype_differs = feature.dtype != expected_feature.dtype
  if dtype_differs or not utils.shapes_are_compatible(
      feature.shape, expected_feature.shape):
    raise ValueError(f'Expected: {expected_feature}. Got: {feature}')
  return feature
def test_image_unknown_len(self):
  """Checks a Sequence of unknown-size images, including empty input."""
  first_img = np.random.randint(256, size=(28, 28, 3), dtype=np.uint8)
  second_img = np.random.randint(256, size=(28, 28, 3), dtype=np.uint8)
  imgs = [first_img, second_img]
  imgs_stacked = np.stack(imgs)

  image_sequence = feature_lib.Sequence(
      feature_lib.Image(shape=(None, None, 3))
  )

  empty_item = testing.FeatureExpectationItem(
      value=[],  # Empty input
      expected=np.empty(shape=(0, 0, 0, 3), dtype=np.uint8),
  )
  two_images_item = testing.FeatureExpectationItem(
      value=imgs,
      expected=imgs_stacked,
  )

  self.assertFeature(
      feature=image_sequence,
      dtype=tf.uint8,
      shape=(None, None, None, 3),  # (length, h, w, c)
      tests=[empty_item, two_images_item],
  )
def new_to_feature(value):
  """Converts a one-element list into a `Sequence`, else defers to the base fn.

  Args:
    value: Either a list holding exactly one element, or any value accepted by
      `to_feature_fn`.

  Returns:
    `features.Sequence(element)` for a list, otherwise `to_feature_fn(value)`.

  Raises:
    ValueError: If `value` is a list that does not hold exactly one element
      (raised by the unpacking).
  """
  if not isinstance(value, list):
    return to_feature_fn(value)
  # List should contain a single element
  (element,) = value
  return features.Sequence(element)
def test_2lvl_sequences(self):
  """Checks a two-level Sequence of (2,) int32 tensors on varied inputs."""
  pair_sequences = feature_lib.Sequence(
      feature_lib.Sequence(
          feature_lib.Tensor(shape=(2,), dtype=tf.int32),
      ),
  )

  ragged_item = testing.FeatureExpectationItem(
      value=[
          [[0, 1], [2, 3]],
          [],
          [[4, 5]],
      ],
      expected=testing.RaggedConstant(
          [
              [[0, 1], [2, 3]],
              [],
              [[4, 5]],
          ],
          inner_shape=(2,),
      ),
  )
  # Empty
  empty_item = testing.FeatureExpectationItem(
      value=[],
      expected=[],
  )
  # List of empty lists
  empty_lists_item = testing.FeatureExpectationItem(
      value=[[], [], []],
      expected=[[], [], []],
  )
  # List of empty np.array
  empty_arrays_item = testing.FeatureExpectationItem(
      value=[
          np.empty(shape=(0, 2), dtype=np.int32),
          np.empty(shape=(0, 2), dtype=np.int32),
      ],
      expected=[
          [],
          [],
      ],
  )
  # Empty arrays followed by a non-empty one.
  mixed_arrays_item = testing.FeatureExpectationItem(
      value=[
          np.empty(shape=(0, 2), dtype=np.int32),
          np.empty(shape=(0, 2), dtype=np.int32),
          np.ones(shape=(3, 2), dtype=np.int32),
      ],
      expected=[
          [],
          [],
          [[1, 1], [1, 1], [1, 1]],
      ],
  )
  # Wrong types should fail
  wrong_dtype_item = testing.FeatureExpectationItem(
      value=[
          np.ones(shape=(3, 2), dtype=np.float32),
      ],
      raise_cls=ValueError,
      raise_msg='float32 do not match int32',
  )

  self.assertFeature(
      feature=pair_sequences,
      shape=(None, None, 2),
      dtype=tf.int32,
      tests=[
          ragged_item,
          empty_item,
          empty_lists_item,
          empty_arrays_item,
          mixed_arrays_item,
          wrong_dtype_item,
      ],
  )
def test_encoding(self):
  """Checks equivalent nested input layouts encode to the same example."""
  f = feature_lib.Sequence({
      'a': feature_lib.Sequence({'c': tf.int64}),
      'b': tf.int64,
  })

  # Different combinations of list of dict / dict of list describing the same
  # nested sequence.
  steps_with_inner_dict_of_list = [
      {'a': {'c': [1, 1, 1]}, 'b': 1},
      {'a': {'c': []}, 'b': 2},
      {'a': {'c': [3, 3]}, 'b': 3},
  ]
  steps_with_inner_list_of_dict = [
      {'a': [{'c': 1}, {'c': 1}, {'c': 1}], 'b': 1},
      {'a': [], 'b': 2},
      {'a': [{'c': 3}, {'c': 3}], 'b': 3},
  ]
  dict_with_inner_list_of_dict = {
      'a': [
          [{'c': 1}, {'c': 1}, {'c': 1}],
          [],
          [{'c': 3}, {'c': 3}],
      ],
      'b': [1, 2, 3],
  }
  dict_with_inner_dict_of_list = {
      'a': {'c': [[1, 1, 1], [], [3, 3]]},
      'b': [1, 2, 3],
  }
  encoded_examples = [
      f.encode_example(layout)
      for layout in (
          steps_with_inner_dict_of_list,
          steps_with_inner_list_of_dict,
          dict_with_inner_list_of_dict,
          dict_with_inner_dict_of_list,
      )
  ]

  out = {
      'a': {
          'c': tf.ragged.constant([
              [1, 1, 1],
              [],
              [3, 3],
          ])
      },
      'b': [1, 2, 3],
  }

  def to_ragged(ex):
    # Converts the nested 'c' values in place so they compare against `out`.
    ex['a']['c'] = tf.ragged.constant(ex['a']['c'])
    return ex

  for encoded in encoded_examples:
    self.assertAllEqualNested(to_ragged(encoded), out)

  # Should raise error if two sequences do not have the same length.
  mismatched_lengths = {
      'a': {'c': [[1, 1, 1], []]},
      'b': [1, 2, 3],
  }
  with self.assertRaisesWithPredicateMatch(ValueError,
                                           'length of all elements'):
    f.encode_example(mismatched_lengths)

  # Empty sequence should create the correct number of dimensions
  empty_encoded = f.encode_example([])
  self.assertAllEqualNested(
      empty_encoded,
      {
          'a': {'c': np.zeros((0, 0), np.int64)},
          'b': np.zeros((0,), np.int64),
      },
  )
def test_flatten_nested(self):
  """Checks `_flatten` / `_nest` on nested features and their error cases."""
  nested_features = features_lib.FeaturesDict({
      'a': tf.int32,
      'b': {
          'c': {
              'd': tf.int32,
              'e': tf.int32,
          },
      },
      'f': features_lib.Sequence({
          'g': features_lib.Sequence(tf.int32),
          'h': tf.int32,
      }),
  })

  # Partial input: 'e' and 'h' are left out.
  partial_input = {
      'a': 'a',
      'b': {
          'c': {
              'd': {'d': 123},
          },
      },
      'f': {
          'g': 'g',
      },
  }
  flat1 = nested_features._flatten(partial_input)
  expected_flat = [
      'a',
      {'d': 123},
      None,  # 'e'
      'g',
      None,  # h
  ]
  self.assertEqual(flat1, expected_flat)

  # Nesting the flat list back yields `None` for the keys left out.
  expected_nested = {
      'a': 'a',
      'b': {
          'c': {
              'd': {'d': 123},
              'e': None,
          },
      },
      'f': {
          'g': 'g',
          'h': None,
      },
  }
  self.assertEqual(nested_features._nest(flat1), expected_nested)

  shallow_features = features_lib.FeaturesDict({
      'a': tf.int32,
      'b': {
          'c': tf.int32,
      },
  })

  with self.assertRaisesWithPredicateMatch(ValueError, 'received a non dict'):
    shallow_features._flatten({'b': 123})

  with self.assertRaisesWithPredicateMatch(
      ValueError, 'Unrecognized keys: [\'d\']'):
    shallow_features._flatten({'b': {'c': 123, 'd': 123}})

  with self.assertRaisesWithPredicateMatch(
      ValueError, 'Expected length 2 does not match input length 3'):
    shallow_features._nest([None, None, None])
def test_extract_features():
  """Checks `_extract_features` on accepted specs and on its failure modes.

  Covers an empty spec, a partial spec (relaxed image shape, subset of nested
  keys) and the errors raised for unknown keys, mismatching feature types and
  mismatching shape/dtype.
  """
  features = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'img2': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })

  # An empty spec selects nothing.
  result = _extract_features(
      feature=features,
      expected_feature={},
  )
  testing.assert_features_equal(result, features_lib.FeaturesDict({}))

  # Feature spec accepted
  result = _extract_features(
      feature=features,
      expected_feature={
          'img': features_lib.Image(shape=(None, None, 3)),
          'metadata': {
              'other': tf.string,
          },
          'sequence': features_lib.Sequence({
              'x': tf.int64,
          }),
      },
  )
  testing.assert_features_equal(
      result,
      features_lib.FeaturesDict({
          'img': features_lib.Image(shape=(256, 256, 3)),
          'metadata': {
              'other': tf.string,
          },
          'sequence': features_lib.Sequence({
              'x': tf.int64,
          }),
      }),
  )

  # Failure mode:
  # * Structure not matching
  # * Type not matching
  # * Shape/dtype not matching
  # * Sequence values not matching (e.g. try bad dtype)
  with pytest.raises(ValueError, match="Missing expected feature 'unknown'"):
    _extract_features(
        feature=features,
        expected_feature={
            'sequence': features_lib.Sequence({
                'unknown': tf.bool,
            })
        },
    )

  with pytest.raises(ValueError, match="Missing expected feature 'non_exista"):
    _extract_features(
        feature=features,
        expected_feature={
            'non_existant': features_lib.Image(shape=(None, None, 3)),
        },
    )

  with pytest.raises(TypeError, match='Expected: Tensor.*. Got: Image'):
    _extract_features(
        feature=features,
        expected_feature={
            'img': features_lib.Tensor(shape=(256, 256, 3), dtype=tf.uint8),
        },
    )

  with pytest.raises(ValueError, match='Expected: Image.*. Got: Image'):
    _extract_features(
        feature=features,
        expected_feature={
            'img': features_lib.Image(shape=(None, None, 1)),
        },
    )

  # The pattern must stay on one line: a line break inside the quotes is a
  # syntax error, and regex `.` would not match a newline anyway.
  with pytest.raises(ValueError, match='Expected: Tensor.*. Got: Tensor'):
    _extract_features(
        feature=features,
        expected_feature={
            'sequence_flat': features_lib.Sequence(tf.float32),  # Wrong dtype
        },
    )
def test_nested(self):
  """Checks a Sequence over a nested dict, including mismatched lengths."""
  nested_sequence = feature_lib.Sequence(
      {
          'a': tf.string,
          'b': {
              'c': feature_lib.Tensor(shape=(4, 2), dtype=tf.int32),
              'd': tf.uint8,
          },
      },
      length=None,
  )
  expected_shape = {
      'a': (None,),
      'b': {
          'c': (None, 4, 2),
          'd': (None,),
      },
  }
  expected_dtype = {
      'a': tf.string,
      'b': {
          'c': tf.int32,
          'd': tf.uint8,
      },
  }

  # Dict-of-lists input with 3 steps.
  three_steps = testing.FeatureExpectationItem(
      value={
          'a': ['aa', 'b', 'ccc'],
          'b': {
              'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
              'd': [1, 2, 3],
          },
      },
      expected={
          'a': [tf.compat.as_bytes(t) for t in ('aa', 'b', 'ccc')],
          'b': {
              'c': np.ones(shape=(3, 4, 2), dtype=np.int32),
              'd': [1, 2, 3],
          },
      },
  )
  # 'b' given as a list of 100 per-step dicts.
  hundred_steps = testing.FeatureExpectationItem(
      value={
          'a': [str(i) for i in range(100)],
          'b': [{  # pylint: disable=g-complex-comprehension
              'c': np.ones(shape=(4, 2), dtype=np.int32),
              'd': 5,
          } for _ in range(100)],
      },
      expected={
          'a': [tf.compat.as_bytes(str(i)) for i in range(100)],
          'b': {
              'c': np.ones(shape=(100, 4, 2), dtype=np.int32),
              'd': [5] * 100,
          },
      },
  )
  # Test inputs not same sequence length
  mismatched_lengths = testing.FeatureExpectationItem(
      value={
          'a': ['aa', 'b', 'ccc'],
          'b': {
              'c': np.ones(shape=(4, 4, 2), dtype=np.int32),
              'd': [1, 2, 3],
          },
      },
      raise_cls=ValueError,
      raise_msg='length of all elements of one sequence should',
  )

  self.assertFeature(
      feature=nested_sequence,
      shape=expected_shape,
      dtype=expected_dtype,
      tests=[three_steps, hundred_steps, mismatched_lengths],
  )
def test_extract_features_values():
  """Checks `_extract_features` with bool, list, set and mixed spec values."""
  all_features = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'img2': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })

  # Dict spec whose values are bools, a list of keys and a set of keys.
  selected = _extract_features(
      feature=all_features,
      expected_feature={
          'img': True,
          'img2': False,
          'unknown_key': False,  # Extra keys are filtered
          'metadata': ['label'],
          'sequence': {'y'},
          'sequence_flat': True,
      },
  )
  expected_selected = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
      },
      'sequence': features_lib.Sequence({
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })
  testing.assert_features_equal(selected, expected_selected)

  # Top-level set of keys: each selected key keeps all its sub-features.
  selected = _extract_features(
      feature=all_features,
      expected_feature={'metadata', 'sequence'},
  )
  expected_selected = features_lib.FeaturesDict({
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
  })
  testing.assert_features_equal(selected, expected_selected)

  # Test, mixing Features with non-features.
  selected = _extract_features(
      feature=all_features,
      expected_feature={
          'img': features_lib.Image(),
          'sequence': {
              'x': tf.int64,
              'y': False,
          },
      },
  )
  expected_selected = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'sequence': features_lib.Sequence({
          'x': tf.int64,
      }),
  })
  testing.assert_features_equal(selected, expected_selected)