def test_getattr(self):
    """Checks that `names` of a wrapped ClassLabel is readable via Sequence."""
    expected_names = ['left', 'right']

    # Sequence around a single feature: `names` is read on the Sequence itself.
    flat_sequence = feature_lib.Sequence(
        feature_lib.ClassLabel(names=['left', 'right']),
    )
    self.assertEqual(flat_sequence.names, expected_names)

    # Sequence around a dict: `names` is read on the indexed sub-feature.
    dict_sequence = feature_lib.Sequence({
        'label': feature_lib.ClassLabel(names=['left', 'right']),
    })
    self.assertEqual(dict_sequence['label'].names, expected_names)
def test_metadata(self):
    """Round-trips the wrapped ClassLabel names through save/load metadata."""
    saved = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
    saved.feature.names = ['left', 'right']

    with testing.tmp_dir() as tmp_dir:
        saved.save_metadata(data_dir=tmp_dir, feature_name='test')

        # A second feature, created without names, loads the saved metadata.
        restored = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
        restored.load_metadata(data_dir=tmp_dir, feature_name='test')
        self.assertEqual(restored.feature.names, ['left', 'right'])
def test_empty(self):
    """Checks a ClassLabel with zero classes, declared in two ways."""
    # Encoding should work if num_classes=0, whether that is stated through
    # `num_classes=0` or through an empty `names` list.
    for ctor_kwargs in ({'num_classes': 0}, {'names': []}):
        empty_labels = features.ClassLabel(**ctor_kwargs)
        self.assertEqual(0, empty_labels.num_classes)
        self.assertEqual(0, len(empty_labels.names))
        self.assertEqual(-1, empty_labels.encode_example(-1))
def expectations(self):
    """Returns the FeatureExpectation cases for ClassLabel.

    Two features are described: one declared with `num_classes=10` and one
    declared with `names=['left', 'right']`.
    """
    item = test_utils.FeatureExpectationItem

    by_count = test_utils.FeatureExpectation(
        name='label',
        feature=features.ClassLabel(num_classes=10),
        dtype=tf.int64,
        shape=(),
        tests=[
            # The int 3 and the string '3' are both expected to yield 3.
            item(value=3, expected=3),
            item(value='3', expected=3),
            # The value 10 is expected to raise ValueError in both forms.
            item(
                value=10,
                raise_cls=ValueError,
                raise_msg='greater than configured num_classes',
            ),
            item(value='10', raise_cls=ValueError, raise_msg='Invalid'),
        ],
    )

    by_name = test_utils.FeatureExpectation(
        name='directions',
        feature=features.ClassLabel(names=['left', 'right']),
        dtype=tf.int64,
        shape=(),
        tests=[
            item(value=1, expected=1),
            item(value='left', expected=0),
            item(value='right', expected=1),
        ],
    )

    return [by_count, by_name]
def test_feature(self):
    """Exercises a ClassLabel declared with `num_classes=10`."""
    item = testing.FeatureExpectationItem

    ten_classes = features.ClassLabel(num_classes=10)
    cases = [
        # The int 3 and the string '3' are both expected to yield 3.
        item(value=3, expected=3),
        item(value='3', expected=3),
        # The value 10 is expected to raise ValueError in both forms.
        item(
            value=10,
            raise_cls=ValueError,
            raise_msg='greater than configured num_classes',
        ),
        item(value='10', raise_cls=ValueError, raise_msg='Invalid'),
    ]

    self.assertFeature(
        feature=ten_classes,
        dtype=tf.int64,
        shape=(),
        tests=cases,
        test_attributes={
            'num_classes': 10,
            # Expected names are the stringified ids '0' through '9'.
            'names': [str(class_id) for class_id in range(10)],
        },
    )
def test_label(self):
    """Exercises a Sequence of ClassLabel fed with label names."""
    item = testing.FeatureExpectationItem

    label_sequence = feature_lib.Sequence(
        feature_lib.ClassLabel(names=['left', 'right']),
    )
    cases = [
        item(value=['right', 'left', 'left'], expected=[1, 0, 0]),
        # Variable sequence length
        item(value=['right', 'left', 'right', 'left'], expected=[1, 0, 1, 0]),
        # Empty sequence length
        item(value=[], expected=[]),
    ]

    self.assertFeature(
        feature=label_sequence,
        shape=(None,),
        dtype=tf.int64,
        tests=cases,
    )
def test_file_path(tmp_path):
    """Checks that `names_file` accepts both a Path and a str."""
    # Empty lines are ignored
    # NOTE(review): the blank-line layout of this literal is reconstructed
    # from a whitespace-collapsed source -- confirm against the original.
    raw_names = textwrap.dedent("""
        label1

        label0

        """)
    label_file = tmp_path / 'label_names.txt'
    label_file.write_text(raw_names)

    # Both Path and str are supported
    from_path = features.ClassLabel(names_file=label_file)
    from_str = features.ClassLabel(names_file=str(label_file))

    assert from_path.names == from_str.names
    assert from_path.names == ['label1', 'label0']  # Order is kept
def test_label(self):
    """Exercises a Sequence over a dict holding one ClassLabel."""
    item = testing.FeatureExpectationItem

    dict_sequence = feature_lib.Sequence(
        {
            'label': feature_lib.ClassLabel(names=['left', 'right']),
        },
        length=None,
    )
    serialized = {
        'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
    }
    cases = [
        item(
            value={'label': ['right', 'left', 'left']},
            expected={'label': [1, 0, 0]},
        ),
        # Variable sequence length
        item(
            value={'label': ['right', 'left', 'right', 'left']},
            expected={'label': [1, 0, 1, 0]},
        ),
        # Empty sequence length
        item(value={'label': []}, expected={'label': []}),
    ]

    self.assertFeature(
        feature=dict_sequence,
        shape={'label': (None,)},
        dtype={'label': tf.int64},
        serialized_info=serialized,
        tests=cases,
        test_attributes={'_length': None},
    )
def test_label(self):
    """Exercises a Dataset feature over a dict holding one ClassLabel."""
    item = testing.FeatureExpectationItem
    from_slices = tf.data.Dataset.from_tensor_slices

    dict_dataset = feature_lib.Dataset(
        {
            'label': feature_lib.ClassLabel(names=['left', 'right']),
        },
        length=None,
    )
    serialized = {
        'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
    }
    cases = [
        # Input given as a list of per-example dicts.
        item(
            value=[{'label': 'right'}, {'label': 'left'}, {'label': 'left'}],
            expected=from_slices({'label': [1, 0, 0]}),
        ),
        # Variable sequence length
        item(
            value=dataset_utils.as_numpy(
                from_slices({'label': ['right', 'left', 'right', 'left']})
            ),
            expected=from_slices({'label': [1, 0, 1, 0]}),
        ),
    ]

    self.assertFeatureEagerOnly(
        feature=dict_dataset,
        shape={'label': ()},
        dtype={'label': tf.int64},
        serialized_info=serialized,
        tests=cases,
        test_attributes={'_length': None},
    )
def test_feature(self):
    """Exercises a ClassLabel declared with `num_classes=10`."""
    item = testing.FeatureExpectationItem

    ten_classes = features.ClassLabel(num_classes=10)
    cases = [
        # The int 3 and the string '3' are both expected to yield 3.
        item(value=3, expected=3),
        item(value='3', expected=3),
        # The value 10 is expected to raise ValueError in both forms.
        item(
            value=10,
            raise_cls=ValueError,
            raise_msg='greater than configured num_classes',
        ),
        item(value='10', raise_cls=ValueError, raise_msg='Invalid'),
    ]

    self.assertFeature(
        feature=ten_classes,
        dtype=tf.int64,
        shape=(),
        tests=cases,
    )
def test_feature_save_load_metadata_slashes(self):
    """Saves then loads metadata for a FeaturesDict whose keys contain '/'."""
    with testing.tmp_dir() as data_dir:
        slashed_features = features_lib.FeaturesDict({
            'image/frame': features_lib.Image(shape=(32, 32, 3)),
            'image/label': features_lib.ClassLabel(num_classes=2),
        })
        # The test passes as long as neither call raises.
        slashed_features.save_metadata(data_dir)
        slashed_features.load_metadata(data_dir)
def _info(self):
    """Builds the DatasetInfo: a 28x28x1 image and a 10-class label."""
    feature_spec = features.FeaturesDict({
        "image": features.Image(shape=(28, 28, 1)),
        "label": features.ClassLabel(num_classes=10),
    })
    return dataset_info.DatasetInfo(builder=self, features=feature_spec)
def _info(self):
    """Builds the DatasetInfo: a 28x28x1 image, a 10-class label, a description."""
    feature_spec = features.FeaturesDict({
        'image': features.Image(shape=(28, 28, 1)),
        'label': features.ClassLabel(num_classes=10),
    })
    return dataset_info.DatasetInfo(
        builder=self,
        features=feature_spec,
        description='Mnist description.',
    )
def test_save_load(self):
    """Checks that saved label names load into compatible ClassLabels only."""
    source_labels = features.ClassLabel(names=['label3', 'label1', 'label2'])
    unsized_labels = features.ClassLabel(num_classes=None)
    mismatched_labels = features.ClassLabel(num_classes=1)

    with test_utils.tmp_dir(self.get_temp_dir()) as tmp_dir:
        source_labels.save_metadata(tmp_dir, 'test-labels')
        unsized_labels.load_metadata(tmp_dir, 'test-labels')

        # Three saved names cannot be loaded into a 1-class label.
        with self.assertRaisesWithPredicateMatch(
            ValueError,
            'number of names do not match the defined num_classes',
        ):
            mismatched_labels.load_metadata(tmp_dir, 'test-labels')

        # unsized_labels should have been copied from source_labels
        self.assertEqual(3, unsized_labels.num_classes)
        self.assertEqual(
            unsized_labels.names,
            ['label3', 'label1', 'label2'],
        )
def _info(self):
    """Builds the DatasetInfo: image, dog/cat label, int32 id, Beam metadata."""
    feature_spec = features.FeaturesDict({
        "image": features.Image(shape=(16, 16, 1)),
        "label": features.ClassLabel(names=["dog", "cat"]),
        "id": tf.int32,
    })
    return dataset_info.DatasetInfo(
        builder=self,
        features=feature_spec,
        supervised_keys=("x", "x"),
        metadata=dataset_info.BeamMetadataDict(),
    )
def test_names(self):
    """Checks the rules for assigning `ClassLabel.names`."""
    expect_raises = self.assertRaisesWithPredicateMatch
    overwrite_msg = 'overwrite already defined ClassLabel'
    mismatch_msg = 'number of names do not match the defined num_classes'

    # Names given at construction cannot be reassigned.
    named_at_init = features.ClassLabel(names=['label3', 'label1', 'label2'])
    with expect_raises(ValueError, overwrite_msg):
        named_at_init.names = ['other', 'labels']

    # Names assigned once after construction cannot be reassigned either.
    named_later = features.ClassLabel()
    named_later.names = ['label3', 'label1', 'label2']
    with expect_raises(ValueError, overwrite_msg):
        named_later.names = ['other', 'labels']

    # With num_classes=3, assigning three names is accepted.
    sized_ok = features.ClassLabel(num_classes=3)
    sized_ok.names = ['label3', 'label1', 'label2']

    # With num_classes=3, assigning two names is rejected.
    sized_bad = features.ClassLabel(num_classes=3)
    with expect_raises(ValueError, mismatch_msg):
        sized_bad.names = ['label3', 'label1']
def test_feature__repr__(self):
    """Compares the repr of a nested FeaturesDict against FEATURE_STR."""
    frame_sequence = features_lib.Sequence({
        'frame': features_lib.Image(shape=(32, 32, 3)),
    })
    label_sequence = features_lib.Sequence(
        features_lib.ClassLabel(names=['m', 'f'])
    )
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': label_sequence,
    })
    self.assertEqual(repr(nested), FEATURE_STR)
def test_num_classes(self):
    """Checks a ClassLabel built from `num_classes=10`."""
    labels = features.ClassLabel(num_classes=10)

    self.assertEqual(10, labels.num_classes)
    self.assertEqual(10, len(labels.names))
    self.assertEqual(1, labels.str2int('1'))
    self.assertEqual(u'1', labels.int2str(1))

    # The value 10 is expected to be rejected by both conversions.
    rejected_calls = (('str2int', '10'), ('int2str', 10))
    for method_name, bad_value in rejected_calls:
        with self.assertRaisesWithPredicateMatch(ValueError, 'Invalid'):
            getattr(labels, method_name)(bad_value)
def _info(self) -> dataset_info.DatasetInfo:
    """Builds the DatasetInfo: image, label and image filename features."""
    feature_spec = features_lib.FeaturesDict({
        'image': features_lib.Image(),
        'label': features_lib.ClassLabel(),
        'image/filename': features_lib.Text(),
    })
    info = dataset_info.DatasetInfo(
        builder=self,
        description='Generic image classification dataset.',
        features=feature_spec,
        supervised_keys=('image', 'label'),
    )
    return info
def _info(self):
    """Builds the DatasetInfo: image, dog/cat label, int32 id, Beam metadata."""
    feature_spec = features.FeaturesDict({
        'image': features.Image(shape=(16, 16, 1)),
        'label': features.ClassLabel(names=['dog', 'cat']),
        'id': tf.int32,
    })
    return dataset_info.DatasetInfo(
        builder=self,
        features=feature_spec,
        supervised_keys=('x', 'x'),
        metadata=dataset_info.BeamMetadataDict(),
    )
def expectations(self):
    """Returns the single FeatureExpectation for a 10-class ClassLabel."""
    item = test_utils.FeatureExpectationItem

    ten_class_label = test_utils.FeatureExpectation(
        name='label',
        feature=features.ClassLabel(10),
        dtype=tf.int64,
        shape=(),
        tests=[
            item(value=3, expected=3),
            # The value 10 is expected to raise ValueError.
            item(
                value=10,
                raise_cls=ValueError,
                raise_msg='greater than configured num_classes',
            ),
        ],
    )
    return [ten_class_label]
def test_str_classes(self):
    """Checks that ids follow the order in which names were given."""
    ordered_names = ('label3', 'label1', 'label2')
    labels = features.ClassLabel(names=list(ordered_names))

    self.assertEqual(3, labels.num_classes)
    self.assertEqual(labels.names, list(ordered_names))

    # name -> id, in declaration order.
    for class_id, class_name in enumerate(ordered_names):
        self.assertEqual(labels.str2int(class_name), class_id)

    # id -> name, in declaration order.
    for class_id, class_name in enumerate(ordered_names):
        self.assertEqual(labels.int2str(class_id), class_name)
def test_labels(self):
    """Exercises a ClassLabel declared with `names=['left', 'right']`."""
    item = testing.FeatureExpectationItem

    directions = features.ClassLabel(names=['left', 'right'])
    cases = [
        item(value=1, expected=1),
        item(value='left', expected=0),
        item(value='right', expected=1),
    ]

    self.assertFeature(
        feature=directions,
        dtype=tf.int64,
        shape=(),
        tests=cases,
    )
def test_feature__repr__(self):
    """Compares the repr of a nested FeaturesDict against a literal string."""
    frame_sequence = features_lib.Sequence({
        'frame': features_lib.Image(shape=(32, 32, 3)),
    })
    label_sequence = features_lib.Sequence(
        features_lib.ClassLabel(names=['m', 'f'])
    )
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': label_sequence,
    })

    # NOTE(review): line breaks and nesting indent of the expected repr are
    # reconstructed from a whitespace-collapsed source (4 spaces per level
    # assumed) -- confirm against FeaturesDict.__repr__ output.
    expected_repr = textwrap.dedent("""\
        FeaturesDict({
            'label': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=2)),
            'metadata': Sequence({
                'frame': Image(shape=(32, 32, 3), dtype=tf.uint8),
            }),
        })""")
    self.assertEqual(repr(nested), expected_repr)
def test_label(self):
    """Exercises a Dataset feature wrapping a single ClassLabel."""
    item = testing.FeatureExpectationItem
    from_slices = tf.data.Dataset.from_tensor_slices

    label_dataset = feature_lib.Dataset(
        feature_lib.ClassLabel(names=['left', 'right']),
    )
    cases = [
        item(
            value=['right', 'left', 'left'],
            expected=from_slices([1, 0, 0]),
        ),
        # Variable sequence length
        item(
            value=['right', 'left', 'right', 'left'],
            expected=from_slices([1, 0, 1, 0]),
        ),
        # Empty sequence length
        item(value=[], expected=[]),
    ]

    self.assertFeatureEagerOnly(
        feature=label_dataset,
        shape=(),
        dtype=tf.int64,
        tests=cases,
    )
def test_extract_features_values():
    """Checks `_extract_features` with bool, list, set and mixed selectors."""
    source_features = features_lib.FeaturesDict({
        'img': features_lib.Image(shape=(256, 256, 3)),
        'img2': features_lib.Image(shape=(256, 256, 3)),
        'metadata': {
            'label': features_lib.ClassLabel(num_classes=4),
            'other': tf.string,
        },
        'sequence': features_lib.Sequence({
            'x': tf.int64,
            'y': tf.int64,
        }),
        'sequence_flat': features_lib.Sequence(tf.int64),
    })

    # Selection through a dict whose values are bools, a list and a set.
    selected = _extract_features(
        feature=source_features,
        expected_feature={
            'img': True,
            'img2': False,
            'unknown_key': False,  # Extra keys are filtered
            'metadata': ['label'],
            'sequence': {'y'},
            'sequence_flat': True,
        },
    )
    wanted = features_lib.FeaturesDict({
        'img': features_lib.Image(shape=(256, 256, 3)),
        'metadata': {
            'label': features_lib.ClassLabel(num_classes=4),
        },
        'sequence': features_lib.Sequence({
            'y': tf.int64,
        }),
        'sequence_flat': features_lib.Sequence(tf.int64),
    })
    testing.assert_features_equal(selected, wanted)

    # Selection through a set of top-level keys: sub-features are kept whole.
    selected = _extract_features(
        feature=source_features,
        expected_feature={'metadata', 'sequence'},
    )
    wanted = features_lib.FeaturesDict({
        'metadata': {
            'label': features_lib.ClassLabel(num_classes=4),
            'other': tf.string,
        },
        'sequence': features_lib.Sequence({
            'x': tf.int64,
            'y': tf.int64,
        }),
    })
    testing.assert_features_equal(selected, wanted)

    # Test, mixing Features with non-features.
    selected = _extract_features(
        feature=source_features,
        expected_feature={
            'img': features_lib.Image(),
            'sequence': {
                'x': tf.int64,
                'y': False,
            },
        },
    )
    wanted = features_lib.FeaturesDict({
        'img': features_lib.Image(shape=(256, 256, 3)),
        'sequence': features_lib.Sequence({
            'x': tf.int64,
        }),
    })
    testing.assert_features_equal(selected, wanted)
def test_num_classes(self):
    """Checks that a positional `10` is exposed as `num_classes`."""
    labels = features.ClassLabel(10)
    self.assertEqual(10, labels.num_classes)
def test_duplicate_names(self):
    """Checks that a repeated label name is rejected at construction."""
    expect_raises = self.assertRaisesWithPredicateMatch
    with expect_raises(ValueError, 'label names are duplicated'):
        features.ClassLabel(names=['label1', 'label1', 'label2'])
def test_extract_features():
    """Checks `_extract_features` on accepted specs and on failure modes."""
    source_features = features_lib.FeaturesDict({
        'img': features_lib.Image(shape=(256, 256, 3)),
        'img2': features_lib.Image(shape=(256, 256, 3)),
        'metadata': {
            'label': features_lib.ClassLabel(num_classes=4),
            'other': tf.string,
        },
        'sequence': features_lib.Sequence({
            'x': tf.int64,
            'y': tf.int64,
        }),
        'sequence_flat': features_lib.Sequence(tf.int64),
    })

    # An empty spec is expected to yield an empty FeaturesDict.
    selected = _extract_features(
        feature=source_features,
        expected_feature={},
    )
    testing.assert_features_equal(selected, features_lib.FeaturesDict({}))

    # Feature spec accepted
    selected = _extract_features(
        feature=source_features,
        expected_feature={
            'img': features_lib.Image(shape=(None, None, 3)),
            'metadata': {
                'other': tf.string,
            },
            'sequence': features_lib.Sequence({
                'x': tf.int64,
            }),
        },
    )
    wanted = features_lib.FeaturesDict({
        'img': features_lib.Image(shape=(256, 256, 3)),
        'metadata': {
            'other': tf.string,
        },
        'sequence': features_lib.Sequence({
            'x': tf.int64,
        }),
    })
    testing.assert_features_equal(selected, wanted)

    # Failure mode:
    # * Structure not matching
    # * Type not matching
    # * Shape/dtype not matching
    # * Sequence values not matching (e.g. try bad dtype)
    #
    # Each entry is (exception class, message regex, spec factory). The spec
    # is built by a factory so that it is constructed inside the
    # `pytest.raises` context, right where it is passed to the call.
    failure_cases = [
        (
            ValueError,
            "Missing expected feature 'unknown'",
            lambda: {
                'sequence': features_lib.Sequence({
                    'unknown': tf.bool,
                })
            },
        ),
        (
            ValueError,
            "Missing expected feature 'non_exista",
            lambda: {
                'non_existant': features_lib.Image(shape=(None, None, 3)),
            },
        ),
        (
            TypeError,
            'Expected: Tensor.*. Got: Image',
            lambda: {
                'img': features_lib.Tensor(shape=(256, 256, 3), dtype=tf.uint8),
            },
        ),
        (
            ValueError,
            'Expected: Image.*. Got: Image',
            lambda: {
                'img': features_lib.Image(shape=(None, None, 1)),
            },
        ),
        (
            ValueError,
            'Expected: Tensor.*. Got: Tensor',
            lambda: {
                'sequence_flat': features_lib.Sequence(tf.float32),  # Wrong dtype
            },
        ),
    ]
    for error_cls, message_pattern, make_spec in failure_cases:
        with pytest.raises(error_cls, match=message_pattern):
            _extract_features(
                feature=source_features,
                expected_feature=make_spec(),
            )