def assertFeature(self, feature, shape, dtype, tests, serialized_info=None):
  """Test the given feature against the predicates."""
  # Check the shape/dtype
  with self._subTest("shape"):
    self.assertEqual(feature.shape, shape)
  with self._subTest("dtype"):
    self.assertEqual(feature.dtype, dtype)

  # Check the serialized features
  if serialized_info is not None:
    with self._subTest("serialized_info"):
      self.assertEqual(
          serialized_info,
          feature.get_serialized_info(),
      )

  # Create the feature dict
  fdict = features.FeaturesDict({"inner": feature})
  for i, test in enumerate(tests):
    with self._subTest(str(i)):
      self.assertFeatureTest(
          fdict=fdict,
          test=test,
          feature=feature,
          shape=shape,
          dtype=dtype,
      )
def test_repr_tensor(self):
  # Top-level Tensor is printed expanded
  self.assertEqual(
      repr(features_lib.Tensor(shape=(), dtype=tf.int32)),
      'Tensor(shape=(), dtype=tf.int32)',
  )

  # Sequences collapse the tensor repr
  self.assertEqual(
      repr(features_lib.Sequence(tf.int32)),
      'Sequence(tf.int32)',
  )

  class ChildTensor(features_lib.Tensor):
    pass

  self.assertEqual(
      repr(
          features_lib.FeaturesDict({
              'collapsed': features_lib.Tensor(shape=(), dtype=tf.int32),
              # Tensors with a defined shape are printed expanded
              'noncollapsed': features_lib.Tensor(shape=(1,), dtype=tf.int32),
              # Inherited Tensors are printed expanded
              'child': ChildTensor(shape=(), dtype=tf.int32),
          })),
      textwrap.dedent("""\
          FeaturesDict({
              'child': ChildTensor(shape=(), dtype=tf.int32),
              'collapsed': tf.int32,
              'noncollapsed': Tensor(shape=(1,), dtype=tf.int32),
          })"""),
  )
def assertFeature(self, feature, shape, dtype, tests, serialized_info=None):
  """Test the given feature against the predicates."""
  # Check the shape/dtype
  with self._subTest('shape'):
    self.assertEqual(feature.shape, shape)
  with self._subTest('dtype'):
    self.assertEqual(feature.dtype, dtype)

  # Check the serialized features
  if serialized_info is not None:
    with self._subTest('serialized_info'):
      self.assertEqual(
          serialized_info,
          feature.get_serialized_info(),
      )

  # Create the feature dict
  fdict = features.FeaturesDict({'inner': feature})
  fdict._set_top_level()  # pylint: disable=protected-access
  for i, test in enumerate(tests):
    with self._subTest(str(i)):
      self.assertFeatureTest(
          fdict=fdict,
          test=test,
          feature=feature,
          shape=shape,
          dtype=dtype,
      )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({"im": features.Image()}),
      supervised_keys=("im", "im"),
      metadata=dataset_info.MetadataDict(),
  )
def test_feature_getitem(self):
  fdict = features_lib.FeaturesDict({
      'integer': tf.int32,
      'string': tf.string,
  })
  self.assertEqual(fdict['integer'].dtype, tf.int32)
  self.assertEqual(fdict['string'].dtype, tf.string)
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({"x": tf.int64}),
      supervised_keys=("x", "x"),
  )
def test_tensor_spec(self):
  feature = features_lib.FeaturesDict({
      'input': AnInputConnector(),
      'output': AnOutputConnector(),
      'img': {
          'size': {
              'height': features_lib.Tensor(shape=(2, 3), dtype=tf.int64),
              'width': features_lib.Tensor(shape=[None, 3], dtype=tf.int64),
          },
          'image': features_lib.Image(shape=(28, 28, 1)),
          'metadata/path': tf.string,
      },
  })
  self.assertAllEqualNested(
      feature.get_tensor_spec(),
      {
          'input': tf.TensorSpec(shape=[], dtype=tf.int64),
          'output': tf.TensorSpec(shape=[], dtype=tf.float32),
          'img': {
              'size': {
                  'height': tf.TensorSpec(shape=[2, 3], dtype=tf.int64),
                  'width': tf.TensorSpec(shape=[None, 3], dtype=tf.int64),
              },
              'image': tf.TensorSpec(shape=[28, 28, 1], dtype=tf.uint8),
              'metadata/path': tf.TensorSpec(shape=[], dtype=tf.string),
          },
      })
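# A hedged sketch: the nested `tf.TensorSpec` structure returned by
# `get_tensor_spec()` can be reused as a `tf.function` input signature, so
# traced graphs match the feature structure. The `process` function below is
# hypothetical and only illustrates the wiring.
import tensorflow as tf
import tensorflow_datasets as tfds

features = tfds.features.FeaturesDict({
    'image': tfds.features.Image(shape=(28, 28, 1)),
    'label': tfds.features.ClassLabel(num_classes=10),
})

@tf.function(input_signature=[features.get_tensor_spec()])
def process(example):
  # Normalize the image and pass the label through.
  return tf.cast(example['image'], tf.float32) / 255.0, example['label']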
def test_feature_save_load_metadata_slashes(self):
  with testing.tmp_dir() as data_dir:
    fd = features_lib.FeaturesDict({
        'image/frame': features_lib.Image(shape=(32, 32, 3)),
        'image/label': features_lib.ClassLabel(num_classes=2),
    })
    fd.save_metadata(data_dir)
    fd.load_metadata(data_dir)
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          "x": tf.int64,
          "y": tf.int64,
          "z": tf.string,
      }),
  )
def _info(self) -> dataset_info.DatasetInfo:
  return dataset_info.DatasetInfo(
      builder=self,
      description='Generic text translation dataset.',
      features=features_lib.FeaturesDict({
          lang: features_lib.Text() for lang in self._languages
      }),
  )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          'id': tf.int64,
      }),
      description='Minimal DatasetBuilder.',
  )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          "image": features.Image(shape=(28, 28, 1)),
          "label": features.ClassLabel(num_classes=10),
      }),
  )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          'image': features.Image(shape=(28, 28, 1)),
          'label': features.ClassLabel(num_classes=10),
      }),
      description='Mnist description.',
  )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          "frames": features.Sequence({
              "coordinates": features.Sequence(
                  features.Tensor(shape=(2,), dtype=tf.int32)
              ),
          }),
      }),
  )
def feature_item(self):
  from tensorflow_datasets.core import features
  import numpy as np

  grid_size = np.prod(self.grid_shape)
  return 'ffd', features.FeaturesDict({
      'b': features.Tensor(shape=(self.num_points, grid_size), dtype=tf.float32),
      'p': features.Tensor(shape=(grid_size, 3), dtype=tf.float32),
  })
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          "image": features.Image(shape=(16, 16, 1)),
          "label": features.ClassLabel(names=["dog", "cat"]),
          "id": tf.int32,
      }),
      supervised_keys=("x", "x"),
      metadata=dataset_info.BeamMetadataDict(),
  )
def test_feature__repr__(self):
  label = features_lib.ClassLabel(names=['m', 'f'])
  feature_dict = features_lib.FeaturesDict({
      'metadata': features_lib.Sequence({
          'frame': features_lib.Image(shape=(32, 32, 3)),
      }),
      'label': features_lib.Sequence(label),
  })
  self.assertEqual(repr(feature_dict), FEATURE_STR)
def _info(self) -> dataset_info.DatasetInfo:
  return dataset_info.DatasetInfo(
      builder=self,
      description='Generic image classification dataset.',
      features=features_lib.FeaturesDict({
          'image': features_lib.Image(),
          'label': features_lib.ClassLabel(),
          'image/filename': features_lib.Text(),
      }),
      supervised_keys=('image', 'label'),
  )
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({
          'image': features.Image(shape=(16, 16, 1)),
          'label': features.ClassLabel(names=['dog', 'cat']),
          'id': tf.int32,
      }),
      supervised_keys=('x', 'x'),
      metadata=dataset_info.BeamMetadataDict(),
  )
def test_top_level(self):
  f = features_lib.FeaturesDict({
      'a': tf.int32,
      'b': {
          'c': tf.int32,
      },
  })
  # Only top level can be decoded
  f.decode_example({
      'a': 1,
      'b': {
          'c': 2,
      },
  })
def _assert_feature(
    self,
    feature,
    shape,
    dtype,
    tests,
    serialized_info=None,
    test_tensor_spec=True,
    skip_feature_tests=False,
    test_attributes=None,
):
  with self._subTest('shape'):
    self.assertEqual(feature.shape, shape)
  with self._subTest('dtype'):
    self.assertEqual(feature.dtype, dtype)

  # Check the serialized features
  if serialized_info:
    with self._subTest('serialized_info'):
      self.assertEqual(
          serialized_info,
          feature.get_serialized_info(),
      )

  if not skip_feature_tests and test_attributes:
    for key, value in test_attributes.items():
      self.assertEqual(getattr(feature, key), value)

  # Create the feature dict
  fdict = features.FeaturesDict({'inner': feature})
  # Check whether the following doesn't raise an exception
  fdict.catalog_documentation()

  for i, test in enumerate(tests):
    with self._subTest(str(i)):
      self.assertFeatureTest(
          fdict=fdict,
          test=test,
          feature=feature,
          shape=shape,
          dtype=dtype,
          test_tensor_spec=test_tensor_spec,
      )
def test_top_level(self):
  f = features_lib.FeaturesDict({
      'a': tf.int32,
      'b': {
          'c': tf.int32,
      },
  })
  f._set_top_level()

  # Only top level can be decoded
  f.decode_example({
      'a': 1,
      'b': {
          'c': 2,
      },
  })

  with self.assertRaisesWithPredicateMatch(
      AssertionError, 'decoded when defined as top-level'):
    f['b'].decode_example({'c': 1})
def _assert_feature(
    self,
    feature,
    shape,
    dtype,
    tests,
    serialized_info=None,
    skip_feature_tests=False,
    test_attributes=None,
):
  with self._subTest('shape'):
    self.assertEqual(feature.shape, shape)
  with self._subTest('dtype'):
    self.assertEqual(feature.dtype, dtype)

  # Check the serialized features
  if serialized_info:
    with self._subTest('serialized_info'):
      self.assertEqual(
          serialized_info,
          feature.get_serialized_info(),
      )

  if not skip_feature_tests and test_attributes:
    for key, value in test_attributes.items():
      self.assertEqual(getattr(feature, key), value)

  # Create the feature dict
  fdict = features.FeaturesDict({'inner': feature})

  for i, test in enumerate(tests):
    with self._subTest(str(i)):
      self.assertFeatureTest(
          fdict=fdict,
          test=test,
          feature=feature,
          shape=shape,
          dtype=dtype,
      )
def test_feature__repr__(self):
  label = features_lib.ClassLabel(names=['m', 'f'])
  feature_dict = features_lib.FeaturesDict({
      'metadata': features_lib.Sequence({
          'frame': features_lib.Image(shape=(32, 32, 3)),
      }),
      'label': features_lib.Sequence(label),
  })
  self.assertEqual(
      repr(feature_dict),
      textwrap.dedent("""\
          FeaturesDict({
              'label': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=2)),
              'metadata': Sequence({
                  'frame': Image(shape=(32, 32, 3), dtype=tf.uint8),
              }),
          })"""),
  )
def _extract_features(
    feature: features_lib.FeatureConnector,
    expected_feature: features_lib.FeatureConnector,
) -> features_lib.FeatureConnector:
  """Recursive implementation of `PartialDecoding.extract_features`."""
  # Feature types should match
  if not isinstance(feature, type(expected_feature)):
    raise TypeError(f'Expected: {expected_feature}. Got: {feature}')
  # Recurse into FeaturesDict, Sequence
  # Use `type` rather than `isinstance` to not recurse into inherited classes.
  if type(feature) == features_lib.FeaturesDict:  # pylint: disable=unidiomatic-typecheck
    expected_feature = typing.cast(features_lib.FeaturesDict, expected_feature)
    return features_lib.FeaturesDict({
        # Extract the feature subset  # pylint: disable=g-complex-comprehension
        k: _extract_feature_item(
            feature=feature,
            expected_key=k,
            expected_value=v,
            fn=_extract_features,
        ) for k, v in expected_feature.items()
    })
  elif type(feature) == features_lib.Sequence:  # pylint: disable=unidiomatic-typecheck
    feature = typing.cast(features_lib.Sequence, feature)
    expected_feature = typing.cast(features_lib.Sequence, expected_feature)
    feature_subset = _extract_features(
        feature=feature.feature,
        expected_feature=expected_feature.feature,
    )
    return features_lib.Sequence(feature_subset, length=feature._length)  # pylint: disable=protected-access
  else:
    # Assert that the specs match
    if (feature.dtype != expected_feature.dtype or
        not utils.shapes_are_compatible(feature.shape, expected_feature.shape)):
      raise ValueError(f'Expected: {expected_feature}. Got: {feature}')
    return feature
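# A hedged usage sketch: the recursion above is what powers
# `tfds.decode.PartialDecoding`, which keeps only the requested subset of
# features and matches nested dicts/Sequences recursively. The dataset and
# feature names below are illustrative.
import tensorflow_datasets as tfds

ds = tfds.load(
    'coco',  # illustrative dataset name
    split='train',
    decoders=tfds.decode.PartialDecoding({
        'image': True,               # keep and decode the image
        'objects': {'label': True},  # recurse into the nested feature
    }),
)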
def _process_exp(self, exp):
  # Check the shape/dtype
  with self._subTest("shape"):
    self.assertEqual(exp.feature.shape, exp.shape)
  with self._subTest("dtype"):
    self.assertEqual(exp.feature.dtype, exp.dtype)

  # Check the serialized features
  if exp.serialized_features is not None:
    with self._subTest("serialized_features"):
      self.assertEqual(
          exp.serialized_features,
          exp.feature.get_serialized_features(),
      )

  # Create the feature dict
  fdict = features.FeaturesDict({exp.name: exp.feature})
  for i, test in enumerate(exp.tests):
    with self._subTest(str(i)):
      # self._process_subtest_exp(e)
      input_value = {exp.name: test.value}

      if test.raise_cls is not None:
        with self._subTest("raise"):
          if not test.raise_msg:
            raise ValueError(
                "test.raise_msg should be set with {} for test {}".format(
                    test.raise_cls, exp.name))
          with self.assertRaisesWithPredicateMatch(
              test.raise_cls, test.raise_msg):
            features_encode_decode(fdict, input_value)
      else:
        # Test the serialization only
        if test.expected_serialized is not None:
          with self._subTest("out_serialize"):
            self.assertEqual(
                test.expected_serialized,
                exp.feature.encode_sample(test.value),
            )

        # Assert that the returned type matches the expected one
        with self._subTest("out_extract"):
          out = features_encode_decode(fdict, input_value, as_tensor=True)
          out = out[exp.name]
        with self._subTest("out_dtype"):
          out_dtypes = utils.map_nested(lambda s: s.dtype, out)
          self.assertEqual(out_dtypes, exp.feature.dtype)
        with self._subTest("out_shape"):
          # For shapes, because (None, 3) matches (5, 3), we use
          # tf.TensorShape.assert_is_compatible_with on each of the elements
          out_shapes = utils.zip_nested(out, exp.feature.shape)
          utils.map_nested(
              lambda x: x[0].shape.assert_is_compatible_with(x[1]),
              out_shapes)

        # Test serialization + decoding from disk
        with self._subTest("out_value"):
          decoded_samples = features_encode_decode(fdict, input_value)
          self.assertAllEqual(test.expected, decoded_samples[exp.name])
def test_fdict(self):
  self.assertFeature(
      feature=features_lib.FeaturesDict({
          'input': AnInputConnector(),
          'output': AnOutputConnector(),
          'img': {
              'size': {
                  'height': tf.int64,
                  'width': tf.int64,
              },
              'metadata/path': tf.string,
          },
      }),
      serialized_info={
          'input': {
              'a': features_lib.TensorInfo(shape=(), dtype=tf.int64),
              'b': features_lib.TensorInfo(shape=(), dtype=tf.int64),
          },
          'output': features_lib.TensorInfo(shape=(), dtype=tf.float32),
          'img': {
              'size': {
                  'height': features_lib.TensorInfo(shape=(), dtype=tf.int64),
                  'width': features_lib.TensorInfo(shape=(), dtype=tf.int64),
              },
              'metadata/path': features_lib.TensorInfo(shape=(), dtype=tf.string),
          },
      },
      dtype={
          'input': tf.int64,
          'output': tf.float32,
          'img': {
              'size': {
                  'height': tf.int64,
                  'width': tf.int64,
              },
              'metadata/path': tf.string,
          },
      },
      shape={
          'input': (),
          'output': (),
          'img': {
              'size': {
                  'height': (),
                  'width': (),
              },
              'metadata/path': (),
          },
      },
      tests=[
          # Np array
          testing.FeatureExpectationItem(
              value={
                  'input': 1,
                  'output': -1,
                  'img': {
                      'size': {
                          'height': 256,
                          'width': 128,
                      },
                      'metadata/path': 'path/to/xyz.jpg',
                  },
              },
              expected_serialized={
                  'input': {
                      'a': 2,  # 1 + 1
                      'b': 10,  # 1 * 10
                  },
                  'output': -10.0,  # -1 * 10.0
                  'img': {
                      'size': {
                          'height': 256,
                          'width': 128,
                      },
                      'metadata/path': 'path/to/xyz.jpg',
                  },
              },
              expected={
                  # a = 1 + 1, b = 1 * 10 => output = a + b = 2 + 10 = 12
                  'input': 12,  # 2 + 10
                  'output': -1.0,
                  'img': {
                      'size': {
                          'height': 256,
                          'width': 128,
                      },
                      'metadata/path': tf.compat.as_bytes('path/to/xyz.jpg'),
                  },
              },
          ),
      ],
  )
def test_video_custom_decode(self):
  image_path = os.fspath(utils.tfds_path('testing/test_data/test_image.jpg'))
  with tf.io.gfile.GFile(image_path, 'rb') as f:
    serialized_img = f.read()

  self.assertFeature(
      # Video with statically defined frame shape
      feature=features_lib.Video(shape=(None, 30, 60, 3)),
      shape=(None, 30, 60, 3),
      dtype=tf.uint8,
      tests=[
          testing.FeatureExpectationItem(
              value=[image_path] * 15,  # 15 frames of video
              expected=[serialized_img] * 15,  # Non-decoded images
              shape=(15,),
              dtype=tf.string,  # Only strings are decoded
              decoders=decode_lib.SkipDecoding(),
          ),
      ],
  )

  # Test with FeaturesDict
  self.assertFeature(
      feature=features_lib.FeaturesDict({
          'image': features_lib.Image(shape=(30, 60, 3), encoding_format='jpeg'),
          'label': tf.int64,
      }),
      shape={
          'image': (30, 60, 3),
          'label': (),
      },
      dtype={
          'image': tf.uint8,
          'label': tf.int64,
      },
      tests=[
          testing.FeatureExpectationItem(
              decoders={
                  'image': decode_lib.SkipDecoding(),
              },
              value={
                  'image': image_path,
                  'label': 123,
              },
              expected={
                  'image': serialized_img,
                  'label': 123,
              },
              shape={
                  'image': (),
                  'label': (),
              },
              dtype={
                  'image': tf.string,
                  'label': tf.int64,
              },
          ),
      ],
  )
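# A hedged usage sketch: the same decoder override works at load time.
# The dataset name is illustrative; `SkipDecoding` keeps the serialized
# JPEG bytes so decoding can happen later in a custom pipeline.
import tensorflow_datasets as tfds

ds = tfds.load(
    'mnist',  # illustrative dataset name
    split='train',
    decoders={'image': tfds.decode.SkipDecoding()},
)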
def expectations(self):
  return [
      test_utils.FeatureExpectation(
          name='fdict',
          feature=features_lib.FeaturesDict({
              'input': AnInputConnector(),
              'output': AnOutputConnector(),
              'img': {
                  'size': {
                      'height': tf.int64,
                      'width': tf.int64,
                  },
                  'metadata/path': tf.string,
              },
          }),
          serialized_info={
              'input/a': tf.FixedLenFeature(shape=(), dtype=tf.int64),
              'input/b': tf.FixedLenFeature(shape=(), dtype=tf.int64),
              'output': tf.FixedLenFeature(shape=(), dtype=tf.float32),
              'img/size/height': tf.FixedLenFeature(shape=(), dtype=tf.int64),
              'img/size/width': tf.FixedLenFeature(shape=(), dtype=tf.int64),
              'img/metadata/path': tf.FixedLenFeature(shape=(), dtype=tf.string),
          },
          dtype={
              'input': tf.int64,
              'output': tf.float32,
              'img': {
                  'size': {
                      'height': tf.int64,
                      'width': tf.int64,
                  },
                  'metadata/path': tf.string,
              },
          },
          shape={
              'input': (),
              'output': (),
              'img': {
                  'size': {
                      'height': (),
                      'width': (),
                  },
                  'metadata/path': (),
              },
          },
          tests=[
              # Np array
              test_utils.FeatureExpectationItem(
                  value={
                      'input': 1,
                      'output': -1,
                      'img': {
                          'size': {
                              'height': 256,
                              'width': 128,
                          },
                          'metadata/path': 'path/to/xyz.jpg',
                      },
                  },
                  expected_serialized={
                      'input/a': 2,  # 1 + 1
                      'input/b': 10,  # 1 * 10
                      'output': -10.0,  # -1 * 10.0
                      'img/size/height': 256,
                      'img/size/width': 128,
                      'img/metadata/path': 'path/to/xyz.jpg',
                  },
                  expected={
                      # a = 1 + 1, b = 1 * 10 => output = a + b = 2 + 10 = 12
                      'input': 12,  # 2 + 10
                      'output': -1.0,
                      'img': {
                          'size': {
                              'height': 256,
                              'width': 128,
                          },
                          'metadata/path': tf.compat.as_bytes('path/to/xyz.jpg'),
                      },
                  },
              ),
          ],
      ),
  ]
def _info(self):
  return dataset_info.DatasetInfo(
      builder=self,
      features=features.FeaturesDict({'x': tf.int64}),
      supervised_keys=('x', 'x'),
  )
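# For context, a hedged sketch of the builder such `_info` methods belong to,
# assuming current TFDS conventions; the class name and values below are
# hypothetical.
import tensorflow as tf
import tensorflow_datasets as tfds


class DummyDataset(tfds.core.GeneratorBasedBuilder):
  """Minimal builder wiring `_info` to split and example generation."""

  VERSION = tfds.core.Version('1.0.0')

  def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        features=tfds.features.FeaturesDict({'x': tf.int64}),
        supervised_keys=('x', 'x'),
    )

  def _split_generators(self, dl_manager):
    return {'train': self._generate_examples()}

  def _generate_examples(self):
    # Yield (key, example) pairs matching the FeaturesDict structure.
    for i in range(3):
      yield i, {'x': i}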