Example #1
0
    def assertFeature(self,
                      feature,
                      shape,
                      dtype,
                      tests,
                      serialized_info=None):
        """Check a feature's declared spec, then run every test item on it.

        Args:
          feature: Feature connector under test.
          shape: Value that `feature.shape` must equal.
          dtype: Value that `feature.dtype` must equal.
          tests: Iterable of test items, each forwarded to
            `assertFeatureTest`.
          serialized_info: When not None, value that
            `feature.get_serialized_info()` must equal.
        """
        # Static properties, each reported under its own sub-test.
        for attr_name, expected in (("shape", shape), ("dtype", dtype)):
            with self._subTest(attr_name):
                self.assertEqual(getattr(feature, attr_name), expected)

        # The serialized spec is only compared when the caller supplies one.
        if serialized_info is not None:
            with self._subTest("serialized_info"):
                actual_info = feature.get_serialized_info()
                self.assertEqual(serialized_info, actual_info)

        # Wrap the feature in a one-entry FeaturesDict for the item checks.
        wrapped = features.FeaturesDict({"inner": feature})
        for index, item in enumerate(tests):
            with self._subTest(str(index)):
                self.assertFeatureTest(
                    fdict=wrapped,
                    test=item,
                    feature=feature,
                    shape=shape,
                    dtype=dtype,
                )
  def test_repr_tensor(self):
    """Check how `Tensor` features are rendered by `repr`."""

    # A standalone Tensor renders in its expanded form.
    scalar_repr = repr(features_lib.Tensor(shape=(), dtype=tf.int32))
    self.assertEqual(scalar_repr, 'Tensor(shape=(), dtype=tf.int32)')

    # Inside a Sequence, the tensor repr collapses to its dtype.
    sequence_repr = repr(features_lib.Sequence(tf.int32))
    self.assertEqual(sequence_repr, 'Sequence(tf.int32)')

    class ChildTensor(features_lib.Tensor):
      pass

    nested = features_lib.FeaturesDict({
        'colapsed': features_lib.Tensor(shape=(), dtype=tf.int32),
        # A Tensor with a non-scalar shape stays expanded.
        'noncolapsed': features_lib.Tensor(shape=(1,), dtype=tf.int32),
        # A subclass of Tensor stays expanded.
        'child': ChildTensor(shape=(), dtype=tf.int32),
    })
    actual_repr = repr(nested)
    expected_repr = textwrap.dedent("""\
        FeaturesDict({
            'child': ChildTensor(shape=(), dtype=tf.int32),
            'colapsed': tf.int32,
            'noncolapsed': Tensor(shape=(1,), dtype=tf.int32),
        })""")
    self.assertEqual(actual_repr, expected_repr)
Example #3
0
    def assertFeature(self,
                      feature,
                      shape,
                      dtype,
                      tests,
                      serialized_info=None):
        """Validate a feature's declared spec, then run each test item.

        Args:
          feature: Feature connector under test.
          shape: Value that `feature.shape` must equal.
          dtype: Value that `feature.dtype` must equal.
          tests: Iterable of test items, each forwarded to
            `assertFeatureTest`.
          serialized_info: When not None, value that
            `feature.get_serialized_info()` must equal.
        """
        # Declared shape and dtype, each in its own sub-test.
        with self._subTest('shape'):
            actual_shape = feature.shape
            self.assertEqual(actual_shape, shape)
        with self._subTest('dtype'):
            actual_dtype = feature.dtype
            self.assertEqual(actual_dtype, dtype)

        # Optional comparison of the serialized spec.
        if serialized_info is not None:
            with self._subTest('serialized_info'):
                self.assertEqual(serialized_info,
                                 feature.get_serialized_info())

        # Wrap the feature in a FeaturesDict and flag that dict as top-level.
        top_level = features.FeaturesDict({'inner': feature})
        top_level._set_top_level()  # pylint: disable=protected-access

        for position, case in enumerate(tests):
            with self._subTest(str(position)):
                self.assertFeatureTest(
                    fdict=top_level,
                    test=case,
                    feature=feature,
                    shape=shape,
                    dtype=dtype,
                )
 def _info(self):
     """Return the `DatasetInfo` for a dataset with one image feature, `im`."""
     image_only = features.FeaturesDict({"im": features.Image()})
     return dataset_info.DatasetInfo(
         builder=self,
         features=image_only,
         supervised_keys=("im", "im"),
         metadata=dataset_info.MetadataDict(),
     )
Example #5
0
 def test_feature_getitem(self):
     """Indexing a FeaturesDict by key yields a feature with that key's dtype."""
     expected_dtypes = {
         'integer': tf.int32,
         'string': tf.string,
     }
     # Pass a copy so the expectation table stays independent of the feature.
     fdict = features_lib.FeaturesDict(dict(expected_dtypes))
     for key, expected_dtype in expected_dtypes.items():
         self.assertEqual(fdict[key].dtype, expected_dtype)
Example #6
0
    def _info(self):
        """Return the `DatasetInfo` for a dataset with one int64 feature, `x`."""
        single_int = features.FeaturesDict({"x": tf.int64})
        return dataset_info.DatasetInfo(
            builder=self,
            features=single_int,
            supervised_keys=("x", "x"),
        )
Example #7
0
 def test_tensor_spec(self):
     """`get_tensor_spec()` mirrors the nested structure of the FeaturesDict."""
     size_features = {
         'height': features_lib.Tensor(shape=(2, 3), dtype=tf.int64),
         'width': features_lib.Tensor(shape=[None, 3], dtype=tf.int64),
     }
     feature = features_lib.FeaturesDict({
         'input': AnInputConnector(),
         'output': AnOutputConnector(),
         'img': {
             'size': size_features,
             'image': features_lib.Image(shape=(28, 28, 1)),
             'metadata/path': tf.string,
         }
     })

     actual_spec = feature.get_tensor_spec()

     # One TensorSpec per leaf, nested exactly like the feature above.
     expected_size_spec = {
         'height': tf.TensorSpec(shape=[2, 3], dtype=tf.int64),
         'width': tf.TensorSpec(shape=[None, 3], dtype=tf.int64),
     }
     expected_spec = {
         'input': tf.TensorSpec(shape=[], dtype=tf.int64),
         'output': tf.TensorSpec(shape=[], dtype=tf.float32),
         'img': {
             'size': expected_size_spec,
             'image': tf.TensorSpec(shape=[28, 28, 1], dtype=tf.uint8),
             'metadata/path': tf.TensorSpec(shape=[], dtype=tf.string),
         }
     }
     self.assertAllEqualNested(actual_spec, expected_spec)
 def test_feature_save_load_metadata_slashes(self):
   """Save then load metadata for a FeaturesDict whose keys contain `/`.

   There is no explicit assertion: the test passes when neither call raises.
   """
   with testing.tmp_dir() as metadata_dir:
     frame_feature = features_lib.Image(shape=(32, 32, 3))
     label_feature = features_lib.ClassLabel(num_classes=2)
     slash_keyed = features_lib.FeaturesDict({
         'image/frame': frame_feature,
         'image/label': label_feature,
     })
     slash_keyed.save_metadata(metadata_dir)
     slash_keyed.load_metadata(metadata_dir)
 def _info(self):
   """Return a `DatasetInfo` with int64 `x`, int64 `y` and string `z`."""
   xyz_features = features.FeaturesDict({
       "x": tf.int64,
       "y": tf.int64,
       "z": tf.string,
   })
   return dataset_info.DatasetInfo(features=xyz_features)
 def _info(self) -> dataset_info.DatasetInfo:
   """Return the `DatasetInfo`, with one `Text` feature per language.

   Feature keys are the entries of `self._languages`.
   """
   text_per_language = {}
   for language in self._languages:
     text_per_language[language] = features_lib.Text()
   return dataset_info.DatasetInfo(
       builder=self,
       description='Generic text translation dataset.',
       features=features_lib.FeaturesDict(text_per_language),
   )
Example #11
0
 def _info(self):
     """Return the `DatasetInfo` for a dataset with a single int64 `id`."""
     id_only = features.FeaturesDict({
         'id': tf.int64,
     })
     return dataset_info.DatasetInfo(
         builder=self,
         features=id_only,
         description='Minimal DatasetBuilder.',
     )
Example #12
0
 def _info(self):
   """Return the `DatasetInfo`: a 28x28x1 `image` and a 10-class `label`."""
   image_feature = features.Image(shape=(28, 28, 1))
   label_feature = features.ClassLabel(num_classes=10)
   return dataset_info.DatasetInfo(
       builder=self,
       features=features.FeaturesDict({
           "image": image_feature,
           "label": label_feature,
       }),
   )
Example #13
0
 def _info(self):
   """Return the `DatasetInfo`: a 28x28x1 `image` and a 10-class `label`."""
   image_feature = features.Image(shape=(28, 28, 1))
   label_feature = features.ClassLabel(num_classes=10)
   return dataset_info.DatasetInfo(
       builder=self,
       features=features.FeaturesDict({
           'image': image_feature,
           'label': label_feature,
       }),
       description='Mnist description.',
   )
 def _info(self):
   """Return the `DatasetInfo` with a doubly nested sequence feature.

   `frames` is a Sequence of dicts whose `coordinates` entry is itself a
   Sequence of int32 tensors of shape (2,).
   """
   coordinate = features.Tensor(shape=(2,), dtype=tf.int32)
   frame = {"coordinates": features.Sequence(coordinate)}
   frames = features.Sequence(frame)
   return dataset_info.DatasetInfo(
       builder=self,
       features=features.FeaturesDict({"frames": frames}),
   )
Example #15
0
 def feature_item(self):
     """Return the `'ffd'` key together with its `FeaturesDict` spec.

     The spec holds two float32 tensors: `b` of shape
     `(self.num_points, prod(self.grid_shape))` and `p` of shape
     `(prod(self.grid_shape), 3)`.
     """
     # Imports are kept local, as in the original implementation.
     from tensorflow_datasets.core import features
     import numpy as np

     flat_grid_size = np.prod(self.grid_shape)
     b_tensor = features.Tensor(
         shape=(self.num_points, flat_grid_size), dtype=tf.float32)
     p_tensor = features.Tensor(shape=(flat_grid_size, 3), dtype=tf.float32)
     spec = features.FeaturesDict({'b': b_tensor, 'p': p_tensor})
     return 'ffd', spec
Example #16
0
 def _info(self):
   """Return the `DatasetInfo`: 16x16x1 `image`, dog/cat `label`, int32 `id`.

   The metadata container is a `BeamMetadataDict`.
   """
   image_feature = features.Image(shape=(16, 16, 1))
   label_feature = features.ClassLabel(names=["dog", "cat"])
   feature_spec = features.FeaturesDict({
       "image": image_feature,
       "label": label_feature,
       "id": tf.int32,
   })
   return dataset_info.DatasetInfo(
       builder=self,
       features=feature_spec,
       # NOTE(review): "x" is not one of the feature keys declared above;
       # confirm that this is intended.
       supervised_keys=("x", "x"),
       metadata=dataset_info.BeamMetadataDict(),
   )
Example #17
0
  def test_feature__repr__(self):
    """`repr` of a nested FeaturesDict must equal `FEATURE_STR`."""
    class_label = features_lib.ClassLabel(names=['m', 'f'])
    frame_sequence = features_lib.Sequence({
        'frame': features_lib.Image(shape=(32, 32, 3)),
    })
    label_sequence = features_lib.Sequence(class_label)
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': label_sequence,
    })

    self.assertEqual(repr(nested), FEATURE_STR)
Example #18
0
 def _info(self) -> dataset_info.DatasetInfo:
     """Return the `DatasetInfo` for an image classification dataset.

     Features are `image`, `label` and `image/filename`; the supervised
     keys are `('image', 'label')`.
     """
     classification_features = features_lib.FeaturesDict({
         'image': features_lib.Image(),
         'label': features_lib.ClassLabel(),
         'image/filename': features_lib.Text(),
     })
     return dataset_info.DatasetInfo(
         builder=self,
         description='Generic image classification dataset.',
         features=classification_features,
         supervised_keys=('image', 'label'),
     )
 def _info(self):
     """Return the `DatasetInfo`: 16x16x1 `image`, dog/cat `label`, int32 `id`.

     The metadata container is a `BeamMetadataDict`.
     """
     image_feature = features.Image(shape=(16, 16, 1))
     label_feature = features.ClassLabel(names=['dog', 'cat'])
     feature_spec = features.FeaturesDict({
         'image': image_feature,
         'label': label_feature,
         'id': tf.int32,
     })
     return dataset_info.DatasetInfo(
         builder=self,
         features=feature_spec,
         # NOTE(review): 'x' is not one of the feature keys declared above;
         # confirm that this is intended.
         supervised_keys=('x', 'x'),
         metadata=dataset_info.BeamMetadataDict(),
     )
    def test_top_level(self):
        """Decode a nested example through the outer FeaturesDict.

        There is no explicit assertion: the test passes when
        `decode_example` does not raise.
        """
        nested_spec = {
            'a': tf.int32,
            'b': {
                'c': tf.int32,
            },
        }
        outer = features_lib.FeaturesDict(nested_spec)

        # Decoding goes through the outermost dict.
        example = {
            'a': 1,
            'b': {
                'c': 2,
            },
        }
        outer.decode_example(example)
Example #21
0
    def _assert_feature(
        self,
        feature,
        shape,
        dtype,
        tests,
        serialized_info=None,
        test_tensor_spec=True,
        skip_feature_tests=False,
        test_attributes=None,
    ):
        """Check a feature's spec and attributes, then run each test item.

        Args:
          feature: Feature connector under test.
          shape: Value that `feature.shape` must equal.
          dtype: Value that `feature.dtype` must equal.
          tests: Iterable of test items forwarded to `assertFeatureTest`.
          serialized_info: When truthy, value that
            `feature.get_serialized_info()` must equal.
          test_tensor_spec: Forwarded unchanged to `assertFeatureTest`.
          skip_feature_tests: When true, the `test_attributes` checks are
            skipped.
          test_attributes: Optional mapping of attribute name to the value
            `getattr(feature, name)` must equal.
        """
        for attr_name, expected in (('shape', shape), ('dtype', dtype)):
            with self._subTest(attr_name):
                self.assertEqual(getattr(feature, attr_name), expected)

        # The serialized spec is only compared when one is provided.
        if serialized_info:
            with self._subTest('serialized_info'):
                actual_info = feature.get_serialized_info()
                self.assertEqual(serialized_info, actual_info)

        # Attribute checks; an empty or missing mapping means no checks.
        if not skip_feature_tests:
            for attr_name, expected in (test_attributes or {}).items():
                self.assertEqual(getattr(feature, attr_name), expected)

        wrapped = features.FeaturesDict({'inner': feature})

        # Smoke check: this call only needs to complete without raising.
        wrapped.catalog_documentation()

        for index, item in enumerate(tests):
            with self._subTest(str(index)):
                self.assertFeatureTest(
                    fdict=wrapped,
                    test=item,
                    feature=feature,
                    shape=shape,
                    dtype=dtype,
                    test_tensor_spec=test_tensor_spec,
                )
  def test_top_level(self):
    """Once a dict is flagged top-level, nested decoding must be rejected."""
    nested_spec = {
        'a': tf.int32,
        'b': {
            'c': tf.int32,
        },
    }
    outer = features_lib.FeaturesDict(nested_spec)
    outer._set_top_level()

    # Decoding through the outermost dict is expected to succeed.
    outer.decode_example({
        'a': 1,
        'b': {
            'c': 2,
        },
    })

    # Decoding through the nested feature must raise the expected assertion.
    expected_message = 'decoded when defined as top-level'
    with self.assertRaisesWithPredicateMatch(AssertionError, expected_message):
      outer['b'].decode_example({'c': 1})
Example #23
0
  def _assert_feature(
      self,
      feature,
      shape,
      dtype,
      tests,
      serialized_info=None,
      skip_feature_tests=False,
      test_attributes=None,
  ):
    """Check a feature's spec and attributes, then run each test item.

    Args:
      feature: Feature connector under test.
      shape: Value that `feature.shape` must equal.
      dtype: Value that `feature.dtype` must equal.
      tests: Iterable of test items forwarded to `assertFeatureTest`.
      serialized_info: When truthy, value that
        `feature.get_serialized_info()` must equal.
      skip_feature_tests: When true, the `test_attributes` checks are skipped.
      test_attributes: Optional mapping of attribute name to the value
        `getattr(feature, name)` must equal.
    """
    with self._subTest('shape'):
      actual_shape = feature.shape
      self.assertEqual(actual_shape, shape)
    with self._subTest('dtype'):
      actual_dtype = feature.dtype
      self.assertEqual(actual_dtype, dtype)

    # The serialized spec is only compared when one is provided.
    if serialized_info:
      with self._subTest('serialized_info'):
        self.assertEqual(serialized_info, feature.get_serialized_info())

    # Attribute checks are opt-in and can be disabled by the caller.
    if not skip_feature_tests:
      if test_attributes:
        for attr_name, expected in test_attributes.items():
          self.assertEqual(getattr(feature, attr_name), expected)

    # Wrap the feature in a one-entry FeaturesDict for the item checks.
    wrapped = features.FeaturesDict({'inner': feature})

    for position, case in enumerate(tests):
      with self._subTest(str(position)):
        self.assertFeatureTest(
            fdict=wrapped,
            test=case,
            feature=feature,
            shape=shape,
            dtype=dtype,
        )
  def test_feature__repr__(self):
    """`repr` of a nested FeaturesDict must match the expected layout."""
    class_label = features_lib.ClassLabel(names=['m', 'f'])
    frame_sequence = features_lib.Sequence({
        'frame': features_lib.Image(shape=(32, 32, 3)),
    })
    label_sequence = features_lib.Sequence(class_label)
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': label_sequence,
    })

    actual_repr = repr(nested)
    # The expected text lists 'label' before 'metadata'.
    expected_repr = textwrap.dedent("""\
        FeaturesDict({
            'label': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=2)),
            'metadata': Sequence({
                'frame': Image(shape=(32, 32, 3), dtype=tf.uint8),
            }),
        })""")
    self.assertEqual(actual_repr, expected_repr)
Example #25
0
def _extract_features(
    feature: features_lib.FeatureConnector,
    expected_feature: features_lib.FeatureConnector,
) -> features_lib.FeatureConnector:
    """Recursive implementation of `PartialDecoding.extract_features`.

    Args:
      feature: Feature to extract the subset from.
      expected_feature: Spec describing the requested subset.

    Returns:
      A new `FeaturesDict` or `Sequence` holding the extracted subset, or
      `feature` itself when it is neither of those exact types.

    Raises:
      TypeError: If `feature` is not an instance of `expected_feature`'s type.
      ValueError: If a non-container feature's dtype differs from the expected
        one or `utils.shapes_are_compatible` rejects the two shapes.
    """
    if not isinstance(feature, type(expected_feature)):
        raise TypeError(f'Expected: {expected_feature}. Got: {feature}')

    # Exact type comparison (not `isinstance`) so that subclasses of
    # FeaturesDict / Sequence are not recursed into.
    if type(feature) == features_lib.FeaturesDict:  # pylint: disable=unidiomatic-typecheck
        expected_dict = typing.cast(features_lib.FeaturesDict,
                                    expected_feature)
        subset = {}
        for key, expected_value in expected_dict.items():
            subset[key] = _extract_feature_item(
                feature=feature,
                expected_key=key,
                expected_value=expected_value,
                fn=_extract_features,
            )
        return features_lib.FeaturesDict(subset)

    if type(feature) == features_lib.Sequence:  # pylint: disable=unidiomatic-typecheck
        sequence = typing.cast(features_lib.Sequence, feature)
        expected_sequence = typing.cast(features_lib.Sequence,
                                        expected_feature)
        inner_subset = _extract_features(
            feature=sequence.feature,
            expected_feature=expected_sequence.feature,
        )
        return features_lib.Sequence(inner_subset, length=sequence._length)  # pylint: disable=protected-access

    # Any other feature: dtype is checked first, and shapes are only compared
    # when the dtypes agree.
    if feature.dtype != expected_feature.dtype:
        raise ValueError(f'Expected: {expected_feature}. Got: {feature}')
    if not utils.shapes_are_compatible(feature.shape,
                                       expected_feature.shape):
        raise ValueError(f'Expected: {expected_feature}. Got: {feature}')
    return feature
Example #26
0
    def _process_exp(self, exp):
        """Run every check described by one feature expectation.

        The feature's declared shape and dtype (and, when provided, its
        serialized features) are compared with the expectation. Each item of
        `exp.tests` is then run through `features_encode_decode` using a
        single-entry `FeaturesDict` keyed by `exp.name`.

        Args:
          exp: Expectation object. This method reads its `name`, `feature`,
            `shape`, `dtype`, `serialized_features` and `tests` attributes.

        A `ValueError` is raised inside the "raise" sub-test when a test item
        sets `raise_cls` without also setting `raise_msg`.
        """
        # Check the shape/dtype
        with self._subTest("shape"):
            self.assertEqual(exp.feature.shape, exp.shape)
        with self._subTest("dtype"):
            self.assertEqual(exp.feature.dtype, exp.dtype)

        # Check the serialized features
        if exp.serialized_features is not None:
            with self._subTest("serialized_features"):
                self.assertEqual(
                    exp.serialized_features,
                    exp.feature.get_serialized_features(),
                )

        # Create the feature dict
        fdict = features.FeaturesDict({exp.name: exp.feature})
        for i, test in enumerate(exp.tests):
            with self._subTest(str(i)):
                input_value = {exp.name: test.value}

                if test.raise_cls is not None:
                    # Error case: encode/decode must raise `raise_cls` with a
                    # message matching `raise_msg`.
                    with self._subTest("raise"):
                        if not test.raise_msg:
                            raise ValueError(
                                "test.raise_msg should be set with {} for test {}"
                                .format(test.raise_cls, exp.name))
                        with self.assertRaisesWithPredicateMatch(
                                test.raise_cls, test.raise_msg):
                            features_encode_decode(fdict, input_value)
                else:
                    # Test the serialization only
                    if test.expected_serialized is not None:
                        with self._subTest("out_serialize"):
                            self.assertEqual(
                                test.expected_serialized,
                                exp.feature.encode_sample(test.value),
                            )

                    # Assert the returned type match the expected one
                    with self._subTest("out_extract"):
                        out = features_encode_decode(fdict,
                                                     input_value,
                                                     as_tensor=True)
                        out = out[exp.name]
                    with self._subTest("out_dtype"):
                        out_dtypes = utils.map_nested(lambda s: s.dtype, out)
                        self.assertEqual(out_dtypes, exp.feature.dtype)
                    with self._subTest("out_shape"):
                        # For shape, because (None, 3) match with (5, 3), we use
                        # tf.TensorShape.assert_is_compatible_with on each of the elements
                        out_shapes = utils.zip_nested(out, exp.feature.shape)
                        utils.map_nested(
                            lambda x: x[0].shape.assert_is_compatible_with(x[
                                1]), out_shapes)

                    # Test serialization + decoding from disk
                    with self._subTest("out_value"):
                        decoded_samples = features_encode_decode(
                            fdict, input_value)
                        self.assertAllEqual(test.expected,
                                            decoded_samples[exp.name])
Example #27
0
    def test_fdict(self):
        """Exercise a nested `FeaturesDict` mixing connectors and raw dtypes."""

        def scalar_info(dtype):
            # Serialized spec of a scalar (shape `()`) tensor of `dtype`.
            return features_lib.TensorInfo(shape=(), dtype=dtype)

        nested_feature = features_lib.FeaturesDict({
            'input': AnInputConnector(),
            'output': AnOutputConnector(),
            'img': {
                'size': {
                    'height': tf.int64,
                    'width': tf.int64,
                },
                'metadata/path': tf.string,
            }
        })

        # Expected serialized spec: 'input' is described by two entries, 'a'
        # and 'b'; every other leaf has a single scalar entry.
        expected_info = {
            'input': {
                'a': scalar_info(tf.int64),
                'b': scalar_info(tf.int64),
            },
            'output': scalar_info(tf.float32),
            'img': {
                'size': {
                    'height': scalar_info(tf.int64),
                    'width': scalar_info(tf.int64),
                },
                'metadata/path': scalar_info(tf.string),
            }
        }
        expected_dtype = {
            'input': tf.int64,
            'output': tf.float32,
            'img': {
                'size': {
                    'height': tf.int64,
                    'width': tf.int64,
                },
                'metadata/path': tf.string,
            }
        }
        expected_shape = {
            'input': (),
            'output': (),
            'img': {
                'size': {
                    'height': (),
                    'width': (),
                },
                'metadata/path': (),
            },
        }

        # Single expectation item, fed with plain Python values.
        nested_case = testing.FeatureExpectationItem(
            value={
                'input': 1,
                'output': -1,
                'img': {
                    'size': {
                        'height': 256,
                        'width': 128,
                    },
                    'metadata/path': 'path/to/xyz.jpg',
                }
            },
            expected_serialized={
                'input': {
                    'a': 2,  # 1 + 1
                    'b': 10,  # 1 * 10
                },
                'output': -10.0,  # -1 * 10.0
                'img': {
                    'size': {
                        'height': 256,
                        'width': 128,
                    },
                    'metadata/path': 'path/to/xyz.jpg',
                }
            },
            expected={
                # a = 1 + 1 and b = 1 * 10, so the decoded value is a + b = 12
                'input': 12,
                'output': -1.0,
                'img': {
                    'size': {
                        'height': 256,
                        'width': 128,
                    },
                    'metadata/path': tf.compat.as_bytes('path/to/xyz.jpg'),
                },
            },
        )

        self.assertFeature(
            feature=nested_feature,
            serialized_info=expected_info,
            dtype=expected_dtype,
            shape=expected_shape,
            tests=[nested_case],
        )
Example #28
0
    def test_video_custom_decode(self):
        """Check `SkipDecoding` on a `Video` and on an `Image` inside a dict.

        In both cases the expected output is the raw content of the test JPEG
        file, with dtype `tf.string`, rather than a decoded image.
        """
        jpeg_path = os.fspath(
            utils.tfds_path('testing/test_data/test_image.jpg'))
        with tf.io.gfile.GFile(jpeg_path, 'rb') as jpeg_file:
            raw_jpeg = jpeg_file.read()

        # Video feature whose leading dimension is left unspecified (None).
        video_feature = features_lib.Video(shape=(None, 30, 60, 3))
        video_case = testing.FeatureExpectationItem(
            value=[jpeg_path] * 15,  # 15 frames, all from the same file
            expected=[raw_jpeg] * 15,  # Raw bytes of each frame
            shape=(15, ),
            dtype=tf.string,
            decoders=decode_lib.SkipDecoding(),
        )
        self.assertFeature(
            feature=video_feature,
            shape=(None, 30, 60, 3),
            dtype=tf.uint8,
            tests=[video_case],
        )

        # Same idea with a FeaturesDict: decoding is skipped for 'image' only.
        dict_feature = features_lib.FeaturesDict({
            'image': features_lib.Image(shape=(30, 60, 3),
                                        encoding_format='jpeg'),
            'label': tf.int64,
        })
        dict_shape = {
            'image': (30, 60, 3),
            'label': (),
        }
        dict_dtype = {
            'image': tf.uint8,
            'label': tf.int64,
        }
        dict_case = testing.FeatureExpectationItem(
            decoders={
                'image': decode_lib.SkipDecoding(),
            },
            value={
                'image': jpeg_path,
                'label': 123,
            },
            expected={
                'image': raw_jpeg,
                'label': 123,
            },
            shape={
                'image': (),
                'label': (),
            },
            dtype={
                'image': tf.string,
                'label': tf.int64,
            },
        )
        self.assertFeature(
            feature=dict_feature,
            shape=dict_shape,
            dtype=dict_dtype,
            tests=[dict_case],
        )
Example #29
0
  def expectations(self):
    """Return the feature expectations: a single entry named `fdict`."""

    def fixed_scalar(dtype):
      # Serialized spec of a scalar (shape `()`) feature of `dtype`.
      return tf.FixedLenFeature(shape=(), dtype=dtype)

    nested_feature = features_lib.FeaturesDict({
        'input': AnInputConnector(),
        'output': AnOutputConnector(),
        'img': {
            'size': {
                'height': tf.int64,
                'width': tf.int64,
            },
            'metadata/path': tf.string,
        }
    })

    # The serialized spec is flat: nested keys are joined with '/'.
    expected_info = {
        'input/a': fixed_scalar(tf.int64),
        'input/b': fixed_scalar(tf.int64),
        'output': fixed_scalar(tf.float32),
        'img/size/height': fixed_scalar(tf.int64),
        'img/size/width': fixed_scalar(tf.int64),
        'img/metadata/path': fixed_scalar(tf.string),
    }
    expected_dtype = {
        'input': tf.int64,
        'output': tf.float32,
        'img': {
            'size': {
                'height': tf.int64,
                'width': tf.int64,
            },
            'metadata/path': tf.string,
        }
    }
    expected_shape = {
        'input': (),
        'output': (),
        'img': {
            'size': {
                'height': (),
                'width': (),
            },
            'metadata/path': (),
        },
    }

    # Single expectation item, fed with plain Python values.
    nested_case = test_utils.FeatureExpectationItem(
        value={
            'input': 1,
            'output': -1,
            'img': {
                'size': {
                    'height': 256,
                    'width': 128,
                },
                'metadata/path': 'path/to/xyz.jpg',
            }
        },
        expected_serialized={
            'input/a': 2,  # 1 + 1
            'input/b': 10,  # 1 * 10
            'output': -10.0,  # -1 * 10.0
            'img/size/height': 256,
            'img/size/width': 128,
            'img/metadata/path': 'path/to/xyz.jpg',
        },
        expected={
            # a = 1 + 1 and b = 1 * 10, so the decoded value is a + b = 12
            'input': 12,
            'output': -1.0,
            'img': {
                'size': {
                    'height': 256,
                    'width': 128,
                },
                'metadata/path': tf.compat.as_bytes('path/to/xyz.jpg'),
            },
        },
    )

    fdict_expectation = test_utils.FeatureExpectation(
        name='fdict',
        feature=nested_feature,
        serialized_info=expected_info,
        dtype=expected_dtype,
        shape=expected_shape,
        tests=[nested_case],
    )
    return [fdict_expectation]
Example #30
0
 def _info(self):
     """Return the `DatasetInfo` for a dataset with one int64 feature, `x`."""
     single_int = features.FeaturesDict({'x': tf.int64})
     return dataset_info.DatasetInfo(
         builder=self,
         features=single_int,
         supervised_keys=('x', 'x'),
     )