Example #1
0
  def test_getattr(self):
    """`names` is reachable through a Sequence, directly or via a dict key."""
    expected_names = ['left', 'right']

    # Sequence wrapping a single ClassLabel: `names` is read off the Sequence.
    flat_sequence = feature_lib.Sequence(
        feature_lib.ClassLabel(names=['left', 'right']))
    self.assertEqual(flat_sequence.names, expected_names)

    # Sequence wrapping a dict: the ClassLabel is looked up by its key first.
    dict_sequence = feature_lib.Sequence(
        {'label': feature_lib.ClassLabel(names=['left', 'right'])})
    self.assertEqual(dict_sequence['label'].names, expected_names)
  def test_metadata(self):
    """Names set on the inner ClassLabel survive a save/load round trip."""
    saved = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
    saved.feature.names = ['left', 'right']

    with testing.tmp_dir() as metadata_dir:
      saved.save_metadata(data_dir=metadata_dir, feature_name='test')

      # A fresh feature that only declares `num_classes` is expected to pick
      # the names up from the metadata written just above.
      restored = feature_lib.Sequence(feature_lib.ClassLabel(num_classes=2))
      restored.load_metadata(data_dir=metadata_dir, feature_name='test')

    self.assertEqual(restored.feature.names, ['left', 'right'])
    def test_empty(self):
        """A ClassLabel without any class still encodes -1 as -1."""
        # Encoding should work when there are no classes, whether that is
        # declared with `num_classes=0` or with an empty `names` list.
        for ctor_kwargs in ({'num_classes': 0}, {'names': []}):
            empty_labels = features.ClassLabel(**ctor_kwargs)
            self.assertEqual(0, empty_labels.num_classes)
            self.assertEqual(0, len(empty_labels.names))
            self.assertEqual(-1, empty_labels.encode_example(-1))
 def expectations(self):
   """Returns the expectations for numeric and named `ClassLabel` features."""
   make_item = test_utils.FeatureExpectationItem

   # Labels declared only through `num_classes`.
   numeric_labels = test_utils.FeatureExpectation(
       name='label',
       feature=features.ClassLabel(num_classes=10),
       dtype=tf.int64,
       shape=(),
       tests=[
           make_item(value=3, expected=3),
           make_item(value='3', expected=3),
           make_item(
               value=10,
               raise_cls=ValueError,
               raise_msg='greater than configured num_classes',
           ),
           make_item(
               value='10',
               raise_cls=ValueError,
               raise_msg='Invalid',
           ),
       ],
   )

   # Labels declared through explicit names.
   named_labels = test_utils.FeatureExpectation(
       name='directions',
       feature=features.ClassLabel(names=['left', 'right']),
       dtype=tf.int64,
       shape=(),
       tests=[
           make_item(value=1, expected=1),
           make_item(value='left', expected=0),
           make_item(value='right', expected=1),
       ],
   )

   return [numeric_labels, named_labels]
 def test_feature(self):
     """Checks encoding, error cases and attributes of a 10-class label."""
     make_item = testing.FeatureExpectationItem
     ten_classes = features.ClassLabel(num_classes=10)
     cases = [
         make_item(value=3, expected=3),
         make_item(value='3', expected=3),
         make_item(
             value=10,
             raise_cls=ValueError,
             raise_msg='greater than configured num_classes',
         ),
         make_item(
             value='10',
             raise_cls=ValueError,
             raise_msg='Invalid',
         ),
     ]
     expected_attributes = {
         'num_classes': 10,
         # The string forms of 0..9, in order.
         'names': [str(index) for index in range(10)],
     }
     self.assertFeature(
         feature=ten_classes,
         dtype=tf.int64,
         shape=(),
         tests=cases,
         test_attributes=expected_attributes,
     )
  def test_label(self):
    """A Sequence of ClassLabel encodes lists of names of varying length."""
    make_item = testing.FeatureExpectationItem
    label_sequence = feature_lib.Sequence(
        feature_lib.ClassLabel(names=['left', 'right']))
    cases = [
        make_item(value=['right', 'left', 'left'], expected=[1, 0, 0]),
        # Variable sequence length
        make_item(
            value=['right', 'left', 'right', 'left'],
            expected=[1, 0, 1, 0],
        ),
        # Empty sequence length
        make_item(value=[], expected=[]),
    ]
    self.assertFeature(
        feature=label_sequence,
        shape=(None,),
        dtype=tf.int64,
        tests=cases,
    )
Example #7
0
def test_file_path(tmp_path):
    """`names_file` accepts both `Path` and `str` and keeps the file order."""
    # Empty lines are ignored, so the two labels are surrounded by blank lines.
    file_lines = ['', 'label1', '', '', 'label0', '']
    names_path = tmp_path / 'label_names.txt'
    names_path.write_text('\n'.join(file_lines))

    # Both Path and str are supported
    from_path = features.ClassLabel(names_file=names_path)
    from_str = features.ClassLabel(names_file=str(names_path))
    assert from_path.names == from_str.names
    assert from_path.names == ['label1', 'label0']  # Order is kept
Example #8
0
    def test_label(self):
        """A Sequence over a dict of ClassLabel encodes each key as a list."""
        make_item = testing.FeatureExpectationItem
        label_sequence = feature_lib.Sequence(
            {'label': feature_lib.ClassLabel(names=['left', 'right'])},
            length=None,
        )
        serialized = {
            'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
        }
        cases = [
            make_item(
                value={'label': ['right', 'left', 'left']},
                expected={'label': [1, 0, 0]},
            ),
            # Variable sequence length
            make_item(
                value={'label': ['right', 'left', 'right', 'left']},
                expected={'label': [1, 0, 1, 0]},
            ),
            # Empty sequence length
            make_item(value={'label': []}, expected={'label': []}),
        ]
        self.assertFeature(
            feature=label_sequence,
            shape={'label': (None,)},
            dtype={'label': tf.int64},
            serialized_info=serialized,
            tests=cases,
            test_attributes={'_length': None},
        )
  def test_label(self):
    """A Dataset over a dict of ClassLabel is compared to datasets of ids."""
    make_item = testing.FeatureExpectationItem
    from_slices = tf.data.Dataset.from_tensor_slices
    label_dataset = feature_lib.Dataset(
        {'label': feature_lib.ClassLabel(names=['left', 'right'])},
        length=None,
    )
    serialized = {
        'label': feature_lib.TensorInfo(shape=(None,), dtype=tf.int64),
    }
    cases = [
        # Input given as a plain list of example dicts.
        make_item(
            value=[{'label': name} for name in ('right', 'left', 'left')],
            expected=from_slices({'label': [1, 0, 0]}),
        ),
        # Variable sequence length
        make_item(
            value=dataset_utils.as_numpy(
                from_slices({'label': ['right', 'left', 'right', 'left']})),
            expected=from_slices({'label': [1, 0, 1, 0]}),
        ),
    ]
    self.assertFeatureEagerOnly(
        feature=label_dataset,
        shape={'label': ()},
        dtype={'label': tf.int64},
        serialized_info=serialized,
        tests=cases,
        test_attributes={'_length': None},
    )
Example #10
0
 def test_feature(self):
     """Checks valid and out-of-range inputs for a 10-class label."""
     make_item = testing.FeatureExpectationItem
     ten_classes = features.ClassLabel(num_classes=10)
     cases = [
         # Integer and string forms of a valid class.
         make_item(value=3, expected=3),
         make_item(value='3', expected=3),
         # Integer and string forms of an out-of-range class.
         make_item(
             value=10,
             raise_cls=ValueError,
             raise_msg='greater than configured num_classes',
         ),
         make_item(
             value='10',
             raise_cls=ValueError,
             raise_msg='Invalid',
         ),
     ]
     self.assertFeature(
         feature=ten_classes,
         dtype=tf.int64,
         shape=(),
         tests=cases,
     )
 def test_feature_save_load_metadata_slashes(self):
   """Saving then loading metadata runs for feature names containing '/'."""
   with testing.tmp_dir() as data_dir:
     slash_named = {
         'image/frame': features_lib.Image(shape=(32, 32, 3)),
         'image/label': features_lib.ClassLabel(num_classes=2),
     }
     features_dict = features_lib.FeaturesDict(slash_named)
     # The test passes as long as neither call raises.
     features_dict.save_metadata(data_dir)
     features_dict.load_metadata(data_dir)
Example #12
0
 def _info(self):
   """Returns a DatasetInfo with a 28x28x1 image and a 10-class label."""
   feature_spec = features.FeaturesDict({
       "image": features.Image(shape=(28, 28, 1)),
       "label": features.ClassLabel(num_classes=10),
   })
   return dataset_info.DatasetInfo(builder=self, features=feature_spec)
Example #13
0
 def _info(self):
   """Returns a described DatasetInfo with a 28x28x1 image and a label."""
   feature_spec = features.FeaturesDict({
       'image': features.Image(shape=(28, 28, 1)),
       'label': features.ClassLabel(num_classes=10),
   })
   return dataset_info.DatasetInfo(
       builder=self,
       features=feature_spec,
       description='Mnist description.',
   )
  def test_save_load(self):
    """Loading metadata fills in undefined names and rejects a mismatch."""
    expected_names = ['label3', 'label1', 'label2']
    source = features.ClassLabel(names=['label3', 'label1', 'label2'])
    undefined = features.ClassLabel(num_classes=None)
    single_class = features.ClassLabel(num_classes=1)

    with test_utils.tmp_dir(self.get_temp_dir()) as tmp_dir:
      source.save_metadata(tmp_dir, 'test-labels')
      undefined.load_metadata(tmp_dir, 'test-labels')
      # Three saved names cannot be loaded into a label declared with 1 class.
      with self.assertRaisesWithPredicateMatch(
          ValueError, 'number of names do not match the defined num_classes'):
        single_class.load_metadata(tmp_dir, 'test-labels')

    # `undefined` should have been copied from `source`.
    self.assertEqual(3, undefined.num_classes)
    self.assertEqual(undefined.names, expected_names)
Example #15
0
 def _info(self):
   """Returns a DatasetInfo with image, label and id plus Beam metadata."""
   feature_spec = features.FeaturesDict({
       "image": features.Image(shape=(16, 16, 1)),
       "label": features.ClassLabel(names=["dog", "cat"]),
       "id": tf.int32,
   })
   beam_metadata = dataset_info.BeamMetadataDict()
   return dataset_info.DatasetInfo(
       builder=self,
       features=feature_spec,
       supervised_keys=("x", "x"),
       metadata=beam_metadata,
   )
  def test_names(self):
    """`names` can be set once and must agree with `num_classes`."""
    overwrite_error = 'overwrite already defined ClassLabel'
    mismatch_error = 'number of names do not match the defined num_classes'

    # Names given at construction cannot be replaced.
    from_ctor = features.ClassLabel(names=['label3', 'label1', 'label2'])
    with self.assertRaisesWithPredicateMatch(ValueError, overwrite_error):
      from_ctor.names = ['other', 'labels']

    # Names assigned after construction cannot be replaced either.
    assigned_later = features.ClassLabel()
    assigned_later.names = ['label3', 'label1', 'label2']
    with self.assertRaisesWithPredicateMatch(ValueError, overwrite_error):
      assigned_later.names = ['other', 'labels']

    # Assigning exactly `num_classes` names is accepted.
    matching = features.ClassLabel(num_classes=3)
    matching.names = ['label3', 'label1', 'label2']

    # Assigning a different number of names than `num_classes` is rejected.
    too_few = features.ClassLabel(num_classes=3)
    with self.assertRaisesWithPredicateMatch(ValueError, mismatch_error):
      too_few.names = ['label3', 'label1']
Example #17
0
  def test_feature__repr__(self):
    """`repr` of a nested FeaturesDict equals the FEATURE_STR constant."""
    label_feature = features_lib.ClassLabel(names=['m', 'f'])
    frame_sequence = features_lib.Sequence(
        {'frame': features_lib.Image(shape=(32, 32, 3))})
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': features_lib.Sequence(label_feature),
    })

    self.assertEqual(repr(nested), FEATURE_STR)
    def test_num_classes(self):
        """`num_classes=10` gives ten names, maps '1' <-> 1 and rejects 10."""
        ten_classes = features.ClassLabel(num_classes=10)
        self.assertEqual(10, ten_classes.num_classes)
        self.assertEqual(10, len(ten_classes.names))

        # Conversion works in both directions for a valid class.
        self.assertEqual(1, ten_classes.str2int('1'))
        self.assertEqual(u'1', ten_classes.int2str(1))

        # 10 is rejected in either form.
        with self.assertRaisesWithPredicateMatch(ValueError, 'Invalid'):
            ten_classes.str2int('10')
        with self.assertRaisesWithPredicateMatch(ValueError, 'Invalid'):
            ten_classes.int2str(10)
Example #19
0
 def _info(self) -> dataset_info.DatasetInfo:
     """Returns the DatasetInfo of a generic image classification dataset."""
     feature_spec = features_lib.FeaturesDict({
         'image': features_lib.Image(),
         'label': features_lib.ClassLabel(),
         'image/filename': features_lib.Text(),
     })
     return dataset_info.DatasetInfo(
         builder=self,
         description='Generic image classification dataset.',
         features=feature_spec,
         supervised_keys=('image', 'label'),
     )
 def _info(self):
     """Returns a DatasetInfo with image, label and id plus Beam metadata."""
     feature_spec = features.FeaturesDict({
         'image': features.Image(shape=(16, 16, 1)),
         'label': features.ClassLabel(names=['dog', 'cat']),
         'id': tf.int32,
     })
     beam_metadata = dataset_info.BeamMetadataDict()
     return dataset_info.DatasetInfo(
         builder=self,
         features=feature_spec,
         supervised_keys=('x', 'x'),
         metadata=beam_metadata,
     )
Example #21
0
 def expectations(self):
     """Returns the expectation for a `ClassLabel` built positionally with 10."""
     make_item = test_utils.FeatureExpectationItem
     label_expectation = test_utils.FeatureExpectation(
         name='label',
         feature=features.ClassLabel(10),
         dtype=tf.int64,
         shape=(),
         tests=[
             make_item(value=3, expected=3),
             make_item(
                 value=10,
                 raise_cls=ValueError,
                 raise_msg='greater than configured num_classes',
             ),
         ],
     )
     return [label_expectation]
    def test_str_classes(self):
        """Each name maps to its position in the `names` list, and back."""
        ordered_names = ['label3', 'label1', 'label2']
        labels = features.ClassLabel(names=list(ordered_names))
        self.assertEqual(3, labels.num_classes)
        self.assertEqual(labels.names, ordered_names)

        # name -> index, in list order
        for index, name in enumerate(ordered_names):
            self.assertEqual(labels.str2int(name), index)
        # index -> name, in list order
        for index, name in enumerate(ordered_names):
            self.assertEqual(labels.int2str(index), name)
Example #23
0
    def test_labels(self):
        """A named ClassLabel accepts both indices and names as input."""
        make_item = testing.FeatureExpectationItem
        directions = features.ClassLabel(names=['left', 'right'])
        cases = [
            make_item(value=1, expected=1),
            make_item(value='left', expected=0),
            make_item(value='right', expected=1),
        ]
        self.assertFeature(
            feature=directions,
            dtype=tf.int64,
            shape=(),
            tests=cases,
        )
  def test_feature__repr__(self):
    """`repr` of a nested FeaturesDict matches the expected literal text."""
    label_feature = features_lib.ClassLabel(names=['m', 'f'])
    frame_sequence = features_lib.Sequence(
        {'frame': features_lib.Image(shape=(32, 32, 3))})
    nested = features_lib.FeaturesDict({
        'metadata': frame_sequence,
        'label': features_lib.Sequence(label_feature),
    })

    # In the expected text, 'label' is listed before 'metadata'.
    expected_repr = textwrap.dedent("""\
        FeaturesDict({
            'label': Sequence(ClassLabel(shape=(), dtype=tf.int64, num_classes=2)),
            'metadata': Sequence({
                'frame': Image(shape=(32, 32, 3), dtype=tf.uint8),
            }),
        })""")
    self.assertEqual(repr(nested), expected_repr)
    def test_label(self):
        """A Dataset of ClassLabel is compared to datasets of label ids."""
        make_item = testing.FeatureExpectationItem
        from_slices = tf.data.Dataset.from_tensor_slices
        label_dataset = feature_lib.Dataset(
            feature_lib.ClassLabel(names=['left', 'right']))
        cases = [
            make_item(
                value=['right', 'left', 'left'],
                expected=from_slices([1, 0, 0]),
            ),
            # Variable sequence length
            make_item(
                value=['right', 'left', 'right', 'left'],
                expected=from_slices([1, 0, 1, 0]),
            ),
            # Empty sequence length
            make_item(value=[], expected=[]),
        ]
        self.assertFeatureEagerOnly(
            feature=label_dataset,
            shape=(),
            dtype=tf.int64,
            tests=cases,
        )
Example #26
0
def test_extract_features_values():
  """Checks `_extract_features` with bool, list, set and dict selectors."""
  all_features = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'img2': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })

  # Dict selector: `True` keeps a key, `False` drops it, a list or set picks
  # nested keys.
  selector = {
      'img': True,
      'img2': False,
      'unknown_key': False,  # Extra keys are filtered
      'metadata': ['label'],
      'sequence': {'y'},
      'sequence_flat': True,
  }
  subset = _extract_features(feature=all_features, expected_feature=selector)
  expected = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
      },
      'sequence': features_lib.Sequence({
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })
  testing.assert_features_equal(subset, expected)

  # Set selector at the top level: the named keys are kept whole.
  subset = _extract_features(
      feature=all_features,
      expected_feature={'metadata', 'sequence'},
  )
  expected = features_lib.FeaturesDict({
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
  })
  testing.assert_features_equal(subset, expected)

  # Test, mixing Features with non-features.
  mixed_selector = {
      'img': features_lib.Image(),
      'sequence': {
          'x': tf.int64,
          'y': False,
      },
  }
  subset = _extract_features(
      feature=all_features,
      expected_feature=mixed_selector,
  )
  expected = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'sequence': features_lib.Sequence({
          'x': tf.int64,
      }),
  })
  testing.assert_features_equal(subset, expected)
Example #27
0
 def test_num_classes(self):
     """A ClassLabel built positionally with 10 reports `num_classes` 10."""
     ten_classes = features.ClassLabel(10)
     self.assertEqual(10, ten_classes.num_classes)
    def test_duplicate_names(self):
        """Building a ClassLabel with a repeated name raises ValueError."""
        names_with_repeat = ['label1', 'label1', 'label2']
        with self.assertRaisesWithPredicateMatch(
                ValueError, 'label names are duplicated'):
            features.ClassLabel(names=names_with_repeat)
Example #29
0
def test_extract_features():
  """Checks `_extract_features` when the expected spec is made of features.

  Covers an empty spec, a matching partial spec, and several mismatching
  specs that are expected to raise.
  """
  features = features_lib.FeaturesDict({
      'img': features_lib.Image(shape=(256, 256, 3)),
      'img2': features_lib.Image(shape=(256, 256, 3)),
      'metadata': {
          'label': features_lib.ClassLabel(num_classes=4),
          'other': tf.string,
      },
      'sequence': features_lib.Sequence({
          'x': tf.int64,
          'y': tf.int64,
      }),
      'sequence_flat': features_lib.Sequence(tf.int64),
  })

  # An empty spec is expected to give an empty FeaturesDict.
  result = _extract_features(
      feature=features,
      expected_feature={},
  )
  testing.assert_features_equal(result, features_lib.FeaturesDict({}))

  # Feature spec accepted
  # The expected image shape uses `None` dimensions; the result is expected to
  # keep the concrete (256, 256, 3) shape of the source feature.
  result = _extract_features(
      feature=features,
      expected_feature={
          'img': features_lib.Image(shape=(None, None, 3)),
          'metadata': {
              'other': tf.string,
          },
          'sequence': features_lib.Sequence({
              'x': tf.int64,
          }),
      },
  )
  testing.assert_features_equal(
      result,
      features_lib.FeaturesDict({
          'img': features_lib.Image(shape=(256, 256, 3)),
          'metadata': {
              'other': tf.string,
          },
          'sequence': features_lib.Sequence({
              'x': tf.int64,
          }),
      }),
  )

  # Failure mode:
  # * Structure not matching
  # * Type not matching
  # * Shape/dtype not matching
  # * Sequence values not matching (e.g. try bad dtype)

  # Key that does not exist inside the 'sequence' feature.
  with pytest.raises(ValueError, match="Missing expected feature 'unknown'"):
    _extract_features(
        feature=features,
        expected_feature={
            'sequence': features_lib.Sequence({
                'unknown': tf.bool,
            })
        },
    )

  # Key that does not exist at the top level.
  with pytest.raises(ValueError, match="Missing expected feature 'non_exista"):
    _extract_features(
        feature=features,
        expected_feature={
            'non_existant': features_lib.Image(shape=(None, None, 3)),
        },
    )

  # Expecting a Tensor where the source has an Image: TypeError.
  with pytest.raises(TypeError, match='Expected: Tensor.*. Got: Image'):
    _extract_features(
        feature=features,
        expected_feature={
            'img': features_lib.Tensor(shape=(256, 256, 3), dtype=tf.uint8),
        },
    )

  # Same feature type, but 1 channel expected where the source has 3.
  with pytest.raises(ValueError, match='Expected: Image.*. Got: Image'):
    _extract_features(
        feature=features,
        expected_feature={
            'img': features_lib.Image(shape=(None, None, 1)),
        },
    )

  # Sequence whose inner dtype differs from the source (float32 vs int64).
  with pytest.raises(ValueError, match='Expected: Tensor.*. Got: Tensor'):
    _extract_features(
        feature=features,
        expected_feature={
            'sequence_flat': features_lib.Sequence(tf.float32),  # Wrong dtype
        },
    )