예제 #1
0
    def test_encode_decode(self):
        """Round-trip an ASCII and a non-ASCII string through `features.Text()`.

        Every sample is sent through `test_utils.features_encode_decode` and
        the decoded value is compared with `tf.compat.as_bytes` of the input.
        """
        ascii_sample = 'hello world'
        cjk_sample = u'你好,世界'

        # One expectation per sample, each with its own default Text feature.
        expectations = [
            test_utils.FeatureExpectation(
                name=feature_name,
                feature=features.Text(),
                value=sample,
                expected=tf.compat.as_bytes(sample))
            for feature_name, sample in (
                ('text', ascii_sample),
                ('text_unicode', cjk_sample),
            )
        ]

        # Feature spec keyed by expectation name.
        specs = features.SpecDict(
            {item.name: item.feature for item in expectations})

        # Input sample uses the same keys as the spec.
        raw_sample = {item.name: item.value for item in expectations}
        decoded_sample = test_utils.features_encode_decode(specs, raw_sample)

        for item in expectations:
            self.assertAllEqual(decoded_sample[item.name], item.expected)
예제 #2
0
    def test_save_load_metadata(self):
        """Check that Text metadata written to disk is usable after reloading.

        A byte-encoded Text feature with the additional token 'HI' encodes a
        sample string, saves its metadata to a temporary directory, and a
        second Text feature loads that metadata. The reloaded feature must
        produce the same ids as the original one.
        """
        text_f = features.Text(encoder=text_encoder.ByteTextEncoder(
            additional_tokens=['HI']))
        text = u'HI 你好'
        ids = text_f.str2ints(text)
        # The sample starts with the additional token, expected to map to id 1.
        self.assertEqual(1, ids[0])

        with testing.tmp_dir(self.get_temp_dir()) as data_dir:
            feature_name = 'dummy'
            text_f.save_metadata(data_dir, feature_name)

            new_f = features.Text()
            new_f.load_metadata(data_dir, feature_name)
            # Assert on the reloaded feature: comparing against `text_f` again
            # would pass even if nothing had been saved or loaded.
            # NOTE(review): if `load_metadata` cannot restore an encoder on a
            # feature built without one, `new_f` must be constructed with the
            # matching encoder type -- confirm against `features.Text`.
            self.assertEqual(ids, new_f.str2ints(text))
예제 #3
0
 def expectations(self):
     """Return the expectations used to exercise `features.Text`.

     Two expectations are produced: a plain string feature (ASCII, unicode
     and empty samples) and a feature using `ByteTextEncoder` (unicode and
     empty samples).
     """
     ascii_sample = 'hello world'
     cjk_sample = u'你好'
     # UTF-8 byte values of cjk_sample; the expected ids are these values
     # shifted up by one.
     cjk_utf8_bytes = (228, 189, 160, 229, 165, 189)

     # Plain string feature: each sample is expected back as bytes.
     string_expectation = test_utils.FeatureExpectation(
         name='text',
         feature=features.Text(),
         shape=(),
         dtype=tf.string,
         tests=[
             test_utils.FeatureExpectationItem(
                 value=sample,
                 expected=tf.compat.as_bytes(sample),
             )
             for sample in (ascii_sample, cjk_sample, '')
         ],
     )

     # Unicode integer-encoded by byte.
     byte_expectation = test_utils.FeatureExpectation(
         name='text_unicode_encoded',
         feature=features.Text(encoder=text_encoder.ByteTextEncoder()),
         shape=(None, ),
         dtype=tf.int64,
         tests=[
             test_utils.FeatureExpectationItem(
                 value=cjk_sample,
                 expected=[byte + 1 for byte in cjk_utf8_bytes],
             ),
             # Empty string encodes to an empty id list.
             test_utils.FeatureExpectationItem(value='', expected=[]),
         ],
     )

     return [string_expectation, byte_expectation]
예제 #4
0
    def test_text(self):
        """Check the default `features.Text()` on ASCII, unicode and empty input.

        Each sample is expected to come back as `tf.compat.as_bytes` of
        itself, with a scalar shape and `tf.string` dtype.
        """
        # Non-unicode, unicode and empty-string samples, in that order.
        samples = ('hello world', u'你好', '')

        self.assertFeature(
            feature=features.Text(),
            shape=(),
            dtype=tf.string,
            tests=[
                testing.FeatureExpectationItem(
                    value=sample,
                    expected=tf.compat.as_bytes(sample),
                )
                for sample in samples
            ],
        )
 def _info(self) -> dataset_info.DatasetInfo:
   """Describe the dataset: one `Text` feature per entry of `self._languages`."""
   # Each language gets its own Text feature instance.
   per_language = {}
   for lang in self._languages:
     per_language[lang] = features_lib.Text()

   return dataset_info.DatasetInfo(
       builder=self,
       description='Generic text translation dataset.',
       features=features_lib.FeaturesDict(per_language),
   )
예제 #6
0
 def _info(self) -> dataset_info.DatasetInfo:
     """Describe the dataset: image, label and image filename features.

     `('image', 'label')` is declared as the supervised key pair.
     """
     feature_spec = features_lib.FeaturesDict({
         'image': features_lib.Image(),
         'label': features_lib.ClassLabel(),
         'image/filename': features_lib.Text(),
     })
     return dataset_info.DatasetInfo(
         builder=self,
         description='Generic image classification dataset.',
         features=feature_spec,
         supervised_keys=('image', 'label'),
     )
예제 #7
0
    def test_text_encoded(self):
        """Check `features.Text` with a `ByteTextEncoder` on unicode and empty input.

        The feature is expected to have shape `(None,)` and dtype `tf.int64`.
        """
        sample = u'你好'
        # UTF-8 byte values of the sample; the expected ids are these values
        # shifted up by one.
        utf8_bytes = (228, 189, 160, 229, 165, 189)

        # Unicode integer-encoded by byte.
        byte_feature = features.Text(encoder=text_encoder.ByteTextEncoder())
        cases = [
            testing.FeatureExpectationItem(
                value=sample,
                expected=[byte + 1 for byte in utf8_bytes],
            ),
            # Empty string encodes to an empty id list.
            testing.FeatureExpectationItem(value='', expected=[]),
        ]

        self.assertFeature(
            feature=byte_feature,
            shape=(None, ),
            dtype=tf.int64,
            tests=cases,
        )
예제 #8
0
 def test_text_conversion(self):
     """Check that `ints2str` reverses `str2ints` for a byte-encoded Text feature."""
     text = u'你好'
     text_f = features.Text(encoder=text_encoder.ByteTextEncoder())

     # Encode to ids, decode back, and compare with the original string.
     encoded_ids = text_f.str2ints(text)
     round_tripped = text_f.ints2str(encoded_ids)
     self.assertEqual(text, round_tripped)