Exemple #1
0
 def test_encode_decode_from_loaded_tokenizer_with_1_maxlen(self, temp_dir):
     """Check the save/load round trip when the tokenizer is built with maxlen 1.

     A tokenizer built from ``self.captions`` with ``1`` as its second
     argument saves its dictionaries into ``temp_dir.path``; a second,
     argument-less tokenizer loads them back. ``self.phrase`` is encoded by
     the reloaded tokenizer and the result is expected to decode to a list
     holding only ``Tokenizer.END``.

     Args:
         temp_dir: Object exposing a ``path`` attribute used as the save and
             load location (presumably a temp-directory fixture injected by
             a decorator outside this view -- TODO confirm).
     """
     source_tokenizer = Tokenizer(self.captions, 1)
     source_tokenizer.save_dictionaries(temp_dir.path)

     restored_tokenizer = Tokenizer()
     restored_tokenizer.load_dictionaries(temp_dir.path)

     # NOTE(review): encoding uses the reloaded tokenizer while decoding uses
     # the original one -- confirm this cross-check is intentional.
     round_tripped = source_tokenizer.decode_caption(
         restored_tokenizer.encode_caption(self.phrase))

     self.assertEqual(round_tripped, [Tokenizer.END])
Exemple #2
0
 def test_encode_decode(self):
     """Encode then decode ``self.phrase`` for several max-length settings.

     For each setting (``None``, 5 and 10) a tokenizer is built from
     ``self.captions``; the decoded phrase, cut to the effective length,
     must equal the expected words followed by ``Tokenizer.END`` padding.
     The effective length is ``self.max_phraselen`` when the setting is
     ``None`` or not smaller than it, otherwise the setting itself.
     """
     for maxlen in (None, 5, 10):
         with self.subTest(captions=self.captions, user_maxlen=maxlen):
             tokenizer = Tokenizer(self.captions, maxlen)
             decoded = tokenizer.decode_caption(
                 tokenizer.encode_caption(self.phrase))

             words = ["THERE", Tokenizer.UNK, "ONE", "HAND"]

             # Effective length the decoded caption is compared over.
             if (maxlen is None) or (maxlen >= self.max_phraselen):
                 limit = self.max_phraselen
             else:
                 limit = maxlen

             # Everything after the known words is expected to be END tokens.
             end_padding = [Tokenizer.END] * (limit - len(words))
             self.assertEqual(decoded[:limit], words + end_padding)
Exemple #3
0
 def test_encode_decode_from_loaded_tokenizer(self, temp_dir):
     """Check the save/load round trip for several max-length settings.

     For each setting (``None``, 5 and 10) a tokenizer built from
     ``self.captions`` saves its dictionaries into ``temp_dir.path`` and an
     argument-less tokenizer loads them back. ``self.phrase`` is encoded by
     the reloaded tokenizer; the decoded result, cut to the effective
     length, must equal the expected words followed by ``Tokenizer.END``
     padding. The effective length is ``self.max_phraselen`` when the
     setting is ``None`` or not smaller than it, otherwise the setting.

     Args:
         temp_dir: Object exposing a ``path`` attribute used as the save and
             load location (presumably a temp-directory fixture injected by
             a decorator outside this view -- TODO confirm). The same
             location is reused by every sub-test.
     """
     for maxlen in (None, 5, 10):
         with self.subTest(captions=self.captions, user_maxlen=maxlen):
             source_tokenizer = Tokenizer(self.captions, maxlen)
             source_tokenizer.save_dictionaries(temp_dir.path)

             restored_tokenizer = Tokenizer()
             restored_tokenizer.load_dictionaries(temp_dir.path)

             # NOTE(review): encoding uses the reloaded tokenizer while
             # decoding uses the original one -- confirm this is intended.
             decoded = source_tokenizer.decode_caption(
                 restored_tokenizer.encode_caption(self.phrase))

             words = ["THERE", Tokenizer.UNK, "ONE", "HAND"]

             # Effective length the decoded caption is compared over.
             if (maxlen is None) or (maxlen >= self.max_phraselen):
                 limit = self.max_phraselen
             else:
                 limit = maxlen

             # Everything after the known words is expected to be END tokens.
             end_padding = [Tokenizer.END] * (limit - len(words))
             self.assertEqual(decoded[:limit], words + end_padding)
Exemple #4
0
 def test_encode_decode_with_1_maxlen(self):
     """Encode then decode ``self.phrase`` with a tokenizer built with maxlen 1.

     The tokenizer is built from ``self.captions`` with ``1`` as its second
     argument; the round trip is expected to yield a list holding only
     ``Tokenizer.END``.
     """
     tokenizer = Tokenizer(self.captions, 1)
     round_tripped = tokenizer.decode_caption(
         tokenizer.encode_caption(self.phrase))
     self.assertEqual(round_tripped, [Tokenizer.END])