Example #1
def test_base_tokenizer_punctuation(self):
    tokenizer = Tokenizer()
    for punctuation in string.punctuation:
        # e.g. "!WORD! WO!RD": punctuation at a word boundary is split
        # into its own token; word-internal punctuation stays attached.
        test = "%sWORD%s WO%sRD" % ((punctuation,) * 3)
        result = tokenizer.tokenize(test)
        expected = [punctuation, 'WORD', punctuation, 'WO%sRD' % punctuation]
        # Check lengths first so a short result can't pass silently.
        self.assertEqual(len(result), len(expected))
        self.assertTrue(all(a['token'] == b for a, b in zip(result, expected)))
Example #2
def test_base_tokenizer_empty_string(self):
    tokenizer = Tokenizer()
    # Whitespace-only input, in any mix, should produce no tokens.
    for test in [" ", "", "\n", "\t", "  \n \t \n   \n \t \t\t\t \n\n\n    "]:
        result = tokenizer.tokenize(test)
        self.assertEqual(0, len(result))
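Neither snippet shows where Tokenizer comes from. Below is a minimal stand-in, a sketch that only models the behaviour these two tests assert: tokens come back as dicts with a 'token' key, boundary punctuation is split into separate tokens, word-internal punctuation is kept, and whitespace-only input yields nothing. The class name matches the tests, but its implementation here is an assumption and the project's real Tokenizer may differ.

import string


class Tokenizer:
    """Hypothetical stand-in for the project's Tokenizer (assumption)."""

    def tokenize(self, text):
        tokens = []
        for chunk in text.split():  # whitespace-only input yields no chunks
            # Peel punctuation off both edges into separate tokens,
            # leaving word-internal punctuation attached.
            leading = []
            while chunk and chunk[0] in string.punctuation:
                leading.append(chunk[0])
                chunk = chunk[1:]
            trailing = []
            while chunk and chunk[-1] in string.punctuation:
                trailing.append(chunk[-1])
                chunk = chunk[:-1]
            parts = leading + ([chunk] if chunk else []) + trailing[::-1]
            tokens.extend({'token': part} for part in parts)
        return tokens

With this stand-in, both test methods can be dropped into a unittest.TestCase subclass (with import string and import unittest at module level) and run via unittest.main().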