def test_base_tokenizer_punctuation(self):
    # Punctuation that borders a word should become its own token, while
    # punctuation inside a word (e.g. "WO.RD") should be kept in place.
    tokenizer = Tokenizer()
    for punctuation in string.punctuation:
        test = "%sWORD%s WO%sRD" % ((punctuation,) * 3)
        result = tokenizer.tokenize(test)
        expected = [punctuation, 'WORD', punctuation, 'WO%sRD' % punctuation]
        # Compare the full token sequences so a length mismatch also fails;
        # the previous all(map(...)) check truncated at the shorter iterable
        # and could pass even when tokens were missing.
        self.assertEqual([token['token'] for token in result], expected)
def test_base_tokenizer_empty_string(self):
    # Empty and whitespace-only input should produce no tokens at all.
    tokenizer = Tokenizer()
    for test in [" ", "", "\n", "\t", " \n \t \n \n \t \t\t\t \n\n\n "]:
        result = tokenizer.tokenize(test)
        self.assertEqual(len(result), 0)
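
# A minimal sketch of the Tokenizer behaviour the tests above assume:
# tokenize() returns a list of dicts keyed by 'token', punctuation touching a
# word boundary is split off as its own token, punctuation inside a word is
# preserved ("WO.RD" stays one token), and whitespace-only input yields an
# empty list. SketchTokenizer is a hypothetical illustration of that
# interface, not the real Tokenizer implementation under test.
import re
import string

class SketchTokenizer(object):
    _punct = re.escape(string.punctuation)
    # A token is either a run of non-punctuation word characters, possibly
    # joined by single internal punctuation marks, or a lone punctuation
    # character.
    _pattern = re.compile(r"[^\s%s]+(?:[%s][^\s%s]+)*|[%s]" % ((_punct,) * 4))

    def tokenize(self, text):
        return [{'token': match} for match in self._pattern.findall(text)]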