def test_nltk_word_tokenizer():
    """Check NLTKWordTokenizer output on a short sentence and on an empty string."""
    tokenizer = NLTKWordTokenizer()

    # (input text, expected token list) pairs, checked in order with the
    # same tokenizer instance.
    cases = (
        # Words and the trailing periods are expected as separate tokens.
        ("justo. Praesent luctus.", ['justo', '.', 'Praesent', 'luctus', '.']),
        # Empty input is expected to produce no tokens at all.
        ("", []),
    )

    for text, expected_tokens in cases:
        assert tokenizer(text) == expected_tokens
def test_ngram_tokenizer_equivalence():
    """Check that NGramsTokenizer(1) and NLTKWordTokenizer agree on `example`.

    `example` is a fixture text defined outside this function.
    """
    unigram_tokenizer = NGramsTokenizer(1)
    word_tokenizer = NLTKWordTokenizer()

    unigram_tokens = unigram_tokenizer(example)
    word_tokens = word_tokenizer(example)

    assert unigram_tokens == word_tokens