Code example #1
def test_nltk_word_tokenizer():
    # NLTKWordTokenizer is assumed to be imported from the project's own
    # tokenizer module; the original snippet does not show the import.
    tokenizer = NLTKWordTokenizer()

    # Punctuation is split off into separate tokens.
    dummy = "justo. Praesent luctus."
    assert tokenizer(dummy) == ['justo', '.', 'Praesent', 'luctus', '.']
    # An empty string produces an empty token list.
    dummy = ""
    assert tokenizer(dummy) == []
Code example #2
def test_ngram_tokenizer_equivalence():
    t1 = NGramsTokenizer(1)
    t2 = NLTKWordTokenizer()

    # `example` is assumed to be a sample string defined elsewhere in the
    # original test module; any text works here.
    example = "justo. Praesent luctus."
    # Unigrams of the word tokens are the word tokens themselves.
    assert t1(example) == t2(example)
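
Why the equivalence in example #2 holds: an n-gram tokenizer built on word tokens returns every window of n consecutive tokens, and with n = 1 each window is a single token, so the output collapses to the plain word-token list. The sketch below illustrates this with a hypothetical ngrams helper; it is not the library's actual NGramsTokenizer implementation.

# Hypothetical sketch, not the library's code: join each window of n
# consecutive word tokens into one n-gram string.
def ngrams(tokens, n):
    return [" ".join(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

tokens = ['justo', '.', 'Praesent', 'luctus', '.']
# With n = 1 every window is a single token, so the result equals the input.
assert ngrams(tokens, 1) == tokens
# With n = 2 adjacent tokens are paired.
assert ngrams(tokens, 2) == ['justo .', '. Praesent', 'Praesent luctus', 'luctus .']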