Example no. 1
0
def test_stop_word_filter():
    """StopWordFilter drops listed stop words; with the "nltk" list it
    holds stemmed entries and filters standardized tokens accordingly."""
    # Explicit stop-word list: stored as a set, removed from token streams.
    word_filter = tokenization.StopWordFilter(["the", "of"])
    assert isinstance(word_filter.stop_words_, set)
    assert word_filter(["and", "the", "they", "of"]) == ["and", "they"]

    # Built-in "nltk" list combined with a stemmer: stop words are stored
    # in stemmed form (e.g. "yourselves" -> "yourselv").
    stemmer = tokenization.Standardizer("porter_stemmer")
    word_filter = tokenization.StopWordFilter("nltk", stemmer)
    assert "yourselv" in word_filter.stop_words_

    tokens = "do it yourselves computers".split()
    assert word_filter(stemmer(tokens)) == ["comput"]
Example no. 2
0
def test_standardizer():
    """Standardizer with the Porter stemmer lowercases and stems tokens."""
    stemmer = tokenization.Standardizer("porter_stemmer")
    stemmed = stemmer(["EatIng", "Cheese"])
    assert stemmed == ["eat", "chees"]