コード例 #1
0
def test_preprocess_words_gives_bigrams():
    """With ``bigrams=True`` the output is made of space-joined word pairs.

    The stemmer and lemmatizer are the no-op helpers and the stopword
    list is empty, so only the bigram step shapes the result. The
    comparison is done on sets, so output order is not checked.
    """
    words = ["alright", "welcome", "everyone"]
    expected_pairs = {"alright welcome", "welcome everyone"}

    result = preprocess_words(
        words,
        noop_stemmer(),
        noop_lemmatizer(),
        [],
        bigrams=True,
    )

    assert set(result) == expected_pairs
コード例 #2
0
def test_preprocess_words_stems_words():
    """Each word is passed through ``stemmer.stem`` and its result is kept.

    The stub stemmer rewrites only ``"aaaa"`` (to ``"1"``); every other
    word comes back unchanged. The lemmatizer is the no-op helper and no
    stopwords are supplied.
    """

    def fake_stem(word):
        # Only "aaaa" is transformed; anything else is returned as-is.
        if word == "aaaa":
            return "1"
        return word

    stub_stemmer = Mock()
    stub_stemmer.stem = fake_stem

    result = preprocess_words(
        ["aaaa", "bbbb"], stub_stemmer, noop_lemmatizer(), []
    )

    assert list(result) == ["1", "bbbb"]
コード例 #3
0
def test_preprocess_words_lemmatizes_words():
    """Each word is passed through ``lemmatizer.lemmatize`` and its result is kept.

    The stub lemmatizer rewrites only ``"bbbb"`` (to ``"a"``) and accepts,
    but ignores, any keyword arguments it is called with. The stemmer is
    the no-op helper and no stopwords are supplied.
    """

    def fake_lemmatize(word, **_ignored):
        # Only "bbbb" is transformed; anything else is returned as-is.
        if word == "bbbb":
            return "a"
        return word

    no_stemming = noop_stemmer()
    stub_lemmatizer = Mock()
    stub_lemmatizer.lemmatize = fake_lemmatize

    result = preprocess_words(
        ["bbbb", "dddd"], no_stemming, stub_lemmatizer, []
    )

    assert list(result) == ["a", "dddd"]
コード例 #4
0
def test_preprocess_words_removes_stopwords():
    """A word that appears in the stopword list is dropped from the output.

    ``"YOLO"`` is listed as a stopword and must not survive, while
    ``"Test"`` is kept. Stemming and lemmatizing use the no-op helpers.
    """
    words = ["Test", "YOLO"]
    stopwords = ["YOLO"]

    result = preprocess_words(
        words, noop_stemmer(), noop_lemmatizer(), stopwords
    )

    assert list(result) == ["Test"]
コード例 #5
0
def test_preprocess_words_removes_lt_3_char_words():
    """Very short words are filtered out of the output.

    The two-character word ``"me"`` is dropped while ``"help"`` is kept.
    Per the test name the cut-off is words under three characters; only
    the two-character case is exercised here. Stemming and lemmatizing
    use the no-op helpers, and no stopwords are supplied.
    """
    words = ["help", "me"]

    result = preprocess_words(words, noop_stemmer(), noop_lemmatizer(), [])

    assert list(result) == ["help"]
コード例 #6
0
def test_preprocess_words_handles_getting_bigrams_from_empty_word_list():
    """Requesting bigrams from an empty word list yields an empty result.

    Guards the ``bigrams=True`` path against failing when there are no
    words to pair up. Stemming and lemmatizing use the no-op helpers.
    """
    no_words = []

    result = preprocess_words(
        no_words,
        noop_stemmer(),
        noop_lemmatizer(),
        [],
        bigrams=True,
    )

    assert list(result) == []