def test_get_stop_words():
    """Check how ``tokenization._get_stop_words`` resolves its argument.

    Covers the two named presets ("nltk" and "sklearn"), an explicit list
    of words, and an unrecognised preset name.
    """
    # Each named preset must match what the dedicated helper returns.
    assert tokenization._get_stop_words("nltk") == tokenization.nltk_stop_words()
    assert (
        tokenization._get_stop_words("sklearn")
        == tokenization.sklearn_stop_words()
    )

    # A list is treated as the words themselves, not as preset names: the
    # result must compare equal to a set of exactly those strings.
    custom_words = ["sklearn", "nltk"]
    assert tokenization._get_stop_words(custom_words) == {"sklearn", "nltk"}

    # A string that is not a known preset is rejected.
    with pytest.raises(ValueError):
        tokenization._get_stop_words("other")
def test_nltk_stop_words():
    """Sanity-check the collection returned by ``tokenization.nltk_stop_words``.

    It must contain the word "the" and hold more than 100 entries.
    """
    words = tokenization.nltk_stop_words()
    assert "the" in words
    assert 100 < len(words)