コード例 #1
0
ファイル: test_text.py プロジェクト: fcopello/barbante
def test_count_common_terms_English():
    """Check count_common_terms on two pre-processed English sentences.

    Each sentence is passed through text.tokenize, text.get_stems and
    text.remove_stopwords, the result is de-duplicated with set(), and the
    two sets are handed to text.count_common_terms, which must report 3.
    """
    language = "english"
    sentences = (
        "Just a test sentence for the purpose of just testing common terms counting.",
        "This is just a sentence for tests purposes.",
    )

    # Run each stage over both sentences before moving on to the next stage.
    token_lists = [text.tokenize(sentence) for sentence in sentences]
    stem_lists = [text.get_stems(tokens, language) for tokens in token_lists]
    first_terms, second_terms = [
        set(text.remove_stopwords(stems, language)) for stems in stem_lists
    ]

    # Expected overlap, per the original author's note: the terms derived
    # from "sentence", "purpose(s)" and "test(s)".
    common_count = text.count_common_terms(first_terms, second_terms)
    nose.tools.eq_(common_count, 3)
コード例 #2
0
ファイル: test_text.py プロジェクト: fcopello/barbante
def test_remove_stopwords():
    """Check that text.remove_stopwords filters a word list as expected.

    The input keeps its order and its repeated words; only "is", "to" and
    "or" are expected to be dropped.
    """
    words = [
        "The", "car", "is", "going", "to",
        "crash", "or", "going", "to", "win",
    ]
    expected = ['The', 'car', 'going', 'crash', 'going', 'win']

    # NOTE(review): the meaning of the positional 3 is not visible from this
    # test -- presumably a minimum word length; confirm against
    # text.remove_stopwords.
    actual = text.remove_stopwords(words, "english", 3)

    nose.tools.eq_(actual, expected)