コード例 #1
0
ファイル: test_lex_rank.py プロジェクト: sreenadhp/sumy
def test_cosine_similarity_sentences_with_no_common_word_should_be_zero():
    """
    We compute similarity of the sentences without single common word.
    These are considered dissimilar so have similarity close to 0.0.
    see https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    tf1 = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}
    sentence2 = ["that", "paragraph", "has", "some", "words"]
    tf2 = {
        "that": 1.0,
        "paragraph": 1.0,
        "has": 1.0,
        "some": 1.0,
        "words": 1.0
    }
    idf = {
        "this": 2 / 1,
        "sentence": 2 / 1,
        "is": 2 / 1,
        "simple": 2 / 1,
        "that": 2 / 1,
        "paragraph": 2 / 1,
        "has": 2 / 1,
        "some": 2 / 1,
        "words": 2 / 1,
    }

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(0.0 - cosine) < 0.00001
コード例 #2
0
ファイル: test_lex_rank.py プロジェクト: JyothsnaKS/sumy
def test_cosine_similarity_sentences_with_no_common_word_should_be_zero():
    """
    We compute similarity of the sentences without single common word.
    These are considered dissimilar so have similarity close to 0.0.
    see https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    tf1 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    sentence2 = ["that", "paragraph", "has", "some", "words"]
    tf2 = {"that": 1.0, "paragraph": 1.0, "has": 1.0, "some": 1.0, "words": 1.0}
    idf = {
        "this": 2/1,
        "sentence": 2/1,
        "is": 2/1,
        "simple": 2/1,
        "that": 2/1,
        "paragraph": 2/1,
        "has": 2/1,
        "some": 2/1,
        "words": 2/1,
    }

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(0.0 - cosine) < 0.00001
コード例 #3
0
ファイル: test_lex_rank.py プロジェクト: JyothsnaKS/sumy
def test_cosine_similarity_for_the_same_sentence_with_duplicate_words_should_be_one():
    """
    We compute similarity of the same sentences. These should be exactly the same and
    therefor have similarity close to 1.0.
    see https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    tf1 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    sentence2 = ["this", "sentence", "is", "simple", "sentence"]
    tf2 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    idf = {
        "this": 2/2,
        "sentence": 2/2,
        "is": 2/2,
        "simple": 2/2,
    }

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(1.0 - cosine) < 0.00001
コード例 #4
0
def test_cosine_similarity_for_the_same_sentence_with_duplicate_words_should_be_one():
    """
    We compute similarity of the same sentences. These should be exactly the same and
    therefor have similarity close to 1.0.
    see https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    tf1 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    sentence2 = ["this", "sentence", "is", "simple", "sentence"]
    tf2 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    idf = {
        "this": 2/2,
        "sentence": 2/2,
        "is": 2/2,
        "simple": 2/2,
    }

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(1.0 - cosine) < 0.00001