def test_cosine_similarity_sentences_with_no_common_word_should_be_zero():
    """Sentences sharing no words must have cosine similarity of ~0.0.

    Regression test for https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    # tf normalized by the most frequent word ("sentence" appears twice).
    tf1 = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}
    sentence2 = ["that", "paragraph", "has", "some", "words"]
    tf2 = {"that": 1.0, "paragraph": 1.0, "has": 1.0, "some": 1.0, "words": 1.0}
    # Every word occurs in exactly one of the two documents, so idf = 2/1 for all.
    idf = dict.fromkeys(sentence1 + sentence2, 2 / 1)

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(0.0 - cosine) < 0.00001
def test_cosine_similarity_sentences_with_no_common_word_should_be_zero():
    """Two sentences with disjoint vocabularies should score ~0.0 similarity.

    Regression test for https://github.com/miso-belica/sumy/issues/58
    """
    first_words = ["this", "sentence", "is", "simple", "sentence"]
    # Term frequencies are relative to the max count ("sentence" occurs twice).
    first_tf = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}

    second_words = ["that", "paragraph", "has", "some", "words"]
    second_tf = {"that": 1.0, "paragraph": 1.0, "has": 1.0, "some": 1.0, "words": 1.0}

    # Each term appears in one of two documents: idf = 2/1 across the board.
    idf = {
        "this": 2 / 1,
        "sentence": 2 / 1,
        "is": 2 / 1,
        "simple": 2 / 1,
        "that": 2 / 1,
        "paragraph": 2 / 1,
        "has": 2 / 1,
        "some": 2 / 1,
        "words": 2 / 1,
    }

    cosine = LexRankSummarizer().cosine_similarity(
        first_words, second_words, first_tf, second_tf, idf
    )

    assert abs(0.0 - cosine) < 0.00001
def test_cosine_similarity_for_the_same_sentence_with_duplicate_words_should_be_one():
    """An identical pair of sentences (with repeated words) scores ~1.0.

    Regression test for https://github.com/miso-belica/sumy/issues/58
    """
    words = ["this", "sentence", "is", "simple", "sentence"]
    # tf normalized by the most frequent word ("sentence" appears twice).
    tf = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}
    # Each word appears in both documents, so idf = 2/2 = 1.0 everywhere.
    idf = dict.fromkeys(words, 2 / 2)

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(words, list(words), tf, dict(tf), idf)

    assert abs(1.0 - cosine) < 0.00001
def test_cosine_similarity_for_the_same_sentence_with_duplicate_words_should_be_one():
    """Comparing a sentence against an equal copy yields similarity ~1.0.

    Regression test for https://github.com/miso-belica/sumy/issues/58
    """
    sentence_a = ["this", "sentence", "is", "simple", "sentence"]
    sentence_b = ["this", "sentence", "is", "simple", "sentence"]

    # Relative term frequencies; "sentence" is the mode, hence 1.0.
    tf_a = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}
    tf_b = {"this": 1 / 2, "sentence": 1.0, "is": 1 / 2, "simple": 1 / 2}

    # Every term occurs in both of the two documents: idf = 2/2.
    idf = {
        "this": 2 / 2,
        "sentence": 2 / 2,
        "is": 2 / 2,
        "simple": 2 / 2,
    }

    cosine = LexRankSummarizer().cosine_similarity(
        sentence_a, sentence_b, tf_a, tf_b, idf
    )

    assert abs(1.0 - cosine) < 0.00001