コード例 #1
0
 def test_simple_different(self):
     """Two short documents that differ in their last token score below 1.0."""
     left, right = "abc def ghi1", "abc def xxx"
     # Only the final token differs, so the score must be strictly below 1.0.
     self.assertLess(get_similarity(left, right), 1.0)
コード例 #2
0
 def test_spam_bounce(self):
     """Compare a "normal" spam body against a bounce; expect a score below 0.8.

     The fixture paths are relative, so the test presumably has to be run
     from the project root -- confirm against the test runner configuration.
     """
     # Context managers guarantee the fixture files are closed, even if
     # reading raises; the bare open(...).read() form leaked both handles.
     with open("main/tests/blacklist_emails/spam_2.txt") as first_file:
         doc1 = first_file.read()
     with open("main/tests/blacklist_emails/spam_1b.txt") as second_file:
         doc2 = second_file.read()
     similarity = get_similarity(doc1, doc2)
     self.assertLess(similarity, 0.8)
コード例 #3
0
 def test_bounce(self):
     """Compare fixtures spam_1a and spam_1b; expect a score of at least 0.9.

     The fixture paths are relative, so the test presumably has to be run
     from the project root -- confirm against the test runner configuration.
     """
     # Context managers guarantee the fixture files are closed, even if
     # reading raises; the bare open(...).read() form leaked both handles.
     with open("main/tests/blacklist_emails/spam_1a.txt") as first_file:
         doc1 = first_file.read()
     with open("main/tests/blacklist_emails/spam_1b.txt") as second_file:
         doc2 = second_file.read()
     similarity = get_similarity(doc1, doc2)
     self.assertGreaterEqual(similarity, 0.9)
コード例 #4
0
 def test_repeat_text(self):
     """Repeating a document's words must not lower its (cosine) similarity."""
     base = "abc def ghi1"
     # Three space-separated copies of the same text.
     tripled = " ".join([base] * 3)
     score = get_similarity(base, tripled)
     self.assertGreaterEqual(score, 1.0)
コード例 #5
0
 def test_simple_name(self):
     """A document compared with itself must score at least 1.0."""
     text = "abc def ghi1"
     self.assertGreaterEqual(get_similarity(text, text), 1.0)