def test_cosine(self):
    """Check proximity.cosine for an identical, an orthogonal and a generic vector."""
    base = [1, 2, 3]
    # (other vector, expected result) pairs, checked in order against `base`.
    cases = (
        (base, 1),                    # the very same vector
        ([-4, 8, -4], 0),             # dot product with base: -4 + 16 - 12 = 0
        ([5, 6, 1], 0.678844233302),  # 20 / sqrt(14 * 62)
    )
    for other, expected in cases:
        self.assertAlmostEqual(expected, proximity.cosine(base, other))
# Load the two Wikipedia articles through file_to_dict (defined elsewhere in
# this file).  NOTE(review): presumably it returns a word -> count mapping
# suitable for cosine(); confirm against its definition.
NPH_words = file_to_dict('input_files/wiki_NPH.txt')
NPC_words = file_to_dict('input_files/wiki_NPC.txt')

# Compute cosine similarity and print the report.
#
# Every print below takes exactly one parenthesised argument, so the output is
# identical under the Python 2 print statement and the block also parses under
# Python 3.  print("") emits the same blank line a bare `print` did.
_RULE = "======================================================================"

print('This program uses cosine similarity metric to compare text documents:')
print("")
print("Here's how the documents were generated:")
print("doc1.txt : seq 1.0 1.0 100.0 > doc1.txt")
print("doc2.txt : seq 0.5 99.5 > doc2.txt")
print("doc3.txt : seq 1 0.5 50 > doc3.txt")
print("wiki_NPH.txt : wget http://en.wikipedia.org/wiki/NP-hard -O wiki_NPH.txt")
# Fixed: the -O target used to read wiki_NPH.txt, which would have clobbered
# the NP-hard download instead of creating wiki_NPC.txt.
print("wiki_NPC.txt : wget http://en.wikipedia.org/wiki/NP-complete -O wiki_NPC.txt")
print("")
print("Document similarities:")

# Synthetic documents with a known expected similarity.  f1_words, f2_words
# and f3_words are not defined in this part of the file; they are expected to
# have been built earlier (presumably from doc1/doc2/doc3.txt).
_comparisons = (
    ("doc1.txt", "doc2.txt", f1_words, f2_words, "0.0"),
    ("doc2.txt", "doc3.txt", f2_words, f3_words, "0.5"),
    ("doc1.txt", "doc3.txt", f1_words, f3_words, "0.5"),
    ("doc2.txt", "doc2.txt", f2_words, f2_words, "1.0"),
)
for _name_a, _name_b, _words_a, _words_b, _expected in _comparisons:
    print(_RULE)
    print("cosine similarity of %s vs. %s = %.4f"
          % (_name_a, _name_b, cosine(_words_a, _words_b)))
    print("expecting similarity of %s" % _expected)

# The real-world pair has no known expected value, so only the score is shown.
print(_RULE)
print("cosine similarity of wiki_NPH.txt vs. wiki_NPC.txt = %.4f"
      % cosine(NPH_words, NPC_words))
print("")