def compare_vectors(word_vector1, word_vector2):
    """Score how alike two lists of words are; a higher score is better.

    Both lists are converted into frequency vectors laid out over the
    combined vocabulary of the two inputs, and those vectors are compared
    with ``similarity`` (cosine similarity).

    Expected range: 0 (worst match) up to 1 (the same words used in the
    same proportions).
    """
    # Freeze the shared vocabulary into a list so that both frequency
    # vectors are built in exactly the same word order.
    vocabulary = list(set(word_vector1).union(set(word_vector2)))

    counts_a = word_frequencies(word_vector1)
    counts_b = word_frequencies(word_vector2)

    def as_vector(counts):
        # A word absent from one input contributes a zero component.
        return [counts.get(token, 0) for token in vocabulary]

    vector_a = as_vector(counts_a)
    vector_b = as_vector(counts_b)
    return similarity(vector_a, vector_b)
def test_similarity(self):
    # Table of (first vector, second vector, expected score); checked in
    # order, stopping at the first mismatch.
    cases = (
        ([1, 1], [-1, -1], -1),  # opposite directions
        ([1, 1], [1, 1], 1),  # identical vectors
        ([0, 1], [1, 0], 0),  # no shared components
    )
    for left, right, expected in cases:
        self.assertAlmostEqual(similarity(left, right), expected)