def test_intra_similarity_of_cluster(self):
    """
    Test that the intra-similarity of a cluster holding several vectors equals the mean of each vector's similarity with that cluster.
    """

    documents = [
        Document("", ['a', 'b'], scheme=TF()),
        Document("", ['a', 'a'], scheme=TF()),
    ]
    cluster = Cluster(documents)

    # Compute each member's similarity with the cluster first, then average them.
    first = cluster.similarity(documents[0])
    second = cluster.similarity(documents[1])
    expected = (first + second) / 2.

    self.assertEqual(expected, cluster.get_intra_similarity())
def test_empty_cluster_similarity(self):
    """
    Test that the similarity between a vector and a cluster created without any vectors is 0.
    """

    empty_cluster = Cluster()
    document = Document("", ["a", "c"], scheme=TF())

    similarity = empty_cluster.similarity(document)
    self.assertEqual(0, similarity)
def test_intra_similarity_of_cluster_with_single_vector(self):
    """
    Test that a cluster built from a single vector has an intra-similarity equal to the similarity between that vector and the cluster.
    """

    document = Document("", ['a', 'b'], scheme=TF())
    # The document is handed to the cluster directly, not wrapped in a list.
    cluster = Cluster(document)

    expected = cluster.similarity(document)
    self.assertEqual(expected, cluster.get_intra_similarity())
def test_cluster_similarity(self):
    """
    Test calculating the similarity between a cluster and a new vector.

    The similarity is checked twice: once with both vectors in the cluster, and once more after one of them has been removed from the cluster's vectors.
    Floating-point results are compared to 5 decimal places.
    """

    v = [
        Document("", ["a", "b", "a", "c"], scheme=TF()),
        Document("", ["a", "c"], scheme=TF()),
    ]
    c = Cluster(v)
    n = Document("", ["a", "b"], scheme=TF())

    # NOTE(review): the expected value reads as the cosine similarity between
    # n = (a: 1, b: 1) and a centroid of (a: 1.5, b: 0.5, c: 1), i.e. the mean
    # of raw term counts - confirm against the Cluster and TF implementations.
    expected = (1.5 + 0.5) / (math.sqrt(2) * math.sqrt(1.5**2 + 0.5**2 + 1))
    # assertAlmostEqual rounds the difference, so two nearly identical values
    # that straddle a rounding boundary cannot cause a spurious failure.
    self.assertAlmostEqual(expected, c.similarity(n), places=5)

    # With the second vector removed, the expected value corresponds to a
    # centroid of (a: 2, b: 1, c: 1) - presumably the remaining document's
    # term counts.
    c.vectors.remove(v[1])
    expected = 3 / (math.sqrt(2) * math.sqrt(2**2 + 1 + 1))
    self.assertAlmostEqual(expected, c.similarity(n), places=5)