def test_hierarchical(self):
    """Check the structure and a shallow accuracy case of vector.hierarchical().

    First, every cluster visited by traverse() over the first 50 model
    documents is checked for structural invariants. Second, four hand-made
    vectors (two "feline", two "canine") are clustered and the two felines
    are expected to share a sub-cluster, as are the two canines.
    """

    def _test_cluster(cluster):
        # Assert cluster contains nested clusters and/or vectors.
        # The parent's id set does not change while iterating, so build it once.
        parent_ids = set(v.id for v in cluster.flatten())
        for nested in cluster:
            if isinstance(nested, vector.Cluster):
                nested_ids = set(v.id for v in nested.flatten())
                self.assertTrue(nested.depth < cluster.depth)
                self.assertTrue(nested_ids.issubset(parent_ids))
            else:
                self.assertTrue(isinstance(nested, vector.Vector))
        self.assertTrue(isinstance(cluster, list))
        self.assertTrue(isinstance(cluster.depth, int))
        self.assertTrue(isinstance(cluster.flatten(), list))

    n = 50
    h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
    h.traverse(_test_cluster)

    # Assert the accuracy of hierarchical clustering (shallow test).
    # Assert that cats are separated from dogs.
    # NOTE(review): the " lion" key has a leading space; it is kept as-is
    # because it is runtime data, but it looks like a typo -- confirm.
    v = (
        vector.Vector({"feline": 1, " lion": 1, "mane": 1}),
        vector.Vector({"feline": 1, "tiger": 1, "stripe": 1}),
        vector.Vector({"canine": 1, "wolf": 1, "howl": 1}),
        vector.Vector({"canine": 1, "dog": 1, "bark": 1}),
    )
    h = vector.hierarchical(v)
    first, second = h[0][0], h[0][1]
    self.assertTrue(len(first) == 2)
    self.assertTrue(len(second) == 2)

    def _together(a, b):
        # True when both vectors ended up in the same one of the two groups.
        return (a in first and b in first) or (a in second and b in second)

    self.assertTrue(_together(v[0], v[1]))
    self.assertTrue(_together(v[2], v[3]))
    print("pattern.vector.Cluster()")
    print("pattern.vector.hierarchical()")
def test_distancemap(self):
    """Check that vector.DistanceMap returns and caches the cosine distance.

    The same pair of vectors is queried 100 times; each time the returned
    distance and the entry stored in the map's ``_cache`` under
    ``(v1.id, v2.id)`` must both be approximately 0.55.
    """
    # Assert distance caching mechanism.
    v1 = vector.Vector({"cat": 1})
    v2 = vector.Vector({"cat": 0.5, "dog": 1})
    m = vector.DistanceMap(method=vector.COSINE)
    for _ in range(100):
        self.assertAlmostEqual(m.distance(v1, v2), 0.55, places=2)
        self.assertAlmostEqual(m._cache[(v1.id, v2.id)], 0.55, places=2)
    # Call form of print: valid on Python 3 and, with a single string
    # argument, prints the same text on Python 2.
    print("pattern.vector.DistanceMap")
def test_distance(self):
    """Check vector.distance() for each built-in metric and a custom callable."""
    # Assert distance metrics.
    a = vector.Vector({"cat":1})
    b = vector.Vector({"cat":0.5, "dog":1})
    # Each entry pairs the expected distance with the method to use.
    cases = (
        # 1 - ((1*0.5 + 0*1) / (sqrt(1**2 + 0**2) * sqrt(0.5**2 + 1**2)))
        (0.55, vector.COSINE),
        # (1-0.5)**2 + (0-1)**2
        (1.25, vector.EUCLIDEAN),
        # abs(1-0.5) + abs(0-1)
        (1.50, vector.MANHATTAN),
        # (True + True) / 2
        (1.00, vector.HAMMING),
        # A user-supplied function is accepted as the method.
        (1.11, lambda v1, v2: 1.11),
    )
    for expected, method in cases:
        actual = vector.distance(a, b, method)
        self.assertAlmostEqual(actual, expected, places=2)
    print("pattern.vector.distance()")
def test_kd_tree(self):
    """Check vector.KDTree.nearest_neighbors() on the four corners of a unit square.

    The result entries are indexed as ``[rank][0]`` and ``[rank][1]``;
    presumably these are (distance, vector) pairs, nearest first --
    confirm against KDTree.
    """
    # Assert KDTree nearest neighbor search.
    points = [
        {"cat": 0, "hat": 0},
        {"cat": 0, "hat": 1},
        {"cat": 1, "hat": 1},
        {"cat": 1, "hat": 0},
    ]
    tree = vector.KDTree([vector.Vector(p) for p in points])
    nearest = tree.nearest_neighbors

    by_cat = nearest(vector.Vector({"cat": 1}))
    self.assertEqual(by_cat[0][1], {"cat": 1, "hat": 0})
    self.assertEqual(by_cat[1][1], {"cat": 1, "hat": 1})

    by_hat = nearest(vector.Vector({"hat": 1}))
    self.assertEqual(by_hat[0][1], {"cat": 0, "hat": 1})
    self.assertEqual(by_hat[1][1], {"cat": 1, "hat": 1})

    # A query using a feature none of the points have.
    self.assertEqual(nearest(vector.Vector({"dog": 1}))[0][0], 1.0)
    # Call form of print: valid on Python 3 and, with a single string
    # argument, prints the same text on Python 2.
    print("pattern.vector.KDTree.nearest_neighbors()")