Esempio n. 1
0
 def test_hierarchical(self):
     """Check the structure and a shallow accuracy case of vector.hierarchical().

     Part one clusters the vectors of the first 50 items of self.model
     (k=2) and hands _test_cluster to the result's traverse() method.
     Part two clusters four hand-written vectors and checks that the two
     "feline" vectors share a sub-cluster and the two "canine" vectors
     share a sub-cluster.
     """
     # Assert cluster contains nested clusters and/or vectors.
     def _test_cluster(cluster):
         # Flatten the parent once; the result is reused for every nested
         # cluster and for the final type check.
         flattened = cluster.flatten()
         parent_ids = set(v.id for v in flattened)
         for nested in cluster:
             if isinstance(nested, vector.Cluster):
                 nested_ids = set(v.id for v in nested.flatten())
                 self.assertLess(nested.depth, cluster.depth)
                 self.assertTrue(nested_ids.issubset(parent_ids))
             else:
                 self.assertIsInstance(nested, vector.Vector)
         self.assertIsInstance(cluster, list)
         self.assertIsInstance(cluster.depth, int)
         self.assertIsInstance(flattened, list)

     n = 50
     h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
     h.traverse(_test_cluster)

     # Assert the accuracy of hierarchical clustering (shallow test).
     # Assert that cats are separated from dogs.
     # NOTE(review): the key " lion" carries a leading space; it looks like
     # alignment padding and is kept as-is because the key stays unique
     # either way -- confirm before normalizing.
     v = (
         vector.Vector({"feline": 1, " lion": 1,   "mane": 1}),
         vector.Vector({"feline": 1, "tiger": 1, "stripe": 1}),
         vector.Vector({"canine": 1,  "wolf": 1,   "howl": 1}),
         vector.Vector({"canine": 1,   "dog": 1,   "bark": 1})
     )
     h = vector.hierarchical(v)
     first, second = h[0][0], h[0][1]
     self.assertEqual(len(first), 2)
     self.assertEqual(len(second), 2)
     # Each pair must sit together in one of the two sub-clusters; which
     # one is not fixed, so both placements are accepted.
     self.assertTrue(
         (v[0] in first and v[1] in first) or
         (v[0] in second and v[1] in second))
     self.assertTrue(
         (v[2] in first and v[3] in first) or
         (v[2] in second and v[3] in second))
     print("pattern.vector.Cluster()")
     print("pattern.vector.hierarchical()")
Esempio n. 2
0
 def test_distancemap(self):
     """Check that vector.DistanceMap yields and stores the cosine distance.

     The same pair of vectors is queried 100 times, so every iteration
     after the first runs with the pair's entry already present in the
     map's _cache.
     """
     # Assert distance caching mechanism.
     v1 = vector.Vector({"cat": 1})
     v2 = vector.Vector({"cat": 0.5, "dog": 1})
     m = vector.DistanceMap(method=vector.COSINE)
     for _ in range(100):
         self.assertAlmostEqual(m.distance(v1, v2), 0.55, places=2)
         # The stored value is read straight from the private cache,
         # which is keyed by the (v1.id, v2.id) tuple.
         self.assertAlmostEqual(m._cache[(v1.id, v2.id)], 0.55, places=2)
     # Call form of print: valid on both Python 2 and Python 3, and
     # consistent with the other tests in this file.
     print("pattern.vector.DistanceMap")
Esempio n. 3
0
 def test_distance(self):
     """Compare vector.distance() with hand-computed values for each metric.

     The last entry passes a plain function as the metric and expects its
     constant return value back.
     """
     # Assert distance metrics.
     single = vector.Vector({"cat":1})
     mixed = vector.Vector({"cat":0.5, "dog":1})
     expectations = [
         # 1 - ((1*0.5 + 0*1) / (sqrt(1**2 + 0**2) * sqrt(0.5**2 + 1**2)))
         (vector.COSINE, 0.55),
         # (1-0.5)**2 + (0-1)**2
         (vector.EUCLIDEAN, 1.25),
         # abs(1-0.5) + abs(0-1)
         (vector.MANHATTAN, 1.50),
         # (True + True) / 2
         (vector.HAMMING, 1.00),
         # User-supplied callable that ignores its arguments.
         (lambda v1, v2: 1.11, 1.11),
     ]
     for metric, expected in expectations:
         measured = vector.distance(single, mixed, metric)
         self.assertAlmostEqual(measured, expected, places=2)
     print("pattern.vector.distance()")
Esempio n. 4
0
 def test_kd_tree(self):
     """Check the result order of vector.KDTree(...).nearest_neighbors.

     Four vectors covering every 0/1 combination of "cat" and "hat" are
     indexed. Each result is treated as a pair: item [1] is compared with
     the expected stored vector and item [0] with an expected numeric
     value (presumably the distance -- confirm against KDTree).
     """
     # Assert KDTree nearest neighbor search.
     corners = [
         {"cat": 0, "hat": 0},
         {"cat": 0, "hat": 1},
         {"cat": 1, "hat": 1},
         {"cat": 1, "hat": 0},
     ]
     # Distinct names for the raw dicts, the vectors and the bound search
     # method, instead of rebinding a single variable three times.
     vectors = [vector.Vector(features) for features in corners]
     nearest = vector.KDTree(vectors).nearest_neighbors

     # Query on "cat" only: first and second results.
     near_cat = nearest(vector.Vector({"cat": 1}))
     self.assertEqual(near_cat[0][1], {"cat": 1, "hat": 0})
     self.assertEqual(near_cat[1][1], {"cat": 1, "hat": 1})

     # Query on "hat" only: first and second results.
     near_hat = nearest(vector.Vector({"hat": 1}))
     self.assertEqual(near_hat[0][1], {"cat": 0, "hat": 1})
     self.assertEqual(near_hat[1][1], {"cat": 1, "hat": 1})

     # A query with a feature none of the indexed vectors has.
     near_dog = nearest(vector.Vector({"dog": 1}))
     self.assertEqual(near_dog[0][0], 1.0)
     # Call form of print: valid on both Python 2 and Python 3, and
     # consistent with the other tests in this file.
     print("pattern.vector.KDTree.nearest_neighbors()")