Esempio n. 1
0
 def test_hierarchical(self):
     """Check vector.hierarchical() structurally and with a tiny accuracy case.

     Part 1 clusters the vectors of the first 50 entries of self.model with
     k=2 and passes a checker to the result's traverse() method.
     Part 2 clusters four hand-made vectors and asserts that the two
     "feline" vectors share one of h[0][0] / h[0][1], and the two "canine"
     vectors do as well, with both of those sub-clusters having length 2.
     """
     def _test_cluster(cluster):
         # Assert cluster contains nested clusters and/or vectors.
         # The id set of the whole cluster does not change while iterating,
         # so build it once instead of once per nested cluster.
         cluster_ids = set(v.id for v in cluster.flatten())
         for nested in cluster:
             if isinstance(nested, vector.Cluster):
                 nested_ids = set(v.id for v in nested.flatten())
                 # A nested cluster must be shallower than its parent and
                 # contain only vectors the parent also contains.
                 self.assertLess(nested.depth, cluster.depth)
                 self.assertTrue(nested_ids.issubset(cluster_ids))
             else:
                 self.assertIsInstance(nested, vector.Vector)
         self.assertIsInstance(cluster, list)
         self.assertIsInstance(cluster.depth, int)
         self.assertIsInstance(cluster.flatten(), list)

     n = 50
     h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
     h.traverse(_test_cluster)

     # Assert the accuracy of hierarchical clustering (shallow test).
     # Assert that cats are separated from dogs.
     # NOTE(review): the key " lion" has a leading space (looks like column
     # alignment); it is kept as-is since it is unique to that vector anyway.
     v = (
         vector.Vector({"feline": 1, " lion": 1,   "mane": 1}),
         vector.Vector({"feline": 1, "tiger": 1, "stripe": 1}),
         vector.Vector({"canine": 1,  "wolf": 1,   "howl": 1}),
         vector.Vector({"canine": 1,   "dog": 1,   "bark": 1})
     )
     h = vector.hierarchical(v)
     self.assertEqual(len(h[0][0]), 2)
     self.assertEqual(len(h[0][1]), 2)
     # Parentheses make the and/or grouping explicit: each pair must sit
     # together in the same sub-cluster, whichever one that is.
     self.assertTrue(
         (v[0] in h[0][0] and v[1] in h[0][0]) or
         (v[0] in h[0][1] and v[1] in h[0][1]))
     self.assertTrue(
         (v[2] in h[0][0] and v[3] in h[0][0]) or
         (v[2] in h[0][1] and v[3] in h[0][1]))
     print("pattern.vector.Cluster()")
     print("pattern.vector.hierarchical()")
Esempio n. 2
0
 def test_hierarchical(self):
     """Exercise vector.hierarchical(): cluster invariants, then a shallow accuracy check.

     The first half clusters the vectors of self.model[:50] with k=2 and
     hands a checker function to the resulting object's traverse() method.
     The second half clusters four small hand-written vectors and asserts
     that h[0][0] and h[0][1] each hold two vectors, with the "feline" pair
     kept together and the "canine" pair kept together.
     """
     def _test_cluster(cluster):
         # Assert cluster contains nested clusters and/or vectors.
         # Computed once up front: it is the same for every nested cluster.
         all_ids = set(v.id for v in cluster.flatten())
         for nested in cluster:
             if not isinstance(nested, vector.Cluster):
                 self.assertIsInstance(nested, vector.Vector)
                 continue
             sub_ids = set(v.id for v in nested.flatten())
             self.assertLess(nested.depth, cluster.depth)
             self.assertTrue(sub_ids.issubset(all_ids))
         self.assertIsInstance(cluster, list)
         self.assertIsInstance(cluster.depth, int)
         self.assertIsInstance(cluster.flatten(), list)

     n = 50
     h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
     h.traverse(_test_cluster)

     # Assert the accuracy of hierarchical clustering (shallow test).
     # Assert that cats are separated from dogs.
     v = (
         vector.Vector({"feline":1, " lion":1,   "mane":1}),
         vector.Vector({"feline":1, "tiger":1, "stripe":1}),
         vector.Vector({"canine":1,  "wolf":1,   "howl":1}),
         vector.Vector({"canine":1,   "dog":1,   "bark":1})
     )
     h = vector.hierarchical(v)
     # assertEqual reports the actual length on failure, unlike
     # assertTrue(len(...) == 2).
     self.assertEqual(len(h[0][0]), 2)
     self.assertEqual(len(h[0][1]), 2)
     # Each pair must land together in one of the two sub-clusters; the
     # parentheses spell out the and/or precedence the check relies on.
     self.assertTrue((v[0] in h[0][0] and v[1] in h[0][0]) or (v[0] in h[0][1] and v[1] in h[0][1]))
     self.assertTrue((v[2] in h[0][0] and v[3] in h[0][0]) or (v[2] in h[0][1] and v[3] in h[0][1]))
     print("pattern.vector.Cluster()")
     print("pattern.vector.hierarchical()")
                return len(questions)
            
            def elaboration(questions):
                """Score a collection of questions by their `.pnp` counts.

                For each question, takes the first element of
                parsetree(question), measures len() of its `.pnp` attribute,
                caps that at 2, and returns the sum over all questions
                (0 when `questions` is empty).
                """
                total = 0
                for question in questions:
                    first = parsetree(question)[0]
                    # Cap each question's contribution at 2.
                    total += min(len(first.pnp), 2)
                return total
                
            def variance(cluster):
                """Return avg() of distance() between the cluster centroid and each member.

                `cluster` is iterated once and also passed whole to centroid().
                """
                # The centroid does not depend on the member being measured, so
                # compute it once rather than once per member.
                # NOTE(review): assumes centroid() is a pure function of `cluster`
                # and accepts an empty cluster - confirm against its definition.
                center = centroid(cluster)
                return avg([distance(center, v) for v in cluster])
    
            # One Vector per question, built from count(words(q), stemmer='lemma'),
            # in the same order as all_q.
            vectors = [Vector(count(words(q), stemmer='lemma')) for q in all_q]

            # Cluster with k=250 and cosine distance, then normalise every
            # top-level entry to a plain list of vectors: a bare Vector becomes
            # a one-element list, anything else is flattened.
            clusters = [
                [node] if isinstance(node, Vector) else node.flatten()
                for node in hierarchical(vectors, k=250, distance='cosine')
            ]
            # Order the groups by ascending variance(); the position in this
            # ordering is used as the category number below.
            clusters.sort(key=variance)

            # Map row[<position of the vector in `vectors`>] -> category number.
            categories = {}
            for i, cluster in enumerate(clusters):
                for v in cluster:
                    # NOTE(review): list.index() returns the first element that
                    # compares equal, so if two questions yield equal vectors
                    # they both resolve to the same row entry - confirm this is
                    # intended. It is also a linear scan per vector.
                    position = vectors.index(v)
                    categories[row[position]] = i

            def flex(questions):
                ml_categories = []
                for q in questions:
                    q_uri = classifier_uri + q
                    j = requests.get(q_uri, auth=(watson_user, watson_pass))
                    ml_categories.append(j.json()['top_class'])