def test_hierarchical(self):
    # Two-part test of vector.hierarchical():
    #   1. cluster the vectors of the first n documents in self.model and
    #      check the structure of what h.traverse() hands to the visitor;
    #   2. cluster four hand-made vectors and check that the two "feline"
    #      vectors and the two "canine" vectors land in different halves.

    def _test_cluster(cluster):
        # Assert cluster contains nested clusters and/or vectors.
        # cluster.flatten() does not depend on the loop variable, so it is
        # evaluated once here and reused below.
        flattened = cluster.flatten()
        cluster_ids = {v.id for v in flattened}
        for nested in cluster:
            if isinstance(nested, vector.Cluster):
                nested_ids = {v.id for v in nested.flatten()}
                # A nested cluster must be shallower than its parent and
                # may only hold vectors that the parent also holds.
                self.assertLess(nested.depth, cluster.depth)
                self.assertTrue(nested_ids.issubset(cluster_ids))
            else:
                self.assertIsInstance(nested, vector.Vector)
        self.assertIsInstance(cluster, list)
        self.assertIsInstance(cluster.depth, int)
        self.assertIsInstance(flattened, list)

    n = 50
    h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
    h.traverse(_test_cluster)

    # Assert the accuracy of hierarchical clustering (shallow test).
    # Assert that cats are separated from dogs.
    # NOTE(review): the leading space in " lion" looks accidental; it is kept
    # as-is because the key only has to be distinct from the other features.
    v = (
        vector.Vector({"feline": 1, " lion": 1, "mane": 1}),
        vector.Vector({"feline": 1, "tiger": 1, "stripe": 1}),
        vector.Vector({"canine": 1, "wolf": 1, "howl": 1}),
        vector.Vector({"canine": 1, "dog": 1, "bark": 1}),
    )
    h = vector.hierarchical(v)
    first, second = h[0][0], h[0][1]
    self.assertEqual(len(first), 2)
    self.assertEqual(len(second), 2)

    def _together(a, b):
        # True when both vectors sit in the same one of the two halves.
        return (a in first and b in first) or (a in second and b in second)

    self.assertTrue(_together(v[0], v[1]))
    self.assertTrue(_together(v[2], v[3]))
    print("pattern.vector.Cluster()")
    print("pattern.vector.hierarchical()")
def test_hierarchical(self):
    # Two-part test of vector.hierarchical():
    #   1. cluster the vectors of the first n documents in self.model and
    #      check the structure of what h.traverse() hands to the visitor;
    #   2. cluster four hand-made vectors and check that the two "feline"
    #      vectors and the two "canine" vectors land in different halves.

    def _test_cluster(cluster):
        # Assert cluster contains nested clusters and/or vectors.
        # cluster.flatten() does not depend on the loop variable, so it is
        # evaluated once here and reused below.
        flattened = cluster.flatten()
        cluster_ids = {v.id for v in flattened}
        for nested in cluster:
            if isinstance(nested, vector.Cluster):
                nested_ids = {v.id for v in nested.flatten()}
                # A nested cluster must be shallower than its parent and
                # may only hold vectors that the parent also holds.
                self.assertLess(nested.depth, cluster.depth)
                self.assertTrue(nested_ids.issubset(cluster_ids))
            else:
                self.assertIsInstance(nested, vector.Vector)
        self.assertIsInstance(cluster, list)
        self.assertIsInstance(cluster.depth, int)
        self.assertIsInstance(flattened, list)

    n = 50
    h = vector.hierarchical([d.vector for d in self.model[:n]], k=2)
    h.traverse(_test_cluster)

    # Assert the accuracy of hierarchical clustering (shallow test).
    # Assert that cats are separated from dogs.
    # NOTE(review): the leading space in " lion" looks accidental; it is kept
    # as-is because the key only has to be distinct from the other features.
    v = (
        vector.Vector({"feline": 1, " lion": 1, "mane": 1}),
        vector.Vector({"feline": 1, "tiger": 1, "stripe": 1}),
        vector.Vector({"canine": 1, "wolf": 1, "howl": 1}),
        vector.Vector({"canine": 1, "dog": 1, "bark": 1}),
    )
    h = vector.hierarchical(v)
    first, second = h[0][0], h[0][1]
    self.assertEqual(len(first), 2)
    self.assertEqual(len(second), 2)

    def _together(a, b):
        # True when both vectors sit in the same one of the two halves.
        return (a in first and b in first) or (a in second and b in second)

    self.assertTrue(_together(v[0], v[1]))
    self.assertTrue(_together(v[2], v[3]))
    print("pattern.vector.Cluster()")
    print("pattern.vector.hierarchical()")
# NOTE(review): the statement below is the tail of a definition whose header
# lies above the visible source; it is reproduced unchanged and its
# indentation is a guess.
    return len(questions)


def elaboration(questions):
    # For every item in `questions`, take len(parsetree(a)[0].pnp), cap it
    # at 2 with min(), and return the sum of those capped counts.
    # NOTE(review): presumably `.pnp` is a list of phrases found in the first
    # parsed sentence -- confirm against the parsetree() in use.
    return sum(min(len(parsetree(a)[0].pnp), 2) for a in questions)


def variance(cluster):
    # Average (via avg()) of distance(centroid(cluster), v) over every v in
    # `cluster`. centroid(cluster) is re-evaluated for each element.
    return avg([distance(centroid(cluster), v) for v in cluster])


# Build one Vector per entry of all_q from count(words(q), stemmer='lemma').
vectors = []
for q in all_q:
    v = count(words(q), stemmer='lemma')
    v = Vector(v)
    vectors.append(v)

# Cluster the vectors with k=250 and distance='cosine'.
clusters = hierarchical(vectors, k=250, distance='cosine')
# Normalise every top-level item to a list: a bare Vector becomes [v], any
# other item is replaced by its flatten() result. The `and ... or` form is
# only correct here because [v] is a non-empty (truthy) list.
clusters = [isinstance(v, Vector) and [v] or v.flatten() for v in clusters]
# Order the groups by ascending variance().
clusters = sorted(clusters, key=variance)

# Map row[<position of v in vectors>] to the index of the group holding v.
# NOTE(review): `row` is not defined anywhere in the visible source --
# confirm it is the intended sequence.
# NOTE(review): vectors.index(v) is a linear search that returns the first
# equal element, so equal vectors resolve to the same position.
categories = {}
for i, cluster in enumerate(clusters):
    for v in cluster:
        categories[row[vectors.index(v)]] = i


def flex(questions):
    # For each q in `questions`: GET classifier_uri + q, passing
    # auth=(watson_user, watson_pass), and append the 'top_class' entry of
    # the decoded JSON reply to ml_categories.
    # NOTE(review): no return statement is visible; the definition may
    # continue below this view.
    # NOTE(review): q is concatenated into the URI without URL-encoding and
    # requests.get() is called without a timeout -- confirm both are intended.
    ml_categories = []
    for q in questions:
        q_uri = classifier_uri + q
        j = requests.get(q_uri, auth=(watson_user, watson_pass))
        ml_categories.append(j.json()['top_class'])