def test_centroid(self):
    # Assert center of list of vectors.
    # The expected value averages each listed key over both vectors, with the
    # key missing from the first vector contributing 0:
    # cat = (1 + 0.5) / 2 = 0.75, dog = (0 + 1) / 2 = 0.5.
    v = vector.centroid([{"cat": 1}, {"cat": 0.5, "dog": 1}], keys=["cat", "dog"])
    self.assertEqual(v, {"cat": 0.75, "dog": 0.5})
    # Parenthesized form: prints the same text on Python 2 and also parses on
    # Python 3, where the bare print statement is a SyntaxError.
    print("pattern.vector.centroid()")
def _generate_clusters(self, k=10, p=0.8, maxlevel=10):
    """Cluster the corpus and pick one representative document per cluster.

    The strategy is selected by self.method:

    - "kmeans": cluster self.corpus with KMEANS, choosing the initial k
      values with the KMPP method. For each cluster, the document whose
      vector is nearest to the cluster centroid becomes its representative.
    - "covertree": insert every document name into a Covertree that uses
      the distance between document vectors, then cluster from the tree.

    Any other value of self.method leaves the instance unchanged.

    k is the number of clusters.
    p is to control the error of KMEANS, when p=1.0 is faster with small error.
    maxlevel is passed to the Covertree constructor ("covertree" only).

    Results are stored on the instance rather than returned. For "kmeans",
    self.centroids is a list of document names (one per cluster) and
    self.clusters is a list of lists of document names. For "covertree",
    both are whatever clustering_from_ct(k) returns.
    """
    if self.method == "kmeans":
        from pattern.vector import KMEANS, KMPP
        self.clusters = self.corpus.cluster(method=KMEANS, k=k, seed=KMPP, p=p, iterations=10)
        doc_list = []
        # For each cluster, calculate the centroid, and find the doc (vector)
        # which is nearest to the centroid.
        for cluster in self.clusters:
            c = centroid(cluster)
            # The running minimum used to be seeded with the tuple
            # (cluster[0].vector, c) instead of a distance, so "d < d_min"
            # compared a number with a tuple (TypeError on Python 3).
            # min() compares real distances only and, like the strict "<"
            # scan, keeps the first document on ties.
            # NOTE(review): an empty cluster is still not handled; min()
            # raises ValueError on an empty sequence.
            doc_min = min(cluster, key=lambda doc: distance(doc.vector, c))
            doc_list.append(doc_min)
        self.centroids = [doc.name for doc in doc_list]
        # Replace the document objects by their names only after the
        # representatives were chosen, since that step needs the vectors.
        self.clusters = [[doc.name for doc in cluster] for cluster in self.clusters]
    elif self.method == 'covertree':
        def mydistance(doc_name1, doc_name2):
            # The tree stores document names; resolve them to vectors here.
            v1 = self.corpus.document(doc_name1).vector
            v2 = self.corpus.document(doc_name2).vector
            return distance(v1, v2)

        self.covertree = Covertree(mydistance, maxlevel)
        for doc in self.corpus:
            tree_node = myTree(doc.name)
            self.covertree.insert(tree_node, self.covertree.ct, 0)
        self.covertree.merge_levels()
        self.centroids, self.clusters = self.covertree.clustering_from_ct(k)
def _generate_clusters(self, k=10, p=0.8, maxlevel=10):
    """Cluster the corpus and pick one representative document per cluster.

    The strategy is selected by self.method:

    - "kmeans": cluster self.corpus with KMEANS, choosing the initial k
      values with the KMPP method. For each cluster, the document whose
      vector is nearest to the cluster centroid becomes its representative.
    - "covertree": insert every document name into a Covertree that uses
      the distance between document vectors, then cluster from the tree.

    Any other value of self.method leaves the instance unchanged.

    k is the number of clusters.
    p is to control the error of KMEANS, when p=1.0 is faster with small error.
    maxlevel is passed to the Covertree constructor ("covertree" only).

    Results are stored on the instance rather than returned. For "kmeans",
    self.centroids is a list of document names (one per cluster) and
    self.clusters is a list of lists of document names. For "covertree",
    both are whatever clustering_from_ct(k) returns.
    """
    if self.method == "kmeans":
        from pattern.vector import KMEANS, KMPP
        self.clusters = self.corpus.cluster(method=KMEANS, k=k, seed=KMPP, p=p, iterations=10)
        doc_list = []
        # For each cluster, calculate the centroid, and find the doc (vector)
        # which is nearest to the centroid.
        for cluster in self.clusters:
            c = centroid(cluster)
            # The running minimum used to be seeded with the tuple
            # (cluster[0].vector, c) instead of a distance, so "d < d_min"
            # compared a number with a tuple (TypeError on Python 3).
            # min() compares real distances only and, like the strict "<"
            # scan, keeps the first document on ties.
            # NOTE(review): an empty cluster is still not handled; min()
            # raises ValueError on an empty sequence.
            doc_min = min(cluster, key=lambda doc: distance(doc.vector, c))
            doc_list.append(doc_min)
        self.centroids = [doc.name for doc in doc_list]
        # Replace the document objects by their names only after the
        # representatives were chosen, since that step needs the vectors.
        self.clusters = [[doc.name for doc in cluster] for cluster in self.clusters]
    elif self.method == 'covertree':
        def mydistance(doc_name1, doc_name2):
            # The tree stores document names; resolve them to vectors here.
            v1 = self.corpus.document(doc_name1).vector
            v2 = self.corpus.document(doc_name2).vector
            return distance(v1, v2)

        self.covertree = Covertree(mydistance, maxlevel)
        for doc in self.corpus:
            tree_node = myTree(doc.name)
            self.covertree.insert(tree_node, self.covertree.ct, 0)
        self.covertree.merge_levels()
        self.centroids, self.clusters = self.covertree.clustering_from_ct(k)
def test_centroid(self):
    # Check the center of a list of vectors.
    # The expected value averages each listed feature over both vectors, with
    # the feature missing from the first vector contributing 0:
    # cat = (1 + 0.5) / 2 = 0.75, dog = (0 + 1) / 2 = 0.5.
    vectors = [{"cat": 1}, {"cat": 0.5, "dog": 1}]
    expected = {"cat": 0.75, "dog": 0.5}
    center = vector.centroid(vectors, features=["cat", "dog"])
    self.assertEqual(center, expected)
    print("pattern.vector.centroid()")
def test_centroid(self):
    # Check the centroid of a Cluster that itself contains a Cluster.
    # The expected 2.33 is (1 + 2 + 4) / 3 to two places, i.e. the vectors of
    # the inner cluster are expected to count like top-level ones.
    inner = vector.Cluster(({"a": 2}, {"a": 4}))
    outer = vector.Cluster(({"a": 1}, inner))
    center = vector.centroid(outer)
    self.assertAlmostEqual(center["a"], 2.33, places=2)
    print("pattern.vector.centroid()")
def test_centroid(self):
    # Assert centroid of recursive Cluster.
    # The expected 2.33 is (1 + 2 + 4) / 3 to two places, i.e. the vectors of
    # the nested cluster are expected to count like top-level ones.
    v = vector.Cluster(({"a": 1}, vector.Cluster(({"a": 2}, {"a": 4}))))
    self.assertAlmostEqual(vector.centroid(v)["a"], 2.33, places=2)
    # Parenthesized form: prints the same text on Python 2 and also parses on
    # Python 3, where the bare print statement is a SyntaxError.
    print("pattern.vector.centroid()")
def variance(cluster):
    """Return avg() of the distances between the cluster centroid and its members.

    cluster is iterated for its members and also passed whole to centroid().
    Each member is handed to distance() as the second argument, with the
    centroid as the first.
    """
    members = list(cluster)
    if not members:
        # With no members, centroid() was never evaluated before; keep
        # delegating the empty case to avg() alone.
        return avg([])
    # Compute the centroid once instead of once per member.
    # NOTE(review): assumes centroid() is a pure function of cluster - confirm.
    center = centroid(cluster)
    return avg([distance(center, v) for v in members])