self.points[i].clusterId = self.classfication[i] for i in set(self.classfication): cluster = {} cluster['cluster'] = set() for p in self.points: if p.clusterId == i: cluster['cluster'].add(p) self.clusters.append(cluster) if __name__ == '__main__': es, ps, fs = [], [], [] for i in xrange(1, 10): points = readPoints('dataset2.dat', 'dataset2-label.dat') dbscan = DBscanClustering(points, i, 100) dbscan.clustering() es.append(i) ps.append(purity(dbscan.clusters, dbscan.points)) fs.append(fscore(dbscan.clusters, dbscan.points)) print '\t'.join([str(i) for i in es]) print '\t'.join([str(i) for i in ps]) print '\t'.join([str(i) for i in fs]) print "\n\n" ms, ps, fs = [], [], []
d = distance(point, cluster["mean"]) ds.append(d) minId = ds.index(min(ds)) if point.clusterId != minId: self.clusters[point.clusterId]["cluster"].remove(point) self.clusters[minId]["cluster"].add(point) point.clusterId = minId stop = False self._reCalculateMean() if __name__ == "__main__": ks, ps, fs = [], [], [] for k in xrange(5, 26): points = readPoints("dataset1.dat", "dataset1-label.dat") kmeans = KmeansClustering(points, k) kmeans.clustering() ks.append(k) ps.append(purity(kmeans.clusters, kmeans.points)) fs.append(fscore(kmeans.clusters, kmeans.points)) print "\t".join([str(i) for i in ks]) print "\t".join([str(i) for i in ps]) print "\t".join([str(i) for i in fs])