def test_simple_kmeans(self):
    # Hand-traced k-means run on five 2-D points split into 3 clusters.
    #
    # The third argument [0, 1, 4] selects the points used as the initial
    # means, i.e. [1, 1], [1, 2] and [5, 5].
    #
    #   pass 1: assignment [0, 1, 1, 1, 2]
    #           means      [1, 1], [2, 7 / 3.0], [5, 5]
    #   pass 2: assignment [0, 0, 1, 1, 2]
    #           means      [1, 1.5], [2.5, 2.5], [5, 5]
    #
    # Expected total error:
    #   (0.5^2 + 0.5^2 + 2 * 0.5^2 + 2 * 0.5^2) / 5.0
    points = [[1, 1], [1, 2], [2, 2], [3, 3], [5, 5]]
    seed_indices = [0, 1, 4]

    means, error = clust.kmeans(points, 3, seed_indices)
    print(means)

    # Each converged mean must appear exactly once, in whatever order
    # kmeans happens to return them.
    for expected_mean in ([5, 5], [1, 1.5], [2.5, 2.5]):
        self.assertEqual(1, means.count(expected_mean))

    expected_error = 6 * math.pow(0.5, 2) / 5.0
    self.assertAlmostEqual(expected_error, error)
def task(self):
    """Build a line-chart description of k-means error per cluster count.

    Parses the data set from ``adults.txt`` and, for every cluster count
    in ``self.CLUSTERS``, runs ``clust.kmeans`` ``self.SAMPLES`` times,
    keeping the lowest error of those runs.

    Returns:
        A dict describing a line chart whose single series holds one
        ``{"x": cluster_count, "y": lowest_error}`` point per entry of
        ``self.CLUSTERS``.
    """
    # Close the input file once parsing is done instead of leaking the
    # handle.
    # NOTE(review): assumes parse_input reads everything it needs before
    # returning (the data set is reused for every kmeans call below, so it
    # cannot be a one-shot iterator over the file) -- confirm. The 1000 is
    # presumably a row limit; verify against parse_input.
    with open('adults.txt', 'r') as infile:
        dataset = clust.parse_input(infile, 1000)

    all_errors = []
    for clusters in self.CLUSTERS:
        # No initial assignment is passed, so each run presumably starts
        # from a random one (confirm in clust.kmeans). Keep the lowest
        # error over self.SAMPLES runs.
        errors = [
            clust.kmeans(dataset, clusters)[1]
            for _ in xrange(self.SAMPLES)
        ]
        all_errors.append({"x": clusters, "y": min(errors)})

    chart = {
        "chart": {"defaultSeriesType": "line"},
        "xAxis": {"title": {"text": "Clusters"}, "min": 1},
        "yAxis": {"title": {"text": "Mean Squared Error"}},
        "title": {"text": "K-means Results"},
        "series": [{"data": all_errors}],
    }
    return chart
def test_simple_kmeans(self):
    # Hand-traced k-means run on five 2-D points split into 3 clusters.
    #
    # The third argument [0, 1, 4] selects the points used as the initial
    # means, i.e. [1, 1], [1, 2] and [5, 5].
    #
    #   pass 1: assignment [0, 1, 1, 1, 2]
    #           means      [1, 1], [2, 7 / 3.0], [5, 5]
    #   pass 2: assignment [0, 0, 1, 1, 2]
    #           means      [1, 1.5], [2.5, 2.5], [5, 5]
    #
    # Expected total error:
    #   (0.5^2 + 0.5^2 + 2 * 0.5^2 + 2 * 0.5^2) / 5.0
    points = [[1, 1], [1, 2], [2, 2], [3, 3], [5, 5]]
    seed_indices = [0, 1, 4]

    means, error = clust.kmeans(points, 3, seed_indices)
    print(means)

    # Each converged mean must appear exactly once, in whatever order
    # kmeans happens to return them.
    for expected_mean in ([5, 5], [1, 1.5], [2.5, 2.5]):
        self.assertEqual(1, means.count(expected_mean))

    expected_error = 6 * math.pow(0.5, 2) / 5.0
    self.assertAlmostEqual(expected_error, error)
def vclust(cond, q=1, vrep='irate', vargs=None, cmeth='kmeans', nclust=16,
           cargs=None):
    """Cluster the events of *cond* and fuse the clusters back into it.

    ``cond['evts']`` is first converted by the representation function
    registered under *vrep* in ``dist.VREPS``; the converted events are
    then partitioned by the clustering method named in *cmeth*.

    Args:
        cond: object supporting ``cond['evts']``, ``cond.new(evts=...)``
            and ``cond.fuse(...)``.
        q: not used by this function; kept so existing callers still work.
        vrep: key into ``dist.VREPS`` selecting the representation function.
        vargs: optional dict of keyword arguments forwarded to the
            representation function. ``None`` means no extra arguments.
        cmeth: clustering method, one of ``'tree'``, ``'kmeans'`` or
            ``'mixmod'``.
        nclust: number of clusters requested (for ``'tree'``, the value
            passed to the tree's ``cut``).
        cargs: optional dict of keyword arguments forwarded to the
            clustering routine. For ``'mixmod'`` it should contain e.g.
            ``{'model': "Gaussian_pk_Lk_Bk", 'reps': 1}``. ``None`` means
            no extra arguments.

    Returns:
        The result of ``cond.fuse(cond.new(evts=clsts))``.

    Raises:
        ValueError: if *cmeth* is not a known clustering method.
    """
    # Use None sentinels rather than shared mutable {} defaults.
    if vargs is None:
        vargs = {}
    if cargs is None:
        cargs = {}

    evts = dist.VREPS[vrep](cond['evts'], **vargs)
    if cmeth == 'tree':
        t = clust.vtree(evts, **cargs)
        clsts = t.cut(nclust)
    elif cmeth == 'kmeans':
        clsts = clust.kmeans(evts, nclust, **cargs)
    elif cmeth == 'mixmod':
        clsts = clust.mixmodpartition(evts, nclust, **cargs)
    else:
        # ValueError is a StandardError subclass on Python 2, so existing
        # handlers keep working, and unlike StandardError it also exists
        # on Python 3.
        raise ValueError("don't know clustering method %s" % cmeth)
    return cond.fuse(cond.new(evts=clsts))
def task(self):
    """Build a line-chart description of k-means error per cluster count.

    Parses the data set from ``adults.txt`` and, for every cluster count
    in ``self.CLUSTERS``, runs ``clust.kmeans`` ``self.SAMPLES`` times,
    keeping the lowest error of those runs.

    Returns:
        A dict describing a line chart whose single series holds one
        ``{"x": cluster_count, "y": lowest_error}`` point per entry of
        ``self.CLUSTERS``.
    """
    # Close the input file once parsing is done instead of leaking the
    # handle.
    # NOTE(review): assumes parse_input reads everything it needs before
    # returning (the data set is reused for every kmeans call below, so it
    # cannot be a one-shot iterator over the file) -- confirm. The 1000 is
    # presumably a row limit; verify against parse_input.
    with open('adults.txt', 'r') as infile:
        dataset = clust.parse_input(infile, 1000)

    all_errors = []
    for clusters in self.CLUSTERS:
        # No initial assignment is passed, so each run presumably starts
        # from a random one (confirm in clust.kmeans). Keep the lowest
        # error over self.SAMPLES runs.
        errors = [
            clust.kmeans(dataset, clusters)[1]
            for _ in xrange(self.SAMPLES)
        ]
        all_errors.append({"x": clusters, "y": min(errors)})

    chart = {
        "chart": {"defaultSeriesType": "line"},
        "xAxis": {"title": {"text": "Clusters"}, "min": 1},
        "yAxis": {"title": {"text": "Mean Squared Error"}},
        "title": {"text": "K-means Results"},
        "series": [{"data": all_errors}],
    }
    return chart
def test_kmeans(self):
    # For every cluster count from 2 through 4, run kmeans on self.dataset
    # without an explicit initial assignment and check that it hands back
    # exactly as many means as clusters were requested.
    for num_clusters in xrange(2, 5):
        means, _error = clust.kmeans(self.dataset, num_clusters)
        self.assertEqual(num_clusters, len(means))