def kmeansTest(k=2, n=20, verbose=False):
    """Run a small k-means demo on two synthetic x/y distributions.

    Two sample sets are requested from util.genDistribution (the second one
    shifted by +3 on the x mean and +1 on the y mean), concatenated, plotted
    as a single cluster, clustered with kmeans(), and plotted again.

    Args:
        k: number of clusters handed to kmeans().
        n: forwarded to util.genDistribution for each of the two sets.
        verbose: forwarded to kmeans() and to the second plot_cluster call.
    """
    # Fixed seed so that repeated runs draw the same samples.
    random.seed(0)

    mean_x, mean_y = 3, 5
    sd_x = sd_y = 1

    # First set is tagged '1.', the shifted second set '2.'.
    points = (
        util.genDistribution(mean_x, sd_x, mean_y, sd_y, n, '1.')
        + util.genDistribution(mean_x + 3, sd_x, mean_y + 1, sd_y, n, '2.')
    )

    print("before clustering")
    util.plot_cluster([cluster.Cluster(points)])

    print("after clustering")
    clusters = kmeans(points, k, verbose)
    util.plot_cluster(clusters, verbose)

    print('Final result')
    for found in clusters:
        print('', found)
def make_data(n, scale=1):
    """Build n samples along the line y = x with noise on both axes.

    Sample i starts at (i / scale, i / scale) with an empty name and label;
    the i-th item returned by util.genDistribution(xSD=0.3, ySD=0.3, n=n) is
    then added to it. Change scale to change the range of the coordinates.

    Returns:
        A list with one summed sample per index in range(n).
    """
    line_points = []
    for x in range(n):
        coord = float(x) / scale
        line_points.append(sample.Sample('', [coord, coord], ''))

    jitter = util.genDistribution(xSD=0.3, ySD=0.3, n=n)

    # Indexing (rather than zip) keeps the IndexError if fewer than n noise
    # items come back.
    noisy = []
    for idx in range(n):
        noisy.append(line_points[idx] + jitter[idx])
    return noisy
def make_data(n):
    """Build n randomly labelled samples along y = x, then add noise.

    For each index i a label is drawn with random.choice(LABELS) and used as
    both the first and the third argument of sample.Sample; the coordinates
    are (i / SCALE, i / SCALE). The i-th item returned by
    util.genDistribution(0, DEV * SCALE, 0, DEV * SCALE, n, '') is then added
    to sample i.

    Returns:
        A list with one summed sample per index in range(n).
    """
    # Draw every label first so the random stream is consumed in the same
    # order as before (labels, then whatever genDistribution draws).
    tags = [random.choice(LABELS) for _ in range(n)]

    line_points = []
    for idx, tag in enumerate(tags):
        coord = idx / (float(SCALE))
        line_points.append(sample.Sample(tag, [coord, coord], tag))

    spread = DEV * SCALE
    jitter = util.genDistribution(0, spread, 0, spread, n, '')

    noisy = []
    for idx in range(n):
        noisy.append(line_points[idx] + jitter[idx])
    return noisy
# NOTE(review): the physical line below holds three logical units whose line
# breaks appear to have been lost. Because it begins with '#', Python reads
# the entire line as a comment; restore the line breaks (and the class-body
# indentation of the two methods) before relying on any of it.
#
# update(self, samples)
#   Delegates to helper.update(self, samples). Per its own docstring it
#   replaces the samples in the cluster and returns how much the centroid
#   has changed. Presumably a method of Cluster -- the class header is not
#   visible here; confirm.
#
# __str__(self)
#   Collects e.getName() for every e in self.samples, sorts the names, and
#   returns 'Cluster with centroid ' + str(self.centroid.getFeatures())
#   + ' contains:\n ' followed by each name plus ', '; the final ', ' is
#   removed by the [:-2] slice.
#   NOTE(review): when self.samples is empty there is no trailing ', ', so
#   the same slice trims the last two characters of the header instead.
#
# __main__ demo
#   Builds a Cluster from util.genDistribution(), prints c.centroid and
#   c.centroid.features, and plots it. It then calls
#   c.update(util.genDistribution(1, 1, 1, 1, 30)), prints the returned
#   value as "center moved: ", and plots again. plot_cluster is given a
#   one-element list because, per the inline comment, it expects an array of
#   clusters.
#### Implement the centroid updating function here! def update(self, samples): """Replace the samples in the cluster by new samples Return: how much the centroid has changed""" return helper.update(self, samples) def __str__(self): names = [] for e in self.samples: names.append(e.getName()) names.sort() result = 'Cluster with centroid '\ + str(self.centroid.getFeatures()) + ' contains:\n ' for e in names: result = result + e + ', ' return result[:-2] if __name__ == "__main__": test_samples = util.genDistribution() c = Cluster(test_samples) print(c.centroid) print("cluster center: ", c.centroid.features) util.plot_cluster([c]) # now assign the cluster new samples, and move it test_samples2 = util.genDistribution(1, 1, 1, 1, 30) diff = c.update(test_samples2) print("center moved: ", diff) # plot_cluster expects an array of cluster... util.plot_cluster([c])
print(p) if __name__ == "__main__": # make data random.seed(0) n = 100 K = 3 LABELS = ('a', 'b', 'c') all_cluster = [] data = [] for i in range(K): tmp_data = util.genDistribution(i * 2 + 1, 1, i * 2 + 1, 1, n=20, label=LABELS[i]) all_cluster.append(cl.Cluster(tmp_data)) data += tmp_data def onclick(event): # Creating a new point and finding the k nearest neighbours new = sample.Sample('', [event.xdata, event.ydata], '') knn(new, data, K) # draw the new point data.append(new) pylab.scatter([new.getFeatures()[0]], \ [new.getFeatures()[1]], \ label = new.getLabel(), \
""" max_label = util.LABELS[0] p.setLabel(max_label) # above forces a fixed label: remove them # replace knn_helper.knn(p, data, k) with your own logic print(p) knn_helper.knn(p, data, k) print(p) if __name__ == "__main__": random.seed(0) n = 100 K = 3 data = util.genDistribution(n=10) for d in data: d.setLabel(random.choice(util.LABELS)) print("before....") util.plot_data(data) new_pt = sample.Sample('', [0.2, 0.3], '') knn(new_pt, data, K) data.append(new_pt) print("\nafter....") util.plot_data(data)