def train(self, inputs):
    """Fit the cluster means to `inputs`.

    Starts from `self.k` points sampled at random from `inputs`, then
    alternates two steps until a full pass leaves every assignment
    unchanged:

    1. assign each point to a cluster index with `self.classify`;
    2. replace each cluster's mean with `vector_mean` of its points.

    The result is stored in `self.means`; nothing is returned.
    `random.sample` raises `ValueError` if `self.k` exceeds `len(inputs)`.
    """
    # choose k random points as the initial means
    self.means = random.sample(inputs, self.k)
    assignments = None

    while True:
        # find new assignments
        # Build a real list here: on Python 3 `map` returns a one-shot
        # iterator, which never compares equal to the previous round's
        # iterator (so the loop would never end) and is exhausted after
        # the first cluster's zip() pass below.
        new_assignments = [self.classify(point) for point in inputs]

        # if no assignments have changed, we are done
        if assignments == new_assignments:
            return

        # otherwise keep the new assignments
        assignments = new_assignments

        # and compute new means based on the new assignments
        for i in range(self.k):
            # find all points assigned to cluster i
            i_points = [p for p, a in zip(inputs, assignments) if a == i]

            # make sure i_points is not empty so we don't divide by 0;
            # a cluster with no points keeps its previous mean
            if i_points:
                self.means[i] = vector_mean(i_points)
# Plot the total squared error (errors) against each candidate k (ks).
plt.plot(ks, errors)
plt.xticks(ks)
plt.xlabel("k")
plt.ylabel("total squared error")
plt.title("Total Error vs. # of Clusters")
plt.show()

# Bottom-up Hierarchical Clustering
base_cluster = bottom_up_cluster(inputs)

# Ask generate_clusters for 3 clusters and pull the values out of each one
# (presumably the member points -- confirm against get_values).
three_clusters = list(map(get_values, generate_clusters(base_cluster, 3)))

# Draw each cluster with its own marker shape and color, numbering from 1.
for i, (cluster, marker, color) in enumerate(
        zip(three_clusters, ('D', 'o', '*'), ('r', 'g', 'b')), 1):
    # split the cluster's (x, y) pairs into parallel coordinate sequences
    xs, ys = zip(*cluster)
    plt.scatter(xs, ys, color=color, marker=marker)

    # put a number at the mean of the cluster
    x, y = vector_mean(cluster)
    plt.plot(x, y, marker='${}$'.format(i), color='black')

plt.title("User locations -- 3 Bottom-up Clusters, Min")
plt.xlabel("blocks east of city center")
plt.ylabel("blocks north of city center")
plt.show()