def run_example():
    """
    Plot distortion against cluster count for hierarchical and k-means
    clustering of the DATA_896_URL data table.

    One singleton cluster is built per row of the data table.  For every
    cluster count from 6 to 20 the table is clustered with both methods,
    the distortion of each clustering is computed, and the two resulting
    series are drawn in a single simpleplot line chart.
    """
    data_table = load_data_table(DATA_896_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))

    # k-means runs first, before the hierarchical loop touches singleton_list.
    # The trailing 5 is the k-means iteration count (see the plot legend).
    kmeans = []
    for clusters_number in range(6, 21):
        cluster_list = alg_project3_solution.kmeans_clustering(
            singleton_list, clusters_number, 5)
        distortion = alg_project3_solution.compute_distortion(cluster_list, data_table)
        # float() keeps the original "0.0 + value" coercion of the y-value.
        kmeans.append([clusters_number, float(distortion)])

    # NOTE(review): the cluster counts are visited in descending order and the
    # same singleton_list is passed every time.  Presumably
    # hierarchical_clustering mutates its argument so each call continues
    # from the previous one -- confirm before reordering these calls.
    hierarchical = []
    for clusters_number in range(20, 5, -1):
        cluster_list = alg_project3_solution.hierarchical_clustering(
            singleton_list, clusters_number)
        distortion = alg_project3_solution.compute_distortion(cluster_list, data_table)
        hierarchical.append([clusters_number, float(distortion)])
    # Collected for 20 .. 6; flip so both series run 6 .. 20.
    hierarchical.reverse()

    simpleplot.plot_lines(
        "Distortion of the clusterings produced by hierarchical and "
        "k-means methods on 896 county data set",
        800, 600,
        "Number of clusters n [6 .. 20]",
        "Distortion",
        [hierarchical, kmeans],
        False,
        ["Hierarchical clustering", "k-means clustering with 5 iterations"])
def run_example():
    """
    Cluster the DATA_111_URL data table with k-means and plot the result.

    One singleton cluster is built per row of the data table, the rows are
    grouped into 9 clusters by k-means (third argument 5), the number of
    clusters and their distortion are printed, and the clusters are drawn.

    Set DESKTOP = True/False to use either matplotlib or simplegui.
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)

    # Single-argument print(...) produces the same text under Python 2 and
    # Python 3; the original print statements were Python-2-only syntax.
    print("Displaying %s k-means clusters" % len(cluster_list))
    print("Distortion %s" % alg_project3_solution.compute_distortion(cluster_list, data_table))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # The final True adds the cluster centers to the plot.
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example():
    """
    Compare the distortion of hierarchical and k-means clusterings of the
    DATA_896_URL data table.

    Every row of the table becomes a singleton cluster.  Each method is then
    asked for 6, 7, ..., 20 clusters, the distortion of every clustering is
    computed, and both [cluster count, distortion] series are handed to
    simpleplot.plot_lines.
    """
    solution = alg_project3_solution
    data_table = load_data_table(DATA_896_URL)
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # k-means is evaluated before any hierarchical call is made on
    # singleton_list.  float() mirrors the original "0.0 + value" coercion.
    kmeans = [
        [count, float(solution.compute_distortion(
            solution.kmeans_clustering(singleton_list, count, 5), data_table))]
        for count in range(6, 21)
    ]

    # NOTE(review): counts run 20 -> 6 against the same singleton_list;
    # presumably hierarchical_clustering shrinks that list in place so each
    # call resumes where the last one stopped -- confirm before changing the
    # order.  The series is reversed afterwards to read 6 -> 20.
    hierarchical = [
        [count, float(solution.compute_distortion(
            solution.hierarchical_clustering(singleton_list, count), data_table))]
        for count in range(20, 5, -1)
    ]
    hierarchical.reverse()

    title = ("Distortion of the clusterings produced by hierarchical and "
             "k-means methods on 896 county data set")
    legends = ["Hierarchical clustering", "k-means clustering with 5 iterations"]
    simpleplot.plot_lines(title, 800, 600,
                          "Number of clusters n [6 .. 20]", "Distortion",
                          [hierarchical, kmeans], False, legends)
def compute_kmeans_distortions(cluster_list):
    """
    list -> list

    Run k-means on cluster_list once for every target size from 6 through
    20 clusters and measure the distortion of each result against the
    module-level data_table.

    Every run starts from the same cluster_list and passes 5 as the third
    argument to kmeans_clustering (presumably the iteration count).

    Returns the distortion values ordered by increasing cluster count.
    """
    return [
        sol.compute_distortion(
            sol.kmeans_clustering(cluster_list, num_clusters, 5), data_table)
        for num_clusters in range(6, 21)
    ]
def compute_hier_distortions(cluster_list):
    """
    list -> list

    Hierarchically cluster cluster_list down to 20 clusters, then 19, and so
    on until 6 clusters remain, feeding each result into the next call.
    The distortion of every intermediate clustering is measured against the
    module-level data_table.

    Returns the distortion values ordered by increasing cluster count
    (6 first, 20 last).
    """
    current = cluster_list
    descending = []
    for target_size in range(20, 5, -1):
        # Continue from the previous, larger clustering rather than
        # starting over from the caller's list.
        current = sol.hierarchical_clustering(current, target_size)
        descending.append(sol.compute_distortion(current, data_table))
    # Values were gathered for 20 .. 6 clusters; hand them back as 6 .. 20.
    return descending[::-1]
""" Assignment 3 Question 7 Answer """ import alg_project3_viz as viz import alg_project3_solution as sol import alg_cluster data_table = viz.load_data_table(viz.DATA_111_URL) hier_data_list = sol.make_data_list(data_table) kmeans_data_list = sol.make_data_list(data_table) hier_cluster_list = sol.hierarchical_clustering(hier_data_list, 9) kmeans_cluster_list = sol.kmeans_clustering(kmeans_data_list, 9, 5) print("hierarchical:", sol.compute_distortion(hier_cluster_list, data_table)) print("kmeans:", sol.compute_distortion(kmeans_cluster_list, data_table)) # Hierarchical: 175163886915.8305 or 1.752 x 10^11 with four significant figures # K-means: 271254226924.20047 or 2.712 x 10^11
def run_example():
    """
    Compute k-means and hierarchical distortions for 6 .. 20 clusters on
    the DATA_896_URL data table and display the outcome.

    With DESKTOP set, the two [cluster count, distortion] series are passed
    to create_separate_plots.  Otherwise the clustering produced by the
    final hierarchical call (6 clusters) is shown with simplegui.
    """
    solution = alg_project3_solution
    data_table = load_data_table(DATA_896_URL)
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # question 10: distortion of k-means (third argument 5) for 6 .. 20 clusters
    kmeans_res = [
        [count, solution.compute_distortion(
            solution.kmeans_clustering(singleton_list, count, 5), data_table)]
        for count in range(6, 21)
    ]

    # question 10: distortion of hierarchical clustering, requested from 20
    # clusters down to 6.  Each entry is pushed to the front so the finished
    # series is ordered 6 .. 20.  This stays an explicit loop because
    # cluster_list is still needed after it finishes.
    hier_res = []
    for count in range(20, 5, -1):
        cluster_list = solution.hierarchical_clustering(singleton_list, count)
        hier_res.insert(
            0, [count, solution.compute_distortion(cluster_list, data_table)])

    # draw the results using matplotlib or simplegui
    if not DESKTOP:
        # cluster_list is whatever the last hierarchical call returned;
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
        return
    create_separate_plots(kmeans_res, hier_res)