def answer_q10():
    """
    Build the distortion curves for hierarchical and k-means clustering.

    For each data source listed below, the distortion is computed for every
    cluster count in range(6, 21) using both clustering methods.

    Returns a tuple (x_vals, y_vals_hier, y_vals_kmean):
      x_vals       -- the cluster counts, range(6, 21)
      y_vals_hier  -- dict mapping the data source index (0, 1, 2) to the
                      list of hierarchical-clustering distortions, ordered
                      to line up with x_vals
      y_vals_kmean -- same layout for k-means clustering
    """
    urls = [viz.DATA_111_URL, viz.DATA_290_URL, viz.DATA_896_URL]
    x_vals = range(6, 21)
    y_vals_hier = {}
    y_vals_kmean = {}

    for source_idx, url in enumerate(urls):
        # 0. Load the data table and the initial cluster list
        clusters, table = closest_pair.create_cluster_list(url)

        # 1. Hierarchical clustering: walk the cluster counts from largest
        #    to smallest, feeding each result into the next call, then flip
        #    the collected errors so they match the increasing x_vals.
        hier_errors = []
        for num_clusters in reversed(x_vals):
            clusters = closest_pair.hierarchical_clustering(clusters,
                                                            num_clusters)
            hier_errors.append(
                closest_pair.compute_distortions(clusters, table))
        hier_errors.reverse()
        y_vals_hier[source_idx] = hier_errors

        # 2. k-means clustering: increasing cluster counts, starting from a
        #    freshly created cluster list every time.
        #    (third argument 5 is presumably the iteration count -- confirm
        #    against closest_pair.kmeans_clustering)
        kmean_errors = []
        for num_clusters in x_vals:
            clusters, table = closest_pair.create_cluster_list(url)
            clusters = closest_pair.kmeans_clustering(clusters,
                                                      num_clusters, 5)
            kmean_errors.append(
                closest_pair.compute_distortions(clusters, table))
        y_vals_kmean[source_idx] = kmean_errors

    return x_vals, y_vals_hier, y_vals_kmean
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui

    The data table is loaded from DATA_111_URL, every row is turned into a
    singleton alg_cluster.Cluster, the singletons are grouped with k-means
    clustering, and the result is drawn with the plotting backend selected
    by DESKTOP. Returns nothing.
    """
    data_table = load_data_table(DATA_111_URL)

    # One singleton cluster per row of the data table
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # Alternative clusterings -- swap one in for the k-means call below
    # (and adjust the printed label) to display it instead:
    # cluster_list = sequential_clustering(singleton_list, 15)
    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)

    # The k-means assignment used to be commented out together with the
    # alternatives, leaving cluster_list undefined (NameError) at the print
    # and plot calls below.
    # NOTE(review): requires alg_project3_solution to be imported at module
    # level -- confirm against the file's import block.
    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 15, 5)

    # Single-argument form prints the same text under Python 2 and Python 3
    print("Displaying %d k-means clusters" % len(cluster_list))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # last argument True -> add cluster centers
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def test_q7():
    """
    Print three distortion values for the data set at URL:
    the freshly created cluster list, the hierarchical clustering
    with 16 clusters, and the k-means clustering called with (16, 5).
    """
    def show_distortion(clusters, table):
        """ print the distortion of clusters measured against table """
        print(closest_pair.compute_distortions(clusters, table))

    # distortion of the initial cluster list
    initial_clusters, table = create_cluster_list(URL)
    show_distortion(initial_clusters, table)

    # hierarchical clustering
    hierarchical = closest_pair.hierarchical_clustering(initial_clusters, 16)
    show_distortion(hierarchical, table)

    # k-means clustering starts from a freshly created cluster list
    fresh_clusters, table = create_cluster_list(URL)
    kmeans = closest_pair.kmeans_clustering(fresh_clusters, 16, 5)
    show_distortion(kmeans, table)
"""
Code to answer question 7.

Prints the distortion of a hierarchical clustering and of a k-means
clustering of the data set referenced by alg_project3_viz.DATA_111_URL.
"""
import closest_pair
import alg_project3_viz

# Data source used for both clusterings below
URL = alg_project3_viz.DATA_111_URL

# Hierarchical clustering with target size 9, then print its distortion
clust_list, data_table = closest_pair.create_cluster_list(URL)
test1 = closest_pair.hierarchical_clustering(clust_list,9)
print(closest_pair.compute_distortions(test1, data_table))

# Re-create the cluster list before running k-means
# (presumably because the previous call may have modified it -- confirm).
# Arguments 9 and 5 are presumably cluster count and iteration count -- confirm.
clust_list, data_table = closest_pair.create_cluster_list(URL)
test2 = closest_pair.kmeans_clustering(clust_list,9,5)
print(closest_pair.compute_distortions(test2, data_table))