def answer_q10():
    """
    Compute distortion curves for hierarchical and k-means clustering.

    For each of the three data sources (viz.DATA_111_URL, viz.DATA_290_URL,
    viz.DATA_896_URL) the distortion is computed for every cluster count
    in range(6, 21), once per clustering method.

    Returns a tuple (x_vals, y_vals_hier, y_vals_kmean):
      x_vals       -- the cluster counts, range(6, 21)
      y_vals_hier  -- dict mapping data-source index (0, 1, 2) to the list
                      of hierarchical-clustering distortions, in the same
                      order as x_vals
      y_vals_kmean -- same layout for k-means clustering
    """
    data_sources = [viz.DATA_111_URL, viz.DATA_290_URL, viz.DATA_896_URL]
    x_vals = range(6, 21)
    y_vals_hier = {}
    y_vals_kmean = {}

    for idx, source in enumerate(data_sources):
        # 0. Generate data_table & cluster list for this data source
        clust_list, data_table = closest_pair.create_cluster_list(source)

        # 1. Hierarchical clustering - decreasing cluster count.
        #    Each result is fed into the next call, so the clustering for
        #    a smaller count continues from the previous (larger) one
        #    instead of starting over from the raw cluster list.
        hier_errors = []
        for clust_size in reversed(x_vals):
            clust_list = closest_pair.hierarchical_clustering(
                clust_list, clust_size)
            hier_errors.append(
                closest_pair.compute_distortions(clust_list, data_table))
        # Values were collected largest-count first; flip once so the list
        # lines up with x_vals (cheaper than insert(0, ...) per value).
        hier_errors.reverse()
        y_vals_hier[idx] = hier_errors

        # 2. k-means clustering - increasing cluster count.
        #    A fresh cluster list is built for every run.
        #    NOTE(review): the third argument 5 is presumably the number
        #    of k-means iterations - confirm against closest_pair.
        kmean_errors = []
        for clust_size in x_vals:
            clust_list, data_table = closest_pair.create_cluster_list(source)
            clust_list = closest_pair.kmeans_clustering(
                clust_list, clust_size, 5)
            kmean_errors.append(
                closest_pair.compute_distortions(clust_list, data_table))
        y_vals_kmean[idx] = kmean_errors

    return x_vals, y_vals_hier, y_vals_kmean
def test_q7():
    """
    Print three distortion values for the data set at URL.

    Printed in order: the distortion of the freshly created cluster list,
    the distortion after hierarchical clustering down to 16 clusters, and
    the distortion after k-means clustering with 16 clusters.
    """
    # create_cluster_list is reached through the closest_pair module, like
    # every other clustering helper used in this function.
    clust_list, data_table = closest_pair.create_cluster_list(URL)
    print(closest_pair.compute_distortions(clust_list, data_table))

    test1 = closest_pair.hierarchical_clustering(clust_list, 16)
    print(closest_pair.compute_distortions(test1, data_table))

    # Build a fresh cluster list before running k-means.
    # NOTE(review): presumably needed because hierarchical_clustering may
    # modify the list it is given - confirm against closest_pair.
    clust_list, data_table = closest_pair.create_cluster_list(URL)
    test2 = closest_pair.kmeans_clustering(clust_list, 16, 5)
    print(closest_pair.compute_distortions(test2, data_table))
"""
Code to answer question 7.

Prints the distortion of a 9-cluster hierarchical clustering and of a
9-cluster k-means clustering of the data set at URL.
"""

import closest_pair
import alg_project3_viz

# Data set used for both clusterings.
URL = alg_project3_viz.DATA_111_URL

# Hierarchical clustering down to 9 clusters.
clust_list, data_table = closest_pair.create_cluster_list(URL)
test1 = closest_pair.hierarchical_clustering(clust_list, 9)
print(closest_pair.compute_distortions(test1, data_table))

# k-means clustering with 9 clusters, starting from a fresh cluster list.
# NOTE(review): the trailing 5 is presumably the iteration count - confirm
# against closest_pair.kmeans_clustering.
clust_list, data_table = closest_pair.create_cluster_list(URL)
test2 = closest_pair.kmeans_clustering(clust_list, 9, 5)
print(closest_pair.compute_distortions(test2, data_table))