def run_example():
    """
    Load a data table, cluster it hierarchically and plot the clusters.

    The table is read with load_data_table(DATA_896_URL). One singleton
    cluster is built per row from the row's first five fields, the
    singletons are reduced to 20 clusters with
    alg_project3_solution.hierarchical_clustering, and the distortion and
    cluster count are printed before plotting.

    Set DESKTOP = True/False to use either matplotlib or simplegui.

    Takes no arguments and returns None.
    """
    # Alternative data set:
    # data_table = load_data_table(DATA_3108_URL)
    data_table = load_data_table(DATA_896_URL)

    # One singleton cluster per row of the table.
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # Alternative: sequential clustering
    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 20)
    distortion = compute_distortion(cluster_list, data_table)

    # Each message is joined into one string and passed to print as a single
    # argument, so the call is valid under both Python 2 and Python 3 and
    # emits exactly what the former comma-separated print statements did
    # (including the doubled space after "is ").
    print(" ".join(["Distortion of hierarchical clusters is ", str(distortion)]))
    print(" ".join(["Displaying", str(len(cluster_list)), "hierarchical clusters"]))

    # Alternative: k-means clustering
    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Distortion of k-means clusters is ", str(compute_distortion(cluster_list, data_table))
    # print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # Third argument True: add cluster centers to the plot.
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
"""
Question 3-1
Author: You-Hao Chang
"""

import time  # retained: code outside this view may still rely on it
import timeit

import matplotlib.pyplot as plt

import AT_project_3 as pj3
import helper

# Cluster-list sizes to benchmark (2 through 200). Defined once so the timing
# loop and the x-axis of the plot cannot drift apart.
x_vals = list(range(2, 201))

# Elapsed seconds per size for pj3.slow_closest_pair / pj3.fast_closest_pair.
running_time_scp = []
running_time_fcp = []

for num_clusters in x_vals:
    # presumably builds num_clusters random clusters -- verify against helper
    cluster_list = helper.gen_random_clusters(num_clusters)

    # timeit.default_timer is the benchmark clock recommended by the standard
    # library on both Python 2 and Python 3; time.time() can be coarse or jump
    # when the system clock is adjusted.
    start_time = timeit.default_timer()
    pj3.slow_closest_pair(cluster_list)
    running_time_scp.append(timeit.default_timer() - start_time)

    # Same input list, so both functions are timed on identical data.
    start_time = timeit.default_timer()
    pj3.fast_closest_pair(cluster_list)
    running_time_fcp.append(timeit.default_timer() - start_time)

# making comparison plots
fig, ax = plt.subplots()
ax.plot(x_vals, running_time_scp, '-b', label='slow closest pair')
ax.plot(x_vals, running_time_fcp, '-r', label='fast closest pair')
ax.legend(loc='upper left')
ax.set_title('Timing results (in desktop Python)')
ax.set_xlabel('number of initial clusters')
# NOTE(review): nothing in the visible portion labels the y-axis or shows/saves
# the figure -- presumably done by code that follows; confirm.
# NOTE(review): this line is a whitespace-collapsed fragment and does not parse
# as written; the original line breaks and indentation must be restored from
# the source file before it can run.
#
# It opens with the tail of a distortion-summing routine: for every cluster in
# cluster_list it adds cluster.cluster_error(data_table) to `distortion` and
# returns the total. The routine's `def` line and the initial value of
# `distortion` are not visible here -- presumably this is
# compute_distortion(cluster_list, data_table), which the script below calls;
# confirm against the full file.
#
# The script that follows loads the table with load_data_table(DATA_896_URL).
# For each num_cluster in range(6, 21) it rebuilds the list of singleton
# clusters (one alg_cluster.Cluster per table row, from the row's first five
# fields), runs pj3.hierarchical_clustering(singleton_list, num_cluster) and
# appends the resulting distortion to distortion_hc. A second, parallel pass
# does the same with pj3.kmeans_clustering(singleton_list, num_cluster, 5),
# collecting into distortion_kc.
#
# NOTE(review): with the line breaks lost it cannot be determined whether the
# print statements sit inside each loop or after it -- confirm.
# NOTE(review): compute_distortion is evaluated twice on the same arguments
# (once for the append, once for the print); the value could be reused.
# NOTE(review): the print statements use Python 2 syntax.
for cluster in cluster_list: distortion += cluster.cluster_error(data_table) return distortion data_table = load_data_table(DATA_896_URL) #hierarchical_clustering distortion_hc = [] for num_cluster in range(6, 21): singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = pj3.hierarchical_clustering(singleton_list, num_cluster) distortion_hc.append(compute_distortion(cluster_list, data_table)) print "Distortion of hierarchical clusters is ", str(compute_distortion(cluster_list, data_table)) print "Displaying", len(cluster_list), "hierarchical clusters" #kmeans_clustering distortion_kc = [] for num_cluster in range(6, 21): singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = pj3.kmeans_clustering(singleton_list, num_cluster, 5) distortion_kc.append(compute_distortion(cluster_list, data_table)) print "Distortion of k-means clusters is ", str(compute_distortion(cluster_list, data_table)) print "Displaying", len(cluster_list), "k-means clusters"