def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = cl.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "kmeans_clustering" print compute_distortion(cluster_list, data_table)
for cluster in cluster_list: ans += cluster.cluster_error(datatable) return ans data_table1 = load_data_table(DATA_896_URL) data_table2 = load_data_table(DATA_896_URL) singleton_list1 = [] singleton_list2 = [] for line in data_table1: singleton_list1.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) for line in data_table2: singleton_list2.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) data1 = [] data2 = [] cluster_list1 = cl.hierarchical_clustering(singleton_list1, 21) for i in range(20, 5, -1): new_pair = cl.fast_closest_pair(cluster_list1) cluster_list1[new_pair[1]].merge_clusters(cluster_list1[new_pair[2]]) cluster_list1.remove(cluster_list1[new_pair[2]]) cluster_list2 = cl.kmeans_clustering(singleton_list2, i, 1) data1.append((i, compute_distortion(cluster_list1, data_table1))) data2.append((i, compute_distortion(cluster_list2, data_table2))) simpleplot.plot_lines('Quality Analysis - 896', 800, 600, 'num of clusters', 'Total Error', [data1, data2], False, ['hierarchical_clustering', 'kmeans_clustering'])