def q10():
    """
    Plot distortion versus number of clusters for k-means and hierarchical
    clustering, one figure per data set

    For each of the three data sets (viz.DATA_111_URL, viz.DATA_290_URL,
    viz.DATA_896_URL) the distortion of both methods is computed for every
    cluster count from 6 to 20 inclusive, then plotted with k-means in green
    and hierarchical clustering in blue.
    """
    nodes_list = {viz.DATA_111_URL: 111,
                  viz.DATA_290_URL: 290,
                  viz.DATA_896_URL: 896}
    url_list = [viz.DATA_111_URL, viz.DATA_290_URL, viz.DATA_896_URL]
    # Bound once, up front: it is needed by both the compute and the plot loop.
    cluster_range = list(range(6, 20 + 1))
    kmeans_dict = dict()
    hierarchical_dict = dict()

    for url in url_list:
        data_table = viz.load_data_table(url)
        singleton_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
            for line in data_table
        ]

        # kmeans: runs first, while singleton_list still holds one cluster
        # per row (third argument 5 is passed through unchanged).
        kmeans_dict[url] = [
            compute_distortion(
                alg_project3.kmeans_clustering(singleton_list, cluster_count, 5),
                data_table)
            for cluster_count in cluster_range
        ]

        # hierarchical: work downward from 20 to 6 clusters, feeding each
        # result into the next call so every run continues from the previous
        # merge state.  The *returned* list is measured instead of relying on
        # the argument having been modified in place.
        cluster_list = singleton_list
        hierarchical_errors = []
        for count in reversed(cluster_range):
            cluster_list = alg_project3.hierarchical_clustering(cluster_list, count)
            hierarchical_errors.append(compute_distortion(cluster_list, data_table))
        # Errors were collected for 20..6; flip them to line up with 6..20.
        hierarchical_errors.reverse()
        hierarchical_dict[url] = hierarchical_errors

    for url in url_list:
        plt.title('Distortion for hierarchical and k-means clustering for '
                  + str(nodes_list[url]) + ' points')
        plt.xlabel('Number of clusters')
        plt.ylabel('Distortion')
        line1, = plt.plot(cluster_range, kmeans_dict[url], 'g')
        line2, = plt.plot(cluster_range, hierarchical_dict[url], 'b')
        plt.legend((line1, line2),
                   ('kmeans clustering', 'hierarchical clustering'))
        # Shown inside the loop so each data set gets its own titled figure.
        plt.show()


# Driver calls, left disabled:
# q2()
# q2()
# q3()
# q5()
# q6()
# q7()
# q10()
def q10():
    """
    Plot distortion versus number of clusters for k-means and hierarchical
    clustering, one figure per data set

    Covers the data sets behind viz.DATA_111_URL, viz.DATA_290_URL and
    viz.DATA_896_URL, with cluster counts 6 through 20 inclusive.  K-means is
    drawn in green, hierarchical clustering in blue.
    """
    nodes_list = {
        viz.DATA_111_URL: 111,
        viz.DATA_290_URL: 290,
        viz.DATA_896_URL: 896
    }
    url_list = [viz.DATA_111_URL, viz.DATA_290_URL, viz.DATA_896_URL]
    # Defined before either loop so the plot loop never depends on a name
    # that was only bound inside the compute loop.
    cluster_range = list(range(6, 20 + 1))
    kmeans_dict = dict()
    hierarchical_dict = dict()

    for url in url_list:
        data_table = viz.load_data_table(url)
        singleton_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]) for line in data_table
        ]

        # kmeans (done before the hierarchical pass touches singleton_list)
        kmeans_errors = []
        for cluster_count in cluster_range:
            cluster_list = alg_project3.kmeans_clustering(
                singleton_list, cluster_count, 5)
            kmeans_errors.append(compute_distortion(cluster_list, data_table))
        kmeans_dict[url] = kmeans_errors

        # hierarchical: go from 20 clusters down to 6, reusing the previous
        # result as the next input.  Distortion is computed on the list the
        # call returns rather than on the list that was passed in.
        hierarchical_errors = []
        cluster_list = singleton_list
        for count in range(20, 6 - 1, -1):
            cluster_list = alg_project3.hierarchical_clustering(
                cluster_list, count)
            hierarchical_errors.append(
                compute_distortion(cluster_list, data_table))
        # Collected in 20..6 order; reverse to match cluster_range (6..20).
        hierarchical_dict[url] = hierarchical_errors[::-1]

    for url in url_list:
        plt.title('Distortion for hierarchical and k-means clustering for ' +
                  str(nodes_list[url]) + ' points')
        plt.xlabel('Number of clusters')
        plt.ylabel('Distortion')
        line1, = plt.plot(cluster_range, kmeans_dict[url], 'g')
        line2, = plt.plot(cluster_range, hierarchical_dict[url], 'b')
        plt.legend((line1, line2),
                   ('kmeans clustering', 'hierarchical clustering'))
        # One figure per data set.
        plt.show()
def run_example():
    """
    Load a data table from file, cluster it two ways and print the distortion

    Reads "unifiedCancerData_111.csv", runs k-means (9 clusters, third
    argument 5) and then hierarchical clustering (9 clusters) on the same
    singleton list, and prints the cluster count and compute_distortion
    value for each.  Nothing is plotted.
    """
    # Other inputs that have been used here: unifiedCancerData_290.csv,
    # unifiedCancerData_896.csv, unifiedCancerData_3108.csv, or
    # load_data_table(DATA_3108_URL).
    data_table = load_data_from_file("unifiedCancerData_111.csv")
    singleton_list = gen_singleton_list(data_table)

    # Each message is built as one string so that print emits the same text
    # whether it is parsed as a Python 2 statement or a Python 3 call.
    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    distortion = alg_project3.compute_distortion(cluster_list, data_table)
    print(" ".join(["Displaying", str(len(cluster_list)), "k-means clusters"]))
    print(" ".join(["compute_distortion : ", str(distortion)]))

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    distortion = alg_project3.compute_distortion(cluster_list, data_table)
    print(" ".join(["Displaying", str(len(cluster_list)),
                    "hierarchical clusters"]))
    print(" ".join(["compute_distortion : ", str(distortion)]))
def run_example():
    """
    Load a data table, compute a list of clusters and plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # NOTE(review): the sequential result is only used for the printed count;
    # the clusters that get plotted below come from hierarchical_clustering.
    sequential_clusters = sequential_clustering(singleton_list, 15)
    # Single-string print: a multi-argument print(...) would show a tuple
    # repr when run under Python 2.
    print(" ".join(["Displaying", str(len(sequential_clusters)),
                    "sequential clusters"]))

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    # Alternative: alg_project3.kmeans_clustering(singleton_list, 9, 5)

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # third argument True adds cluster centers (False omits them)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def q2():
    """
    Plot 15 hierarchical clusters of the data set at viz.DATA_3108_URL
    """
    table = viz.load_data_table(viz.DATA_3108_URL)
    # One single-member cluster per row of the table.
    singletons = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in table
    ]
    clusters = alg_project3.hierarchical_clustering(singletons, 15)
    alg_clusters_matplotlib.plot_clusters(table, clusters, True)
def run_example():
    """
    Load a data table from file, cluster it two ways and print the distortion

    Reads "unifiedCancerData_111.csv", runs k-means (9 clusters, third
    argument 5) followed by hierarchical clustering (9 clusters) on the same
    singleton list, and prints the cluster count and compute_distortion
    value for each.  Nothing is plotted.
    """
    # Other inputs that have been used here: unifiedCancerData_290.csv,
    # unifiedCancerData_896.csv, unifiedCancerData_3108.csv, or
    # load_data_table(DATA_3108_URL).
    data_table = load_data_from_file("unifiedCancerData_111.csv")
    singleton_list = gen_singleton_list(data_table)

    # Messages are joined into one string first so print produces identical
    # text as a Python 2 statement and as a Python 3 function call.
    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    kmeans_distortion = alg_project3.compute_distortion(cluster_list,
                                                        data_table)
    print(" ".join(["Displaying", str(len(cluster_list)), "k-means clusters"]))
    print(" ".join(["compute_distortion : ", str(kmeans_distortion)]))

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    hierarchical_distortion = alg_project3.compute_distortion(cluster_list,
                                                              data_table)
    print(" ".join(["Displaying", str(len(cluster_list)),
                    "hierarchical clusters"]))
    print(" ".join(["compute_distortion : ", str(hierarchical_distortion)]))
def run_example():
    """
    Load a data table, compute a list of clusters and plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # NOTE(review): only the size of the sequential clustering is reported;
    # what is plotted below is the hierarchical clustering.
    sequential_clusters = sequential_clustering(singleton_list, 15)
    # Joined into one string so the line is valid, and prints the same text,
    # on both Python 2 and Python 3.
    print(" ".join(["Displaying", str(len(sequential_clusters)),
                    "sequential clusters"]))

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    # Alternative: alg_project3.kmeans_clustering(singleton_list, 9, 5)

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # third argument True adds cluster centers (False omits them)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def test_hierarchical24():
    """
    Test for hierarchical clustering
    Note that hierarchical_clustering mutates cluster_list

    Runs student.hierarchical_clustering on the 24 county table for every
    target size from 23 down to 2 and compares the resulting set of county
    tuples with the expected one, reporting through poc_simpletest.
    """
    # load small data table
    # (single-argument prints keep the output identical on Python 2 and 3)
    print("")
    print("Testing hierarchical_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)

    # test data of the form [size of output cluster, sets of county tuples]
    hierdata_24 = [
        [23, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013',), ('34039',), ('34017',), ('36061',), ('36005',),
                  ('36047',), ('36059',), ('36081',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [22, set([('11001', '51013'), ('36047', '36081'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('34013',), ('34039',), ('34017',),
                  ('36061',), ('36005',), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [21, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                  ('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',), ('24510',), ('34013',), ('34039',),
                  ('34017',), ('36059',), ('41051',), ('41067',), ('51840',),
                  ('51760',), ('55079',), ('54009',)])],
        [20, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                  ('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',), ('24510',), ('34039',),
                  ('34013', '34017'), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [19, set([('34013', '34017', '34039'), ('11001', '51013'),
                  ('36005', '36061'), ('36047', '36081'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [18, set([('34013', '34017', '34039'), ('11001', '51013'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('36059',),
                  ('36005', '36047', '36061', '36081'), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [17, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('36059',),
                  ('34013', '34017', '34039', '36005', '36047', '36061', '36081'),
                  ('41051',), ('41067',), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [16, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051',), ('41067',), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [15, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [14, set([('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',), ('11001', '24510', '51013')])],
        [13, set([('06037', '06059'), ('01073',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',), ('11001', '24510', '51013')])],
        [12, set([('06037', '06059'), ('01073',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51840')])],
        [11, set([('06029', '06037', '06059'), ('01073',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51840')])],
        [10, set([('06029', '06037', '06059'), ('01073',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51760', '51840')])],
        [9, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',),
                 ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                 ('41051', '41067'), ('55079',), ('54009',),
                 ('11001', '24510', '51013', '51760', '51840')])],
        [8, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',), ('41051', '41067'), ('55079',), ('54009',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840')])],
        [7, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',), ('41051', '41067'), ('55079',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [6, set([('06029', '06037', '06059', '06071', '06075'), ('01073',),
                 ('08031',), ('41051', '41067'), ('55079',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [5, set([('06029', '06037', '06059', '06071', '06075'), ('08031',),
                 ('41051', '41067'), ('01073', '55079'),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [4, set([('06029', '06037', '06059', '06071', '06075'),
                 ('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('08031',), ('41051', '41067')])],
        [3, set([('06029', '06037', '06059', '06071', '06075', '41051', '41067'),
                 ('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('08031',)])],
        [2, set([('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('06029', '06037', '06059', '06071', '06075', '08031', '41051',
                  '41067')])],
    ]

    suite = poc_simpletest.TestSuite()

    for num_clusters, expected_county_tuple in hierdata_24:

        # build initial list of clusters for each test since mutation is allowed
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
            for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.hierarchical_clustering(cluster_list, num_clusters)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing hierarchical_clustering on 24 county table, num_clusters = " + str(num_clusters)
        error_message += "\nStudent county tuples: " + str(student_county_tuple)
        error_message += "\nExpected county tuples: " + str(expected_county_tuple)
        suite.run_test(student_county_tuple == expected_county_tuple, True, error_message)

    suite.report_results()
def q2():
    """
    Cluster the data set at viz.DATA_3108_URL hierarchically into 15 clusters
    and plot the result
    """
    county_rows = viz.load_data_table(viz.DATA_3108_URL)

    # Start from one cluster per table row.
    initial_clusters = []
    for entry in county_rows:
        members = set([entry[0]])
        initial_clusters.append(
            alg_cluster.Cluster(members, entry[1], entry[2], entry[3],
                                entry[4]))

    merged = alg_project3.hierarchical_clustering(initial_clusters, 15)
    alg_clusters_matplotlib.plot_clusters(county_rows, merged, True)
def q7():
    """
    Print the distortion of hierarchical and k-means clustering (9 clusters
    each) on the data set at viz.DATA_111_URL
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # k-means is evaluated first; the hierarchical run that follows receives
    # the same singleton_list object.
    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    kmeans_error = compute_distortion(cluster_list, data_table)

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    hierarchical_error = compute_distortion(cluster_list, data_table)

    # One pre-joined string per line so the output is the same whether print
    # is a Python 2 statement or a Python 3 function.
    print(" ".join(['hierarchical clustering', str(hierarchical_error)]))
    print(" ".join(['kmeans clustering', str(kmeans_error)]))
def q7():
    """
    Print the distortion of hierarchical and k-means clustering (9 clusters
    each) on the data set at viz.DATA_111_URL
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # k-means is evaluated first; the hierarchical run that follows receives
    # the same singleton_list object.
    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    kmeans_error = compute_distortion(cluster_list, data_table)

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 9)
    hierarchical_error = compute_distortion(cluster_list, data_table)

    # A multi-argument print(...) shows a tuple repr under Python 2; joining
    # first gives the same plain text on Python 2 and Python 3.
    print(" ".join(['hierarchical clustering', str(hierarchical_error)]))
    print(" ".join(['kmeans clustering', str(kmeans_error)]))
def run_example():
    """
    Load a data table and plot the distortion of two clustering methods

    Uses the table at DATA_896_URL.  For every cluster count from 6 to 20
    inclusive, runs hierarchical clustering and k-means (5 iterations) on
    fresh copies of the singleton clusters, prints the number of clusters
    produced, and records alg_app3.compute_distortion for each.  The two
    distortion curves are then shown with matplotlib.
    """
    data_table = load_data_table(DATA_896_URL)
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # Shared by the compute loop and the plot so the two cannot drift apart.
    cluster_counts = list(range(6, 21))
    k_dis = []
    h_dis = []
    for cluster_num in cluster_counts:
        # Each run gets its own copies so neither method sees clusters
        # altered by an earlier call.
        cluster_list = alg_project3.hierarchical_clustering(
            [c.copy() for c in singleton_list], cluster_num)
        print(" ".join(["Displaying", str(len(cluster_list)),
                        "hierarchical clusters"]))
        h_dis.append(alg_app3.compute_distortion(cluster_list, data_table))

        cluster_list = alg_project3.kmeans_clustering(
            [c.copy() for c in singleton_list], cluster_num, 5)
        print(" ".join(["Displaying", str(len(cluster_list)),
                        "k-means clusters"]))
        k_dis.append(alg_app3.compute_distortion(cluster_list, data_table))

    plt.plot(cluster_counts, k_dis, '-b', label='kmeans_clustering (5 iteration)')
    plt.plot(cluster_counts, h_dis, '-r', label='hierarchical_clustering')
    plt.legend(loc='upper right')
    plt.xlabel("number of clusters")
    plt.ylabel("distortion")
    plt.title("Distortion of 2 clustering methods: 896 counties")
    plt.show()
    # NOTE(review): an unterminated triple-quote used to follow here (it
    # appears to have opened a disabled cluster-drawing section that is not
    # present); it was removed so the next definition is not swallowed.
def test_hierarchical24():
    """
    Test for hierarchical clustering
    Note that hierarchical_clustering mutates cluster_list

    Runs student.hierarchical_clustering on the 24 county table for every
    target size from 23 down to 2 and compares the resulting set of county
    tuples with the expected one, reporting through poc_simpletest.
    """
    # load small data table
    # (single-argument prints keep the output identical on Python 2 and 3)
    print("")
    print("Testing hierarchical_clustering on 24 county set")
    data_24_table = load_data_table(DATA_24_URL)

    # test data of the form [size of output cluster, sets of county tuples]
    hierdata_24 = [
        [23, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013',), ('34039',), ('34017',), ('36061',), ('36005',),
                  ('36047',), ('36059',), ('36081',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [22, set([('11001', '51013'), ('36047', '36081'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('34013',), ('34039',), ('34017',),
                  ('36061',), ('36005',), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [21, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                  ('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',), ('24510',), ('34013',), ('34039',),
                  ('34017',), ('36059',), ('41051',), ('41067',), ('51840',),
                  ('51760',), ('55079',), ('54009',)])],
        [20, set([('11001', '51013'), ('36005', '36061'), ('36047', '36081'),
                  ('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',), ('24510',), ('34039',),
                  ('34013', '34017'), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [19, set([('34013', '34017', '34039'), ('11001', '51013'),
                  ('36005', '36061'), ('36047', '36081'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('36059',), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [18, set([('34013', '34017', '34039'), ('11001', '51013'), ('01073',),
                  ('06059',), ('06037',), ('06029',), ('06071',), ('06075',),
                  ('08031',), ('24510',), ('36059',),
                  ('36005', '36047', '36061', '36081'), ('41051',), ('41067',),
                  ('51840',), ('51760',), ('55079',), ('54009',)])],
        [17, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('36059',),
                  ('34013', '34017', '34039', '36005', '36047', '36061', '36081'),
                  ('41051',), ('41067',), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [16, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051',), ('41067',), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [15, set([('11001', '51013'), ('01073',), ('06059',), ('06037',),
                  ('06029',), ('06071',), ('06075',), ('08031',), ('24510',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',)])],
        [14, set([('01073',), ('06059',), ('06037',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',), ('11001', '24510', '51013')])],
        [13, set([('06037', '06059'), ('01073',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51840',), ('51760',), ('55079',),
                  ('54009',), ('11001', '24510', '51013')])],
        [12, set([('06037', '06059'), ('01073',), ('06029',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51840')])],
        [11, set([('06029', '06037', '06059'), ('01073',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039',  '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('51760',), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51840')])],
        [10, set([('06029', '06037', '06059'), ('01073',), ('06071',),
                  ('06075',), ('08031',),
                  ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                  ('41051', '41067'), ('55079',), ('54009',),
                  ('11001', '24510', '51013', '51760', '51840')])],
        [9, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',),
                 ('34013', '34017', '34039', '36005', '36047', '36059', '36061', '36081'),
                 ('41051', '41067'), ('55079',), ('54009',),
                 ('11001', '24510', '51013', '51760', '51840')])],
        [8, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',), ('41051', '41067'), ('55079',), ('54009',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840')])],
        [7, set([('01073',), ('06029', '06037', '06059', '06071'), ('06075',),
                 ('08031',), ('41051', '41067'), ('55079',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [6, set([('06029', '06037', '06059', '06071', '06075'), ('01073',),
                 ('08031',), ('41051', '41067'), ('55079',),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [5, set([('06029', '06037', '06059', '06071', '06075'), ('08031',),
                 ('41051', '41067'), ('01073', '55079'),
                 ('11001', '24510', '34013', '34017', '34039', '36005', '36047',
                  '36059', '36061', '36081', '51013', '51760', '51840', '54009')])],
        [4, set([('06029', '06037', '06059', '06071', '06075'),
                 ('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('08031',), ('41051', '41067')])],
        [3, set([('06029', '06037', '06059', '06071', '06075', '41051', '41067'),
                 ('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('08031',)])],
        [2, set([('01073', '11001', '24510', '34013', '34017', '34039', '36005',
                  '36047', '36059', '36061', '36081', '51013', '51760', '51840',
                  '54009', '55079'),
                 ('06029', '06037', '06059', '06071', '06075', '08031', '41051',
                  '41067')])],
    ]

    suite = poc_simpletest.TestSuite()

    for num_clusters, expected_county_tuple in hierdata_24:

        # build initial list of clusters for each test since mutation is allowed
        cluster_list = [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]) for line in data_24_table
        ]

        # compute student answer
        student_clustering = student.hierarchical_clustering(
            cluster_list, num_clusters)
        student_county_tuple = set_of_county_tuples(student_clustering)

        # Prepare test
        error_message = "Testing hierarchical_clustering on 24 county table, num_clusters = " + str(
            num_clusters)
        error_message += "\nStudent county tuples: " + str(
            student_county_tuple)
        error_message += "\nExpected county tuples: " + str(
            expected_county_tuple)
        suite.run_test(student_county_tuple == expected_county_tuple, True,
                       error_message)

    suite.report_results()