def calculate_distortion(): data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list_hierarchical = closest_pairs_and_clustering_algorithms.hierarchical_clustering( singleton_list, 9) cluster_list_kmeans = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list, 9, 5) distortion_hierarchical = closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_hierarchical, data_table) distortion_kmeans = closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_kmeans, data_table) print "distortion_hierarchical: ", distortion_hierarchical print "distortion_kmeans: ", distortion_kmeans
def compare_distortion(): distortion_hierarchical_111 = [] distortion_kmeans_111 = [] distortion_hierarchical_290 = [] distortion_kmeans_290 = [] distortion_hierarchical_896 = [] distortion_kmeans_896 = [] data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) for num in xrange(6, 21): cluster_list_hierarchical = closest_pairs_and_clustering_algorithms.hierarchical_clustering( singleton_list[:], num) cluster_list_kmeans = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list[:], num, 5) distortion_hierarchical_111.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_hierarchical, data_table)) distortion_kmeans_111.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_kmeans, data_table)) print "distortion_hierarchical_111: ", distortion_hierarchical_111 print "distortion_kmeans_111: ", distortion_kmeans_111 data_table = load_data_table(DATA_290_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) for num in xrange(6, 21): cluster_list_hierarchical = closest_pairs_and_clustering_algorithms.hierarchical_clustering( singleton_list[:], num) cluster_list_kmeans = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list[:], num, 5) distortion_hierarchical_290.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_hierarchical, data_table)) distortion_kmeans_290.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_kmeans, data_table)) print "distortion_hierarchical_290: ", distortion_hierarchical_290 print "distortion_kmeans_290: ", distortion_kmeans_290 data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) for num in xrange(6, 21): cluster_list_hierarchical = closest_pairs_and_clustering_algorithms.hierarchical_clustering( singleton_list[:], num) cluster_list_kmeans = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list[:], num, 5) distortion_hierarchical_896.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_hierarchical, data_table)) distortion_kmeans_896.append( closest_pairs_and_clustering_algorithms.compute_distortion( cluster_list_kmeans, data_table)) print "distortion_hierarchical_896: ", distortion_hierarchical_896 print "distortion_kmeans_896: ", distortion_kmeans_896