예제 #1
0
def run_example():
    """
    Load a data table, compute a list of clusters, report it and plot it.

    Each row of the table loaded from DATA_896_URL is turned into a
    single-member alg_cluster.Cluster.  Those singletons are handed to
    alg_project3_solution.hierarchical_clustering with a target of 20
    clusters.  The distortion of the result (as returned by
    compute_distortion) and the number of clusters are printed, then the
    clusters are drawn.

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    # data_table = load_data_table(DATA_3108_URL)  # alternative data set
    data_table = load_data_table(DATA_896_URL)

    # One cluster per row: column 0 is wrapped in a one-element set and
    # columns 1-4 are passed through positionally.
    # NOTE(review): presumably (id, x, y, population, risk) - confirm
    # against alg_cluster.Cluster.
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    # Alternative clustering methods kept for experimentation:
    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 20)
    distortion = compute_distortion(cluster_list, data_table)

    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.  Joining on " "
    # reproduces the separator the comma-separated print statement inserted
    # (including the double space after "is ").
    print(" ".join(["Distortion of hierarchical clusters is ", str(distortion)]))
    print(" ".join(["Displaying", str(len(cluster_list)), "hierarchical clusters"]))

    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Distortion of k-means clusters is ", str(compute_distortion(cluster_list, data_table))
    # print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # Third argument True: add cluster centers (per the original note).
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
예제 #2
0
Question 3-1
Author: You-Hao Chang
"""

import helper
import AT_project_3 as pj3
import time
import matplotlib.pyplot as plt

# Benchmark pj3.slow_closest_pair against pj3.fast_closest_pair on randomly
# generated cluster lists of increasing size and plot both timing curves.

# Cluster counts to benchmark.  Defined once and shared by the timing loop
# and the x-axis so the two can never get out of sync.
x_vals = list(range(2, 201))

running_time_scp = []  # seconds spent in slow_closest_pair, one per count
running_time_fcp = []  # seconds spent in fast_closest_pair, one per count

for num_clusters in x_vals:
    # Both functions are timed on the same input so the curves are comparable.
    cluster_list = helper.gen_random_clusters(num_clusters)

    start_time = time.time()
    pj3.slow_closest_pair(cluster_list)
    running_time_scp.append(time.time() - start_time)

    start_time = time.time()
    pj3.fast_closest_pair(cluster_list)
    running_time_fcp.append(time.time() - start_time)

# making comparison plots
fig, ax = plt.subplots()

ax.plot(x_vals, running_time_scp, '-b', label='slow closest pair')
ax.plot(x_vals, running_time_fcp, '-r', label='fast closest pair')

ax.legend(loc='upper left')
ax.set_title('Timing results (in desktop Python)')
ax.set_xlabel('number of initial clusters')
예제 #3
0
    for cluster in cluster_list:
        distortion += cluster.cluster_error(data_table)

    return distortion


def _build_singleton_clusters(table):
    """Return a new list with one single-member alg_cluster.Cluster per row of table."""
    return [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in table
    ]


data_table = load_data_table(DATA_896_URL)

# hierarchical_clustering: distortion for 6..20 output clusters
distortion_hc = []
for num_cluster in range(6, 21):
    # Fresh singletons on every pass, as in the original script.
    # NOTE(review): presumably the clustering routines may modify the list
    # they are given - confirm before hoisting this out of the loop.
    singleton_list = _build_singleton_clusters(data_table)

    cluster_list = pj3.hierarchical_clustering(singleton_list, num_cluster)
    # Computed once and reused for both the record and the report; assumes
    # compute_distortion is deterministic and free of side effects.
    current_distortion = compute_distortion(cluster_list, data_table)
    distortion_hc.append(current_distortion)
    # Single-argument print(...) gives the same output under Python 2 and 3;
    # joining on " " reproduces the print statement's comma separator.
    print(" ".join(["Distortion of hierarchical clusters is ", str(current_distortion)]))
    print(" ".join(["Displaying", str(len(cluster_list)), "hierarchical clusters"]))

# kmeans_clustering: distortion for 6..20 output clusters, 5 iterations each
distortion_kc = []
for num_cluster in range(6, 21):
    singleton_list = _build_singleton_clusters(data_table)

    cluster_list = pj3.kmeans_clustering(singleton_list, num_cluster, 5)
    current_distortion = compute_distortion(cluster_list, data_table)
    distortion_kc.append(current_distortion)
    print(" ".join(["Distortion of k-means clusters is ", str(current_distortion)]))
    print(" ".join(["Displaying", str(len(cluster_list)), "k-means clusters"]))