def run_example():
    """
    Plot distortion against cluster count for both clustering methods.

    Loads the data table at DATA_896_URL, wraps every row in its own
    single-member cluster and then, for each output size from 6 to 20,
    records the distortion of the clustering produced by k-means
    (5 iterations) and by hierarchical clustering.  Both curves are drawn
    on one simpleplot line chart.

    Takes no arguments and returns None.
    """
    data_table = load_data_table(DATA_896_URL)

    # One cluster per row: row[0] is the only member of the cluster's set,
    # row[1]..row[4] are forwarded to the constructor unchanged.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # range() instead of xrange(): same values on Python 2, and the name
    # also exists on Python 3.
    kmeans = []
    for target_size in range(6, 21):
        clustering = alg_project3_solution.kmeans_clustering(
            singleton_list, target_size, 5)
        distortion = alg_project3_solution.compute_distortion(
            clustering, data_table)
        # float() keeps every plotted y-value a float.
        kmeans.append([target_size, float(distortion)])

    # NOTE(review): the descending order combined with re-using
    # singleton_list looks deliberate -- presumably hierarchical_clustering
    # merges its input in place, so each pass continues from the previous
    # one.  Confirm before reordering; it is also why k-means runs first.
    hierarchical = []
    for target_size in range(20, 5, -1):
        clustering = alg_project3_solution.hierarchical_clustering(
            singleton_list, target_size)
        distortion = alg_project3_solution.compute_distortion(
            clustering, data_table)
        hierarchical.append([target_size, float(distortion)])
    # Points were collected for 20 .. 6; flip them to ascending x order.
    hierarchical.reverse()

    simpleplot.plot_lines(
        "Distortion of the clusterings produced by hierarchical and k-means metods on 896 county data set",
        800, 600, "Number of clusters n [6 .. 20]", "Distortion",
        [hierarchical, kmeans], False,
        ["Hierarchical clustering", "k-means clustering with 5 iterations"])
예제 #2
0
def run_example():
    """
    Cluster the DATA_111_URL table with k-means and plot the result.

    Every row of the table becomes a single-member cluster; k-means is
    asked for 9 clusters using 5 iterations.  The number of clusters and
    their distortion are printed, then the clusters are drawn.

    Set DESKTOP = True/False to use either matplotlib or simplegui.
    Takes no arguments and returns None.
    """
    data_table = load_data_table(DATA_111_URL)

    # One cluster per row: row[0] is the only member of the cluster's set,
    # row[1]..row[4] are forwarded to the constructor unchanged.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)

    # A single pre-formatted argument makes print(...) emit the same text
    # whether it is parsed as the Python 2 statement or the Python 3
    # function; the bare print statement was a SyntaxError on Python 3.
    print("Displaying %d k-means clusters" % len(cluster_list))

    distortion = alg_project3_solution.compute_distortion(cluster_list, data_table)
    print("Distortion %s" % (distortion,))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # Third argument True also draws the cluster centers.
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        # use the toggle in the GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example():
    """
    Compare k-means and hierarchical clustering by distortion.

    The table at DATA_896_URL is loaded and each row is turned into a
    single-member cluster.  For every cluster count from 6 to 20 the
    distortion of the k-means clustering (5 iterations) and of the
    hierarchical clustering is computed, and the two resulting curves are
    shown in a single simpleplot line chart.

    Takes no arguments and returns None.
    """
    data_table = load_data_table(DATA_896_URL)

    # row[0] is the only member of each cluster's set; row[1]..row[4] are
    # forwarded to the constructor unchanged.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # range() replaces the Python-2-only xrange(); the produced values are
    # identical.  float() keeps every y-value a float.
    kmeans = [
        [size, float(alg_project3_solution.compute_distortion(
            alg_project3_solution.kmeans_clustering(singleton_list, size, 5),
            data_table))]
        for size in range(6, 21)
    ]

    # NOTE(review): sizes are visited from 20 down to 6 on the same
    # singleton_list -- presumably hierarchical_clustering shrinks its
    # input in place so later passes build on earlier ones.  Confirm before
    # changing the order; it is also why the k-means pass comes first.
    hierarchical = [
        [size, float(alg_project3_solution.compute_distortion(
            alg_project3_solution.hierarchical_clustering(singleton_list, size),
            data_table))]
        for size in range(20, 5, -1)
    ]
    # Flip to ascending cluster count so both series share the same x order.
    hierarchical.reverse()

    simpleplot.plot_lines("Distortion of the clusterings produced by hierarchical and k-means metods on 896 county data set",
                          800, 600, "Number of clusters n [6 .. 20]", "Distortion",
                          [hierarchical, kmeans], False,
                          ["Hierarchical clustering", "k-means clustering with 5 iterations"])
예제 #4
0
def compute_kmeans_distortions(cluster_list):
    """ list -> list
    Run k-means on cluster_list once per target cluster count from 6
    through 20, passing 5 as the third argument each time, and return the
    distortion of every result in that order (index 0 is 6 clusters).

    Every run starts from the same cluster_list argument.  Distortion is
    measured against the global data_table.
    """
    return [
        sol.compute_distortion(
            sol.kmeans_clustering(cluster_list, num_clusters, 5),
            data_table)
        for num_clusters in range(6, 21)
    ]
예제 #5
0
def compute_hier_distortions(cluster_list):
    """ list -> list
    Apply hierarchical clustering repeatedly for target sizes 20, 19, ...,
    6, feeding the output of each pass into the next one, and collect the
    distortion after every pass.

    The returned list is ordered by ascending cluster count (index 0 is
    6 clusters, the last entry is 20).  Distortion is measured against the
    global data_table.
    """
    descending = []
    current = cluster_list

    for num_clusters in range(20, 5, -1):
        # Continue from the previous, already merged, clustering.
        current = sol.hierarchical_clustering(current, num_clusters)
        descending.append(sol.compute_distortion(current, data_table))

    # Collected for 20 .. 6; hand them back as 6 .. 20.
    return descending[::-1]
예제 #6
0
"""
Assignment 3 Question 7 Answer
"""

import alg_project3_viz as viz
import alg_project3_solution as sol
import alg_cluster

# Load the table once; both clusterings and both distortion measurements
# below work from it.
data_table = viz.load_data_table(viz.DATA_111_URL)

# Each method gets its own freshly built input list.
# NOTE(review): presumably because a clustering call may modify the list
# it is given -- confirm against alg_project3_solution.
hier_data_list = sol.make_data_list(data_table)
kmeans_data_list = sol.make_data_list(data_table)

# 9 output clusters for both methods; k-means additionally receives 5 as
# its third argument.
hier_cluster_list = sol.hierarchical_clustering(hier_data_list, 9)
kmeans_cluster_list = sol.kmeans_clustering(kmeans_data_list, 9, 5)

# Report the distortion of each clustering.
hier_distortion = sol.compute_distortion(hier_cluster_list, data_table)
print("hierarchical:", hier_distortion)
kmeans_distortion = sol.compute_distortion(kmeans_cluster_list, data_table)
print("kmeans:", kmeans_distortion)


# Hierarchical: 175163886915.8305 or 1.752 x 10^11 with four significant figures
# K-means: 271254226924.20047 or 2.713 x 10^11 with four significant figures
예제 #7
0
def run_example():
    """
    Measure distortion for 6 to 20 clusters with both clustering methods.

    Loads the table at DATA_896_URL, turns each row into a single-member
    cluster, then records [cluster count, distortion] pairs for k-means
    (5 passed as the third argument) and for hierarchical clustering.

    Set DESKTOP = True/False to choose the output: when DESKTOP is true
    the two result lists are handed to create_separate_plots; otherwise
    the clusters left over from the final hierarchical pass are shown with
    simplegui.  Takes no arguments and returns None.
    """
    data_table = load_data_table(DATA_896_URL)

    # row[0] is the only member of each cluster's set; row[1]..row[4] are
    # forwarded to the constructor unchanged.
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # Earlier experiments (questions 5-7) ran hierarchical_clustering and
    # kmeans_clustering with 9 clusters and printed their distortions.

    # question 10: k-means distortion for 6, 7, ..., 20 clusters
    kmeans_points = [
        [count,
         alg_project3_solution.compute_distortion(
             alg_project3_solution.kmeans_clustering(singleton_list, count, 5),
             data_table)]
        for count in range(6, 21)
    ]

    # Hierarchical distortion, visited from 20 down to 6 clusters.  Kept as
    # an explicit loop because cluster_list from the last pass is used in
    # the simplegui branch below.
    descending_points = []
    for count in range(20, 5, -1):
        cluster_list = alg_project3_solution.hierarchical_clustering(
            singleton_list, count)
        score = alg_project3_solution.compute_distortion(
            cluster_list, data_table)
        descending_points.append([count, score])

    # Same ascending cluster-count order as the k-means points.
    hier_points = descending_points[::-1]

    # draw the results using matplotlib or simplegui
    if not DESKTOP:
        # use the toggle in the GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
    else:
        create_separate_plots(kmeans_points, hier_points)