Esempio n. 1
0
def run_example():
    """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """
    data_table = load_data_table(DATA_3108_URL)
    #data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # ************** Here we have to choose the type of clustering we want to use for visualization ********************
    #cluster_list = sequential_clustering(singleton_list, 15); print "Displaying", len(cluster_list), "sequential clusters"
    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9); print "Displaying", len(cluster_list), "hierarchical clusters"
    cluster_list = alg_project3_solution.kmeans_clustering(
        singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    if DESKTOP:  # draw the clusters using matplotlib or simplegui
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  # add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_3108_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    #cluster_list = sequential_clustering(singleton_list, 15)
    cluster_list = hierarchical_clustering(singleton_list, 15)

    print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    #print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Esempio n. 3
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    #cluster_list = sequential_clustering(singleton_list, 15)
    #print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = closest_pairs_and_clustering_algorithms.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = closest_pairs_and_clustering_algorithms.kmeans_clustering(
        singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Esempio n. 4
0
def run_question(number, data_set):
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters.
    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    global DESKTOP
    print "Loading data table ..."
    data_table = load_data_table(data_set)
    print "Data table loaded.  Creating clusters ..."
    singleton_list = []

    # set correct number of clusters
    if number in [2, 3]:
        num_clusters = 15
    elif number in [5, 6]:
        num_clusters = 9
    print "\nQuestion number:  ", number
    print "Number of clusters to be calculated:  ", num_clusters

    # parse data_table into cluster objects
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    print "\nCluster list created.  Passing list to hierarchical_clustering ..."

    # calculate clusters
    if number == 0:
        cluster_list = sequential_clustering(singleton_list, 15)
        print "Displaying", len(cluster_list), "sequential clusters"
    elif number in [2, 5]:
        cluster_list = cpf.hierarchical_clustering(singleton_list,
                                                   num_clusters)
        print "Displaying", len(cluster_list), "hierarchical clusters"
    elif number in [3, 6]:
        cluster_list = cpf.kmeans_clustering(singleton_list, num_clusters, 5)
        print "Displaying", len(cluster_list), "k-means clusters"
    else:
        "Please pass a valid number to run_question.  Valid options are 0, 2, 3, 5, or 6."

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
def run_kmeans_example():
    """
    Compute k-means distortion on DATA_896_URL for 6 through 20 clusters.

    For each cluster count a fresh list of singleton clusters is built from
    the table, kmeans_clustering is run with third argument 5, and the
    cluster_error values of the resulting clusters are summed.

    Returns a list of 15 distortion values ordered by cluster count
    (index 0 is for 6 clusters, index 14 for 20 clusters).  Nothing is
    plotted: the plotting code that used to follow the return statement
    was unreachable and has been removed.
    """
    data_table = load_data_table(DATA_896_URL)

    def build_singletons():
        """Return a new list with one singleton cluster per table row."""
        return [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4])
            for line in data_table
        ]

    def compute_distortion(cluster_list):
        """Sum cluster_error(data_table) over all clusters in the list."""
        return sum(cluster.cluster_error(data_table)
                   for cluster in cluster_list)

    error = []
    for cluster_num in range(6, 21):
        # Each run gets its own singleton list, as in the original code,
        # so no run can see clusters touched by an earlier one.
        cluster_list = kmeans_clustering(build_singletons(), cluster_num, 5)
        error.append(compute_distortion(cluster_list))
    return error
def Question5():
    data_table = load_data_table(DATA_111_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    cluster_list = module3_project.hierarchical_clustering(singleton_list, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters"
    # draw the clusters using matplotlib or simplegui

    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Esempio n. 7
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    As written this runs the Question 5 configuration: the table loaded
    from DATA_111_URL is clustered with
    alg_project3_solution.hierarchical_clustering(..., 9).

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                            line[4])
        for line in data_table
    ]

    # Previously every clustering call was commented out, so cluster_list
    # was never assigned and plotting always failed with a NameError.  The
    # Question 5 variant is enabled because it matches the DATA_111_URL
    # table loaded above.
    #
    # Other configurations (replace the call below, and the table where noted):
    #   sequential: sequential_clustering(singleton_list, 15)
    #   Question 2: hierarchical_clustering(singleton_list, 9)
    #               with data_table = load_data_table(DATA_3108_URL)
    #   Question 3: kmeans_clustering(singleton_list, 15, 5)
    #               with data_table = load_data_table(DATA_3108_URL)
    #   Question 6: kmeans_clustering(singleton_list, 9, 5)
    #               with data_table = load_data_table(DATA_111_URL)
    cluster_list = alg_project3_solution.hierarchical_clustering(
        singleton_list, 9)
    print("Displaying %d hierarchical clusters" % len(cluster_list))

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Esempio n. 8
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    #data_table = load_data_table(DATA_3108_URL)
    #data_table = load_data_table(DATA_111_URL)
    #data_table = load_data_table(DATA_290_URL)
    data_table = load_data_table(DATA_896_URL)
    
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
    #cluster_list = sequential_clustering(singleton_list, 9)	
    #print "Displaying", len(cluster_list), "sequential clusters"

   
    #start_time = time.clock()
    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "hierarchical:",compute_distortion(cluster_list)
    #print "time taken for plot:"+ str(time.clock() - start_time) 
    print "Displaying", len(cluster_list), "hierarchical clusters"

    #start_time = time.clock()
    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    #print "kmeans: ",compute_distortion(cluster_list)
    #print "time taken for plot:"+ str(time.clock() - start_time) 
    #print "Displaying", len(cluster_list), "k-means clusters"

            
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
        #print ""
        
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers
Esempio n. 9
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_896_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = alg_project3_solution.hierarchical_clustering(
        singleton_list, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Displaying", len(cluster_list), "k-means clusters"

    # calculate cluster_error of hierarchical_clustering and kmeans_clustering
    hc_error = compute_distortion(
        alg_project3_solution.hierarchical_clustering(singleton_list, 9),
        data_table)
    kmc_error = compute_distortion(
        alg_project3_solution.kmeans_clustering(singleton_list, 9, 5),
        data_table)
    print "cluster_error:\nhierarchical_clustring: ", hc_error, "\nkmeans_clustering: ", kmc_error

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Esempio n. 10
0
def run_question(number, data_set):
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters.
    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    global DESKTOP
    print "Loading data table ..."
    data_table = load_data_table(data_set)
    print "Data table loaded.  Creating clusters ..."

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    print "Cluster list created.  Passing list to hierarchical_clustering ..."

    if number == 0:
        cluster_list = sequential_clustering(singleton_list, 15)
        print "Displaying", len(cluster_list), "sequential clusters"
    elif number in [2, 5]:
        cluster_list = cpf.hierarchical_clustering(singleton_list, 9)
        print "Displaying", len(cluster_list), "hierarchical clusters"
    elif number in [3, 6]:
        cluster_list = cpf.kmeans_clustering(singleton_list, 9, 5)
        print "Displaying", len(cluster_list), "k-means clusters"
    else:
        "Please pass a valid number to run_question."

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
def run_example(table_type, clustering_algorithm, num_cluster, plot=True):
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(table_type)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    #cluster_list = sequential_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "sequential clusters"
    if clustering_algorithm == "h":
        cluster_list = alg_project3_solution.hierarchical_clustering(
            singleton_list, num_cluster)
        print "get", len(cluster_list), "hierarchical clusters"
    elif clustering_algorithm == "k":
        cluster_list = alg_project3_solution.kmeans_clustering(
            singleton_list, num_cluster, 5)
        print "get", len(cluster_list), "k-means clusters"
    # draw the clusters using matplotlib or simplegui
    if plot:
        if DESKTOP:
            #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
            alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                                  True)  #add cluster centers
        else:
            alg_clusters_simplegui.PlotClusters(
                data_table,
                cluster_list)  # use toggle in GUI to add cluster centers

    return (cluster_list, data_table)
Esempio n. 12
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_290_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)

    # print "Displaying", len(cluster_list), "sequential clusters"

    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16)
    # distortion = application3.compute_distortion(cluster_list, data_table)
    # print "Displaying", len(cluster_list), "hierarchical clusters, distortion:", distortion

    cluster_list = alg_project3_solution.kmeans_clustering(
        singleton_list, 16, 5)
    distortion = application3.compute_distortion(cluster_list, data_table)
    print "Displaying", len(
        cluster_list), "k-means clusters, distortion:", distortion

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Esempio n. 13
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table('unifiedCancerData_111.csv')

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    # print 'hierarchical error', app3_7.compute_distortion(cluster_list, data_table)
    # print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = alg_project3_solution.kmeans_clustering(
        singleton_list, 9, 5)
    print 'k means error', app3_7.compute_distortion(cluster_list, data_table)
    print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Esempio n. 14
0
def run_example(table, method):
    """
    Compute distortion for cluster counts 20 down to 6 on the given table.

    table is passed unchanged to load_data_table.  method selects the
    algorithm: 'h_cluster' runs alg_project3_solution.hierarchical_clustering
    and feeds each result into the next, smaller run; 'k_cluster' runs
    alg_project3_solution.kmeans_clustering(..., count, 5) on the singleton
    list every time.  Any other method value leaves the dictionary empty.

    With DESKTOP true, returns a dict mapping cluster count to
    compute_distortion of that clustering.  With DESKTOP false, the last
    computed cluster list is drawn with simplegui and None is returned.
    """
    data_table = load_data_table(table)

    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_distortion_dict = {}
    start = 20
    end = 6

    # Working copy for the hierarchical path, which continues from the
    # previous run's output instead of restarting from the singletons.
    new_list = list(singleton_list)

    for count in range(start, end - 1, -1):
        if method == 'h_cluster':
            cluster_list = alg_project3_solution.hierarchical_clustering(
                new_list, count)
            cluster_distortion_dict[count] = compute_distortion(
                cluster_list, data_table)
            new_list = cluster_list
        elif method == 'k_cluster':
            cluster_list = alg_project3_solution.kmeans_clustering(
                singleton_list, count, 5)
            cluster_distortion_dict[count] = compute_distortion(
                cluster_list, data_table)

    if not DESKTOP:
        # use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
        return None

    return cluster_distortion_dict
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    algo_used = 1  # 1: sequential clusters, 2: hierarchical clusters, 3: k-means clusters
    
    data_urls = [DATA_3108_URL, DATA_896_URL, DATA_290_URL, DATA_111_URL]
    source = 3 # pick which data source url
    data_table = load_data_table(data_urls[source - 1]) 
    

    
    def clustering(algo_used, num_clusters, num_iter = 5):
        """
        Uses specified algorithm to cluster data
        
        input: int for specified algorithm, data_table
        output: cluster_list
        """     
        singleton_list = []
        for line in data_table:
            singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
        if algo_used == 1:
            cluster_list = sequential_clustering(singleton_list, num_clusters)
            print "Displaying", len(cluster_list), "sequential clusters"
        elif algo_used == 2:
            cluster_list = prj3.hierarchical_clustering(singleton_list, num_clusters)
            print "Displaying", len(cluster_list), "hierarchical clusters"
        elif algo_used == 3:
            cluster_list = prj3.kmeans_clustering(singleton_list, num_clusters, num_iter)
            print "Displaying", len(cluster_list), "k-means clusters"
        
        return cluster_list


    def gen_random_clusters(num_clusters):
        """
        Creates a list of clusters where each cluster in this list corresponds to one randomly generated point in the 2 x 2 square
        Input: number of clusters (int)
        Output: list of random clusters that is num_clusters long (list)
        """
        cluster_list = []
        for cluster in xrange(num_clusters):
            x = random.choice([1, -1]) * random.random()
            y = random.choice([1, -1]) * random.random()
            cluster_list.append(alg_cluster.Cluster(set([]), x, y, 1, 0))
        return cluster_list
        
        
    def question_one():
        """
        Function for answering first question
        """
        xvals = range(2, 200)
        slow_yvals = []
        fast_yvals = []
        for num in xvals:
            cluster_list = gen_random_clusters(num)
            initial = time.time()
            answer = prj3.slow_closest_pairs(cluster_list)
            final = time.time()
            slow_yvals.append(final - initial)
        for num in xvals:
            cluster_list = gen_random_clusters(num)
            initial = time.time()
            answer = prj3.fast_closest_pair(cluster_list)
            final = time.time()
            fast_yvals.append(final - initial)
        slow_line = plt.plot(xvals, slow_yvals, color='r', label="Slow Closest Pair")
        fast_line = plt.plot(xvals, fast_yvals, color='b', label="Fast Closest Pair")
        plt.legend(loc=2)
        plt.title("Efficiency of Slow and Fast Closest Pairs Algorithms")
        plt.xlabel("Number of Clusters")
        plt.ylabel("Run Times in Milliseconds")
        plt.show()
    
    
    def compute_distortion(cluster_list):
        """
        Takes a list of clusters and uses cluster_error to compute its distortion.
        
        input: list of clusters, original data table
        output: cluster distortion int
        """
        distortion = 0
        for cluster in cluster_list:
            distortion += cluster.cluster_error(data_table)
        return distortion
    
    def question_ten():
        """
        Function for answering question 10
        """
        xvals = xrange(6, 21)
        kmeans_y = []
        high_y = []
        
        for clusters in xvals:
            kmeans_y.append(compute_distortion(clustering(3, clusters)))
        for clusters in xvals:
            high_y.append(compute_distortion(clustering(2, clusters)))
        
        kmeans_line = plt.plot(xvals, kmeans_y, color='r', label="K-Means Clustering")
        high_line = plt.plot(xvals, high_y, color='b', label="Hierarchical Clustering")
        plt.legend()
        plt.title("Distortion Comparison Between Clustering Methods on 290 County Data Set")
        plt.xlabel("Number of Output Clusters")
        plt.ylabel("Distortion")
        plt.show()

    #question_one()
    #question_ten()

    # draw the clusters using matplotlib or simplegui
    cluster_list = clustering(1, 5)
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Esempio n. 16
0
def run_example():
    """
    Compute k-means and hierarchical distortion for 6 to 20 clusters on
    the table loaded from DATA_896_URL.

    kmeans_res and hier_res are lists of [clusters_number, distortion]
    pairs, both in ascending order of cluster count once hier_res has been
    reversed.  With DESKTOP true the two lists are handed to
    create_separate_plots; otherwise the last computed cluster_list (the
    hierarchical run with 6 clusters) is drawn with simplegui.

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_896_URL)

    # One singleton cluster per table row.
    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    # question 5
    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    # print "Displaying", len(cluster_list), "hierarchical clusters"

    # question 6
    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Displaying", len(cluster_list), "k-means clusters"

    # question 7
    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # kmeans_result = alg_project3_solution.compute_distortion(cluster_list, data_table)
    # print("Displaying", kmeans_result, "kmeans_result")
    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    # hierarchical_result = alg_project3_solution.compute_distortion(cluster_list, data_table)
    # print("Displaying", hierarchical_result, "hierarchical_result")

    # question 10
    # k-means distortion for 6, 7, ..., 20 clusters (ascending).
    # NOTE(review): the same singleton_list object is passed to every call
    # below; whether the clustering routines modify it is not visible here.
    kmeans_res = []
    for clusters_number in range(6, 21):
        cluster_list = alg_project3_solution.kmeans_clustering(
            singleton_list, clusters_number, 5)
        kmeans_res.append([
            clusters_number,
            alg_project3_solution.compute_distortion(cluster_list, data_table)
        ])

    # Hierarchical distortion for 20, 19, ..., 6 clusters (descending).
    # NOTE(review): the descending order presumably lets each run continue
    # from the previous, larger clustering if hierarchical_clustering merges
    # in place -- confirm before reordering or rebuilding singleton_list.
    hier_res = []
    for clusters_number in range(20, 5, -1):
        cluster_list = alg_project3_solution.hierarchical_clustering(
            singleton_list, clusters_number)
        hier_res.append([
            clusters_number,
            alg_project3_solution.compute_distortion(cluster_list, data_table)
        ])

    # Put the hierarchical results in the same ascending order as kmeans_res.
    hier_res.reverse()
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  # add cluster centers
        create_separate_plots(kmeans_res, hier_res)
    else:
        # cluster_list here is the result of the final hierarchical run.
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers