def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_3108_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    #cluster_list = sequential_clustering(singleton_list, 15)
    cluster_list = hierarchical_clustering(singleton_list, 15)

    print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    #print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Ejemplo n.º 2
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_3108_URL)
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
    cluster_list = sequential_clustering(singleton_list, 15)    
    print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)   
    #print "Displaying", len(cluster_list), "k-means clusters"

            
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers
Ejemplo n.º 3
0
def run_example(x):
    """
    Load the DATA_111_URL table and return its k-means clustering.

    x is forwarded as the second argument of
    alg_project3_solution.kmeans_clustering (presumably the number of
    clusters -- confirm against that module); the third argument is 5.

    Returns whatever kmeans_clustering returns for the singleton list built
    from the table rows.

    Nothing is printed or plotted.  The original body returned before its
    reporting and plotting statements, so that unreachable code has been
    removed; the observable behavior is unchanged.
    """
    data_table = load_data_table(DATA_111_URL)

    # one singleton cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    return alg_project3_solution.kmeans_clustering(singleton_list, x, 5)
Ejemplo n.º 4
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    #cluster_list = sequential_clustering(singleton_list, 15)
    #print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = closest_pairs_and_clustering_algorithms.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = closest_pairs_and_clustering_algorithms.kmeans_clustering(
        singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
def run_example():
    """
    Run k-means on the DATA_3108_URL table and plot the clusters with
    matplotlib.

    Singleton clusters are built with project_3.Cluster, one per table row,
    and handed to kmeans_clustering with the arguments 20 and 20.
    """
    data_table = load_data_table(DATA_3108_URL)

    singleton_list = [
        project_3.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = kmeans_clustering(singleton_list, 20, 20)
    cluster_count = len(cluster_list)
    print("Displaying", cluster_count, "k-means clusters")

    # draw the clusters using matplotlib
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 6
0
def run_example():
    """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """
    data_table = load_data_table(DATA_3108_URL)
    #data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # ************** Here we have to choose the type of clustering we want to use for visualization ********************
    #cluster_list = sequential_clustering(singleton_list, 15); print "Displaying", len(cluster_list), "sequential clusters"
    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9); print "Displaying", len(cluster_list), "hierarchical clusters"
    cluster_list = alg_project3_solution.kmeans_clustering(
        singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    if DESKTOP:  # draw the clusters using matplotlib or simplegui
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  # add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 7
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
#    cluster_list = sequential_clustering(singleton_list, 15)	
#    print "Displaying", len(cluster_list), "sequential clusters"

    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)	
    print "Displaying", len(cluster_list), "k-means clusters"
    
    distortion = sum([clstr.cluster_error(data_table) for clstr in cluster_list])
    print "Distortion of clustering =", distortion
            
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_24_URL)
    
    # data_table = gen_random_clusters(100)[0]
    
    #singleton_list = []
    #for line in data_table:
        #singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
    singleton_list = gen_random_clusters(100)
        
    #cluster_list = sequential_clustering(singleton_list, 15)	
    #print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = module3_project.hierarchical_clustering(singleton_list, 10)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)	
    #print "Displaying", len(cluster_list), "k-means clusters"

            
    # draw the clusters using matplotlib or simplegui
    
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Ejemplo n.º 9
0
def visualize_data(cluster_input, data, method=None, display_centers=False):
    """
    Load a data table, compute a list of clusters and plot them with
    matplotlib.

    cluster_input -- argument(s) for the chosen algorithm.  It is passed
        whole as the second argument of sequential_clustering and
        hierarchical_clustering; for 'kmeans_clustering' it must be
        indexable, and items 0 and 1 become the second and third arguments.
    data -- value handed to load_data_table
    method -- None (sequential clustering), 'hierarchical_clustering' or
        'kmeans_clustering'
    display_centers -- forwarded as the third argument of plot_clusters

    Raises ValueError when method is not one of the values above.  The check
    runs before any data is loaded; previously an unknown method printed an
    error and then failed on the unassigned cluster_list.
    """
    known_methods = (None, 'hierarchical_clustering', 'kmeans_clustering')
    if method not in known_methods:
        raise ValueError(
            "ERROR: method entered into visualize_data not recognized")

    data_table = load_data_table(data)

    # one singleton cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    if method is None:
        cluster_list = sequential_clustering(singleton_list, cluster_input)
        print("Displaying", len(cluster_list), "sequential clusters")
    elif method == 'hierarchical_clustering':
        cluster_list = clustering.hierarchical_clustering(singleton_list, cluster_input)
        print("Displaying", len(cluster_list), "hierarchical clusters")
    else:  # 'kmeans_clustering', guaranteed by the validation above
        cluster_list = clustering.kmeans_clustering(singleton_list,
                                                    cluster_input[0],
                                                    cluster_input[1])
        print("Displaying", len(cluster_list), "k-means clusters")

    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, display_centers)
Ejemplo n.º 10
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_896_URL)
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
    #cluster_list = sequential_clustering(singleton_list, 15)    
    #print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 50)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 20, 5)   
    #print "Displaying", len(cluster_list), "k-means clusters"

    print 'Calculating distortion...'
    print alg_project3_solution.compute_distortion(cluster_list, data_table)
    
            
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
Ejemplo n.º 11
0
def question_5_6():
    """
    Cluster the DATA_111_URL table hierarchically and plot the clusters with
    matplotlib.

    hierarchical_clustering is called with 20 as its second argument on the
    singleton list built from the table rows.
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = hierarchical_clustering(singleton_list, 20)
    cluster_count = len(cluster_list)
    print("Displaying", cluster_count, "hierarchical clusters")

    # True adds cluster centers
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def q6():
    """
    Run k-means (arguments 9 and 5) on the viz.DATA_111_URL table and plot
    the clusters with matplotlib.
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)
    # one singleton cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def q2():
    """
    Run hierarchical clustering (second argument 15) on the
    viz.DATA_3108_URL table and plot the clusters with matplotlib.
    """
    data_table = viz.load_data_table(viz.DATA_3108_URL)
    # one singleton cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 15)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 14
0
def visualize(datafile, output, cluster_func):
    data_table = load_data_table(datafile)
    clusters = [alg_cluster.Cluster(set([x[0]]), x[1], x[2], x[3], x[4])
                for x in data_table]
    clusters = cluster_func(clusters)
    print "Displaying", len(clusters), "clusters"
    alg_clusters_matplotlib.plot_clusters(data_table, clusters, True, output)
    return clusters
def q2():
    """
    Cluster the viz.DATA_3108_URL table hierarchically (second argument 15)
    and plot the result with matplotlib.
    """
    data_table = viz.load_data_table(viz.DATA_3108_URL)
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    hierarchical = alg_project3.hierarchical_clustering
    cluster_list = hierarchical(singleton_list, 15)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def q6():
    """
    Cluster the viz.DATA_111_URL table with k-means (arguments 9 and 5) and
    plot the result with matplotlib.
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    kmeans = alg_project3.kmeans_clustering
    cluster_list = kmeans(singleton_list, 9, 5)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 17
0
def run_question_3():
    data_table = pro3_viz.load_data_table(DATA_3108_URL)
    data_table_cl = clusterize_data(data_table)
    cluster_list = pro3.kmeans_clustering(data_table_cl, 15, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)  # simple filled-in circles
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                          True)  # add cluster centers
Ejemplo n.º 18
0
def run_question_5():
    data_table = pro3_viz.load_data_table(DATA_111_URL)
    data_table_cl = clusterize_data(data_table)
    cluster_list = pro3.hierarchical_clustering(data_table_cl, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)  # simple filled-in circles
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                          True)  # add cluster centers

    return cluster_list
def question5_plot():
    """
    Produce the question 5 plot: hierarchical clustering (second argument 9)
    of the DATA_111_URL table.
    """
    data_table = load_data_table(DATA_111_URL)

    # one singleton cluster per table row
    singleton_list = [
        Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    plot_clusters(data_table, hierarchical_clustering(singleton_list, 9), True)
Ejemplo n.º 20
0
def question6(URL, file_to_save, number_clusters=9, iterations=5, centers=False):
    '''

    :return:
    '''
    data_table, singleton_list = alg_project3_viz.run_example(URL)
    cluster_list = kmeans_clustering(singleton_list, number_clusters, iterations)
    print "Displaying", len(cluster_list), "k-means clusters"
    if centers:
        file = file_to_save[:-4] + 'with_centers' + '.png'
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file, True)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file_to_save, False)
Ejemplo n.º 21
0
def question2(URL, file_to_save, number_clusters=15, centers=False):
    '''

    :return:
    '''
    data_table, singleton_list = alg_project3_viz.run_example(URL)
    cluster_list = hierarchical_clustering(singleton_list, number_clusters)
    print "Displaying", len(cluster_list), "hierarchical clusters"
    if centers:
        file = file_to_save[:-4] + 'with_centers' + '.png'
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file, centers)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file_to_save, centers)
def question3_plot():
    """
    Produce the question 3 plot: k-means clustering (arguments 15 and 5) of
    the DATA_3108_URL table.
    """
    data_table = load_data_table(DATA_3108_URL)

    # one singleton cluster per table row
    singleton_list = [
        Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]
    plot_clusters(data_table, kmeans_clustering(singleton_list, 15, 5), True)
Ejemplo n.º 23
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    #data_table = load_data_table(DATA_3108_URL)
    #data_table = load_data_table(DATA_896_URL)
    data_table = load_data_table(DATA_290_URL)
    #data_table = load_data_table(DATA_111_URL)
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))

    original_dict = create_dictionary(data_table)
    
    #original_list = []
    #for line in data_table:
    #    original_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))

    #original_list = list(singleton_list)
    #original_list = singleton_list[:]
    
    #cluster_list = sequential_clustering(singleton_list, 15)	
    #print "Displaying", len(cluster_list), "sequential clusters"

    print "About to display ...."

    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16)
    #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)

    #print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)	
    #print "Displaying", len(cluster_list), "k-means clusters"


    # compute the distortion
    if (True):
        distortion = compute_distortion(cluster_list, data_table)
        print distortion
     
        

    # draw the clusters using matplotlib or simplegui
    if (False):    
        if DESKTOP:
            #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
            alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
        else:
            alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers
def Question5():
    data_table = load_data_table(DATA_111_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
    cluster_list = module3_project.hierarchical_clustering(singleton_list, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters" 
    # draw the clusters using matplotlib or simplegui
    
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   
Ejemplo n.º 25
0
def run_question(number, data_set):
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters.
    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    global DESKTOP
    print "Loading data table ..."
    data_table = load_data_table(data_set)
    print "Data table loaded.  Creating clusters ..."
    singleton_list = []

    # set correct number of clusters
    if number in [2, 3]:
        num_clusters = 15
    elif number in [5, 6]:
        num_clusters = 9
    print "\nQuestion number:  ", number
    print "Number of clusters to be calculated:  ", num_clusters

    # parse data_table into cluster objects
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    print "\nCluster list created.  Passing list to hierarchical_clustering ..."

    # calculate clusters
    if number == 0:
        cluster_list = sequential_clustering(singleton_list, 15)
        print "Displaying", len(cluster_list), "sequential clusters"
    elif number in [2, 5]:
        cluster_list = cpf.hierarchical_clustering(singleton_list,
                                                   num_clusters)
        print "Displaying", len(cluster_list), "hierarchical clusters"
    elif number in [3, 6]:
        cluster_list = cpf.kmeans_clustering(singleton_list, num_clusters, 5)
        print "Displaying", len(cluster_list), "k-means clusters"
    else:
        "Please pass a valid number to run_question.  Valid options are 0, 2, 3, 5, or 6."

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 26
0
def assignment_q2():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters
    """
    data_table = load_data_table(DATA_3108_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4]))

    cluster_list = project.hierarchical_clustering(singleton_list, 15)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_kmeans_example():
    """
    Compute the k-means distortion of the DATA_896_URL table for 6 through
    20 clusters.

    For every cluster count in range(6, 21), kmeans_clustering (third
    argument 5) is run on a freshly built singleton list, and the sum of
    cluster_error(data_table) over the resulting clusters is recorded.

    Returns the list of those 15 sums, ordered by increasing cluster count.

    Nothing is printed or plotted.  The original body returned before its
    reporting and plotting statements, so that unreachable code has been
    removed.
    """
    data_table = load_data_table(DATA_896_URL)

    def build_singletons():
        """Return a new list holding one singleton cluster per table row."""
        return [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4])
            for line in data_table
        ]

    def compute_distortion(cluster_list):
        """Return the summed cluster_error of cluster_list against the table."""
        total = 0
        for cluster in cluster_list:
            total += cluster.cluster_error(data_table)
        return total

    # The original rebuilt the singleton list between runs; presumably
    # kmeans_clustering may modify the clusters it receives -- confirm.
    error = []
    for cluster_num in range(6, 21):
        cluster_list = kmeans_clustering(build_singletons(), cluster_num, 5)
        error.append(compute_distortion(cluster_list))
    return error
def Question5():
    data_table = load_data_table(DATA_111_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    cluster_list = module3_project.hierarchical_clustering(singleton_list, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters"
    # draw the clusters using matplotlib or simplegui

    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def Q2_Q3_Q5_Q6_viz(data_file, clustering_algo, num_clusters, num_iterations, centers):
	'''Questions 2-6 Answer'''
	data_table = project.load_data_table(data_file)
	singleton_list = []
	for line in data_table:
	    singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
	if clustering_algo == "h":
		cluster_list = project.hierarchical_clustering(singleton_list, num_clusters)
		print "Displaying", len(cluster_list), "hierarchical clusters"
	elif clustering_algo == "k":
		cluster_list = project.kmeans_clustering(singleton_list, num_clusters, num_iterations)
		print "Displaying", len(cluster_list), "kmeans clusters"
	else:	
		print "Clustering method not recognized.\nPlease use 'h' for hierarchical_clustering\nor use 'k' for kmeans_clustering"   
		return
	alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, centers) #False to remove cluster centers.  True to include them
Ejemplo n.º 30
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    # Question 2 answer: uncomment bottom two lines in block comment with
    # data_table being equal to load_data_table(DATA_3108_URL)
    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    # print "Displaying", len(cluster_list), "hierarchical clusters"

    # Question 3 answer: uncomment bottom two lines in block comment with
    # data_table being equal to load_data_table(DATA_3108_URL)
    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 15, 5)
    # print "Displaying", len(cluster_list), "k-means clusters"

    # Question 5 answer: uncomment bottom two lines in block comment with
    # data_table being equal to load_data_table(DATA_111_URL)
    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    # print "Displaying", len(cluster_list), "hierarchical clusters"

    # Question 6 answer: uncomment bottom two lines in block comment with
    # data_table being equal to load_data_table(DATA_111_URL)
    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 31
0
def run_example(data = 3108, algorithm = "sequential", display_centers = False):
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """

    if data == 3108:
        data_url = DATA_3108_URL
    if data == 896:
        data_url = DATA_896_URL
    if data == 290:
        data_url = DATA_290_URL
    if data == 111:
        data_url = DATA_111_URL

    data_table = load_data_table(data_url)

    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))

    if algorithm == "sequential":
        cluster_list = sequential_clustering(singleton_list, 15)
        print "Displaying", len(cluster_list), "sequential clusters"

    if algorithm == "hierarchical":
        cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
        print "Displaying", len(cluster_list), "hierarchical clusters"

    if algorithm == "k-means":
        cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
        print "Displaying", len(cluster_list), "k-means clusters"


    # draw the clusters using matplotlib or simplegui
    # display_centers = True adds cluster centers
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, display_centers)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers

    return
Ejemplo n.º 32
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    #data_table = load_data_table(DATA_3108_URL)
    #data_table = load_data_table(DATA_111_URL)
    #data_table = load_data_table(DATA_290_URL)
    data_table = load_data_table(DATA_896_URL)
    
    
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
    #cluster_list = sequential_clustering(singleton_list, 9)	
    #print "Displaying", len(cluster_list), "sequential clusters"

   
    #start_time = time.clock()
    cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9)
    #print "hierarchical:",compute_distortion(cluster_list)
    #print "time taken for plot:"+ str(time.clock() - start_time) 
    print "Displaying", len(cluster_list), "hierarchical clusters"

    #start_time = time.clock()
    #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    #print "kmeans: ",compute_distortion(cluster_list)
    #print "time taken for plot:"+ str(time.clock() - start_time) 
    #print "Displaying", len(cluster_list), "k-means clusters"

            
    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
        #print ""
        
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)   # use toggle in GUI to add cluster centers
Ejemplo n.º 33
0
def assignment_q6():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters
    """
    data_table = load_data_table(DATA_111_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4]))

    cluster_list = project.kmeans_clustering(singleton_list, 9, 5)

    # q7
    distortion = compute_distortion(cluster_list, data_table)
    print "kmeans_clustering distortion:", distortion

    print "Displaying", len(cluster_list), "k-means clusters"

    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 34
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_896_URL)
    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "sequential clusters"

    cluster_list = alg_project3_solution.hierarchical_clustering(
        singleton_list, 9)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    # print "Displaying", len(cluster_list), "k-means clusters"

    # calculate cluster_error of hierarchical_clustering and kmeans_clustering
    hc_error = compute_distortion(
        alg_project3_solution.hierarchical_clustering(singleton_list, 9),
        data_table)
    kmc_error = compute_distortion(
        alg_project3_solution.kmeans_clustering(singleton_list, 9, 5),
        data_table)
    print "cluster_error:\nhierarchical_clustring: ", hc_error, "\nkmeans_clustering: ", kmc_error

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 35
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    """
    data_table = load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster(set([line[0]]), \
            line[1], line[2], line[3], line[4]))

    #cluster_list = project3.hierarchical_clustering(singleton_list, 9)
    #print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = project3.kmeans_clustering(singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 36
0
def run_question(number, data_set):
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters.
    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    global DESKTOP
    print "Loading data table ..."
    data_table = load_data_table(data_set)
    print "Data table loaded.  Creating clusters ..."

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))
    print "Cluster list created.  Passing list to hierarchical_clustering ..."

    if number == 0:
        cluster_list = sequential_clustering(singleton_list, 15)
        print "Displaying", len(cluster_list), "sequential clusters"
    elif number in [2, 5]:
        cluster_list = cpf.hierarchical_clustering(singleton_list, 9)
        print "Displaying", len(cluster_list), "hierarchical clusters"
    elif number in [3, 6]:
        cluster_list = cpf.kmeans_clustering(singleton_list, 9, 5)
        print "Displaying", len(cluster_list), "k-means clusters"
    else:
        "Please pass a valid number to run_question."

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                              True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 37
0
def plot_Q6():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    DIRECTORY = "http://commondatastorage.googleapis.com/codeskulptor-assets/"
    DATA_111_URL = DIRECTORY + "data_clustering/unifiedCancerData_111.csv"

    data_table = viz.load_data_table(DATA_111_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    cluster_list = project.kmeans_clustering(singleton_list, 9, 5)
    print "Displaying", len(cluster_list), "hierarchical clusters"

    # draw the clusters using matplotlib
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
Ejemplo n.º 38
0
def run_example():
    """
    Load a data table, compute a list of clusters and
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_290_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4]))

    # cluster_list = sequential_clustering(singleton_list, 15)

    # print "Displaying", len(cluster_list), "sequential clusters"

    # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16)
    # distortion = application3.compute_distortion(cluster_list, data_table)
    # print "Displaying", len(cluster_list), "hierarchical clusters, distortion:", distortion

    cluster_list = alg_project3_solution.kmeans_clustering(
        singleton_list, 16, 5)
    distortion = application3.compute_distortion(cluster_list, data_table)
    print "Displaying", len(
        cluster_list), "k-means clusters, distortion:", distortion

    # draw the clusters using matplotlib or simplegui
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
        # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)  #add cluster centers
    else:
        alg_clusters_simplegui.PlotClusters(
            data_table,
            cluster_list)  # use toggle in GUI to add cluster centers
Ejemplo n.º 39
0
def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table(DATA_3108_URL)

    singleton_list = []
    for line in data_table:
        singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4]))

    # cluster_list = sequential_clustering(singleton_list, 50)
    # print "Displaying", len(cluster_list), "sequential clusters"

    # cluster_list = project.hierarchical_clustering(singleton_list, 15)
    # print "Displaying", len(cluster_list), "hierarchical clusters"

    cluster_list = project.kmeans_clustering(singleton_list, 15, 5)
    print "Displaying", len(cluster_list), "k-means clusters"

    # draw the clusters using matplotlib or simplegui
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 40
0
def run_example():
    """
    Run k-means on 'unifiedCancerData_111.csv', report its error and plot.

    One singleton cluster is built per table row, the list is passed to
    alg_project3_solution.kmeans_clustering(singleton_list, 9, 5), the value
    of app3_7.compute_distortion for the result is printed and the clusters
    are drawn.

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    data_table = load_data_table('unifiedCancerData_111.csv')

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5)
    print 'k means error', app3_7.compute_distortion(cluster_list, data_table)
    print "Displaying", len(cluster_list), "k-means clusters"

    if not DESKTOP:
        # simplegui: use toggle in GUI to add cluster centers
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
        return

    # matplotlib, with cluster centers
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
Ejemplo n.º 41
0
import sys
sys.path.append('../../3_closest_pairs_&_clustering_algorithms')
import data.load_clusters as lc
import data.cluster as cl
import clustering as clr
import alg_clusters_matplotlib as cplot

# Script section: load one table, cluster it hierarchically and plot it.
# Other tables named in the original note: DATA_3108_URL, DATA_290_URL.
data_table = lc.load_data_table(lc.DATA_896_URL)

c = 7  # cluster count

# One single-member cluster per table row.
singleton_list = [
    cl.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
    for line in data_table
]

cluster_list = clr.hierarchical_clustering(singleton_list, c)

# Third argument True: draw cluster centers as well.
cplot.plot_clusters(data_table, cluster_list, True)
    

def run_example():
    """
    Load a data table, compute a list of clusters and 
    plot a list of clusters

    Set DESKTOP = True/False to use either matplotlib or simplegui
    """
    algo_used = 1  # 1: sequential clusters, 2: hierarchical clusters, 3: k-means clusters
    
    data_urls = [DATA_3108_URL, DATA_896_URL, DATA_290_URL, DATA_111_URL]
    source = 3 # pick which data source url
    data_table = load_data_table(data_urls[source - 1]) 
    

    
    def clustering(algo_used, num_clusters, num_iter = 5):
        """
        Uses specified algorithm to cluster data
        
        input: int for specified algorithm, data_table
        output: cluster_list
        """     
        singleton_list = []
        for line in data_table:
            singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))
        
        if algo_used == 1:
            cluster_list = sequential_clustering(singleton_list, num_clusters)
            print "Displaying", len(cluster_list), "sequential clusters"
        elif algo_used == 2:
            cluster_list = prj3.hierarchical_clustering(singleton_list, num_clusters)
            print "Displaying", len(cluster_list), "hierarchical clusters"
        elif algo_used == 3:
            cluster_list = prj3.kmeans_clustering(singleton_list, num_clusters, num_iter)
            print "Displaying", len(cluster_list), "k-means clusters"
        
        return cluster_list


    def gen_random_clusters(num_clusters):
        """
        Creates a list of clusters where each cluster in this list corresponds to one randomly generated point in the 2 x 2 square
        Input: number of clusters (int)
        Output: list of random clusters that is num_clusters long (list)
        """
        cluster_list = []
        for cluster in xrange(num_clusters):
            x = random.choice([1, -1]) * random.random()
            y = random.choice([1, -1]) * random.random()
            cluster_list.append(alg_cluster.Cluster(set([]), x, y, 1, 0))
        return cluster_list
        
        
    def question_one():
        """
        Function for answering first question
        """
        xvals = range(2, 200)
        slow_yvals = []
        fast_yvals = []
        for num in xvals:
            cluster_list = gen_random_clusters(num)
            initial = time.time()
            answer = prj3.slow_closest_pairs(cluster_list)
            final = time.time()
            slow_yvals.append(final - initial)
        for num in xvals:
            cluster_list = gen_random_clusters(num)
            initial = time.time()
            answer = prj3.fast_closest_pair(cluster_list)
            final = time.time()
            fast_yvals.append(final - initial)
        slow_line = plt.plot(xvals, slow_yvals, color='r', label="Slow Closest Pair")
        fast_line = plt.plot(xvals, fast_yvals, color='b', label="Fast Closest Pair")
        plt.legend(loc=2)
        plt.title("Efficiency of Slow and Fast Closest Pairs Algorithms")
        plt.xlabel("Number of Clusters")
        plt.ylabel("Run Times in Milliseconds")
        plt.show()
    
    
    def compute_distortion(cluster_list):
        """
        Takes a list of clusters and uses cluster_error to compute its distortion.
        
        input: list of clusters, original data table
        output: cluster distortion int
        """
        distortion = 0
        for cluster in cluster_list:
            distortion += cluster.cluster_error(data_table)
        return distortion
    
    def question_ten():
        """
        Function for answering question 10
        """
        xvals = xrange(6, 21)
        kmeans_y = []
        high_y = []
        
        for clusters in xvals:
            kmeans_y.append(compute_distortion(clustering(3, clusters)))
        for clusters in xvals:
            high_y.append(compute_distortion(clustering(2, clusters)))
        
        kmeans_line = plt.plot(xvals, kmeans_y, color='r', label="K-Means Clustering")
        high_line = plt.plot(xvals, high_y, color='b', label="Hierarchical Clustering")
        plt.legend()
        plt.title("Distortion Comparison Between Clustering Methods on 290 County Data Set")
        plt.xlabel("Number of Output Clusters")
        plt.ylabel("Distortion")
        plt.show()

    #question_one()
    #question_ten()

    # draw the clusters using matplotlib or simplegui
    cluster_list = clustering(1, 5)
    if DESKTOP:
        alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
    else:
        alg_clusters_simplegui.PlotClusters(data_table, cluster_list)