def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 15) cluster_list = hierarchical_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(x):
    """
    Run k-means clustering on the DATA_111_URL table and return the clusters

    x -- forwarded as the second argument of kmeans_clustering (presumably
         the number of clusters); the third argument is always 5

    Returns the cluster list produced by
    alg_project3_solution.kmeans_clustering.

    Nothing is printed or plotted: the distortion print-out and the plotting
    code that used to follow the return statement could never execute and
    have been removed.
    """
    data_table = load_data_table(DATA_111_URL)

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    return alg_project3_solution.kmeans_clustering(singleton_list, x, 5)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 15) #print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = closest_pairs_and_clustering_algorithms.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = closest_pairs_and_clustering_algorithms.kmeans_clustering( singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example():
    """
    Run k-means clustering on the DATA_3108_URL table and plot the clusters

    kmeans_clustering is called with 20 and 20 as its second and third
    arguments; the result is drawn with matplotlib.
    """
    data_table = load_data_table(DATA_3108_URL)

    # one single-member cluster per table row
    singleton_list = [
        project_3.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # alternatives that can be swapped in for the call below:
    #   sequential_clustering(singleton_list, 15)
    #   hierarchical_clustering(singleton_list, 15)
    cluster_list = kmeans_clustering(singleton_list, 20, 20)
    print("Displaying", len(cluster_list), "k-means clusters")

    # draw the clusters using matplotlib
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) #data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # ************** Here we have to choose the type of clustering we want to use for visualization ******************** #cluster_list = sequential_clustering(singleton_list, 15); print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9); print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" if DESKTOP: # draw the clusters using matplotlib or simplegui alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" distortion = sum([clstr.cluster_error(data_table) for clstr in cluster_list]) print "Distortion of clustering =", distortion # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_24_URL) # data_table = gen_random_clusters(100)[0] #singleton_list = [] #for line in data_table: #singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) singleton_list = gen_random_clusters(100) #cluster_list = sequential_clustering(singleton_list, 15) #print "Displaying", len(cluster_list), "sequential clusters" cluster_list = module3_project.hierarchical_clustering(singleton_list, 10) print "Displaying", len(cluster_list), "hierarchical clusters" #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def visualize_data(cluster_input, data, method=None, display_centers=False):
    """
    Load a data table, compute a list of clusters and plot them with matplotlib

    cluster_input   -- forwarded as the second argument of
                       sequential_clustering / hierarchical_clustering; for
                       'kmeans_clustering' it must be indexable, and items
                       [0] and [1] become the second and third arguments of
                       clustering.kmeans_clustering
    data            -- source handed to load_data_table
    method          -- None (sequential_clustering),
                       'hierarchical_clustering' or 'kmeans_clustering'
    display_centers -- forwarded as the third argument of plot_clusters

    An unrecognized method prints an error message and returns None without
    loading or plotting anything.
    """
    # Reject unknown methods first: falling through to the plot call would
    # fail on an unassigned cluster_list.
    if method is not None and method not in ('hierarchical_clustering',
                                             'kmeans_clustering'):
        print("ERROR: method entered into visualize_data not recognized")
        return

    data_table = load_data_table(data)

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])
        for line in data_table
    ]

    if method is None:
        cluster_list = sequential_clustering(singleton_list, cluster_input)
        print("Displaying", len(cluster_list), "sequential clusters")
    elif method == 'hierarchical_clustering':
        cluster_list = clustering.hierarchical_clustering(singleton_list,
                                                          cluster_input)
        print("Displaying", len(cluster_list), "hierarchical clusters")
    else:
        cluster_list = clustering.kmeans_clustering(singleton_list,
                                                    cluster_input[0],
                                                    cluster_input[1])
        print("Displaying", len(cluster_list), "k-means clusters")

    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                          display_centers)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 15) #print "Displaying", len(cluster_list), "sequential clusters" cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 50) print "Displaying", len(cluster_list), "hierarchical clusters" #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 20, 5) #print "Displaying", len(cluster_list), "k-means clusters" print 'Calculating distortion...' print alg_project3_solution.compute_distortion(cluster_list, data_table) # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def question_5_6():
    """
    Hierarchically cluster the DATA_111_URL table and plot the clusters with
    matplotlib
    """
    data_table = load_data_table(DATA_111_URL)

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    # alternatives that can be swapped in for the call below:
    #   sequential_clustering(singleton_list, 15)
    #   kmeans_clustering(singleton_list, 9, 5)
    cluster_list = hierarchical_clustering(singleton_list, 20)
    print("Displaying", len(cluster_list), "hierarchical clusters")

    # True adds cluster centers; pass False to leave them out
    add_centers = True
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list,
                                          add_centers)
def q6():
    """
    Run k-means clustering (arguments 9, 5) on the DATA_111_URL table and
    plot the clusters with matplotlib
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def q2():
    """
    Hierarchically cluster the DATA_3108_URL table (second argument 15) and
    plot the clusters with matplotlib
    """
    data_table = viz.load_data_table(viz.DATA_3108_URL)

    # one single-member cluster per table row
    singleton_list = [
        alg_cluster.Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 15)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def visualize(datafile, output, cluster_func): data_table = load_data_table(datafile) clusters = [alg_cluster.Cluster(set([x[0]]), x[1], x[2], x[3], x[4]) for x in data_table] clusters = cluster_func(clusters) print "Displaying", len(clusters), "clusters" alg_clusters_matplotlib.plot_clusters(data_table, clusters, True, output) return clusters
def q2():
    """
    Hierarchically cluster the DATA_3108_URL table (second argument 15) and
    plot the clusters with matplotlib
    """
    data_table = viz.load_data_table(viz.DATA_3108_URL)

    make_cluster = alg_cluster.Cluster
    # one single-member cluster per table row
    singleton_list = [
        make_cluster(set([entry[0]]), entry[1], entry[2], entry[3], entry[4])
        for entry in data_table
    ]

    cluster_list = alg_project3.hierarchical_clustering(singleton_list, 15)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def q6():
    """
    Run k-means clustering (arguments 9, 5) on the DATA_111_URL table and
    plot the clusters with matplotlib
    """
    data_table = viz.load_data_table(viz.DATA_111_URL)

    make_cluster = alg_cluster.Cluster
    # one single-member cluster per table row
    singleton_list = [
        make_cluster(set([entry[0]]), entry[1], entry[2], entry[3], entry[4])
        for entry in data_table
    ]

    cluster_list = alg_project3.kmeans_clustering(singleton_list, 9, 5)
    alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_question_3(): data_table = pro3_viz.load_data_table(DATA_3108_URL) data_table_cl = clusterize_data(data_table) cluster_list = pro3.kmeans_clustering(data_table_cl, 15, 5) print "Displaying", len(cluster_list), "k-means clusters" # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) # simple filled-in circles alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # add cluster centers
def run_question_5(): data_table = pro3_viz.load_data_table(DATA_111_URL) data_table_cl = clusterize_data(data_table) cluster_list = pro3.hierarchical_clustering(data_table_cl, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) # simple filled-in circles alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # add cluster centers return cluster_list
def question5_plot():
    """
    Generate the plot for question 5: hierarchical clustering of the
    DATA_111_URL table (second argument 9)
    """
    data_table = load_data_table(DATA_111_URL)

    # one single-member cluster per table row
    singleton_list = [
        Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    plot_clusters(data_table, hierarchical_clustering(singleton_list, 9), True)
def question6(URL, file_to_save, number_clusters=9, iterations=5, centers=False): ''' :return: ''' data_table, singleton_list = alg_project3_viz.run_example(URL) cluster_list = kmeans_clustering(singleton_list, number_clusters, iterations) print "Displaying", len(cluster_list), "k-means clusters" if centers: file = file_to_save[:-4] + 'with_centers' + '.png' alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file, True) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file_to_save, False)
def question2(URL, file_to_save, number_clusters=15, centers=False): ''' :return: ''' data_table, singleton_list = alg_project3_viz.run_example(URL) cluster_list = hierarchical_clustering(singleton_list, number_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" if centers: file = file_to_save[:-4] + 'with_centers' + '.png' alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file, centers) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, file_to_save, centers)
def question3_plot():
    """
    Generate the plot for question 3: k-means clustering of the
    DATA_3108_URL table (arguments 15, 5)
    """
    data_table = load_data_table(DATA_3108_URL)

    # one single-member cluster per table row
    singleton_list = [
        Cluster(set([row[0]]), row[1], row[2], row[3], row[4])
        for row in data_table
    ]

    plot_clusters(data_table, kmeans_clustering(singleton_list, 15, 5), True)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ #data_table = load_data_table(DATA_3108_URL) #data_table = load_data_table(DATA_896_URL) data_table = load_data_table(DATA_290_URL) #data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) original_dict = create_dictionary(data_table) #original_list = [] #for line in data_table: # original_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #original_list = list(singleton_list) #original_list = singleton_list[:] #cluster_list = sequential_clustering(singleton_list, 15) #print "Displaying", len(cluster_list), "sequential clusters" print "About to display ...." #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16) #cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "Displaying", len(cluster_list), "k-means clusters" # compute the distortion if (True): distortion = compute_distortion(cluster_list, data_table) print distortion # draw the clusters using matplotlib or simplegui if (False): if DESKTOP: #alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list) # use toggle in GUI to add cluster centers
def Question5(): data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = module3_project.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def run_question(number, data_set): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ global DESKTOP print "Loading data table ..." data_table = load_data_table(data_set) print "Data table loaded. Creating clusters ..." singleton_list = [] # set correct number of clusters if number in [2, 3]: num_clusters = 15 elif number in [5, 6]: num_clusters = 9 print "\nQuestion number: ", number print "Number of clusters to be calculated: ", num_clusters # parse data_table into cluster objects for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) print "\nCluster list created. Passing list to hierarchical_clustering ..." # calculate clusters if number == 0: cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" elif number in [2, 5]: cluster_list = cpf.hierarchical_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" elif number in [3, 6]: cluster_list = cpf.kmeans_clustering(singleton_list, num_clusters, 5) print "Displaying", len(cluster_list), "k-means clusters" else: "Please pass a valid number to run_question. Valid options are 0, 2, 3, 5, or 6." # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def assignment_q2(): """ Load a data table, compute a list of clusters and plot a list of clusters """ data_table = load_data_table(DATA_3108_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4])) cluster_list = project.hierarchical_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "hierarchical clusters" alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_kmeans_example():
    """
    Compute the k-means distortion of the DATA_896_URL table for 6..20
    clusters

    For every cluster count from 6 to 20 (inclusive) a fresh list of
    single-member clusters is built, clustered with
    kmeans_clustering(singletons, count, 5), and the summed cluster_error of
    the resulting clusters is recorded.

    Returns the list of 15 distortion values, ordered by cluster count.

    Nothing is printed or plotted: the print and plotting code that used to
    follow the return statement could never execute and has been removed.
    """
    data_table = load_data_table(DATA_896_URL)

    def make_singletons():
        """Return one single-member cluster per row of data_table."""
        return [
            alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3],
                                line[4])
            for line in data_table
        ]

    def compute_distortion(cluster_list):
        """Return the summed cluster_error of cluster_list."""
        return sum(cluster.cluster_error(data_table)
                   for cluster in cluster_list)

    error = []
    for cluster_num in range(6, 21):
        # Fresh singletons for every run, as before; presumably the
        # clustering may alter the clusters it is given - TODO confirm.
        cluster_list = kmeans_clustering(make_singletons(), cluster_num, 5)
        error.append(compute_distortion(cluster_list))
    return error
def Question5(): data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = module3_project.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)
def Q2_Q3_Q5_Q6_viz(data_file, clustering_algo, num_clusters, num_iterations, centers): '''Questions 2-6 Answer''' data_table = project.load_data_table(data_file) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) if clustering_algo == "h": cluster_list = project.hierarchical_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" elif clustering_algo == "k": cluster_list = project.kmeans_clustering(singleton_list, num_clusters, num_iterations) print "Displaying", len(cluster_list), "kmeans clusters" else: print "Clustering method not recognized.\nPlease use 'h' for hierarchical_clustering\nor use 'k' for kmeans_clustering" return alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, centers) #False to remove cluster centers. True to include them
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # Question 2 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_3108_URL) # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print "Displaying", len(cluster_list), "hierarchical clusters" # Question 3 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_3108_URL) # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 15, 5) # print "Displaying", len(cluster_list), "k-means clusters" # Question 5 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_111_URL) # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print "Displaying", len(cluster_list), "hierarchical clusters" # Question 6 answer: uncomment bottom two lines in block comment with # data_table being equal to load_data_table(DATA_111_URL) # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) # print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(data = 3108, algorithm = "sequential", display_centers = False): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ if data == 3108: data_url = DATA_3108_URL if data == 896: data_url = DATA_896_URL if data == 290: data_url = DATA_290_URL if data == 111: data_url = DATA_111_URL data_table = load_data_table(data_url) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) if algorithm == "sequential": cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" if algorithm == "hierarchical": cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" if algorithm == "k-means": cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui # display_centers = True adds cluster centers if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, display_centers) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list) # use toggle in GUI to add cluster centers return
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ #data_table = load_data_table(DATA_3108_URL) #data_table = load_data_table(DATA_111_URL) #data_table = load_data_table(DATA_290_URL) data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) #cluster_list = sequential_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "sequential clusters" #start_time = time.clock() cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) #print "hierarchical:",compute_distortion(cluster_list) #print "time taken for plot:"+ str(time.clock() - start_time) print "Displaying", len(cluster_list), "hierarchical clusters" #start_time = time.clock() #cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) #print "kmeans: ",compute_distortion(cluster_list) #print "time taken for plot:"+ str(time.clock() - start_time) #print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers #print "" else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list) # use toggle in GUI to add cluster centers
def assignment_q6(): """ Load a data table, compute a list of clusters and plot a list of clusters """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4])) cluster_list = project.kmeans_clustering(singleton_list, 9, 5) # q7 distortion = compute_distortion(cluster_list, data_table) print "kmeans_clustering distortion:", distortion print "Displaying", len(cluster_list), "k-means clusters" alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_896_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" cluster_list = alg_project3_solution.hierarchical_clustering( singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" # cluster_list = alg_project3_solution.kmeans_clustering(singleton_list, 9, 5) # print "Displaying", len(cluster_list), "k-means clusters" # calculate cluster_error of hierarchical_clustering and kmeans_clustering hc_error = compute_distortion( alg_project3_solution.hierarchical_clustering(singleton_list, 9), data_table) kmc_error = compute_distortion( alg_project3_solution.kmeans_clustering(singleton_list, 9, 5), data_table) print "cluster_error:\nhierarchical_clustring: ", hc_error, "\nkmeans_clustering: ", kmc_error # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters """ data_table = load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), \ line[1], line[2], line[3], line[4])) #cluster_list = project3.hierarchical_clustering(singleton_list, 9) #print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = project3.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_question(number, data_set): """ Load a data table, compute a list of clusters and plot a list of clusters. Set DESKTOP = True/False to use either matplotlib or simplegui """ global DESKTOP print "Loading data table ..." data_table = load_data_table(data_set) print "Data table loaded. Creating clusters ..." singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) print "Cluster list created. Passing list to hierarchical_clustering ..." if number == 0: cluster_list = sequential_clustering(singleton_list, 15) print "Displaying", len(cluster_list), "sequential clusters" elif number in [2, 5]: cluster_list = cpf.hierarchical_clustering(singleton_list, 9) print "Displaying", len(cluster_list), "hierarchical clusters" elif number in [3, 6]: cluster_list = cpf.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "k-means clusters" else: "Please pass a valid number to run_question." # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def plot_Q6(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ DIRECTORY = "http://commondatastorage.googleapis.com/codeskulptor-assets/" DATA_111_URL = DIRECTORY + "data_clustering/unifiedCancerData_111.csv" data_table = viz.load_data_table(DATA_111_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) cluster_list = project.kmeans_clustering(singleton_list, 9, 5) print "Displaying", len(cluster_list), "hierarchical clusters" # draw the clusters using matplotlib alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_290_URL) singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 16) # distortion = application3.compute_distortion(cluster_list, data_table) # print "Displaying", len(cluster_list), "hierarchical clusters, distortion:", distortion cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 16, 5) distortion = application3.compute_distortion(cluster_list, data_table) print "Displaying", len( cluster_list), "k-means clusters, distortion:", distortion # draw the clusters using matplotlib or simplegui if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table(DATA_3108_URL) singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster({line[0]}, line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 50) # print "Displaying", len(cluster_list), "sequential clusters" # cluster_list = project.hierarchical_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = project.kmeans_clustering(singleton_list, 15, 5) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ data_table = load_data_table('unifiedCancerData_111.csv') singleton_list = [] for line in data_table: singleton_list.append( alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) # cluster_list = sequential_clustering(singleton_list, 15) # print "Displaying", len(cluster_list), "sequential clusters" # cluster_list = alg_project3_solution.hierarchical_clustering(singleton_list, 9) # print 'hierarchical error', app3_7.compute_distortion(cluster_list, data_table) # print "Displaying", len(cluster_list), "hierarchical clusters" cluster_list = alg_project3_solution.kmeans_clustering( singleton_list, 9, 5) print 'k means error', app3_7.compute_distortion(cluster_list, data_table) print "Displaying", len(cluster_list), "k-means clusters" # draw the clusters using matplotlib or simplegui if DESKTOP: # alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, False) alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) #add cluster centers else: alg_clusters_simplegui.PlotClusters( data_table, cluster_list) # use toggle in GUI to add cluster centers
# Script: load a data table, build one single-member cluster per row, reduce
# them to `c` clusters with hierarchical clustering and plot the result.
import sys
# The search path is extended before the project imports below; presumably
# those modules live under this directory -- confirm.
sys.path.append('../../3_closest_pairs_&_clustering_algorithms')
import data.load_clusters as lc
import data.cluster as cl
import clustering as clr
import alg_clusters_matplotlib as cplot

# Other tables that can be swapped in: DATA_3108_URL, DATA_290_URL
data_table = lc.load_data_table(lc.DATA_896_URL)  #DATA_3108_URL DATA_290_URL

# Each row becomes its own cluster: the first field is wrapped in a
# one-element set, the next four are passed through unchanged.
singleton_list = []
for line in data_table:
    singleton_list.append(cl.Cluster(set([line[0]]), line[1], line[2], line[3], line[4]))

c = 7  # cluster count
cluster_list = clr.hierarchical_clustering(singleton_list, c)
# Third argument True: presumably draws cluster centers -- confirm against
# alg_clusters_matplotlib.plot_clusters.
cplot.plot_clusters(data_table, cluster_list, True)
def run_example(): """ Load a data table, compute a list of clusters and plot a list of clusters Set DESKTOP = True/False to use either matplotlib or simplegui """ algo_used = 1 # 1: sequential clusters, 2: hierarchical clusters, 3: k-means clusters data_urls = [DATA_3108_URL, DATA_896_URL, DATA_290_URL, DATA_111_URL] source = 3 # pick which data source url data_table = load_data_table(data_urls[source - 1]) def clustering(algo_used, num_clusters, num_iter = 5): """ Uses specified algorithm to cluster data input: int for specified algorithm, data_table output: cluster_list """ singleton_list = [] for line in data_table: singleton_list.append(alg_cluster.Cluster(set([line[0]]), line[1], line[2], line[3], line[4])) if algo_used == 1: cluster_list = sequential_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "sequential clusters" elif algo_used == 2: cluster_list = prj3.hierarchical_clustering(singleton_list, num_clusters) print "Displaying", len(cluster_list), "hierarchical clusters" elif algo_used == 3: cluster_list = prj3.kmeans_clustering(singleton_list, num_clusters, num_iter) print "Displaying", len(cluster_list), "k-means clusters" return cluster_list def gen_random_clusters(num_clusters): """ Creates a list of clusters where each cluster in this list corresponds to one randomly generated point in the 2 x 2 square Input: number of clusters (int) Output: list of random clusters that is num_clusters long (list) """ cluster_list = [] for cluster in xrange(num_clusters): x = random.choice([1, -1]) * random.random() y = random.choice([1, -1]) * random.random() cluster_list.append(alg_cluster.Cluster(set([]), x, y, 1, 0)) return cluster_list def question_one(): """ Function for answering first question """ xvals = range(2, 200) slow_yvals = [] fast_yvals = [] for num in xvals: cluster_list = gen_random_clusters(num) initial = time.time() answer = prj3.slow_closest_pairs(cluster_list) final = time.time() slow_yvals.append(final - 
initial) for num in xvals: cluster_list = gen_random_clusters(num) initial = time.time() answer = prj3.fast_closest_pair(cluster_list) final = time.time() fast_yvals.append(final - initial) slow_line = plt.plot(xvals, slow_yvals, color='r', label="Slow Closest Pair") fast_line = plt.plot(xvals, fast_yvals, color='b', label="Fast Closest Pair") plt.legend(loc=2) plt.title("Efficiency of Slow and Fast Closest Pairs Algorithms") plt.xlabel("Number of Clusters") plt.ylabel("Run Times in Milliseconds") plt.show() def compute_distortion(cluster_list): """ Takes a list of clusters and uses cluster_error to compute its distortion. input: list of clusters, original data table output: cluster distortion int """ distortion = 0 for cluster in cluster_list: distortion += cluster.cluster_error(data_table) return distortion def question_ten(): """ Function for answering question 10 """ xvals = xrange(6, 21) kmeans_y = [] high_y = [] for clusters in xvals: kmeans_y.append(compute_distortion(clustering(3, clusters))) for clusters in xvals: high_y.append(compute_distortion(clustering(2, clusters))) kmeans_line = plt.plot(xvals, kmeans_y, color='r', label="K-Means Clustering") high_line = plt.plot(xvals, high_y, color='b', label="Hierarchical Clustering") plt.legend() plt.title("Distortion Comparison Between Clustering Methods on 290 County Data Set") plt.xlabel("Number of Output Clusters") plt.ylabel("Distortion") plt.show() #question_one() #question_ten() # draw the clusters using matplotlib or simplegui cluster_list = clustering(1, 5) if DESKTOP: alg_clusters_matplotlib.plot_clusters(data_table, cluster_list, True) else: alg_clusters_simplegui.PlotClusters(data_table, cluster_list)