def perform_k_means_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with k-means and wrap every cluster in a ``Cluster``.

    Args:
        data: Node coordinates. It is indexed with a boolean mask below, so it
            is presumably a NumPy array with one node per row -- TODO confirm.
        program_options: Supplies ``NUMBER_CLUSTERS`` and ``K_MEANS_N_INIT``
            and is forwarded to every object created here.

    Returns:
        A ``ClusteredData`` holding one ``FULL_CLUSTER`` per k-means centre.
    """
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    # n_jobs is deliberately not passed: scikit-learn deprecated it for KMeans
    # in 0.23 and removed it in 1.0, where passing it raises a TypeError.
    km = KMeans(init='k-means++',
                n_clusters=program_options.NUMBER_CLUSTERS,
                n_init=program_options.K_MEANS_N_INIT)
    km.fit(data)
    k_mean_labels = km.predict(data)

    # cluster_centers_ holds the centre coordinates themselves (not indices)
    for k, cluster_centre in enumerate(km.cluster_centers_):
        class_members = k_mean_labels == k
        cluster = Cluster(cluster_centre=cluster_centre,
                          nodes=data[class_members],
                          cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    print("k-mean clusters", k_mean_labels)
    return clustered_data
def perform_affinity_propagation(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with affinity propagation.

    Args:
        data: Node coordinates; indexed both by exemplar index and by boolean
            mask, so presumably a NumPy array of nodes -- TODO confirm.
        program_options: Supplies the convergence and maximum iteration counts
            and is forwarded to every object created here.

    Returns:
        A ``ClusteredData`` with one ``FULL_CLUSTER`` per exemplar, whose
        centre is the exemplar node taken from ``data``.
    """
    # Container handed back to the caller
    result = ClusteredData(data, list(), program_options=program_options)

    model = AffinityPropagation(
        convergence_iter=program_options.AFFINITY_PROPAGATION_CONVERGENCE_ITERATIONS,
        max_iter=program_options.AFFINITY_PROPAGATION_MAX_ITERATIONS)
    model = model.fit(data)

    exemplar_indices = model.cluster_centers_indices_
    labels = model.labels_
    cluster_count = len(exemplar_indices)
    print('Estimated number of AffinityPropagation clusters: %d' % cluster_count)

    for label in range(cluster_count):
        member_mask = labels == label
        # The centre of an affinity propagation cluster is one of the input nodes
        exemplar = data[exemplar_indices[label]]
        result.add_cluster(Cluster(cluster_centre=exemplar,
                                   nodes=data[member_mask],
                                   cluster_type=ClusterType.FULL_CLUSTER,
                                   program_options=program_options))

    return result
def perform_birch_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with Birch and wrap every non-empty cluster.

    Args:
        data: Node coordinates; indexed with a boolean mask below, so
            presumably a NumPy array with one node per row -- TODO confirm.
        program_options: Supplies ``BIRCH_BRANCHING_FACTOR``,
            ``NUMBER_CLUSTERS`` and ``BIRCH_THRESHOLD`` and is forwarded to
            every object created here.

    Returns:
        A ``ClusteredData`` holding one ``FULL_CLUSTER`` for each label that
        Birch actually assigned to at least one node.
    """
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    brc = Birch(branching_factor=program_options.BIRCH_BRANCHING_FACTOR,
                n_clusters=program_options.NUMBER_CLUSTERS,
                threshold=program_options.BIRCH_THRESHOLD)
    brc.fit(data)
    birch_labels = brc.predict(data)

    # Iterate over the labels that were produced rather than the requested
    # n_clusters: the request may be None, and Birch can return fewer clusters
    # than asked for when it finds too few subclusters.
    for k in range(birch_labels.max() + 1):
        class_members = birch_labels == k
        if not class_members.any():
            # An empty cluster has no meaningful centre (its mean would be NaN)
            continue
        nodes_in_cluster = data[class_members]
        # birch has no way of telling you the final cluster centres so have to
        # calculate it yourself
        cluster_centre = nodes_in_cluster.mean(axis=0)
        cluster = Cluster(cluster_centre=cluster_centre,
                          nodes=nodes_in_cluster,
                          cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    print("birch clusters", birch_labels)
    return clustered_data
def perform_optics_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with OPTICS, keeping noise nodes as single-node clusters.

    Args:
        data: Node coordinates; indexed with a boolean mask below, so
            presumably a NumPy array with one node per row -- TODO confirm.
        program_options: Supplies ``OPTICS_MIN_SAMPLES`` and is forwarded to
            every object created here.

    Returns:
        A ``ClusteredData`` with one ``FULL_CLUSTER`` per label from 0 up to
        the largest label, plus one ``UNCLASSIFIED_NODE_CLUSTER`` for each
        node labelled -1.
    """
    # Container handed back to the caller
    result = ClusteredData(data, list(), program_options=program_options)

    model = OPTICS(min_samples=program_options.OPTICS_MIN_SAMPLES, n_jobs=-1)
    model.fit(data)
    labels = model.labels_

    highest_label = labels.max()
    for label in range(highest_label + 1):
        members = data[labels == label]
        # OPTICS does not report cluster centres, so use the mean of the members
        centre = members.mean(axis=0)
        result.add_cluster(Cluster(cluster_centre=centre,
                                   nodes=members,
                                   cluster_type=ClusterType.FULL_CLUSTER,
                                   program_options=program_options))

    if labels.min() != -1:
        # Every node was placed in a cluster
        return result

    # Nodes labelled -1 could not be classified; each one stands alone
    for lone_node in data[labels == -1]:
        lone_cluster = Cluster(lone_node, [lone_node],
                               cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                               program_options=program_options)
        result.add_unclassified_node(lone_cluster)

    return result
def plot_clustered_graph(plot_colours, cluster_data: ClusteredData, program_options):
    """Plot every cluster with its centre, plus the unclassified nodes.

    The figure is saved into ``program_options.OUTPUT_DIRECTORY`` and shown
    on screen only when ``program_options.DISPLAY_PLOTS`` is set.

    Args:
        plot_colours: Iterable of single-character matplotlib colour codes;
            one is consumed per cluster.
        cluster_data: Source of the clusters and unclassified nodes to draw.
        program_options: Supplies the problem name, cluster type, output
            directory, DPI and display flag.
    """
    # This plotting was adapted from the affinity propagation sklearn example
    clusters = cluster_data.get_clusters()
    for index, (cluster, colour) in enumerate(zip(clusters, plot_colours)):
        members = cluster.nodes
        centre = cluster.get_cluster_centre()
        centre_x, centre_y = centre[0], centre[1]

        plt.plot(members[:, 0], members[:, 1], colour + '.')
        plt.plot(centre_x, centre_y, 'o', markerfacecolor=colour,
                 markeredgecolor='k', markersize=14)
        # Label the centre with the position of the cluster in the list
        plt.annotate(index, xy=(centre_x, centre_y), fontsize=10,
                     ha='center', va='center')
        # Spoke from the centre to every member of the cluster
        for member in members:
            plt.plot([centre_x, member[0]], [centre_y, member[1]],
                     colour, linewidth=0.5)

    # Unclassified nodes are drawn as small black dots
    noise_clusters = cluster_data.get_unclassified_nodes()
    for noise in noise_clusters:
        plt.plot(noise.cluster_centre[0], noise.cluster_centre[1], 'o',
                 markerfacecolor='k', markeredgecolor='k', markersize=6)

    problem_name = program_options.TSP_PROBLEM_NAME
    cluster_type_name = str(program_options.CLUSTER_TYPE)
    counts = ': clusters: %d noise: %d' % (len(clusters), len(noise_clusters))
    plt.title(problem_name + ' ' + cluster_type_name + counts)

    output_file = (program_options.OUTPUT_DIRECTORY + problem_name + "-"
                   + cluster_type_name + "-clustering.png")
    plt.savefig(output_file, dpi=program_options.PLT_DPI_VALUE)

    if program_options.DISPLAY_PLOTS:
        plt.show()
    plt.close()
def perform_dbscan_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with DBSCAN, keeping noise nodes as single-node clusters.

    When ``program_options.AUTOMATE_DBSCAN_EPS`` is set, the eps value is
    estimated first and written back to ``program_options.DBSCAN_EPS``.

    Args:
        data: Node coordinates; indexed with a boolean mask below, so
            presumably a NumPy array with one node per row -- TODO confirm.
        program_options: Supplies ``DBSCAN_EPS`` and ``DBSCAN_MIN_SAMPLES``
            and is forwarded to every object created here. Mutated when eps
            is estimated automatically.

    Returns:
        A ``ClusteredData`` with one ``FULL_CLUSTER`` per DBSCAN cluster plus
        one ``UNCLASSIFIED_NODE_CLUSTER`` for each node labelled -1.
    """
    if program_options.AUTOMATE_DBSCAN_EPS:
        program_options.DBSCAN_EPS = dbscan_eps_finder.find_using_nearest_neighbours(
            problem_data_array=data, program_options=program_options)

    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    db = DBSCAN(eps=program_options.DBSCAN_EPS,
                min_samples=program_options.DBSCAN_MIN_SAMPLES,
                n_jobs=-1).fit(data)
    db_labels = db.labels_

    # -1 is the noise label, so it does not count as a cluster
    noise_mask = db_labels == -1
    db_n_clusters_ = len(set(db_labels.tolist()) - {-1})

    for k in range(db_n_clusters_):
        nodes_in_cluster = data[db_labels == k]
        # DBSCAN does not report cluster centres, so use the mean of the members
        cluster_centre = nodes_in_cluster.mean(axis=0)
        cluster = Cluster(cluster_centre=cluster_centre,
                          nodes=nodes_in_cluster,
                          cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    # These are the nodes that could not be placed into a cluster
    if noise_mask.any():
        for unclassified_node in data[noise_mask]:
            cluster_to_add = Cluster(unclassified_node, [unclassified_node],
                                     cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                                     program_options=program_options)
            clustered_data.add_unclassified_node(cluster_to_add)

    return clustered_data
def setup_cluster():
    """Build a small hand-made ``ClusteredData`` fixture for tests.

    Returns:
        A ``(clustered_data, test_nodes)`` tuple, where ``test_nodes`` is the
        plain list of the six node coordinates the fixture was built from.
    """
    options = Options(output_directory="test",
                      tsp_problem_name="test",
                      aco_ant_count=10,
                      aco_iterations=5,
                      file_name="test",
                      output_directory_2_opt_animation="test",
                      output_directory_aco_animation="test",
                      cluster_tour_type=InternalClusterPathFinderType.ACO)

    # Three groups: one full cluster made of the first four nodes and two
    # stand-alone nodes that each form a cluster of size one
    test_nodes = [[1, 1], [1.5, 1], [1, 1.5], [1, 2], [2, 1], [2, 2]]

    # ClusteredData is given the nodes as an np array rather than a list
    fixture = ClusteredData(nodes=np.asarray(test_nodes),
                            clusters=list(),
                            program_options=options)

    fixture.add_cluster(
        Cluster(cluster_centre=[1.25, 1.5],
                nodes=np.asarray([[1, 1], [1.5, 1], [1, 1.5], [1, 2]]),
                cluster_type=ClusterType.FULL_CLUSTER,
                program_options=options))

    first_lone_node = Cluster(cluster_centre=[2, 1],
                              nodes=np.asarray([[2, 1]]),
                              cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                              program_options=options)
    fixture.add_unclassified_node(first_lone_node)

    second_lone_node = Cluster(cluster_centre=[2, 2],
                               nodes=np.asarray([[2, 2]]),
                               cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                               program_options=options)
    fixture.add_unclassified_node(second_lone_node)

    # Fixed cluster-level tour for the tests to work with
    fixture.aco_cluster_tour = (0, 1, 2)

    return fixture, test_nodes
def _cluster_problem_data(problem_data_array, program_options: Options) -> ClusteredData:
    """Build the ``ClusteredData`` for the problem as configured in ``program_options``.

    Raises:
        NotImplementedError: If clustering is enabled but
            ``program_options.CLUSTER_TYPE`` is not one of the handled types.
    """
    if not program_options.SHOULD_CLUSTER:
        # Clustering is disabled: every node becomes its own single-node cluster
        clustered_data = ClusteredData(nodes=problem_data_array,
                                       clusters=list(),
                                       program_options=program_options)
        for node in problem_data_array:
            cluster = Cluster(cluster_centre=node,
                              nodes=[node],
                              cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                              program_options=program_options)
            clustered_data.add_unclassified_node(cluster)
        return clustered_data

    cluster_type = program_options.CLUSTER_TYPE
    if cluster_type is ClusterAlgorithmType.K_MEANS:
        return perform_k_means_clustering(problem_data_array, program_options)
    elif cluster_type is ClusterAlgorithmType.AFFINITY_PROPAGATION:
        return perform_affinity_propagation(problem_data_array, program_options)
    elif cluster_type is ClusterAlgorithmType.BIRCH:
        return perform_birch_clustering(problem_data_array, program_options)
    elif cluster_type is ClusterAlgorithmType.DBSCAN:
        return perform_dbscan_clustering(problem_data_array, program_options)
    elif cluster_type is ClusterAlgorithmType.OPTICS:
        return perform_optics_clustering(problem_data_array, program_options)

    # Fail here with a clear message instead of handing None to the caller
    raise NotImplementedError("Unsupported cluster type: %s" % cluster_type)


def run_algorithm_with_options(program_options: Options, problem_data_array, problem: tsplib95.Problem):
    """Run the clustered ACO pipeline (and optional 2-opt) and plot the results.

    The steps are: cluster the nodes, run ACO over the clusters, find a tour
    inside every cluster, join those into a node-level tour, optionally
    improve it with 2-opt, and finally produce the plots and animations.

    Args:
        program_options: Configuration that selects the clustering algorithm,
            the ACO implementation and parameters, and the plotting options.
        problem_data_array: Iterable of node locations. The position of a node
            in this iterable is used as its node id.
        problem: The tsplib95 problem; only used to build the graph for the
            acopy solver.

    Raises:
        ValueError: If clustering yields one cluster or fewer.
        NotImplementedError: If the cluster type, ACO type or cluster tour
            type in ``program_options`` is not handled.
    """
    program_start_time = timeit.default_timer()

    # key is the node location and the value is the node id
    node_location_to_id_dict = dict()
    # Key is the node id and the value is the node location
    node_id_to_location_dict = dict()
    for node_id, node in enumerate(problem_data_array):
        # repr(node) is the lookup key, so nodes with an identical repr share
        # one entry and the later id overwrites the earlier one
        node_location_to_id_dict[repr(node)] = node_id
        node_id_to_location_dict[node_id] = node

    colors = cycle('bgrcmybgrcmybgrcmybgrcmy')

    clustered_data = _cluster_problem_data(problem_data_array, program_options)

    # Set the overall node dicts onto the clustering object
    clustered_data.node_location_to_id_dict = node_location_to_id_dict
    clustered_data.node_id_to_location_dict = node_id_to_location_dict

    cluster_nodes_dict = clustered_data.get_dict_node_id_location_mapping_aco()
    logging.debug("%s nodes after clustering", len(cluster_nodes_dict))

    # Raise an error if only 1 cluster has come out of this because ACO needs
    # more than 1 cluster to run over
    if len(cluster_nodes_dict) <= 1:
        raise ValueError(
            "Need more than one cluster from the clustering algorithm")

    aco_tour_improvement_plotter: TourImprovementAnimator = TourImprovementAnimator(
        cluster_nodes_dict, problem_type="aco", program_options=program_options)

    before = timeit.default_timer()
    if program_options.ACO_TYPE is ACOType.ACO_MULTITHREADED:
        colony = AntColony(
            nodes=cluster_nodes_dict,
            distance_callback=aco_distance_callback,
            alpha=program_options.ACO_ALPHA_VALUE,
            beta=program_options.ACO_BETA_VALUE,
            pheromone_evaporation_coefficient=program_options.ACO_RHO_VALUE,
            pheromone_constant=program_options.ACO_Q_VALUE,
            ant_count=program_options.ACO_ANT_COUNT,
            tour_improvement_animator=aco_tour_improvement_plotter,
            iterations=program_options.ACO_ITERATIONS)
        answer = colony.mainloop()
    elif program_options.ACO_TYPE is ACOType.ACO_PY:
        solver = acopy.Solver(rho=program_options.ACO_RHO_VALUE,
                              q=program_options.ACO_Q_VALUE)
        colony = acopy.Colony(alpha=program_options.ACO_ALPHA_VALUE,
                              beta=program_options.ACO_BETA_VALUE)
        logger_plugin = LoggerPlugin()
        iteration_plotter_plugin = IterationPlotterPlugin(
            tour_improvement_animator=aco_tour_improvement_plotter)
        solver.add_plugin(logger_plugin)
        solver.add_plugin(iteration_plotter_plugin)
        graph = clustered_data.turn_clusters_into_nx_graph(problem)
        solution = solver.solve(graph, colony,
                                limit=program_options.ACO_ITERATIONS,
                                gen_size=program_options.ACO_ANT_COUNT)
        answer = solution.nodes
    else:
        raise NotImplementedError()
    after = timeit.default_timer()
    dif = after - before
    logging.debug("Time taken for initial global %s aco %s",
                  program_options.ACO_TYPE, dif)

    clustered_data.aco_cluster_tour = answer
    clustered_data.find_nodes_to_move_between_clusters()

    if program_options.CLUSTER_TOUR_TYPE is InternalClusterPathFinderType.ACO:
        if program_options.ACO_TYPE is ACOType.ACO_MULTITHREADED:
            clustered_data.find_tours_within_clusters_using_multithreaded_aco()
        elif program_options.ACO_TYPE is ACOType.ACO_PY:
            clustered_data.find_tours_within_clusters_using_acopy()
        else:
            raise NotImplementedError()
    elif program_options.CLUSTER_TOUR_TYPE is InternalClusterPathFinderType.GREEDY_NEAREST_NODE:
        clustered_data.find_tours_within_clusters_using_greedy_closest_nodes()
    else:
        raise NotImplementedError()

    tour_node_coordinates = clustered_data.get_ordered_nodes_for_all_clusters()

    # Tour as node ids instead of node locations
    tour_node_id = [node_location_to_id_dict[repr(node)]
                    for node in tour_node_coordinates]
    clustered_data.node_level_tour = tour_node_id

    # The tour is valid when it visits every node exactly once; this is only
    # logged, not enforced
    tour_node_id_set = set(tour_node_id)
    valid = len(tour_node_id) == len(tour_node_id_set) == len(
        problem_data_array)
    logging.debug("Tour is valid %s", valid)

    length_before = calculate_distance_for_tour(tour_node_id,
                                                node_id_to_location_dict)
    logging.debug("Length before 2-opt is %s", length_before)

    # If the option to run 2opt is set then process 2-opt
    if program_options.SHOULD_RUN_2_OPT:
        tsp_2_opt_graph_animator = TourImprovementAnimator(
            node_id_to_location_dict,
            problem_type="2-opt",
            program_options=program_options)
        before = timeit.default_timer()
        final_route = run_2_opt(
            existing_route=tour_node_id,
            node_id_to_location_dict=node_id_to_location_dict,
            distance_calculator_callback=calculate_distance_for_tour,
            tsp_2_opt_animator=tsp_2_opt_graph_animator)
        after = timeit.default_timer()
        dif = after - before
        logging.debug("Time taken for 2-opt %s", dif)

        length_after = calculate_distance_for_tour(final_route,
                                                   node_id_to_location_dict)
        logging.debug("Length after 2-opt is %s", length_after)
        logging.debug("Final route after 2-opt is %s", final_route)

    program_end_time = timeit.default_timer()
    dif = program_end_time - program_start_time
    logging.debug("Time taken for entire program %s", dif)

    # These are the tour plotters so should be ignored for time calculations
    logging.debug("Starting tour plotters")

    # plot the tours for each cluster
    clustered_data.plot_all_cluster_tours()

    # This is the graph that shows all the clusters
    plot_clustered_graph(colors,
                         cluster_data=clustered_data,
                         program_options=program_options)

    # Plot all the nodes in the problem, no tour
    plot_nodes(problem_data_array, program_options=program_options)

    # Plot the ACO tour of the clusters
    plot_aco_clustered_tour(answer, clustered_data,
                            program_options=program_options)

    # Plot the tour pre 2-opt
    plot_complete_tsp_tour(tour_node_id,
                           node_id_to_location_dict,
                           title="TSP Tour Before 2-opt. Length: " + str(length_before),
                           program_options=program_options)

    # If 2opt was ran then you can safely print out all the 2-opt related graphs
    if program_options.SHOULD_RUN_2_OPT:
        # Plot the tour post 2-opt
        plot_complete_tsp_tour(final_route,
                               node_id_to_location_dict,
                               title="TSP Tour After 2-opt. Length: " + str(length_after),
                               program_options=program_options)

        # Plot the tour post 2-opt with node ids printed
        plot_complete_tsp_tour(final_route,
                               node_id_to_location_dict,
                               title="Final TSP Tour With Node ID",
                               node_id_shown=True,
                               program_options=program_options)

        if program_options.ANIMATE_IMPROVEMENTS:
            # Create an animation of the 2-opt incremental improvement
            tsp_2_opt_graph_animator.animate(
                output_directory_animation_graphs=program_options.OUTPUT_DIRECTORY_2_OPT_ANIMATION)

    if program_options.ANIMATE_IMPROVEMENTS:
        # Create an animation of the aco incremental improvement
        aco_tour_improvement_plotter.animate(
            output_directory_animation_graphs=program_options.OUTPUT_DIRECTORY_ACO_ANIMATION)

    logging.debug("Finished tour plotters")