Example #1
def perform_k_means_clustering(data, program_options: Options) -> ClusteredData:
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    km = KMeans(init='k-means++', n_clusters=program_options.NUMBER_CLUSTERS, n_init=program_options.K_MEANS_N_INIT,
                n_jobs=-1)
    km.fit(data)
    k_mean_labels = km.predict(data)
    k_means_cluster_centres = km.cluster_centers_
    n_clusters_ = len(k_means_cluster_centres)
    for k in range(n_clusters_):
        class_members = k_mean_labels == k
        cluster = Cluster(cluster_centre=k_means_cluster_centres[k], nodes=data[class_members],
                          cluster_type=ClusterType.FULL_CLUSTER, program_options=program_options)
        clustered_data.add_cluster(cluster)

    print("k-mean clusters", k_mean_labels)
    return clustered_data
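Below is a minimal, self-contained sketch of the same scikit-learn KMeans pattern (fit, predict, then read cluster_centers_) on toy data. The project-specific ClusteredData, Cluster and Options types are omitted, and n_jobs is left out because newer scikit-learn releases no longer accept it for KMeans; treat this as an illustration rather than the project's code.

import numpy as np
from sklearn.cluster import KMeans

# Two well-separated groups of toy points
toy_data = np.array([[1.0, 1.0], [1.5, 1.0], [1.0, 1.5], [8.0, 8.0], [8.5, 8.0], [8.0, 8.5]])

km = KMeans(init='k-means++', n_clusters=2, n_init=10)
km.fit(toy_data)
labels = km.predict(toy_data)

for k, centre in enumerate(km.cluster_centers_):
    members = toy_data[labels == k]
    print("cluster", k, "centre", centre, "size", len(members))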
Example #2
def perform_affinity_propagation(data, program_options: Options) -> ClusteredData:
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    af = AffinityPropagation(convergence_iter=program_options.AFFINITY_PROPAGATION_CONVERGENCE_ITERATIONS,
                             max_iter=program_options.AFFINITY_PROPAGATION_MAX_ITERATIONS).fit(data)
    affinity_propagation_cluster_centers_indices = af.cluster_centers_indices_
    affinity_propagation_labels = af.labels_
    n_clusters_ = len(affinity_propagation_cluster_centers_indices)
    print('Estimated number of AffinityPropagation clusters: %d' % n_clusters_)

    for k in range(n_clusters_):
        class_members = affinity_propagation_labels == k
        cluster_center = data[affinity_propagation_cluster_centers_indices[k]]

        cluster = Cluster(cluster_centre=cluster_center, nodes=data[class_members],
                          cluster_type=ClusterType.FULL_CLUSTER, program_options=program_options)
        clustered_data.add_cluster(cluster)

    return clustered_data
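Unlike k-means, AffinityPropagation exposes cluster_centers_indices_, i.e. the indices of exemplar points within the input data, which is why the function above indexes data with them. A minimal, self-contained sketch of that pattern follows; the random_state argument is an assumption added for reproducibility and is not in the original.

import numpy as np
from sklearn.cluster import AffinityPropagation

toy_data = np.array([[1.0, 1.0], [1.2, 1.1], [5.0, 5.0], [5.1, 4.9]])

af = AffinityPropagation(convergence_iter=15, max_iter=200, random_state=0).fit(toy_data)
for k, exemplar_index in enumerate(af.cluster_centers_indices_):
    # The exemplar is an actual input point, not a computed centroid
    members = toy_data[af.labels_ == k]
    print("cluster", k, "exemplar", toy_data[exemplar_index], "size", len(members))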
Example #3
def perform_birch_clustering(data, program_options: Options) -> ClusteredData:
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    brc = Birch(branching_factor=program_options.BIRCH_BRANCHING_FACTOR, n_clusters=program_options.NUMBER_CLUSTERS,
                threshold=program_options.BIRCH_THRESHOLD)
    brc.fit(data)
    birch_labels = brc.predict(data)

    for k in range(brc.n_clusters):
        class_members = birch_labels == k
        nodes_in_cluster = data[class_members]
        # Birch does not expose the final cluster centres, so they have to be calculated manually
        cluster_centre = nodes_in_cluster.mean(axis=0)
        cluster = Cluster(cluster_centre=cluster_centre, nodes=nodes_in_cluster, cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    print("birch clusters", birch_labels)

    return clustered_data
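As the comment in the function notes, Birch does not report final cluster centres when n_clusters is set, so the centre is taken as the mean of each cluster's members. A minimal, self-contained sketch of that idea on toy data:

import numpy as np
from sklearn.cluster import Birch

toy_data = np.array([[1.0, 1.0], [1.5, 1.0], [1.0, 1.5], [8.0, 8.0], [8.5, 8.0], [8.0, 8.5]])

brc = Birch(branching_factor=50, n_clusters=2, threshold=0.5)
labels = brc.fit_predict(toy_data)

for k in range(2):
    members = toy_data[labels == k]
    # Centre computed as the mean of the members, since Birch does not provide it
    print("cluster", k, "centre", members.mean(axis=0), "size", len(members))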
Example #4
def perform_optics_clustering(data, program_options: Options) -> ClusteredData:
    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options=program_options)

    op = OPTICS(min_samples=program_options.OPTICS_MIN_SAMPLES, n_jobs=-1)
    op.fit(data)
    optic_labels = op.labels_

    for k in range(optic_labels.max() + 1):
        class_members = optic_labels == k
        nodes_in_cluster = data[class_members]
        # OPTICS does not expose cluster centres either, so they have to be calculated manually
        cluster_centre = nodes_in_cluster.mean(axis=0)
        cluster = Cluster(cluster_centre=cluster_centre, nodes=nodes_in_cluster, cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    if optic_labels.min() == -1:
        class_members = optic_labels == -1
        # There are unclassified nodes
        unclassified_nodes = data[class_members]
        for unclassified_node in unclassified_nodes:
            cluster_to_add = Cluster(unclassified_node, [unclassified_node],
                                     cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                                     program_options=program_options)
            clustered_data.add_unclassified_node(cluster_to_add)

    return clustered_data
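OPTICS likewise provides no cluster centres and labels unreachable points -1, which the function above turns into single-node "unclassified" clusters. A minimal, self-contained sketch of the labelling pattern; cluster_method='dbscan' and eps are assumptions used to make the toy example deterministic, whereas the original relies on the defaults.

import numpy as np
from sklearn.cluster import OPTICS

toy_data = np.array([[1.0, 1.0], [1.1, 1.0], [1.0, 1.1],
                     [8.0, 8.0], [8.1, 8.0], [8.0, 8.1],
                     [50.0, 50.0]])

labels = OPTICS(min_samples=3, cluster_method='dbscan', eps=0.5).fit(toy_data).labels_

for k in range(labels.max() + 1):
    members = toy_data[labels == k]
    print("cluster", k, "centre", members.mean(axis=0))

# Points labelled -1 could not be placed into any cluster
print("noise points:", toy_data[labels == -1])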
Example #5
def plot_clustered_graph(plot_colours, cluster_data: ClusteredData, program_options):
    # This plotting was adapted from the affinity propagation sklearn example
    for k, col in zip(range(len(cluster_data.get_clusters())), plot_colours):
        class_members = cluster_data.get_clusters()[k].nodes
        cluster_center = cluster_data.get_clusters()[k].get_cluster_centre()

        plt.plot(class_members[:, 0], class_members[:, 1], col + '.')
        plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=14)
        # k doubles as the cluster label drawn at the centre
        plt.annotate(str(k), xy=(cluster_center[0], cluster_center[1]), fontsize=10, ha='center', va='center')

        for x in class_members:
            plt.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col, linewidth=0.5)

    unclassified_nodes = cluster_data.get_unclassified_nodes()
    if len(unclassified_nodes) > 0:
        for k in unclassified_nodes:
            plt.plot(k.cluster_centre[0], k.cluster_centre[1], 'o', markerfacecolor='k', markeredgecolor='k',
                     markersize=6)

    plt.title(program_options.TSP_PROBLEM_NAME + ' ' + str(program_options.CLUSTER_TYPE) +
              ': clusters: %d noise: %d' % (len(cluster_data.get_clusters()),
                                            len(cluster_data.get_unclassified_nodes())))
    plt.savefig(program_options.OUTPUT_DIRECTORY + program_options.TSP_PROBLEM_NAME + "-" +
                str(program_options.CLUSTER_TYPE) + "-clustering.png",
                dpi=program_options.PLT_DPI_VALUE)

    if program_options.DISPLAY_PLOTS:
        plt.show()

    plt.close()
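The plotting above depends on the project's ClusteredData API, but the matplotlib pattern itself (members as dots, a large annotated marker at the centre, and spokes from the centre to each member) can be sketched standalone:

import numpy as np
import matplotlib.pyplot as plt

centre = np.array([1.25, 1.5])
members = np.array([[1.0, 1.0], [1.5, 1.0], [1.0, 1.5], [1.0, 2.0]])

plt.plot(members[:, 0], members[:, 1], 'b.')
plt.plot(centre[0], centre[1], 'o', markerfacecolor='b', markeredgecolor='k', markersize=14)
plt.annotate("0", xy=(centre[0], centre[1]), fontsize=10, ha='center', va='center')
for x in members:
    # Draw a line from the cluster centre to each member node
    plt.plot([centre[0], x[0]], [centre[1], x[1]], 'b', linewidth=0.5)

plt.title("single cluster sketch")
plt.savefig("cluster-sketch.png", dpi=150)
plt.close()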
Example #6
def perform_dbscan_clustering(data, program_options: Options) -> ClusteredData:
    if program_options.AUTOMATE_DBSCAN_EPS:
        program_options.DBSCAN_EPS = dbscan_eps_finder.find_using_nearest_neighbours(problem_data_array=data,
                                                                                     program_options=program_options)

    # The data that will be returned
    clustered_data = ClusteredData(data, list(), program_options)

    db = DBSCAN(eps=program_options.DBSCAN_EPS, min_samples=program_options.DBSCAN_MIN_SAMPLES, n_jobs=-1).fit(data)
    core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
    core_samples_mask[db.core_sample_indices_] = True
    db_labels = db.labels_
    db_n_clusters_ = len(set(db_labels)) - (1 if -1 in db_labels else 0)
    n_noise_ = list(db_labels).count(-1)

    for k in range(db_n_clusters_):
        class_members = db_labels == k
        nodes_in_cluster = data[class_members]
        cluster_centre = nodes_in_cluster.mean(axis=0)
        cluster = Cluster(cluster_centre=cluster_centre, nodes=nodes_in_cluster, cluster_type=ClusterType.FULL_CLUSTER,
                          program_options=program_options)
        clustered_data.add_cluster(cluster)

    # These are the nodes that could not be placed into a cluster
    if n_noise_ > 0:
        class_members = db_labels == -1
        unclassified_nodes = data[class_members]
        for unclassified_node in unclassified_nodes:
            cluster_to_add = Cluster(unclassified_node, [unclassified_node],
                                     cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                                     program_options=program_options)
            clustered_data.add_unclassified_node(cluster_to_add)

    return clustered_data
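DBSCAN follows the same shape as OPTICS above: label -1 marks noise, the cluster count excludes the noise label, and centres are taken as the mean of each cluster's members. A minimal, self-contained sketch with eps and min_samples values assumed for the toy data:

import numpy as np
from sklearn.cluster import DBSCAN

toy_data = np.array([[1.0, 1.0], [1.2, 1.0], [1.0, 1.2],
                     [8.0, 8.0], [8.2, 8.0], [8.0, 8.2],
                     [50.0, 50.0]])

db = DBSCAN(eps=0.5, min_samples=2).fit(toy_data)
# -1 is the noise label, so it is excluded from the cluster count
n_clusters = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
print("clusters:", n_clusters, "noise points:", list(db.labels_).count(-1))

for k in range(n_clusters):
    members = toy_data[db.labels_ == k]
    print("cluster", k, "centre", members.mean(axis=0))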
Example #7
def setup_cluster():
    test_program_options = Options(output_directory="test", tsp_problem_name="test", aco_ant_count=10,
                                   aco_iterations=5, file_name="test", output_directory_2_opt_animation="test",
                                   output_directory_aco_animation="test", cluster_tour_type=InternalClusterPathFinderType.ACO)
    # This test data forms one main cluster containing the first four nodes plus two single-node clusters
    test_nodes = [[1, 1], [1.5, 1], [1, 1.5], [1, 2], [2, 1], [2, 2]]
    # The list needs to be turned into an np array
    nodes = np.asarray(test_nodes)
    clustered_data: ClusteredData = ClusteredData(nodes=nodes, clusters=list(),
                                                  program_options=test_program_options)
    main_cluster = Cluster(cluster_centre=[1.25, 1.5], nodes=np.asarray([[1, 1], [1.5, 1], [1, 1.5], [1, 2]]),
                           cluster_type=ClusterType.FULL_CLUSTER, program_options=test_program_options)
    clustered_data.add_cluster(main_cluster)
    unclassified_node = Cluster(cluster_centre=[2, 1], nodes=np.asarray([[2, 1]]),
                                cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                                program_options=test_program_options)
    clustered_data.add_unclassified_node(unclassified_node)
    unclassified_node = Cluster(cluster_centre=[2, 2], nodes=np.asarray([[2, 2]]),
                                cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                                program_options=test_program_options)
    clustered_data.add_unclassified_node(unclassified_node)
    clustered_data.aco_cluster_tour = (0, 1, 2)
    return clustered_data, test_nodes
Example #8
def run_algorithm_with_options(program_options: Options, problem_data_array,
                               problem: tsplib95.Problem):
    program_start_time = timeit.default_timer()

    # Key is the repr of the node location and the value is the node id
    node_location_to_id_dict = dict()

    # Key is the node id and the value is the node location
    node_id_to_location_dict = dict()
    counter = 0

    for node in problem_data_array:
        node_location_to_id_dict[repr(node)] = counter
        node_id_to_location_dict[counter] = node
        counter += 1

    colors = cycle('bgrcmybgrcmybgrcmybgrcmy')

    clustered_data = None
    if program_options.SHOULD_CLUSTER:
        if program_options.CLUSTER_TYPE is ClusterAlgorithmType.K_MEANS:
            clustered_data = perform_k_means_clustering(
                problem_data_array, program_options)
        if program_options.CLUSTER_TYPE is ClusterAlgorithmType.AFFINITY_PROPAGATION:
            clustered_data = perform_affinity_propagation(
                problem_data_array, program_options)
        if program_options.CLUSTER_TYPE is ClusterAlgorithmType.BIRCH:
            clustered_data = perform_birch_clustering(problem_data_array,
                                                      program_options)
        if program_options.CLUSTER_TYPE is ClusterAlgorithmType.DBSCAN:
            clustered_data = perform_dbscan_clustering(problem_data_array,
                                                       program_options)
        if program_options.CLUSTER_TYPE is ClusterAlgorithmType.OPTICS:
            clustered_data = perform_optics_clustering(problem_data_array,
                                                       program_options)
    else:
        clustered_data = ClusteredData(nodes=problem_data_array,
                                       clusters=list(),
                                       program_options=program_options)

        for node in problem_data_array:
            cluster = Cluster(
                cluster_centre=node,
                nodes=[node],
                cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                program_options=program_options)
            clustered_data.add_unclassified_node(cluster)

    # Set the overall node dicts onto the clustering object
    clustered_data.node_location_to_id_dict = node_location_to_id_dict
    clustered_data.node_id_to_location_dict = node_id_to_location_dict
    cluster_nodes_dict = clustered_data.get_dict_node_id_location_mapping_aco()

    logging.debug("%s nodes after clustering", len(cluster_nodes_dict))

    # Raise an error if clustering produced only one cluster, because ACO needs more than one cluster to run
    if len(cluster_nodes_dict) <= 1:
        raise ValueError(
            "Need more than one cluster from the clustering algorithm")

    aco_tour_improvement_plotter: TourImprovementAnimator = TourImprovementAnimator(
        cluster_nodes_dict,
        problem_type="aco",
        program_options=program_options)
    before = timeit.default_timer()
    if program_options.ACO_TYPE is ACOType.ACO_MULTITHREADED:
        colony = AntColony(
            nodes=cluster_nodes_dict,
            distance_callback=aco_distance_callback,
            alpha=program_options.ACO_ALPHA_VALUE,
            beta=program_options.ACO_BETA_VALUE,
            pheromone_evaporation_coefficient=program_options.ACO_RHO_VALUE,
            pheromone_constant=program_options.ACO_Q_VALUE,
            ant_count=program_options.ACO_ANT_COUNT,
            tour_improvement_animator=aco_tour_improvement_plotter,
            iterations=program_options.ACO_ITERATIONS)
        answer = colony.mainloop()

    elif program_options.ACO_TYPE is ACOType.ACO_PY:
        solver = acopy.Solver(rho=program_options.ACO_RHO_VALUE,
                              q=program_options.ACO_Q_VALUE)
        colony = acopy.Colony(alpha=program_options.ACO_ALPHA_VALUE,
                              beta=program_options.ACO_BETA_VALUE)

        logger_plugin = LoggerPlugin()
        iteration_plotter_plugin = IterationPlotterPlugin(
            tour_improvement_animator=aco_tour_improvement_plotter)

        solver.add_plugin(logger_plugin)
        solver.add_plugin(iteration_plotter_plugin)

        graph = clustered_data.turn_clusters_into_nx_graph(problem)
        solution = solver.solve(graph,
                                colony,
                                limit=program_options.ACO_ITERATIONS,
                                gen_size=program_options.ACO_ANT_COUNT)
        answer = solution.nodes

    else:
        raise NotImplementedError()

    after = timeit.default_timer()
    dif = after - before

    logging.debug("Time taken for initial global %s aco %s",
                  program_options.ACO_TYPE, dif)

    clustered_data.aco_cluster_tour = answer
    clustered_data.find_nodes_to_move_between_clusters()

    if program_options.CLUSTER_TOUR_TYPE is InternalClusterPathFinderType.ACO:
        if program_options.ACO_TYPE is ACOType.ACO_MULTITHREADED:
            clustered_data.find_tours_within_clusters_using_multithreaded_aco()
        elif program_options.ACO_TYPE is ACOType.ACO_PY:
            clustered_data.find_tours_within_clusters_using_acopy()
        else:
            raise NotImplementedError()

    elif program_options.CLUSTER_TOUR_TYPE is InternalClusterPathFinderType.GREEDY_NEAREST_NODE:
        clustered_data.find_tours_within_clusters_using_greedy_closest_nodes()
    else:
        raise NotImplementedError()
    tour_node_coordinates = clustered_data.get_ordered_nodes_for_all_clusters()

    # Tour as node ids instead of node locations
    tour_node_id = []

    for node in tour_node_coordinates:
        tour_node_id.append(node_location_to_id_dict[repr(node)])

    clustered_data.node_level_tour = tour_node_id
    tour_node_id_set = set(tour_node_id)
    valid = len(tour_node_id) == len(tour_node_id_set) == len(
        problem_data_array)

    logging.debug("Tour is valid %s", valid)

    length_before = calculate_distance_for_tour(tour_node_id,
                                                node_id_to_location_dict)
    logging.debug("Length before 2-opt is %s", length_before)

    # If the option to run 2-opt is set then run it
    if program_options.SHOULD_RUN_2_OPT:
        tsp_2_opt_graph_animator = TourImprovementAnimator(
            node_id_to_location_dict,
            problem_type="2-opt",
            program_options=program_options)

        before = timeit.default_timer()
        final_route = run_2_opt(
            existing_route=tour_node_id,
            node_id_to_location_dict=node_id_to_location_dict,
            distance_calculator_callback=calculate_distance_for_tour,
            tsp_2_opt_animator=tsp_2_opt_graph_animator)
        after = timeit.default_timer()

        dif = after - before
        logging.debug("Time taken for 2-opt %s", dif)

        length_after = calculate_distance_for_tour(final_route,
                                                   node_id_to_location_dict)
        logging.debug("Length after 2-opt is %s", length_after)

        logging.debug("Final route after 2-opt is %s", final_route)

    program_end_time = timeit.default_timer()
    dif = program_end_time - program_start_time
    logging.debug("Time taken for entire program %s", dif)

    # These are the tour plotters, so they are excluded from the time calculations
    logging.debug("Starting tour plotters")

    # plot the tours for each cluster
    clustered_data.plot_all_cluster_tours()

    # This is the graph that shows all the clusters
    plot_clustered_graph(colors,
                         cluster_data=clustered_data,
                         program_options=program_options)

    # Plot all the nodes in the problem, no tour
    plot_nodes(problem_data_array, program_options=program_options)

    # Plot the ACO tour of the clusters
    plot_aco_clustered_tour(answer,
                            clustered_data,
                            program_options=program_options)

    # Plot the tour pre 2-opt
    plot_complete_tsp_tour(tour_node_id,
                           node_id_to_location_dict,
                           title="TSP Tour Before 2-opt. Length: " +
                           str(length_before),
                           program_options=program_options)

    # If 2-opt was run then all the 2-opt related graphs can safely be plotted
    if program_options.SHOULD_RUN_2_OPT:
        # Plot the tour post 2-opt
        plot_complete_tsp_tour(final_route,
                               node_id_to_location_dict,
                               title="TSP Tour After 2-opt. Length: " +
                               str(length_after),
                               program_options=program_options)

        # Plot the tour post 2-opt with node ids printed
        plot_complete_tsp_tour(final_route,
                               node_id_to_location_dict,
                               title="Final TSP Tour With Node ID",
                               node_id_shown=True,
                               program_options=program_options)

        if program_options.ANIMATE_IMPROVEMENTS:
            # Create an animation of the 2-opt incremental improvement
            tsp_2_opt_graph_animator.animate(
                output_directory_animation_graphs=program_options.OUTPUT_DIRECTORY_2_OPT_ANIMATION)

    if program_options.ANIMATE_IMPROVEMENTS:
        # Create an animation of the aco incremental improvement
        aco_tour_improvement_plotter.animate(
            output_directory_animation_graphs=program_options.OUTPUT_DIRECTORY_ACO_ANIMATION)

    logging.debug("Finished tour plotters")