Example #1
0
def test_get_degree_nodes(GRAPH_1):
    """Odd-degree and even-degree nodes must partition the node set of GRAPH_1."""
    # Together the two groups account for every node in the graph...
    odd_count = len(get_odd_nodes(GRAPH_1))
    even_count = len(get_even_nodes(GRAPH_1))
    assert odd_count + even_count == len(GRAPH_1.nodes())

    # ...and no node is reported in both groups.
    shared = set(get_odd_nodes(GRAPH_1)) & set(get_even_nodes(GRAPH_1))
    assert shared == set()
Example #2
0
def rpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False):
    """
    Solve the Rural Postman Problem end to end: load the edgelist, build the graphs, and compute the route.

    The required-only graph (the full graph with optional edges removed) is passed to
    `assert_graph_is_connected` before any optimization happens, so this solver relies on the required edges
    forming a connected graph on their own.  Shortest paths between odd nodes are computed on the full graph,
    so optional edges may still be used to connect odd-degree nodes.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute holding the distance to minimize
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the solution route; each entry is one step of the route:
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The second item is the full graph (required and optional edges), which is needed for visualization.
    """
    logger_rpp.disabled = not verbose

    logger_rpp.info('read edgelist')
    edges = read_edgelist(edgelist_filename, keep_optional=True)

    logger_rpp.info('create full and required graph')
    full_graph = create_networkx_graph_from_edgelist(edges)
    required_graph = create_required_graph(full_graph)
    assert_graph_is_connected(required_graph)

    logger_rpp.info('getting odd node pairs')
    odd_pairs = list(itertools.combinations(get_odd_nodes(required_graph), 2))

    # Distances are measured on the full graph so optional edges can serve as shortcuts.
    logger_rpp.info('get shortest paths between odd nodes')
    pair_distances = get_shortest_paths_distances(full_graph, odd_pairs,
                                                  edge_weight)

    # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
    logger_rpp.info('Find min weight matching using blossom algorithm')
    complete_odd_graph = create_complete_graph(pair_distances,
                                               flip_weights=True)
    raw_matching = nx.algorithms.max_weight_matching(complete_odd_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_rpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(required_graph, matching)

    logger_rpp.info('get eulerian circuit route')
    route = list(
        create_eulerian_circuit(augmented_graph, full_graph, start_node))

    return route, full_graph
Example #3
0
def test_get_shortest_paths_distances(GRAPH_1):
    """Coarse structural check of `get_shortest_paths_distances` on the GRAPH_1 fixture."""
    pairs = list(itertools.combinations(get_odd_nodes(GRAPH_1), 2))
    distances = get_shortest_paths_distances(GRAPH_1, pairs, 'distance')

    # Exactly one odd-node pair is expected, stored in a plain dict.
    assert len(distances) == 1
    assert type(distances) == dict

    # The pair may be keyed in either orientation, so look up whichever one is present.
    if ('b', 'c') in distances:
        pair_key = ('b', 'c')
    else:
        pair_key = ('c', 'b')
    assert distances[pair_key] == 5
Example #4
0
def cpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False):
    """
    Solve the Chinese Postman Problem end to end: load the edgelist, build the graph, and compute the route.
    Usable from the command line (arguments come from cpp.py) or from an interactive session such as a notebook.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute holding the distance to minimize
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the solution route; each entry is one step of the route:
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The second item is the original graph, which is needed for visualization.
    """
    logger_cpp.disabled = not verbose

    logger_cpp.info('read edgelist and create base graph')
    edges = read_edgelist(edgelist_filename, keep_optional=False)
    base_graph = create_networkx_graph_from_edgelist(edges)

    logger_cpp.info('get augmenting path for odd nodes')
    odd_pairs = list(itertools.combinations(get_odd_nodes(base_graph), 2))
    pair_distances = get_shortest_paths_distances(base_graph, odd_pairs,
                                                  edge_weight)
    # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
    complete_odd_graph = create_complete_graph(pair_distances,
                                               flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    raw_matching = nx.algorithms.max_weight_matching(complete_odd_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_cpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(base_graph, matching)

    logger_cpp.info('get eulerian circuit route')
    route = list(
        create_eulerian_circuit(augmented_graph, base_graph, start_node))

    return route, base_graph
Example #5
0
def test_get_shortest_paths_distances():
    """Coarse structural check of `get_shortest_paths_distances` on the EDGELIST graph."""
    df = read_edgelist(EDGELIST)
    graph = create_networkx_graph_from_edgelist(df, edge_id='id')

    odd_nodes = get_odd_nodes(graph)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # coarsely checking structure of `get_shortest_paths_distances` return value
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        graph, odd_node_pairs, 'distance')
    assert len(odd_node_pairs_shortest_paths) == 630
    assert isinstance(odd_node_pairs_shortest_paths, dict)

    # Each key is a (node, node) pair; flattening the keys lists every node once per pair it takes part in.
    node_names = list(
        itertools.chain.from_iterable(odd_node_pairs_shortest_paths))

    # Every node name must appear the same number of times (35) across the pair keys.
    # `Series.value_counts` replaces the deprecated top-level `pd.value_counts`.
    assert set(pd.Series(node_names).value_counts()) == {35}
def main():
    """
    Solve a no-return CPP route for every pair of odd nodes and store the results in SQLite.

    Steps, as performed below:
      1. Create the SQLite database (cleared first) and its stations/edges tables via `dbfun`.
      2. Load the edgelist from './Data/Paths-Decision-Points.csv' (optional edges dropped) and build the graph.
      3. For every 2-combination of odd nodes, call `no_return_cpp`, format the route, collect its
         statistics, and insert the record into the database.
      4. Add route rankings once all routes have been inserted.
    """
    # Connect to Sqlite3 & create table
    sqlite3_conn = dbfun.create_subway_sqlite3(clear_db=True)
    dbfun.add_stations_table_sqlite3(sqlite3_conn)
    dbfun.add_edges_table_sqlite3(sqlite3_conn)

    edgelist = './Data/Paths-Decision-Points.csv'

    el = ppg.read_edgelist(edgelist, keep_optional=False)
    g = ppg.create_networkx_graph_from_edgelist(el)

    odd_nodes = ppg.get_odd_nodes(g)

    # Logging is configured once here; it does not depend on the pair being processed.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # This for loop gets all the euler paths for every combination of start and end nodes,
    # saves the routes/statistics as a dictionary, and inserts it into the database
    for odd_node_pair in itertools.combinations(odd_nodes, 2):
        circuit_name = odd_node_pair[0] + ' - ' + odd_node_pair[1]

        path_stats = {'path': circuit_name}

        # For some reason, the no_return_cpp is returning the path backwards so the end_node is passed as the start
        circuit, graph = no_return_cpp(edgelist, odd_node_pair[1], odd_node_pair[0])

        # Only report success once the solver has actually returned.
        logger.info(f'Solved CPP for {circuit_name}')

        # Formats the route and adds it to the dictionary along with the other stats.
        # The route is the "from" node of every step followed by the final node of the pair.
        route = '-'.join([edge[0] for edge in circuit])
        route = route + '-' + odd_node_pair[1]
        path_stats.update(calculate_postman_solution_stats(circuit))
        path_stats['route'] = route

        # Inserts into Sqlite3
        dbfun.insert_into_sqlite3(sqlite3_conn, path_stats)

    # Add rankings
    dbfun.add_route_ranks(sqlite3_conn)
Example #7
0
    def solve_brooks(self, required):
        """Solve the RPP with a reimplementation of andrew brooks' solver.

        The input format differs from the original solver: the graph is built
        from `required` via `self.create_graph`.  The required-only graph is
        checked with `graph.assert_graph_is_connected`, so this only works if
        the required edges are connected.

        Returns:
            list: the steps yielded by `graph.create_eulerian_circuit`,
            starting at the first element of the first item of `required`.
        """
        full_graph = self.create_graph(required)
        required_graph = graph.create_required_graph(full_graph)
        graph.assert_graph_is_connected(required_graph)

        # Pair up the odd-degree nodes of the required graph; distances come from the full graph.
        odd_pairs = list(
            itertools.combinations(graph.get_odd_nodes(required_graph), 2))
        pair_distances = self.get_shortest_paths_distances(
            full_graph, odd_pairs, 'distance')

        # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
        complete_odd_graph = graph.create_complete_graph(pair_distances,
                                                         flip_weights=True)
        raw_matching = nx.algorithms.max_weight_matching(
            complete_odd_graph, True)
        matching = graph.dedupe_matching(raw_matching)
        augmented_graph = graph.add_augmenting_path_to_graph(
            required_graph, matching)

        first_required = next(iter(required))
        start_node = first_required[0]
        return list(
            graph.create_eulerian_circuit(augmented_graph, full_graph,
                                          start_node))
Example #8
0
    def solve_fredrickson(self, required):
        """Solve using Frederickson's heuristic.

        Outline of what the code below does:
          1. Build the full graph from `required` and derive the required-only graph.
          2. Copy the required graph and add, for every node pair of the required graph, an
             optional edge whose distance is the shortest-path length in the full graph.
          3. Prune parallel / redundant non-required edges from that augmented graph.
          4. Take a minimum spanning tree, match its odd-degree nodes with a min-distance
             matching, add the matching to the tree and extract an Eulerian circuit.

        Returns:
            list: the steps yielded by `graph.create_eulerian_circuit`, starting at the
            first element of the first item of `required`.
        """
        g_full = self.create_graph(required)
        self.show(g_full)
        g_req = graph.create_required_graph(g_full)
        node_pairs = list(itertools.combinations(g_req, 2))
        # g_aug is the G' from Frederickson's heuristic
        # Do we use q_req or g_full here?
        g_aug = g_req.copy()
        # Add edges
        for i, pair in enumerate(node_pairs):
            try:
                d = nx.dijkstra_path_length(g_full,
                                            pair[0],
                                            pair[1],
                                            weight='distance')
                g_aug.add_edge(pair[0],
                               pair[1],
                               distance=d,
                               id=-i,
                               required=False)
            except Exception:
                # Best effort: a pair that cannot be connected is logged and skipped.
                # `Exception` (rather than a bare except) lets KeyboardInterrupt/SystemExit propagate.
                logger.exception("Dijkstra failed")

        # NOTE(review): the loop below removes edges from g_aug while iterating g_aug.edges();
        # confirm that the networkx version in use tolerates mutation during iteration.
        for edge in g_aug.edges():
            # remove duplicate edges
            # actually, I think I can remove any longer edge from the parallel edges
            # (As long as they are not required).
            data = g_aug[edge[0]][edge[1]]
            if len(data) > 1:
                deletions = set()
                for pair in itertools.combinations(data.keys(), 2):
                    key1, key2 = pair
                    # The difference is signed: key2 is dropped when it is not shorter than key1
                    # (within 1e-09) and is not a required edge.
                    # FIXME: what if edge2 is required and edge1 is not???
                    if data[key1]['distance'] - data[key2]['distance'] < 1e-09 \
                            and not data[key2]['required']:
                        #g_aug.remove_edge(edge[0], edge[1], key2)
                        deletions.add(key2)
                for key in deletions:
                    g_aug.remove_edge(edge[0], edge[1], key)

            # loop through the rest and remove edges that have a duplicate length in combination
            # NOTE(review): the test compares the two legs (edge[0]-node vs node-edge[1]) with each
            # other, not their sum with the direct edge, and always reads parallel-edge key 0,
            # which may no longer exist after the removals above -- confirm intent.
            deletions = set()
            for node in g_aug[edge[0]]:
                if edge[1] in g_aug[node]:
                    # Remove duplicates
                    # FIXME: need to check all parallel edges
                    if g_aug[edge[0]][node][0]['distance'] - g_aug[node][edge[1]][0]['distance'] < 1e-09 \
                            and not g_aug[edge[0]][edge[1]][0]['required']:
                        #g_aug.remove_edge(edge[0], edge[1], 0)
                        deletions.add(edge)
                        break
            # Rebinding `edge` here is harmless: it is the last statement of the outer loop body.
            for edge in deletions:
                g_aug.remove_edge(edge[0], edge[1])

        T = nx.minimum_spanning_tree(g_aug, 'distance')
        if os.getenv('C42_LOGLEVEL') == 'DEBUG':
            self.show(T)
        # perhaps we need to add the required edges (again) to T, so far they were all included anyway
        # Let's test it first and then see further

        # Match the odd-degree nodes of the tree; distances are measured on the full graph.
        odd_nodes = graph.get_odd_nodes(T)
        odd_node_pairs = list(itertools.combinations(odd_nodes, 2))
        odd_node_pairs_shortest_paths = graph.get_shortest_paths_distances(
            g_full, odd_node_pairs, 'distance')
        # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
        g_complete = graph.create_complete_graph(odd_node_pairs_shortest_paths,
                                                 flip_weights=True)
        if os.getenv('C42_LOGLEVEL') == 'DEBUG':
            self.show(g_complete)
        M = graph.dedupe_matching(
            nx.algorithms.max_weight_matching(g_complete, True))
        g_aug = graph.add_augmenting_path_to_graph(T, M)

        start_node = next(iter(required))[0]
        circuit = list(graph.create_eulerian_circuit(g_aug, g_full,
                                                     start_node))
        return circuit
Example #9
0
def _cpp_shared_edge_keys(graph):
    """Return the attribute names present on every edge of `graph` (empty set for an edgeless graph)."""
    key_sets = [set(attrs.keys()) for _, _, attrs in graph.edges(data=True)]
    if not key_sets:
        # set.intersection() cannot be called without arguments.
        return set()
    return set.intersection(*key_sets)


def _cpp_edge_ids_need_reset(graph, shared_keys):
    """Return True when edge ids are missing on some edge or not unique (warns in the latter case)."""
    if 'id' not in shared_keys:
        return True
    distinct_ids = {attrs['id'] for _, _, attrs in graph.edges(data=True)}
    if len(distinct_ids) != graph.number_of_edges():
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def cpp(edgelist_filename=None,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=0,
        g=None):
    """
    Solving the CPP from beginning (load network data) to end (finding optimal route).
    Can be run from command line with arguments from cpp.py, or from an interactive Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str or can be cast to str): name of starting node; cast to str when given.
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is edgelist filename in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): forwarded to `read_graphml` when `graphml` is set; otherwise unused here.
        g (networkx multigraph): pre-loaded networkx MultiGraph. Either g or edgelist_filename must be specified.
            If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        Each tuple is a direction (from one node to another) from the CPP solution route.
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The route is trimmed at its start before being returned (see the end of the function body).
        The original graph is returned as well.  This is needed for visualization

    Raises:
        TypeError: neither `edgelist_filename` nor `g` is given.
        ValueError: `g` is given but some edge lacks the `edge_weight` attribute.
    """
    logger_cpp.disabled = not verbose

    reset_ids = False

    logger_cpp.info('initialize graph')
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            g = read_graphml(edgelist_filename,
                             edge_weight=edge_weight,
                             max_degree_connect=max_degree_connect)
            # make sure edge id exists and is unique
            reset_ids = _cpp_edge_ids_need_reset(g, _cpp_shared_edge_keys(g))
        else:
            el = read_edgelist(edgelist_filename, keep_optional=False)
            g = create_networkx_graph_from_edgelist(el)
    elif g is None:
        # none of edgelist filename or g is given - no graph specified
        raise TypeError("One of edgelist_filename or g must be given!")
    else:
        # use g - must ensure that format matches the expected format
        g = nx.MultiGraph(g)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _cpp_shared_edge_keys(g)
        if edge_weight not in shared_keys:
            raise ValueError(
                "g must include value for '{}' for every edge".format(
                    edge_weight))
        reset_ids = _cpp_edge_ids_need_reset(g, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g.edges(keys=True)):
            g.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    logger_cpp.info('get augmenting path for odd nodes')
    odd_nodes = get_odd_nodes(g)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # 'x' and 'y' is not in the generated graphml file, so this filtering is not supported until x and y is added
    # odd_node_pairs = filter_by_haversine_distance(g, odd_node_pairs, max_distance=max_distance)

    start = time.time()
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g, odd_node_pairs, edge_weight)
    # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_cpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g, odd_matching)

    print(len(get_odd_nodes(g)), ' odd nodes, now', len(get_odd_nodes(g_aug)),
          nx.is_connected(g_aug))
    logger_cpp.info('get eulerian circuit route')

    circuit = list(create_eulerian_circuit(g_aug, g, start_node))
    end = time.time()
    print('matching and augment time:', end - start)

    # Walk inward from both ends of the circuit while the idx-th step from the front shares a node
    # with the idx-th step from the back (we don't care about the "full circuit").
    idx = 0  # keeps `idx` defined when the circuit is empty
    for idx in range(len(circuit)):
        head = circuit[idx]
        tail = circuit[len(circuit) - 1 - idx]
        tail_nodes = (tail[0], tail[1])
        if not (head[0] in tail_nodes or head[1] in tail_nodes):
            break

    # NOTE(review): the slice drops idx + 1 leading steps while the message reports idx;
    # kept as-is because the intended count is not clear from this code -- confirm.
    if circuit:
        circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g
Example #10
0
def _rpp_shared_edge_keys(graph):
    """Return the attribute names present on every edge of `graph` (empty set for an edgeless graph)."""
    key_sets = [set(attrs.keys()) for _, _, attrs in graph.edges(data=True)]
    if not key_sets:
        # set.intersection() cannot be called without arguments.
        return set()
    return set.intersection(*key_sets)


def _rpp_edge_ids_need_reset(graph, shared_keys):
    """Return True when edge ids are missing on some edge or not unique (warns in the latter case)."""
    if 'id' not in shared_keys:
        # not every edge has a defined edge id
        return True
    distinct_ids = {attrs['id'] for _, _, attrs in graph.edges(data=True)}
    if len(distinct_ids) != graph.number_of_edges():
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def rpp(edgelist_filename=None,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=None,
        g_full=None):
    """
    Solving the RPP from beginning (load network data) to end (finding optimal route).

    The required graph is the full graph with optional edges removed.  If it is not connected,
    `make_connected` is called with the full graph to connect it before the route is computed.
    This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str or can be cast to str): name of starting node.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is edgelist filename in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): min degree of a node in the full graph -- nodes with smaller degree are connected
            with all-to-all optional edges. Use -1 for all-to-all graph.  Forwarded to `read_graphml`.
        g_full (networkx multigraph): pre-loaded networkx MultiGraph. Either g_full or edgelist_filename must be
            specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        Each tuple is a direction (from one node to another) from the solution route.
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
        The route is trimmed at its start before being returned (see the end of the function body).
        The full graph is returned as well.  This is needed for visualization

    Raises:
        TypeError: neither `edgelist_filename` nor `g_full` is given.
        ValueError: `g_full` is given but some edge lacks 'required' or the `edge_weight` attribute.
    """

    print("Running RPP solver!")

    logger_rpp.disabled = not verbose
    logger_rpp.info('initialize full graph')

    reset_ids = False

    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            # read in the graph
            g_full = read_graphml(edgelist_filename, edge_weight,
                                  max_degree_connect)
            # make sure edge id exists and is unique
            reset_ids = _rpp_edge_ids_need_reset(
                g_full, _rpp_shared_edge_keys(g_full))
        else:
            # regular csv file format...
            el = read_edgelist(edgelist_filename, keep_optional=True)
            g_full = create_networkx_graph_from_edgelist(el)
    elif g_full is None:
        # none of edgelist filename or g_full is given - no graph specified
        raise TypeError("One of edgelist_filename or g_full must be given!")
    else:
        # use g_full - must ensure that format matches the expected format
        g_full = nx.MultiGraph(g_full)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _rpp_shared_edge_keys(g_full)
        if not {'required', edge_weight} <= shared_keys:
            raise ValueError(
                "g_full must include values for 'required' and '{}' for every edge"
                .format(edge_weight))
        reset_ids = _rpp_edge_ids_need_reset(g_full, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g_full.edges(keys=True)):
            g_full.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    # if required graph is not connected, use additional edges from g_full to make it connected
    logger_rpp.info('create required graph')
    g_req = create_required_graph(g_full)
    if not is_connected(g_req):
        make_connected(g_req, g_full, edge_weight)  # THIS STEP COULD BE SLOW

    logger_rpp.info('getting odd node pairs')
    odd_nodes = get_odd_nodes(g_req)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    start = time.time()
    # Distances are measured on the full graph so optional edges can serve as shortcuts.
    logger_rpp.info('get shortest paths between odd nodes')
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g_full, odd_node_pairs, edge_weight)

    # Weights are flipped so that a *max* weight matching yields the *min* distance pairing.
    logger_rpp.info('Find min weight matching using blossom algorithm')
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_rpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g_req, odd_matching)

    logger_rpp.info('get eulerian circuit route')

    circuit = list(
        create_eulerian_circuit(g_aug,
                                g_full,
                                start_node,
                                edge_weight=edge_weight))
    end = time.time()
    print('matching and augment time:', end - start)

    # Walk inward from both ends of the circuit while the idx-th step from the front shares a node
    # with the idx-th step from the back (we don't care about the "full circuit").
    idx = 0  # keeps `idx` defined when the circuit is empty
    for idx in range(len(circuit)):
        head = circuit[idx]
        tail = circuit[len(circuit) - 1 - idx]
        tail_nodes = (tail[0], tail[1])
        if not (head[0] in tail_nodes or head[1] in tail_nodes):
            break

    # NOTE(review): the slice drops idx + 1 leading steps while the message reports idx;
    # kept as-is because the intended count is not clear from this code -- confirm.
    if circuit:
        circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g_full