def test_get_degree_nodes(GRAPH_1):
    """Check that get_odd_nodes and get_even_nodes split GRAPH_1's nodes cleanly."""
    odd = get_odd_nodes(GRAPH_1)
    even = get_even_nodes(GRAPH_1)

    # the two groups together account for every node in the graph
    assert len(odd) + len(even) == len(GRAPH_1.nodes())

    # and no node is reported in both groups
    assert set(odd).isdisjoint(even)
def rpp(edgelist_filename, start_node=None, edge_weight='distance', verbose=False):
    """
    Run the RPP pipeline end to end: load the edgelist, build the graphs, compute a route.

    The graph restricted to required edges is passed to ``assert_graph_is_connected``
    before any matching is attempted, so the required edges are expected to form a
    connected graph on their own.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute holding the distance to minimize
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
            The route, one tuple per step: the "from" node, the "to" node and the dict
            of edge attributes for that step.  The full graph (required and optional
            edges) is returned alongside it; it is needed for visualization.
    """
    logger_rpp.disabled = not verbose

    logger_rpp.info('read edgelist')
    edge_table = read_edgelist(edgelist_filename, keep_optional=True)

    logger_rpp.info('create full and required graph')
    full_graph = create_networkx_graph_from_edgelist(edge_table)
    required_graph = create_required_graph(full_graph)
    assert_graph_is_connected(required_graph)

    logger_rpp.info('getting odd node pairs')
    pairs = list(itertools.combinations(get_odd_nodes(required_graph), 2))

    # distances are measured on the full graph, so optional edges may be used as shortcuts
    logger_rpp.info('get shortest paths between odd nodes')
    pair_distances = get_shortest_paths_distances(full_graph, pairs, edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)
    raw_matching = nx.algorithms.max_weight_matching(complete_graph,
                                                     maxcardinality=True)
    matching = dedupe_matching(raw_matching)

    logger_rpp.info('add the min weight matching edges to g')
    augmented = add_augmenting_path_to_graph(required_graph, matching)

    logger_rpp.info('get eulerian circuit route')
    route = list(create_eulerian_circuit(augmented, full_graph, start_node))

    return route, full_graph
def test_get_shortest_paths_distances(GRAPH_1):
    """Coarse structural check of ``get_shortest_paths_distances`` on GRAPH_1."""
    pairs = list(itertools.combinations(get_odd_nodes(GRAPH_1), 2))
    distances = get_shortest_paths_distances(GRAPH_1, pairs, 'distance')

    # exactly one odd-node pair is expected, returned in a plain dict
    assert len(distances) == 1
    assert type(distances) is dict

    # the pair may be keyed in either orientation
    if ('b', 'c') in distances:
        bc_key = ('b', 'c')
    else:
        bc_key = ('c', 'b')
    assert distances[bc_key] == 5
def cpp(edgelist_filename, start_node=None, edge_weight='distance', verbose=False):
    """
    Run the CPP pipeline end to end: load the edgelist, build the graph, compute a route.

    Can be run from the command line with arguments from cpp.py, or from an interactive
    Python session (ex jupyter notebook).

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute holding the distance to minimize
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
            The route, one tuple per step: the "from" node, the "to" node and the dict
            of edge attributes for that step.  The graph built from the edgelist is
            returned alongside it; it is needed for visualization.
    """
    logger_cpp.disabled = not verbose

    logger_cpp.info('read edgelist and create base graph')
    edge_table = read_edgelist(edgelist_filename, keep_optional=False)
    base_graph = create_networkx_graph_from_edgelist(edge_table)

    logger_cpp.info('get augmenting path for odd nodes')
    pairs = list(itertools.combinations(get_odd_nodes(base_graph), 2))
    pair_distances = get_shortest_paths_distances(base_graph, pairs, edge_weight)
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    raw_matching = nx.algorithms.max_weight_matching(complete_graph,
                                                     maxcardinality=True)
    matching = dedupe_matching(raw_matching)

    logger_cpp.info('add the min weight matching edges to g')
    augmented = add_augmenting_path_to_graph(base_graph, matching)

    logger_cpp.info('get eulerian circuit route')
    route = list(create_eulerian_circuit(augmented, base_graph, start_node))

    return route, base_graph
def test_get_shortest_paths_distances():
    """Coarsely check the structure of ``get_shortest_paths_distances`` on EDGELIST.

    Builds the graph from the EDGELIST fixture, computes distances for every pair of
    odd nodes, and checks the size, type and key balance of the returned mapping.
    """
    df = read_edgelist(EDGELIST)
    graph = create_networkx_graph_from_edgelist(df, edge_id='id')

    odd_nodes = get_odd_nodes(graph)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # coarsely checking structure of `get_shortest_paths_distances` return value
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        graph, odd_node_pairs, 'distance')
    assert len(odd_node_pairs_shortest_paths) == 630
    assert isinstance(odd_node_pairs_shortest_paths, dict)

    # Keys are node-pair tuples; flatten them into one list of node names.
    node_names = list(
        itertools.chain.from_iterable(odd_node_pairs_shortest_paths))

    # 630 pairs == C(36, 2), so each node name must appear in exactly 35 pairs.
    # The top-level pd.value_counts() is deprecated (pandas 2.1); use the Series method.
    assert set(pd.Series(node_names).value_counts()) == {35}
def main():
    """Solve a no-return CPP for every pair of odd nodes and store the results in Sqlite3.

    Creates the database tables through ``dbfun``, reads the decision-point edgelist,
    then for each unordered pair of odd nodes calls ``no_return_cpp``, formats the
    resulting route, and inserts the route together with its statistics.  Rankings
    are added once all routes are stored.
    """
    # Connect to Sqlite3 & create table
    # NOTE(review): the connection is never closed here -- confirm whether dbfun owns
    # its lifetime or whether an explicit close is needed.
    sqlite3_conn = dbfun.create_subway_sqlite3(clear_db=True)
    dbfun.add_stations_table_sqlite3(sqlite3_conn)
    dbfun.add_edges_table_sqlite3(sqlite3_conn)

    edgelist = './Data/Paths-Decision-Points.csv'
    el = ppg.read_edgelist(edgelist, keep_optional=False)
    g = ppg.create_networkx_graph_from_edgelist(el)
    odd_nodes = ppg.get_odd_nodes(g)

    # Logging setup is loop-invariant, so do it once instead of on every iteration.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # This for loop gets all the euler paths for every combination of start and end nodes,
    # saves the routes/statistics as a dictionary, and inserts it into the database
    for first_node, second_node in itertools.combinations(odd_nodes, 2):
        circuit_name = f'{first_node} - {second_node}'
        path_stats = {'path': circuit_name}

        # For some reason, the no_return_cpp is returning the path backwards so the
        # end_node is passed as the start
        circuit, _ = no_return_cpp(edgelist, second_node, first_node)
        # Log only once the route has actually been computed.
        logger.info('Solved CPP for %s', circuit_name)

        # Formats the route and adds it to the dictionary along with the other stats
        route = '-'.join(edge[0] for edge in circuit) + '-' + second_node
        path_stats.update(calculate_postman_solution_stats(circuit))
        path_stats['route'] = route

        # Inserts into Sqlite3
        dbfun.insert_into_sqlite3(sqlite3_conn, path_stats)

    # Add rankings
    dbfun.add_route_ranks(sqlite3_conn)
def solve_brooks(self, required):
    """Reimplementation of andrew brooks' rpp solver on a different input format.

    The required edges must be connected: the required graph is passed to
    ``graph.assert_graph_is_connected`` before solving.  The route starts at the
    first element of the first item yielded by ``required``.

    Returns:
        list: the steps yielded by ``graph.create_eulerian_circuit``.
    """
    full_graph = self.create_graph(required)
    required_graph = graph.create_required_graph(full_graph)
    graph.assert_graph_is_connected(required_graph)

    # pair up the odd nodes of the required graph; distances use the full graph
    pairs = list(itertools.combinations(graph.get_odd_nodes(required_graph), 2))
    pair_distances = self.get_shortest_paths_distances(full_graph, pairs,
                                                       'distance')

    complete_graph = graph.create_complete_graph(pair_distances,
                                                 flip_weights=True)
    raw_matching = nx.algorithms.max_weight_matching(complete_graph,
                                                     maxcardinality=True)
    matching = graph.dedupe_matching(raw_matching)
    augmented = graph.add_augmenting_path_to_graph(required_graph, matching)

    first_required = next(iter(required))
    start_node = first_required[0]
    return list(graph.create_eulerian_circuit(augmented, full_graph, start_node))
def solve_fredrickson(self, required):
    """Solve using Frederickson's heuristic.

    Outline of what the code below does:

    1. Build the full graph from ``required`` and derive the required-edge graph.
    2. Copy the required graph and add one non-required edge per node pair, weighted
       with the Dijkstra distance on the full graph.
    3. Prune edges from that augmented graph (parallel edges, then edges matched by a
       two-hop detour).
    4. Take a minimum spanning tree, match its odd nodes, augment the tree with the
       matching and extract an Eulerian circuit starting at the first element of the
       first item yielded by ``required``.

    NOTE(review): the nesting of the pruning section was reconstructed from a source
    whose indentation was lost -- confirm it against the original file.

    Returns:
        list: the steps yielded by ``graph.create_eulerian_circuit``.
    """
    g_full = self.create_graph(required)
    self.show(g_full)
    g_req = graph.create_required_graph(g_full)

    # every unordered pair of nodes of the required graph
    node_pairs = list(itertools.combinations(g_req, 2))

    # g_aug is the G' from Frederickson's heuristic
    # Do we use q_req or g_full here?
    g_aug = g_req.copy()

    # Add edges
    for i, pair in enumerate(node_pairs):
        try:
            d = nx.dijkstra_path_length(g_full, pair[0], pair[1], weight='distance')
            # added edges get non-positive ids (-i) and are marked as not required
            g_aug.add_edge(pair[0], pair[1], distance=d, id=-i, required=False)
        except:
            # NOTE(review): bare except -- any failure for this pair is logged and the
            # pair is skipped; this also catches KeyboardInterrupt/SystemExit.
            logger.exception("Dijkstra failed")

    # NOTE(review): edges are removed from g_aug inside this loop while g_aug.edges()
    # is being iterated -- confirm this is safe for the networkx version in use.
    for edge in g_aug.edges():  # remove duplicate edges
        # actually, I think I can remove any longer edge from the parallel edges
        # (As long as they are not required).
        data = g_aug[edge[0]][edge[1]]
        if len(data) > 1:
            deletions = set()
            for pair in itertools.combinations(data.keys(), 2):
                key1, key2 = pair
                # FIXME: what if edge2 is required and edge1 is not???
                # NOTE(review): this is a signed difference, not abs(); it is also true
                # whenever key1's distance is smaller than key2's.
                if data[key1]['distance'] - data[key2]['distance'] < 1e-09 \
                        and not data[key2]['required']:
                    #g_aug.remove_edge(edge[0], edge[1], key2)
                    deletions.add(key2)
            # removal is deferred until the pairwise scan over data.keys() is done
            for key in deletions:
                g_aug.remove_edge(edge[0], edge[1], key)

        # loop through the rest and remove edges that have a duplicate length in combination
        deletions = set()
        for node in g_aug[edge[0]]:
            if edge[1] in g_aug[node]:
                # Remove duplicates
                # FIXME: need to check all parallel edges
                # Only key 0 of each hop is compared; signed difference as above.
                if g_aug[edge[0]][node][0]['distance'] - g_aug[node][edge[1]][0]['distance'] < 1e-09 \
                        and not g_aug[edge[0]][edge[1]][0]['required']:
                    #g_aug.remove_edge(edge[0], edge[1], 0)
                    deletions.add(edge)
                    break
        # NOTE: this loop rebinds `edge`; remove_edge is called without a key here
        for edge in deletions:
            g_aug.remove_edge(edge[0], edge[1])

    T = nx.minimum_spanning_tree(g_aug, 'distance')
    # debug visualisation is opt-in through the C42_LOGLEVEL environment variable
    if os.getenv('C42_LOGLEVEL') == 'DEBUG':
        self.show(T)

    # perhaps we need to add the required edges (again) to T, so far they were all included anyway
    # Let's test it first and then see further

    # match up the odd nodes of the spanning tree, using distances on the full graph
    odd_nodes = graph.get_odd_nodes(T)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))
    odd_node_pairs_shortest_paths = graph.get_shortest_paths_distances(
        g_full, odd_node_pairs, 'distance')
    g_complete = graph.create_complete_graph(odd_node_pairs_shortest_paths,
                                             flip_weights=True)
    if os.getenv('C42_LOGLEVEL') == 'DEBUG':
        self.show(g_complete)

    M = graph.dedupe_matching(
        nx.algorithms.max_weight_matching(g_complete, True))
    # g_aug is rebound here: from now on it is the tree plus the matching edges
    g_aug = graph.add_augmenting_path_to_graph(T, M)

    start_node = next(iter(required))[0]
    circuit = list(graph.create_eulerian_circuit(g_aug, g_full, start_node))
    return circuit
def cpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=0,
        g=None):
    """
    Solving the CPP from beginning (load network data) to end (finding optimal route).
    Can be run from command line with arguments from cpp.py, or from an interactive
    Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details.
            Pass None to use ``g`` instead.
        start_node (str or can be cast to str): name of starting node.  Cast to str
            when given.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is the edgelist file in graphml format?
        max_distance (double): NOT IMPLEMENTED (unused)
        max_degree_connect (int): only forwarded to ``read_graphml`` when ``graphml``
            is set; not used otherwise
        g (networkx multigraph): pre-loaded networkx MultiGraph. Either g or
            edgelist_filename must be specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
            Each tuple is a direction (from one node to another) from the CPP solution
            route: the starting ("from") node, the end ("to") node and the dict of edge
            attributes for that edge.  The leading part of the Eulerian circuit is
            trimmed before returning (see the end of the function body).
            The graph is returned as well.  This is needed for visualization.

    Raises:
        TypeError: if neither ``edgelist_filename`` nor ``g`` is given.
        ValueError: if ``g`` is given and some edge lacks the ``edge_weight`` attribute.
    """
    logger_cpp.disabled = not verbose

    logger_cpp.info('initialize graph')
    # Attribute names shared by every edge.  Stays None on the CSV path, where the edge
    # ids are not validated here.
    # NOTE: set.intersection(*[]) raises TypeError, so a graph without edges fails here.
    shared_keys = None
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            g = read_graphml(edgelist_filename,
                             edge_weight=edge_weight,
                             max_degree_connect=max_degree_connect)
            shared_keys = set.intersection(
                *[set(attrs) for _, _, attrs in g.edges(data=True)])
        else:
            el = read_edgelist(edgelist_filename, keep_optional=False)
            g = create_networkx_graph_from_edgelist(el)
    elif g is None:
        # none of edgelist filename or g is given - no graph specified
        raise TypeError("One of edgelist_filename or g must be given!")
    else:
        # use g - must ensure that format matches the expected format
        g = nx.MultiGraph(g)
        shared_keys = set.intersection(
            *[set(attrs) for _, _, attrs in g.edges(data=True)])
        if edge_weight not in shared_keys:
            raise ValueError(
                "g must include value for '{}' for every edge".format(
                    edge_weight))

    # Make sure an edge id exists on every edge and is unique; otherwise regenerate ids.
    reset_ids = False
    if shared_keys is not None:
        if 'id' not in shared_keys:
            reset_ids = True
        elif len({attrs['id'] for _, _, _, attrs in g.edges(keys=True, data=True)
                  }) != g.number_of_edges():
            warnings.warn(
                "Edgelist contains field named 'id' but the values provided are not unique. "
                "Replacing id field with uniquely defined values.")
            reset_ids = True

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g.edges(keys=True)):
            g.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    logger_cpp.info('get augmenting path for odd nodes')
    odd_nodes = get_odd_nodes(g)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # 'x' and 'y' is not in the generated graphml file, so this filtering is not
    # supported until x and y is added
    # odd_node_pairs = filter_by_haversine_distance(g, odd_node_pairs, max_distance=max_distance)

    start = time.time()
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g, odd_node_pairs, edge_weight)
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_cpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g, odd_matching)

    print(len(get_odd_nodes(g)), ' odd nodes, now', len(get_odd_nodes(g_aug)),
          nx.is_connected(g_aug))

    logger_cpp.info('get eulerian circuit route')
    circuit = list(create_eulerian_circuit(g_aug, g, start_node))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the start of the circuit (we dont care about the "full circuit"): walk the
    # circuit from both ends and stop at the first mirrored pair of steps that share
    # no node.
    # NOTE(review): the slice drops idx + 1 steps while the message reports idx, and
    # steps are dropped merely for sharing a node with their mirror step -- confirm
    # this is the intended trimming rule.
    if circuit:  # an empty circuit used to hit an unbound `idx` (NameError)
        last = len(circuit) - 1
        for idx in range(len(circuit)):
            head, tail = circuit[idx], circuit[last - idx]
            shares_node = (head[0] == tail[0] or head[0] == tail[1]
                           or head[1] == tail[0] or head[1] == tail[1])
            if not shares_node:
                break
        circuit = circuit[idx + 1:]
        print('Removed', idx, 'edges from the circuit start')

    return circuit, g
def rpp(edgelist_filename=None,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=None,
        g_full=None):
    """
    Solving the RPP from beginning (load network data) to end (finding optimal route).
    If the graph of required edges is not connected, ``make_connected`` is called with
    the full graph before solving.  This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str or can be cast to str): name of starting node.  Cast to str
            when given.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is the edgelist file in graphml format?
        max_distance (double): NOT IMPLEMENTED (unused)
        max_degree_connect (int): min degree of a node in the full graph -- nodes with
            smaller degree are connected with all-to-all optional edges.  Use -1 for
            all-to-all graph.  Only forwarded to ``read_graphml``.
        g_full (networkx multigraph): pre-loaded networkx MultiGraph. Either g_full or
            edgelist_filename must be specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
            Each tuple is a direction (from one node to another) from the solution
            route: the starting ("from") node, the end ("to") node and the dict of edge
            attributes for that edge.  The leading part of the Eulerian circuit is
            trimmed before returning (see the end of the function body).
            The full graph is returned as well.  This is needed for visualization.

    Raises:
        TypeError: if neither ``edgelist_filename`` nor ``g_full`` is given.
        ValueError: if ``g_full`` is given and some edge lacks the 'required' or
            ``edge_weight`` attribute.
    """
    print("Running RPP solver!")
    logger_rpp.disabled = not verbose

    logger_rpp.info('initialize full graph')
    # Attribute names shared by every edge.  Stays None on the CSV path, where the edge
    # ids are not validated here.
    # NOTE: set.intersection(*[]) raises TypeError, so a graph without edges fails here.
    shared_keys = None
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            # read in the graph
            g_full = read_graphml(edgelist_filename, edge_weight,
                                  max_degree_connect)
            shared_keys = set.intersection(
                *[set(attrs) for _, _, attrs in g_full.edges(data=True)])
        else:
            # regular csv file format...
            el = read_edgelist(edgelist_filename, keep_optional=True)
            g_full = create_networkx_graph_from_edgelist(el)
    elif g_full is None:
        # none of edgelist filename or g_full is given - no graph specified
        raise TypeError("One of edgelist_filename or g_full must be given!")
    else:
        # use g_full - must ensure that format matches the expected format
        g_full = nx.MultiGraph(g_full)
        shared_keys = set.intersection(
            *[set(attrs) for _, _, attrs in g_full.edges(data=True)])
        if not {'required', edge_weight} <= shared_keys:
            raise ValueError(
                "g_full must include values for 'required' and '{}' for every edge"
                .format(edge_weight))

    # Make sure an edge id exists on every edge and is unique; otherwise regenerate ids.
    reset_ids = False
    if shared_keys is not None:
        if 'id' not in shared_keys:
            # not every edge has a defined edge id - create a new one.
            reset_ids = True
        elif len({
                attrs['id']
                for _, _, _, attrs in g_full.edges(keys=True, data=True)
        }) != g_full.number_of_edges():
            warnings.warn(
                "Edgelist contains field named 'id' but the values provided are not unique. "
                "Replacing id field with uniquely defined values.")
            reset_ids = True

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g_full.edges(keys=True)):
            g_full.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    # if required graph is not connected, use additional edges from g_full to make it connected
    logger_rpp.info('create required graph')
    g_req = create_required_graph(g_full)
    if not is_connected(g_req):
        make_connected(g_req, g_full, edge_weight)  # THIS STEP COULD BE SLOW

    logger_rpp.info('getting odd node pairs')
    odd_nodes = get_odd_nodes(g_req)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    start = time.time()
    logger_rpp.info('get shortest paths between odd nodes')
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g_full, odd_node_pairs, edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_rpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g_req, odd_matching)

    logger_rpp.info('get eulerian circuit route')
    circuit = list(
        create_eulerian_circuit(g_aug,
                                g_full,
                                start_node,
                                edge_weight=edge_weight))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the start of the circuit (we dont care about the "full circuit"): walk the
    # circuit from both ends and stop at the first mirrored pair of steps that share
    # no node.
    # NOTE(review): the slice drops idx + 1 steps while the message reports idx, and
    # steps are dropped merely for sharing a node with their mirror step -- confirm
    # this is the intended trimming rule.
    if circuit:  # an empty circuit used to hit an unbound `idx` (NameError)
        last = len(circuit) - 1
        for idx in range(len(circuit)):
            head, tail = circuit[idx], circuit[last - idx]
            shares_node = (head[0] == tail[0] or head[0] == tail[1]
                           or head[1] == tail[0] or head[1] == tail[1])
            if not shares_node:
                break
        circuit = circuit[idx + 1:]
        print('Removed', idx, 'edges from the circuit start')

    return circuit, g_full