def test_read_sleeping_giant_edgelist():
    """Check the Sleeping Giant edgelist's fields and distance totals.

    The edgelist is read twice: once keeping optional edges and once with
    ``keep_optional=False``.
    """
    expected_columns = {
        'node1', 'node2', 'trail', 'color', 'distance', 'estimate', 'required'
    }

    df = read_edgelist(EDGELIST, keep_optional=True)

    # check that our Sleeping Giant example dataset contains the correct fields and values.
    # `some_list in ndarray` is an element-wise "any" comparison, so it can pass
    # while expected fields are missing; a subset check verifies every field.
    assert expected_columns.issubset(df.columns)
    assert math.isclose(df[df['required'] == 1]['distance'].sum(), 26.01)
    assert math.isclose(df['distance'].sum(), 30.48)

    df_req = read_edgelist(EDGELIST, keep_optional=False)
    assert math.isclose(df_req['distance'].sum(), 26.01)
    assert 'req' not in df_req.columns
def test_create_networkx_graph_from_edgelist():
    """Build graphs from the full and the required-only edgelist and check their size and one sample edge."""
    # check that our starting graph is created correctly
    full_graph = create_networkx_graph_from_edgelist(
        read_edgelist(EDGELIST, keep_optional=True), edge_id='id')
    assert isinstance(full_graph, nx.MultiGraph)
    assert full_graph.number_of_edges() == 133
    assert full_graph.number_of_nodes() == 78

    # first (key 0) edge between these two nodes
    sample_edge = full_graph['b_end_east']['b_y'][0]
    assert sample_edge['color'] == 'blue'
    assert sample_edge['trail'] == 'b'
    assert sample_edge['distance'] == 1.32

    # check that starting graph with required trails only is correct
    required_graph = create_networkx_graph_from_edgelist(
        read_edgelist(EDGELIST, keep_optional=False), edge_id='id')
    assert isinstance(required_graph, nx.MultiGraph)
    assert required_graph.number_of_edges() == 121
    assert required_graph.number_of_nodes() == 74
def rpp(edgelist_filename, start_node=None, edge_weight='distance', verbose=False):
    """
    Solve the RPP end to end: load the network data, then compute the route.

    The required graph (the full graph with optional edges removed) is passed to
    `assert_graph_is_connected` before any optimization happens, so the starting
    graph is expected to stay connected once optional edges are dropped.
    This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute used as the distance when computing
            shortest paths between odd nodes
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route: each tuple is one step holding the "from" node,
        the "to" node and the dict of edge attributes for that edge.
        The second item is the full graph built from the edgelist, which is needed for visualization.
    """
    logger_rpp.disabled = not verbose

    logger_rpp.info('read edgelist')
    edgelist = read_edgelist(edgelist_filename, keep_optional=True)

    logger_rpp.info('create full and required graph')
    full_graph = create_networkx_graph_from_edgelist(edgelist)
    required_graph = create_required_graph(full_graph)
    assert_graph_is_connected(required_graph)

    logger_rpp.info('getting odd node pairs')
    pairs = list(itertools.combinations(get_odd_nodes(required_graph), 2))

    # distances are measured on the full graph, so optional edges may be used as shortcuts
    logger_rpp.info('get shortest paths between odd nodes')
    pair_distances = get_shortest_paths_distances(full_graph, pairs, edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)
    raw_matching = nx.algorithms.max_weight_matching(complete_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_rpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(required_graph, matching)

    logger_rpp.info('get eulerian circuit route')
    route = list(create_eulerian_circuit(augmented_graph, full_graph, start_node))

    return route, full_graph
def test_read_edgelist_w_ids(GRAPH_1_EDGELIST_W_ID_CSV):
    """Reading an edgelist that already has an 'id' column emits one UserWarning and keeps all four columns."""
    with warnings.catch_warnings(record=True) as caught:
        edges = read_edgelist(GRAPH_1_EDGELIST_W_ID_CSV)

    # make sure correct warning was given
    assert len(caught) == 1
    last_warning = caught[-1]
    assert issubclass(last_warning.category, UserWarning)
    assert "Edgelist contains field named 'id'" in str(last_warning.message)

    assert edges.shape == (5, 4)
    assert set(edges.columns) == {'distance', 'node1', 'node2', 'id'}
def rpp(edgelist_filename, complete_g, start_node=None, edge_weight='distance', turn_weight_coefficient=1):
    """
    Solving the RPP from beginning (load network data) to end (finding optimal route).

    The required graph is passed to `assert_graph_is_strongly_connected`, so the starting
    graph is expected to stay (strongly) connected when optional edges are removed.
    This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.
        complete_g: graph handed to `sl.create_pos_and_add_to_graph` together with the full graph
            (presumably the source of node positions -- TODO confirm against that helper).
        start_node (str or can be cast to str): name of starting node.  When None, None is
            forwarded unchanged to `create_eulerian_circuit`.
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP.
            NOTE(review): this function never reads it; the turn-weight step uses the
            hard-coded 'distance' attribute.
        turn_weight_coefficient (float): turn weight coefficient used to add turn_weight attributes to g_full

    Returns:
        list[tuple(str, str, dict)]: Each tuple is a direction (from one node to another) from the solution route.
          The first element is the starting ("from") node.
          The second element is the end ("to") node.
          The third element is the dict of edge attributes for that edge.
    """
    el = read_edgelist(edgelist_filename)
    g_full = create_networkx_graph_from_edgelist(el)
    g_full, pos = sl.create_pos_and_add_to_graph(g_full, complete_g)
    g_full = create_turn_weight_edge_attr(
        g_full,
        length_weight='distance',
        normalization_coefficient=turn_weight_coefficient)

    g_req = create_required_graph(g_full)
    sl.visualize_g_req(g_req, pos)
    assert_graph_is_strongly_connected(g_req)

    g_aug = sl.make_graph_eulerian(g_req, g_full)
    sl.is_graph_eulerian(g_aug)

    # Only cast a real start node: str(None) would ask for a node literally named 'None'.
    if start_node is not None:
        start_node = str(start_node)

    circuit = list(
        create_eulerian_circuit(
            g_aug,
            g_full,
            start_node,
            edge_weight_name=turn_weight_function_distance))

    return circuit
def cpp(edgelist_filename, start_node=None, edge_weight='distance', verbose=False):
    """
    Solve the CPP end to end: load the network data, then compute the route.
    Can be run from command line with arguments from cpp.py, or from an interactive Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str): name of starting node.  See cpp.py for more details
        edge_weight (str): name of the edge attribute used as the distance when computing
            shortest paths between odd nodes
        verbose (boolean): log info messages?

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route: each tuple is one step holding the "from" node,
        the "to" node and the dict of edge attributes for that edge.
        The second item is the graph built from the edgelist, which is needed for visualization.
    """
    logger_cpp.disabled = not verbose

    logger_cpp.info('read edgelist and create base graph')
    edgelist = read_edgelist(edgelist_filename, keep_optional=False)
    graph = create_networkx_graph_from_edgelist(edgelist)

    logger_cpp.info('get augmenting path for odd nodes')
    pairs = list(itertools.combinations(get_odd_nodes(graph), 2))
    pair_distances = get_shortest_paths_distances(graph, pairs, edge_weight)
    complete_graph = create_complete_graph(pair_distances, flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    raw_matching = nx.algorithms.max_weight_matching(complete_graph, True)
    matching = dedupe_matching(raw_matching)

    logger_cpp.info('add the min weight matching edges to g')
    augmented_graph = add_augmenting_path_to_graph(graph, matching)

    logger_cpp.info('get eulerian circuit route')
    route = list(create_eulerian_circuit(augmented_graph, graph, start_node))

    return route, graph
def test_nodelist_edgelist_overlap():
    """
    Test that the nodelist and the edgelist contain the same node names.  If using X,Y coordinates for plotting and
    not all nodes have attributes, this could get messy.
    """
    eldf = read_edgelist(EDGELIST, keep_optional=True)
    nldf = pd.read_csv(NODELIST)

    # Series.append was removed in pandas 2.0; a plain set union collects the
    # same node names and works on every pandas version.
    edgelist_nodes = set(eldf['node1']) | set(eldf['node2'])
    nodelist_nodes = set(nldf['id'])

    nodes_in_el_but_not_nl = edgelist_nodes - nodelist_nodes
    assert nodes_in_el_but_not_nl == set(), \
        "Warning: The following nodes are in the edgelist, but not the nodelist: {}".format(nodes_in_el_but_not_nl)

    nodes_in_nl_but_not_el = nodelist_nodes - edgelist_nodes
    assert nodes_in_nl_but_not_el == set(), \
        "Warning: The following nodes are in the nodelist, but not the edgelist: {}".format(nodes_in_nl_but_not_el)
def test_get_shortest_paths_distances():
    """Coarsely check the structure of what `get_shortest_paths_distances` returns for all odd-node pairs."""
    df = read_edgelist(EDGELIST)
    graph = create_networkx_graph_from_edgelist(df, edge_id='id')

    odd_nodes = get_odd_nodes(graph)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # coarsely checking structure of `get_shortest_paths_distances` return value
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        graph, odd_node_pairs, 'distance')
    assert len(odd_node_pairs_shortest_paths) == 630
    assert isinstance(odd_node_pairs_shortest_paths, dict)

    # check that each node name appears the same number of times in `get_shortest_paths_distances` return value.
    # The keys are node pairs, so flattening them yields one entry per node occurrence.
    node_names = list(
        itertools.chain.from_iterable(odd_node_pairs_shortest_paths))
    # Series.value_counts replaces the deprecated top-level pd.value_counts.
    assert set(pd.Series(node_names).value_counts()) == {35}
def main():
    """Solve the no-return CPP for every pair of odd nodes and store each route with its stats in SQLite.

    The database is created through `dbfun.create_subway_sqlite3(clear_db=True)`, one row is
    inserted per odd-node pair, and `dbfun.add_route_ranks` is called once after all
    routes are inserted.
    """
    # Connect to Sqlite3 & create table
    sqlite3_conn = dbfun.create_subway_sqlite3(clear_db=True)
    dbfun.add_stations_table_sqlite3(sqlite3_conn)
    dbfun.add_edges_table_sqlite3(sqlite3_conn)

    edgelist = './Data/Paths-Decision-Points.csv'
    el = ppg.read_edgelist(edgelist, keep_optional=False)
    g = ppg.create_networkx_graph_from_edgelist(el)
    odd_nodes = ppg.get_odd_nodes(g)

    # Logging setup does not depend on the node pair, so do it once rather than per iteration.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # This for loop gets all the euler paths for every combination of start and end nodes,
    # saves the routes/statistics as a dictionary, and inserts it into the database
    for first_node, second_node in itertools.combinations(odd_nodes, 2):
        circuit_name = first_node + ' - ' + second_node
        path_stats = {'path': circuit_name}

        # For some reason, the no_return_cpp is returning the path backwards so the end_node is passed as the start
        circuit, _ = no_return_cpp(edgelist, second_node, first_node)
        # Report success only after the solver has actually returned.
        logger.info('Solved CPP for %s', circuit_name)

        # Formats the route and adds it to the dictionary along with the other stats
        route = '-'.join(edge[0] for edge in circuit)
        route = route + '-' + second_node
        path_stats.update(calculate_postman_solution_stats(circuit))
        path_stats['route'] = route

        # Inserts into Sqlite3
        dbfun.insert_into_sqlite3(sqlite3_conn, path_stats)

    # Add rankings
    dbfun.add_route_ranks(sqlite3_conn)
def test_add_node_attributes():
    """Attach nodelist attributes to the graph and check that every node carries X and Y values."""
    # create objects for testing
    edges = read_edgelist(EDGELIST)
    graph = create_networkx_graph_from_edgelist(edges, edge_id='id')
    nodes = pd.read_csv(NODELIST)
    annotated = add_node_attributes(graph, nodes)

    assert annotated.number_of_nodes() == 74

    # check that each node attribute has an X and Y coordinate
    attrs_by_node = dict(annotated.nodes(data=True))
    for attrs in attrs_by_node.values():
        assert 'X' in attrs
        assert 'Y' in attrs

    # spot check node attributes for one known node
    assert 'rs_end_north' in attrs_by_node
    north_end = attrs_by_node['rs_end_north']
    assert north_end['X'] == 1772
    assert north_end['Y'] == 172
def test_read_edgelist(GRAPH_1_EDGELIST_CSV):
    """A plain edgelist loads as five rows with exactly the two node columns and the distance column."""
    edges = read_edgelist(GRAPH_1_EDGELIST_CSV)

    expected_columns = {'node1', 'node2', 'distance'}
    assert edges.shape == (5, 3)
    assert set(edges.columns) == expected_columns
def _cpp_shared_edge_keys(g):
    """Return the set of attribute names that every edge of ``g`` carries."""
    return set.intersection(
        *[set(attrs.keys()) for _, _, attrs in g.edges(data=True)])


def _cpp_edge_ids_need_reset(g, shared_keys):
    """Return True when some edge lacks an 'id' or the ids are not unique (the latter also warns)."""
    if 'id' not in shared_keys:
        return True

    # id is already specified - ensure that it is unique
    distinct_ids = {edg[3]['id'] for edg in g.edges(keys=True, data=True)}
    if len(distinct_ids) != g.number_of_edges():
        # stacklevel=2 keeps the warning attributed to cpp() rather than to this helper
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def cpp(edgelist_filename,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=0,
        g=None):
    """
    Solving the CPP from beginning (load network data) to end (finding optimal route).
    Can be run from command line with arguments from cpp.py, or from an interactive Python session (ex jupyter notebook)

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details.
            Pass None explicitly to solve on a pre-loaded graph `g` instead.
        start_node (str or can be cast to str): name of starting node.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is edgelist filename a in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): NOT IMPLEMENTED (only forwarded to `read_graphml`)
        g (networkx multigraph): pre-loaded networkx MultiGraph. Either g or edgelist_filename must be specified.
            If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route: each tuple is one step holding the "from" node,
        the "to" node and the dict of edge attributes for that edge.  Leading edges are
        trimmed from the eulerian circuit before it is returned (see the end of the function).
        The second item is the graph the route was computed on, which is needed for visualization.

    Raises:
        TypeError: if neither edgelist_filename nor g is given.
        ValueError: if a pre-loaded g has an edge without the `edge_weight` attribute.
    """
    logger_cpp.disabled = not verbose
    reset_ids = False

    logger_cpp.info('initialize graph')
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            g = read_graphml(edgelist_filename,
                             edge_weight=edge_weight,
                             max_degree_connect=max_degree_connect)
            # make sure edge id exists and is unique
            reset_ids = _cpp_edge_ids_need_reset(g, _cpp_shared_edge_keys(g))
        else:
            el = read_edgelist(edgelist_filename, keep_optional=False)
            g = create_networkx_graph_from_edgelist(el)
    elif g is None:
        # none of edgelist filename or g is given - no graph specified
        raise TypeError("One of edgelist_filename or g must be given!")
    else:
        # use g - must ensure that format matches the expected format
        g = nx.MultiGraph(g)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _cpp_shared_edge_keys(g)
        if edge_weight not in shared_keys:
            raise ValueError(
                "g must include value for '{}' for every edge".format(
                    edge_weight))
        reset_ids = _cpp_edge_ids_need_reset(g, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g.edges(keys=True)):
            g.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    logger_cpp.info('get augmenting path for odd nodes')
    odd_nodes = get_odd_nodes(g)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    # 'x' and 'y' is not in the generated graphml file, so this filtering is not supported until x and y is added
    # odd_node_pairs = filter_by_haversine_distance(g, odd_node_pairs, max_distance=max_distance)

    start = time.time()
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g, odd_node_pairs, edge_weight)
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)

    logger_cpp.info('Find min weight matching using blossom algorithm')
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_cpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g, odd_matching)

    print(len(get_odd_nodes(g)), ' odd nodes, now', len(get_odd_nodes(g_aug)),
          nx.is_connected(g_aug))

    logger_cpp.info('get eulerian circuit route')
    circuit = list(create_eulerian_circuit(g_aug, g, start_node))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the circuit (we dont care about the "full circuit"): walk inward from both
    # ends in lockstep while the idx-th edge and the idx-th-from-last edge share an
    # endpoint, then drop edges 0..idx from the front.
    # NOTE(review): idx + 1 edges are dropped while the message reports idx - confirm
    # which of the two is intended.
    idx = 0
    if circuit:  # an empty circuit would otherwise leave idx unbound
        last_idx = len(circuit) - 1
        for idx in range(len(circuit)):
            head_edge = circuit[idx]
            tail_nodes = (circuit[last_idx - idx][0], circuit[last_idx - idx][1])
            if head_edge[0] not in tail_nodes and head_edge[1] not in tail_nodes:
                break
        circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g
def _rpp_shared_edge_keys(g_full):
    """Return the set of attribute names that every edge of ``g_full`` carries."""
    return set.intersection(
        *[set(attrs.keys()) for _, _, attrs in g_full.edges(data=True)])


def _rpp_edge_ids_need_reset(g_full, shared_keys):
    """Return True when some edge lacks an 'id' or the ids are not unique (the latter also warns)."""
    if 'id' not in shared_keys:
        # not every edge has a defined edge id - create a new one.
        return True

    # id is already specified - ensure that it is unique
    distinct_ids = {
        edg[3]['id'] for edg in g_full.edges(keys=True, data=True)
    }
    if len(distinct_ids) != g_full.number_of_edges():
        # stacklevel=2 keeps the warning attributed to rpp() rather than to this helper
        warnings.warn(
            "Edgelist contains field named 'id' but the values provided are not unique. "
            "Replacing id field with uniquely defined values.",
            stacklevel=2)
        return True
    return False


def rpp(edgelist_filename=None,
        start_node=None,
        edge_weight='distance',
        verbose=False,
        graphml=False,
        max_distance=None,
        max_degree_connect=None,
        g_full=None):
    """
    Solving the RPP from beginning (load network data) to end (finding optimal route).

    If the required graph (full graph with optional edges removed) is not connected,
    `make_connected` is called with the full graph before the optimization continues.
    This class of RPP generalizes to the CPP strategy.

    Args:
        edgelist_filename (str): filename of edgelist.  See cpp.py for more details
        start_node (str or can be cast to str): name of starting node.  See cpp.py for more details
        edge_weight (str): name edge attribute that indicates distance to minimize in CPP
        verbose (boolean): log info messages?
        graphml (boolean): is edgelist filename a in graphml format?
        max_distance (double): NOT IMPLEMENTED
        max_degree_connect (int): min degree of a node in the full graph -- nodes with smaller degree are
            connected with all-to-all optional edges. Use -1 for all-to-all graph.
            Only forwarded to `read_graphml`.
        g_full (networkx multigraph): pre-loaded networkx MultiGraph. Either g_full or edgelist_filename must be
            specified. If both are given, filename will be used.

    Returns:
        tuple(list[tuple(str, str, dict)], networkx.MultiGraph):
        The first item is the route: each tuple is one step holding the "from" node,
        the "to" node and the dict of edge attributes for that edge.  Leading edges are
        trimmed from the eulerian circuit before it is returned (see the end of the function).
        The second item is the full graph, which is needed for visualization.

    Raises:
        TypeError: if neither edgelist_filename nor g_full is given.
        ValueError: if a pre-loaded g_full has an edge without 'required' or the `edge_weight` attribute.
    """
    print("Running RPP solver!")

    logger_rpp.disabled = not verbose
    logger_rpp.info('initialize full graph')

    reset_ids = False
    if edgelist_filename is not None:
        # edgelist filename is given - load graph from file
        if graphml:
            # read in the graph
            g_full = read_graphml(edgelist_filename, edge_weight,
                                  max_degree_connect)
            # make sure edge id exists and is unique
            reset_ids = _rpp_edge_ids_need_reset(
                g_full, _rpp_shared_edge_keys(g_full))
        else:
            # regular csv file format...
            el = read_edgelist(edgelist_filename, keep_optional=True)
            g_full = create_networkx_graph_from_edgelist(el)
    elif g_full is None:
        # none of edgelist filename or g_full is given - no graph specified
        raise TypeError("One of edgelist_filename or g_full must be given!")
    else:
        # use g_full - must ensure that format matches the expected format
        g_full = nx.MultiGraph(g_full)
        # check for all needed fields - if id is not set it will be set manually
        shared_keys = _rpp_shared_edge_keys(g_full)
        if not {'required', edge_weight}.issubset(shared_keys):
            raise ValueError(
                "g_full must include values for 'required' and '{}' for every edge"
                .format(edge_weight))
        reset_ids = _rpp_edge_ids_need_reset(g_full, shared_keys)

    # if needed, create new id
    if reset_ids:
        for ii, edg in enumerate(g_full.edges(keys=True)):
            g_full.edges[edg]['id'] = str(ii)

    # if start node is given, make sure it's a string!
    if start_node is not None:
        start_node = str(start_node)

    # if required graph is not connected, use additional edges from g_full to make it connected
    logger_rpp.info('create required graph')
    g_req = create_required_graph(g_full)
    if not is_connected(g_req):
        make_connected(g_req, g_full, edge_weight)  # THIS STEP COULD BE SLOW

    logger_rpp.info('getting odd node pairs')
    odd_nodes = get_odd_nodes(g_req)
    odd_node_pairs = list(itertools.combinations(odd_nodes, 2))

    start = time.time()
    logger_rpp.info('get shortest paths between odd nodes')
    odd_node_pairs_shortest_paths = get_shortest_paths_distances(
        g_full, odd_node_pairs, edge_weight)

    logger_rpp.info('Find min weight matching using blossom algorithm')
    g_odd_complete = create_complete_graph(odd_node_pairs_shortest_paths,
                                           flip_weights=True)
    odd_matching = dedupe_matching(
        nx.algorithms.max_weight_matching(g_odd_complete, True))

    logger_rpp.info('add the min weight matching edges to g')
    g_aug = add_augmenting_path_to_graph(g_req, odd_matching)

    logger_rpp.info('get eulerian circuit route')
    circuit = list(
        create_eulerian_circuit(g_aug,
                                g_full,
                                start_node,
                                edge_weight=edge_weight))
    end = time.time()
    print('matching and augment time:', end - start)

    # Trim the circuit (we dont care about the "full circuit"): walk inward from both
    # ends in lockstep while the idx-th edge and the idx-th-from-last edge share an
    # endpoint, then drop edges 0..idx from the front.
    # NOTE(review): idx + 1 edges are dropped while the message reports idx - confirm
    # which of the two is intended.
    idx = 0
    if circuit:  # an empty circuit would otherwise leave idx unbound
        last_idx = len(circuit) - 1
        for idx in range(len(circuit)):
            head_edge = circuit[idx]
            tail_nodes = (circuit[last_idx - idx][0], circuit[last_idx - idx][1])
            if head_edge[0] not in tail_nodes and head_edge[1] not in tail_nodes:
                break
        circuit = circuit[idx + 1:]
    print('Removed', idx, 'edges from the circuit start')

    return circuit, g_full