def _sort_nodes(graph):
    """
    Return the nodes of a line- or circle-shaped component in traversal order.

    NetworkX does not preserve any node order for edges in MultiDiGraphs, so
    the sequence is rebuilt here.  The shape of the component is reported by
    `is_graph_line_or_circle`:

    - 'line': the component must have exactly two nodes of degree 1; they are
      used as start and end node and the sequence is the path between them as
      returned by `spm.dijkstra_path`.
    - 'circle': the sequence is the first node of every step of
      `nx.eulerian_circuit`, so the start node is chosen by NetworkX.

    Args:
        graph (networkx graph): component to order.

    Returns:
        list of node ids that constitute a direct path (tour) through each node.

    Raises:
        RuntimeError: if the component is neither a 'line' nor a 'circle'.
        ValueError: if a 'line' does not have exactly two degree-1 nodes
            (raised by the tuple unpacking).
        AssertionError: if the sequence does not visit as many nodes as the
            graph contains.
    """
    edge_type = is_graph_line_or_circle(graph)
    degree1_nodes = [node for node, degree in graph.degree() if degree == 1]

    if edge_type not in ('line', 'circle'):
        raise RuntimeError('Unrecognized edge_type')

    if edge_type == 'line':
        # the two endpoints of the line are its only degree-1 nodes
        start_node, end_node = degree1_nodes
        nodes = spm.dijkstra_path(graph, start_node, end_node)
    else:
        nodes = [step[0] for step in nx.eulerian_circuit(graph)]

    assert len(nodes) == len(graph.nodes())
    return nodes
def create_rpp_edgelist(g_strongly_connected, graph_full, edge_weight='distance', max_distance=1600):
    """
    Create the edgelist for the RPP algorithm.

    The edgelist contains two kinds of rows:

    - Required rows (`required` = 1): one per edge of `g_strongly_connected`
      that does not carry a 'granular' attribute.  Their 'distance', 'path'
      and 'length' are copied from the edge attributes, and
      'distance_haversine' is set to the edge's 'distance'.
    - Optional rows (`required` = 0): one per unordered node pair (n1, n2) of
      `g_strongly_connected` that has no n1 -> n2 edge and whose haversine
      distance is at most `max_distance`.  Their 'distance' and 'path' are the
      shortest path in `graph_full` by `edge_weight`; 'length' is the shortest
      path length in `graph_full` by the 'length' attribute.

    Args:
        g_strongly_connected (networkx MultiDiGraph): of strongly connected roads.
            Nodes must carry 'x' and 'y' attributes.
        graph_full (networkx Graph): full graph with all granular edges
        edge_weight (str): edge attribute used for the shortest path
            calculations in `graph_full`.
        max_distance (int): max haversine distance used to add candidate optional edges for.

    Returns:
        Dataframe with columns start_node, end_node, distance_haversine,
        distance, required, path, length (in that order).  The columns are
        present even when no row qualifies.
    """
    columns = ['start_node', 'end_node', 'distance_haversine', 'distance', 'required', 'path', 'length']

    # required rows: every edge that is not flagged as granular
    # NOTE(review): the 'distance' attribute is read here regardless of
    # `edge_weight` -- confirm whether required edges should honour it too.
    dfrpp_list = [
        {
            'start_node': u,
            'end_node': v,
            'distance_haversine': attr['distance'],
            'required': 1,
            'distance': attr['distance'],
            'path': attr['path'],
            'length': attr['length'],
        }
        for u, v, attr in g_strongly_connected.edges(data=True)
        if 'granular' not in attr
    ]

    # optional rows: node pairs that are close enough but not directly linked
    node_attr = g_strongly_connected.nodes
    for n1, n2 in itertools.combinations(g_strongly_connected.nodes(), 2):
        if g_strongly_connected.has_edge(n1, n2):
            continue

        distance_haversine = haversine(node_attr[n1]['x'], node_attr[n1]['y'],
                                       node_attr[n2]['x'], node_attr[n2]['y'])

        # only add optional edges whose haversine distance is less than `max_distance`
        if distance_haversine > max_distance:
            continue

        dfrpp_list.append({
            'start_node': n1,
            'end_node': n2,
            'distance_haversine': distance_haversine,
            'required': 0,
            'distance': spm.dijkstra_path_length(graph_full, n1, n2, edge_weight),
            'path': spm.dijkstra_path(graph_full, n1, n2, edge_weight),
            'length': nx.dijkstra_path_length(graph_full, n1, n2, 'length'),
        })

    # passing `columns` fixes the column order and keeps the frame well-formed
    # (instead of raising KeyError) when no rows were collected
    return pd.DataFrame(dfrpp_list, columns=columns)
def create_eulerian_circuit(graph_augmented, graph_original, start_node=None, edge_weight_name='distance'):
    """
    Generate the eulerian circuit of `graph_augmented` edge by edge, with attributes.

    networkx.eulerian_circuit only returns the order in which we hit each node.  It does not return the
    attributes of the edges needed to complete the circuit.  This is necessary for the postman problem where
    we need to keep track of which edges have been covered already when multiple edges exist between two nodes.
    We also need to annotate the edges added to make the graph eulerian so that they follow the actual
    shortest path trails (not the direct shortest path pairings between the odd nodes for which there might
    not be a direct trail).

    Edges of the circuit that are not flagged 'augmented' are yielded as they are.  Every edge flagged
    'augmented' is replaced by the edges of the shortest path between its end nodes in `graph_original`;
    where parallel edges exist between two consecutive path nodes, the one with the smallest
    `edge_weight_name` is used.

    Args:
        graph_augmented (networkx graph): graph w links between odd degree nodes created from
            `add_augmenting_path_to_graph`.
        graph_original (networkx graph): graph in which the shortest paths for the augmented edges are
            looked up.  It is not modified.
        start_node (str): name of starting (and ending) node for CPP solution.
        edge_weight_name (str): edge attribute used for the shortest path calculation and for choosing
            between parallel edges.

    Yields:
        tuple (start node, end node, edge key, edge attribute dict).  For edges expanded from an augmented
        edge, the dict is a copy of the `graph_original` edge attributes with 'augmented' set to True.

    Raises:
        AssertionError: if the circuit does not contain as many edges as `graph_augmented`.
    """
    euler_circuit = list(nx.eulerian_circuit(graph_augmented, source=start_node, keys=True))
    assert len(graph_augmented.edges()) == len(euler_circuit), \
        'graph and euler_circuit do not have equal number of edges.'

    for edge in euler_circuit:
        u, v, key = edge
        edge_attr = graph_augmented[u][v][key]

        if not edge_attr.get('augmented'):
            yield edge + (edge_attr, )
            continue

        # the shortest path is only needed to expand augmented edges
        aug_path = spm.dijkstra_path(graph_original, u, v, weight=edge_weight_name)
        for step_u, step_v in zip(aug_path[:-1], aug_path[1:]):
            # pick the lightest edge if there are parallel edges between the same nodes
            parallel_edges = graph_original[step_u][step_v]
            edge_key = min(parallel_edges, key=lambda k: parallel_edges[k][edge_weight_name])

            # copy so that flagging the edge does not leak into `graph_original`
            edge_aug_shortest = dict(parallel_edges[edge_key])
            edge_aug_shortest['augmented'] = True
            yield (step_u, step_v, edge_key, edge_aug_shortest)
def add_augmenting_path_to_graph(g_req, g_full, min_weight_pairs, edge_weight_name='distance'):
    """
    Add the min weight matching edges to a copy of `g_req`.

    Note the resulting graph could (and likely will) have edges that didn't exist on the original graph.  To get the
    true circuit, we must break down these augmented edges into the shortest path through the edges that do exist.
    This is done with `create_eulerian_circuit`.

    Each added edge carries these attributes:
      - 'distance': shortest path length between the pair in `g_full` by `edge_weight_name`
      - 'length': shortest path length between the pair in `g_full` by the 'length' attribute
      - 'turn_length': 'turn_length' of the edge with key 0 between the first two nodes of the shortest path
      - 'augmented': True

    Args:
        g_req (networkx graph): graph to augment; it is copied, not modified.
        g_full (networkx graph): graph used for the shortest path calculations.
        min_weight_pairs (list[2tuples]): output of `dedupe_matching` specifying the odd degree nodes to link together
        edge_weight_name (str): edge attribute used for distance calculation

    Returns:
        networkx graph: copy of `g_req` augmented with edges between the odd nodes specified in `min_weight_pairs`
    """
    graph_aug = g_req.copy()  # so we don't mess with the original graph

    for pair in min_weight_pairs:
        source, target = pair[0], pair[1]
        path = spm.dijkstra_path(g_full, source, target, weight=edge_weight_name)

        # NOTE(review): key 0 is assumed to exist between the first two path nodes -- confirm for
        # graphs with parallel edges.
        turn_length = g_full[path[0]][path[1]][0]['turn_length']

        graph_aug.add_edge(
            source,
            target,
            distance=spm.dijkstra_path_length(g_full, source, target, weight=edge_weight_name),
            augmented=True,
            turn_length=turn_length,
            length=nx.dijkstra_path_length(g_full, source, target, weight='length'),
        )

    return graph_aug
def _fill_path_metrics(df, row_labels, graph, edge_weight):
    """Compute 'path_distance', 'path' and 'length' in place for the given rows that have none yet."""
    for i in row_labels:
        if df.loc[i, 'path_distance'] is not None:
            continue
        start_node = df.loc[i, 'start_node']
        end_node = df.loc[i, 'end_node']
        df.loc[i, 'path_distance'] = spm.dijkstra_path_length(graph, start_node, end_node, edge_weight)
        # `.at` is used so that a list can be stored in a single cell
        df.at[i, 'path'] = spm.dijkstra_path(graph, start_node, end_node, edge_weight)
        df.at[i, 'length'] = nx.dijkstra_path_length(graph, start_node, end_node, 'length')


def find_minimum_weight_edges_to_connect_components(dfsp, graph, edge_weight='distance', top=10):
    """
    Given a dataframe of haversine distances between many pairs of nodes, calculate the min weight way to connect all
    the components in `graph`.  At each iteration, the true shortest path (dijkstra_path_length) is calculated for the
    top `top` closest node pairs using haversine distance.  This heuristic improves efficiency at the cost of
    potentially not finding the true min weight connectors.  If this is a concern, increase `top`.

    Each iteration picks the candidate pair with the smallest true path distance that links two different
    components, picks the best candidate for the opposite direction (end component back to start component),
    records both as connectors and merges the two components.  Iteration stops when no candidate pair links two
    different components any more.

    Args:
        dfsp (dataframe): calculated with `shortest_paths_between_components` with haversine distance between all
         node candidate node pairs.  Must have the columns start_node, end_node, start_comp, end_comp and
         haversine_distance.  It is not modified.  Rows are taken in the given order on the first iteration and
         in order of haversine_distance afterwards.
        graph (networkx graph): used for the true shortest path calculation
        edge_weight (str): edge attribute used shortest path calculation in `graph`
        top (int): number of pairs for which shortest path calculation is performed at each iteration

    Returns:
        list[tuple3] (start node, end node, attribute dict) containing just the connectors needed to connect all the
        components in `graph`.  The dict holds 'distance', 'path', 'length', 'start_comp' and 'end_comp'.

    Raises:
        IndexError: if two components are linked by a candidate pair in one direction but no candidate pair exists
            for the opposite direction.
    """
    # find shortest edges to add to make one big connected component
    dfsp = dfsp.copy()
    new_required_edges = []

    # `.any()` rather than summing the index labels: a remaining row labelled 0 must not end the loop
    while (dfsp['start_comp'] != dfsp['end_comp']).any():
        # path metrics are recomputed on every iteration for the current top candidates
        dfsp['path_distance'] = None
        dfsp['path'] = [[] for _ in range(len(dfsp))]

        cross_component = dfsp.index[dfsp['start_comp'] != dfsp['end_comp']]
        _fill_path_metrics(dfsp, cross_component[:top], graph, edge_weight)
        dfsp.sort_values('path_distance', inplace=True)

        # The first index where start and end comps are different is the index containing the shortest connecting
        # path between start comp and end comp
        first_index = dfsp.index[dfsp['start_comp'] != dfsp['end_comp']][0]
        best = dfsp.loc[first_index]
        start_comp = best['start_comp']
        end_comp = best['end_comp']
        start_node = best['start_node']
        end_node = best['end_node']
        path_distance = best['path_distance']
        path = best['path']
        length = best['length']

        # candidates for the way back: from end_comp to start_comp (explicit copy, it is modified below)
        reverse_mask = (dfsp['start_comp'] == end_comp) & (dfsp['end_comp'] == start_comp)
        dfsp_rev_pair = dfsp[reverse_mask].copy()
        if dfsp_rev_pair.empty:
            raise IndexError('no candidate node pairs lead from component %s back to component %s'
                             % (end_comp, start_comp))

        _fill_path_metrics(dfsp_rev_pair, dfsp_rev_pair.index[:top], graph, edge_weight)
        dfsp_rev_pair.sort_values('path_distance', inplace=True)

        best_rev = dfsp_rev_pair.loc[dfsp_rev_pair.index[0]]
        start_node_rev = best_rev['start_node']
        end_node_rev = best_rev['end_node']
        path_distance_rev = best_rev['path_distance']
        path_rev = best_rev['path']
        length_rev = best_rev['length']

        # merge end_comp into start_comp and restore the haversine ordering for the next iteration
        dfsp.loc[dfsp['end_comp'] == end_comp, 'end_comp'] = start_comp
        dfsp.loc[dfsp['start_comp'] == end_comp, 'start_comp'] = start_comp
        dfsp.sort_values('haversine_distance', inplace=True)

        new_required_edges.append((start_node, end_node, {
            'distance': path_distance,
            'path': path,
            'length': length,
            'start_comp': start_comp,
            'end_comp': end_comp,
        }))
        new_required_edges.append((start_node_rev, end_node_rev, {
            'distance': path_distance_rev,
            'length': length_rev,
            'path': path_rev,
            'start_comp': end_comp,
            'end_comp': start_comp,
        }))

    return new_required_edges