def test_dead_bypass():
    """
    Run mod_boruvka on a graph containing a dead node and verify that no
    two resulting connected components lie within both of their budgets
    of each other (if they did, they should have been connected).
    """
    graph = graph_with_dead_node()
    subgraphs = UnionFind()
    rtree = Rtree()

    # build the minimum spanning forest
    forest = mod_boruvka(graph, subgraphs=subgraphs, rtree=rtree)

    # pairwise euclidean distances between all nodes
    points = np.array(forest.coords.values())
    deltas = points[np.newaxis, :, :] - points[:, np.newaxis, :]
    node_dists = np.sqrt((deltas ** 2).sum(2))

    # minimum node-to-node distance for every ordered pair of components,
    # starting from infinity
    components = subgraphs.connected_components()
    closest = {pair: np.inf
               for pair in itertools.product(components, components)}
    for (row, col), dist in np.ndenumerate(node_dists):
        key = (subgraphs[row], subgraphs[col])
        if dist < closest[key]:
            closest[key] = dist

    # a pair of distinct components whose budgets both cover the gap
    # between them is a connection the algorithm failed to make
    missed_connections = [
        (first, second)
        for first, second in itertools.product(components, components)
        if first != second
        and subgraphs.budget[first] >= closest[(first, second)]
        and subgraphs.budget[second] >= closest[(first, second)]
    ]

    assert len(missed_connections) == 0, "missed connections: " + \
        str(missed_connections)
def test_min_edges():
    """
    Run mod_boruvka on a graph with a high budget and a long edge and
    verify that the result matches the MST networkx computes over the
    complete euclidean-distance graph of the same nodes.
    """
    graph = graph_high_mvmax_long_edge()
    subgraphs = UnionFind()
    rtree = Rtree()

    # build the minimum spanning forest
    forest = mod_boruvka(graph, subgraphs=subgraphs, rtree=rtree)

    # reference MST from networkx, built on the full distance matrix
    points = np.array(forest.coords.values())
    deltas = points[np.newaxis, :, :] - points[:, np.newaxis, :]
    node_dists = np.sqrt((deltas ** 2).sum(2))
    reference_mst = nx.minimum_spanning_tree(nx.Graph(node_dists))

    # compare as undirected edge sets
    expected_edges = set(frozenset(edge) for edge in reference_mst.edges())
    actual_edges = set(frozenset(edge) for edge in forest.edges())
    assert actual_edges == expected_edges
def test_union_budget():
    """
    Chain-union consecutive nodes of a generated network and check that
    the budget stored for the resulting component equals the running
    total: sum of node budgets minus the distance of every union.
    """
    net = init_network(5000)
    subgraphs = UnionFind()
    nodes = net.nodes(data=True)

    expected = None
    for (left, left_data), (right, right_data) in zip(nodes[:-1], nodes[1:]):
        # seed the running total with the very first node's budget
        if expected is None:
            expected = left_data['budget']

        hop = spherical_distance([left_data['coords'], right_data['coords']])
        expected = (expected + right_data['budget']) - hop

        subgraphs.add_component(left, budget=left_data['budget'])
        subgraphs.add_component(right, budget=right_data['budget'])
        subgraphs.union(left, right, hop)

    eq_(np.allclose(subgraphs.budget[subgraphs[1]], expected), True)
def test_line_subgraph_intersection():
    """
    Regression test for a case where precision and odd geometry issues
    occur: the single projected edge must not be reported as invalid.
    """
    # input files
    existing_net_file = os.path.join("data", "katsina", "existing.shp")
    demand_nodes_file = os.path.join("data", "katsina", "metrics.csv")

    # load the existing network and prefix every node label with "g-"
    network = nio.read_shp_geograph(existing_net_file, simplify=False)
    relabel = dict((n, "g-" + str(n)) for n in network.nodes())
    network.coords = dict((relabel[n], network.coords[n])
                          for n in network.nodes())
    nx.relabel_nodes(network, relabel, copy=False)

    nodes = nio.read_csv_geograph(demand_nodes_file, "x", "y")

    # the network is treated as one connected component, so every node is
    # unioned (at zero distance) with the first one
    subgraphs = UnionFind()
    net_nodes = network.nodes()
    parent = net_nodes[0]
    subgraphs.add_component(parent, budget=0)
    for member in net_nodes[1:]:
        subgraphs.add_component(member, budget=0)
        subgraphs.union(parent, member, 0)

    # project the demand nodes onto the grid and keep only the new part
    rtree = network.get_rtree_index()
    projected = network.project_onto(nodes, rtree_index=rtree)
    projected.remove_nodes_from(network)

    assert len(projected.edges()) == 1, "should only be 1 projected edge"

    start, end = projected.edges()[0][0], projected.edges()[0][1]
    p1, p2 = projected.coords[start], projected.coords[end]

    invalid, subgraphs = gm.line_subgraph_intersection(subgraphs, rtree,
                                                       p1, p2)

    assert not invalid, "edge should intersect network only once"
def test_line_subgraph_intersection():
    """
    Regression test for a case where precision and odd geometry issues
    occur: the single projected edge must not be reported as invalid.
    """
    # input files
    existing_net_file = os.path.join("data", "katsina", "existing.shp")
    demand_nodes_file = os.path.join("data", "katsina", "metrics.csv")

    # load the existing network and prefix every node label with "g-"
    network = nio.read_shp_geograph(existing_net_file, simplify=False)
    relabel = dict((n, "g-" + str(n)) for n in network.nodes())
    network.coords = dict((relabel[n], network.coords[n])
                          for n in network.nodes())
    nx.relabel_nodes(network, relabel, copy=False)

    nodes = nio.read_csv_geograph(demand_nodes_file, "x", "y")

    # the network is treated as one connected component, so every node is
    # unioned (at zero distance) with the first one
    subgraphs = UnionFind()
    net_nodes = network.nodes()
    parent = net_nodes[0]
    subgraphs.add_component(parent, budget=0)
    for member in net_nodes[1:]:
        subgraphs.add_component(member, budget=0)
        subgraphs.union(parent, member, 0)

    # project the demand nodes onto the grid and keep only the new part
    rtree = network.get_rtree_index()
    projected = network.project_onto(nodes, rtree_index=rtree)
    projected.remove_nodes_from(network)

    assert len(projected.edges()) == 1, "should only be 1 projected edge"

    start, end = projected.edges()[0][0], projected.edges()[0][1]
    p1, p2 = projected.coords[start], projected.coords[end]

    invalid, subgraphs = gm.line_subgraph_intersection(subgraphs, rtree,
                                                       p1, p2)

    assert not invalid, "edge should intersect network only once"
def merge_network_and_nodes(network, demand_nodes, single_network=True):
    """
    merge the network and nodes GeoGraphs to set up the Graph, UnionFind
    (DisjointSet), and RTree datastructures for use in network algorithms

    Args:
        network: graph representing existing network
            (assumes node ids don't conflict with net (demand) nodes)
        demand_nodes: graph of nodes representing demand
        single_network: whether subgraphs of network are unioned into
            a single network

    Returns:
        graph: graph with demand nodes and their nearest nodes to the
            existing network (i.e. 'fake' nodes)
        subgraphs: UnionFind datastructure populated with fake nodes and
            associated with the appropriate connected component or the
            entire network (depending on ``single_network`` param)
        rtree: spatial index populated with the edges from the
            existing network

    Raises:
        AssertionError: if the network has fewer than 2 nodes, or if the
            two endpoints of a fake node's edge are in different subgraphs
    """
    # project demand nodes onto network
    rtree = network.get_rtree_index()
    grid_with_fakes = network.project_onto(demand_nodes, rtree_index=rtree)

    # get only the fake nodes and the associated network edges
    demand_node_set = set(demand_nodes.nodes())
    net_plus_demand = set(network.nodes()).union(demand_node_set)
    fakes = set(grid_with_fakes.nodes()) - net_plus_demand

    def get_fake_edge(node):
        """
        fake node should only have 2 neighbors from the existing network
        that is the nearest edge
        """
        return tuple(set(grid_with_fakes.neighbors(node)) - demand_node_set)

    edge_fakes = [(get_fake_edge(fake), fake) for fake in fakes]

    # Init the DisjointSet
    subgraphs = UnionFind()

    assert len(network.nodes()) > 1, \
        "network must have more than 1 node"

    if single_network:
        # just union all nodes to a single parent
        nodes = network.nodes()
        # add parent
        parent = nodes[0]
        subgraphs.add_component(parent, budget=network.node[parent]['budget'])
        for node in nodes[1:]:
            subgraphs.add_component(node, budget=network.node[node]['budget'])
            # The existing grid nodes are on the grid (so distance is 0)
            subgraphs.union(parent, node, 0)
    else:
        # Build the subnet components
        # Get the network components to init budget centers
        subnets = nx.connected_components(network)
        for sub in subnets:
            # connected_components may yield sets (not indexable), so
            # materialize each component before picking its parent
            sub_list = list(sub)
            # union all nodes to parent of subnet
            parent = sub_list[0]
            subgraphs.add_component(parent,
                                    budget=network.node[parent]['budget'])
            # Merge remaining nodes with component
            for node in sub_list[1:]:
                subgraphs.add_component(node,
                                        budget=network.node[node]['budget'])
                # The existing grid nodes are on the grid (so distance is 0)
                subgraphs.union(parent, node, 0)

    # setup merged graph to be populated with fake nodes
    merged = GeoGraph(demand_nodes.srs, demand_nodes.coords,
                      data=demand_nodes)

    # merge fakes in
    for ((u, v), fake) in edge_fakes:
        # Make sure something wonky isn't going on
        assert(subgraphs[u] == subgraphs[v])

        # Add the fake node to the big net
        # NOTE: fake nodes always have np.inf budget
        merged.add_node(fake, budget=np.inf)
        merged.coords[fake] = grid_with_fakes.coords[fake]

        # Merge the fake node with the grid subgraph
        subgraphs.add_component(fake, budget=np.inf)
        subgraphs.union(fake, u, 0)

    return merged, subgraphs, rtree
def p_mod_boruvka(G, subgraphs=None, rtree=None):
    """
    Variant of the modified Boruvka algorithm that finds the initial
    nearest neighbor of every node with a multiprocessing pool.

    NOTE: G is modified in place (its edges are replaced by the computed
    forest edges) and subgraphs/rtree are updated as a side-effect.

    Args:
        G: graph whose nodes carry 'coords' and 'budget' attributes.
            Node ids are used as row indices into the coords array, so
            they are presumably 0..n-1 in attribute order -- TODO confirm
        subgraphs: UnionFind of existing components (created if None)
        rtree: spatial index of existing edges (created if None)

    Returns:
        G, with its edge set replaced by the accepted edges

    Raises:
        ValueError: if rtree is passed without subgraphs
    """
    V = set(G.nodes(data=False))
    coords = np.row_stack(list(nx.get_node_attributes(G, 'coords').values()))
    projcoords = ang_to_vec_coords(coords)
    kdtree = KDTree(projcoords)

    if subgraphs is None:
        if rtree is not None:
            raise ValueError('RTree passed without UnionFind')
        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    def is_fake(n):
        """
        Tests whether the node is a projection on the existing grid,
        using its MV
        """
        return subgraphs.budget[n] == np.inf

    def find_nn(node_tuple):
        """ nearest neighbor (and its distance) of an (index, point) pair """
        u, up = node_tuple
        v, _ = kdtree.query_subset(up, list(V - {u}))
        return u, v, spherical_distance([coords[u], coords[v]])

    def update_queues(component):
        """
        return ((u, v, dist), dist) for the nearest edge leaving the
        component, or ((None, None, None), inf) when there is none
        """
        q_top = subgraphs.queues[component].top()
        try:
            (u, v) = q_top
        except (TypeError, ValueError):
            # top() gave nothing that unpacks into an edge
            return (None, None, None), np.inf

        component_set = subgraphs.component_set(u)
        disjointVC = list(V - set(component_set))
        if not disjointVC:
            return (None, None, None), np.inf

        # replace queue entries pointing inside the component with the
        # nearest node outside of it
        while v in component_set:
            subgraphs.queues[component].pop()
            vprime, _ = kdtree.query_subset(projcoords[u], disjointVC)
            dm = spherical_distance([coords[u], coords[vprime]])
            subgraphs.queues[component].push((u, vprime), dm)
            (u, v) = subgraphs.queues[component].top()

        # the loop never breaks, so the distance is always taken from the
        # final (u, v) pair
        dm = spherical_distance([coords[u], coords[v]])
        return (u, v, dm), dm

    # find the nearest neighbor of all nodes
    # NOTE(review): find_nn is a nested function; multiprocessing may be
    # unable to pickle it for the worker processes -- confirm
    p = mp.Pool(processes=6)
    try:
        neighbors = p.map(find_nn, enumerate(projcoords))
    finally:
        p.close()
        p.join()

    # push the results into their respective queues
    for u, v, d in neighbors:
        subgraphs.add_component(u, budget=G.node[u]['budget'])
        subgraphs.queues[u].push((u, v), d)

    # list to hold mst edges
    Et = []

    # Et only ever grows, so "no new edge in a round" means we are done
    progressed = True
    while progressed:
        # consolidates top candidate edges from each subgraph
        Ep = PriorityQueue()
        for component in subgraphs.connected_components(component_subset=V):
            obj, priority = update_queues(component)
            if priority != np.inf:
                Ep.push(obj, priority)

        edges_before = len(Et)

        # add all the edges in E' to Et so long as no cycles are created
        while Ep._queue:
            (um, vm, dm) = Ep.pop()

            # skip if it creates a cycle or either subgraph lacks the MV
            if subgraphs[um] == subgraphs[vm]:
                continue
            if not (subgraphs.budget[subgraphs[um]] >= dm or is_fake(um)):
                continue
            if not (subgraphs.budget[subgraphs[vm]] >= dm or is_fake(vm)):
                continue

            # doesn't create cycles from line segment intersection
            invalid_edge, intersections = \
                line_subgraph_intersection(subgraphs, rtree,
                                           coords[um], coords[vm])
            if invalid_edge:
                continue

            # edges should not intersect a subgraph more than once
            assert not [n for n in intersections.values() if n > 1]

            # merge the subgraphs
            subgraphs.union(um, vm, dm)

            # Union all intersecting subgraphs
            # and update budgets (happens within union).
            # The selection is made before any of these unions run.
            crossed = [n for n, i in intersections.items()
                       if i == 1 and subgraphs[n] != subgraphs[um]]
            for n in crossed:
                subgraphs.union(um, n, 0)

            # index the newly added edge
            box = make_bounding_box(coords[um], coords[vm])

            # Object is (u.label, v.label), (u.coord, v.coord)
            rtree.insert(hash((um, vm)), box,
                         obj=((um, vm), (coords[um], coords[vm])))
            Et += [(um, vm)]

        progressed = len(Et) != edges_before

    G.remove_edges_from(G.edges())
    G.add_edges_from(Et)
    return G
def mod_boruvka(G, subgraphs=None, rtree=None):
    """
    algorithm to calculate the minimum spanning forest of nodes in GeoGraph G
    with 'budget' based restrictions on edges.

    Uses a modified version of Boruvka's algorithm

    NOTE: subgraphs is modified as a side-effect...may remove in future
        (useful for testing right now)

    Args:
        G: GeoGraph of nodes to be connected if appropriate
            Nodes should have 'budget' attribute
        subgraphs: UnionFind data structure representing existing network's
            connected components AND the 'fake' nodes projected onto it.
            This is the basis for the agglomerative nearest neighbor
            approach in this algorithm.
        rtree: RTree based index of existing network

    Returns:
        GeoGraph: representing minimum spanning forest of G subject to the
            budget based restrictions (G itself when it has < 2 nodes)

    Raises:
        ValueError: if rtree is passed without subgraphs
    """
    # special case (already MST)
    if G.number_of_nodes() < 2:
        return G

    V = set(G.nodes())

    # GeoGraph coords may be ndarray or dict
    if isinstance(G.coords, np.ndarray):
        coords = G.coords
    else:
        coords = np.row_stack(list(G.coords.values()))

    projcoords = ang_to_vec_coords(coords) if G.is_geographic() else coords
    kdtree = KDTree(projcoords)

    # Handle "dead" components
    D = set()

    if subgraphs is None:
        if rtree is not None:
            raise ValueError('RTree passed without UnionFind')
        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    # <helper_functions>
    def candidate_components(C):
        """
        return the set of candidate nearest components for the connected
        component containing C. Do not consider those in C's connected
        component or those that are 'dead'.
        """
        component_set = subgraphs.component_set(C)
        return list((V - set(component_set)) - D)

    def update_nn_component(C, candidates):
        """
        find the nearest neighbor pair for the connected component
        represented by C. candidates represents the list of
        components from which to select.
        """
        (v, vm) = subgraphs.queues[C].top()

        # vm may be in C {not a foreign nearest neighbor}
        # go through the queue until an edge is found between this node
        # and the set of candidates, updating the neighbors in the connected
        # components queue in the process.
        while vm not in candidates:
            subgraphs.queues[C].pop()
            um, _ = kdtree.query_subset(projcoords[v], candidates)
            dm = square_distance(projcoords[v], projcoords[um])
            subgraphs.push(subgraphs.queues[C], (v, um), dm)
            # Note: v will always be a vertex in this connected component
            #       vm *may* be external
            (v, vm) = subgraphs.queues[C].top()

        return (v, vm)

    def is_fake(node):
        """
        Tests whether the node is a projection on the existing grid,
        using its MV
        """
        return subgraphs.budget[node] == np.inf

    def is_dead(c, nn_dist):
        """
        Test whether the component is dead
        i.e. can never connect to another node
        """
        return not is_fake(c) and subgraphs.budget[c] < nn_dist

    def component_dist(c1, c2):
        """ "true" distance between components """
        if G.is_geographic():
            return spherical_distance([coords[c1], coords[c2]])
        return euclidean_distance([coords[c1], coords[c2]])
    # </helper_functions>

    # Initialize the connected components holding a single node
    # and push the nearest neighbor into its queue
    for v in V:
        vm, _ = kdtree.query_subset(projcoords[v], list(V - {v}))
        dm = square_distance(projcoords[v], projcoords[vm])
        subgraphs.add_component(v, budget=G.node[v]['budget'])
        subgraphs.push(subgraphs.queues[v], (v, vm), dm)

        # Add to dead set if warranted
        # (here components are single nodes, so no need to worry about
        # adding children to dead set)
        nn_dist = component_dist(v, vm)
        if is_dead(v, nn_dist):
            D.add(v)

    Et = []  # Initialize MST edges to empty list

    # MST is complete when no progress was made in the prior iteration.
    # Et only ever grows, so comparing its length is sufficient.
    progressed = True
    while progressed:

        # This is a candidate list of edges that might be added to the MST
        Ep = PriorityQueue()

        # for all C of G; where C <- connected component
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Add to dead set if warranted
            nn_dist = component_dist(v, vm)

            if is_dead(C, nn_dist):
                # add all dead components to the dead set D
                # (note that fake nodes can never be dead)
                for c in subgraphs.component_set(C):
                    if c not in D and not is_fake(c):
                        D.add(c)

        # One more round to root out connections to dead components
        # found in above iteration.
        # Need to do this BEFORE pushing candidate edges into Ep.
        # Otherwise we might be testing only 'dead' candidates
        # and therefore mistakenly think we were done (since
        # no new edges would have been added)
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Calculate nn_dist for comparison to mv later
            nn_dist = component_dist(v, vm)

            # Append the top priority edge from the subgraph to the candidate
            # edge set
            Ep.push((v, vm, nn_dist), nn_dist)

        edges_before = len(Et)

        # Candidate Test
        # At this point we have all of our nearest neighbor component edge
        # candidates defined for this "round"
        #
        # Now test all candidate edges in Ep for cycles and satisfaction of
        # custom criteria
        while Ep._queue:

            (um, vm, dm) = Ep.pop()

            # if doesn't create cycle
            # and subgraphs have enough MV
            # and we're not connecting 2 fake nodes
            # then allow the connection
            connectable = (
                subgraphs[um] != subgraphs[vm] and
                (subgraphs.budget[subgraphs[um]] >= dm or is_fake(um)) and
                (subgraphs.budget[subgraphs[vm]] >= dm or is_fake(vm)) and
                not (is_fake(um) and is_fake(vm)))
            if not connectable:
                continue

            # doesn't create cycles from line segment intersection
            invalid_edge, intersections = \
                line_subgraph_intersection(subgraphs, rtree,
                                           coords[um], coords[vm])
            if invalid_edge:
                continue

            # edges should not intersect a subgraph more than once
            assert not [n for n in intersections.values() if n > 1]

            # merge the subgraphs
            subgraphs.union(um, vm, dm)

            # For all intersected subgraphs update the mv to that
            # created by the edge intersecting them,
            # TODO: This should be updated in not such a naive method
            # The selection is made before any of these unions run.
            crossed = [n for n, i in intersections.items()
                       if i == 1 and subgraphs[n] != subgraphs[um]]
            for n in crossed:
                subgraphs.union(um, n, 0)

            # index the newly added edge
            box = make_bounding_box(coords[um], coords[vm])

            # Object is (u.label, v.label), (u.coord, v.coord)
            rtree.insert(hash((um, vm)), box,
                         obj=((um, vm), (coords[um], coords[vm])))
            Et += [(um, vm, {'weight': dm})]

        progressed = len(Et) != edges_before

    # create new GeoGraph with results
    result = G.copy()
    result.coords = G.coords
    result.remove_edges_from(result.edges())
    result.add_edges_from(Et)
    return result
def merge_network_and_nodes(network, demand_nodes,
                            single_network=True,
                            spherical_accuracy=False):
    """
    Combine an existing network with demand nodes, producing the Graph,
    UnionFind and RTree structures the network algorithms operate on.

    Args:
        network: graph of the existing network (its node ids are assumed
            not to clash with the demand node ids)
        demand_nodes: graph of the demand nodes
        single_network: when True all network nodes are unioned into one
            component; otherwise one component per connected subnet
        spherical_accuracy: forwarded to ``network.project_onto``
            (whether to connect nodes to the network on a sphere)

    Returns:
        graph: the demand nodes plus the 'fake' nodes, i.e. their
            projections onto the existing network
        subgraphs: UnionFind holding the network nodes and the fake nodes,
            each fake node unioned with the component of its network edge
        rtree: spatial index built from the existing network
    """
    rtree = network.get_rtree_index()
    grid_with_fakes = network.project_onto(
        demand_nodes, rtree_index=rtree,
        spherical_accuracy=spherical_accuracy)

    # the fake nodes are whatever the projection added beyond the
    # original network and demand nodes
    demand_node_set = set(demand_nodes.nodes())
    known_nodes = set(network.nodes()).union(demand_node_set)
    fakes = set(grid_with_fakes.nodes()) - known_nodes

    # pair every fake node with its non-demand neighbors; these should be
    # the 2 endpoints of the nearest existing network edge
    edge_fakes = [
        (tuple(set(grid_with_fakes.neighbors(fake)) - demand_node_set), fake)
        for fake in fakes
    ]

    subgraphs = UnionFind()

    assert len(network.nodes()) > 1, \
        "network must have more than 1 node"

    def union_members(members):
        """
        register every member and union it with the first one; existing
        grid nodes are on the grid, so the union distance is 0
        """
        head = members[0]
        subgraphs.add_component(head, budget=network.node[head]['budget'])
        for member in members[1:]:
            subgraphs.add_component(member,
                                    budget=network.node[member]['budget'])
            subgraphs.union(head, member, 0)

    if single_network:
        # one component for the entire network
        union_members(network.nodes())
    else:
        # one component per connected subnet
        for subnet in nx.connected_components(network):
            union_members(list(subnet))

    # start the merged graph from the demand nodes, then add the fakes
    merged = GeoGraph(demand_nodes.srs, demand_nodes.coords,
                      data=demand_nodes)

    for (u, v), fake in edge_fakes:
        # both endpoints of the projected edge must share a component
        assert(subgraphs[u] == subgraphs[v])

        # fake nodes always carry an infinite budget
        merged.add_node(fake, budget=np.inf)
        merged.coords[fake] = grid_with_fakes.coords[fake]

        # attach the fake node to the grid component it sits on
        subgraphs.add_component(fake, budget=np.inf)
        subgraphs.union(fake, u, 0)

    return merged, subgraphs, rtree
def mod_kruskal(G, subgraphs=None, rtree=None):
    """
    algorithm to compute the euclidean minimum spanning forest of nodes in
    GeoGraph G with 'budget' based restrictions on edges

    Uses a modified version of Kruskal's algorithm

    NOTE: subgraphs is modified as a side-effect...may remove in future
        (useful for testing right now)

    Args:
        G: GeoGraph of nodes to be connected if appropriate
            Nodes should have 'budget' attribute
        subgraphs: UnionFind data structure representing existing network's
            connected components AND the 'fake' nodes projected onto it.
            This is the basis for the agglomerative nearest neighbor
            approach in this algorithm.

            NOTE: ONLY the existing networks components are represented in
            the subgraphs argument. The nodes in G will be added within
            this function
        rtree: RTree based index of existing network

    Returns:
        GeoGraph: representing minimum spanning forest of G subject to the
            budget based restrictions (G itself when it has < 2 nodes)

    Raises:
        ValueError: if rtree is passed without subgraphs
    """
    # special case (already MST)
    if G.number_of_nodes() < 2:
        return G

    def is_fake(node):
        """
        Tests whether the node is a projection on the existing grid,
        using its MV
        """
        return subgraphs.budget[node] == np.inf

    # handy to have coords array
    coords = np.row_stack(list(G.coords.values()))

    if subgraphs is None:
        # an rtree is only meaningful together with the disjoint set it
        # indexes; with neither supplied, start from empty structures
        if rtree is not None:
            raise ValueError('RTree passed without UnionFind')
        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    # add nodes and budgets from G to subgraphs as components
    for node in G.nodes():
        subgraphs.add_component(node, budget=G.node[node]['budget'])

    # get fully connected graph
    g = G.get_connected_weighted_graph()

    # edges in MSF
    Et = []

    # connect the shortest safe edge until all edges have been tested
    # at which point, we have made all possible connections
    for u, v, data in sorted(g.edges(data=True),
                             key=lambda x: x[2]['weight']):
        w = data['weight']

        # if doesn't create cycle
        # and subgraphs have enough MV
        # and we're not connecting 2 fake nodes
        # then allow the connection
        connectable = (
            subgraphs[u] != subgraphs[v] and
            (subgraphs.budget[subgraphs[u]] >= w or is_fake(u)) and
            (subgraphs.budget[subgraphs[v]] >= w or is_fake(v)) and
            not (is_fake(u) and is_fake(v)))
        if not connectable:
            continue

        # doesn't create cycles from line segment intersection
        invalid_edge, intersections = \
            line_subgraph_intersection(subgraphs, rtree,
                                       coords[u], coords[v])
        if invalid_edge:
            continue

        # edges should not intersect a subgraph more than once
        assert not [n for n in intersections.values() if n > 1]

        # merge the subgraphs
        subgraphs.union(u, v, w)

        # For all intersected subgraphs update the mv to that
        # created by the edge intersecting them.
        # The selection is made before any of these unions run.
        crossed = [n for n, i in intersections.items()
                   if i == 1 and subgraphs[n] != subgraphs[u]]
        for n in crossed:
            subgraphs.union(u, n, 0)

        # index the newly added edge
        box = make_bounding_box(coords[u], coords[v])

        # Object is (u.label, v.label), (u.coord, v.coord)
        rtree.insert(hash((u, v)), box,
                     obj=((u, v), (coords[u], coords[v])))
        Et += [(u, v, {'weight': w})]

    # create new GeoGraph with results
    result = G.copy()
    result.coords = G.coords
    result.remove_edges_from(result.edges())
    result.add_edges_from(Et)
    return result
def nodes_plus_grid():
    """
    Return: nodes as graph and grid as UnionFind/Rtree combo

    This example input demonstrates the "more" optimal nature of
    mod_boruvka vs mod_kruskal.

    Layout (flat-earth coordinates):
        node 0: budget 2,  at (0, 1)   -- 1 unit above the grid
        node 1: budget 4,  at (1, 3)   -- 3 units above the grid
        node 2: budget 10, at (10, 5)  -- 5 units above the grid
        existing grid: a single edge along y = 0 from x = -5 to x = 15

    Nodes 0 and 1 are sqrt(5) apart. Each demand node is projected onto
    the grid; in the original sketch of this scenario the projections
    are labelled 3, 4 and 5.

    In this case, all nodes will be connected via either algorithm,
    but the graph produced by mod_kruskal will have edge (4,1) whereas
    mod_boruvka will produce a graph with edge (0,1).

    Therefore, the mod_boruvka graph is more optimal.
    """
    # demand nodes and their budgets
    budgets = [2, 4, 10]
    positions = np.array([[0.0, 1.0], [1.0, 3.0], [10.0, 5.0]])
    nodes = GeoGraph(gm.PROJ4_FLAT_EARTH, coords=dict(enumerate(positions)))
    nx.set_node_attributes(nodes, 'budget', dict(enumerate(budgets)))

    # existing grid: two endpoints with zero budget joined by one edge
    grid_endpoints = np.array([[-5.0, 0.0], [15.0, 0.0]])
    grid_coords = {}
    for idx, point in enumerate(grid_endpoints):
        grid_coords['grid-' + str(idx)] = point
    grid = GeoGraph(gm.PROJ4_FLAT_EARTH, grid_coords)
    nx.set_node_attributes(grid, 'budget', dict.fromkeys(grid.nodes(), 0))
    grid.add_edges_from([('grid-0', 'grid-1')])

    # project the demand nodes onto the grid and keep just the projections
    rtree = grid.get_rtree_index()
    projected = grid.project_onto(nodes, rtree_index=rtree)
    projected.remove_nodes_from(grid)
    projected.remove_nodes_from(nodes)

    # the grid is one connected component, so every grid node is
    # associated with the first grid node
    subgraphs = UnionFind()
    grid_nodes = grid.nodes()
    parent = grid_nodes[0]
    subgraphs.add_component(parent, budget=grid.node[parent]['budget'])
    for member in grid_nodes[1:]:
        subgraphs.add_component(member, budget=grid.node[member]['budget'])
        subgraphs.union(parent, member, 0)

    # ... and so is every projected "fake" node, with infinite budget
    for fake in projected.nodes():
        subgraphs.add_component(fake, budget=np.inf)
        subgraphs.union(parent, fake, 0)

    # fold the projected nodes and their coords into the node set
    nodes.add_nodes_from(projected, budget=np.inf)
    nodes.coords = dict(nodes.coords, **projected.coords)

    return nodes, subgraphs, rtree
def test_component_functions():
    """
    Check that the UnionFind component_set / connected_components
    methods report the expected members and roots as unions are applied.
    """
    # demand nodes are 0-3 plus a 'fake node' at 4
    demand_components = set(range(5))
    # existing grid nodes
    external_components = set(['grid-0', 'grid-1', 'grid-2'])

    subgraphs = UnionFind()
    for member in list(demand_components) + list(external_components):
        subgraphs.add_component(member)

    # give infinite weight to the nodes expected to end up as roots
    for root in (4, 2, 'grid-1'):
        subgraphs.weights[root] = np.inf

    # first connect the grid nodes and the fake node
    for g1, g2 in [(4, 'grid-0'), ('grid-1', 'grid-2')]:
        subgraphs.union(g1, g2, 1)

    # every demand node is still its own root; overall the roots are the
    # five demand nodes plus 'grid-1'
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set(range(5)))
    eq_(subgraphs.connected_components(), set(range(5)) | set(['grid-1']))

    # connect others (including a connection to the grid via fake node 4)
    for u1, u2 in [(0, 1), (2, 3), (0, 4)]:
        subgraphs.union(u1, u2, 1)

    # test component sets
    eq_(set(subgraphs.component_set(0)), set([0, 1, 4, 'grid-0']))
    eq_(set(subgraphs.component_set(2)), set([2, 3]))

    # test connected components
    eq_(subgraphs.connected_components(), set([4, 2, 'grid-1']))

    # connected component with ('grid-1', 'grid-2') should be filtered out
    eq_(subgraphs.connected_components(component_subset=demand_components),
        set([4, 2]))
def mod_boruvka(G, subgraphs=None, rtree=None):
    """
    algorithm to calculate the minimum spanning forest of nodes in GeoGraph G
    with 'budget' based restrictions on edges.

    Uses a modified version of Boruvka's algorithm

    NOTE: subgraphs is modified as a side-effect...may remove in future
        (useful for testing right now)

    Args:
        G: GeoGraph of nodes to be connected if appropriate
            Nodes should have 'budget' attribute
        subgraphs: UnionFind data structure representing existing network's
            connected components AND the 'fake' nodes projected onto it.
            This is the basis for the agglomerative nearest neighbor
            approach in this algorithm.
        rtree: RTree based index of existing network

    Returns:
        GeoGraph: representing minimum spanning forest of G subject to the
            budget based restrictions (G itself when it has < 2 nodes)

    Raises:
        ValueError: if rtree is passed without subgraphs
    """
    # special case (already MST)
    if G.number_of_nodes() < 2:
        return G

    V = set(G.nodes())

    # GeoGraph coords may be ndarray or dict
    if isinstance(G.coords, np.ndarray):
        coords = G.coords
    else:
        coords = np.row_stack(list(G.coords.values()))

    projcoords = ang_to_vec_coords(coords) if G.is_geographic() else coords
    kdtree = KDTree(projcoords)

    # Handle "dead" components
    D = set()

    if subgraphs is None:
        if rtree is not None:
            raise ValueError('RTree passed without UnionFind')
        rtree = Rtree()
        # modified to handle queues, children, mv
        subgraphs = UnionFind()

    # <helper_functions>
    def candidate_components(C):
        """
        return the set of candidate nearest components for the connected
        component containing C. Do not consider those in C's connected
        component or those that are 'dead'.
        """
        component_set = subgraphs.component_set(C)
        return list((V - set(component_set)) - D)

    def update_nn_component(C, candidates):
        """
        find the nearest neighbor pair for the connected component
        represented by C. candidates represents the list of
        components from which to select.
        """
        (v, vm) = subgraphs.queues[C].top()

        # vm may be in C {not a foreign nearest neighbor}
        # go through the queue until an edge is found between this node
        # and the set of candidates, updating the neighbors in the connected
        # components queue in the process.
        while vm not in candidates:
            subgraphs.queues[C].pop()
            um, _ = kdtree.query_subset(projcoords[v], candidates)
            dm = square_distance(projcoords[v], projcoords[um])
            subgraphs.push(subgraphs.queues[C], (v, um), dm)
            # Note: v will always be a vertex in this connected component
            #       vm *may* be external
            (v, vm) = subgraphs.queues[C].top()

        return (v, vm)

    def is_fake(node):
        """
        Tests whether the node is a projection on the existing grid
        """
        return subgraphs.budget[node] == np.inf

    def is_dead(c, nn_dist):
        """
        Test whether the component is dead
        i.e. can never connect to another node
        """
        return not is_fake(c) and subgraphs.budget[c] < nn_dist

    def component_dist(c1, c2):
        """ "true" distance between components """
        if G.is_geographic():
            return spherical_distance([coords[c1], coords[c2]])
        return euclidean_distance([coords[c1], coords[c2]])
    # </helper_functions>

    # Initialize the connected components holding a single node
    # and push the nearest neighbor into its queue
    for v in V:
        vm, _ = kdtree.query_subset(projcoords[v], list(V - {v}))
        dm = square_distance(projcoords[v], projcoords[vm])
        subgraphs.add_component(v, budget=G.node[v]['budget'])
        subgraphs.push(subgraphs.queues[v], (v, vm), dm)

        # Add to dead set if warranted
        # (here components are single nodes, so no need to worry about
        # adding children to dead set)
        nn_dist = component_dist(v, vm)
        if is_dead(v, nn_dist):
            D.add(v)

    Et = []  # Initialize MST edges to empty list

    # MST is complete when no progress was made in the prior iteration.
    # Et only ever grows, so comparing its length is sufficient.
    progressed = True
    while progressed:

        # This is a candidate list of edges that might be added to the MST
        Ep = PriorityQueue()

        # for all C of G; where C <- connected component
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Add to dead set if warranted
            nn_dist = component_dist(v, vm)

            if is_dead(C, nn_dist):
                # add all dead components to the dead set D
                # (note that fake nodes can never be dead)
                for c in subgraphs.component_set(C):
                    if c not in D and not is_fake(c):
                        D.add(c)

        # One more round to root out connections to dead components
        # found in above iteration.
        # Need to do this BEFORE pushing candidate edges into Ep.
        # Otherwise we might be testing only 'dead' candidates
        # and therefore mistakenly think we were done (since
        # no new edges would have been added)
        for C in subgraphs.connected_components(component_subset=V):

            candidates = candidate_components(C)

            # Skip if no valid candidates
            if not candidates:
                continue

            (v, vm) = update_nn_component(C, candidates)

            # Calculate nn_dist for comparison to mv later
            nn_dist = component_dist(v, vm)

            # Append the top priority edge from the subgraph to the candidate
            # edge set
            Ep.push((v, vm, nn_dist), nn_dist)

        edges_before = len(Et)

        # Candidate Test
        # At this point we have all of our nearest neighbor component edge
        # candidates defined for this "round"
        #
        # Now test all candidate edges in Ep for cycles and satisfaction of
        # custom criteria
        while Ep._queue:

            (um, vm, dm) = Ep.pop()

            # if doesn't create cycle
            # and subgraphs have enough MV
            # and we're not connecting 2 fake nodes
            # then allow the connection
            connectable = (
                subgraphs[um] != subgraphs[vm] and
                (subgraphs.budget[subgraphs[um]] >= dm or is_fake(um)) and
                (subgraphs.budget[subgraphs[vm]] >= dm or is_fake(vm)) and
                not (is_fake(um) and is_fake(vm)))
            if not connectable:
                continue

            # doesn't create cycles from line segment intersection
            invalid_edge, intersections = \
                line_subgraph_intersection(subgraphs, rtree,
                                           coords[um], coords[vm])
            if invalid_edge:
                continue

            # edges should not intersect a subgraph more than once
            assert not [n for n in intersections.values() if n > 1]

            # merge the subgraphs
            subgraphs.union(um, vm, dm)

            # For all intersected subgraphs update the mv to that
            # created by the edge intersecting them,
            # TODO: This should be updated in not such a naive method
            # The selection is made before any of these unions run.
            crossed = [n for n, i in intersections.items()
                       if i == 1 and subgraphs[n] != subgraphs[um]]
            for n in crossed:
                subgraphs.union(um, n, 0)

            # index the newly added edge
            box = make_bounding_box(coords[um], coords[vm])

            # Object is (u.label, v.label), (u.coord, v.coord)
            rtree.insert(hash((um, vm)), box,
                         obj=((um, vm), (coords[um], coords[vm])))
            Et += [(um, vm, {'weight': dm})]

        progressed = len(Et) != edges_before

    # create new GeoGraph with results
    result = G.copy()
    result.coords = G.coords
    result.remove_edges_from(result.edges())
    result.add_edges_from(Et)
    return result