def kruskal_mst(G):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Parameters
    ----------
    G : NetworkX Graph
        Weighted input graph. An edge without a ``'weight'`` attribute is
        treated as having weight 1.

    Returns
    -------
    T : NetworkX Graph
        A new graph containing every node of ``G`` and the selected edges.
        Each selected edge carries only a ``weight`` attribute.
    """
    tree = nx.Graph()
    tree.add_nodes_from(G.nodes)
    components = UnionFind(G)

    # Decorate every edge with its weight so the sort only looks at weights.
    weighted = [(attrs.get('weight', 1), a, b)
                for a, b, attrs in G.edges(data=True)]
    weighted.sort(key=itemgetter(0))

    for cost, a, b in weighted:
        if components[a] == components[b]:
            # Both endpoints are already connected; the edge would close a cycle.
            continue
        tree.add_edge(a, b, weight=cost)
        components.union(a, b)
    return tree
def kruskal(G: "networkx Graph object") -> (list, list, list):
    """Run Kruskal's algorithm on ``G`` and record how it progressed.

    Edges are examined in ascending order of their ``'weight'`` attribute
    (1 when the attribute is missing).

    Returns
    -------
    history : list of (u, v)
        Every edge, in the order it was examined.
    mst : list of (u, v)
        Edges accepted into the spanning tree.
    cycles : list of (u, v)
        Edges rejected because their endpoints were already connected.
    """
    components = UnionFind()
    history, mst, cycles = [], [], []

    ordered = list(G.edges(data=True))
    ordered.sort(key=lambda item: item[2].get('weight', 1))

    for u, v, _attrs in ordered:
        pair = (u, v)
        if components[u] == components[v]:
            # Endpoints already joined: this edge would create a cycle.
            cycles.append(pair)
        else:
            mst.append(pair)
            components.union(u, v)
        history.append(pair)

    return history, mst, cycles
def max_spacing(self, features=None, num_clusters=2):
    """Assign a ``cluster_group`` to every example via max-spacing clustering.

    All pairs of example indices are turned into edges weighted by
    ``self.dist(pair, features)``. The edges are then merged in ascending
    order of distance until ``num_clusters`` groups remain. The resulting
    group of each example is stored on ``example.cluster_group`` and the list
    of groups is printed.

    Parameters
    ----------
    features : list, optional
        Feature names forwarded to ``self.dist``. Defaults to
        ``['curvature']``.
    num_clusters : int
        Target number of clusters. If it is greater than or equal to the
        number of examples no merge is performed. If it is smaller than 1 the
        merging simply stops once a single cluster is left.
    """
    if features is None:
        # A fresh list per call avoids sharing one mutable default object.
        features = ['curvature']

    example_count = len(self.examples)
    edges = [
        (first, second, self.dist((first, second), features))
        for first, second in combinations(range(example_count), 2)
    ]
    edges.sort(key=lambda edge: edge[2])

    union_find = UnionFind()
    remaining_clusters = example_count
    # Walk the sorted edges once instead of repeatedly popping from the
    # front of the list, and stop cleanly when the target is reached or the
    # edges run out.
    for first, second, _distance in edges:
        if remaining_clusters <= num_clusters:
            break
        if union_find[first] != union_find[second]:
            union_find.union(first, second)
            remaining_clusters -= 1

    for index, example in enumerate(self.examples):
        example.cluster_group = union_find[index]
    print([example.cluster_group for example in self.examples])
def _min_span_edges(self, data=True):
    '''
    Computes minimum spanning edges between nodes of ``self.graph`` using
    Kruskal's algorithm. Only works for undirected graphs.

    Parameters
    ==========
    data: Bool
        If True yield ``(u, v, d)`` with the edge attribute dict ``d``,
        otherwise yield ``(u, v)``.

    Returns
    =======
    generator
        Edges of a minimum spanning forest. Edges without a ``'weight'``
        attribute are treated as having weight 1.

    Raises
    ======
    nx.NetworkXError
        If ``self.graph`` is directed (raised when iteration starts).
    '''
    from networkx.utils import UnionFind

    if self.graph.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    subtrees = UnionFind()
    # Kruskal requires the edges in ascending order of weight. Sorting the
    # raw (u, v, d) tuples would order them by node label instead.
    edges = sorted(self.graph.edges(data=True),
                   key=lambda t: t[2].get('weight', 1))
    for u, v, d in edges:
        if subtrees[u] != subtrees[v]:
            if data:
                yield (u, v, d)
            else:
                yield (u, v)
            subtrees.union(u, v)
def kruskal_mst_edges(G, minimum, weight='weight', keys=True, data=True):
    """Iterate over the edges of a Kruskal min/max spanning forest.

    Parameters
    ----------
    G : NetworkX Graph
        Graph (or multigraph) to span.
    minimum : bool
        True for a minimum spanning forest, False for a maximum one.
    weight : string
        Edge attribute used as weight. Missing values count as 1.
    keys : bool
        For multigraphs, whether to include the edge key in each result.
        Ignored for simple graphs.
    data : bool
        Whether to include the edge attribute dict in each result.

    Yields
    ------
    tuple
        ``(u, v)``, optionally followed by the key and/or the data dict.
    """
    components = UnionFind()
    multigraph = G.is_multigraph()
    if multigraph:
        raw_edges = G.edges(keys=True, data=True)
    else:
        raw_edges = G.edges(data=True)

    def edge_weight(edge):
        # The attribute dict is always the last item of the edge tuple.
        return edge[-1].get(weight, 1)

    ordered = sorted(raw_edges, key=edge_weight, reverse=not minimum)

    for edge in ordered:
        u, v, attrs = edge[0], edge[1], edge[-1]
        if components[u] == components[v]:
            continue
        result = (u, v)
        if multigraph and keys:
            result += (edge[2],)
        if data:
            result += (attrs,)
        yield result
        components.union(u, v)
def get_steiner_tree(self):
    """Return ``(steiner_tree, steiner_cost)``, building them on first use.

    When ``self.steiner_tree`` is still ``None`` the tree is rebuilt from
    scratch. Every metric-tree link from a terminal to a later-stamped
    terminal is expanded into a path with ``self._fill_path``. The path edges
    are collected in a heap keyed by ``self._get_edge_weight``, and a
    Kruskal pass keeps only the edges that do not close a cycle.
    """
    if self.steiner_tree is not None:
        return self.steiner_tree, self.steiner_cost

    self.steiner_tree = nx.Graph()
    self.steiner_cost = 0
    candidates = []

    for parent, timestamp in self.terminals.items():
        for child in self.metric_steiner_tree[parent]:
            if child not in self.terminals:
                continue
            if not (self.terminals[child] > timestamp):
                continue
            path = []
            self._fill_path(child, parent, path)
            # Pair each path node with its successor; the final node links
            # back to the parent terminal.
            successors = path[1:] + [parent]
            for prev_node, next_node in zip(path, successors):
                cost = self._get_edge_weight(prev_node, next_node)
                heapq.heappush(candidates, (cost, prev_node, next_node))

    # Kruskal's algorithm to break cycles
    components = UnionFind()
    while candidates:
        cost, u, v = heapq.heappop(candidates)
        if components[u] != components[v]:
            self.steiner_tree.add_edge(u, v, weight=cost)
            self.steiner_cost += cost
            components.union(u, v)

    return self.steiner_tree, self.steiner_cost
def connectAllNodesInRadius(self, graph, kdTree, radius):
    """Connect each node to nearby nodes without ever closing a cycle.

    For every node, the labels returned by
    ``kdTree.query_ball_point(pos, radius)`` are considered as neighbours.
    An edge weighted by ``self.calculateDist`` is added when
    ``self.checkCollision`` reports no collision and
    ``self.checkConnectivity`` reports that the two nodes are not yet in the
    same component. The same ``graph`` object is returned.
    """
    # Union-find bookkeeping of the components created by the added edges.
    components = UnionFind()

    # The node list is materialised up front, before any edge is added.
    for label, attrs in list(graph.nodes(data=True)):
        origin = attrs['pos']
        nearby_labels = kdTree.query_ball_point(origin.flatten(), radius)

        for other in nearby_labels:
            target = graph.getNodeData(other, 'pos')
            blocked = self.checkCollision(origin, target)
            separate = self.checkConnectivity(components, label, other)
            if blocked or not separate:
                continue
            graph.add_edge(label, other,
                           weight=self.calculateDist(origin, target))
            # Keep the union-find in step with the newly added edge.
            components.union(label, other)

    return graph
def kruskal_mst_edges(G, weight='weight', data=True):
    """Generate spanning-forest edges with Kruskal's algorithm on adjusted weights.

    Each edge ``(u, v)`` is ranked by its weight plus an "average incident
    weight" term: the sum of the weights of all edges touching ``u`` plus
    the sum of the weights of all edges touching ``v``, divided by
    ``deg(u) + deg(v) - 2``. The edge itself is part of both sums. Edges are
    then processed in ascending order of that adjusted value.

    The adjustment is computed on a private copy of the weights, so the
    edge attribute dicts of ``G`` are never modified and the yielded dicts
    carry the original weights.

    Parameters
    ----------
    G : NetworkX Graph
    weight : string
        Edge data key to use for weight (default 'weight'). Edges without
        that key count as weight 1.
    data : bool, optional
        If True yield the edge data along with the edge.

    Returns
    -------
    edges : iterator
        A generator that produces ``(u, v, d)`` tuples when ``data`` is
        True and ``(u, v)`` tuples otherwise.

    Raises
    ------
    nx.NetworkXError
        If ``G`` is directed (raised when iteration starts).
    """
    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")
    subtrees = UnionFind()

    edges = list(G.edges(data=True))
    # Private working copy of the weights, indexed like ``edges``.
    adjusted = [d.get(weight, 1) for _, _, d in edges]

    # NOTE(review): as in the previous implementation, the sums below read
    # the *already adjusted* values of edges handled earlier in this loop,
    # so the ranking depends on edge iteration order. Confirm whether that
    # is intended before simplifying this to use the original weights only.
    for index, (u, v, _) in enumerate(edges):
        incident_degree = G.degree(u) + G.degree(v) - 2
        if incident_degree == 0:
            # Isolated edge: there is nothing to average over, and dividing
            # would raise ZeroDivisionError. Leave the weight unadjusted.
            continue
        incident_sum = sum(
            adjusted[j] for j, (a, b, _) in enumerate(edges)
            if a == u or b == u)
        incident_sum += sum(
            adjusted[j] for j, (a, b, _) in enumerate(edges)
            if a == v or b == v)
        adjusted[index] += incident_sum / incident_degree

    # Sort edge positions rather than edges so that each edge keeps an
    # unambiguous link to its adjusted weight (parallel edges included).
    order = sorted(range(len(edges)), key=adjusted.__getitem__)
    for index in order:
        u, v, d = edges[index]
        if subtrees[u] != subtrees[v]:
            if data:
                yield (u, v, d)
            else:
                yield (u, v)
            subtrees.union(u, v)
def minimum_spanning_edges(G):
    """Yield the edges of a minimum spanning forest of an undirected graph.

    A spanning forest is the union of one spanning tree per connected
    component. Edges are chosen with Kruskal's algorithm; an edge without a
    ``'weight'`` attribute is treated as having weight 1.

    Parameters
    ----------
    G : NetworkX Graph

    Returns
    -------
    edges : iterator
        Generator of ``(u, v, {'weight': w})`` tuples. The dict is newly
        built and holds only the weight.

    Raises
    ------
    NetworkXError
        If ``G`` is directed (raised when iteration starts).

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> mst=nx.minimum_spanning_edges(G) # a generator of MST edges
    >>> edgelist=list(mst) # make a list of the edges
    >>> print(sorted(edgelist))
    [(0, 1, {'weight': 1}), (1, 2, {'weight': 1}), (2, 3, {'weight': 1})]

    Notes
    -----
    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    components = UnionFind()
    # Every undirected edge is listed once per orientation. The second copy
    # is skipped below because its endpoints are already joined.
    candidates = [(G[a][b].get('weight', 1), a, b) for a in G for b in G[a]]
    candidates.sort()

    for cost, a, b in candidates:
        if components[a] == components[b]:
            continue
        yield (a, b, {'weight': cost})
        components.union(a, b)
def minimum_spanning_edges(G):
    """Generate the edges of a minimum spanning forest (Kruskal's algorithm).

    For each connected component of the undirected graph ``G`` a spanning
    tree of minimum total weight is produced. A missing ``'weight'``
    attribute counts as weight 1.

    Parameters
    ----------
    G : NetworkX Graph

    Returns
    -------
    edges : iterator
        Generator of three-tuples ``(u, v, {'weight': w})``.

    Raises
    ------
    NetworkXError
        If ``G`` is directed (raised when iteration starts).

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> T=nx.Graph(list(nx.minimum_spanning_edges(G)))
    >>> print(sorted(T.edges(data=True)))
    [(0, 1, {'weight': 1}), (1, 2, {'weight': 1}), (2, 3, {'weight': 1})]

    Notes
    -----
    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    forest = UnionFind()

    # Collect (weight, u, v) for every adjacency entry, then order them.
    weighted = []
    for source in G:
        for target in G[source]:
            weighted.append((G[source][target].get('weight', 1),
                             source, target))
    weighted.sort()

    for w, source, target in weighted:
        if forest[source] != forest[target]:
            yield (source, target, {'weight': w})
            forest.union(source, target)
def minimum_spanning_edges(G, weight='weight', data=True):
    """
    Yield the edges of a minimum spanning forest of an undirected graph.

    The forest contains one minimum-weight spanning tree per connected
    component and is built with Kruskal's algorithm.

    :param G: NetworkX Graph
    :param weight: Edge data key to use for weight (default 'weight');
        edges without it count as weight 1.0
    :param data: If True yield ``(u, v, d)`` with the edge data dict,
        otherwise yield ``(u, v)``
    :return edges: iterator over the selected edges
    :raises nx.NetworkXError: if ``G`` is directed (raised when iteration
        starts)

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> mst=nx.minimum_spanning_edges(G,data=False) # a generator of MST edges
    >>> print(sorted(mst))
    [(0, 1), (1, 2), (2, 3)]

    Notes
    -----
    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    components = UnionFind()

    def edge_weight(edge):
        return edge[2].get(weight, 1.0)

    ordered = list(G.edges(data=True))
    ordered.sort(key=edge_weight)

    for u, v, attrs in ordered:
        if components[u] == components[v]:
            continue
        yield (u, v, attrs) if data else (u, v)
        components.union(u, v)
def kruskal(maze):
    """Generate a maze by randomised Kruskal, yielding one step per opening.

    All distinct walls reported by ``maze.walls(node)`` are gathered and
    shuffled. They are then visited in that random order, and whenever a
    wall separates two cells that are not yet connected, the tuple
    ``('connect', (c1, c2))`` is yielded and the cells are merged.
    """
    walls = []
    for node in maze.graph.nodes():
        for wall in maze.walls(node):
            if wall not in walls:
                walls.append(wall)

    cells = UnionFind()
    for cell in maze.graph.nodes():
        # The lookup is done only for its effect on the union-find
        # (presumably it registers the cell as its own set).
        cells[cell]

    random.shuffle(walls)
    for first, second in walls:
        if cells[first] == cells[second]:
            continue
        yield 'connect', (first, second)
        cells.union(first, second)
def minimum_spanning_edges(G, weight='weight', data=False):
    """Yield the edges of a randomly chosen spanning forest of ``G``.

    The edge list is shuffled and then scanned once; an edge is yielded as
    ``(u, v)`` whenever its endpoints are not yet connected. Edge weights
    play no role: ``weight`` and ``data`` are accepted but not used.

    Raises
    ------
    nx.NetworkXError
        If ``G`` is directed (raised when iteration starts).
    """
    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    components = UnionFind()
    shuffled = list(G.edges())
    random.shuffle(shuffled)

    for u, v in shuffled:
        if components[u] == components[v]:
            continue
        yield (u, v)
        components.union(u, v)
def kruskal_mst_edges(G, minimum, weight='weight', data=True):
    """Iterate over the edges of a Kruskal min/max spanning forest.

    Parameters
    ----------
    G : NetworkX Graph
    minimum : bool
        True selects a minimum spanning forest, False a maximum one.
    weight : string
        Edge attribute holding the weight. Missing values count as 1.
    data : bool
        If True yield ``(u, v, d)``, otherwise ``(u, v)``.
    """
    components = UnionFind()

    def edge_weight(edge):
        return edge[2].get(weight, 1)

    ordered = list(G.edges(data=True))
    # Heaviest first when a maximum spanning forest is requested.
    ordered.sort(key=edge_weight, reverse=not minimum)

    for u, v, attrs in ordered:
        if components[u] == components[v]:
            continue
        yield (u, v, attrs) if data else (u, v)
        components.union(u, v)
def mst(g, weight='weight', data=True):
    """Yield the edges of a minimum spanning forest of ``g`` (Kruskal).

    Parameters
    ----------
    g : graph exposing ``edges(data=True)``
    weight : string
        Edge attribute used as weight. Missing values count as 1.
    data : bool
        Accepted but currently not used: every edge is yielded as
        ``(u, v, d)`` with its attribute dict.
    """
    # Tracks which nodes have already been connected by accepted edges.
    components = UnionFind()

    def edge_weight(edge):
        return edge[2].get(weight, 1)

    ordered = list(g.edges(data=True))
    ordered.sort(key=edge_weight)

    for u, v, attrs in ordered:
        if components[u] == components[v]:
            # Adding this edge would close a cycle; discard it.
            continue
        yield (u, v, attrs)
        components.union(u, v)
def minimum_spanning_edges(G, weight='weight', data=True):
    """Yield the edges of a minimum spanning forest of an undirected graph.

    Uses Kruskal's algorithm. Edges without the ``weight`` attribute count
    as weight 1. Yields ``(u, v, d)`` when ``data`` is true and ``(u, v)``
    otherwise.

    Raises
    ------
    nx.NetworkXError
        If ``G`` is directed (raised when iteration starts).
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    forest = UnionFind()
    by_weight = list(G.edges(data=True))
    by_weight.sort(key=lambda edge: edge[2].get(weight, 1))

    for source, target, attrs in by_weight:
        if forest[source] == forest[target]:
            continue
        if data:
            result = (source, target, attrs)
        else:
            result = (source, target)
        yield result
        forest.union(source, target)
def boruvka_mst(G):
    """Find a minimum spanning tree using Boruvka's algorithm.

    Params
    --------
    G: NetworkX Graph
        An input weighted graph to find MST. Edges without a ``'weight'``
        attribute are treated as having weight 1, both when choosing edges
        and when storing them in the result.

    Returns
    --------
    T: NetworkX Graph
        A graph with every node of ``G`` and the selected edges, each
        stored with a single ``weight`` attribute.
    """
    T = nx.Graph()
    T.add_nodes_from(G.nodes)
    forest = UnionFind(G)

    def find_edge(comp):
        """Return the lightest edge leaving ``comp`` (None if there is none)."""
        minw = float('inf')
        border = None
        for e in nx.edge_boundary(G, comp, data=True):
            w = e[-1].get('weight', 1)
            if w < minw:
                minw = w
                border = e
        return border

    while True:
        # Must be a list: the forest is modified while it is consumed below.
        candidates = (find_edge(comp) for comp in forest.to_sets())
        min_edges = [edge for edge in candidates if edge is not None]
        if not min_edges:
            # No component has an outgoing edge left: the forest is complete.
            break
        for u, v, attrs in min_edges:
            # The same edge may be proposed by both of its components; the
            # second proposal is skipped because the endpoints are joined.
            if forest[u] != forest[v]:
                # Use the same default as find_edge so unweighted edges do
                # not raise KeyError.
                T.add_edge(u, v, weight=attrs.get('weight', 1))
                forest.union(u, v)
    return T
def kruskal_mst(G):
    """Generate the edges of a minimum spanning tree of an undirected graph.

    Uses Kruskal's algorithm: candidate edges are sorted by weight and added
    one at a time unless they would close a cycle. A missing ``'weight'``
    attribute counts as weight 1.

    Parameters
    ----------
    G : NetworkX Graph

    Returns
    -------
    A generator of three-tuples ``(u, v, {'weight': w})``.

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> edgelist=list(nx.kruskal_mst(G)) # make a list of the MST edges
    >>> print(sorted(edgelist))
    [(0, 1, {'weight': 1}), (1, 2, {'weight': 1}), (2, 3, {'weight': 1})]

    Notes
    -----
    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    from networkx.utils import UnionFind

    components = UnionFind()
    # Each undirected edge appears once per orientation; the duplicate is
    # rejected later because its endpoints are already connected.
    candidates = [(G[a][b].get('weight', 1), a, b) for a in G for b in G[a]]
    candidates.sort()

    for cost, a, b in candidates:
        if components[a] == components[b]:
            continue
        yield (a, b, {'weight': cost})
        components.union(a, b)
def minimum_spanning_edges(G, weight='weight', data=True):
    """Generate edges in a minimum spanning forest of an undirected graph.

    The forest is the union of one minimum-weight spanning tree per
    connected component, found with Kruskal's algorithm.

    Parameters
    ----------
    G : NetworkX Graph

    weight : string
       Edge data key to use for weight (default 'weight'). Edges without
       it count as weight 1.

    data : bool, optional
       If True yield the edge data along with the edge.

    Returns
    -------
    edges : iterator
       Generator of ``(u, v, d)`` tuples when ``data`` is True, else of
       ``(u, v)`` tuples.

    Raises
    ------
    nx.NetworkXError
       If ``G`` is directed (raised when iteration starts).
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    components = UnionFind()

    def edge_weight(edge):
        return edge[2].get(weight, 1)

    ordered = list(G.edges(data=True))
    ordered.sort(key=edge_weight)

    for edge in ordered:
        u, v, attrs = edge
        if components[u] == components[v]:
            continue
        yield (u, v, attrs) if data else (u, v)
        components.union(u, v)
def kruskal_mst(G):
    """Generate a minimum spanning tree of an undirected graph (Kruskal).

    Edges are considered in ascending ``(weight, u, v)`` order and kept when
    they join two previously unconnected parts of the graph. A missing
    ``'weight'`` attribute counts as weight 1.

    Parameters
    ----------
    G : NetworkX Graph

    Returns
    -------
    A generator of three-tuples ``(u, v, {'weight': w})``.

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> T=nx.Graph(list(nx.kruskal_mst(G))) # build a graph of the MST
    >>> print(sorted(T.edges(data=True)))
    [(0, 1, {'weight': 1}), (1, 2, {'weight': 1}), (2, 3, {'weight': 1})]

    Notes
    -----
    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    from networkx.utils import UnionFind

    forest = UnionFind()

    weighted = []
    for source in G:
        for target in G[source]:
            weighted.append((G[source][target].get('weight', 1),
                             source, target))
    weighted.sort()

    for w, source, target in weighted:
        if forest[source] != forest[target]:
            yield (source, target, {'weight': w})
            forest.union(source, target)
def krustal(G):
    """Build a spanning tree of ``G`` by repeatedly taking the shortest edge.

    The edges returned by ``E(G)`` form a pool. On each round
    ``get_min_edge(G, pool)`` picks an edge, which is removed from the pool
    and added to the result unless it would close a cycle.

    Returns
    -------
    T : NetworkX Graph
        Graph containing the accepted edges.
    """
    tree = nx.Graph()
    components = UnionFind()
    remaining = list(E(G))

    while remaining:
        shortest = get_min_edge(G, remaining)
        # Take the edge out of the pool so it is not considered again.
        remaining.remove(shortest)
        a, b = shortest[0], shortest[1]
        if components[a] != components[b]:
            components.union(a, b)
            tree.add_edge(*shortest)

    return tree
def kruskal(G, pos):
    """Run Kruskal's algorithm on ``G`` and draw each step.

    Edges are processed in ascending order of their ``'weight'`` attribute.
    After every accepted edge the current state is drawn with
    ``drawGraph(G, pos, tree)`` and shown; at the end the finished tree is
    drawn on its own.

    Returns
    -------
    NetworkX Graph
        The spanning tree; each edge has ``weight`` and ``color='g'``.
    """
    tree = nx.empty_graph()
    components = UnionFind()

    weighted = [[e[0], e[1], G[e[0]][e[1]]['weight']] for e in G.edges]
    weighted.sort(key=lambda item: item[2])

    for a, b, cost in weighted:
        if components[a] == components[b]:
            continue
        components.union(a, b)
        tree.add_edge(a, b, weight=cost, color='g')
        drawGraph(G, pos, tree)
        plt.show()

    drawGraph(tree, pos, tree)
    plt.show()
    return tree
def minimum_spanning_edges(G, weight='weight', data=True):
    """Yield minimum spanning forest edges, attaching data only sometimes.

    Edges are selected with Kruskal's algorithm (missing weights count as
    1). For every edge examined a number is drawn with
    ``rand.randint(1, 101)``. A selected edge is yielded as ``(u, v, d)``
    only when ``data`` is true and the draw lies in ``range(threshold)``;
    otherwise it is yielded as ``(u, v)``. The threshold starts at 99 and
    drops by one per examined edge.

    Raises
    ------
    nx.NetworkXError
        If ``G`` is directed (raised when iteration starts).
    """
    from networkx.utils import UnionFind

    if G.is_directed():
        raise nx.NetworkXError(
            "Mimimum spanning tree not defined for directed graphs.")

    components = UnionFind()
    ordered = list(G.edges(data=True))
    ordered.sort(key=lambda edge: edge[2].get(weight, 1))

    for step, (u, v, attrs) in enumerate(ordered):
        threshold = 99 - step
        # A number is drawn for every edge, including rejected ones.
        draw = rand.randint(1, 101)
        if components[u] == components[v]:
            continue
        if data and (draw in range(threshold)):
            yield (u, v, attrs)
        else:
            yield (u, v)
        components.union(u, v)
def calculate_hamming_clusters(numbers, num_bits=24):
    """Count clusters of values linked by a Hamming distance of at most two.

    Two distinct values belong to the same cluster when one can be reached
    from the other through a chain of values from ``numbers``, each step
    flipping one or two of the lowest ``num_bits`` bits. Duplicate values
    are counted once.

    Parameters
    ----------
    numbers : iterable of int
        The bit patterns to cluster.
    num_bits : int, optional
        Number of low-order bit positions considered for flips
        (default 24).

    Returns
    -------
    int
        Number of clusters.
    """
    # dict.fromkeys removes duplicates and keeps first-seen order.
    # Membership tests on it never insert, unlike probing a defaultdict,
    # which would grow by one entry for every missing neighbour.
    distinct = dict.fromkeys(numbers)
    union_find = UnionFind(distinct)

    one_bit_masks = [1 << i for i in range(num_bits)]
    two_bit_masks = [
        (1 << i) ^ (1 << j)
        for i, j in itertools.combinations(range(num_bits), 2)
    ]

    for mask in one_bit_masks + two_bit_masks:
        for key in distinct:
            neighbour = key ^ mask
            if neighbour in distinct:
                union_find.union(key, neighbour)

    return len(list(union_find.to_sets()))
def lca_networkx(G, root, pairs):
    """
    Yield ``((u, v), lca)`` for the requested node pairs of a rooted tree.

    Adapted from ``networkx.algorithms.lowest_common_ancestor``; an
    iterative form of Tarjan's offline LCA algorithm (CLRS 3rd edition,
    page 584). Compare to [epp].

    Parameters
    ----------
    G : NetworkX directed graph
        Tree with edges directed from parent to child (``G.pred`` is used
        to find the parent of a node).
    root : node
        Root of the (sub)tree to process.
    pairs : iterable of (u, v)
        Pairs whose lowest common ancestor is wanted. Each pair is reported
        in the orientation(s) in which it was requested.

    [epp]: https://www.ics.uci.edu/~eppstein/PADS/LCA.py
    """
    from collections import defaultdict
    from networkx.utils import UnionFind, arbitrary_element
    from networkx import dfs_postorder_nodes

    # ``pairs`` is iterated once and then probed for membership many times.
    # Materialise it so one-shot iterators work and lookups are cheap.
    if not isinstance(pairs, (set, frozenset, dict)):
        pairs = {(u, v) for u, v in pairs}

    pair_dict = defaultdict(set)
    for u, v in pairs:
        pair_dict[u].add(v)
        pair_dict[v].add(u)

    uf = UnionFind()
    ancestors = {}
    for node in G:
        ancestors[node] = uf[node]

    # colors[x] is True once x has been finished by the postorder walk.
    colors = defaultdict(bool)
    for node in dfs_postorder_nodes(G, root):
        colors[node] = True
        for v in pair_dict[node]:
            if colors[v]:
                lca = ancestors[uf[v]]
                if (v, node) in pairs:
                    yield (v, node), lca
                # Also report the pair when it was requested the other way
                # round; otherwise (node, v) would be silently dropped
                # whenever v is finished before node.
                if v != node and (node, v) in pairs:
                    yield (node, v), lca
        if node != root:
            parent = arbitrary_element(G.pred[node])
            uf.union(parent, node)
            ancestors[uf[parent]] = parent
def main():
    """Process grid queries: paint a cell, or test whether two cells connect.

    Reads the grid size and the number of queries with ``pin``. A query of
    type 1 paints cell ``(x, y)`` and merges it with each of its four
    neighbours that is already painted. Any other query reads two cells and
    prints "Yes" when the first is painted and both are in the same
    union-find set, otherwise "No".
    """
    H, W = pin(2)
    Q = pin(1)
    uf = UnionFind()
    painted = Counter()
    neighbour_offsets = ((0, 1), (0, -1), (1, 0), (-1, 0))

    for _ in range(Q):
        kind = pin(1)
        if kind == 1:
            x, y = pin(2)
            # Input coordinates are 1-based; cells are stored 0-based.
            cell = (x - 1, y - 1)
            painted[cell] = 1
            for dx, dy in neighbour_offsets:
                adjacent = (cell[0] + dx, cell[1] + dy)
                if painted[adjacent] == 1:
                    uf.union(cell, adjacent)
            continue

        xa, ya, xb, yb = pin(4)
        start = (xa - 1, ya - 1)
        # The union-find is only consulted when the start cell is painted.
        if painted[start] != 0 and uf[start] == uf[(xb - 1, yb - 1)]:
            print("Yes")
        else:
            print("No")
    return
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False, data=True, ignore_nan=False):
    """Iterate over the edges of a Boruvka min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights, otherwise the edges may
        not form a tree.
    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.
    weight : string (default: 'weight')
        Name of the edge attribute holding the weight. Missing values count
        as 1.
    keys : bool (default: False)
        Not used by this function; it exists only for consistency with the
        other minimum spanning tree functions.
    data : bool (default: True)
        If True yield ``(u, v, d)`` with the attribute dict ``d``, otherwise
        yield ``(u, v)``.
    ignore_nan : bool (default: False)
        If True, edges whose weight is NaN are skipped; otherwise such an
        edge raises ``ValueError``.
    """
    # Start from the discrete partition: every node is its own component.
    forest = UnionFind(G)
    # Negating the weights turns the minimum search into a maximum search.
    sign = 1 if minimum else -1

    def best_edge(component):
        """Return the optimum boundary edge of ``component`` (None if empty)."""
        chosen = None
        chosen_wt = float('inf')
        for candidate in nx.edge_boundary(G, component, data=True):
            wt = candidate[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (candidate, ))
            if wt < chosen_wt:
                chosen_wt = wt
                chosen = candidate
        return chosen

    def round_candidates():
        """Return the list of optimum boundary edges, one per component."""
        found = []
        for component in forest.to_sets():
            edge = best_edge(component)
            if edge is not None:
                found.append(edge)
        return found

    best_edges = round_candidates()
    # An empty list means no component has a boundary edge left.
    while best_edges:
        # Recomputed as a list (not an iterator) because the forest is
        # modified while the list is consumed. The same edge may appear
        # twice in opposite orientations; the second occurrence is skipped
        # because its endpoints are already joined.
        best_edges = round_candidates()
        for u, v, d in best_edges:
            if forest[u] == forest[v]:
                continue
            if data:
                yield u, v, d
            else:
                yield u, v
            forest.union(u, v)
def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None):
    r"""Yield the lowest common ancestor for sets of pairs in a tree.

    Parameters
    ----------
    G : NetworkX directed graph (must be a tree)

    root : node, optional (default: None)
        The root of the subtree to operate on. If None, the single node
        with in-degree 0 is used.

    pairs : iterable or iterator of pairs of nodes, optional (default: None)
        The pairs of interest. If None, all pairs of nodes under `root`
        that have a lowest common ancestor are reported.

    Returns
    -------
    lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
        in `pairs` and `lca` is their lowest common ancestor.

    Raises
    ------
    nx.NetworkXPointlessConcept
        If `G` has no nodes.
    nx.NetworkXError
        If None is a node of `G`, if no unique root can be determined, or
        if some node has in-degree greater than 1.
    nx.NodeNotFound
        If a node named in `pairs` is not in `G`.

    Notes
    -----
    Only defined on non-null trees represented with directed edges from
    parents to children. Uses Tarjan's off-line lowest-common-ancestors
    algorithm.

    Tarjan, R. E. (1979), "Applications of path compression on balanced
    trees", Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.

    See Also
    --------
    all_pairs_lowest_common_ancestor (similar routine for general DAGs)
    lowest_common_ancestor           (just a single pair for general DAGs)
    """
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    if None in G:
        raise nx.NetworkXError("None is not a valid node.")

    restricted = pairs is not None

    # Index the requested pairs so they can be looked up from either node.
    if restricted:
        partners = defaultdict(set)
        # Materialise anything that is not already a mapping or set.
        if not isinstance(pairs, (Mapping, Set)):
            pairs = set(pairs)
        for first, second in pairs:
            for endpoint in (first, second):
                if endpoint not in G:
                    msg = f"The node {str(endpoint)} is not in the digraph."
                    raise nx.NodeNotFound(msg)
            partners[first].add(second)
            partners[second].add(first)

    # Without an explicit root, look for the single node of in-degree 0.
    # Any node of in-degree above 1 means G is not a tree.
    if root is None:
        for candidate, in_degree in G.in_degree:
            if in_degree == 0:
                if root is not None:
                    msg = "No root specified and tree has multiple sources."
                    raise nx.NetworkXError(msg)
                root = candidate
            elif in_degree > 1:
                msg = "Tree LCA only defined on trees; use DAG routine."
                raise nx.NetworkXError(msg)
        if root is None:
            raise nx.NetworkXError("Graph contains a cycle.")

    # Iterative implementation of Tarjan's offline LCA algorithm
    # (CLRS page 521 in the 2nd edition, page 584 in the 3rd edition).
    uf = UnionFind()
    ancestors = {}
    for node in G:
        ancestors[node] = uf[node]

    finished = defaultdict(bool)
    for node in nx.dfs_postorder_nodes(G, root):
        finished[node] = True
        others = partners[node] if restricted else G
        for other in others:
            if not finished[other]:
                continue
            # Report both orientations when both were requested;
            # otherwise report just one.
            if restricted and (node, other) in pairs:
                yield (node, other), ancestors[uf[other]]
            if not restricted or (other, node) in pairs:
                yield (other, node), ancestors[uf[other]]
        if node != root:
            parent = arbitrary_element(G.pred[node])
            uf.union(parent, node)
            ancestors[uf[parent]] = parent
def kruskal_mst_edges(G, minimum, weight='weight', keys=True, data=True, ignore_nan=False):
    """Iterate over the edges of a Kruskal min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The graph holding the tree of interest.
    minimum : bool
        Find the minimum (True) or maximum (False) spanning tree.
    weight : string (default: 'weight')
        Name of the edge attribute holding the weight. Missing values count
        as 1.
    keys : bool (default: True)
        If `G` is a multigraph, whether edge keys are yielded. Otherwise
        `keys` is ignored.
    data : bool (default: True)
        If True yield the attribute dict as the last item of each edge.
    ignore_nan : bool (default: False)
        If True, edges whose weight is NaN are skipped; otherwise such an
        edge raises ``ValueError``.
    """
    components = UnionFind()
    multigraph = G.is_multigraph()
    # Negating the weights makes an ascending sort pick the heaviest first.
    sign = 1 if minimum else -1

    if multigraph:
        raw_edges = G.edges(keys=True, data=True)
    else:
        raw_edges = G.edges(data=True)

    def signed_edges():
        """Yield each usable edge prefixed with its signed weight."""
        for edge in raw_edges:
            wt = edge[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (edge,))
            yield (wt,) + edge

    ordered = sorted(signed_edges(), key=itemgetter(0))

    for record in ordered:
        u, v, attrs = record[1], record[2], record[-1]
        if components[u] == components[v]:
            continue
        if multigraph and keys:
            # Multigraph records are (wt, u, v, key, attrs).
            key = record[3]
            yield (u, v, key, attrs) if data else (u, v, key)
        else:
            yield (u, v, attrs) if data else (u, v)
        components.union(u, v)
def kruskal_mst_edges(G, minimum, weight='weight', keys=True, data=True,
                      ignore_nan=False):
    """Iterate over edges of a Kruskal's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The graph holding the tree of interest.

    minimum : bool
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights. Edges
        without that attribute are treated as having weight 1.

    keys : bool (default: True)
        If `G` is a multigraph, `keys` controls whether edge keys are
        yielded. Otherwise `keys` is ignored.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        `(u, v)`, `(u, v, d)`, `(u, v, k)` or `(u, v, k, d)` depending on
        `keys`, `data` and whether `G` is a multigraph.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is false. Because this
        function is a generator, the error surfaces when iteration starts.
    """
    multigraph = G.is_multigraph()
    if multigraph:
        edges = G.edges(keys=True, data=True)
    else:
        edges = G.edges(data=True)

    # Negating the weights turns the ascending sort into a maximum tree.
    sign = 1 if minimum else -1

    def filter_nan_edges():
        """Yield `(signed_weight, *edge)` tuples, handling NaN weights."""
        for edge in edges:
            wt = edge[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                # Report the offending edge itself; the earlier message
                # referenced an undefined name and raised NameError.
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (tuple(edge),))
            yield (wt, *edge)

    edges = sorted(filter_nan_edges(), key=itemgetter(0))

    # The forest is only needed once the edge list has been validated.
    subtrees = UnionFind()

    # Multigraphs need to handle edge keys in addition to edge data.
    if multigraph:
        for wt, u, v, k, d in edges:
            if subtrees[u] != subtrees[v]:
                if keys:
                    if data:
                        yield u, v, k, d
                    else:
                        yield u, v, k
                else:
                    if data:
                        yield u, v, d
                    else:
                        yield u, v
                subtrees.union(u, v)
    else:
        for wt, u, v, d in edges:
            if subtrees[u] != subtrees[v]:
                if data:
                    yield (u, v, d)
                else:
                    yield (u, v)
                subtrees.union(u, v)
if __name__ == "__main__":
    # Cluster bit-string nodes: any two nodes whose labels differ in at most
    # two bits are merged, then the number of resulting clusters is printed.
    filename = "clustering_big.txt"
    with open(filename, "r") as f:
        lines = list(f)

    # Header line: "<number of nodes> <bits per node>".
    n_nodes, n_bits = map(int, lines[0].split())
    print(f'{n_nodes} nodes')
    print(f'{n_bits} bits per node')

    # Each remaining line is a whitespace-separated bit string.
    numbers = [int(''.join(line.split()), 2) for line in lines[1:]]

    # Map every distinct label to the set of node indices carrying it.
    nodes = {}
    for node, num in enumerate(numbers):
        nodes.setdefault(num, set()).add(node)

    uf = UnionFind(range(n_nodes))

    # XOR masks for Hamming distance 1, then 2, then 0 (identical labels).
    distances = [1 << bit for bit in range(n_bits)]
    for ix_1, ix_2 in itertools.combinations(range(n_bits), 2):
        distances.append((1 << ix_1) ^ (1 << ix_2))
    distances.append(0)

    for distance in distances:
        for number in nodes:
            partner = number ^ distance
            if partner in nodes:
                for node_from, node_to in itertools.product(
                        nodes[number], nodes[partner]):
                    uf.union(node_from, node_to)

    print(len(list(uf.to_sets())))  # 6118
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True):
    """Iterate over the edges of a spanning tree built by Borůvka's algorithm.

    `G` is a NetworkX graph whose edges must have distinct weights;
    otherwise the yielded edges may not form a tree.

    `minimum` selects a minimum (True) or maximum (False) spanning tree.

    `weight` is the edge attribute that stores the edge weights. Every edge
    must carry that attribute, otherwise a :exc:`KeyError` is raised.

    If `data` is True, edges are yielded as ``(u, v, d)`` where ``d`` is the
    edge attribute dictionary; otherwise they are yielded as ``(u, v)``.

    `keys` is ignored: multigraphs are not supported, and the argument exists
    only for consistency with the other spanning tree functions.

    """
    choose = min if minimum else max
    # Start from the discrete partition: every node is its own component.
    forest = UnionFind(G)

    def best_edge(component):
        """Return the optimum edge leaving `component`, or ``None`` when
        the component has an empty edge boundary.
        """
        boundary = nx.edge_boundary(G, component, data=True)
        return choose(boundary, key=lambda e: e[-1][weight], default=None)

    def collect_best_edges():
        """Return the optimum boundary edge of every current component.

        The result is a list, not an iterator, because the forest is
        modified while the edges are processed. Components with an empty
        boundary are finished and contribute nothing.
        """
        candidates = (best_edge(component) for component in forest.to_sets())
        return [edge for edge in candidates if edge is not None]

    # An empty list means no component can grow any further.
    best_edges = collect_best_edges()
    while best_edges:
        # The same edge may show up twice (once per orientation); the
        # component test below makes the second occurrence a no-op.
        best_edges = collect_best_edges()
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                yield (u, v, d) if data else (u, v)
                forest.union(u, v)
def kruskal_mst_edges(G, weight='weight', data=True):
    """Generate tree edges with a Kruskal-like greedy heuristic.

    Edges start out sorted by their ``"weight"`` attribute. After each
    accepted edge, edges touching the accepted endpoints are re-weighted by
    the increase in ``average_pairwise_distance`` they would cause, divided
    by the number of not-yet-dominated vertices counted for that step, and
    the candidate list is re-sorted.

    Parameters
    ----------
    G : NetworkX Graph
    weight : string
       Not referenced by the body; the literal key ``"weight"`` is used.
    data : bool, optional
       Not referenced by the body; the attribute dict is always yielded.

    Returns
    -------
    edges : iterator
       A generator of three-tuples ``(u, v, d)`` where ``d`` is the
       attribute dict of the edge as stored in the initially sorted list.

    Notes
    -----
    NOTE(review): the block structure below was reconstructed from a
    whitespace-collapsed source; confirm the nesting against the original.
    ``average_pairwise_distance`` is defined elsewhere; presumably it
    returns a numeric cost for a graph -- TODO confirm.
    """
    subtrees = UnionFind()
    edges = sorted(
        G.edges(data=True),
        key=lambda t: t[2].get("weight"))  #sorted by edge weights first
    edges_no_weights = [e[0:2] for e in edges]  #same order as edges
    # Working copy whose weights are rewritten and re-sorted every round.
    edges_copy = edges.copy()
    available_edges = len(G.nodes) - 1  # assigned but never read
    dominated = set()  #set of dominated vertices
    num_edges_mst = 0
    while num_edges_mst < len(G.nodes):
        i = 0
        # Raises IndexError here if the graph has no edges.
        u, v, d = edges_copy[0]
        # Skip candidates whose endpoints are both dominated and already in
        # the same component.
        while (u in dominated and v in dominated and i + 1 < len(edges_copy)
               and subtrees[u] == subtrees[v]):
            i += 1
            u, v, d = edges_copy[i]
        if subtrees[u] != subtrees[v]:
            dominated.add(u)
            dominated.add(v)
            new_v_reached = 0
            u_subtree = []  # assigned but never read
            v_subtree = []  # assigned but never read
            #for loop to find the number of new vertices reached:
            for x, y, w in edges_copy:
                if x in set(G.__getitem__(u)) or y in set(
                        G.__getitem__(v)):  #neighbors of u and v
                    if x not in dominated:
                        new_v_reached += 1
                    if y not in dominated:
                        new_v_reached += 1
            #all parts of current mst
            subtrees.union(u, v)
            # Vertex list of the component that now contains u.
            curr_mst_vertices = [list(s) for s in subtrees.to_sets() if u in s]
            curr_mst_vertices = curr_mst_vertices[0]
            # Subgraph of G induced on that component (all edges of G between
            # its vertices, not only the accepted ones).
            current_tree = nx.Graph()
            for v1, v2, v3 in G.edges.data():
                if v1 in curr_mst_vertices and v2 in curr_mst_vertices:
                    edges_ = edges[edges_no_weights.index((v1, v2))]  # never read
                    current_tree.add_edge(v1, v2, weight=v3['weight'])
            before = average_pairwise_distance(current_tree)
            #add one edge to find increase in cost
            for v1, v2, v3 in edges_copy:
                #print(v1,v2,v3)
                new_tree = current_tree.copy()
                #edge (u, X) (v, X) (X, u) (X, v)
                # NOTE(review): the second clause compares v2 with v, not
                # with u like its siblings -- confirm this is intended.
                if (v1 == u and v2 != v) or (v1 == v and v2 != v) or (
                        v2 == u and v1 != v) or (v2 == v and v1 != u):
                    new_tree.add_edge(v1, v2, weight=v3['weight'])
                    after = average_pairwise_distance(new_tree)
                    # NOTE(review): edges_no_weights keeps the initial sort
                    # order while edges_copy is re-sorted each round, so
                    # this index may not address (v1, v2) in edges_copy.
                    edge_update = edges_copy[edges_no_weights.index((v1, v2))]
                    edge_update_list = list(edge_update[0:2])
                    if after - before > 0 and new_v_reached != 0:
                        # New weight: cost increase per newly reached vertex.
                        edge_update_list.append(
                            {'weight': ((after - before) / new_v_reached)})
                        edges_copy[edges_no_weights.index(
                            (v1, v2))] = edge_update_list  #update edges_copy
            edges_copy = sorted(edges_copy, key=lambda x: x[2]['weight'])
            #available_edges -= 1
            num_edges_mst += 1
            # The yielded dict is the one from the initial sort, i.e. it
            # carries the original weight rather than the rewritten one.
            yield (u, v, edges[edges_no_weights.index((u, v))][2])
        # A spanning tree on len(G.nodes) vertices has len(G.nodes) - 1 edges.
        if num_edges_mst == len(G.nodes) - 1:
            break
def minimum_spanning_edges(G, weight="weight", data=True):
    """Generate edges in a minimum spanning forest of an undirected
    weighted graph.

    A minimum spanning tree is a subgraph of the graph (a tree)
    with the minimum sum of edge weights.  A spanning forest is a
    union of the spanning trees for each connected component of the graph.

    Parameters
    ----------
    G : NetworkX Graph

    weight : string
       Edge data key to use for weight (default 'weight').

    data : bool, optional
       If True yield the edge data along with the edge.

    Returns
    -------
    edges : iterator
       A generator that produces edges in the minimum spanning forest.
       With `data` true the edges are three-tuples (u, v, d) where d is
       the edge attribute dictionary; otherwise they are pairs (u, v).

    Raises
    ------
    NetworkXError
       If `G` is directed. This function is a generator, so the error is
       raised when iteration starts, not when the function is called.

    Examples
    --------
    >>> G=nx.cycle_graph(4)
    >>> G.add_edge(0,3,weight=2) # assign weight 2 to edge 0-3
    >>> mst=nx.minimum_spanning_edges(G,data=False) # a generator of MST edges
    >>> edgelist=list(mst) # make a list of the edges
    >>> print(sorted(edgelist))
    [(0, 1), (1, 2), (2, 3)]

    Notes
    -----
    Uses Kruskal's algorithm.

    If the graph edges do not have a weight attribute a default weight of 1
    will be used.

    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    # Kruskal's algorithm: sort edges by weight, and add them one at a time.
    # It is simple to implement once UnionFind exists, and the only slow
    # part (the sort) is handled by Python's built-in sort.
    from networkx.utils import UnionFind

    if G.is_directed():
        raise nx.NetworkXError(
            "Minimum spanning tree not defined for directed graphs.")

    subtrees = UnionFind()
    edges = sorted(G.edges(data=True), key=lambda t: t[2].get(weight, 1))
    for u, v, d in edges:
        # An edge joins the forest only if it connects two different trees.
        if subtrees[u] != subtrees[v]:
            if data:
                yield (u, v, d)
            else:
                yield (u, v)
            subtrees.union(u, v)
def boruvka_mst_edges(G, minimum=True, weight='weight', keys=False,
                      data=True, ignore_nan=False):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights. Edges
        without that attribute are treated as having weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is false.
    """
    # Start from the discrete partition: each node is its own component.
    forest = UnionFind(G)

    def best_edge(component):
        """Return the optimum (minimum or maximum) edge on the edge
        boundary of `component`, or ``None`` if no edge qualifies.
        """
        # Negated weights let a single "smallest wins" scan serve both modes.
        factor = 1 if minimum else -1
        lowest = float('inf')
        winner = None
        for edge in nx.edge_boundary(G, component, data=True):
            scaled = edge[-1].get(weight, 1) * factor
            if isnan(scaled):
                if ignore_nan:
                    continue
                msg = "NaN found as an edge weight. Edge %s"
                raise ValueError(msg % (edge,))
            if scaled < lowest:
                lowest, winner = scaled, edge
        return winner

    def collect_best_edges():
        """Return the optimum boundary edge of every current component.

        A list is required because the forest changes while the edges are
        processed. Components without a usable boundary edge are complete
        and are left out.
        """
        candidates = (best_edge(component) for component in forest.to_sets())
        return [edge for edge in candidates if edge is not None]

    # An empty list means no component can be extended any further.
    best_edges = collect_best_edges()
    while best_edges:
        # An edge may be listed twice, once per orientation; the component
        # test below turns the second occurrence into a no-op.
        best_edges = collect_best_edges()
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                yield (u, v, d) if data else (u, v)
                forest.union(u, v)
def kruskal_mst_edges(G, minimum, weight="weight", keys=True, data=True,
                      ignore_nan=False, partition=None):
    """
    Iterate over the edges of a Kruskal's algorithm min/max spanning tree,
    optionally honouring an edge partition.

    Parameters
    ----------
    G : NetworkX Graph
        The graph holding the tree of interest.

    minimum : bool
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights. Edges
        without that attribute are treated as having weight 1.

    keys : bool (default: True)
        If `G` is a multigraph, `keys` controls whether edge keys are
        yielded. Otherwise `keys` is ignored.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    partition : string (default: None)
        The name of the edge attribute holding the partition data, if it
        exists. Partition data is written to the edges using the
        `EdgePartition` enum. Included edges are considered first, open
        edges follow in weight order, and excluded edges are never
        considered.

    Yields
    ------
    edge tuple
        The edges as discovered by Kruskal's method. Each edge can
        take the following forms: `(u, v)`, `(u, v, d)`, `(u, v, k)` or
        `(u, v, k, d)` depending on the `keys` and `data` parameters.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is false.
    """
    subtrees = UnionFind()
    multigraph = G.is_multigraph()
    edges = G.edges(keys=True, data=True) if multigraph else G.edges(data=True)

    # Bucket the edges by partition status. The candidate order is:
    #   * included edges, in the order the graph reports them
    #   * open edges, best weight first
    # Excluded edges are left out altogether.
    forced = []
    optional = []
    for e in edges:
        attrs = e[-1]
        wt = attrs.get(weight, 1)
        if isnan(wt):
            if ignore_nan:
                continue
            raise ValueError(f"NaN found as an edge weight. Edge {e}")

        status = attrs.get(partition)
        if status == EdgePartition.INCLUDED:
            forced.append((wt,) + e)
            continue
        if status == EdgePartition.EXCLUDED:
            continue
        optional.append((wt,) + e)

    # Ascending weights for a minimum tree, descending for a maximum tree.
    optional.sort(key=itemgetter(0), reverse=not minimum)
    candidates = forced + optional

    # Edge keys only exist (and are only reported) for multigraphs.
    with_key = multigraph and keys
    for entry in candidates:
        u, v = entry[1], entry[2]
        if subtrees[u] != subtrees[v]:
            found = (u, v)
            if with_key:
                found += (entry[3],)
            if data:
                found += (entry[-1],)
            yield found
            subtrees.union(u, v)
from networkx.utils import UnionFind

# Grid connectivity queries read from standard input.
#   "1 x y"         -> mark cell (x, y) and join it to marked 4-neighbours
#   "t xa ya xb yb" -> any other t: print "Yes" if both cells are marked and
#                      connected through marked cells, otherwise "No"
# Coordinates are 1-based in the input and stored 0-based.
H, W = map(int, input().split())
Q = int(input())

P = set()  # cells marked so far
uf = UnionFind()

for _ in range(Q):
    t, *q = map(int, input().split())
    if t == 1:
        x, y = [a - 1 for a in q]
        P.add((x, y))
        neighbours = ((x, y + 1), (x, y - 1), (x + 1, y), (x - 1, y))
        for cell in neighbours:
            if cell in P:
                uf.union((x, y), cell)
    else:
        xa, ya, xb, yb = [a - 1 for a in q]
        start, goal = (xa, ya), (xb, yb)
        linked = start in P and goal in P and uf[start] == uf[goal]
        print("Yes" if linked else "No")
def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None):
    r"""Yield the lowest common ancestor for sets of pairs in a tree.

    Parameters
    ----------
    G : NetworkX directed graph (must be a tree)

    root : node, optional (default: None)
        The root of the subtree to operate on.
        If None, assume the entire graph has exactly one source and use that.

    pairs : iterable or iterator of pairs of nodes, optional (default: None)
        The pairs of interest. If None, defaults to all pairs of nodes
        under `root` that have a lowest common ancestor.

    Returns
    -------
    lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
        in `pairs` and `lca` is their lowest common ancestor.

    Raises
    ------
    NetworkXPointlessConcept
        If `G` has no nodes.
    NetworkXError
        If `None` is a node of `G`, if no root is given and the graph has
        several sources or none, or if some node has in-degree above 1.
    NodeNotFound
        If a node named in `pairs` is not in `G`.

    Notes
    -----
    Only defined on non-null trees represented with directed edges from
    parents to children. Uses Tarjan's off-line lowest-common-ancestors
    algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the
    largest value of the inverse Ackermann function likely to ever come up
    in actual use, and $P$ is the number of pairs requested (or $V^2$ if all
    are needed).

    Tarjan, R. E. (1979), "Applications of path compression on balanced
    trees", Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.

    See Also
    --------
    all_pairs_lowest_common_ancestor (similar routine for general DAGs)
    lowest_common_ancestor           (just a single pair for general DAGs)
    """
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
    if None in G:
        raise nx.NetworkXError("None is not a valid node.")

    # Index the requested pairs so partners can be looked up from either end.
    restricted = pairs is not None
    if restricted:
        partners = defaultdict(set)
        # Materialise one-shot iterables: membership is tested repeatedly.
        if not isinstance(pairs, (Mapping, Set)):
            pairs = set(pairs)
        for u, v in pairs:
            for endpoint in (u, v):
                if endpoint not in G:
                    msg = "The node %s is not in the digraph." % str(endpoint)
                    raise nx.NodeNotFound(msg)
            partners[u].add(v)
            partners[v].add(u)

    # Without an explicit root, the unique node of in-degree 0 is used. The
    # same scan rejects graphs that cannot be trees (in-degree above 1).
    if root is None:
        for candidate, in_deg in G.in_degree:
            if in_deg == 0:
                if root is not None:
                    msg = "No root specified and tree has multiple sources."
                    raise nx.NetworkXError(msg)
                root = candidate
            elif in_deg > 1:
                msg = "Tree LCA only defined on trees; use DAG routine."
                raise nx.NetworkXError(msg)
    if root is None:
        raise nx.NetworkXError("Graph contains a cycle.")

    # Iterative implementation of Tarjan's offline lca algorithm
    # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)
    uf = UnionFind()
    ancestors = {node: uf[node] for node in G}

    finished = set()  # nodes whose subtree has been fully processed
    for node in nx.dfs_postorder_nodes(G, root):
        finished.add(node)
        others = partners[node] if restricted else G
        for v in others:
            if v not in finished:
                continue
            # If the user requested both directions of a pair, give it.
            # Otherwise, just give one.
            if restricted and (node, v) in pairs:
                yield (node, v), ancestors[uf[v]]
            if not restricted or (v, node) in pairs:
                yield (v, node), ancestors[uf[v]]
        if node != root:
            # Fold the finished subtree into its parent's set, and record
            # the parent as the ancestor representing the merged set.
            parent = arbitrary_element(G.pred[node])
            uf.union(parent, node)
            ancestors[uf[parent]] = parent