def clustering(nodes, num_nodes):
    """Union related ids and report the resulting ``UnionFind.size()``.

    For every key of ``nodes`` the ids stored under that key are unioned with
    each other, and the first of them is then unioned with every candidate
    produced by ``get_one_bit_candidates`` and ``get_two_bit_candidates``.

    :param nodes: mapping from a key to an indexable collection of ids
    :param num_nodes: argument forwarded to the ``UnionFind`` constructor
    :return: the value of ``UnionFind.size()`` once all unions are done
    """
    components = UnionFind(num_nodes)
    for key in nodes.keys():
        members = nodes.get(key)
        neighbours = (get_one_bit_candidates(key, nodes)
                      + get_two_bit_candidates(key, nodes))
        count = len(members)
        if count > 1:
            # Ids sharing one key belong together: union every (i < j) pair.
            for first in range(count):
                for second in range(first + 1, count):
                    components.union(members[first], members[second])
        # The first id stands in for the whole key when linking candidates.
        for candidate in neighbours:
            components.union(members[0], candidate)
    return components.size()
def kruskal_mst_improved(self) -> float:
    """
    Returns the total cost of a minimum spanning tree (MST) found with
    Kruskal's algorithm on top of a Union-Find structure.
    :return: float
    """
    # Step 1: process the edges in sorted (increasing cost) order
    # [O(m log m)].
    ordered_edges = sorted(self._edge_list)
    # Step 2: the spanning tree T starts out empty.
    tree_edges = []
    # Step 3: Union-Find over the vertices
    #   object -> vertex
    #   group  -> connected component w.r.t. the edges in T
    # Initially every vertex is its own isolated component.
    components = UnionFind(self._vtx_list)
    # Step 4: scan every edge e = (v, w) in sorted order [O(n log n)].
    for candidate in ordered_edges:
        # Adding e creates a cycle exactly when a v-w path already exists
        # in T, i.e. when v and w share the same leader.
        if candidate.end1.leader is candidate.end2.leader:
            continue
        tree_edges.append(candidate)
        # Fuse the two connected components into one.
        name_v = candidate.end1.leader.obj_name
        name_w = candidate.end2.leader.obj_name
        components.union(name_v, name_w)
    # Leader updates look like O(mn) at first, but counted per vertex: each
    # time a vertex's leader changes, its component at least doubles, so
    # after x updates 2^x <= n, i.e. x <= log2 n. Every vertex therefore
    # sees O(log n) updates, O(n log n) in total.
    return sum(chosen.cost for chosen in tree_edges)
def find_best_delta_by_num_ccs_for_given_k(permuted_sim, edges, k):
    """Find the edge weights at which the most components of size >= k exist.

    Edges are unioned one at a time in the given order. After each union the
    roots whose ``weights`` entry is at least ``k`` are counted; the weights of
    the edges at which that count reaches its maximum are collected.

    :param permuted_sim: not used by this function
    :param edges: non-empty sequence of objects with ``node1``, ``node2`` and
        ``weight`` attributes
    :param k: minimum component size to count; must be at least 2
    :return: tuple ``(max_num_ccs, best_deltas)``
    :raises ValueError: if ``k`` is smaller than 2
    """
    if k < 2:
        raise ValueError("k must be at least 2")
    # Before any union every node is a component of size 1, so for k >= 2
    # nothing qualifies yet.
    best_count = 0
    best_weights = [edges[0].weight]
    forest = UnionFind()
    for link in edges:
        forest.union(link.node1, link.node2)
        qualifying = sum(1 for root in forest.roots
                         if forest.weights[root] >= k)
        if qualifying > best_count:
            best_count, best_weights = qualifying, [link.weight]
        elif qualifying == best_count:
            best_weights.append(link.weight)
    return best_count, best_weights
def accountsMerge(self, accounts):
    """
    Merge accounts that share at least one e-mail address.

    Each account is ``[name, email, email, ...]``. All e-mails of one account
    are unioned; e-mails are then grouped by their Union-Find root.

    :type accounts: List[List[str]]
    :rtype: List[List[str]]
    :return: one ``[name, sorted e-mails...]`` list per merged group
    """
    # Imported locally so the method is self-contained.
    from collections import defaultdict

    email_to_id, id_to_name = self.create_mappings(accounts)
    uf = UnionFind(len(email_to_id))
    # union emails within an account
    for account in accounts:
        if len(account) < 2:
            # An account without any e-mail has nothing to union.
            continue
        p = email_to_id[account[1]]
        for email in account[2:]:
            uf.union(p, email_to_id[email])
    # collect emails by tree; the mapping was previously never created,
    # which raised NameError on the first append.
    id_to_emails = defaultdict(list)
    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    for email, p in email_to_id.items():
        id_to_emails[uf.find(p)].append(email)
    return [[id_to_name[p]] + sorted(emails)
            for p, emails in id_to_emails.items()]
def k_cluster(graph, k):
    """Single-link clustering of ``graph`` into ``k`` clusters.

    :param graph: non-empty iterable of ``(u, v, cost)`` edges
    :param k: target number of clusters
    :return: tuple ``(min_spacing, clusters)`` where ``clusters`` is the value
        of ``UnionFind.get()`` captured the last time exactly ``k`` clusters
        existed (or the initial state if that never happened)
    """
    # Sorted by descending cost, so the cheapest edge is popped off the end.
    graph = sorted(graph, key=operator.itemgetter(2), reverse=True)
    vertices = list(set(endpoint for edge in graph for endpoint in edge[:2]))
    ufc = UnionFind(vertices)
    min_spacing = graph[-1][-1]
    clusters = ufc.get()
    while len(ufc) >= k and graph:
        u, v, min_spacing = graph.pop()
        ufc.union(u, v)
        # Do not stop as soon as k clusters are reached: edges that fall
        # inside an existing cluster must still be consumed after the last
        # union so that the spacing reported is the correct one.
        if len(ufc) == k:
            clusters = ufc.get()
    # With a single cluster the spacing is, theoretically, infinite.
    if len(ufc) == 1:
        min_spacing = float('inf')
    return min_spacing, clusters
def smallestStringWithSwaps(self, s: str, pairs: List[List[int]]) -> str:
    """Return the lexicographically smallest string reachable via swaps.

    Indices joined (directly or transitively) by ``pairs`` form a connected
    component whose characters can be rearranged freely, so each component's
    characters are sorted and handed out in increasing index order.

    :param s: the input string
    :param pairs: index pairs whose characters may be swapped
    :return: the smallest string obtainable
    """
    n = len(s)
    uf = UnionFind(n)
    for pair in pairs:
        uf.union(pair[0], pair[1])

    # Group the characters by the root of their index.
    roots = []
    connected_components: DefaultDict[int, List[str]] = defaultdict(list)
    for i in range(n):
        root = uf.find(i)
        roots.append(root)
        connected_components[root].append(s[i])

    # Sort each component in DESCENDING order so the smallest remaining
    # character is at the end and can be taken with an O(1) pop();
    # pop(0) on an ascending list costs O(len) per character.
    for chars in connected_components.values():
        chars.sort(reverse=True)

    # Walk the indices in order and give each one the smallest character
    # still available in its component.
    return "".join(connected_components[root].pop() for root in roots)
def kruskal(edges, vertices):
    """
    Runs the Kruskal algorithm using the edges and vertices.

    :param edges: iterable of ``((u, v), w)`` items, ``w`` being the weight
    :param vertices: collection of vertices, handed to ``UnionFind``
    :return: tuple ``(cost, mst)`` where ``mst`` is the list of chosen
        ``(u, v)`` pairs and ``cost`` the sum of their weights. If the graph
        is not connected, the result describes a spanning forest.
    """
    cost = 0
    mst = []
    wanted = len(vertices) - 1
    uf = UnionFind(vertices)
    # Process the edges by increasing weight.
    for (u, v), w in sorted(edges, key=lambda x: x[1]):
        # Only take edges that do not close a cycle.
        if uf.find(u) != uf.find(v):
            uf.union(u, v)
            mst.append((u, v))
            cost += w
            # A spanning tree has exactly |V|-1 edges; nothing more to add.
            if len(mst) == wanted:
                break
    # Returning here (rather than only inside the loop) also covers graphs
    # with a single vertex, no edges, or several components, which
    # previously fell through and yielded None.
    return cost, mst
def add_documents(name: str, event_ids: List[int],
                  tweet_urls: Dict[int, models.URL], session):
    """Group tweets into documents and attach each tweet to its document.

    Tweets of the given events are loaded, then unioned with the tweet they
    reply to, the tweet they retweet, and their expanded URL (when present in
    ``tweet_urls``). One ``models.Document`` is created per group
    representative and assigned to ``tweet.document``.

    :param name: not used by this function
    :param event_ids: ids used to filter ``models.Tweet.event_id_id``
    :param tweet_urls: mapping from tweet id to its URL object
    :param session: database session used for the query and the transaction
    :return: the populated ``UnionFind`` instance
    """
    uf = UnionFind()
    tweets = (session.query(models.Tweet)
              .filter(models.Tweet.event_id_id.in_(event_ids))
              .all())

    # Pass 1: register every tweet id and every known URL as its own set.
    for tweet in tqdm(tweets, desc="Iterating over tweets (create sets)"):
        uf.make_set(tweet.tweet_id)
        url_obj = tweet_urls.get(tweet.tweet_id)
        if url_obj:
            uf.make_set(url_obj.expanded_url)

    # Pass 2: join tweets with what they reply to / retweet / link to.
    for tweet in tqdm(tweets, desc="Iterating over tweets (join sets)"):
        if tweet.in_reply_to_status_id:
            uf.union(tweet.tweet_id, int(tweet.in_reply_to_status_id))
        if tweet.retweet_of_id:
            uf.union(tweet.tweet_id, int(tweet.retweet_of_id))
        url_obj = tweet_urls.get(tweet.tweet_id)
        if url_obj:
            uf.union(tweet.tweet_id, url_obj.expanded_url)

    with session.begin():
        # One document per group, keyed by the stringified representative.
        group_doc = dict()
        for member in uf.groups:
            rep = str(uf.find(member))
            group_doc[rep] = models.Document(url=rep)
        for tweet in tqdm(tweets,
                          desc="Iterating over tweets (set documents)"):
            rep_key = str(uf.find(tweet.tweet_id))
            tweet.document = group_doc[rep_key]
    return uf
def construct_euclidean_minimim_spanning_tree(x_coords, y_coords):
    """
    construct minimum spanning tree(Kruskal's algorithm)
    ref: https://ja.wikipedia.org/wiki/%E3%82%AF%E3%83%A9%E3%82%B9%E3%82%AB%E3%83%AB%E6%B3%95

    :param x_coords: x coordinate of each of the ``NUM_NODE`` nodes
    :param y_coords: y coordinate of each of the ``NUM_NODE`` nodes
    :return: a ``Graph`` holding the tree, each edge added in both directions
    """
    # One candidate per unordered pair is enough. A reversed duplicate has
    # the same distance, so the stable sort places it right after its twin,
    # by which time both endpoints are already in one group and it is
    # skipped. Leaving it out halves the list without changing the result.
    edges = [
        Edge(i, j, math.hypot(x_coords[i] - x_coords[j],
                              y_coords[i] - y_coords[j]))
        for i in range(NUM_NODE)
        for j in range(i + 1, NUM_NODE)
    ]
    edges.sort(key=Edge.get_distance)

    uf = UnionFind(NUM_NODE)
    minimum_spanning_tree = Graph(NUM_NODE)
    for edge in edges:
        # An edge inside one group would close a cycle.
        if uf.belongsSameGroup(edge.source, edge.target):
            continue
        uf.union(edge.source, edge.target)
        # The tree is undirected: store the edge both ways.
        minimum_spanning_tree.addEdge(edge.source, edge.target, edge.distance)
        minimum_spanning_tree.addEdge(edge.target, edge.source, edge.distance)
    return minimum_spanning_tree
def kruskals(graph):
    """
    Select spanning-tree edges of an undirected graph with Kruskal's algorithm.

    :param graph: undigraph, mapping each vertex to an iterable of
        ``(neighbour, weight)`` pairs
    :return: set of ``(key, key)`` pairs, one per selected edge
    """
    tree = set()  # selected edges, empty at first
    union_find = UnionFind()
    adjacency = {}
    for vertex in graph:
        vertex_set = union_find.make_set(vertex)
        adjacency[vertex_set] = [(union_find.make_set(neighbour), weight)
                                 for neighbour, weight in graph[vertex]]
    # Flatten to (source, target, weight) triples ordered by weight.
    edges = [(source, target[0], target[1])
             for source in adjacency for target in adjacency[source]]
    edges.sort(key=lambda triple: triple[-1])
    for source, target, _ in edges:
        if union_find.find_set(source) == union_find.find_set(target):
            continue  # both ends already connected
        tree.add((union_find.get_key(source), union_find.get_key(target)))
        union_find.union(source, target)
    return tree
def minSpanningTree(nodes, edges):
    '''
    Input: A set of nodes and a set of edges, where each edge is specified
           by a tuple (u,v) where u,v are distinct nodes in "nodes".
    Output: A list of (u, v, 1) edges that forms a minimum-weight spanning
            tree of the graph (if it is connected). Every edge has weight 1,
            so any spanning tree is minimal; the edges are visited in random
            order, which makes the tree returned a random one.
    Running time: O( |E|*log |E| )

    The "edges" argument is not modified: a shuffled copy is used, so sets,
    tuples and other iterables are accepted as well as lists.
    '''
    mst = []
    # shuffle() works in place and needs a mutable sequence; copying keeps
    # the caller's collection intact and allows the documented set input.
    candidates = list(edges)
    shuffle(candidates)  # randomizes the edges to be connected
    uf = UnionFind(nodes)
    for e in candidates:
        # The truthiness of union()'s result decides whether the edge is
        # kept.
        if uf.union(e[0], e[1]):
            mst.append((e[0], e[1], 1))  # each edge is given a weight 1
    return mst
def numIslands(self, grid: List[List[str]]) -> int:
    """Count the islands of "1" cells in ``grid`` using Union-Find.

    The Union-Find is created with one element per cell and its ``n``
    attribute is decremented for every "0" cell; the final ``uf.n`` is
    returned. Visited land cells are overwritten with "2", so ``grid`` is
    modified in place.

    :param grid: rows of "0"/"1" strings
    :return: ``uf.n`` after processing, or 0 for an empty grid
    """
    if not grid:
        return 0
    height = len(grid)
    width = len(grid[0])
    uf = UnionFind(height * width)
    # Looking only right and down is enough given the scan order.
    for r in range(height):
        for c in range(width):
            cell = grid[r][c]
            if cell == "0":
                uf.n -= 1
                continue
            if cell != "1":
                continue
            grid[r][c] = "2"  # mark as visited
            here = r * width + c
            # Join with the cell below when it is land.
            if r + 1 < height and grid[r + 1][c] == "1":
                uf.union(here, (r + 1) * width + c)
            # Join with the cell to the right when it is land.
            if c + 1 < width and grid[r][c + 1] == "1":
                uf.union(here, here + 1)
    return uf.n
def clustering_with_max_spacing(self, k: int) -> float:
    """
    Clusters the graph into the given number of cluster using maximum
    spacing as the objective function, which is to maximize the minimum
    distance between a pair of separated points, using Single-link
    Algorithm, which is exactly the same as Kruskal's MST Algorithm.
    :param k: int, the number of clusters; must be greater than 1
    :return: float, the cost of the cheapest edge that still crosses two
        different clusters once exactly k clusters exist; 0.0 if that
        situation never arises
    :raises IllegalArgumentError: if k <= 1
    """
    # Check whether the input k is greater than 1
    if k <= 1:
        raise IllegalArgumentError(
            'The number of clusters must be greater than 1.')

    edges = sorted(self._edge_list)
    # Initially, each point is in a separate cluster.
    union_find = UnionFind(self._vtx_list)
    for edge in edges:
        leader_p, leader_q = edge.end1.leader, edge.end2.leader
        if leader_p is leader_q:
            # Both ends are already in the same cluster.
            continue
        # The group count is checked BEFORE merging, so the case where the
        # number of points already equals k is handled too; checking only
        # after a union skipped that state and returned 0.0.
        if union_find.num_of_groups() == k:
            # The maximum spacing is simply the cost of the cheapest
            # crossing edge among different connected components.
            return edge.cost
        # p, q = closest pair of separated points; merge their clusters.
        union_find.union(leader_p.obj_name, leader_q.obj_name)
    # Only reached when no crossing edge is left at the moment k clusters
    # would exist (e.g. fewer than k points).
    return 0.0
def solve(self, board: List[List[str]]) -> None:
    """Flip every 'O' region not connected to the border into 'X', in place.

    Border 'O' cells are unioned with an extra dummy element; interior 'O'
    cells are unioned with their 'O' neighbours. Interior 'O' cells that
    are not connected to the dummy are then overwritten with 'X'.

    :param board: grid of 'X' / 'O' strings; modified in place
    :return: None
    """
    if not board:
        return
    n_rows = len(board)
    n_cols = len(board[0])
    if n_rows < 3 or n_cols < 3:
        # No interior cells exist, nothing can be captured.
        return
    dummy = n_rows * n_cols  # index for dummy node
    uf = UnionFind(dummy + 1)
    last_row, last_col = n_rows - 1, n_cols - 1
    # Neighbour offsets in the order up, down, left, right.
    offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    for r in range(n_rows):
        for c in range(n_cols):
            if board[r][c] != 'O':
                continue
            i = r * n_cols + c
            if r in (0, last_row) or c in (0, last_col):
                # Connect border 'O' cells to dummy node.
                uf.union(i, dummy)
                continue
            # Connect interior 'O' cells to neighbor 'O' cells.
            for dr, dc in offsets:
                if board[r + dr][c + dc] == 'O':
                    uf.union(i, i + dr * n_cols + dc)
    for r in range(1, last_row):
        for c in range(1, last_col):
            if board[r][c] == 'O' and not uf.connected(r * n_cols + c,
                                                       dummy):
                board[r][c] = 'X'
def __init__( self, systems ):
    """Build the jump connectivity data, the anchors and the internal lanes.

    Sets ``self.anchors``, ``self.default_lanes``, ``self.internal_lanes``
    and ``self.vertex_factions`` from the data held by ``systems``.

    :param systems: object exposing ``sysnames``, ``jpnames``, ``loc2globs``,
        ``jp2locs``, ``sysdict``, ``jpdicts``, ``nodess``, ``factass`` and
        ``g2ass``
    """
    # Three parallel lists for the construction of the sparse weighted
    # connectivity matrix.
    si0, sj0, sv0 = [], [], []
    # Partitioning of the systems by reversible-jump connectedness.
    biconnect = UnionFind(len(systems.sysnames))

    per_system = zip(systems.jpnames, systems.loc2globs,
                     systems.jp2locs, systems.sysnames)
    for i, (jpname, loc2globi, jp2loci, namei) in enumerate(per_system):
        for j, target_name in enumerate(jpname):
            k = systems.sysdict[target_name]  # Get the index of target
            jpnamek = systems.jpdicts[k]
            loc2globk = systems.loc2globs[k]
            jp2lock = systems.jp2locs[k]
            # We only want to consider reversible jumps, and only once
            # per pair.
            if i < k or namei not in jpnamek:
                continue
            m = jpnamek[namei]  # Index of system i in system k numerotation
            si0.append(loc2globi[jp2loci[j]])  # Assets connectivity (jp only)
            sj0.append(loc2globk[jp2lock[m]])
            sv0.append(JUMP_CONDUCTIVITY)
            biconnect.union(i, k)

    # Create anchors to prevent the matrix from being singular.
    # Anchors are jumpoints, there is 1 per connected set of systems.
    # A Robin condition will be added to these points and we will check the
    # rhs thanks to them because in u (not utilde) the flux on these anchors
    # should be 0, otherwise, it means that the 2 non-null terms on the rhs
    # are on separate sets of systems.
    self.anchors = [systems.loc2globs[i][0]
                    for i in biconnect.findall()
                    if systems.loc2globs[i]]

    # Get the stiffness inside systems
    self.default_lanes = (si0, sj0, sv0)
    self.internal_lanes = inSysStiff(systems.nodess, systems.factass,
                                     systems.g2ass, systems.loc2globs)
    # Factions that have visited each vertex (in the global enumeration)
    self.vertex_factions = [[] for _ in systems.g2ass]
def longestConsecutive(self, nums):
    """
    Length of the longest run of consecutive integers found in ``nums``.

    Each distinct number is unioned (by index) with its predecessor and
    successor when those were seen earlier; the largest entry of
    ``uf.sizes`` is the answer.

    :type nums: List[int]
    :rtype: int
    """
    if not nums:
        return 0
    uf = UnionFind(len(nums))
    first_index = {}  # num => index of its first occurrence
    for i, num in enumerate(nums):
        if num in first_index:
            # duplicates add nothing to a run
            continue
        first_index[num] = i
        # Link with num - 1 first, then num + 1, when already seen.
        for neighbour in (num - 1, num + 1):
            if neighbour in first_index:
                uf.union(first_index[neighbour], i)
    return max(uf.sizes)
def test_union_find():
    """Exercise find, union, is_same_subset, get_subsets and n_subset."""
    uf = UnionFind(5)

    # Fresh structure: every element is on its own.
    assert uf.find(0) != uf.find(1)
    assert uf.find(0) == uf.find(0)
    assert uf.find(0) != uf.find(2)
    assert uf.n_subset == 5

    # One union joins 0 and 1 only.
    uf.union(1, 0)
    assert uf.is_same_subset(0, 1)
    assert not uf.is_same_subset(0, 2)
    assert uf.n_subset == 4

    # A second union leaves three subsets.
    uf.union(2, 4)
    subsets = uf.get_subsets()
    expected_subsets = [{0, 1}, {2, 4}, {3}]
    assert len(subsets) == len(expected_subsets)
    for wanted in expected_subsets:
        assert wanted in subsets

    # Joining everything collapses to a single subset.
    for left, right in ((2, 3), (3, 4), (0, 4)):
        uf.union(left, right)
    assert uf.find(1) == uf.find(3)
    assert uf.n_subset == 1
def connected_component_labelling(bool_input_image, connectivity_type=CONNECTIVITY_8):
    """
    Label the connected components of a binary image with a two-pass
    algorithm; a disjoint-set (Union-Find) structure records which
    provisional labels are equivalent.

    Input: binary image as 2D boolean array.
    Output: 2D integer array (dtype int16) of labelled pixels, 0 being
    background.

    1st pass: label image and record label equivalence classes.
    2nd pass: replace labels with their root labels.
    3rd pass: flatten labels so they are consecutive integers starting
    from 1.

    Raises ValueError when connectivity_type is neither 4 nor 8.
    """
    if connectivity_type not in (4, 8):
        raise ValueError("Invalid connectivity type (choose 4 or 8)")

    image_width = len(bool_input_image[0])
    image_height = len(bool_input_image)

    # N.B. numpy addressing is array[y, x].
    # NOTE(review): int16 caps the number of provisional labels; confirm
    # this is enough for the images in use.
    labelled_image = np.zeros((image_height, image_width), dtype=np.int16)
    uf = UnionFind()  # records label equivalences
    next_label = 1  # next unused provisional label

    # 1st pass: provisional labels + equivalences
    for y, row in enumerate(bool_input_image):
        for x, pixel in enumerate(row):
            if pixel == False:  # noqa: E712 - comparison kept as written
                # Background pixel: output stays 0.
                continue
            # Foreground pixel: look at the labels of its neighbours.
            labels = neighbouring_labels(labelled_image, connectivity_type,
                                         x, y)
            if not labels:
                # No labelled neighbour: start a new label.
                labelled_image[y, x] = next_label
                uf.MakeSet(next_label)
                next_label = next_label + 1
                continue
            # Part of an existing component: take the smallest label.
            smallest_label = min(labels)
            labelled_image[y, x] = smallest_label
            if len(labels) > 1:
                # Several labels meet here: record them as equivalent.
                for label in labels:
                    uf.Union(uf.GetNode(smallest_label), uf.GetNode(label))

    # 2nd pass: replace each label by its set's representative value and
    # number the representatives in order of first appearance.
    final_labels = {}
    for y, row in enumerate(labelled_image):
        for x, pixel_value in enumerate(row):
            if not pixel_value > 0:
                continue
            root_label = uf.Find(uf.GetNode(pixel_value)).value
            labelled_image[y, x] = root_label
            # First sighting gets the next consecutive number (1, 2, ...).
            final_labels.setdefault(root_label, len(final_labels) + 1)

    # 3rd pass: flatten so labels are consecutive integers starting from 1
    # (in order top to bottom, left to right).
    for y, row in enumerate(labelled_image):
        for x, pixel_value in enumerate(row):
            if pixel_value > 0:
                labelled_image[y, x] = final_labels[pixel_value]
    return labelled_image
# The input file describes a distance function (equivalently, a complete
# graph with edge costs). It has the following format:
#   [number_of_nodes]
#   [edge 1 node 1] [edge 1 node 2] [edge 1 cost]
#   [edge 2 node 1] [edge 2 node 2] [edge 2 cost]
# There is one edge (i,j) for each choice of 1<=i<j<=n, where n is the
# number of nodes. For example, the third line of the file is "1 3 5250",
# indicating that the distance between nodes 1 and 3 (equivalently, the cost
# of the edge (1,3)) is 5250. Distances are positive, but they are NOT
# guaranteed to be distinct.
# Task: run the clustering algorithm on this data set with the target number
# k of clusters set to 4, and print the maximum spacing of a 4-clustering.
from union_find import UnionFind

with open("clustering1.txt", "r") as f:
    size = int(next(f))
    unionFind = UnionFind(size)
    elements = []
    for line in f:
        first, second, distance = (int(token) for token in line.split())
        elements.append([first, second, distance])
    # Cheapest edges first.
    elements.sort(key=lambda edge: edge[2])
    max_spacing = 0
    for e in elements:
        if unionFind.size < 4:
            break
        # Remember the cost of the edge being processed while at least 4
        # clusters remain; the last one recorded is the answer.
        max_spacing = e[2]
        unionFind.union(e[0], e[1])
print(max_spacing)
def test_init_with_invalid_size(self):
    """UnionFind must raise ValueError for a size of 0 and of -5."""
    for bad_size in (0, -5):
        with self.assertRaises(ValueError):
            UnionFind(bad_size)
from union_find import UnionFind, UnionFindLabel

# Walk-through of UnionFind on six elements; the comments show the output
# expected from the preceding print.
uf = UnionFind(6)
print(uf.parents)
# [-1, -1, -1, -1, -1, -1]

print(uf)
# 0: [0]
# 1: [1]
# 2: [2]
# 3: [3]
# 4: [4]
# 5: [5]

uf.union(0, 2)
print(uf.parents)
# [-2, -1, 0, -1, -1, -1]

print(uf)
# 0: [0, 2]
# 1: [1]
# 3: [3]
# 4: [4]
# 5: [5]

# Three more unions, printing the parents array after each one.
for left, right in ((1, 3), (4, 5), (1, 4)):
    uf.union(left, right)
    print(uf.parents)
from union_find import UnionFind

if __name__ == '__main__':
    # After these three unions the only expected leader is 1.
    uf = UnionFind(4)
    for left, right in ((1, 2), (3, 4), (1, 3)):
        uf.union(left, right)

    output = {1}
    if uf.get_leaders() != output:
        print('mismatch: expected: %s, actual: %s' % (output, uf.get_leaders()))
        raise Exception
def test_union_with_valid_values(self):
    """union() must accept every pair of values drawn from 0..10."""
    uf = UnionFind(10)
    values = range(11)
    for left in values:
        for right in values:
            uf.union(left, right)
# suma = sum(a) # sumb = sum(b) return sqrt(sum((a[i] - b[i])**2 for i in range(n))) n = len(data) m = 5 edges = [] for i in range(n): for j in range(i + 1, n): edges.append((dist(data[i], data[j]), i, j)) edges.sort() # print(edges) uni = UnionFind(n) for (cost, i, j) in edges: if uni.group_count() == m: break if uni.size(i) > 12 or uni.size(j) > 12: continue if (uni.union(i, j)): print(cost, i, j) print(uni.all_group_members()) import subprocess for root, group in uni.all_group_members().items(): dir_name = "img" + str(root) subprocess.run(["mkdir", dir_name]) for i in group: img_name = str(images[i])
def test_init_with_valid_size(self):
    """A UnionFind built with size 5 exposes ``size == 5``."""
    structure = UnionFind(5)
    self.assertEqual(structure.size, 5)
""" https://code.google.com/codejam/contest/90101/dashboard#s=p1&a=3 Used Union-Find Data Structure """ import os import sys import numpy as np sys.path.append(os.path.abspath('./src/helpers')) from union_find import UnionFind f = open(sys.argv[1]) T = int(f.readline().strip()) for c in range(T): uf = UnionFind() H, W = map(int, f.readline().strip().split(" ")) arr = np.zeros((H, W)) for i in range(H): for j, x in enumerate(map(int, f.readline().strip().split(" "))): arr[i][j] = x for i in range(H): for j in range(W): neighbours = [(x, y) for (x, y) in ((i - 1, j), (i, j - 1), (i, j + 1), (i + 1, j)) if x >= 0 and x < H and y >= 0 and y < W] min_a, min_b = i, j for (x, y) in neighbours: if arr[x][y] < arr[min_a][min_b]: min_a, min_b = x, y if (min_a, min_b) != (i, j):
def __init__(self):
    """Create an empty UnionFind and two empty distance lists."""
    self.union_find = UnionFind()
    self.one_distances = list()
    self.two_distances = list()
def __init__(self):
    """Start with no edges and an empty UnionFind."""
    self.edges = list()
    self.union_find = UnionFind()
from union_find import UnionFind

# Reads "V E" followed by E lines "s t w" from standard input and prints the
# sum of the weights of the edges accepted by the Kruskal-style scan below.
V, E = (int(token) for token in input().split())
STW = [list(map(int, input().split())) for _ in range(E)]
STW.sort(key=lambda edge: edge[2])  # cheapest edges first

union_find = UnionFind(V + 1)
ans = 0
for s, t, w in STW:
    if union_find.same(s, t):
        # Both ends already connected: skip the edge.
        continue
    ans += w
    union_find.unite(s, t)
print(ans)
from union_find import UnionFind

if __name__ == "__main__":
    # Dynamic-connectivity demo: print every pair that actually joins two
    # previously separate components, then query one pair.
    connections = [[4, 3], [3, 8], [6, 5], [9, 4], [2, 1], [8, 9],
                   [5, 0], [7, 2], [6, 1], [1, 0], [6, 7]]
    nodes = {node for links in connections for node in links}
    union_find = UnionFind(nodes)
    for p, q in connections:
        if not union_find.connected(p, q):
            union_find.union(p, q)
            # print() with a single argument gives the same output on
            # Python 2 and 3; the former print statement was a SyntaxError
            # on Python 3.
            print(str(p) + " " + str(q))
    print(union_find.connected(0, 1))  # (0, 8)