def kruscal(G):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Edges are visited in order of increasing ``"weight"`` attribute and an
    edge is kept whenever its endpoints still belong to different union-find
    sets.  The selected edges are collected in a plain list (rather than being
    stored inside the union-find next to the vertices), so vertices that are
    themselves tuples cannot be confused with edges and the stop condition is
    a constant-time length check.

    Args:
        G: graph exposing ``nodes()``, ``edges()`` and ``G[u][v]["weight"]``.

    Returns:
        An ``nx.Graph`` containing the ``V - 1`` selected edges, each carrying
        the ``"weight"`` it has in ``G``.

    Raises:
        Exception: if the edges are exhausted before ``V - 1`` of them were
            selected (for example ``G`` is disconnected or has fewer than two
            nodes).
    """
    node_count = len(G.nodes())
    uf = UnionFind()
    tree_edges = []

    def edge_weight(edge):
        # Explicit unpacking: tuple parameters in lambdas are not valid
        # syntax on Python 3.
        u, v = edge
        return G[u][v]["weight"]

    for u, v in sorted(G.edges(), key=edge_weight):
        # Relies on UnionFind registering unseen vertices on first lookup.
        if uf[u] == uf[v]:
            continue  # this edge would close a cycle
        uf.union(u, v)
        tree_edges.append((u, v))
        # V - 1 edges connect V vertices when there are no cycles.
        if len(tree_edges) == node_count - 1:
            spt = nx.Graph(tree_edges)
            for a, b in tree_edges:
                spt[a][b]["weight"] = G[a][b]["weight"]
            return spt
    raise Exception("Something went wrong, algorithm should never end up here")
def __init__(self, G: nx.Graph):
    """Compute a minimum spanning tree of ``G`` with Kruskal's algorithm.

    The chosen edges are stored in ``self.edges`` as ``(v, w, weight)``
    triples and their summed weight in ``self.weight``.  As a side effect
    every chosen edge of ``G`` gets its ``'color'`` attribute set to
    ``'red'``.

    If ``G`` is not connected the edge list runs out before ``n - 1`` edges
    are selected; in that case the loop stops and the result is a minimum
    spanning forest instead of failing with ``IndexError`` on an empty list.

    Args:
        G: weighted graph; every edge must carry a ``'weight'`` attribute.
    """
    n = G.number_of_nodes()
    # Edge list of the spanning tree.
    self.edges = []
    # Total weight of the selected edges.
    self.weight = 0
    # Sorted heaviest-first so that pop() returns the lightest remaining edge.
    edges = sorted(G.edges, key=lambda e: G.edges[e]['weight'], reverse=True)
    uf = UnionFind()
    while edges and len(self.edges) < n - 1:
        v, w = edges.pop()
        if uf[v] == uf[w]:
            # Both endpoints are already in the same component: discard.
            continue
        # Add the edge to the spanning tree and update the total weight.
        uf.union(v, w)
        weight = G[v][w]['weight']
        self.weight += weight
        # Mark the selected edge in the input graph.
        G[v][w]['color'] = 'red'
        self.edges.append((v, w, weight))
def hamming_clustering(nodes: list, n_nodes: int, n_bits: int):
    """Count clusters of nodes whose codes lie within Hamming distance 2.

    Two nodes end up in the same cluster when their bit codes are identical
    or differ in at most two bit positions, directly or through a chain of
    such nodes.

    Args:
        nodes: bit strings accepted by ``int(s, 2)``, one per node.
        n_nodes: number of leading entries of ``nodes`` to cluster.
        n_bits: width of the codes; masks are generated for bits
            ``0 .. n_bits - 1`` only.

    Returns:
        The number of clusters reported by the union-find.
    """
    codes = [int(n, 2) for n in nodes]

    # Map every distinct code to the indices of the nodes carrying it.
    nodes_map = {}
    for i in range(n_nodes):
        nodes_map.setdefault(codes[i], []).append(i)

    uf = UnionFind(range(n_nodes))

    # XOR masks flipping exactly one or exactly two bits.  The inner range
    # starts at i + 1: pairing a bit with itself would yield 1 << (i + 1),
    # which for the top bit is a mask outside the n_bits-wide code space.
    bit_masks = [1 << i for i in range(n_bits)]
    bit_masks += [
        (1 << i) | (1 << j)
        for i in range(n_bits)
        for j in range(i + 1, n_bits)
    ]

    # Nodes with identical codes are at distance 0: merge them first.
    for members in nodes_map.values():
        first = members[0]
        for other in members[1:]:
            uf.union(first, other)

    # Duplicates are already merged, so linking one representative per code
    # is enough.  Most probes miss, hence a .get() instead of try/except.
    for code, members in nodes_map.items():
        for mask in bit_masks:
            neighbour = nodes_map.get(code ^ mask)
            if neighbour is not None:
                uf.union(members[0], neighbour[0])

    return sum(1 for _ in uf.to_sets())
def kruscal(G):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Edges are visited in order of increasing ``"weight"`` attribute and an
    edge is kept whenever its endpoints still belong to different union-find
    sets.  The selected edges are collected in a plain list (rather than being
    stored inside the union-find next to the vertices), so vertices that are
    themselves tuples cannot be confused with edges and the stop condition is
    a constant-time length check.

    Args:
        G: graph exposing ``nodes()``, ``edges()`` and ``G[u][v]["weight"]``.

    Returns:
        An ``nx.Graph`` containing the ``V - 1`` selected edges, each carrying
        the ``"weight"`` it has in ``G``.

    Raises:
        Exception: if the edges are exhausted before ``V - 1`` of them were
            selected (for example ``G`` is disconnected or has fewer than two
            nodes).
    """
    node_count = len(G.nodes())
    uf = UnionFind()
    tree_edges = []

    def edge_weight(edge):
        # Explicit unpacking: tuple parameters in lambdas are not valid
        # syntax on Python 3.
        u, v = edge
        return G[u][v]["weight"]

    for u, v in sorted(G.edges(), key=edge_weight):
        # Relies on UnionFind registering unseen vertices on first lookup.
        if uf[u] == uf[v]:
            continue  # this edge would close a cycle
        uf.union(u, v)
        tree_edges.append((u, v))
        # V - 1 edges connect V vertices when there are no cycles.
        if len(tree_edges) == node_count - 1:
            spt = nx.Graph(tree_edges)
            for a, b in tree_edges:
                spt[a][b]["weight"] = G[a][b]["weight"]
            return spt
    raise Exception("Something went wrong, algorithm should never end up here")
def union_graph(graph, bitmask, ln):
    """Return the number of clusters after merging nodes via XOR masks.

    Args:
        graph: mapping from an integer code to an indexable collection of
            node ids sharing that code.
        bitmask: iterable of integer masks; codes ``c`` and ``c ^ mask`` that
            are both present in ``graph`` get their first nodes merged.
        ln: number of nodes; the union-find is seeded with ``0 .. ln - 1``.

    Returns:
        Count of distinct union-find representatives over ``0 .. ln - 1``.
    """
    node_ids = set(range(ln))
    u_find = UnionFind(node_ids)

    # Chain together all nodes that share one code, walking from the back.
    for code in graph:
        members = list(graph[code])
        for idx in range(len(members) - 1, 0, -1):
            u_find.union(members[idx], members[idx - 1])

    # Link the first node of every pair of codes that differ by a mask.
    for mask in bitmask:
        for code in graph:
            partner = code ^ mask
            if partner not in graph:
                continue
            u_find.union(graph[code][0], graph[partner][0])

    return len({u_find[node] for node in node_ids})
def main():
    """Count Hamming-distance clusters for the file named on the command line.

    Input format as read by this function: a line with exactly two
    whitespace-separated fields is treated as a header whose second field is
    the number of bits per code (the first such line wins); every other
    non-blank line is a code whose fields are concatenated and parsed as a
    base-2 integer.  Nodes are numbered from 1 in reading order; when a code
    repeats, the later node replaces the earlier one.

    Nodes whose codes differ in one or two bit positions are merged, and the
    resulting number of clusters is printed.

    Raises:
        ValueError: if the file contains no header line.
    """
    if len(sys.argv) != 2:
        sys.stderr.write("usage: %s <clustering_file>\n" % sys.argv[0])
        return

    graph = {}          # code -> id of the (latest) node carrying that code
    bits = None
    node_position = 1
    with open(sys.argv[1], 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            if len(fields) == 2:
                if bits is None:
                    bits = int(fields[1])
                continue
            graph[int("".join(fields), 2)] = node_position
            node_position += 1
    if bits is None:
        raise ValueError("input file has no '<nodes> <bits>' header line")

    # Masks at Hamming distance 1, then distance 2 (XOR of two distinct
    # single-bit masks).
    single_bit = [1 << i for i in range(bits)]
    bit_mask = single_bit + [a ^ b for a, b in combinations(single_bit, 2)]

    # One union-find element per distinct code.
    u_find = UnionFind(set(graph.values()))
    for mask in bit_mask:
        for code, node in graph.items():
            neighbour = graph.get(code ^ mask)
            if neighbour is None:
                continue
            if u_find[node] != u_find[neighbour]:
                u_find.union(node, neighbour)

    print(sum(1 for _ in u_find.to_sets()))
class Components:
    """Connected components of a graph, tracked in a union-find structure."""

    def __init__(self, g):
        """Seed the union-find with the connected components of ``g``.

        ``g`` is first passed through ``nx.DiGraph.to_undirected`` and every
        connected component of the result becomes one union-find set.
        """
        self.uf = UnionFind()
        undirected = nx.DiGraph.to_undirected(g)
        components = nx.algorithms.components.connected_components(undirected)
        for component in components:
            self.uf.union(*component)

    def merge(self, u, v):
        """Join the components containing ``u`` and ``v``."""
        self.uf.union(u, v)

    def split(self, g):
        """Rebuild all components from ``g`` by re-running ``__init__``."""
        self.__init__(g)

    def find_component(self, u):
        """Return the set containing ``u``, or ``None`` if no set holds it."""
        return next((c for c in self.uf.to_sets() if u in c), None)
# Read every line as a bit string and store it as an integer code.
for line in file:
    tokens = line.split()
    G.append(int(''.join(tokens), 2))

# Group node indices by code: V maps code -> set of indices carrying it.
for index, code in enumerate(G):
    V.setdefault(code, set()).add(index)

# One union-find element per node index.
my_set = set(range(len(G)))
u_find = UnionFind(my_set)

# XOR every known code with every distance mask; when the resulting code
# also exists, merge every node of the first code with every node of the
# second.
for key_1 in V:
    for distance in distances:
        key_2 = key_1 ^ distance
        if key_2 not in V:
            continue
        for value_1 in V[key_1]:
            for value_2 in V[key_2]:
                u_find.union(value_1, value_2)

# The number of distinct representatives is the number of clusters (k).
names_of_clusters = {u_find[x] for x in my_set}
k = len(names_of_clusters)
print(k)
# Reads a weighted edge list, merges the cheapest edges until k clusters
# remain, then prints the weight of the next edge joining two different
# clusters.
with open('clustering1.txt') as file:
    k = 4  # target number of clusters
    n = int(file.readline())  # first line: a single integer, used as node count
    E = []  # heap of (weight, {p, q}) entries, cheapest edge on top
    # A file object is always truthy, so this loop only ends through the
    # ValueError raised when a line does not unpack into three integers
    # (e.g. the empty string returned at end of file).
    while file:
        try:
            p, q, weight = map(int, file.readline().split())
            # Shift ids down by one; presumably the file is 1-based while
            # the union-find below is built over range(n) -- confirm.
            p -=1; q-=1
            heapq.heappush(E, (weight, {p, q}))
        except ValueError:
            break;

uFind = UnionFind(range(n))
# Keep merging along the cheapest remaining edge while more than k distinct
# representatives exist.
while len({uFind[v] for v in range(n)}) > k:
    weight, (p, q) = heapq.heappop(E)
    uFind.union(p, q)

# Discard edges whose endpoints already share a cluster; the first edge that
# does not is the one whose weight gets printed.
while uFind[p] == uFind[q]:
    weight, (p, q) = heapq.heappop(E)
print(weight)

# -------------------------------------------------------------------------------------------------------------------- #
# Big Clustering

from networkx.utils.union_find import UnionFind

if __name__ == '__main__':
    with open("clustering_big.txt") as file:
# Create an array of bit-masks for the distances, using bit-shifts:
# mask1 flips exactly one of the 24 bits, mask2 flips exactly two.
mask1 = [1 << t for t in range(0, 24)]
mask2 = [1 << i | 1 << j for i in range(0, 24) for j in range(i + 1, 24)]
mask = mask1 + mask2
# use ^ (i.e. xor) to apply mask to codes, changing each digit

# initialize all UnionFind sets, one per node id 1..200000
ufs = UnionFind(list(range(1, 200001)))

# for each node, search if its neighbors exist and union the two sets
for i in range(1, 200001):
    code = id_code[i]  # looked up once per node instead of once per mask

    # nodes sharing exactly the same code are at distance 0
    synvalue = hamming[code]
    if len(synvalue) > 1:
        for j in synvalue:
            if j != i:
                ufs.union(i, j)

    for m in mask:
        # Most masked codes do not exist, so use a plain lookup with an empty
        # default rather than raising and catching KeyError on every miss.
        for n in hamming.get(code ^ m, ()):
            ufs.union(i, n)

# NOTE: the leftover pdb.set_trace() breakpoint that used to sit here was
# removed; it halted every run in the debugger.

# get final leaders in the UFS, then count the final number cluster
cluster_leaders = {ufs[x] for x in range(1, 200001)}
# set is implemented with mapping, so the search operation is O(1) on average
# The number of clusters