def hamming_clustering(nodes: list, n_nodes: int, n_bits: int):
    """Count the clusters obtained by merging nodes at Hamming distance <= 2.

    Every node is labelled by a bit string.  Nodes carrying the same label,
    or labels that differ in one or two of the bit positions
    ``0 .. n_bits - 1``, are merged into a single cluster.

    Args:
        nodes: Node labels as binary strings accepted by ``int(s, 2)``.
        n_nodes: Number of leading entries of ``nodes`` to cluster.  The
            union-find elements are the indices ``0 .. n_nodes - 1``.
        n_bits: Number of bit positions considered when flipping bits.

    Returns:
        The number of disjoint sets remaining after all merges.

    Raises:
        ValueError: If a label is not a valid binary string.
        IndexError: If ``n_nodes`` exceeds ``len(nodes)``.
    """
    nodes_int = [int(n, 2) for n in nodes]

    # Group node indices by label value so identical labels are found in O(1).
    nodes_map = {}
    for index in range(n_nodes):
        nodes_map.setdefault(nodes_int[index], []).append(index)

    uf = UnionFind(range(n_nodes))

    # Masks with exactly one or exactly two bits set.  The inner range starts
    # at i + 1: pairing a bit with itself would give 2**(i + 1), which is
    # either a repeated single-bit mask or a bit outside the n_bits range.
    single_bit = [1 << i for i in range(n_bits)]
    double_bit = [
        single_bit[i] | single_bit[j]
        for i in range(n_bits)
        for j in range(i + 1, n_bits)
    ]
    bit_masks = single_bit + double_bit

    # Nodes sharing a label are at distance 0: merge them first.
    for members in nodes_map.values():
        first = members[0]
        for other in members[1:]:
            uf.union(first, other)

    # One representative per label is enough because equal labels are
    # already merged above.
    for value, members in nodes_map.items():
        representative = members[0]
        for mask in bit_masks:
            neighbours = nodes_map.get(value ^ mask)
            if neighbours is not None:
                uf.union(representative, neighbours[0])

    return sum(1 for _ in uf.to_sets())
def main():
    """Print the number of Hamming-distance-<=2 clusters in an input file.

    The file path is taken from ``sys.argv[1]``.  The first non-blank line
    is the header ``<node count> <bits per node>``; every following
    non-blank line holds the bits of one node separated by whitespace.
    Nodes with the same bit pattern are treated as a single node (the last
    occurrence wins).  Two nodes end up in the same cluster when their
    patterns differ in one or two bit positions, directly or transitively.

    When the script is not given exactly one argument a message is written
    to stderr and nothing else happens.

    Raises:
        ValueError: If the header is missing or malformed, or a node line
            is not a valid bit pattern.
    """
    if len(sys.argv) != 2:
        print("expected exactly one argument: the input file path",
              file=sys.stderr)
        return

    graph = {}  # bit pattern (as int) -> node id
    bits = None
    node_position = 1
    with open(sys.argv[1], 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of failing in int()
            if bits is None:
                # Identify the header by position, not by token count, so
                # that 2-bit node lines are not mistaken for headers.
                if len(fields) != 2:
                    raise ValueError(
                        "header line must be '<node count> <bits>'")
                bits = int(fields[1])
                continue
            graph[int("".join(fields), 2)] = node_position
            node_position += 1

    if bits is None:
        raise ValueError("input file has no header line")

    # All masks with exactly one bit set, then all with exactly two bits set.
    single_bit = [1 << i for i in range(bits)]
    bit_mask = single_bit + [a ^ b for a, b in combinations(single_bit, 2)]

    # graph.values() holds exactly one id per distinct pattern.
    u_find = UnionFind(graph.values())
    for mask in bit_mask:
        for key, node in graph.items():
            neighbour = graph.get(key ^ mask)
            if neighbour is not None:
                u_find.union(node, neighbour)

    print(sum(1 for _ in u_find.to_sets()))
class Components:
    """Keep the connected components of a graph in a union-find structure."""

    def __init__(self, g):
        """Build the union-find from the connected components of ``g``.

        ``g`` is first passed through ``nx.DiGraph.to_undirected``; each
        connected component of the result is then merged into one set.
        """
        self.uf = UnionFind()
        undirected = nx.DiGraph.to_undirected(g)
        components = nx.algorithms.components.connected_components(undirected)
        for members in components:
            self.uf.union(*members)

    def merge(self, u, v):
        """Join the sets containing ``u`` and ``v``."""
        self.uf.union(u, v)

    def split(self, g):
        """Rebuild everything from ``g`` (no incremental update yet)."""
        self.__init__(g)

    def find_component(self, u):
        """Return the set containing ``u``, or ``None`` if no set has it."""
        return next(
            (members for members in self.uf.to_sets() if u in members),
            None,
        )
# Replace every node by the value bitArrToInt gives for it, and keep a set of
# those values for constant-time membership tests.
nodes = [bitArrToInt(bits) for bits in nodes]
s = set(nodes)
# One union-find element per distinct node value.
disjoint = UnionFind(dict.fromkeys(nodes))


def approach1():
    """Compare every pair of nodes and merge the close ones.

    A pair is merged when countSetBitsRec of the XOR of the two values is
    at most 2 (presumably a population count -- verify against its
    definition).
    """
    total = len(nodes)
    for i in range(total):
        left = nodes[i]
        for j in range(i + 1, total):
            right = nodes[j]
            if countSetBitsRec(left ^ right) > 2:
                continue
            disjoint.union(left, right)


def approach2():
    """Merge each node with those of its generated neighbours that exist.

    Candidates come from computeDist1 and then computeDist2 (presumably the
    values at Hamming distance 1 and 2 -- verify against their definitions);
    only candidates present in ``s`` are merged.
    """
    for node in nodes:
        for candidates_of in (computeDist1, computeDist2):
            for candidate in candidates_of(node):
                if candidate in s:
                    disjoint.union(node, candidate)


approach2()
print(len([sorted(group) for group in disjoint.to_sets()]))
dt_end = time.time()
print('time taken: ' + str(dt_end - dt_start))