def hamming_clustering(nodes: list, n_nodes: int, n_bits: int) -> int:
    """Count clusters of nodes whose labels lie within Hamming distance 2.

    Two nodes end up in the same cluster when their labels are identical or
    differ in one or two bit positions; merging is transitive because it is
    done through a union-find structure.

    Args:
        nodes: Node labels as binary strings (parsed with ``int(label, 2)``).
        n_nodes: Number of leading entries of ``nodes`` to cluster; the
            indices ``0 .. n_nodes - 1`` are the union-find elements.
        n_bits: Number of bit positions used to build the distance-1 and
            distance-2 XOR masks.

    Returns:
        The number of disjoint sets left in the union-find structure.

    Raises:
        ValueError: If a label is not a valid binary string.
        IndexError: If ``n_nodes`` is larger than ``len(nodes)``.
    """
    nodes_int = [int(label, 2) for label in nodes]

    # Group node indices by label value so duplicated labels share one entry.
    nodes_map = {}
    for index in range(n_nodes):
        nodes_map.setdefault(nodes_int[index], []).append(index)
    uf = UnionFind(range(n_nodes))

    # XOR masks that flip exactly one bit, or exactly two *distinct* bits.
    # The inner range starts at i + 1: pairing a bit with itself would only
    # duplicate a single-bit mask or produce a mask outside the n_bits width.
    bit_masks = [1 << i for i in range(n_bits)]
    bit_masks += [
        (1 << i) | (1 << j)
        for i in range(n_bits)
        for j in range(i + 1, n_bits)
    ]

    # Identical labels are at distance 0: merge every group of duplicates.
    for same_label in nodes_map.values():
        for other in same_label[1:]:
            uf.union(same_label[0], other)

    # Merge every pair of distinct labels that differ by one of the masks.
    for label, members in nodes_map.items():
        representative = members[0]
        for mask in bit_masks:
            neighbour = label ^ mask
            # Each pair is reachable from both of its ends; the masks are
            # non-zero, so handling only neighbour > label covers it once.
            if neighbour < label:
                continue
            neighbour_members = nodes_map.get(neighbour)
            if neighbour_members is not None:
                uf.union(representative, neighbour_members[0])

    # Only the number of sets matters, so count them without sorting.
    return sum(1 for _ in uf.to_sets())
# Example #2
# 0
def main():
    """Print the number of clusters of nodes within Hamming distance 2.

    Expects exactly one command-line argument: the path to a text file.
    The first line made of exactly two whitespace-separated tokens is the
    header, and its second token is the number of bits per node. Every other
    non-blank line is a node: its tokens are concatenated and parsed as a
    binary number. Nodes with the same value are treated as a single node
    (the position of the last occurrence represents it).

    With any other number of arguments a usage message is written to stderr
    and nothing else happens.

    Raises:
        ValueError: If the file has no header line or a node line is not a
            valid binary number.
    """
    if len(sys.argv) != 2:
        print("usage: expected exactly one argument, the path to the nodes file",
              file=sys.stderr)
        return

    bits = None
    graph = {}  # node value -> 1-based position of its last occurrence
    node_position = 1
    with open(sys.argv[1], 'r') as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                continue  # a blank line carries no node
            if bits is None and len(tokens) == 2:
                # Only the first two-token line is the header; with 2-bit
                # nodes every later node line also has two tokens.
                bits = int(tokens[1])
                continue
            graph[int("".join(tokens), 2)] = node_position
            node_position += 1
    if bits is None:
        raise ValueError("nodes file has no '<node count> <bit count>' header line")

    # XOR masks for Hamming distance 1 (one bit set) and distance 2 (the XOR
    # of every pair of distinct single-bit masks).
    single_bit = [1 << i for i in range(bits)]
    bit_mask = single_bit + [a ^ b for a, b in combinations(single_bit, 2)]

    # One union-find element per distinct node value.
    u_find = UnionFind(set(graph.values()))
    for key1, position in graph.items():
        for bitmask in bit_mask:
            key2 = key1 ^ bitmask
            # Each pair is reachable from both of its ends; the masks are
            # non-zero, so handling only key2 > key1 covers it once.
            if key2 < key1:
                continue
            other = graph.get(key2)
            if other is not None:
                u_find.union(position, other)

    # Only the number of sets is reported, so count them without sorting.
    print(sum(1 for _ in u_find.to_sets()))
# Example #3
# 0
class Components:
    """Keep the connected components of a graph in a union-find structure."""

    def __init__(self, g):
        """Build the component structure from graph ``g``.

        ``g`` is converted with ``nx.DiGraph.to_undirected`` and each
        connected component of the result becomes one union-find set.
        """
        self.uf = UnionFind()  # starts empty; filled one component at a time

        undirected = nx.DiGraph.to_undirected(g)
        components = nx.algorithms.components.connected_components(undirected)
        for members in components:
            self.uf.union(*members)

    def merge(self, u, v):
        """Put ``u`` and ``v`` into the same component."""
        self.uf.union(u, v)

    def split(self, g):
        """Rebuild everything from ``g`` by re-running the initialiser."""
        self.__init__(g)

    def find_component(self, u):
        """Return the first union-find set containing ``u``, or None if none does."""
        return next((group for group in self.uf.to_sets() if u in group), None)
# Example #4
# 0
# Convert every node with bitArrToInt and keep the converted values in a set
# as well, for membership tests.
nodes = list(map(bitArrToInt, nodes))
s = set(nodes)

# Hand the converted node values (as dict keys) to the union-find structure.
disjoint = UnionFind({item: [] for item in nodes})


def approach1():
    """Union every pair of nodes whose XOR has at most two set bits.

    Compares all index pairs (i, j) with i < j from the module-level
    ``nodes`` list and records the unions in the module-level ``disjoint``.
    """
    total = len(nodes)
    for i in range(total):
        first = nodes[i]
        for j in range(i + 1, total):
            second = nodes[j]
            if countSetBitsRec(first ^ second) > 2:
                continue
            disjoint.union(first, second)


def approach2():
    """Union each node with the generated candidates that are known nodes.

    For every entry of the module-level ``nodes``, the values produced by
    ``computeDist1`` and then by ``computeDist2`` are checked against the
    set ``s``; each one found there is unioned with the node in ``disjoint``.
    """
    for item in nodes:
        # Same order as before: all computeDist1 results, then computeDist2.
        for compute in (computeDist1, computeDist2):
            for candidate in compute(item):
                if candidate in s:
                    disjoint.union(item, candidate)


# Run the neighbour-lookup approach, then report how many sets remain.
approach2()
print(len([sorted(group) for group in disjoint.to_sets()]))

# Report the elapsed time since dt_start (set earlier in the script).
dt_end = time.time()
print(f'time taken: {dt_end - dt_start}')