def clustering(n, input_data):
    """Return the number of clusters left after merging nearby labels.

    Vertices that share a label are merged first. Then, for every label,
    its vertices are merged with the vertices of each label produced by
    ``compute_one_distance`` and afterwards ``compute_two_distance``
    (both defined elsewhere; presumably the labels at Hamming distance
    1 and 2 -- confirm against their definitions).

    Args:
        n: Passed straight to ``UF(n)``; presumably the number of
            vertices -- confirm against the ``UF`` constructor.
        input_data: Mapping from a label string to the vertex ids
            carrying that label, e.g.
            ``{'0000100110': [1, 2], '0110101100': [3, 4],
            '1000100110': [5], ...}``. Ids are handed to the union-find
            as ``id - 1``, i.e. they are treated as 1-based.

    Returns:
        ``len(uf.get_clusters())`` once all merges have been applied.
    """
    uf = UF(n)

    # Distance 0: merge every vertex of a label into the first vertex seen
    # for that label and remember that vertex as the label's representative.
    # Linking the first vertex with itself is a no-op union-wise, but it
    # keeps a find() call on every vertex, as the original pairwise loop did.
    representative = {}
    for label, vertices in input_data.items():
        for vertex in vertices:
            anchor = representative.setdefault(label, vertex)
            _link(uf, anchor, vertex)

    # Distances 1 and 2: each label's vertices are already one set, so
    # linking a single representative per label yields the same partition
    # as linking every u/v pair (assumes UF is a standard disjoint-set).
    for neighbours_of in (compute_one_distance, compute_two_distance):
        for label, u in representative.items():
            for node in neighbours_of(label):
                v = representative.get(node)
                # Labels that are absent (or had no vertices) have no
                # representative and are skipped.
                if v is not None:
                    _link(uf, u, v)

    return len(uf.get_clusters())


def _link(uf, u, v):
    """Union the sets holding vertex ids ``u`` and ``v`` unless already joined."""
    if uf.find(u - 1) != uf.find(v - 1):
        uf.union(u - 1, v - 1)