コード例 #1
0
def kruscal(G):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Edges are visited in order of increasing ``"weight"``; an edge is kept
    only when its endpoints still belong to different union-find sets, so
    no cycle is ever created.

    :param G: graph exposing ``nodes()``, ``edges()`` and
        ``G[u][v]["weight"]`` for every edge.
    :returns: a new ``nx.Graph`` holding the selected edges, each with its
        ``"weight"`` copied from ``G``.
    :raises Exception: when the edges run out before ``V - 1`` of them were
        selected (for instance a disconnected graph, or a graph with fewer
        than two nodes).
    """
    node_count = len(G.nodes())
    uf = UnionFind()

    def edge_weight(edge):
        # Tuple-unpacking lambdas (``lambda (u, v): ...``) are Python 2 only.
        u, v = edge
        return G[u][v]["weight"]

    # The chosen edges are tracked in a plain list rather than being stored
    # inside the union-find next to the vertices: that keeps tuple-valued
    # node labels unambiguous and avoids filtering a one-shot iterator.
    tree_edges = []

    # Looking a vertex up with [] adds it to the union-find as a singleton
    # set if it is not there yet.
    for u, v in sorted(G.edges(), key=edge_weight):
        if uf[u] != uf[v]:
            uf.union(u, v)
            tree_edges.append((u, v))

            # V - 1 acyclic edges connect all V vertices: the tree is done.
            if len(tree_edges) == node_count - 1:
                spt = nx.Graph()
                for a, b in tree_edges:
                    spt.add_edge(a, b, weight=G[a][b]["weight"])
                return spt

    raise Exception("Something went wrong, algorithm should never end up here")
コード例 #2
0
ファイル: kruskal.py プロジェクト: Bit0r/algs4
    def __init__(self, G: nx.Graph):
        """Run Kruskal's algorithm on ``G`` and record the result.

        After construction ``self.edges`` holds the accepted edges as
        ``(v, w, weight)`` triples and ``self.weight`` their total weight.
        Each accepted edge is also marked in the input graph by setting
        ``G[v][w]['color'] = 'red'``.

        If ``G`` is not connected, the loop stops once every edge has been
        examined, leaving a spanning forest with fewer than ``n - 1`` edges.

        :param G: graph whose edges carry a ``'weight'`` attribute.
        """
        n = G.number_of_nodes()

        # Edges accepted so far
        self.edges = []
        # Total weight of the accepted edges
        self.weight = 0

        # Sorted heaviest-first so that pop() returns the lightest
        # remaining edge from the end of the list.
        edges = sorted(G.edges,
                       key=lambda e: G.edges[e]['weight'],
                       reverse=True)
        uf = UnionFind()

        # A spanning tree has n - 1 edges. Also stop when the candidates
        # run out, otherwise pop() raises IndexError on a disconnected graph.
        while edges and len(self.edges) < n - 1:
            v, w = edges.pop()

            if uf[v] == uf[w]:
                # Both endpoints are already in the same component, so
                # this edge would close a cycle: discard it.
                continue

            # Add the edge to the spanning tree and update the total weight
            uf.union(v, w)
            weight = G[v][w]['weight']
            self.weight += weight

            # Mark the edge red in the input graph
            G[v][w]['color'] = 'red'

            self.edges.append((v, w, weight))
コード例 #3
0
def hamming_clustering(nodes: list, n_nodes: int, n_bits: int):
    """Count the clusters left after merging nodes at Hamming distance <= 2.

    Nodes with identical codes are merged first; then every pair of codes
    differing in exactly one or two of the lowest ``n_bits`` bit positions
    is merged.

    :param nodes: binary strings (parsed with ``int(s, 2)``), one per node.
    :param n_nodes: number of leading entries of ``nodes`` to cluster.
    :param n_bits: number of bit positions considered when flipping bits.
    :returns: the number of disjoint sets remaining in the union-find.
    """
    # Group node indices by the integer value of their code.
    nodes_int = [int(n, 2) for n in nodes]
    nodes_map = dict()
    for i in range(n_nodes):
        nodes_map.setdefault(nodes_int[i], []).append(i)
    uf = UnionFind(range(n_nodes))

    # XOR masks that flip exactly one bit, or exactly two distinct bits.
    # j starts at i + 1: with j == i the "two-bit" mask 2**i + 2**i collapses
    # into the single bit 2**(i + 1), which for the top bit lies outside the
    # n_bits range.
    dist1 = [1 << i for i in range(n_bits)]
    dist2 = [(1 << i) | (1 << j)
             for i in range(n_bits)
             for j in range(i + 1, n_bits)]
    bit_masks = dist1 + dist2

    # Nodes with identical codes are at distance 0: merge each group.
    for eq_list in nodes_map.values():
        if len(eq_list) > 1:
            for item in eq_list[1:]:
                uf.union(eq_list[0], item)

    # Merge every code with each existing code one or two bit flips away.
    # One representative per code suffices because equal codes were merged
    # above.
    for code, members in nodes_map.items():
        for mask in bit_masks:
            neighbours = nodes_map.get(code ^ mask)
            if neighbours is not None:
                uf.union(members[0], neighbours[0])

    return sum(1 for _ in uf.to_sets())
コード例 #4
0
def kruscal(G):
    """Build a minimum spanning tree of ``G`` with Kruskal's algorithm.

    Edges are visited in order of increasing ``"weight"``; an edge is kept
    only when its endpoints still belong to different union-find sets, so
    no cycle is ever created.

    :param G: graph exposing ``nodes()``, ``edges()`` and
        ``G[u][v]["weight"]`` for every edge.
    :returns: a new ``nx.Graph`` holding the selected edges, each with its
        ``"weight"`` copied from ``G``.
    :raises Exception: when the edges run out before ``V - 1`` of them were
        selected (for instance a disconnected graph, or a graph with fewer
        than two nodes).
    """
    node_count = len(G.nodes())
    uf = UnionFind()

    def edge_weight(edge):
        # Tuple-unpacking lambdas (``lambda (u, v): ...``) are Python 2 only.
        u, v = edge
        return G[u][v]["weight"]

    # The chosen edges are tracked in a plain list rather than being stored
    # inside the union-find next to the vertices: that keeps tuple-valued
    # node labels unambiguous and avoids filtering a one-shot iterator.
    tree_edges = []

    # Looking a vertex up with [] adds it to the union-find as a singleton
    # set if it is not there yet.
    for u, v in sorted(G.edges(), key=edge_weight):
        if uf[u] != uf[v]:
            uf.union(u, v)
            tree_edges.append((u, v))

            # V - 1 acyclic edges connect all V vertices: the tree is done.
            if len(tree_edges) == node_count - 1:
                spt = nx.Graph()
                for a, b in tree_edges:
                    spt.add_edge(a, b, weight=G[a][b]["weight"])
                return spt

    raise Exception("Something went wrong, algorithm should never end up here")
コード例 #5
0
def union_graph(graph, bitmask, ln):
    """Return the number of clusters after merging neighbouring codes.

    :param graph: mapping from a code to an indexable collection of node
        ids carrying that code.
    :param bitmask: iterable of XOR masks; two codes are neighbours when
        they differ by one of these masks.
    :param ln: number of nodes; ids ``0 .. ln - 1`` seed the union-find.
    :returns: the number of distinct union-find roots over those ids.
    """
    my_set = set(range(ln))
    u_find = UnionFind(my_set)

    # Chain the ids stored under each code together, walking from the last
    # entry towards the first.
    for code in graph:
        bucket = list(graph[code])
        for idx in range(len(bucket) - 1, 0, -1):
            u_find.union(bucket[idx], bucket[idx - 1])

    # Link the first id of every code with the first id of each existing
    # code reachable through one of the masks.
    for mask in bitmask:
        for code in graph:
            partner = code ^ mask
            if partner not in graph:
                continue
            u_find.union(graph[code][0], graph[partner][0])

    return len({u_find[node] for node in my_set})
コード例 #6
0
def main():
    """Print the number of clusters of nodes within Hamming distance 2.

    Expects exactly one command-line argument, the path of the input file;
    with any other argument count the function returns without doing
    anything. A line with exactly two whitespace-separated fields is read
    as a header whose second field is the bit count (the first such line
    wins). Every other non-blank line is read as one node: its fields are
    concatenated and parsed as a binary number. Blank lines are skipped.
    """
    if len(sys.argv) != 2:
        return

    txt = sys.argv[1]
    # code -> id of the most recent node carrying that code; an earlier
    # node with the same code is dropped, since the two are identical.
    graph = {}
    total_bits = []
    node_position = 1
    with open(txt, 'r') as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # A blank line (e.g. at the end of the file) is not a node.
                continue
            if len(tokens) == 2:
                total_bits.append(int(tokens[1]))
            else:
                graph[int("".join(tokens), 2)] = node_position
                node_position += 1
    bits = total_bits[0]

    # Masks flipping exactly one bit (Hamming distance 1) ...
    bit_mask_1 = [1 << i for i in range(bits)]
    # ... and exactly two bits, built by XORing every pair of one-bit masks.
    bit_mask_2 = [bit_mask_1[i] ^ bit_mask_1[j]
                  for i, j in combinations(range(bits), 2)]
    bit_mask = bit_mask_1 + bit_mask_2

    u_find = UnionFind(set(graph.values()))
    for bitmask in bit_mask:
        for key1 in graph:
            key2 = key1 ^ bitmask
            if key2 in graph:
                u_find.union(graph[key1], graph[key2])

    print(sum(1 for _ in u_find.to_sets()))
コード例 #7
0
ファイル: conComp.py プロジェクト: dkimpara/DiverseNode
class Components:
    """Connected components of a graph, kept in a union-find structure."""

    def __init__(self, g):
        """Seed the union-find with the connected components of ``g``.

        ``g`` is first converted with ``nx.DiGraph.to_undirected``; every
        connected component of the result becomes one union-find set.
        """
        self.uf = UnionFind()  # one set per connected component

        undirected = nx.DiGraph.to_undirected(g)
        components = nx.algorithms.components.connected_components(undirected)
        for component in components:
            self.uf.union(*component)

    def merge(self, u, v):
        """Join the sets containing ``u`` and ``v``."""
        self.uf.union(u, v)

    def split(self, g):
        """Rebuild everything from ``g`` (re-initialises for now; optimize later)."""
        self.__init__(g)

    def find_component(self, u):
        """Return the set containing ``u``, or ``None`` if no set does."""
        return next((c for c in self.uf.to_sets() if u in c), None)
コード例 #8
0
ファイル: clustering.py プロジェクト: alexnik42/algorithms
    # Read one node per line and store it in G as an integer.
    for line in file:
        # Whitespace-separated fields of the line
        # (presumably single bits - TODO confirm against the input format).
        l = [s for s in line.split()]
        # Concatenate the fields and parse the result as a base-2 number.
        G.append(int(''.join(l), 2)) # Convert each node from binary to integer

# Group node indices by code, so nodes with identical codes share one entry.
for i, code in enumerate(G):
    V.setdefault(code, set()).add(i)

# Initialize UnionFind-instance with one singleton set per node index
my_set = set(range(len(G)))
u_find = UnionFind(my_set)

# XOR each code with every distance mask to check whether the resulting
# code exists. If it does, merge all nodes of both codes with one union()
# call; this yields the same partition as uniting every cross pair.
# NOTE(review): nodes sharing a code are only merged with each other when
# some mask hits an existing code (or `distances` contains 0) - confirm
# against how `distances` is built.
for key_1, value in V.items():
    for distance in distances:
        key_2 = key_1 ^ distance

        if key_2 in V:
            u_find.union(*value, *V[key_2])

# Collect the distinct cluster representatives and output their quantity (k)
names_of_clusters = {u_find[x] for x in my_set}
k = len(names_of_clusters)
print(k)
コード例 #9
0
# Max-spacing k-clustering: merge the closest pair of clusters until k
# remain, then report the smallest distance between two different clusters.
with open('clustering1.txt') as file:
    k = 4
    # The first line holds the number of nodes.
    n = int(file.readline())
    E = []
    for line in file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline at end of file).
            continue
        p, q, weight = map(int, fields)
        # Node labels in the file are 1-based; store them 0-based. Plain
        # tuples keep heap comparisons a total order even for equal weights.
        E.append((weight, p - 1, q - 1))

heapq.heapify(E)

uFind = UnionFind(range(n))
# Every successful merge lowers the cluster count by exactly one, so it is
# tracked incrementally instead of recounting the roots of all n nodes.
clusters = n
while clusters > k:
    weight, p, q = heapq.heappop(E)
    if uFind[p] != uFind[q]:
        uFind.union(p, q)
        clusters -= 1

# The spacing is the weight of the lightest remaining edge whose endpoints
# lie in different clusters. heappop raises IndexError if no such edge is
# left.
while True:
    weight, p, q = heapq.heappop(E)
    if uFind[p] != uFind[q]:
        break

print(weight)


# -------------------------------------------------------------------------------------------------------------------- #
# Big Clustering

from networkx.utils.union_find import UnionFind


if __name__ == '__main__':
    with open("clustering_big.txt") as file:
コード例 #10
0
# Problem size: bits per code and number of nodes (ids run 1..N_NODES).
N_BITS = 24
N_NODES = 200000

# Create an array of bit-masks for the distances, using bit-shifts:
# mask1 flips exactly one bit, mask2 flips exactly two distinct bits.
mask1 = [1 << t for t in range(N_BITS)]
mask2 = [1 << i | 1 << j for i in range(N_BITS) for j in range(i + 1, N_BITS)]
mask = mask1 + mask2
# use ^ (i.e. xor) to apply a mask to a code, flipping the selected bits

# initialize all UnionFind sets, one singleton per node id
ufs = UnionFind(list(range(1, N_NODES + 1)))
# for each node, search if its neighbors exist and union the two sets
# NOTE(review): `hamming` presumably maps a code to the ids of the nodes
# carrying it, and `id_code` a node id to its code - confirm where they
# are built.
for i in range(1, N_NODES + 1):
    code = id_code[i]
    # nodes sharing exactly the same code are at distance 0
    synvalue = hamming[code]
    for j in synvalue:
        if j != i:
            ufs.union(i, j)
    for m in mask:
        # find if such neighbor(s) exist (and list the ids)
        try:
            neighbor = hamming[code ^ m]
        except KeyError:
            continue
        for n in neighbor:
            ufs.union(i, n)

# (a leftover pdb.set_trace() breakpoint was removed here; it halted every run)
# get final leaders in the UFS, then count the final number cluster
cluster_leaders = {ufs[x] for x in range(1, N_NODES + 1)}
# set is implemented with mapping, so the search operation is O(1) on average
# The number of clusters