コード例 #1
0
ファイル: utils.py プロジェクト: bfrgoncalves/missingData
def run_kruskal(pm, pl, n, lvs1):
    """Build a spanning tree over ``n`` items with Kruskal's algorithm.

    Every pair ``[i, j]`` with ``i < j < n`` is a candidate edge. Candidates
    are ordered with the module-level comparison function ``EdgeComp`` and
    added greedily whenever they join two different union-find components.

    Args:
        pm: stored in the module-level global ``profiles``.
        pl: profile length, stored in the module-level global ``maxlen``.
        n: number of items (vertices).
        lvs1: stored in the module-level global ``lvs``.

    Returns:
        A list of at most ``n - 1`` edges, each a two-element list ``[i, j]``.

    NOTE(review): the three globals are presumably what ``EdgeComp`` reads
    to rank edges -- confirm against its definition.
    """
    global profiles
    global maxlen
    global lvs

    # Local import so the block does not rely on the file's import header.
    from functools import cmp_to_key

    maxlen = pl  # profile length
    lvs = lvs1
    profiles = pm

    edges = [[i, j] for i in range(n) for j in range(i + 1, n)]
    # ``list.sort`` takes no positional comparison function on Python 3;
    # ``cmp_to_key`` adapts the cmp-style ``EdgeComp`` and also works on 2.7.
    edges.sort(key=cmp_to_key(EdgeComp))

    uf = UF(n)
    tree = []
    for edge in edges:
        # A spanning tree over n vertices has exactly n - 1 edges.
        if len(tree) >= n - 1:
            break
        if uf.find(edge[0]) != uf.find(edge[1]):
            tree.append(edge)
            uf.union(edge[0], edge[1])

    return tree
コード例 #2
0
class EGraph:
    """Hard-coded four-node term graph with a congruence-closure ``merge``.

    ``R`` is a union-find over node indices. Node ``id`` values are passed
    straight to ``R.find`` / ``R.union`` and compared with positions in
    ``self.nodes`` -- presumably ``ENode`` assigns ids that match those
    positions; confirm against ``ENode``.
    """

    def __init__(self, eqs):
        # TODO use eqs
        # NOTE we're following nelson-oppen figure 1
        outer_f = ENode('f')
        inner_f = ENode('f')
        leaf_a = ENode('a')
        leaf_b = ENode('b')

        self.v1 = outer_f
        self.v2 = inner_f
        self.v3 = leaf_a
        self.v4 = leaf_b
        self.nodes = [outer_f, inner_f, leaf_a, leaf_b]

        # One union-find slot per node.
        self.R = UF(len(self.nodes))

        # v1 has successors v2 and v4; nothing points at v1.
        outer_f.preds = []
        outer_f.succs = [inner_f, leaf_b]

        # v2 has successors v3 and v4.
        inner_f.preds = [outer_f]
        inner_f.succs = [leaf_a, leaf_b]

        leaf_a.preds = [inner_f]
        leaf_a.succs = []

        leaf_b.preds = [inner_f, outer_f]
        leaf_b.succs = []

    def preds(self, n):
        """Return the predecessors of every node in the same class as ``n``."""
        # FIXME inefficient
        wanted = self.R.find(n.id)
        same_class = [
            node
            for index, node in enumerate(self.nodes)
            if self.R.find(index) == wanted
        ]
        collected = []
        for node in same_class:
            collected.extend(node.preds)
        return collected

    def merge(self, n1, n2):
        """Union the classes of ``n1`` and ``n2`` and propagate congruences."""
        if self.R.find(n1.id) == self.R.find(n2.id):
            return

        # Predecessors must be gathered before the union changes the classes.
        left_preds = self.preds(n1)
        right_preds = self.preds(n2)
        self.R.union(n1.id, n2.id)

        for x, y in itertools.product(left_preds, right_preds):
            already_merged = self.R.find(x.id) == self.R.find(y.id)
            if not already_merged and self.congruent(x, y):
                self.merge(x, y)

    def congruent(self, n1, n2):
        """Return True when both nodes have pairwise-equivalent successors."""
        if len(n1.succs) != len(n2.succs):
            return False
        return all(
            self.R.find(a.id) == self.R.find(b.id)
            for a, b in zip(n1.succs, n2.succs)
        )
コード例 #3
0
def clustering(graph, n_clusters=4):
    """Merge the cheapest edges until ``n_clusters`` components remain.

    Edges are processed in ascending weight order; an edge is used only
    when its endpoints are in different union-find components.

    Args:
        graph: adjacency mapping of the form
            ``{u: [(v_1, w_1), (v_2, w_2), ...]}``. Vertex labels are used
            as ``label - 1`` indices into a union-find of size
            ``len(graph)``, so they are assumed to be ``1..len(graph)``
            -- confirm against the caller.
        n_clusters: number of components to stop at.

    Returns:
        The result of ``calculate_max_distance(edges, uf)``, called with the
        weight-sorted edge list and the final union-find.

    Merging stops early, instead of raising ``IndexError``, when the edges
    run out before the target is reached or when ``n_clusters`` exceeds the
    number of vertices.
    """
    uf = UF(len(graph))
    edges = sorted(
        ((u, v, w) for u, neighbours in graph.items() for v, w in neighbours),
        key=lambda edge: edge[2],
    )

    # Each successful union reduces the component count by exactly one.
    merges_needed = len(graph) - n_clusters
    merges_done = 0
    for u, v, _weight in edges:
        if merges_done >= merges_needed:
            break
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)
            merges_done += 1

    return calculate_max_distance(edges, uf)
コード例 #4
0
    def __init__(self, eqs):
        """Build the fixed four-node graph; ``eqs`` is currently ignored."""
        # TODO use eqs
        # NOTE we're following nelson-oppen figure 1
        outer_f = ENode('f')
        inner_f = ENode('f')
        leaf_a = ENode('a')
        leaf_b = ENode('b')

        self.v1 = outer_f
        self.v2 = inner_f
        self.v3 = leaf_a
        self.v4 = leaf_b
        self.nodes = [outer_f, inner_f, leaf_a, leaf_b]

        # One union-find slot per node.
        self.R = UF(len(self.nodes))

        # v1 has successors v2 and v4; nothing points at v1.
        outer_f.preds = []
        outer_f.succs = [inner_f, leaf_b]

        # v2 has successors v3 and v4.
        inner_f.preds = [outer_f]
        inner_f.succs = [leaf_a, leaf_b]

        leaf_a.preds = [inner_f]
        leaf_a.succs = []

        leaf_b.preds = [inner_f, outer_f]
        leaf_b.succs = []
コード例 #5
0
ファイル: kruskal.py プロジェクト: GaryLai91/algorithms
def kruskal_uf(graph):
    """Return the total weight of the edges Kruskal's algorithm selects.

    Args:
        graph: adjacency mapping ``{u: [(v, w), ...]}``. Vertex labels are
            used as ``label - 1`` indices into a union-find of size
            ``len(graph)``, so they are assumed to be ``1..len(graph)``
            -- confirm against the caller.

    Returns:
        The sum of the weights of all edges that joined two previously
        separate components, taken in ascending weight order.
    """
    uf = UF(len(graph))
    edges = sorted(
        ((u, v, w) for u, neighbours in graph.items() for v, w in neighbours),
        key=lambda edge: edge[2],
    )

    cost = 0
    for u, v, w in edges:
        # Skip edges whose endpoints are already connected (would form a cycle).
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)
            cost += w
    return cost
コード例 #6
0
def heap_clustering(graph, max_weight=2):
    """Count the clusters left after merging every edge of weight <= ``max_weight``.

    Args:
        graph: adjacency mapping of the form
            ``{u: [(v_1, w_1), (v_2, w_2), ...]}``. Vertex labels are used
            as ``label - 1`` indices into a union-find of size
            ``len(graph)``, so they are assumed to be ``1..len(graph)``
            -- confirm against the caller.
        max_weight: largest edge weight that still triggers a merge.
            Defaults to 2, the threshold that was previously hard-coded.

    Returns:
        The number of distinct union-find components.
    """
    uf = UF(len(graph))
    edges = [(w, u, v) for u, neighbours in graph.items() for v, w in neighbours]
    heapq.heapify(edges)

    while edges:
        w, u, v = heapq.heappop(edges)
        # The heap yields weights in ascending order, so nothing after the
        # first over-threshold edge can qualify.
        if w > max_weight:
            break
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)

    # Count roots through ``find`` instead of reading ``uf.parent``: raw
    # parent pointers only equal the roots if every path is fully compressed.
    return len({uf.find(index) for index in range(len(graph))})
コード例 #7
0
with open("clustering1.txt") as graph:
    for line in graph:
        split = line.strip().split(" ")
        # Heap entries have the form (cost, (node1, node2)), with the node
        # numbers from the file shifted down by one.
        edge = (int(split[2]), (int(split[0]) - 1, int(split[1]) - 1))
        heapq.heappush(edge_heap, edge)

        # The raw node labels are collected only so that the node count can
        # size the union-find object below.
        for label in split[:2]:
            set_of_nodes.add(label)

union_find = UF(len(set_of_nodes))

# Repeatedly take the cheapest remaining edge and join its endpoints
# unless they are already connected, until only k components are left.
while union_find.count() > k:
    cost, (node_1, node_2) = heapq.heappop(edge_heap)
    if union_find.connected(node_1, node_2):
        continue
    union_find.union(node_1, node_2)

# The question asks to find the maximum and minimum spacing after clustering
# the answer lies in the remaining edges in the heap.
# First build a node dictionary for each node pointing to what cluster it is in
# Also build a cluster dictionary that stores one value for each of N to N clusters
# Then keep popping from the heap, and store the edge cost in the cluster dictionary according to which cluster each
# of its nodes is in
コード例 #8
0
def clustering(n, input_data):
    """Count clusters after merging vertices whose keys are within distance 2.

    Args:
        n: number of union-find elements. Vertex ids are used as ``id - 1``
            indices, so they are assumed to be ``1..n`` -- confirm against
            the caller.
        input_data: mapping from a bit string to the list of vertex ids
            carrying that bit string, e.g.
            ``{'0000100110': [1, 2], '0110101100': [3, 4],
            '1000100110': [5], ...}``.

    Returns:
        ``len(uf.get_clusters())`` once all merges have been applied.

    Three passes are made, in this order: vertices sharing the same key,
    then keys produced by ``compute_one_distance``, then keys produced by
    ``compute_two_distance``. Candidate keys absent from ``input_data`` are
    skipped.
    """
    uf = UF(n)
    groups = dict(input_data)

    # Distance 0: every vertex stored under the same key belongs together.
    for vertices in groups.values():
        _union_all_pairs(uf, vertices, vertices)

    # Distance 1, then distance 2: look up each generated neighbour key.
    for neighbours_of in (compute_one_distance, compute_two_distance):
        for bin_str, u_vertices in groups.items():
            for neighbour in neighbours_of(bin_str):
                if neighbour in groups:
                    _union_all_pairs(uf, u_vertices, groups[neighbour])

    return len(uf.get_clusters())


def _union_all_pairs(uf, u_vertices, v_vertices):
    """Union every (u, v) pair of 1-based vertex ids not already connected."""
    for u in u_vertices:
        for v in v_vertices:
            if uf.find(u - 1) != uf.find(v - 1):
                uf.union(u - 1, v - 1)
コード例 #9
0
def setupUF():
    """Yield a fresh ``UF`` with 10 elements.

    NOTE(review): the generator form suggests a test fixture whose
    decorator is not visible here -- confirm.
    """
    union_find = UF(10)
    yield union_find