def run_kruskal(pm, pl, n, lvs1):
    """Run Kruskal's algorithm over the complete graph on nodes ``0 .. n-1``.

    The arguments ``pm``, ``pl`` and ``lvs1`` are published to the module
    globals ``profiles``, ``maxlen`` and ``lvs`` before the edges are sorted.
    NOTE(review): presumably ``EdgeComp`` reads those globals to compare two
    edges -- confirm against its definition.

    Args:
        pm: Stored in the global ``profiles``.
        pl: Profile length; stored in the global ``maxlen``.
        n: Number of nodes.
        lvs1: Stored in the global ``lvs``.

    Returns:
        A list of at most ``n - 1`` edges, each a two-element list ``[i, j]``
        with ``i < j``, taken in ``EdgeComp`` order and skipping every edge
        whose endpoints are already in the same union-find set.
    """
    global profiles
    global maxlen
    global lvs
    # Function-scope import keeps this block self-contained.
    from functools import cmp_to_key

    maxlen = pl  # profile length
    lvs = lvs1
    profiles = pm

    edges = [[i, j] for i in range(n) for j in range(i + 1, n)]
    # EdgeComp is a cmp-style comparator.  Python 3's list.sort() accepts no
    # positional comparator, so it is adapted with cmp_to_key (this form also
    # works on Python 2.7).
    edges.sort(key=cmp_to_key(EdgeComp))

    uf = UF(n)
    tree = []
    for edge in edges:
        if len(tree) >= n - 1:
            # A spanning tree on n nodes has exactly n - 1 edges.
            break
        if uf.find(edge[0]) != uf.find(edge[1]):
            tree.append(edge)
            uf.union(edge[0], edge[1])
    return tree
class EGraph:
    """Hard-coded four-node graph with a union-find over its nodes.

    ``self.R`` is a ``UF`` sized to ``self.nodes``; ``merge`` unions two
    nodes and then recursively unions predecessor pairs that ``congruent``
    accepts.

    NOTE(review): the methods mix ``node.id`` with positions in
    ``self.nodes`` when talking to ``self.R``; this assumes ``ENode.id``
    equals the node's index in ``self.nodes`` -- TODO confirm.
    """

    def __init__(self, eqs):
        """Build the fixed example graph; ``eqs`` is currently ignored."""
        # TODO use eqs
        # NOTE we're following nelson-oppen figure 1
        first, second, third, fourth = (
            ENode('f'),
            ENode('f'),
            ENode('a'),
            ENode('b'),
        )
        self.v1 = first
        self.v2 = second
        self.v3 = third
        self.v4 = fourth
        self.nodes = [first, second, third, fourth]
        self.R = UF(len(self.nodes))

        # Wire predecessor / successor lists node by node.
        first.preds, first.succs = [], [second, fourth]
        second.preds, second.succs = [first], [third, fourth]
        third.preds, third.succs = [second], []
        fourth.preds, fourth.succs = [second, first], []

    def preds(self, n):
        """Return the concatenated ``preds`` lists of every node in ``n``'s class."""
        # FIXME inefficient
        wanted = self.R.find(n.id)
        return [
            pred
            for index, member in enumerate(self.nodes)
            if self.R.find(index) == wanted
            for pred in member.preds
        ]

    def merge(self, n1, n2):
        """Union the classes of ``n1`` and ``n2`` and propagate to congruent predecessors."""
        if self.R.find(n1.id) == self.R.find(n2.id):
            return

        # Predecessors must be collected before the union changes the classes.
        left_preds = self.preds(n1)
        right_preds = self.preds(n2)
        self.R.union(n1.id, n2.id)

        for x, y in itertools.product(left_preds, right_preds):
            if self.R.find(x.id) == self.R.find(y.id):
                continue
            if self.congruent(x, y):
                self.merge(x, y)

    def congruent(self, n1, n2):
        """Return True when both nodes have pairwise-equivalent successor lists.

        NOTE(review): node labels are not compared here -- confirm that is
        intended.
        """
        if len(n1.succs) != len(n2.succs):
            return False
        return all(
            self.R.find(a.id) == self.R.find(b.id)
            for a, b in zip(n1.succs, n2.succs)
        )
def clustering(graph, n_clusters=4):
    """Merge the cheapest edges until ``n_clusters`` groups remain.

    args: graph
    graph = {
        u: [(v_1, w_1), (v_2, w_2), ...]
    }

    Vertices are mapped to union-find slots as ``u - 1``, so labels are
    presumably the integers ``1 .. len(graph)`` -- TODO confirm with callers.

    Args:
        graph: Adjacency mapping as shown above.
        n_clusters: Number of clusters to stop at.

    Returns:
        The value of ``calculate_max_distance(edges, uf)``, where ``edges`` is
        the weight-sorted edge list and ``uf`` the union-find after merging.
    """
    uf = UF(len(graph))
    edges = sorted(
        ((u, v, w) for u, adjacent in graph.items() for v, w in adjacent),
        key=lambda edge: edge[2],
    )

    # Every successful union reduces the number of clusters by exactly one.
    merges_needed = len(graph) - n_clusters
    merges_done = 0
    edges_index = 0
    # The bound on edges_index stops the loop when the edges are exhausted
    # before the target is reached, instead of raising IndexError.
    while merges_done < merges_needed and edges_index < len(edges):
        u, v, _ = edges[edges_index]
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)
            merges_done += 1
        edges_index += 1

    return calculate_max_distance(edges, uf)
def __init__(self, eqs):
    """Build the fixed four-node example graph; ``eqs`` is currently ignored.

    Creates nodes labelled 'f', 'f', 'a', 'b' (in that order), a ``UF`` sized
    to the node list, and the hard-coded predecessor/successor lists.
    """
    # TODO use eqs
    # NOTE we're following nelson-oppen figure 1
    self.v1, self.v2, self.v3, self.v4 = (
        ENode('f'),
        ENode('f'),
        ENode('a'),
        ENode('b'),
    )
    self.nodes = [self.v1, self.v2, self.v3, self.v4]
    self.R = UF(len(self.nodes))

    # (preds, succs) for each node, assigned in the same order as before.
    self.v1.preds, self.v1.succs = [], [self.v2, self.v4]
    self.v2.preds, self.v2.succs = [self.v1], [self.v3, self.v4]
    self.v3.preds, self.v3.succs = [self.v2], []
    self.v4.preds, self.v4.succs = [self.v2, self.v1], []
def kruskal_uf(graph):
    """Return the total weight of the edges Kruskal's algorithm accepts.

    ``graph`` maps each vertex ``u`` to a list of ``(v, w)`` pairs.  Vertices
    are mapped to union-find slots as ``u - 1``, so labels are presumably the
    integers ``1 .. len(graph)`` -- TODO confirm with callers.

    Args:
        graph: Adjacency mapping ``{u: [(v, w), ...]}``.

    Returns:
        The sum of ``w`` over every edge, taken in ascending weight order,
        whose endpoints were not yet in the same union-find set.
    """
    uf = UF(len(graph))
    edges = sorted(
        ((u, v, w) for u, adjacent in graph.items() for v, w in adjacent),
        key=lambda edge: edge[2],
    )

    cost = 0
    for u, v, w in edges:
        # Accept the edge only if it joins two different components.
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)
            cost += w
    return cost
def heap_clustering(graph):
    """Count the clusters left after merging every edge of weight <= 2.

    args: graph
    graph = {
        u: [(v_1, w_1), (v_2, w_2), ...]
    }

    Vertices are mapped to union-find slots as ``u - 1``, so labels are
    presumably the integers ``1 .. len(graph)`` -- TODO confirm with callers.

    Returns:
        The number of distinct union-find representatives over slots
        ``0 .. len(graph) - 1``.
    """
    uf = UF(len(graph))

    edges = [(w, u, v) for u, adjacent in graph.items() for v, w in adjacent]
    heapq.heapify(edges)

    while edges:
        w, u, v = heapq.heappop(edges)
        if w > 2:
            # The heap yields weights in ascending order, so nothing that
            # remains can qualify.
            break
        if uf.find(u - 1) != uf.find(v - 1):
            uf.union(u - 1, v - 1)

    # Count representatives via find(): raw parent entries only equal the
    # root when every path is fully compressed, so counting distinct parent
    # values can overcount clusters.
    return len({uf.find(slot) for slot in range(len(graph))})
# Load the edge list, then merge the cheapest edges until k clusters remain.
# Relies on `edge_heap`, `set_of_nodes` and `k` being defined earlier in the file.
with open("clustering1.txt") as graph:
    for line in graph:
        # Each line must have at least three space-separated fields:
        # node1 node2 cost (a shorter line raises IndexError on split[2]).
        split = line.strip().split(" ")
        heapq.heappush(edge_heap,
                       # Add a tuple in the form of (cost, (node1, node2));
                       # node ids are shifted to 0-based by the "- 1".
                       (
                           int(split[2]),
                           (int(split[0]) - 1, int(split[1]) - 1)
                       )
                       )
        # creating a set of nodes solely to get the node count later to initialize the union find object
        # (the raw string tokens are stored, not the converted ints)
        set_of_nodes.add(split[0])
        set_of_nodes.add(split[1])

union_find = UF(len(set_of_nodes))

# Keep popping the smallest edge off the heap
# if the nodes are not connected then union them
# NOTE(review): heappop raises IndexError if the heap runs empty before
# union_find.count() drops to k -- confirm the input is always connected enough.
while union_find.count() > k:
    (cost, (node_1, node_2)) = heapq.heappop(edge_heap)
    if not union_find.connected(node_1, node_2):
        union_find.union(node_1, node_2)

# The question asks to find the maximum and minimum spacing after clustering
# the answer lies in the remaining edges in the heap.
# First build a node dictionary for each node pointing to what cluster it is in
# Also build a cluster dictionary that stores one value for each of N to N clusters
# Then keep popping from the heap, and store the edge cost in the cluster dictionary according to which cluster each
# of its nodes are in
def _union_vertex_groups(uf, u_vertices, v_vertices):
    """Union every pair ``(u, v)`` drawn from two lists of 1-based vertex ids."""
    for u in u_vertices:
        for v in v_vertices:
            if uf.find(u - 1) != uf.find(v - 1):
                uf.union(u - 1, v - 1)


def clustering(n, input_data):
    """Cluster vertices whose bit strings are within distance 2 of each other.

    args: input_data
    input_data = {'0000100110': [1,2],
                  '0110101100': [3,4] ,
                  '1000100110' : [5],
                  ...
                  }

    Vertex ids are mapped to union-find slots as ``u - 1``.  The candidate
    neighbours of a bit string come from ``compute_one_distance`` and
    ``compute_two_distance``.  NOTE(review): presumably these yield the
    strings at Hamming distance 1 and 2 -- confirm against their definitions.

    Args:
        n: Size passed to ``UF``.
        input_data: Mapping from bit string to the list of vertices carrying it.

    Returns:
        ``len(uf.get_clusters())`` after all three merge passes.
    """
    uf = UF(n)
    input_dict = dict(input_data.items())

    # Distance 0: vertices that share the same bit string.
    for vertices in input_dict.values():
        _union_vertex_groups(uf, vertices, vertices)

    # Distance 1, then distance 2: the full distance-1 pass completes before
    # the distance-2 pass starts.
    for neighbours_of in (compute_one_distance, compute_two_distance):
        for bin_str, u_vertices in input_dict.items():
            for node in neighbours_of(bin_str):
                if node not in input_dict:
                    continue
                _union_vertex_groups(uf, u_vertices, input_dict[node])

    return len(uf.get_clusters())
def setupUF():
    """Yield one ``UF`` instance constructed with the argument 10.

    NOTE(review): the generator shape suggests this is used as a test
    fixture -- confirm where it is registered.
    """
    union_find = UF(10)
    yield union_find