def kruskal_MST(gr):
    """Return the total weight of the edges selected by Kruskal's algorithm.

    Edges come from ``gr.get_edge_weights()``, unpacked here as
    ``(weight, (u, v))`` pairs, and are visited in ascending sorted order.
    An edge is accepted (and its weight added to the total) when its two
    endpoints are not already in the same union-find set.

    Args:
        gr: graph object exposing ``get_edge_weights()``.

    Returns:
        The sum of the weights of all accepted edges; 0 when there are no
        edges.
    """
    sorted_edges = sorted(gr.get_edge_weights())
    uf = UnionFind()
    min_cost = 0
    for w, (u, v) in sorted_edges:
        # Look each leader up once instead of up to four times per edge.
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        # NOTE(review): assumes get_leader() returns None for a node that has
        # not been inserted yet -- confirm against UnionFind.
        # Testing `is None` rather than truthiness matters: a falsy leader
        # (e.g. node 0) must not be mistaken for "unseen", otherwise an edge
        # inside an already-connected component would be counted again.
        if leader_u is None or leader_v is None or leader_u != leader_v:
            uf.insert(u, v)
            min_cost += w
    return min_cost
def kruskal_MST(gr):
    """Compute the cost of a minimum spanning tree with Kruskal's algorithm.

    Intended for an undirected, connected graph. Uses a union-find data
    structure, for a stated running time of O(m log n).

    Edges are read from ``gr.get_edge_weights()`` as ``(weight, (u, v))``
    pairs and processed in ascending sorted order; an edge is taken when
    its endpoints do not yet share a union-find leader.

    Args:
        gr: graph object exposing ``get_edge_weights()``.

    Returns:
        The summed weight of the edges taken (0 for a graph with no edges).
    """
    sorted_edges = sorted(gr.get_edge_weights())
    uf = UnionFind()
    min_cost = 0
    for w, (u, v) in sorted_edges:
        # One lookup per endpoint; the result is reused in the test below.
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        # NOTE(review): assumes get_leader() returns None for nodes that were
        # never inserted -- confirm against UnionFind.
        # An explicit None check is used because a truthiness check treats a
        # falsy leader such as 0 as "missing" and would wrongly accept an
        # edge whose endpoints are already connected.
        if leader_u is None or leader_v is None or leader_u != leader_v:
            uf.insert(u, v)
            min_cost += w
    return min_cost
def is_graph_cyclic(gr):
    """Tell whether the graph contains a cycle, using union-find.

    Every node from ``gr.nodes()`` starts in its own set. Each edge from
    ``gr.edges()`` is then examined once (the reversed pair is recorded as
    seen too): if both endpoints already share a leader the edge closes a
    cycle, otherwise the two sets are merged.

    Args:
        gr: graph object exposing ``nodes()`` and ``edges()``; edges are
            unpacked as two-element, hashable pairs.

    Returns:
        True as soon as an edge joins two nodes with the same leader,
        False if no such edge exists.
    """
    components = UnionFind()
    seen = set()

    for vertex in gr.nodes():
        components.insert(vertex, vertex)

    for edge in gr.edges():
        if edge not in seen:
            src, dst = edge
            # Remember both orientations so the mirrored edge is skipped.
            seen.update((edge, (dst, src)))

            root_src = components.get_leader(src)
            root_dst = components.get_leader(dst)
            if root_src == root_dst:
                return True
            components.make_union(root_src, root_dst)

    return False
def setUp(self):
    """Create a fresh UnionFind seeded with the pairs (a, b), (b, c), (i, j)."""
    self.uf = UnionFind()
    for first, second in (("a", "b"), ("b", "c"), ("i", "j")):
        self.uf.insert(first, second)
class test_unionfind(unittest.TestCase):
    """Unit tests for UnionFind: leader lookup, insert and make_union."""

    def setUp(self):
        # Fixture: the pairs (a, b), (b, c) and (i, j) inserted in that order.
        self.uf = UnionFind()
        for first, second in (("a", "b"), ("b", "c"), ("i", "j")):
            self.uf.insert(first, second)

    def test_get_parent_method(self):
        # (expected leader, member) in the order they are checked.
        expectations = (
            ("a", "a"),
            ("a", "b"),
            ("a", "c"),
            ("i", "j"),
            ("i", "i"),
        )
        for expected, member in expectations:
            self.assertEqual(expected, self.uf.get_leader(member))
        self.assertNotEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_insert_method(self):
        self.uf.insert("c", "d")
        # "d" must end up with the same leader as "c" and as "a".
        for member in ("c", "a"):
            self.assertEqual(
                self.uf.get_leader(member), self.uf.get_leader("d")
            )

    def test_make_union_method(self):
        leader_a = self.uf.get_leader("a")
        leader_i = self.uf.get_leader("i")
        self.uf.make_union(leader_a, leader_i)
        self.assertEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_make_union_with_invalid_leader_raises_exception(self):
        with self.assertRaises(Exception):
            self.uf.make_union("a", "z")
def setup_uf():
    """Build and return a UnionFind seeded with (a, b), (b, c) and (i, j)."""
    seeded = UnionFind()
    for pair in (("a", "b"), ("b", "c"), ("i", "j")):
        seeded.insert(*pair)
    return seeded
def max_k_clustering(gr, k):
    """Group the graph's nodes into at most ``k`` clusters.

    Every node starts as its own cluster. Edges from
    ``gr.get_edge_weights()`` (unpacked as ``(weight, (u, v))``) are visited
    in ascending sorted order, and the clusters of an edge's endpoints are
    merged whenever they differ, until the number of clusters drops to
    ``k`` or below.

    Args:
        gr: graph object exposing ``nodes()`` and ``get_edge_weights()``.
        k: target number of clusters.

    Returns:
        ``uf.get_sets()`` once ``uf.count_groups() <= k``; ``None`` if the
        edges run out while more than ``k`` clusters remain.
    """
    sorted_edges = sorted(gr.get_edge_weights())
    uf = UnionFind()

    # initialize each node as its own cluster
    for n in gr.nodes():
        uf.insert(n)

    for _weight, (u, v) in sorted_edges:
        if uf.count_groups() <= k:
            return uf.get_sets()
        # Look each leader up once and reuse it for the merge.
        leader_u = uf.get_leader(u)
        leader_v = uf.get_leader(v)
        if leader_u != leader_v:
            uf.make_union(leader_u, leader_v)

    # The loop only checks the cluster count *before* handling an edge, so a
    # merge on the final edge (or a graph with no edges at all) would
    # otherwise fall through and return None even though the target has
    # been reached.
    if uf.count_groups() <= k:
        return uf.get_sets()
    return None
class test_unionfind(unittest.TestCase):
    """Unit tests for UnionFind: leaders, inserts, unions and group count."""

    def setUp(self):
        # Fixture: the pairs (a, b), (b, c) and (i, j) inserted in that order.
        self.uf = UnionFind()
        for first, second in (("a", "b"), ("b", "c"), ("i", "j")):
            self.uf.insert(first, second)

    def test_get_parent_method(self):
        # (expected leader, member) in the order they are checked.
        expectations = (
            ("a", "a"),
            ("a", "b"),
            ("a", "c"),
            ("i", "j"),
            ("i", "i"),
        )
        for expected, member in expectations:
            self.assertEqual(expected, self.uf.get_leader(member))
        self.assertNotEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_insert_method(self):
        self.uf.insert("c", "d")
        # "d" must end up with the same leader as "c" and as "a".
        for member in ("c", "a"):
            self.assertEqual(
                self.uf.get_leader(member), self.uf.get_leader("d")
            )

    def test_insert_one_node(self):
        self.uf.insert("z")
        leader_z = self.uf.get_leader("z")
        self.assertEqual(leader_z, "z")
        group_total = self.uf.count_groups()
        self.assertEqual(group_total, 3)

    def test_make_union_method(self):
        leader_a = self.uf.get_leader("a")
        leader_i = self.uf.get_leader("i")
        self.uf.make_union(leader_a, leader_i)
        self.assertEqual(self.uf.get_leader("a"), self.uf.get_leader("i"))

    def test_make_union_with_invalid_leader_raises_exception(self):
        with self.assertRaises(Exception):
            self.uf.make_union("a", "z")

    def test_get_count(self):
        self.uf.insert("z", "y")
        group_total = self.uf.count_groups()
        self.assertEqual(group_total, 3)
import os, sys
import operator
sys.path.append(os.path.join(os.getcwd(), os.path.pardir))
from graphs.graph import graph
from itertools import *
from union_find.unionfind import UnionFind


def ham_dist(e1, e2):
    """ computes hamming distance between two strings e1 and e2

    Elements are compared position by position and the number of
    mismatches is returned. Comparison stops at the end of the shorter
    input, so any surplus tail of the longer one is not counted.
    """
    # Built-in zip + generator instead of itertools.imap: imap is not
    # available in Python 3's itertools, while zip truncates to the shorter
    # input in the same way on both major versions.
    return sum(a != b for a, b in zip(e1, e2))


path = "clustering3.txt"
# Read every line, then close the file deterministically instead of
# leaving the handle open.
with open(path) as source:
    nodes = source.readlines()
uf = UnionFind()

# Earlier exploratory snippets, kept for reference:
# bitcount = {i: nodes[i].count('1') for i in range(len(nodes))}
# similar = [(bitcount[i]-9, ham_dist(nodes[i], nodes[0])) for i in range(1, len(nodes))]
# print nodes[1].count('1') - nodes[2].count('1')
# print hamdist(nodes[1], nodes[2])

# Visit every unordered pair (i < j) in the same order as a nested index
# loop and print the indices with their hamming distance.
for (i, node_i), (j, node_j) in combinations(enumerate(nodes), 2):
    print(i, j, ham_dist(node_i, node_j))