def k_clustering(file_path, number_of_clusters) -> int:
    """Merge edges until ``number_of_clusters`` remain and report the spacing.

    The graph is loaded with ``convert_file_to_kruskal_graph`` and its
    ``edge_list`` is consumed front to back (presumably sorted by ascending
    weight -- confirm against the loader).  Each edge is handed to
    ``UnionFind.union``; edges for which ``union`` returns a falsy value are
    remembered as ``partition_edges``.

    Args:
        file_path: Location passed to ``convert_file_to_kruskal_graph``.
        number_of_clusters: Merging stops as soon as ``len(union_find)`` is
            no longer greater than this value.

    Returns:
        The weight of the first unprocessed edge whose endpoints have
        different ``parent`` attributes.  If there is no such edge, the
        smallest weight among ``partition_edges``; ``0`` when that list is
        empty as well.
    """
    kruskal_graph = convert_file_to_kruskal_graph(file_path)
    union_find = UnionFind(kruskal_graph)
    partition_edges = []
    max_spacing = 0
    edge_count = 0

    while (len(union_find) > number_of_clusters
           and edge_count < len(kruskal_graph.edge_list)):
        edge = kruskal_graph.edge_list[edge_count]
        if not union_find.union(edge.node_one, edge.node_two):
            partition_edges.append(edge)
        edge_count += 1

    # NOTE(review): comparing ``parent`` directly only separates clusters if
    # UnionFind keeps every node pointing at its root -- confirm.
    for edge in kruskal_graph.edge_list[edge_count:]:
        if edge.node_one.parent != edge.node_two.parent:
            max_spacing = edge.weight
            break
    else:
        # No crossing edge is left.  The previous code called ``sorted()``
        # and threw the result away before reading element 0, and crashed
        # with IndexError on an empty list.  Take the minimum weight
        # explicitly and keep the default of 0 when nothing was recorded.
        if partition_edges:
            max_spacing = min(edge.weight for edge in partition_edges)

    print('Maximum spacing: ', max_spacing)
    return max_spacing
def clustering_1(input_file: str, number_of_clusters: int = 4):
    """Return the spacing of a single-link clustering read from a file.

    The first line of the file holds the node count; every following
    non-blank line holds three whitespace-separated integers
    ``start finish cost``.  Node labels are shifted by -1 before being
    handed to ``UnionFind`` (so they are presumably 1-based in the file).

    Edges are merged cheapest-first until only ``number_of_clusters``
    clusters remain.

    Args:
        input_file: Path given to ``BasicFuncs.load_file_as_string``.
        number_of_clusters: Target cluster count.  Defaults to 4, the value
            that used to be hard-coded.

    Returns:
        The smallest cost among the remaining edges whose endpoints lie in
        different clusters, or ``float('inf')`` when no such edge exists.
    """
    lines = BasicFuncs.load_file_as_string(input_file).splitlines()
    num_of_nodes = int(lines[0])

    edges = []
    for line in lines[1:]:
        if not line.strip():
            continue  # tolerate blank / trailing lines
        start, finish, cost = map(int, line.split())
        edges.append(Edge(start, finish, cost))
    edges.sort(key=lambda edge: edge.cost)

    clusters = num_of_nodes
    union_find = UnionFind(num_of_nodes)

    # An explicit cursor (instead of the enumerate index) stays defined for
    # an empty edge list, and testing the cluster count *before* each merge
    # avoids over-merging when the graph already has few enough nodes.
    next_index = 0
    while clusters > number_of_clusters and next_index < len(edges):
        edge = edges[next_index]
        if union_find.join_two_subsets(edge.start - 1, edge.end - 1):
            clusters -= 1
        next_index += 1

    # Spacing = cheapest not-yet-processed edge that still crosses clusters.
    return min(
        (
            edge.cost
            for edge in edges[next_index:]
            if not union_find.are_two_indicies_part_of_same_set(
                edge.start - 1, edge.end - 1)
        ),
        default=float('inf'),
    )
def test_all_disjoint(self):
    """A fresh UnionFind must report every element as joined only to itself."""
    n = 10
    uf = UnionFind(n)
    # ``range`` replaces the Python 2-only ``xrange`` so the test also runs
    # on Python 3; for n == 10 the difference is irrelevant on Python 2.
    for i in range(n):
        for j in range(n):
            # Must only be joined if i == j
            self.assertEqual(uf.is_joined(i, j), i == j)
def maximum_k(node_list=None, node_length=24, maximum_spacing=3):
    """Count the clusters left after joining all nodes closer than a spacing.

    Input: list of nodes (edges defined implicitly by Hamming distances
        between nodes)
    Output: max number of clusters with specified maximum spacing

    Method: BRUTE FORCE
        For each node iterate through all combinations of possible n-bit
        differences from 1 up to maximum_spacing - 1.
        For example, for maximum_spacing = 3 and node length 24 bits, the
        number of combinations is (24 C 1) + (24 C 2) = 300.

    Args:
        node_list: Iterable of nodes; duplicates are dropped.  ``None``
            (the default) is treated as an empty list.
        node_length: Number of bit positions that may be flipped.
        maximum_spacing: Nodes that differ in fewer than this many
            positions are merged into one cluster.

    Returns:
        The number of distinct values in ``union_find.leader_lookup``.
    """
    if node_list is None:
        node_list = []

    node_set = set(node_list)
    node_list = list(node_set)
    union_find = UnionFind(nodes=node_list)
    bitmasks = get_bitmasks(node_length)

    # ``range`` replaces the Python 2-only ``xrange``; both ranges are tiny.
    for node1 in node_list:
        for i in range(1, maximum_spacing):
            for permutation in combinations(range(node_length), i):
                # Flip the chosen bit positions to obtain a candidate
                # neighbour at distance ``i``.
                node2 = node1
                for pos in permutation:
                    node2 ^= bitmasks[pos]
                if (node2 in node_set
                        and union_find.find(node1) != union_find.find(node2)):
                    union_find.union(node1, node2)

    # NOTE(review): this relies on ``leader_lookup`` holding the final
    # leader of every node -- confirm against the UnionFind implementation.
    return len(set(union_find.leader_lookup.values()))
def test_simple_joins(self):
    """Joining two elements links exactly that pair and nothing else."""
    forest = UnionFind(10)

    # First pair: 1 and 3 become connected, 2 stays on its own.
    forest.join(1, 3)
    self.assertTrue(forest.is_joined(1, 3))
    self.assertFalse(forest.is_joined(1, 2))

    # Second, independent pair: 6 and 7 are connected but not to {1, 3}.
    forest.join(6, 7)
    self.assertTrue(forest.is_joined(6, 7))
    self.assertFalse(forest.is_joined(1, 7))
def kruskal(node_list, edge_list):
    """Build a minimum spanning tree with Kruskal's algorithm.

    Args:
        node_list: Nodes handed to ``UnionFind(nodes=...)``.
        edge_list: List of ``(weight, node1, node2)`` triples.  It is sorted
            in place, so the caller's list is reordered.

    Returns:
        A ``defaultdict(dict)`` adjacency map in which ``tree[a][b]`` and
        ``tree[b][a]`` both hold the weight of each accepted edge.
    """
    edge_list.sort()
    tree = defaultdict(dict)
    components = UnionFind(nodes=node_list)

    for cost, endpoint_a, endpoint_b in edge_list:
        # Accept an edge only when it connects two different components.
        if components.find(endpoint_a) != components.find(endpoint_b):
            components.union(endpoint_a, endpoint_b)
            tree[endpoint_a][endpoint_b] = cost
            tree[endpoint_b][endpoint_a] = cost

    return tree
def test_union_find():
    """Run a fixed sequence of unions on seven elements and print the state."""
    union_find = UnionFind(7)

    merge_sequence = ((0, 1), (1, 6), (2, 3), (6, 3), (3, 5))
    for first, second in merge_sequence:
        union_find.union(first, second)

    subset_sizes = [subset.size for subset in union_find.sets]
    print(subset_sizes)
    print(union_find.parent)
def maximum_spacing(node_list=None, edge_list=None, k=0):
    """Return the maximum spacing between k clusters.

    Kruskal's algorithm is run to completion and the accepted edges are
    recorded in the order they were added.  The spacing of a k-clustering
    is the weight of the (k - 1)-th heaviest accepted edge.

    Args:
        node_list: Nodes handed to ``UnionFind(nodes=...)``.  ``None`` is
            treated as an empty list.
        edge_list: List of ``(weight, node1, node2)`` triples.  It is sorted
            in place.  ``None`` is treated as an empty list.
        k: Number of clusters; must be at least 2.

    Returns:
        The weight (first element) of the selected edge.

    Raises:
        ValueError: If ``k`` is smaller than 2.  Such values used to index
            an unrelated edge silently (``-(k - 1)`` is 0 or positive).
        IndexError: If fewer than ``k - 1`` edges were accepted.
    """
    if k < 2:
        raise ValueError(
            'k must be at least 2 to define a spacing, got %r' % (k,))

    node_list = [] if node_list is None else node_list
    edge_list = [] if edge_list is None else edge_list

    union_find = UnionFind(nodes=node_list)
    edge_list.sort()

    edge_order = []  # accepted edges, lightest first
    for edge in edge_list:
        _, node1, node2 = edge
        if union_find.find(node1) != union_find.find(node2):
            union_find.union(node1, node2)
            edge_order.append(edge)

    return edge_order[-(k - 1)][0]
class KruskalGraph(WeightedGraph):
    """Weighted graph that can build a spanning tree with Kruskal's algorithm.

    Instance attributes:
        edges: list of ``(x, y, weight)`` tuples, sorted by weight.
        spanning_tree: list of ``(x, y, weight)`` tuples chosen by ``build``.
        components: maps each vertex to its ``Node`` wrapper.
        union: the ``UnionFind`` used by ``build``.
    """

    # Class-level names are kept so attribute access on the class keeps
    # working; every instance gets its own containers in __init__, because
    # mutating these shared objects leaked state between instances.
    edges = []
    spanning_tree = []
    components = {}
    union = None

    def __init__(self, graph=None, directed=False, *args, **kwargs):
        """Collect and sort the edges of ``graph``.

        ``graph`` is read through ``self.graph`` as a mapping of vertex to
        ``{neighbour: weight}``.  ``None`` is treated as an empty mapping.
        """
        if graph is None:
            graph = {}

        # Create the per-instance containers before the base initialiser
        # runs, so anything it stores in them is not discarded.
        self.edges = []
        self.spanning_tree = []
        self.components = {}

        super(KruskalGraph, self).__init__(graph, directed, *args, **kwargs)
        self.union = UnionFind()

        # find and insert edges
        for vertex, neighbours in self.graph.items():
            self.components[vertex] = Node(vertex)
            for neighbour, weight in neighbours.items():
                self._add_edge(vertex, neighbour, weight)

        # sort edges by weight
        self.edges = sorted(self.edges, key=lambda edge: edge[2])

    def build(self):
        """
        Build the MST using Kruskal's Algorithm.
        """
        for x, y, weight in self.edges:
            x_root = self.union.find(self.components[x])
            y_root = self.union.find(self.components[y])
            # Only edges that connect two different components are kept.
            if x_root != y_root:
                self.spanning_tree.append((x, y, weight))
                self.union.union(x_root, y_root)

    def show(self):
        """Print every spanning-tree edge as ``x y weight``."""
        for edge in self.spanning_tree:
            # A single pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s %s' % (edge[0], edge[1], edge[2]))

    def _add_edge(self, x, y, weight):
        """Record the edge unless it, or its reverse, is already stored."""
        edge = (x, y, weight)
        reverse_edge = (y, x, weight)
        if edge not in self.edges and reverse_edge not in self.edges:
            self.edges.append(edge)
def kruskals_algorithm(edges_file: str):
    """Return the total cost of the spanning tree built by Kruskal's algorithm.

    The first line of the file holds two space-separated integers (node
    count and edge count); every following line holds ``start end cost``.
    Node labels are shifted by -1 before being passed to ``UnionFind``.

    Args:
        edges_file: Path given to ``BasicFuncs.load_file_as_string``.

    Returns:
        The sum of the costs of all edges whose endpoints were successfully
        joined.
    """
    lines = BasicFuncs.load_file_as_string(edges_file).splitlines()

    # The edge count is parsed (which validates the header) but not needed.
    num_of_nodes, _num_of_edges = map(int, lines[0].split(' '))
    components = UnionFind(num_of_nodes)

    parsed_rows = (map(int, row.split(' ')) for row in lines[1:])
    edges = [Edge(start, end, cost) for start, end, cost in parsed_rows]
    edges.sort(key=lambda candidate: candidate.cost)

    # Cheapest-first: an edge contributes only when it merges two subsets.
    return sum(
        candidate.cost
        for candidate in edges
        if components.join_two_subsets(candidate.start - 1, candidate.end - 1)
    )
def __init__(self, graph=None, directed=False, *args, **kwargs):
    """Collect and sort the edges of ``graph``.

    ``graph`` is read through ``self.graph`` as a mapping of vertex to
    ``{neighbour: weight}``.  ``None`` is treated as an empty mapping.
    """
    if graph is None:
        graph = {}

    # Give each instance its own containers before the base initialiser
    # runs; appending to the class-level list/dict shared state between
    # every KruskalGraph instance.
    self.edges = []
    self.spanning_tree = []
    self.components = {}

    super(KruskalGraph, self).__init__(graph, directed, *args, **kwargs)
    self.union = UnionFind()

    # find and insert edges
    for vertex, neighbours in self.graph.items():
        self.components[vertex] = Node(vertex)
        for neighbour, weight in neighbours.items():
            self._add_edge(vertex, neighbour, weight)

    # sort edges by weight
    self.edges = sorted(self.edges, key=lambda edge: edge[2])
def kruskal_algorithm(graph):
    """Build a minimum spanning tree of ``graph`` with Kruskal's algorithm.

    Args:
        graph: Object exposing ``number_of_nodes``, ``sort_edges()``,
            ``edges`` (edge id -> endpoints) and ``nodes``.

    Returns:
        A tuple ``(tree, weight)`` where ``tree`` is a new ``Graph`` made of
        the accepted nodes and edges and ``weight`` is
        ``tree.graph_weight()``.
    """
    components = UnionFind(graph.number_of_nodes)
    ordered_edges = graph.sort_edges()
    tree_nodes = {}
    tree_edges = {}

    for entry in ordered_edges:
        edge_id = entry[0]
        node_a = graph.edges[edge_id][0]
        node_b = graph.edges[edge_id][1]
        cost = entry[1]

        # Node labels are shifted by -1 for the union-find structure.
        merged = components.union(node_a - 1, node_b - 1)
        if merged == 1:
            # The endpoints were in different sets: copy field 2 of each
            # node record (its terminal status) into the tree.
            tree_nodes[node_a] = graph.nodes[node_a][2]
            tree_nodes[node_b] = graph.nodes[node_b][2]
            # Edges are numbered consecutively starting at 1.
            tree_edges[len(tree_edges) + 1] = [node_a, node_b, cost]

    tree = Graph(len(tree_nodes), len(tree_edges), tree_nodes, tree_edges)
    return tree, tree.graph_weight()
def test_chained_joins(self):
    """Joins are transitive across a chain and when a new element is attached."""
    forest = UnionFind(10)

    # Link 1-2-3-4 into a single chain.
    for left, right in ((1, 2), (2, 3), (3, 4)):
        forest.join(left, right)

    self.assertTrue(forest.is_joined(1, 4))
    self.assertTrue(forest.is_joined(3, 1))
    self.assertFalse(forest.is_joined(0, 1))

    # Attaching 8 to the middle of the chain connects it to every member.
    forest.join(8, 3)
    self.assertTrue(forest.is_joined(1, 8))
    self.assertTrue(forest.is_joined(4, 8))