Example #1
0
def k_clustering(file_path, number_of_clusters) -> int:
    """Return the spacing found after greedily merging down to k clusters.

    The graph is loaded from ``file_path`` and its ``edge_list`` is consumed
    in order (presumably ascending by weight -- confirm against
    ``convert_file_to_kruskal_graph``). Edges are unioned until
    ``len(union_find)`` is no longer greater than ``number_of_clusters`` or
    the edge list is exhausted.

    The result is the weight of the first remaining edge whose endpoints
    have different ``parent`` values. If there is no such edge, the result
    falls back to the smallest edge (by the edges' own ordering) among those
    for which ``union`` returned a falsy value.

    Args:
        file_path: Location of the graph description file.
        number_of_clusters: Target number of clusters (k).

    Returns:
        The computed spacing; it is also printed.

    Raises:
        IndexError: If no separating edge remains and no edge was rejected
            by ``union`` (the fallback list is empty).
    """
    kruskal_graph = convert_file_to_kruskal_graph(file_path)
    union_find = UnionFind(kruskal_graph)
    partition_edges = []
    max_spacing = 0
    edge_count = 0

    while len(union_find) > number_of_clusters and edge_count < len(
            kruskal_graph.edge_list):
        edge = kruskal_graph.edge_list[edge_count]
        # Remember the edges that union() rejected; they feed the fallback.
        if not union_find.union(edge.node_one, edge.node_two):
            partition_edges.append(edge)
        edge_count += 1

    for edge in kruskal_graph.edge_list[edge_count:]:
        # NOTE(review): compares direct parents, not set representatives;
        # this is only correct if UnionFind keeps parents fully compressed.
        if edge.node_one.parent != edge.node_two.parent:
            max_spacing = edge.weight
            break
    else:
        # No remaining edge separates two clusters. The original code called
        # sorted() and discarded the result, so it read an arbitrary element;
        # index the sorted list so the smallest edge is actually used.
        max_spacing = sorted(partition_edges)[0].weight

    print('Maximum spacing: ', max_spacing)
    return max_spacing
Example #2
0
    def clustering_1(input_file: str, num_of_clusters: int = 4):
        """Return the spacing of a single-link clustering of the input graph.

        The file's first line holds the number of nodes; every following line
        holds ``start finish cost`` as integers separated by single spaces.
        Node labels are converted to 0-based indices by subtracting 1.

        Edges are processed in ascending cost order. While more than
        ``num_of_clusters`` clusters remain, each edge is offered to the
        union-find structure. Once the target is reached, the first edge whose
        endpoints lie in different clusters gives the spacing.

        Args:
            input_file: Path of the edge list file.
            num_of_clusters: Target number of clusters (defaults to 4, the
                value that used to be hard-coded).

        Returns:
            The cost of the cheapest edge crossing two clusters, or
            ``float('inf')`` when no such edge exists (including when the
            target cluster count is never reached or there are no edges).
        """
        lines = BasicFuncs.load_file_as_string(input_file).splitlines()
        num_of_nodes = int(lines[0])
        edges = []
        for line in lines[1:]:
            start, finish, cost = map(int, line.split(' '))
            edges.append(Edge(start, finish, cost))
        edges.sort(key=lambda edge: edge.cost)

        clusters = num_of_nodes
        union_find = UnionFind(num_of_nodes)
        for edge in edges:
            a = edge.start - 1
            b = edge.end - 1
            if clusters > num_of_clusters:
                # Still too many clusters: try to merge. Checking the target
                # before merging avoids one merge too many when the graph
                # starts with no more nodes than the requested clusters.
                if union_find.join_two_subsets(a, b):
                    clusters -= 1
            elif not union_find.are_two_indicies_part_of_same_set(a, b):
                # Edges are sorted, so the first crossing edge is the cheapest.
                return edge.cost
        return float('inf')
 def test_all_disjoint(self):
     """A freshly built UnionFind reports each element joined only to itself."""
     n = 10
     uf = UnionFind(n)
     # range() exists on both Python 2 and 3; xrange() is Python 2 only.
     for i in range(n):
         for j in range(n):
             # Must only be joined if i == j
             self.assertEqual(uf.is_joined(i, j), i == j)
Example #4
0
def maximum_k(node_list=None, node_length=24, maximum_spacing=3):
    """Count the clusters left after merging all nodes closer than a spacing.

    Input:
        node_list: nodes (edges are defined implicitly by the Hamming
            distance between nodes); duplicates are ignored.
        node_length: number of bit positions considered per node.
        maximum_spacing: nodes that differ in fewer than this many bit
            positions are merged into the same cluster.
    Output:
        The number of distinct leaders in ``union_find.leader_lookup`` after
        all merges.
    Method:
        BRUTE FORCE
        For each node, iterate through every combination of n-bit
        differences for n from 1 up to maximum_spacing - 1. For example, for
        maximum_spacing = 3 and a node length of 24 bits, the number of
        combinations is (24 C 1) + (24 C 2) = 300.
    """
    node_set = set(node_list) if node_list is not None else set()
    node_list = list(node_set)
    union_find = UnionFind(nodes=node_list)
    bitmasks = get_bitmasks(node_length)

    # The bit-flip masks do not depend on the node, so build them once
    # instead of regenerating every combination for every node.
    flip_masks = []
    for flipped_bits in range(1, maximum_spacing):
        for positions in combinations(range(node_length), flipped_bits):
            mask = bitmasks[positions[0]]
            for pos in positions[1:]:
                mask = mask ^ bitmasks[pos]
            flip_masks.append(mask)

    for node1 in node_list:
        for mask in flip_masks:
            node2 = node1 ^ mask
            if node2 in node_set:
                if union_find.find(node1) != union_find.find(node2):
                    union_find.union(node1, node2)

    return len(set(union_find.leader_lookup.values()))
 def test_simple_joins(self):
     """Joining a pair links exactly those two elements and nothing else."""
     forest = UnionFind(10)

     # First pair: 1 and 3 become connected, 2 stays on its own.
     forest.join(1, 3)
     self.assertTrue(forest.is_joined(1, 3))
     self.assertFalse(forest.is_joined(1, 2))

     # Second pair: 6 and 7 become connected but stay apart from 1's set.
     forest.join(6, 7)
     self.assertTrue(forest.is_joined(6, 7))
     self.assertFalse(forest.is_joined(1, 7))
Example #6
0
def kruskal(node_list, edge_list):
    """Build a spanning forest with Kruskal's algorithm.

    ``edge_list`` holds ``(weight, node1, node2)`` tuples and is sorted in
    place. The result is a nested mapping where ``tree[a][b]`` and
    ``tree[b][a]`` both hold the weight of each accepted edge.
    """
    edge_list.sort()
    components = UnionFind(nodes=node_list)
    tree = defaultdict(dict)  # adjacency mapping of the accepted edges

    for weight, node1, node2 in edge_list:
        # Skip edges whose endpoints already share a component.
        if components.find(node1) == components.find(node2):
            continue
        components.union(node1, node2)
        tree[node1][node2] = weight
        tree[node2][node1] = weight

    return tree
def test_union_find():
    """Run a fixed sequence of unions on 7 elements and print the state."""
    union_find = UnionFind(7)
    merges = ((0, 1), (1, 6), (2, 3), (6, 3), (3, 5))
    for left, right in merges:
        union_find.union(left, right)

    # Dump the size of every entry in ``sets`` and then the parent structure.
    set_sizes = [subset.size for subset in union_find.sets]
    print(set_sizes)
    print(union_find.parent)
Example #8
0
def maximum_spacing(node_list=None, edge_list=None, k=0):
    """Return the maximum spacing between k clusters.

    Runs Kruskal's algorithm over ``edge_list`` (``(weight, node1, node2)``
    tuples, sorted in place) and records every edge that merged two
    components. The weight of the (k - 1)-th accepted edge from the end is
    returned; for a connected graph that is the edge whose merge would take
    the clustering from k clusters down to k - 1.

    Args:
        node_list: Nodes handed to ``UnionFind``; defaults to no nodes.
        edge_list: Weighted edges; defaults to no edges.
        k: Desired number of clusters, at least 2.

    Returns:
        The weight of the selected edge.

    Raises:
        ValueError: If ``k`` is smaller than 2. The index ``-(k - 1)`` is
            only meaningful from 2 upwards; smaller values used to return an
            unrelated edge weight silently.
        IndexError: If fewer than ``k - 1`` edges were accepted.
    """
    if k < 2:
        raise ValueError('k must be at least 2, got %r' % (k,))
    if node_list is None:
        node_list = []
    if edge_list is None:
        edge_list = []

    edge_order = []
    union_find = UnionFind(nodes=node_list)

    edge_list.sort()

    for edge in edge_list:
        weight, node1, node2 = edge
        if union_find.find(node1) != union_find.find(node2):
            union_find.union(node1, node2)
            edge_order.append(edge)

    return edge_order[-(k - 1)][0]
Example #9
0
class KruskalGraph(WeightedGraph):
    """Weighted graph that builds a spanning tree with Kruskal's algorithm."""

    # Class-level defaults are kept so attribute lookups on the class keep
    # working. __init__ gives every instance its own containers, so edges,
    # components and tree entries are never shared between graphs.
    edges = []
    spanning_tree = []
    components = {}
    union = None

    def __init__(self, graph=None, directed=False, *args, **kwargs):
        """Collect the graph's edges and sort them by weight.

        Args:
            graph: Mapping of vertex -> {neighbour: weight}; an empty graph
                is used when omitted.
            directed: Forwarded unchanged to the base class.
        """
        # Per-instance state, created before the base initializer runs so the
        # base class can still replace it if it manages these names itself.
        self.edges = []
        self.spanning_tree = []
        self.components = {}
        if graph is None:
            graph = {}

        super(KruskalGraph, self).__init__(graph, directed, *args, **kwargs)
        self.union = UnionFind()

        # find and insert edges
        for vertex, edges in self.graph.items():
            self.components[vertex] = Node(vertex)

            for edge, weight in edges.items():
                self._add_edge(vertex, edge, weight)

        # sort edges by weight (third tuple element)
        self.edges = sorted(self.edges, key=lambda item: item[2])

    def build(self):
        """
        Build the MST using Kruskal's Algorithm.

        Edges are visited in ascending weight order; an edge is accepted
        when its endpoints currently have different union-find roots.
        """
        for x, y, weight in self.edges:
            x_root = self.union.find(self.components[x])
            y_root = self.union.find(self.components[y])

            if x_root != y_root:
                self.spanning_tree.append((x, y, weight))
                self.union.union(x_root, y_root)

    def show(self):
        """Print every spanning-tree edge as ``x y weight`` on its own line."""
        for edge in self.spanning_tree:
            # %-formatting gives the same output on Python 2 and Python 3,
            # unlike the Python 2 only print statement.
            print('%s %s %s' % (edge[0], edge[1], edge[2]))

    def _add_edge(self, x, y, weight):
        """Record the edge unless it, or its reverse, is already stored."""
        edge = (x, y, weight)
        reverse_edge = (y, x, weight)

        if edge not in self.edges and reverse_edge not in self.edges:
            self.edges.append(edge)
    def kruskals_algorithm(edges_file: str):
        """Return the total cost of the edges accepted by Kruskal's algorithm.

        The first line of the file holds two integers (node count and edge
        count); each following line holds ``start end cost`` separated by
        single spaces. Node labels are shifted down by one before they are
        handed to the union-find structure.
        """
        lines = BasicFuncs.load_file_as_string(edges_file).splitlines()
        # The header must contain exactly two integers; only the first is used.
        num_of_nodes, _ = map(int, lines[0].split(' '))
        union_find = UnionFind(num_of_nodes)

        parsed_edges = []
        for line in lines[1:]:
            start, end, cost = map(int, line.split(' '))
            parsed_edges.append(Edge(start, end, cost))
        cheapest_first = sorted(parsed_edges, key=lambda item: item.cost)

        total_cost = 0
        for edge in cheapest_first:
            # Only edges that actually merge two subsets count towards the cost.
            if union_find.join_two_subsets(edge.start - 1, edge.end - 1):
                total_cost += edge.cost

        return total_cost
Example #11
0
    def __init__(self, graph=None, directed=False, *args, **kwargs):
        """Collect the graph's edges and sort them by weight.

        Args:
            graph: Mapping of vertex -> {neighbour: weight}. A fresh empty
                dict is used when omitted, so no default object is shared
                between calls.
            directed: Forwarded unchanged to the base class.
        """
        if graph is None:
            graph = {}
        super(KruskalGraph, self).__init__(graph, directed, *args, **kwargs)
        self.union = UnionFind()

        # find and insert edges
        for vertex, edges in self.graph.items():
            self.components[vertex] = Node(vertex)

            for edge, weight in edges.items():
                self._add_edge(vertex, edge, weight)

        # sort edges by weight (third tuple element)
        self.edges = sorted(self.edges, key=lambda item: item[2])
    def kruskal_algorithm(graph):
        """Build a spanning tree of ``graph`` with Kruskal's algorithm.

        Walks the edges returned by ``graph.sort_edges()`` and keeps each one
        whose ``union`` call returns 1. Returns a tuple of the new ``Graph``
        and the value of its ``graph_weight()``.
        """
        components = UnionFind(graph.number_of_nodes)
        tree_nodes = {}
        tree_edges = {}

        for edge in graph.sort_edges():
            endpoints = graph.edges[edge[0]]
            first_node = endpoints[0]
            second_node = endpoints[1]
            weight = edge[1]

            # Anything other than 1 means the edge was not accepted.
            if components.union(first_node - 1, second_node - 1) != 1:
                continue

            # Copy both endpoints into the tree together with their third
            # node field (the terminal status, per the original comment).
            tree_nodes[first_node] = graph.nodes[first_node][2]
            tree_nodes[second_node] = graph.nodes[second_node][2]

            # Tree edges are numbered consecutively starting from 1.
            tree_edges[len(tree_edges) + 1] = [first_node, second_node, weight]

        minimum_spanning_tree = Graph(len(tree_nodes), len(tree_edges),
                                      tree_nodes, tree_edges)
        return minimum_spanning_tree, minimum_spanning_tree.graph_weight()
 def test_chained_joins(self):
     """Joins made through shared elements connect the whole chain."""
     forest = UnionFind(10)

     # Link 1-2, 2-3 and 3-4 so that 1 through 4 end up in one set.
     for left, right in ((1, 2), (2, 3), (3, 4)):
         forest.join(left, right)
     self.assertTrue(forest.is_joined(1, 4))
     self.assertTrue(forest.is_joined(3, 1))
     self.assertFalse(forest.is_joined(0, 1))

     # Attaching 8 to any member attaches it to every member of the chain.
     forest.join(8, 3)
     self.assertTrue(forest.is_joined(1, 8))
     self.assertTrue(forest.is_joined(4, 8))