Ejemplo n.º 1
0
def kruskal_union_find_mst(graph):
    """ Uses Kruskal's greedy algorithm to compute the MST of graph.

    Edges are scanned in ascending order of their third component (e[2],
    presumably the edge weight). An edge is kept whenever its two endpoints
    are still in different union-find sets, ie. it does not close a cycle.
    Every edge is a candidate; the scan stops early once n - 1 edges were
    picked.

    Running time: dominated by sorting the m edges, O(m*log m) = O(m*log n),
        where n is the number of vertices (assumes cheap union-find
        operations - TODO confirm against the UnionFind implementation).

    Params:
        graph: object, instance of src.graph.Graph

    Returns:
        object, src.graph.Graph instance representing the MST. When the input
        graph is not connected fewer than n - 1 edges can be picked and the
        result is built from those edges only.
    """
    num_vertices = len(graph.get_vertices())

    # sorted() builds a new list, so the list handed out by the graph is not
    # reordered as a side effect.
    edges = sorted(graph.get_edges(), key=lambda e: e[2])

    union_find = UnionFind()
    mst_edges = []

    for edge in edges:
        # A spanning tree holds exactly n - 1 edges; nothing more can be added.
        if len(mst_edges) >= num_vertices - 1:
            break

        tail, head, _ = graph.split_edge(edge)

        # Endpoints already connected: taking this edge would form a cycle.
        if union_find.find(head) == union_find.find(tail):
            continue

        union_find.union(head, tail)
        mst_edges.append(edge)

    mst = Graph.build(edges=mst_edges, directed=False)
    return mst
Ejemplo n.º 2
0
def cluster_graph(g, k):
    """ Clusters the input graph using the greedy single link method.

    Every vertex starts in its own cluster. The shortest remaining edge whose
    endpoints lie in different clusters is used to merge those two clusters,
    until only k clusters are left.

    If the edges run out before k clusters are reached (the graph has more
    than k connected components), merging simply stops and the clusters formed
    so far are returned.

    Args:
        g: object, instance of src.graph.Graph
        k: int, number of clusters to create

    Returns:
        tuple, format (clusters, distances)
            clusters: dict, format {cluster_lead_vertex: [list_of_vertexes_in_cluster]}
            distance: dict, format {(cluster1, cluster2): distance_between_cluster1_and_cluster2}
                the key is the sorted pair of cluster leaders; the value is
                the length of the shortest edge between the two clusters, or
                float('inf') when no edge connects them.
    """
    union_find = UnionFind()
    vertices = g.get_vertices()
    for vertex in vertices:
        union_find.make_set(vertex)

    # Longest edge first, so that pop() always hands out the shortest edge
    # still available.
    edges = sorted(g.get_edges(), key=lambda e: e[2], reverse=True)
    num_clusters = len(vertices)

    # Cluster the nodes in the union_find data structure. The `and edges`
    # guard keeps pop() from raising on an exhausted edge list.
    while num_clusters > k and edges:
        (head, tail, _) = edges.pop()
        if union_find.find(head) != union_find.find(tail):
            union_find.union(head, tail)
            num_clusters -= 1

    # Format the clusters for output.
    clusters = {}
    for vertex in vertices:
        clusters.setdefault(union_find.find(vertex), []).append(vertex)

    # Computes spacing between clusters, ie. the minimum distance between two
    # nodes in different clusters. Start every pair at +inf ...
    distances = {}
    for i in clusters:
        for j in clusters:
            if i != j:
                distances[tuple(sorted([i, j]))] = float('inf')

    # ... then lower it with every edge crossing between two clusters. Only
    # the minimum matters, so the edges need no particular order here.
    for (tail, head, distance) in g.get_edges():
        lead_tail = union_find.find(tail)
        lead_head = union_find.find(head)
        if lead_tail == lead_head:
            continue
        pair = tuple(sorted([lead_tail, lead_head]))
        if distances[pair] > distance:
            distances[pair] = distance

    return (clusters, distances)
Ejemplo n.º 3
0
def cluster_graph(g, k):
    """ Clusters the input graph using the greedy single link method.

    Every vertex starts in its own cluster. The shortest remaining edge whose
    endpoints lie in different clusters is used to merge those two clusters,
    until only k clusters are left.

    If the edges run out before k clusters are reached (the graph has more
    than k connected components), merging simply stops and the clusters formed
    so far are returned.

    Args:
        g: object, instance of src.graph.Graph
        k: int, number of clusters to create

    Returns:
        tuple, format (clusters, distances)
            clusters: dict, format {cluster_lead_vertex: [list_of_vertexes_in_cluster]}
            distance: dict, format {(cluster1, cluster2): distance_between_cluster1_and_cluster2}
                the key is the sorted pair of cluster leaders; the value is
                the length of the shortest edge between the two clusters, or
                float('inf') when no edge connects them.
    """
    union_find = UnionFind()
    vertices = g.get_vertices()
    for vertex in vertices:
        union_find.make_set(vertex)

    # Longest edge first, so that pop() always hands out the shortest edge
    # still available.
    edges = sorted(g.get_edges(), key=lambda e: e[2], reverse=True)
    num_clusters = len(vertices)

    # Cluster the nodes in the union_find data structure. The `and edges`
    # guard keeps pop() from raising on an exhausted edge list.
    while num_clusters > k and edges:
        (head, tail, _) = edges.pop()
        if union_find.find(head) != union_find.find(tail):
            union_find.union(head, tail)
            num_clusters -= 1

    # Format the clusters for output.
    clusters = {}
    for vertex in vertices:
        clusters.setdefault(union_find.find(vertex), []).append(vertex)

    # Computes spacing between clusters, ie. the minimum distance between two
    # nodes in different clusters. Start every pair at +inf ...
    distances = {}
    for i in clusters:
        for j in clusters:
            if i != j:
                distances[tuple(sorted([i, j]))] = float('inf')

    # ... then lower it with every edge crossing between two clusters. Only
    # the minimum matters, so the edges need no particular order here.
    for (tail, head, distance) in g.get_edges():
        lead_tail = union_find.find(tail)
        lead_head = union_find.find(head)
        if lead_tail == lead_head:
            continue
        pair = tuple(sorted([lead_tail, lead_head]))
        if distances[pair] > distance:
            distances[pair] = distance

    return (clusters, distances)
Ejemplo n.º 4
0
 def __init__(self, graph):
     """
     Build the tree greedily: keep taking the next edge handed out by the
     priority queue (the minimum-weight one) and add it unless it would
     entail a cycle.
     """
     super(KruskalMST, self).__init__(graph)

     # Queue up every edge of the graph.
     pending = PriorityQueue()
     for edge in graph.edges():
         pending.put(edge)

     components = UnionFind(graph.V)
     while True:
         # Done once the tree has V - 1 edges or no candidate is left.
         if len(self.edges()) == graph.V - 1 or pending.empty():
             break

         edge = pending.get()
         first = edge.either()
         second = edge.other(first)

         # Endpoints already connected: this edge would close a cycle.
         if components.connected(first, second):
             continue

         components.union(first, second)
         self.mst.append(edge)
Ejemplo n.º 5
0
    def test_make_set_existing_key(self):
        # make_set on a key that is already present must report the key's
        # current leader.
        sets = UnionFind()

        # The first call creates the entry; the repeated call is under test.
        sets.make_set(1)
        self.assertEqual(sets.make_set(1), 1, 'the leader is the same element')

        # After the union, re-adding 2 must still report 1 as its leader.
        sets.make_set(2)
        sets.union(1, 2)
        self.assertEqual(sets.make_set(2), 1, 'the leader of 2 is still 1')
Ejemplo n.º 6
0
def kruskal_union_find_mst(graph):
    """ Uses Kruskal's greedy algorithm to compute the MST of graph.

    Edges are scanned in ascending order of their third component (e[2],
    presumably the edge weight). An edge is kept whenever its two endpoints
    are still in different union-find sets, ie. it does not close a cycle.
    Every edge is a candidate; the scan stops early once n - 1 edges were
    picked.

    Running time: dominated by sorting the m edges, O(m*log m) = O(m*log n),
        where n is the number of vertices (assumes cheap union-find
        operations - TODO confirm against the UnionFind implementation).

    Params:
        graph: object, instance of src.graph.Graph

    Returns:
        object, src.graph.Graph instance representing the MST. When the input
        graph is not connected fewer than n - 1 edges can be picked and the
        result is built from those edges only.
    """
    num_vertices = len(graph.get_vertices())

    # sorted() builds a new list, so the list handed out by the graph is not
    # reordered as a side effect.
    edges = sorted(graph.get_edges(), key=lambda e: e[2])

    union_find = UnionFind()
    mst_edges = []

    for edge in edges:
        # A spanning tree holds exactly n - 1 edges; nothing more can be added.
        if len(mst_edges) >= num_vertices - 1:
            break

        tail, head, _ = graph.split_edge(edge)

        # Endpoints already connected: taking this edge would form a cycle.
        if union_find.find(head) == union_find.find(tail):
            continue

        union_find.union(head, tail)
        mst_edges.append(edge)

    mst = Graph.build(edges=mst_edges, directed=False)
    return mst
Ejemplo n.º 7
0
def single_link(points, k, distance):
    """ Groups points into k clusters with greedy `single-link clustering`.

    The procedure resembles Kruskal's MST algorithm: the two closest points
    that are not yet in the same cluster get their clusters merged, and the
    iteration stops as soon as the requested number of clusters is reached.

    Args:
        points: list of coordinates for points, format (x:int, y:int, name:str)
        k: int, number of clusters to create.
        distance: function, determines the distance between two points.

    Returns:
        A dict with format {cluster_leader: [list of points in cluster]}.
    """
    # Snapshot of the input taken before closest_pair ever sees `points`; the
    # output is assembled from this copy.
    snapshot = points[:]

    sets = UnionFind()
    for item in points:
        sets.make_set(item)

    def cross_cluster_distance(a, b):
        """ Distance between a and b, forced to +inf when both points already
        share a cluster so that such pairs never come out as the closest.
        """
        return float('inf') if sets.find(a) == sets.find(b) else distance(a, b)

    remaining = len(points)
    while remaining > k:
        first, second = closest_pair(points, distance=cross_cluster_distance)
        # NOTE(review): when closest_pair yields two equal points nothing is
        # merged and the loop just asks again - confirm this always makes
        # progress.
        if first != second:
            sets.union(first, second)
            remaining -= 1

    # Group the points under the leader of their cluster.
    grouped = {}
    for item in snapshot:
        grouped.setdefault(sets.find(item), []).append(item)
    return grouped
Ejemplo n.º 8
0
def single_link(points, k, distance):
    """ Groups points into k clusters with greedy `single-link clustering`.

    The procedure resembles Kruskal's MST algorithm: the two closest points
    that are not yet in the same cluster get their clusters merged, and the
    iteration stops as soon as the requested number of clusters is reached.

    Args:
        points: list of coordinates for points, format (x:int, y:int, name:str)
        k: int, number of clusters to create.
        distance: function, determines the distance between two points.

    Returns:
        A dict with format {cluster_leader: [list of points in cluster]}.
    """
    # Snapshot of the input taken before closest_pair ever sees `points`; the
    # output is assembled from this copy.
    snapshot = points[:]

    sets = UnionFind()
    for item in points:
        sets.make_set(item)

    def cross_cluster_distance(a, b):
        """ Distance between a and b, forced to +inf when both points already
        share a cluster so that such pairs never come out as the closest.
        """
        return float('inf') if sets.find(a) == sets.find(b) else distance(a, b)

    remaining = len(points)
    while remaining > k:
        first, second = closest_pair(points, distance=cross_cluster_distance)
        # NOTE(review): when closest_pair yields two equal points nothing is
        # merged and the loop just asks again - confirm this always makes
        # progress.
        if first != second:
            sets.union(first, second)
            remaining -= 1

    # Group the points under the leader of their cluster.
    grouped = {}
    for item in snapshot:
        grouped.setdefault(sets.find(item), []).append(item)
    return grouped
Ejemplo n.º 9
0
def _basin_name(index):
    """ Maps a 0-based basin index to a spreadsheet-style label:
    0 -> 'A', ..., 25 -> 'Z', 26 -> 'AA', 27 -> 'AB', ...
    """
    letters = string.ascii_uppercase
    name = ''
    index += 1
    while index > 0:
        index, position = divmod(index - 1, len(letters))
        name = letters[position] + name
    return name


def farm_rainfall(plots):
    """ Partitions a square elevation map into rainfall basins.

    Problem statement: a group of farmers has some elevation data, and we
    want to help them understand how rainfall flows over their farmland. The
    land is a two-dimensional array of altitudes and water flows downhill:

    If a cell's four neighboring cells all have higher altitudes, the cell is
    a sink; water collects in sinks. Otherwise, water flows to the neighboring
    cell with the lowest altitude. If a cell is not a sink, it may be assumed
    to have a unique lowest neighbor, lower than the cell itself.

    Cells that drain into the same sink - directly or indirectly - are part
    of the same basin. The map is assumed to be square, with a side of at
    most S = 5000.

    The original challenge asks for the basin sizes in descending order; this
    function instead returns the map relabelled with one name per basin.

    Each cell is joined with the neighbour reported by get_smaller_neighbour
    (presumably its lowest lower neighbour, or None for a sink - verify
    against that helper), so the cells of one basin end up in one union-find
    set.

    Complexity: every one of the n*n cells is visited twice, where n is the
        side of the map; the total cost also depends on the UnionFind
        operations.

    Args:
        plots: list, of lists of elevations

    Returns:
        list, of lists with each position being the name of a pond. Names are
        handed out in row-major order of first appearance: 'A' ... 'Z', then
        'AA', 'AB', ... so maps with more than 26 basins are supported.
    """
    union_find = UnionFind()
    n = len(plots)

    # Compute sink lots for each lot in the field.
    for i in range(n):
        for j in range(n):
            smaller = get_smaller_neighbour(plots, i, j)
            if smaller is not None:
                union_find.union((smaller[0], smaller[1]), (i, j))

    # Compose the output array.
    names = {}
    out = [[None] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            lead = union_find.find((i, j))
            if lead not in names:
                # len(names) is the number of basins already labelled.
                names[lead] = _basin_name(len(names))
            out[i][j] = names[lead]

    return out
Ejemplo n.º 10
0
    def test_correctly_maintains_the_data_structure(self):
        # Walks the structure through make_set, find and union and checks the
        # set membership after each step.
        sets = UnionFind()
        initial = (1, 2, 3)

        for item in initial:
            sets.make_set(item)
        for item in initial:
            self.assertEqual(sets.find(item), item, 'each item in its own set')

        # 4 was never registered explicitly.
        self.assertEqual(sets.find(4), 4, 'find should also insert if not found')

        sets.union(1, 2)
        sets.union(3, 4)
        self.assertEqual(sets.find(1), sets.find(2),
                         '1 and 2 in the same set now')
        self.assertEqual(sets.find(3), sets.find(4),
                         '3 and 4 in the same set now')

        # Joining 1 with 4 merges both pairs into a single set.
        sets.union(1, 4)
        self.assertEqual(sets.find(2), sets.find(3),
                         '2 and 3 are now in the '
                         'same set because 1 and 4 were joined')
Ejemplo n.º 11
0
def union_find():
    # Merges every pair listed in `edges`, then checks which of the keys
    # "0".."7" share a set.
    sets = UnionFind(8)
    for pair in edges:
        left, right = pair
        sets.union(left, right)

    # (first key, second key, expected to be in the same set?)
    expectations = (
        ("0", "1", True),
        ("0", "2", True),
        ("0", "3", True),
        ("0", "4", False),
        ("0", "5", False),
        ("4", "5", False),
        ("5", "6", True),
        ("7", "0", True),
    )
    for first, second, together in expectations:
        same_set = sets.find(first) == sets.find(second)
        assert same_set == together
Ejemplo n.º 12
0
def _basin_name(index):
    """ Maps a 0-based basin index to a spreadsheet-style label:
    0 -> 'A', ..., 25 -> 'Z', 26 -> 'AA', 27 -> 'AB', ...
    """
    letters = string.ascii_uppercase
    name = ''
    index += 1
    while index > 0:
        index, position = divmod(index - 1, len(letters))
        name = letters[position] + name
    return name


def farm_rainfall(plots):
    """ Partitions a square elevation map into rainfall basins.

    Problem statement: a group of farmers has some elevation data, and we
    want to help them understand how rainfall flows over their farmland. The
    land is a two-dimensional array of altitudes and water flows downhill:

    If a cell's four neighboring cells all have higher altitudes, the cell is
    a sink; water collects in sinks. Otherwise, water flows to the neighboring
    cell with the lowest altitude. If a cell is not a sink, it may be assumed
    to have a unique lowest neighbor, lower than the cell itself.

    Cells that drain into the same sink - directly or indirectly - are part
    of the same basin. The map is assumed to be square, with a side of at
    most S = 5000.

    The original challenge asks for the basin sizes in descending order; this
    function instead returns the map relabelled with one name per basin.

    Each cell is joined with the neighbour reported by get_smaller_neighbour
    (presumably its lowest lower neighbour, or None for a sink - verify
    against that helper), so the cells of one basin end up in one union-find
    set.

    Complexity: every one of the n*n cells is visited twice, where n is the
        side of the map; the total cost also depends on the UnionFind
        operations.

    Args:
        plots: list, of lists of elevations

    Returns:
        list, of lists with each position being the name of a pond. Names are
        handed out in row-major order of first appearance: 'A' ... 'Z', then
        'AA', 'AB', ... so maps with more than 26 basins are supported.
    """
    union_find = UnionFind()
    n = len(plots)

    # Compute sink lots for each lot in the field.
    for i in range(n):
        for j in range(n):
            smaller = get_smaller_neighbour(plots, i, j)
            if smaller is not None:
                union_find.union((smaller[0], smaller[1]), (i, j))

    # Compose the output array.
    names = {}
    out = [[None] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            lead = union_find.find((i, j))
            if lead not in names:
                # len(names) is the number of basins already labelled.
                names[lead] = _basin_name(len(names))
            out[i][j] = names[lead]

    return out