def heap(self):
     """
     Build and return a max heap of bunches keyed on `Bunch.radius`.

     The heap starts out holding exactly one bunch, constructed from
     `self.root`.
     """
     bunch_heap = MaxHeap(key=Bunch.radius)
     root_bunch = Bunch(self.root)
     bunch_heap.insert(root_bunch)
     return bunch_heap
Beispiel #2
0
class KNNHeap:
    """
    A size-bounded collection of bunches for a k-nearest-neighbor search.

    Bunches are stored twice: as `(bunch, distance)` pairs in a max heap
    ordered by `distance`, and in the dictionary `self.bunches` mapping each
    stored bunch to its current distance.  `len()` of a `KNNHeap` is the total
    number of points in the stored bunches, i.e. the sum of `len(bunch)`.

    Only the maximum of the underlying heap is ever removed.  When the
    distance of a bunch is replaced (see `splitbunch`), its old pair stays in
    the heap until it reaches the top.  Such stale pairs are recognized
    because they disagree with `self.bunches`; they are skipped and discarded
    lazily.
    """

    def __init__(self, k):
        """
        Create an empty heap that tries to hold at least `k` points.
        """
        self.heap = MaxHeap(key=lambda x: x[1])
        self.bunches = {}
        self.k = k
        # Total number of points over all bunches in `self.bunches`.
        self._len = 0

    def __contains__(self, bunch):
        """
        Return `True` if `bunch` is currently stored.
        """
        return bunch in self.bunches

    def _islive(self, entry):
        """
        Return `True` if the heap entry matches the current distance recorded
        for its bunch.
        """
        bunch, distance = entry
        return bunch in self.bunches and self.bunches[bunch] == distance

    def _findlivemax(self):
        """
        Return the live `(bunch, distance)` pair of largest distance,
        discarding any stale pairs found above it.

        Must only be called when at least one bunch is stored.
        """
        while True:
            entry = self.heap.findmax()
            if self._islive(entry):
                return entry
            self.heap.removemax()

    def insert(self, bunch, distance):
        """
        Insert a new bunch as long as it doesn't cause the heap to get too
        large.  If the new bunch is closer than other bunches in the heap,
        then some may be removed to make space.

        The invariant maintained by insert is that if the bunches are sorted
        by increasing upper bound distance to the query, then the heap contains
        the minimum prefix that has at least k points.

        The items stored in the underlying heap are `(bunch, distance)` pairs.
        This is important as we always need the distance (i.e. the priority)
        when we remove an item.

        Inserting a bunch that is already stored replaces its distance.
        """
        if bunch in self.bunches:
            # Treat a repeated insert as an update so that the points of the
            # bunch are not counted twice.
            self._len -= len(bunch)
            del self.bunches[bunch]

        while self._len >= self.k:
            if not self.bunches:
                # Only reachable when k <= 0: nothing needs to be stored.
                return
            top, current_radius = self._findlivemax()
            if distance >= current_radius:
                # There are already k points that are at least as close.
                return
            if self._len - len(top) + len(bunch) < self.k:
                # The farthest bunch is still needed to reach k points, so
                # keep it and store the new bunch alongside it.
                break
            # The farthest bunch is redundant once the new bunch is counted.
            self.heap.removemax()
            del self.bunches[top]
            self._len -= len(top)

        self.bunches[bunch] = distance
        self.heap.insert((bunch, distance))
        self._len += len(bunch)

    def splitbunch(self, q, bunch):
        """
        Splits a bunch into two bunches using pop and tries to insert both
        back into the heap.

        The new distance of each part is its distance from `q` plus its
        radius, capped by the distance previously stored for `bunch`.

        Raises `KeyError` if `bunch` is not currently stored.
        """
        # Read the old bound and size before `pop` changes the bunch.
        old_distance = self.bunches[bunch]
        old_size = len(bunch)

        b = bunch.pop()
        b_radius = min(q.dist(b.point) + b.radius, old_distance)

        a = bunch
        a_radius = min(q.dist(a.point) + a.radius, old_distance)

        # Forget the unsplit bunch.  Its old heap entry becomes stale and is
        # dropped lazily when it reaches the top of the heap.
        del self.bunches[a]
        self._len -= old_size

        self.insert(a, a_radius)
        self.insert(b, b_radius)

    def __len__(self):
        """
        Return the total number of points in the stored bunches.
        """
        return self._len

    def __iter__(self):
        """
        Iterate over the stored `(bunch, distance)` pairs in the iteration
        order of the underlying heap, skipping stale pairs.
        """
        seen = set()
        for entry in self.heap:
            if entry[0] not in seen and self._islive(entry):
                seen.add(entry[0])
                yield entry
Beispiel #3
0
 def testremovemax(self):
     """
     `removemax` returns items in decreasing order and shrinks the heap,
     including after further items have been inserted.
     """
     heap = MaxHeap([1, 2, 5, 4, 3])

     # The two largest items come out first; the length drops each time.
     for expected_max, expected_len in ((5, 4), (4, 3)):
         self.assertEqual(heap.removemax(), expected_max)
         self.assertEqual(len(heap), expected_len)
     self.assertEqual(heap.removemax(), 3)

     # Newly inserted items take part in the ordering.
     for item in (9, 0):
         heap.insert(item)
     self.assertEqual(heap.removemax(), 9)
     self.assertEqual(len(heap), 3)
     self.assertEqual(heap.removemax(), 2)
Beispiel #4
0
class NeighborGraph(Graph):
    """
    A graph whose vertices are cells of points.

    Each cell is created by the factory `Cell(M)` from a center point.  Every
    cell has an edge to itself, and the cells are also kept in a max heap
    ordered by cell radius.
    """

    def __init__(self,
                 M,
                 root=None,
                 nbrconstant=1,
                 moveconstant=1,
                 gettransportplan=False,
                 mass=None):
        """
        Initialize a new NeighborGraph.

        It starts with a sized iterable `M` of points of a metric space.
        If `root` is `None`, the first point of `M` is the center of the
        initial cell; otherwise `root` is.  All other points are placed
        inside that cell.

        There are two constants that can be set.
        The first `nbrconstant`, which controls the distance between neighbors.
        The second is `moveconstant` which determines when a point is moved
        when a new cell is formed.  The default value for both constants is
        `1`.  This moves a point whenever it has a new nearest neighbor.

        The theoretical guarantees are only valid when
        `moveconstant <= nbrconstant`.  As a result, setting these any other
        way raises a `RuntimeError`.

        `gettransportplan` is a flag that determines whether `addcell()`
        computes transportation plans or not.

        `mass` gives the mass of each point: `mass[i]` belongs to the `i`-th
        point of `M`.  It defaults to a mass of `1` per point.  A `ValueError`
        is raised if `len(mass) != len(M)`.
        """
        # Initialize the `NeighborGraph` to be a `Graph`.
        super().__init__()
        if mass is None:
            mass = [1] * len(M)
        elif len(mass) != len(M):
            raise ValueError("`mass` must be of same length as `M`")
        self.Vertex = Cell(M)

        if nbrconstant < moveconstant:
            raise RuntimeError("The move constant must not be larger than the "
                               "neighbor constant.")
        self.nbrconstant = nbrconstant
        self.moveconstant = moveconstant

        # Flag which determines whether the transportation plan is to be
        # computed by `addcell` or not.
        self.gettransportplan = gettransportplan

        # Maps each point to its total mass.  The masses of repeated points
        # accumulate; a point that was never given a mass reads as 0.
        self.pointcopies = defaultdict(int)

        # Walk the points and their masses in lockstep so that `mass[i]`
        # always goes with the i-th point, even when the first point is
        # taken out to serve as the root.
        points = iter(M)
        masses = iter(mass)

        # Make a cell to start the graph.  Use the first point as the root
        # if none is given.  Compare against `None` explicitly: a valid root
        # may be falsy (e.g. the point `0`).
        if root is None:
            root = next(points)
            self.pointcopies[root] += next(masses)
        root_cell = self.Vertex(root)

        # Add the points to the root cell.
        # It doesn't matter if the root point is also in the list of points.
        # It will not be added twice.
        for p, m in zip(points, masses):
            self.pointcopies[p] += m
            root_cell.addpoint(p)

        # Add the new cell as the one vertex of the graph.
        self.addvertex(root_cell)
        self.addedge(root_cell, root_cell)
        self.heap = MaxHeap([root_cell], key=lambda c: c.radius)

    def iscloseenoughto(self, p, q):
        """
        Return `True` if the cells `p` and `q` are close enough to be
        neighbors.

        The distance from `q` to the center of `p` must be at most the sum of
        the two radii plus `nbrconstant` times the larger radius.
        """
        return q.dist(p.center) <= p.radius + q.radius + \
                      self.nbrconstant * max(p.radius, q.radius)

    def addcell(self, newcenter, parent):
        """
        Add a new cell centered at `newcenter` and also compute the mass moved
        by this change to the neighbor graph.

        The `parent` is a sufficiently close cell that is already in the
        graph.
        It is used to find nearby cells to be the neighbors.
        The cells are rebalanced with points moving from nearby cells into
        the new cell if it is closer.

        Returns the pair `(newcell, transportplan)`.  If
        `self.gettransportplan` is true, `transportplan` maps cell centers to
        the mass gained (positive) or lost (negative) by that cell in this
        change to the neighbor graph.  Otherwise it is empty.
        """
        # Create the new cell.
        newcell = self.Vertex(newcenter)

        # Create transportation plan for adding this cell.
        transportplan = defaultdict(int)

        if self.gettransportplan:
            # The mass of the new center itself moves from the parent.
            centermass = self.pointcopies[newcenter]
            transportplan[newcenter] = centermass
            transportplan[parent.center] -= centermass

        # Make the cell a new vertex.
        self.addvertex(newcell)
        self.addedge(newcell, newcell)

        # Rebalance the new cell.
        for nbr in self.nbrs(parent):
            localtransport = self.rebalance(newcell, nbr)
            # Add change caused by this rebalance to the transportation plan
            # if requested.
            if self.gettransportplan:
                transportplan[newcenter] += localtransport
                transportplan[nbr.center] -= localtransport
            # The radius of `nbr` may have changed, so refresh its priority.
            self.heap.changepriority(nbr)

        # Add neighbors to the new cell.
        for newnbr in self.nbrs_of_nbrs(parent):
            if self.iscloseenoughto(newcell, newnbr):
                self.addedge(newcell, newnbr)

        # After all the radii are updated, prune edges that are too long.
        # Iterate over a copy because pruning removes edges.
        for nbr in set(self.nbrs(parent)):
            self.prunenbrs(nbr)

        self.heap.insert(newcell)

        # If self.gettransportplan=False the transportplan returned is empty.
        return newcell, transportplan

    def pop(self):
        """
        Pop a point from the cell at the top of the heap and return it.

        The priority of that cell in the heap is refreshed afterwards.
        """
        cell = self.heap.findmax()
        # `pop` must be called; a bare `cell.pop` would return the bound
        # method and leave the cell unchanged.
        point = cell.pop()
        self.heap.changepriority(cell)
        return point

    def rebalance(self, a, b):
        """
        Returns the mass of the points moved from `b` to `a`.

        Move points from the cell `b` to the cell `a` if
        `a.comparedist(p, b, self.moveconstant)` holds for them, i.e. if they
        are sufficiently closer to `a.center`.
        """
        # NOTE(review): `comparedist` is presumably equivalent to
        # `a.dist(p) < self.moveconstant * b.dist(p)` -- confirm in `Cell`.
        points_to_move = {
            p
            for p in b.points if a.comparedist(p, b, self.moveconstant)
        }
        b.points -= points_to_move
        mass_to_move = 0
        for p in points_to_move:
            a.addpoint(p)
            mass_to_move += self.pointcopies[p]
        # The radius of `a` is automatically updated by addpoint.
        # The other radius needs to be manually updated.
        b.updateradius()
        return mass_to_move

    def nbrs_of_nbrs(self, u):
        """
        Return the set of all neighbors of the neighbors of `u`.
        """
        return {b for a in self.nbrs(u) for b in self.nbrs(a)}

    def prunenbrs(self, u):
        """
        Eliminate neighbors that are too far with respect to the current
        radius.
        """
        # Collect first, then remove, so that the neighbors are not modified
        # while they are being iterated over.
        nbrs_to_delete = {
            v for v in self.nbrs(u) if not self.iscloseenoughto(u, v)
        }

        # Prune the excess edges.
        for v in nbrs_to_delete:
            self.removeedge(u, v)

    def cellmass(self, cell):
        """
        Method to compute the number of points with multiplicity in a cell.
        Better to use this than `len(cell)`.
        """
        return sum(self.pointcopies[p] for p in cell)