Esempio n. 1
0
class SinglePercentileTracker(object):
    ''' Track a single percentile of a growing collection of numbers.

    The values seen so far are split across two heaps: ``lheap`` (a
    MaxHeap) holds the lowest-ranked values and ``rheap`` (a MinHeap)
    holds the rest. After every insertion the left heap is resized so
    that its root is the floor(n)th ranked value, where
    ``n = (percentile_tracked / 100.0) * (size + 1)``, and the root of
    the right heap is the next ranked value. The tracked percentile is
    the linear interpolation between those two roots.

    Attributes:
        percentile_tracked: the percentile requested at construction;
            it is divided by 100.0, so it is on a 0-100 scale.
        lheap: MaxHeap with the lower-ranked values.
        rheap: MinHeap with the higher-ranked values.
        size: number of values added so far.
        percentile: current percentile value, or None before any value
            has been added.

    NOTE(review): MaxHeap and MinHeap are defined outside this class;
    this code assumes they expose push(value), pop(), get(0) (the
    root) and, for MaxHeap, size() -- confirm against their
    definitions.
    '''
    def __init__(self, percentile):
        '''Create an empty tracker for ``percentile`` (0-100 scale).'''
        self.percentile_tracked = percentile
        self.lheap = MaxHeap()
        self.rheap = MinHeap()
        self.size = 0
        self.percentile = None

    def add(self, num):
        '''Add ``num`` to the tracker and refresh ``self.percentile``.

        Each call performs a constant number of heap operations in the
        steady state (one push, at most one pop/push to rebalance, and
        root look-ups), so the cost is that of those heap operations --
        presumably O(log n) with n the size of the larger heap.

        When the rank falls below 1 or at/above the number of values
        seen, there is no pair of neighbours to interpolate between and
        the percentile is the smallest or largest value respectively.
        '''
        self.size += 1
        n = (self.percentile_tracked / 100.0) * (self.size + 1)
        # The left heap should always hold floor(n) values, so that the
        # floor(n)th ranked value is its max and the next ranked value
        # is the min of the right heap. Clamp to [0, size]: the left
        # heap cannot hold more values than have been added (e.g. the
        # 100th percentile gives floor(n) == size + 1).
        lsize = max(0, min(int(math.floor(n)), self.size))

        # Push the value on to the proper heap. Before the first value
        # there is no percentile to compare against; start on the right
        # heap and let the rebalancing below move it if necessary.
        if self.percentile is None or num > self.percentile:
            self.rheap.push(num)
        else:
            self.lheap.push(num)

        # Move roots across until the left heap has the target size.
        # lsize <= self.size guarantees the right heap is non-empty
        # whenever the left heap is too small.
        while self.lheap.size() < lsize:
            self.lheap.push(self.rheap.pop())
        while self.lheap.size() > lsize:
            self.rheap.push(self.lheap.pop())

        if lsize == 0:
            # Rank below the first value: left heap is empty.
            self.percentile = self.rheap.get(0)
        elif lsize == self.size:
            # Rank at or beyond the last value: right heap is empty.
            self.percentile = self.lheap.get(0)
        else:
            # Weight the two neighbouring ranked values by the
            # fractional part of n.
            fraction = n - lsize
            low_data = self.lheap.get(0)
            high_data = self.rheap.get(0)
            self.percentile = fraction * (high_data - low_data) + low_data

    def add_list(self, lst):
        '''Add every value of the iterable ``lst`` via ``add``, in order.

        This is k calls to ``add`` where k is the number of values.
        '''
        for value in lst:
            self.add(value)