Ejemplo n.º 1
0
 def _select_maxheap(data, k):
     """Select an element of ``data`` by keeping a bounded max-heap.

     A ``heap.MaxHeap`` is seeded with the first ``k + 1`` items of ``data``.
     Every later item that compares smaller than the heap's current top
     replaces that top.  The value finally on top of the heap is returned;
     assuming ``heap.MaxHeap`` behaves as a max-heap, that is the
     (k + 1)-th smallest item, i.e. ``k`` is zero-based.

     Approach taken from "Data Structures and Algorithm Analysis in C".
     Stated running time: O(k + (N - k) * log(k)) = O(N log(k)).
     """
     import heap
     window = k + 1
     kept = heap.MaxHeap(data[0:window])
     for position in range(window, len(data)):
         candidate = data[position]
         # Only a candidate below the current top can displace it.
         if candidate < kept.peek():
             kept.deleteMax()
             kept.insert(candidate)
     return kept.peek()
Ejemplo n.º 2
0
    def transaction_heaps_add_number(self, identifier, number):
        """Add a number to the pair of heaps registered under ``identifier``.

        ``self.transaction_amt_heaps`` maps an identifier to a
        ``(lower_heap, higher_heap)`` tuple.  An identifier seen for the first
        time gets a fresh ``(heap.MaxHeap(), heap.MinHeap())`` pair.  The
        number is then pushed to:
          1) lower_heap, if lower_heap is empty or number < lower_heap.peek();
          2) higher_heap otherwise.

        Args:
          identifier: Key of the heap pair that receives the number. The
            expected format is 'CMTE_ID|TRANSACTION_DT' or 'CMTE_ID|ZIPCODE'.
          number: (float) The number to be added.
        """
        heaps_by_id = self.transaction_amt_heaps
        if identifier not in heaps_by_id:
            heaps_by_id[identifier] = (heap.MaxHeap(), heap.MinHeap())
        lower_heap, higher_heap = heaps_by_id[identifier]

        # Anything not strictly below the lower heap's top belongs to the
        # higher heap; an empty lower heap always takes the number.
        goes_low = lower_heap.empty() or number < lower_heap.peek()
        target = lower_heap if goes_low else higher_heap
        target.push(number)
Ejemplo n.º 3
0
def heap_test():
    """Smoke-test ``h.Heap``, ``h.MinHeap`` and ``h.MaxHeap`` by printing them.

    Nine fixed values are inserted into an ``h.Heap``; that heap is then
    merged into a fresh ``h.MinHeap`` and a fresh ``h.MaxHeap``.  For each of
    the three heaps, in that order, the function prints a banner, the size,
    the ``pretty_print()`` rendering, and then every popped element until the
    heap reports empty.  Nothing is returned or asserted; the output is meant
    to be checked by eye.
    """
    heap = h.Heap()
    for value in (100, 25, 17, 2, 19, 3, 36, 7, 1):
        heap.insert(value)

    minheap = h.MinHeap()
    maxheap = h.MaxHeap()
    minheap.merge(heap)
    maxheap.merge(heap)

    # (banner, size label, heap).  Expected pop order: increasing for the
    # plain heap and the min-heap, decreasing for the max-heap.
    sections = (
        ("------ heap ------", "Heap Size: ", heap),
        ("------ minheap ------", "MinHeap Size: ", minheap),
        ("------ maxheap ------", "MaxHeap Size: ", maxheap),
    )
    for banner, size_label, current in sections:
        print(banner)
        print(size_label + str(current.size()))
        current.pretty_print()
        while not current.empty():
            print(current.pop())
Ejemplo n.º 4
0
def heapsort(l):
    """Run ``heap.MaxHeap``'s heapsort over ``l`` and return ``l``.

    A ``heap.MaxHeap`` is built from the module-level ``_comp`` comparator and
    ``l``, and its ``heapsort()`` method is called.  The very same ``l``
    object is handed back, so the sort is presumably done in place --
    confirm against ``heap.MaxHeap``.
    """
    sorter = heap.MaxHeap(_comp, l)
    sorter.heapsort()
    return l
Ejemplo n.º 5
0
def median_maintainance(filename):
    """Sum the running medians of an integer stream, modulo 10000.

    The file is read one integer per line.  After every number the median of
    everything seen so far is taken; for an even count the lower of the two
    middle values is used (no averaging).  The medians are added up.

    Two heaps from the standard ``heapq`` module hold the data:
    ``lower`` keeps the smaller half as a max-heap (values stored negated,
    since ``heapq`` only provides min-heaps) and ``upper`` keeps the larger
    half as a min-heap.  All elements leave the heaps through ``heappop``,
    so the heap invariant that the placement test relies on is never broken.

    Side effects: from the third number onwards one status line
    ``"<max of lower> <min of upper> <len(lower)> <len(upper)>"`` is printed
    per number, and the un-reduced sum of medians is printed at the end.

    Args:
        filename: Path of a text file with one integer per line.
            Whitespace-only lines are skipped.

    Returns:
        The sum of all running medians modulo 10000 (0 for an empty file).

    Raises:
        ValueError: If a non-blank line is not a valid integer.
        IOError/OSError: If the file cannot be opened.
    """
    import heapq

    lower = []  # max-heap of the smaller half, stored as negated values
    upper = []  # min-heap of the larger half
    median_total = 0

    with open(filename, 'r') as stream:
        for line in stream:
            text = line.strip()
            if not text:
                continue
            new_data = int(text)

            if not lower and not upper:
                # First data point: it is its own median.
                heapq.heappush(upper, new_data)
                median_total += new_data
                continue

            if not lower:
                # Second data point: end up with one element per heap, the
                # smaller one in ``lower``.
                if new_data <= upper[0]:
                    heapq.heappush(lower, -new_data)
                else:
                    heapq.heappush(lower, -heapq.heappop(upper))
                    heapq.heappush(upper, new_data)
                median_total += -lower[0]
                continue

            # General case: place the value on the side it belongs to.
            if new_data <= upper[0]:
                heapq.heappush(lower, -new_data)
            else:
                heapq.heappush(upper, new_data)

            # Whenever the sizes differ, move the root of the larger heap to
            # the other one.  A difference of two becomes zero; a difference
            # of one flips sides, leaving the moved value (the median) as
            # the root of the heap that is now larger.
            if len(upper) > len(lower):
                heapq.heappush(lower, -heapq.heappop(upper))
            elif len(lower) > len(upper):
                heapq.heappush(upper, -heapq.heappop(lower))

            # Equal sizes -> lower median, i.e. the root of ``lower``;
            # otherwise the root of whichever heap is larger.
            if len(upper) > len(lower):
                median = upper[0]
            else:
                median = -lower[0]
            median_total += median

            # Single pre-formatted argument so the output is the same under
            # Python 2 and Python 3.
            print("%s %s %s %s" % (-lower[0], upper[0], len(lower),
                                   len(upper)))

    print(median_total)

    return median_total % 10000