def merge_sorted_lists():
    """Generate k sorted lists, merge them via a MinHeap, and print the result.

    The lists come from ``generate_sorted_lists(k, n, min_value, max_value)``,
    which is used here as a dict-like mapping ``list id -> list``. The merged
    values are appended to a ``SingleLinkedList``. Finally ``k*n``, the merged
    length and the merged list are printed on one line.
    """
    k = 10
    n = 10
    min_value = 0
    max_value = 30
    sorted_lists = generate_sorted_lists(k, n, min_value, max_value)
    sorted_list = SingleLinkedList()
    heap = MinHeap([], 0)

    # Fill in the 1st batch: the head of every list. Lists that are (or
    # become) empty are dropped right away, so the main loop never calls
    # pop(0) on an exhausted list.
    for list_id, values in list(sorted_lists.items()):
        if len(values) > 0:
            heap.add(IdAndValue(list_id, values.pop(0)))
        if len(values) <= 0:
            sorted_lists.pop(list_id)

    while len(sorted_lists) > 0:
        item = heap.pop()
        sorted_list.append(item.get_value())
        list_id = item.get_id()
        if list_id in sorted_lists:
            # Refill from the list the popped value came from.
            value = sorted_lists[list_id].pop(0)
            if len(sorted_lists[list_id]) <= 0:
                sorted_lists.pop(list_id)
            heap.add(IdAndValue(list_id, value))
        else:
            # That list is exhausted: refill from any list that still has
            # values so the heap keeps its size.
            list_id, values = next(iter(sorted_lists.items()))
            heap.add(IdAndValue(list_id, values.pop(0)))
            if len(values) <= 0:
                sorted_lists.pop(list_id)

    # Drain what is left. Append the stored value, not the IdAndValue
    # wrapper, to stay consistent with the main loop above.
    while not heap.is_empty():
        sorted_list.append(heap.pop().get_value())

    # Single-argument print keeps the output identical on Python 2 and 3.
    print("%s %s %s" % (k * n, len(sorted_list), sorted_list))
def test_empty(self):
    """empty() is true for a new heap, false after append, true after pop."""
    heap = MinHeap()
    self.assertTrue(heap.empty())

    heap.append(1)
    self.assertFalse(heap.empty())

    heap.pop()
    self.assertTrue(heap.empty())
def test_0():
    """Check index helpers, walks and push/pop on a heap built with n=3."""
    # Layout of the keys 0..12 with three children per node:
    #                 0
    #       1         2          3
    #     4 5 6     7 8 9    10 11 12
    heap = MinHeap(*range(13), n=3)
    assert heap.height == 3

    # Nodes 0..3 each own three consecutive children starting at 3*node + 1.
    for node in range(4):
        if node == 0:
            assert heap.parent(node) is None
        else:
            assert heap.parent(node) == 0
        first_child = 3 * node + 1
        for slot in range(3):
            assert heap.child(node, slot) == first_child + slot
        assert heap.first(node) == first_child
        assert heap.last(node) == first_child + 2

    # Bottom row: the parent of index i is (i - 1) // 3.
    for leaf in range(4, 13):
        assert heap.parent(leaf) == (leaf - 1) // 3

    assert heap.is_valid()

    for leaf in (4, 5, 6, 7, 12):
        above = (leaf - 1) // 3
        assert tuple(heap.walk_up(leaf)) == ((above, leaf), (0, above))
    assert tuple(heap.walk_down()) == ((0, 1), (1, 4))

    for key in (14, 6):
        heap.push(key)
        assert heap.is_valid()

    for smallest in (0, 1, 2):
        assert heap.pop() == smallest
        assert heap.is_valid()
def test_len(self):
    """len() follows every append and pop."""
    heap = MinHeap()
    self.assertTrue(len(heap) == 0)

    # Two appends grow the heap to 1, then 2.
    for size_after in (1, 2):
        heap.append(1)
        self.assertTrue(len(heap) == size_after)

    # Two pops shrink it back to 1, then 0.
    for size_after in (1, 0):
        heap.pop()
        self.assertTrue(len(heap) == size_after)
class HuffmanEncoder:
    """Build a Huffman code from a ``{character: frequency}`` mapping.

    Relies on ``MinHeap`` (``insert``/``pop``/``length``) and on
    ``Node(value, left, right, character)``, both defined elsewhere.
    """

    # Class-level default kept for backward compatibility. Every instance
    # rebinds its own ``frequencies`` in __init__, and this dict is never
    # mutated by the class.
    frequencies = {}

    def __init__(self, frequencies):
        """Store the frequency mapping and start with an empty node heap."""
        self.frequencies = frequencies
        self.nodes = MinHeap()

    def to_nodes(self):
        """Insert one leaf ``Node`` per character into ``self.nodes``."""
        for character, weight in self.frequencies.items():
            self.nodes.insert(Node(weight, None, None, character))

    def generate_coding(self):
        """Return ``(d, e)``: ``d`` maps character -> code, ``e`` code -> character."""
        d = {}
        e = {}
        tree = self.construct_huffman_tree()
        self.encode_huffman_tree_r(tree, d, e)
        return d, e

    def encode_huffman_tree_r(self, node, d, e, val=''):
        """Walk the tree from ``node`` and fill ``d`` and ``e`` in place.

        ``val`` is the code prefix accumulated so far: '0' is appended when
        going left and '1' when going right. Returns ``(d, e)``.
        """
        if node.right is None and node.left is None:
            # A tree made of one leaf has an empty prefix. Give it the
            # one-bit code '0' so every symbol has a non-empty code.
            code = val or '0'
            d[node.character] = code
            e[code] = node.character
        if node.left is not None:
            self.encode_huffman_tree_r(node.left, d, e, val + '0')
        if node.right is not None:
            self.encode_huffman_tree_r(node.right, d, e, val + '1')
        return d, e

    def construct_huffman_tree(self):
        """Build the Huffman tree and return its root node.

        The heap is rebuilt from ``self.frequencies`` on every call, so
        calling this (or ``generate_coding``) more than once does not
        accumulate stale nodes. The root is left in ``self.nodes`` on return.

        Raises:
            ValueError: if ``self.frequencies`` is empty.
        """
        self.nodes = MinHeap()
        self.to_nodes()
        if self.nodes.length() == 0:
            raise ValueError("cannot build a Huffman tree without symbols")

        while self.nodes.length() > 1:
            left = self.nodes.pop()
            right = self.nodes.pop()
            # When exactly one of the pair is a leaf and it was popped
            # second, swap the pair so the leaf becomes the left child.
            if right.character is not None and left.character is None:
                merged = Node(left.value + right.value, right, left)
            else:
                merged = Node(left.value + right.value, left, right)
            self.nodes.insert(merged)

        # Exactly one node remains: the root, or the only leaf for a
        # single-symbol input. Pop and re-insert it so the heap still holds
        # the root afterwards.
        root = self.nodes.pop()
        self.nodes.insert(root)
        return root
def encode(self, symbols=None):
    """
    Huffman-encoding symbols

    symbols: [(w1, s1), (w2, s2), ..., (wn, sn)] where wi, si are ith symbol's weight/freq

    Sets ``self._root`` to the root of the built tree, calls
    ``self._symbol2codes()`` and then stores the maximum, minimum and mean
    code length in ``self._maxDepth``, ``self._minDepth`` and
    ``self._avgDepth``.

    Raises:
        TypeError: if ``symbols`` is None.
        ValueError: if ``symbols`` is empty.
    """
    if symbols is None:
        raise TypeError("symbols must be an iterable of (weight, symbol) pairs, not None")

    pq = MinHeap()
    # Work on a private copy so the caller's data is never touched.
    symbols = copy.deepcopy(symbols)
    # initialize symbols to nodes
    symbols = [(s[0], HuffmanNode(value=s[1], left=None, right=None)) for s in symbols]
    if not symbols:
        raise ValueError("cannot Huffman-encode an empty symbol list")

    # NOTE(review): entries are (weight, node) tuples, so equal weights may
    # fall back to comparing HuffmanNode objects -- confirm HuffmanNode (or
    # MinHeap) handles ties.
    pq.heapify(symbols)
    while len(symbols) > 1:
        l, r = pq.pop(symbols), pq.pop(symbols)
        lw, ls, rw, rs = l[0], l[1], r[0], r[1]  # left weight, left symbol, right weight, right symbol
        parent = HuffmanNode(value=None, left=ls, right=rs)
        pq.add(heap=symbols, item=(lw + rw, parent))

    self._root = pq.pop(symbols)[1]  # tree is complete, pop root node
    self._symbol2codes()  # create symbol: code dictionary

    depths = [len(code) for code in self._codes.values()]
    self._maxDepth = max(depths)  # max depth
    self._minDepth = min(depths)  # min depth
    # float() keeps this a true mean even where `/` is integer division.
    self._avgDepth = float(sum(depths)) / len(depths)  # mean depth
def test_min_heap_sorting(self):
    """Popping must yield sorted order after insert() and after heapify()."""
    # A fixed seed per round keeps the test consistent and reproducible.
    for seed in xrange(10):
        random.seed(seed)
        heap = MinHeap()
        values = [int(random.random() * 20 - 10) for _ in xrange(1000)]
        expected = sorted(values)

        # Round 1: build the heap one insert at a time.
        for value in values:
            heap.insert(value)
        for want in expected:
            self.assertEqual(want, heap.pop())

        # Round 2: rebuild the emptied heap in bulk.
        heap.heapify(values)
        for want in expected:
            self.assertEqual(want, heap.pop())
def test_min(self):
    """min() returns the smallest item without removing it; IndexError when empty."""
    heap = MinHeap()
    self.assertRaises(IndexError, heap.min)

    heap.append(1)
    # Asking twice shows that min() does not consume the item.
    for _ in range(2):
        self.assertEqual(1, heap.min())

    heap.append(1)
    self.assertEqual(1, heap.min())
    heap.pop()
    self.assertEqual(1, heap.min())
    heap.pop()
    self.assertRaises(IndexError, heap.min)

    heap.append(3)
    self.assertEqual(3, heap.min())
    heap.append(1)
    self.assertEqual(1, heap.min())
    heap.pop()
    self.assertEqual(3, heap.min())
def sort(array):
    """Return the items of ``array`` as a list, in the order MinHeap pops them.

    Every item is pushed onto a fresh ``MinHeap`` and then popped once per
    pushed item. Counting the pushes replaces the old "pop until anything
    raises" loop, so a genuine error from ``pop`` now propagates instead of
    silently truncating the result.
    """
    heap = MinHeap()
    pushed = 0
    for item in array:
        heap.push(item)
        pushed += 1
    return [heap.pop() for _ in range(pushed)]
class PriorityQueue:
    """Priority queue that stores ``PriorityQueueItem`` objects in a ``MinHeap``."""

    def __init__(self):
        """Create an empty queue."""
        self.heap = MinHeap()

    def enqueue(self, priority, item):
        """Wrap ``item`` with ``priority`` and push it onto the heap."""
        self.heap.push(PriorityQueueItem(priority, item))

    def dequeue(self):
        """Pop the next heap entry and return its ``value``; None if that fails.

        Any ordinary exception raised by the pop or the ``.value`` access is
        treated as "nothing to dequeue", as before. Unlike a bare
        ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        """
        try:
            return self.heap.pop().value
        except Exception:
            # NOTE(review): narrow this to the specific exception MinHeap.pop
            # raises on an empty heap once that is confirmed.
            return None
def test_pop(self):
    """pop() returns items smallest-first and raises IndexError when empty."""
    heap = MinHeap()
    self.assertRaises(IndexError, heap.pop)

    heap.append(1)
    self.assertEqual(1, heap.pop())
    self.assertRaises(IndexError, heap.pop)

    # Append in descending order, expect ascending order back.
    for value in (9, 6, 5, 3):
        heap.append(value)
    for expected in (3, 5, 6, 9):
        self.assertEqual(expected, heap.pop())

    self.assertRaises(IndexError, heap.pop)
def algorithm(self, graph):
    """Grow ``self.tree`` from the lightest edge of ``graph`` using a MinHeap.

    Starts from the first edge of ``graph.E`` ordered by
    ``self.edge_wt_sort``. It then repeatedly pops an edge, removes it from
    ``graph``, skips it when both endpoints are already in the tree, and
    otherwise adds it to the tree, adds its ``wt`` to ``self.min_weight``
    and pushes the edges incident to both endpoints.

    Note that ``graph`` is consumed: every popped edge is removed from it.
    A graph with no edges leaves the tree and weight untouched.
    """
    edges = sorted(graph.E, key=self.edge_wt_sort)
    if not edges:
        # Nothing to seed the heap with; previously this raised IndexError.
        return

    hp = MinHeap()
    hp.insert(edges[0])
    while not hp.empty():
        edge = hp.pop()
        graph.remove_edge(edge)

        # Look the vertex collection up once per popped edge.
        tree_vertices = self.tree.V()
        if edge.v1 in tree_vertices and edge.v2 in tree_vertices:
            continue

        self.tree.add_edge(edge)
        self.min_weight += edge.wt
        neighborhood = graph.edges(edge.v1) + graph.edges(edge.v2)
        hp.insert_all(neighborhood)
class SinglePercentileTracker(object):
    ''' A class that tracks a single percentile'''

    def __init__(self, percentile):
        '''Track ``percentile`` (divided by 100.0 when used, so 50 means the median).'''
        self.percentile_tracked = percentile
        self.lheap = MaxHeap()
        self.rheap = MinHeap()
        self.size = 0
        self.percentile = None

    def add(self, num):
        '''Add ``num`` and refresh ``self.percentile``.

        Each call does a constant number of heap pushes and pops. When one
        heap is empty, the percentile is taken from the root of the other
        heap instead of interpolating.
        '''
        self.size += 1
        n = (self.percentile_tracked / 100.0) * (self.size + 1)

        # The left heap should hold floor(n) items, so its max is the
        # floor(n)th ranked value and the right heap's min is the next one.
        lsize = int(math.floor(n))

        # Push the num on to the proper heap. With no percentile yet, the
        # value goes right. That is how Python 2 ordered `num > None`;
        # Python 3 would raise TypeError on that comparison.
        if self.percentile is None or num > self.percentile:
            self.rheap.push(num)
        else:
            self.lheap.push(num)

        # The right heap's size is derived from the running total, which
        # assumes both heaps are only ever filled through add().
        left_count = self.lheap.size()
        right_count = self.size - left_count

        # If the left heap isn't the right size, move one node across. The
        # extra guard avoids popping an empty right heap when lsize exceeds
        # the number of items seen so far.
        if left_count < lsize and right_count > 0:
            self.lheap.push(self.rheap.pop())
            left_count += 1
            right_count -= 1
        elif left_count > lsize:
            self.rheap.push(self.lheap.pop())
            left_count -= 1
            right_count += 1

        if left_count == 0:
            # Nothing ranks below the target yet: use the smallest value.
            self.percentile = self.rheap.get(0)
        elif right_count == 0:
            # Nothing ranks above the target yet: use the largest value.
            self.percentile = self.lheap.get(0)
        else:
            # Interpolate between the two roots by the fractional part of n.
            fr = n - int(n)
            low_data = self.lheap.get(0)
            high_data = self.rheap.get(0)
            self.percentile = fr * (high_data - low_data) + low_data

    def add_list(self, lst):
        '''Call ``add`` once for every item of ``lst``, in order.'''
        for l in lst:
            self.add(l)
class HeapMedian:
    ''' two-heap solution: a max-heap for the lower values, a min-heap for the upper ones '''

    def __init__(self):
        self.upper = MinHeap()
        self.lower = MaxHeap()

    def add(self, i):
        ''' insert i, then move at most one item so lower has 0 or 1 more items than upper '''
        # invariant on entry: the lower heap is never the smaller one
        assert self.lower.size() >= self.upper.size()

        belongs_low = self.lower.size() == 0 or i <= self.lower.peek()
        if belongs_low:
            self.lower.push(i)
        else:
            self.upper.push(i)

        surplus = self.lower.size() - self.upper.size()
        if surplus < 0:
            self.lower.push(self.upper.pop())
        elif surplus > 1:
            self.upper.push(self.lower.pop())

    def get(self):
        ''' return the top of the lower heap '''
        return self.lower.peek()
from heap import MinHeap
from random import randint

try:
    # time.clock was removed in Python 3.8; perf_counter is the documented
    # replacement for timing code.
    from time import perf_counter as clock
except ImportError:
    # Older interpreters without perf_counter still provide clock.
    from time import clock

# TODO use my mergesort for comparison?

# Smoke test: 30 random (key, value) pairs where key == value.
fill = []
for i in range(30):
    x = randint(0,10000)
    fill.append((x,x))

test = MinHeap(fill)
print(test.heap)

# Pop once per stored item, keeping the second field of each popped entry.
res = []
for i in range(test.size):
    res.append(test.pop()[1])
print(test.heap)

# Reference ordering from the built-in sort.
fill.sort(key=lambda tup : tup[0])
res2 = list(map(lambda x : x[0], fill))
print("Expect:",res2)
print("Get: ",res)

print("Now for excitement!")
heapElapsed = 0
listElapsed = 0
err = False
# NOTE(review): the visible source ends inside the inner loop below; the rest
# of the benchmark body is not shown here and is left as found.
for i in range(10000):
    fill = []
    for i in range(100):
        x = randint(0,10000)
# NOTE(review): the two functions below take ``self`` and look like methods of
# a class whose header is not visible here -- confirm their placement.
def find_shortest(self):
    """Return the length of ``self.encoding``."""
    return len(self.encoding)


def find_longest(self):
    """Return the length of ``self.encoding``."""
    return len(self.encoding)


# Read one integer per line, skipping the first line of the file.
with open("huffman.txt", "r") as handle:
    weights = [int(line) for line in handle.readlines()[1:]]

# Pair every weight with its 0-based position in the file.
infile = list(enumerate(weights))
##infile = [(0,1), (1,5), (2,7), (3,2), (4,3)]

min_heap = MinHeap()
for index, weight in infile:
    min_heap.insert(weight, HCLeafNode(index, weight))

# do until only one node left, the root
while min_heap.size() > 1:
    # the two smallest nodes get prefixes "0" and "1" before being merged
    first = min_heap.pop().get_data()[0]
    first.add_prefix("0")
    second = min_heap.pop().get_data()[0]
    second.add_prefix("1")

    joined = HCMiddleNode(first, second)
    min_heap.insert(joined.get_frequency(), joined)

tree = min_heap.pop().get_data()[0]
# Shortest distances from start_vertex with a lazy-deletion min-heap. Heap
# entries are (key=distance, data=(source vertex, destination vertex)).
minheap = MinHeap()
start_vertex = 1

# store shortest distances; a vertex missing from the dict has not been
# settled yet
shortest_distance = {start_vertex: 0}
added_vertices = [start_vertex]

curr = start_vertex
# Fetch the vertex table once and use the same accessor everywhere.
# NOTE(review): assumes g.get_graph() exposes the same mapping as g.graph.
graph_by_vertex = g.get_graph()
curr_vertex = graph_by_vertex[curr]
for neighbour, weight in curr_vertex.get_neighbours().items():
    # key is the weight of getting to each vertex in the unexplored area
    # first vertex in data is the source and second is the destination
    minheap.insert(weight, curr, neighbour)

while not minheap.is_empty():
    popped = minheap.pop()
    curr = popped.get_data()[1]
    if curr in shortest_distance:
        # stale entry: this vertex was already settled with a smaller key
        continue

    # the vertex from the searched area that points to the new element
    curr_parent = popped.get_data()[0]

    # the first time a vertex is popped, its key is its final distance
    shortest_distance[curr] = popped.get_key()

    for neighbour, weight in graph_by_vertex[curr].get_neighbours().items():
        if neighbour in shortest_distance:
            # already settled; pushing it would only add an entry that the
            # check above discards later
            continue
        # new distance to the neighbour = shortest distance to current +
        # weight of the edge between them; duplicate entries for the same
        # vertex are fine because later pops of it are ignored
        minheap.insert(shortest_distance[curr] + weight, curr, neighbour)