def median_maintenance(data):
    """Yield the running median of ``data`` after each successive element.

    Two heaps are maintained: ``h_low`` (a MaxHeap of the smaller half) and
    ``h_high`` (a MinHeap of the larger half).  When the number of elements
    seen so far is even, the lower of the two middle values is reported.

    Args:
        data: an indexable, sliceable sequence of mutually comparable values.

    Yields:
        One median per element of ``data``, in input order.  Nothing is
        yielded for an empty sequence.
    """
    # Guard the short inputs: the two-heap setup below needs two elements.
    if len(data) == 0:
        return
    yield data[0]
    if len(data) == 1:
        return

    # Seed the heaps so that h_low holds the smaller of the first two values.
    if data[0] < data[1]:
        h_high, h_low = MinHeap([data[1]]), MaxHeap([data[0]])
    else:
        h_high, h_low = MinHeap([data[0]]), MaxHeap([data[1]])

    # The heaps are only read through extract + re-insert in this function.
    median = h_low.extract_max()
    h_low.insert(median)
    yield median

    for k in data[2:]:
        # Look at the two boundary values, route k, then put them back.
        lower, upper = h_low.extract_max(), h_high.extract_min()
        if k <= lower:
            h_low.insert(k)
        else:
            h_high.insert(k)
        h_low.insert(lower)
        h_high.insert(upper)

        # Rebalance so the heap sizes never differ by more than one.
        if abs(h_high.size() - h_low.size()) > 1:
            if h_high.size() > h_low.size():
                h_low.insert(h_high.extract_min())
            else:
                h_high.insert(h_low.extract_max())

        # Even count or larger low half -> median is the top of h_low;
        # otherwise the high half holds the extra element.
        total = h_high.size() + h_low.size()
        if total % 2 == 0 or h_low.size() > h_high.size():
            median = h_low.extract_max()
            h_low.insert(median)
        else:
            median = h_high.extract_min()
            h_high.insert(median)
        yield median
class TestMinHeap(unittest.TestCase):
    """Unit tests for MinHeap: repr, insert, get_min, extract_min, build_heap."""

    def setUp(self):
        # Every test starts from a freshly constructed heap.
        self.heap = MinHeap()

    def test_basic_initialization_and_repr(self):
        """A newly constructed heap has the repr of an empty list."""
        self.assertEqual(repr(self.heap), '[]')

    def test_insert(self):
        """After each insert, check the repr and the ``size`` attribute."""
        steps = [
            (4, '[4]'),
            (4, '[4, 4]'),
            (6, '[4, 4, 6]'),
            (1, '[1, 4, 6, 4]'),
            (3, '[1, 3, 6, 4, 4]'),
        ]
        for expected_size, (value, expected_repr) in enumerate(steps, start=1):
            self.heap.insert(value)
            self.assertEqual(repr(self.heap), expected_repr)
            self.assertEqual(self.heap.size, expected_size)

    def test_get_min(self):
        """get_min is None on an empty heap, then tracks the smallest value."""
        self.assertEqual(self.heap.get_min(), None)
        for value, expected_min in ((4, 4), (7, 4), (2, 2), (-1, -1)):
            self.heap.insert(value)
            self.assertEqual(self.heap.get_min(), expected_min)

    def test_extract_min(self):
        """extract_min returns values in ascending order, then None."""
        for value in (4, 5, 7, 2, -1):
            self.heap.insert(value)
        for expected in (-1, 2, 4, 5, 7, None):
            self.assertEqual(self.heap.extract_min(), expected)

    def test_build_heap(self):
        """build_heap on a list gives the expected repr."""
        self.heap.build_heap([4, 4, 6, 1, 3])
        self.assertEqual(repr(self.heap), '[1, 3, 6, 4, 4]')
class Median:
    """Running-median tracker built on a MaxHeap/MinHeap pair.

    ``h_low`` (MaxHeap) holds the smaller values and ``h_high`` (MinHeap) the
    larger ones; ``add_element`` keeps their sizes within one of each other.
    """

    def __init__(self):
        self.h_low = MaxHeap()
        self.h_high = MinHeap()

    def add_element(self, value):
        """Insert ``value`` into the appropriate heap and rebalance if needed."""
        low, high = self.h_low, self.h_high
        goes_low = low.heap_size == 0 or value < low.max()

        if not goes_low:
            high.insert(value)
            # High half got too big: move its smallest element down.
            if high.heap_size - low.heap_size > 1:
                low.insert(high.extract_min())
            return

        low.insert(value)
        # Low half got too big: move its largest element up.
        if low.heap_size - high.heap_size > 1:
            high.insert(low.extract_max())

    def get_median(self):
        """Return the median of the elements added so far.

        For an even element count the two middle values are returned as a
        ``(low_max, high_min)`` tuple; for an odd count the single middle
        value is returned, taken from whichever heap is larger.
        """
        low_count = self.h_low.heap_size
        high_count = self.h_high.heap_size
        if (low_count + high_count) % 2 == 0:
            return self.h_low.max(), self.h_high.min()
        if low_count > high_count:
            return self.h_low.max()
        return self.h_high.min()

    def get_maxheap_elements(self):
        """Return the ``heap`` attribute of the low-half heap."""
        return self.h_low.heap

    def get_minheap_elements(self):
        """Return the ``heap`` attribute of the high-half heap."""
        return self.h_high.heap
def Prim(V, s):
    """Prim's algorithm, started from vertex ``s``.

    Returns a tuple ``(tree, tree_sum)``: ``tree`` is a list of
    ``(parent, vertex)`` edges and ``tree_sum`` is the sum of the keys
    recorded in ``D`` for those vertices.

    ``V`` is iterated once; each vertex must provide ``get_neighbors()``
    yielding ``(neighbor, weight)`` pairs.
    NOTE(review): ``D[v]`` is read after ``v`` has been extracted, so this
    assumes MinHeap keeps extracted keys readable - confirm.
    """
    # initialize structures
    unvisited = set()
    D = MinHeap()
    P = {}

    # every vertex starts unreachable except the source
    for vertex in V:
        unvisited.add(vertex)
        D[vertex] = float('inf')
    D[s] = 0
    P[s] = None

    while unvisited:
        current, _ = D.extract_min()
        unvisited.remove(current)
        for neighbor, weight in current.get_neighbors():
            # Only vertices not yet visited may have their key lowered.
            if neighbor in unvisited and weight < D[neighbor]:
                D[neighbor] = weight
                P[neighbor] = current

    # The source has parent None and contributes no edge.
    tree = [(parent, child) for child, parent in P.items() if parent is not None]
    tree_sum = sum(D[child] for child, parent in P.items() if parent is not None)
    return tree, tree_sum
def Dijkstra(V, s):
    """Dijkstra's algorithm, started from vertex ``s``.

    Returns ``(D, P)``: the MinHeap ``D`` indexed by vertex with the best
    distance found, and the dict ``P`` mapping a vertex to the vertex it was
    reached from (``None`` for ``s``).

    ``V`` is iterated once; each vertex must provide ``get_neighbors()``
    yielding ``(neighbor, distance)`` pairs.
    NOTE(review): ``D[v]`` is read after ``v`` has been extracted, so this
    assumes MinHeap keeps extracted keys readable - confirm.
    """
    # initialize structures
    pending = set()
    D = MinHeap()
    P = {}

    # every vertex starts at infinite distance except the source
    for vertex in V:
        pending.add(vertex)
        D[vertex] = float('inf')
    D[s] = 0
    P[s] = None

    while pending:
        current, _ = D.extract_min()
        pending.remove(current)
        for neighbor, step in current.get_neighbors():
            through_current = D[current] + step
            # Relax only edges that lead to a vertex still pending.
            if neighbor in pending and through_current < D[neighbor]:
                D[neighbor] = through_current
                P[neighbor] = current

    return D, P
def sort_k_sorted(array, k):
    """Return a new list built by streaming ``array`` through a MinHeap.

    The heap is seeded with the first ``k + 2`` elements.  On every step the
    current minimum is moved to the output and the next unseen element of
    ``array``, if any, is pushed in.  ``array`` itself is only read.
    """
    window = k + 2
    min_heap = MinHeap(array[:window])
    result = []
    # ``incoming`` is the index of the element that enters the heap on this step.
    for incoming in range(window, window + len(array)):
        result.append(min_heap.extract_min())
        if incoming < len(array):
            min_heap.insert(array[incoming])
    return result
def heap_sort(arr):
    """Extract ``len(arr)`` minima from a MinHeap over ``arr`` and append them to ``arr``.

    Returns None; the result is delivered through ``arr``.
    NOTE(review): this is an in-place ascending sort only if
    ``MinHeap(array=arr)`` adopts ``arr`` as its backing store and
    ``extract_min`` removes elements from it.  If MinHeap copies its input,
    ``arr`` ends up with the sorted values appended after the originals -
    confirm against the MinHeap implementation.
    """
    min_heap = MinHeap(array=arr)
    ascending = [min_heap.extract_min() for _ in range(len(arr))]
    for item in ascending:
        arr.append(item)
def merge(lists):
    """Lazily merge several sorted sequences into a single ascending stream.

    Args:
        lists: an indexable collection of sequences, each assumed to be
            sorted ascending already.  Empty sequences are skipped.

    Yields:
        Every element of every input sequence, smallest first.

    The MinHeap stores one entry per non-exhausted input: the key is that
    input's current element and the payload is ``(list index, position)``.
    """
    # Seed with the head of each non-empty input; indexing [0] on an empty
    # sequence would raise IndexError.
    heads = [
        (seq[0], (list_index, 0))
        for list_index, seq in enumerate(lists)
        if len(seq) > 0
    ]
    if not heads:
        return  # nothing to merge, so no heap is needed

    h = MinHeap()
    for key, origin in heads:
        h.insert(key, origin)

    while h:
        min_val, (list_index, pos) = h.extract_min()
        yield min_val
        # Advance within the list the value came from and re-seed the heap.
        seq = lists[list_index]
        pos += 1
        if pos < len(seq):
            h.insert(seq[pos], (list_index, pos))
def shortest_paths(self, v):
    '''
    Compute shortest-path distances from source vertex ``v`` to the other
    vertices using Dijkstra's algorithm.

    Returns a tuple ``(processed, PathFinder(trace))`` where ``processed``
    maps each vertex to its final distance and ``trace`` is the list of
    Edge objects recorded in processing order.

    NOTE(review): each trace edge starts at the destination of the previous
    trace edge (or at ``v`` for the first one), i.e. it links consecutively
    extracted vertices rather than a vertex to its shortest-path
    predecessor.  Confirm this is what PathFinder expects; it may be related
    to the KeyError mentioned below.
    '''
    processed = {}   # vertex -> final (geodesic) distance
    candidates = {}  # vertex -> current Dijkstra score, kept for O(1) lookups
    trace = []       # edges in order of processing; used to extract shortest paths

    def dijkstra_score(src, dest):
        # Distance to src (already final) plus the weight of src -> dest.
        return processed[src] + self.getWeight(src, dest)

    # Initialize Dijkstra scores: the source is processed at distance 0 and
    # its neighbours are scored; every other vertex starts at infinity
    # unless it has already been scored.
    for node in self.nodes:
        if node == v:
            processed[node] = 0
            for dest in self.edges[node]:
                score = dijkstra_score(node, dest)
                if dest not in candidates or score < candidates[dest]:
                    candidates[dest] = score
        elif node not in candidates:
            candidates[node] = float('inf')

    # Heapify (node, score) tuples, ordered by score.
    unprocessed = MinHeap(list(candidates.items()), lambda entry: entry[1])

    # Compute shortest paths.
    while not unprocessed.is_empty():
        node, dist = unprocessed.extract_min()
        processed[node] = dist
        del candidates[node]

        # Investigate KeyError when using WeightedEdge
        previous = trace[-1].getDestination() if trace else v
        trace.append(Edge(previous, node))

        for dest in self.edges[node]:
            if dest not in candidates:
                continue
            # Remove the stale heap entry, then re-insert with the better score.
            unprocessed.delete((dest, candidates[dest]))
            score = dijkstra_score(node, dest)
            best = min(candidates[dest], score)
            candidates[dest] = best
            unprocessed.insert((dest, best))

    return (processed, PathFinder(trace))
class MedianMaintenance:
    """Running median over a stream, using a MaxHeap/MinHeap pair.

    ``hlow_heap`` (MaxHeap) holds the lower values and ``hhigh_heap``
    (MinHeap) the higher ones.
    """

    def __init__(self):
        self.hlow_heap = MaxHeap()
        self.hhigh_heap = MinHeap()

    def compute_median(self, i):
        """Add ``i`` to the stream and return the median of everything seen so far."""
        self.insert_heap(i)
        self.balance_heap()
        return self.median()

    def balance_heap(self):
        """Move one element across if the heap sizes differ by more than one."""
        surplus_high = self.hhigh_heap.size - self.hlow_heap.size
        if surplus_high > 1:
            self.hlow_heap.insert(self.hhigh_heap.extract_min())
        elif surplus_high < -1:
            self.hhigh_heap.insert(self.hlow_heap.extract_max())

    def insert_heap(self, i):
        """Insert ``i`` into the high heap if it exceeds both tops, else the low heap."""
        low = None if self.hlow_heap.is_empty() else self.hlow_heap.peek_max()
        high = None if self.hhigh_heap.is_empty() else self.hhigh_heap.peek_min()

        belongs_low = low is None or i < low
        if not belongs_low and high is not None and i > high:
            self.hhigh_heap.insert(i)
        else:
            # Either i is below the low top, or it is wedged between the
            # two tops, in which case the low heap is the default.
            self.hlow_heap.insert(i)

    def median(self):
        """Return the top of the high heap if it is exactly one larger, else the top of the low heap."""
        if self.hhigh_heap.size - self.hlow_heap.size == 1:
            return self.hhigh_heap.peek_min()
        # default choice when hlow is bigger than / the same size as hhigh
        return self.hlow_heap.peek_max()
def get_max_pairs(A, B, M=None):
    """Yield the largest pairwise sums ``a + b`` (a from A, b from B), biggest first.

    Args:
        A, B: iterables of numbers.  They are sorted into private copies, so
            the caller's objects are left untouched.
        M: how many sums to produce.  ``None`` (the default) means
            ``len(A)``.  An explicit ``0`` produces nothing.  Requests above
            the number of available pairs are capped at
            ``len(A) * len(B)``.

    Yields:
        Up to ``M`` sums in non-increasing order.

    The search starts at the pair of the two maxima and expands to the
    neighbouring index pairs ``(i - 1, j)`` and ``(i, j - 1)``.  Keys are
    negated so that the MinHeap returns the largest sum first.
    """
    # Sorted copies: do not reorder the caller's lists as a side effect.
    A = sorted(A)
    B = sorted(B)

    if M is None:
        M = len(A)
    # Never ask the heap for more pairs than exist.
    limit = min(M, len(A) * len(B))
    if limit <= 0:
        return

    h = MinHeap()
    # Each list has its own last index; they need not be the same length.
    start = (len(A) - 1, len(B) - 1)
    h.insert(-A[start[0]] - B[start[1]], start)
    used_pairs = {start}

    for _ in range(limit):
        key, (i, j) = h.extract_min()
        yield -key
        for pair in ((i - 1, j), (i, j - 1)):
            if pair[0] < 0 or pair[1] < 0 or pair in used_pairs:
                continue
            h.insert(-A[pair[0]] - B[pair[1]], pair)
            used_pairs.add(pair)