예제 #1
0
def merge_sorted_lists():
    """Demo of a k-way merge of sorted lists driven by a min-heap.

    Generates ``k`` sorted lists of ``n`` values each (via
    ``generate_sorted_lists``), merges them into one ``SingleLinkedList`` and
    prints the expected element count, the actual count and the merged list.

    NOTE(review): ``generate_sorted_lists`` is assumed to return a dict that
    maps a list id to a plain Python list -- confirm against its definition.
    """
    k = 10
    n = 10
    min_value = 0
    max_value = 30
    sorted_lists = generate_sorted_lists(k, n, min_value, max_value)
    sorted_list = SingleLinkedList()
    heap = MinHeap([], 0)
    # Seed the heap with the head of every input list.
    for i, values in sorted_lists.items():
        heap.add(IdAndValue(i, values.pop(0)))

    while len(sorted_lists) > 0:
        item = heap.pop()
        sorted_list.append(item.get_value())

        list_id = item.get_id()

        if list_id in sorted_lists:
            # Refill from the list the popped value came from.
            value = sorted_lists[list_id].pop(0)
            if len(sorted_lists[list_id]) <= 0:
                sorted_lists.pop(list_id)

            heap.add(IdAndValue(list_id, value))

        else:
            # That list is already exhausted: refill from any remaining list.
            # next(iter(...)) picks the same first entry as items()[0] did,
            # but also works when items() is a view rather than a list.
            list_id, remaining = next(iter(sorted_lists.items()))
            heap.add(IdAndValue(list_id, remaining.pop(0)))

            if len(remaining) <= 0:
                sorted_lists.pop(list_id)
    # Drain the heap. Append the wrapped value (as the main loop does), not
    # the IdAndValue wrapper itself.
    while not heap.is_empty():
        sorted_list.append(heap.pop().get_value())

    # Single pre-formatted string: same output under Python 2 and Python 3.
    print("%s %s %s" % (k * n, len(sorted_list), sorted_list))
예제 #2
0
    def test_empty(self):
        """empty() is true for a new heap, false after append, true again after pop."""
        heap = MinHeap()
        # Nothing stored yet.
        self.assertTrue(heap.empty())

        # One element in.
        heap.append(1)
        self.assertFalse(heap.empty())

        # ...and out again.
        heap.pop()
        self.assertTrue(heap.empty())
예제 #3
0
def test_0():
    """Check navigation helpers, validity and push/pop on a MinHeap built with n=3."""
    # Layout of the 13 values, level by level:
    #   0
    #   1 2 3
    #   4 5 6   7 8 9   10 11 12
    heap = MinHeap(*range(13), n=3)
    assert heap.height == 3

    # Top two levels: parent link, each child slot, then first/last child.
    for node in range(4):
        if node == 0:
            assert heap.parent(node) is None
        else:
            assert heap.parent(node) == 0
        first_child = 3 * node + 1
        for slot in range(3):
            assert heap.child(node, slot) == first_child + slot
        assert heap.first(node) == first_child
        assert heap.last(node) == first_child + 2

    # Bottom level: every entry points back at its parent.
    for leaf in range(4, 13):
        assert heap.parent(leaf) == (leaf - 1) // 3
    assert heap.is_valid()

    # walk_up yields (parent, child) pairs from the start node up to the root.
    for leaf in (4, 5, 6, 7, 12):
        up = (leaf - 1) // 3
        assert tuple(heap.walk_up(leaf)) == ((up, leaf), (0, up))
    assert tuple(heap.walk_down()) == ((0, 1), (1, 4))

    # The heap must stay valid after every mutation.
    for value in (14, 6):
        heap.push(value)
        assert heap.is_valid()
    for expected in (0, 1, 2):
        assert heap.pop() == expected
        assert heap.is_valid()
예제 #4
0
    def test_len(self):
        """len() follows the number of stored items through appends and pops."""
        heap = MinHeap()
        self.assertTrue(len(heap) == 0)

        # Grow by one (duplicate) value at a time.
        for expected in (1, 2):
            heap.append(1)
            self.assertTrue(len(heap) == expected)

        # Shrink back down to empty.
        for expected in (1, 0):
            heap.pop()
            self.assertTrue(len(heap) == expected)
예제 #5
0
class HuffmanEncoder:
    """Build a Huffman tree from a ``{character: frequency}`` mapping.

    ``generate_coding`` is the main entry point; it returns both the
    character -> code and the code -> character dictionaries.
    """

    # Class-level default kept for backward compatibility; every instance
    # replaces it with its own mapping in __init__.
    frequencies = {}

    def __init__(self, frequencies):
        """Store the frequency mapping and create the (still empty) node heap."""
        self.frequencies = frequencies
        self.nodes = MinHeap()

    def to_nodes(self):
        """Fill ``self.nodes`` with one leaf ``Node`` per character."""
        # Start from a fresh heap so that calling this (or
        # construct_huffman_tree) more than once never accumulates duplicates.
        self.nodes = MinHeap()
        for character, weight in self.frequencies.items():
            self.nodes.insert(Node(weight, None, None, character))

    def generate_coding(self):
        """Return ``(d, e)``: ``d`` maps character -> code, ``e`` maps code -> character."""
        d = {}
        e = {}
        tree = self.construct_huffman_tree()
        self.encode_huffman_tree_r(tree, d, e)
        return d, e

    def encode_huffman_tree_r(self, node, d, e, val=''):
        """Walk the tree below ``node`` and record the code of every leaf.

        ``val`` is the code prefix accumulated so far ('0' for each left
        step, '1' for each right step). ``d`` and ``e`` are filled in place
        and also returned.
        """
        if node.right is None and node.left is None:
            # A leaf reached with an empty prefix means the whole tree is a
            # single symbol; give it a usable one-bit code instead of ''.
            code = val or '0'
            d[node.character] = code
            e[code] = node.character

        if node.left is not None:
            self.encode_huffman_tree_r(node.left, d, e, val + '0')

        if node.right is not None:
            self.encode_huffman_tree_r(node.right, d, e, val + '1')

        return d, e

    def construct_huffman_tree(self):
        """Merge the two lightest nodes until one root remains and return it.

        The root is left in ``self.nodes`` as its only entry.

        Raises:
            ValueError: if ``self.frequencies`` is empty.
        """
        self.to_nodes()

        if self.nodes.length() == 0:
            raise ValueError("cannot build a Huffman tree from empty frequencies")

        while self.nodes.length() > 1:

            left = self.nodes.pop()
            right = self.nodes.pop()

            # When exactly one of the pair is a leaf and it was popped second,
            # put the leaf on the left.
            if right.character is not None and left.character is None:
                t = Node(left.value + right.value, right, left)
            else:
                t = Node(left.value + right.value, left, right)

            self.nodes.insert(t)

        # Exactly one node is left: the root. For a single-symbol input this
        # is the lone leaf rather than a meaningless placeholder node.
        root = self.nodes.pop()
        self.nodes.insert(root)
        return root
예제 #6
0
파일: tree.py 프로젝트: hzhaoc/utils
	def encode(self, symbols=None):
		"""
		Build the Huffman tree for the given symbols and record code statistics.

		symbols: [(w1, s1), (w2, s2), ..., (wn, sn)] where wi is the
		weight/frequency of the ith symbol si.

		Sets self._root, then (after self._symbol2codes()) self._maxDepth,
		self._minDepth and self._avgDepth from the lengths of self._codes.
		"""
		pq = MinHeap()
		# Work on a deep copy so the caller's list is never modified.
		entries = copy.deepcopy(symbols)
		# Wrap every symbol in a leaf node, keeping its weight as the heap key.
		nodes = [
			(entry[0], HuffmanNode(value=entry[1], left=None, right=None))
			for entry in entries
		]
		pq.heapify(nodes)
		while len(nodes) > 1:
			# Take the two lightest subtrees and join them under a new parent.
			lighter = pq.pop(nodes)
			heavier = pq.pop(nodes)
			merged = HuffmanNode(value=None, left=lighter[1], right=heavier[1])
			pq.add(heap=nodes, item=(lighter[0] + heavier[0], merged))
		# Only the finished tree is left; keep its node, drop the weight.
		self._root = pq.pop(nodes)[1]
		# Fill the symbol -> code dictionary.
		self._symbol2codes()
		code_lengths = [len(code) for code in self._codes.values()]
		self._maxDepth = max(code_lengths)
		self._minDepth = min(code_lengths)
		self._avgDepth = sum(code_lengths) / len(self._codes)
    def test_min_heap_sorting(self):
        """Popping everything yields sorted order, after insert() and after heapify()."""

        def assert_drains_in_order(heap, expected):
            # Every pop must return the next value of the sorted reference.
            for value in expected:
                self.assertEqual(value, heap.pop())

        # Fixed seeds keep the test consistent and reproducible.
        for seed in xrange(10):
            random.seed(seed)

            heap = MinHeap()
            shuffled = [int(random.random() * 20 - 10) for _ in xrange(1000)]
            expected = sorted(shuffled)

            # Path 1: build the heap one insert at a time.
            for value in shuffled:
                heap.insert(value)
            assert_drains_in_order(heap, expected)

            # Path 2: rebuild the now-empty heap in bulk.
            heap.heapify(shuffled)
            assert_drains_in_order(heap, expected)
예제 #8
0
    def test_min(self):
        """min() reports the smallest item without removing it; IndexError when empty."""
        heap = MinHeap()
        self.assertRaises(IndexError, heap.min)

        # A single item; asking twice shows min() does not consume it.
        heap.append(1)
        self.assertEqual(1, heap.min())
        self.assertEqual(1, heap.min())

        # A duplicate of the minimum.
        heap.append(1)
        self.assertEqual(1, heap.min())

        heap.pop()
        self.assertEqual(1, heap.min())

        # Empty again.
        heap.pop()
        self.assertRaises(IndexError, heap.min)

        # A smaller value arriving later becomes the new minimum...
        heap.append(3)
        self.assertEqual(3, heap.min())

        heap.append(1)
        self.assertEqual(1, heap.min())

        # ...and popping it exposes the previous one.
        heap.pop()
        self.assertEqual(3, heap.min())
예제 #9
0
def sort(array):
    """Heap-sort ``array``: push every item on a ``MinHeap`` and pop until it fails.

    Args:
        array: iterable of mutually comparable items.

    Returns:
        A new list with the items in the order ``MinHeap.pop`` returns them.
    """
    heap = MinHeap()
    for i in array:
        heap.push(i)

    out = []
    while True:
        try:
            out.append(heap.pop())
        except Exception:
            # pop() failing is the "heap is empty" signal. Catch Exception
            # rather than everything so Ctrl-C / SystemExit still propagate.
            # NOTE(review): narrow further once the exact exception raised by
            # MinHeap.pop on an empty heap is confirmed.
            break

    return out
예제 #10
0
class PriorityQueue:
	"""Priority queue that stores ``PriorityQueueItem`` objects in a ``MinHeap``."""

	def __init__(self):
		"""Create an empty queue."""
		self.heap = MinHeap()

	def enqueue(self, priority, item):
		"""Add ``item`` to the queue with the given ``priority``."""
		self.heap.push(PriorityQueueItem(priority, item))

	def dequeue(self):
		"""Pop the next entry from the heap and return its ``value``.

		Returns ``None`` when nothing can be popped.
		"""
		try:
			return self.heap.pop().value
		except Exception:
			# Best-effort "queue is empty" handling. Catch Exception rather
			# than everything so KeyboardInterrupt / SystemExit still propagate.
			return None
예제 #11
0
    def test_pop(self):
        """pop() raises IndexError on an empty heap, else returns items smallest first."""
        heap = MinHeap()
        self.assertRaises(IndexError, heap.pop)

        # One item in, the same item out, then empty again.
        heap.append(1)
        self.assertEqual(1, heap.pop())
        self.assertRaises(IndexError, heap.pop)

        # Items appended in descending order come back ascending.
        for value in (9, 6, 5, 3):
            heap.append(value)
        for expected in (3, 5, 6, 9):
            self.assertEqual(expected, heap.pop())
        self.assertRaises(IndexError, heap.pop)
예제 #12
0
	def algorithm(self, graph):
		"""Grow ``self.tree`` edge by edge from ``graph``, starting at its first edge by sort key.

		Each edge popped from the heap is removed from ``graph``. It is added
		to the tree (and its weight to ``self.min_weight``) unless both of its
		endpoints are already tree vertices.
		"""
		by_weight = sorted(graph.E, key=self.edge_wt_sort)

		frontier = MinHeap()
		frontier.insert(by_weight[0])

		while not frontier.empty():
			candidate = frontier.pop()
			graph.remove_edge(candidate)
			# Only accept the edge if at least one endpoint is new to the tree.
			if candidate.v1 not in self.tree.V() or candidate.v2 not in self.tree.V():
				self.tree.add_edge(candidate)
				self.min_weight += candidate.wt
				# Queue every edge touching either endpoint of the accepted edge.
				neighborhood = graph.edges(candidate.v1) + graph.edges(candidate.v2)
				frontier.insert_all(neighborhood)
예제 #13
0
class SinglePercentileTracker(object):
    """Track a single percentile of a stream of numbers using two heaps.

    ``lheap`` (a MaxHeap) holds the lower-ranked values and ``rheap`` (a
    MinHeap) the higher-ranked ones; ``percentile`` is interpolated between
    their tops after every ``add``.
    """

    def __init__(self, percentile):
        """Start tracking ``percentile`` (given on a 0-100 scale)."""
        self.percentile_tracked = percentile
        self.lheap = MaxHeap()
        self.rheap = MinHeap()
        self.size = 0
        # Current estimate; stays None until the first value has been added.
        self.percentile = None

    def add(self, num):
        """Add ``num`` and refresh ``self.percentile``.

        Each call performs a constant number of heap operations: one push, at
        most one pop + push to rebalance, and two ``get(0)`` look-ups.
        """
        self.size += 1
        n = (self.percentile_tracked / 100.0) * (self.size + 1)
        # The left heap should always hold floor(n) items, so its max is the
        # floor(n)-th ranked value and the min of the right heap is the next
        # ranked value.
        lsize = int(math.floor(n))
        # Push the number onto the proper heap. Before the first estimate
        # exists there is nothing to compare against: send the value right and
        # let the rebalancing below move it if needed. (Comparing a number
        # with None raises TypeError on Python 3.)
        if self.percentile is None or num > self.percentile:
            self.rheap.push(num)
        else:
            self.lheap.push(num)

        # If the left heap isn't the right size, move one item across.
        if self.lheap.size() < lsize:
            self.lheap.push(self.rheap.pop())
        elif self.lheap.size() > lsize:
            self.rheap.push(self.lheap.pop())
        # Interpolate between the two neighbouring ranks using the fractional
        # part of n as the weight.
        fr = n - int(n)
        # NOTE(review): either heap can be empty here (e.g. right after the
        # first add); what get(0) returns in that case is not visible from
        # this class -- confirm against the heap implementation.
        low_data = self.lheap.get(0)
        high_data = self.rheap.get(0)
        self.percentile = fr * (high_data - low_data) + low_data

    def add_list(self, lst):
        """Add every item of ``lst`` in order, one ``add`` call per item."""
        for l in lst:
            self.add(l)
예제 #14
0
class HeapMedian:
    """Two-heap tracker: ``lower`` is a MaxHeap, ``upper`` a MinHeap.

    ``add`` keeps ``lower`` either the same size as ``upper`` or one item
    larger; ``get`` returns the top of ``lower``.
    """

    def __init__(self):
        self.upper = MinHeap()
        self.lower = MaxHeap()

    def add(self, i):
        """Insert ``i`` into the matching half, then rebalance the two heaps."""
        lower, upper = self.lower, self.upper
        assert lower.size() >= upper.size()

        # Values not above the current top of ``lower`` belong to the lower half.
        belongs_low = lower.size() == 0 or i <= lower.peek()
        target = lower if belongs_low else upper
        target.push(i)

        # Move at most one item across to restore the size relationship.
        if lower.size() < upper.size():
            lower.push(upper.pop())
        elif lower.size() > upper.size() + 1:
            upper.push(lower.pop())

    def get(self):
        """Return the item currently on top of ``lower``."""
        top_of_lower = self.lower.peek()
        return top_of_lower
예제 #15
0
from heap import MinHeap
from random import randint
from time import clock

# TODO use my mergesort for comparison?

# Smoke test: build a heap from 30 random (key, value) pairs where key == value.
fill = []
for i in range(30):
    x = randint(0,10000)
    fill.append((x,x))
test = MinHeap(fill)
print(test.heap)

# Pop as many times as the heap reported holding, keeping the second field
# of each popped tuple. range() is evaluated once, before any pop happens.
res = []
for i in range(test.size):
    res.append(test.pop()[1])
print(test.heap)

# Reference ordering: sort the same pairs with the built-in sort and compare by eye.
fill.sort(key=lambda tup : tup[0])
res2 = list(map(lambda x : x[0], fill))
print("Expect:",res2)
print("Get:   ",res)

# State for the repeated run that follows.
print("Now for excitement!")
heapElapsed = 0
listElapsed = 0
err = False
for i in range(10000):
    fill = []
    for i in range(100):
        x = randint(0,10000)
예제 #16
0
    def find_shortest(self):
        """Return the length of ``self.encoding``."""
        encoding = self.encoding
        return len(encoding)

    def find_longest(self):
        """Return the length of ``self.encoding``."""
        encoding = self.encoding
        return len(encoding)


# Read the weights: skip the first line of huffman.txt, parse every other
# line as an int, then pair each weight with its 0-based position.
# NOTE: ``infile`` is rebound to a list inside the ``with`` block; the file
# itself is still closed when the block exits.
# presumably the skipped first line is a symbol count -- verify against the data file
with open("huffman.txt", "r") as infile:
    infile = [int(i) for i in infile.readlines()[1:]]
    infile = list(enumerate(infile))

##infile = [(0,1), (1,5), (2,7), (3,2), (4,3)]

# One leaf node per (index, weight) pair, keyed in the heap by its weight.
min_heap = MinHeap()
for i in infile:
    min_heap.insert(i[1], HCLeafNode(i[0], i[1]))

# Repeat until only one node is left: the root.
while min_heap.size() > 1:
    # Take the two smallest nodes; the first gets prefix "0", the second "1".
    small = min_heap.pop().get_data()[0]
    small.add_prefix("0")
    two_small = min_heap.pop().get_data()[0]
    two_small.add_prefix("1")
    # Merge them and put the result back, keyed by the merged node's frequency.
    merged = HCMiddleNode(small, two_small)

    min_heap.insert(merged.get_frequency(), merged)

# The single remaining heap entry carries the finished tree.
tree = min_heap.pop().get_data()[0]
예제 #17
0
# Shortest-distance search from ``start_vertex`` over graph ``g`` (defined
# elsewhere), using a min-heap of (distance, source, destination) entries.
minheap = MinHeap()

start_vertex = 1
# Final shortest distance per settled vertex; a vertex missing from this dict
# has not been reached yet.
shortest_distance = {start_vertex: 0}
# NOTE(review): added_vertices is not read anywhere in this snippet.
added_vertices = [1]
curr = start_vertex
curr_vertex = g.get_graph()[curr]
for neighbour, weight in curr_vertex.get_neighbours().items():
    # The key is the cost of reaching the neighbour; the data fields are the
    # source vertex followed by the destination vertex.
    minheap.insert(weight, curr, neighbour)

while not minheap.is_empty():
    popped = minheap.pop()
    # Second data field = destination vertex (see the insert calls).
    curr = popped.get_data()[1]
    if curr in shortest_distance:
        # Already settled: this is a stale duplicate entry, drop it and move on.
        continue
    # The settled vertex that this entry was reached from.
    # NOTE(review): curr_parent is not read anywhere in this snippet.
    curr_parent = popped.get_data()[0]
    # The key of the first entry popped for a vertex is recorded as its distance.
    shortest_distance[curr] = popped.get_key()
    for neighbour, weight in g.graph[curr].get_neighbours().items():
        # No entries are ever deleted from the heap. Shorter paths pop first,
        # and the ``curr in shortest_distance`` check above discards any later
        # duplicate for the same vertex.

        # Candidate distance to the neighbour = distance to curr + edge weight.
        minheap.insert(shortest_distance[curr] + weight, curr, neighbour)