def test():
    """Sort the letters of 'SORTEXAMPLE' with HeapSort and check the result.

    Written so that it runs on both Python 2 and Python 3: the original used
    the ``print`` statement and the ``unicode`` builtin, which are a syntax
    error and a NameError respectively on Python 3.
    """
    try:
        text_type = unicode  # noqa: F821 -- only exists on Python 2
    except NameError:
        text_type = str  # Python 3: str is already the text type

    h = Heap()
    h.data_from_string('SORTEXAMPLE')
    # Single pre-formatted argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('initial %s' % (h,))

    s = HeapSort()
    sorted_heap = s.sort(h)
    # The input heap is used as the assertion message, as in the original.
    assert text_type(sorted_heap) == 'A E E L M O P R S T X', h
    print('sort ok')
def random_heap():
    """Return a Heap populated by pushing 10, 20, 2, 100 and 150 in that order."""
    heap = Heap()
    for value in (10, 20, 2, 100, 150):
        heap.push(value)
    return heap
def test_pop_heap():
    """pop_heap yields [-2, 3, 10] however the heap was filled.

    The three heaps are filled from ``num_list`` via the constructor,
    ``extend`` and repeated ``push`` respectively.
    """
    from_ctor = Heap(num_list)
    from_extend = Heap()
    from_push = Heap()
    from_extend.extend(num_list)
    for value in num_list:
        from_push.push(value)

    wanted = [-2, 3, 10]
    for filled in (from_ctor, from_extend, from_push):
        assert wanted == list(pop_heap(filled))
def __init__(self, size, alpha):
    """Set up the heap, rank list and precomputed priority tables.

    Args:
        size: forwarded unchanged to the parent class initializer.
        alpha: non-negative exponent applied to ranks and priorities.
    """
    super(RankBasedReplay, self).__init__(size)
    assert alpha >= 0
    self.alpha = alpha
    self.sorted = []
    self.heap = Heap(self.maxsize)
    # will change as priorities are updated according to TD error
    self.max_priority = 1.0
    # ranges of equal probability of a zipf distribution for a few values of N
    self.N_list, self.range_list = load_quantiles()
    # index into N_list of the ranges currently in use
    self.range_idx = 0
    # normalizing factors for the priority distributions, one per N
    self.priority_sums = [
        sum(rank ** (-alpha) for rank in range(1, n + 1))
        for n in self.N_list
    ]
    # minimum possible priority for each N
    self.min_priorities = [
        n ** (-alpha) / total
        for n, total in zip(self.N_list, self.priority_sums)
    ]
def test2():
    """Insert five HeapItems, then print the heap before and after build_heap()."""
    heap = Heap(max_size)
    for key in range(5):
        item = HeapItem(key, key)
        heap.insert(item)
    print(heap)

    heap.build_heap()
    print(heap)
def test_pop_heap():
    """Check that pop_heap drains each heap as [-2, 3, 10].

    One heap is built from ``num_list`` directly, one through ``extend`` and
    one through item-by-item ``push``.
    """
    built = Heap(num_list)
    extended = Heap()
    pushed = Heap()
    extended.extend(num_list)
    for item in num_list:
        pushed.push(item)

    expected_order = [-2, 3, 10]
    assert expected_order == list(pop_heap(built))
    assert expected_order == list(pop_heap(extended))
    assert expected_order == list(pop_heap(pushed))
class RankBasedReplay(ExperienceReplay):
    """Experience replay that samples transitions by priority rank.

    Each ``self.buffer`` entry is ``[experience, heap_index]`` and each heap
    item carries the index of its buffer entry. ``self.sorted`` maps a
    1-based rank (position ``rank - 1``) to a heap index.
    """

    def __init__(self, size, alpha):
        """Set up the heap, rank list and precomputed priority tables.

        Args:
            size: forwarded unchanged to the parent class initializer.
            alpha: non-negative exponent applied to ranks and priorities.
        """
        super(RankBasedReplay, self).__init__(size)
        assert alpha >= 0
        self.alpha = alpha
        self.sorted = []
        self.heap = Heap(self.maxsize)
        # will change as priorities are updated according to TD error
        self.max_priority = 1.0
        # ranges of equal probability of a zipf distribution for a few values of N
        self.N_list, self.range_list = load_quantiles()
        # index into N_list of the ranges currently in use
        self.range_idx = 0
        # normalizing factors for the priority distributions, one per N
        self.priority_sums = [
            sum(rank ** (-alpha) for rank in range(1, n + 1))
            for n in self.N_list
        ]
        # minimum possible priority for each N
        self.min_priorities = [
            n ** (-alpha) / total
            for n, total in zip(self.N_list, self.priority_sums)
        ]

    def add(self, experience):
        """Store ``experience`` at ``next_idx`` with the current maximum priority."""
        slot = self.next_idx
        if slot < len(self.buffer):
            # buffer is full at this slot: overwrite the old experience and
            # reset the priority of its existing heap item
            entry = self.buffer[slot]
            entry[0] = experience
            self.heap[entry[1]].value = self.max_priority ** self.alpha
        else:
            # still room: grow the buffer; the stored index is into the heap
            self.buffer.append([experience, slot])
            # the heap item's index is into the buffer
            self.heap.insert(HeapItem(self.max_priority ** self.alpha, slot))
            # while growing, the newest entry is ranked last until we resort
            self.sorted.append(slot)
        self.next_idx = (slot + 1) % self.maxsize

        # move on to the next set of precomputed ranges once the buffer is
        # large enough for it
        following = self.range_idx + 1
        if following < len(self.N_list) and len(self.buffer) >= self.N_list[following]:
            self.range_idx = following

    def _sample_by_rank(self, batch_size):
        """Draw one rank uniformly from each of the current precomputed ranges.

        If the buffer holds fewer than ``batch_size`` entries, every rank
        ``1..len(buffer)`` is returned instead.
        """
        stored = len(self.buffer)
        if stored < batch_size:
            return list(range(1, stored + 1))

        rng = self.np_random
        # each range is a closed interval, hence the + 1 on the upper bound
        return [
            rng.randint(bounds[0], bounds[1] + 1)
            for bounds in self.range_list[self.range_idx]
        ]

    def sample(self, batch_size, beta):
        """Sample a batch of experiences with their weights and heap indices.

        Returns:
            A tuple: the items produced by ``encode_samples`` followed by the
            array of importance-sampling weights and the list of heap indices.
        """
        assert beta > 0
        ranks = self._sample_by_rank(batch_size)

        stored = len(self.buffer)
        normalizer = self.priority_sums[self.range_idx]
        # minimum possible priority for a transition
        p_min = self.min_priorities[self.range_idx]
        # (p_uniform / p_min)^beta is the maximum possible IS weight
        max_weight = (p_min * stored) ** (-beta)

        # IS weight per sample is (p_uniform / p_sample)^beta, divided by the
        # maximum so that weights only scale the update downwards
        weights = np.array([
            ((rank ** (-self.alpha) / normalizer) * stored) ** (-beta) / max_weight
            for rank in ranks
        ])

        heap_idxs = [self.sorted[rank - 1] for rank in ranks]
        buffer_idxs = [self.heap[position].index for position in heap_idxs]
        # collect the experiences at the chosen buffer indices
        encoded_sample = self.encode_samples(buffer_idxs, ranked_priority=True)
        return tuple(encoded_sample) + (weights, heap_idxs)

    def update_priorities(self, heap_idxs, priorities):
        """Set the priorities of the heap items at the given heap indices."""
        assert len(heap_idxs) == len(priorities)
        for position, new_priority in zip(heap_idxs, priorities):
            assert new_priority > 0
            assert 0 <= position < len(self.heap)
            self.heap[position].value = new_priority ** self.alpha
            if new_priority > self.max_priority:
                self.max_priority = new_priority

    def sort(self):
        """Re-heapify and rebuild the rank list. To be called periodically."""
        self.heap.build_heap()
        for position in range(len(self.heap)):
            # update the buffer entry's index into the heap
            self.buffer[self.heap[position].index][1] = position
        self.sorted = self.heap.get_k_largest(len(self.heap))
def test_copy_heap():
    """copy() returns a distinct heap object whose ``_vals`` compare equal."""
    original = Heap(num_list)
    duplicate = original.copy()
    assert original._vals == duplicate._vals
    assert original is not duplicate
def empty_heap():
    """Return a Heap constructed with no arguments."""
    return Heap()
from binary_heap import Node
from binary_heap import Heap

# Six nodes keyed 1..6, created in ascending key order.
n1, n2, n3, n4, n5, n6 = [Node(key) for key in (1, 2, 3, 4, 5, 6)]

heap = Heap()
for node in (n1, n2, n3, n4, n5, n6):
    heap.insert(node)

# Delete three nodes from the heap, printing the key of each one.
for _ in range(3):
    val = heap.delete()
    print(str(val.key))
def test_heap():
    """A Heap built with no arguments has 0 in slot 0 of ``heap_list``."""
    fresh = Heap()
    slots = fresh.heap_list
    assert slots[0] == 0
def test_iter_heap():
    """A Heap built from a list places 100 in slot 1 of ``heap_list``."""
    values = [100, 19, 36, 17, 3, 25, 1, 2, 7]
    built = Heap(values)
    assert built.heap_list[1] == 100
def full_heap():
    """Return a Heap filled by pushing nine fixed values one at a time."""
    heap = Heap()
    for value in (100, 19, 36, 17, 3, 25, 1, 2, 7):
        heap.push(value)
    return heap
def test_push_pop():
    """Run pushpop and poppush in sequence and check each returned value."""
    heap = Heap(num_list)
    # (operation, argument, expected return value), applied in this order
    steps = (
        (heap.pushpop, 0, -2),
        (heap.pushpop, -1, -1),
        (heap.poppush, 99, 0),
        (heap.poppush, -10, 3),
    )
    for operation, argument, expected in steps:
        assert operation(argument) == expected
def test_clear_heap():
    """clear() takes the heap from three items to an empty, falsy heap."""
    heap = Heap(num_list)
    assert len(heap) == 3

    heap.clear()
    assert len(heap) == 0
    assert not heap
def test_copy_heap():
    """A copied heap has equal ``_vals`` but is not the same object."""
    source = Heap(num_list)
    clone = source.copy()
    assert source._vals == clone._vals
    assert source is not clone
def test_peek_heap():
    """peek() returns the same value that the following pop() returns."""
    heap = Heap(num_list)
    # peek() is evaluated first (left operand), then pop()
    assert heap.peek() == heap.pop()
def test_push_pop():
    """Check the values returned by two pushpop and two poppush calls."""
    heap = Heap(num_list)

    # pushpop calls
    assert heap.pushpop(0) == -2
    assert heap.pushpop(-1) == -1

    # poppush calls
    assert heap.poppush(99) == 0
    assert heap.poppush(-10) == 3
def test_peek_heap():
    """The value seen by peek() equals the value removed by the next pop()."""
    heap = Heap(num_list)
    # operand order matters: peek() runs before pop()
    assert heap.peek() == heap.pop()
def test_max_heap():
    """With max_heap=True, pop_heap yields the values as [10, 3, -2]."""
    heap = Heap(num_list, max_heap=True)
    descending = [10, 3, -2]
    assert descending == list(pop_heap(heap))
def test_clear_heap():
    """A three-item heap reports length 0 and is falsy after clear()."""
    heap = Heap(num_list)
    assert len(heap) == 3
    heap.clear()
    assert len(heap) == 0
    assert not heap