class HeapTest(unittest.TestCase):
    """Tests for ``Heap`` ordered by the ``compare_request`` comparator.

    Uses the module-level fixtures ``req1``, ``req2`` and ``req3``.
    """

    def setUp(self):
        """Give every test a fresh, unbounded heap."""
        self.heap = Heap(compare_request)

    def test_push(self):
        """Two pushes leave two items in the heap."""
        self.heap.push(req1)
        self.heap.push(req2)
        self.assertEqual(len(self.heap), 2)

    def test_pop0_as_pop0(self):
        """``pop(0)`` is expected to return one item, exactly like ``pop(1)``."""
        self.heap.push(req1)
        popped = self.heap.pop(0)
        self.assertEqual(len(popped), 1)
        self.assertEqual(popped[0], req1)

        # Same scenario with an explicit count of one.
        self.heap.push(req1)
        popped = self.heap.pop(1)
        self.assertEqual(len(popped), 1)
        self.assertEqual(popped[0], req1)

    def test_pop_smallest_in_heap(self):
        """The first popped item is ``req2`` even though ``req1`` went in first."""
        self.heap.push(req1)
        self.heap.push(req2)
        popped = self.heap.pop(0)
        self.assertEqual(popped[0], req2)

    def test_push_limit(self):
        """With ``limit=2`` a third push hands back ``req2`` and keeps the size at 2."""
        self.heap = Heap(compare_request, limit=2)
        self.heap.push(req1)
        self.heap.push(req2)
        displaced = self.heap.push(req3)
        self.assertEqual(displaced, req2)
        self.assertEqual(len(self.heap), 2)
        self.assertEqual(self.heap.pop(2), [req3, req1])
def test_heap_order(self):
    """Check that ``Heap`` hands integers back in ascending order.

    Also covers asking for more items than remain (returns what is left)
    and popping from an empty heap (returns an empty list).
    """
    def compare(a, b):
        # Equivalent of the ``cmp`` builtin, which no longer exists on
        # Python 3; yields -1, 0 or 1 on both major versions.
        return (a > b) - (a < b)

    heap = Heap(compare)
    for value in (5, 2, 3, 4, 1):
        heap.push(value)

    assert heap.pop(1) == [1]
    assert heap.pop(3) == [2, 3, 4]
    assert heap.pop(10) == [5]
    assert heap.pop(1) == []
class MemoryBaseBackend(Backend):
    """
    Shared base for backends that queue requests in an in-memory heap.

    Known requests are kept in ``self.requests`` keyed by their
    ``meta['fingerprint']`` and scheduled through ``self.heap``. The heap
    ordering comes from ``_compare_pages``, which raises
    ``NotImplementedError`` here and has to be overridden.
    """
    component_name = 'Memory Base Backend'

    def __init__(self, manager):
        """Store ``manager`` and start with no known requests and an empty heap."""
        self.manager = manager
        # fingerprint -> stored copy of the request
        self.requests = {}
        self.heap = Heap(self._compare_pages)

    @classmethod
    def from_manager(cls, manager):
        """Alternate constructor: build the backend from ``manager``."""
        return cls(manager)

    def frontier_start(self):
        """No-op in this backend."""

    def frontier_stop(self):
        """No-op in this backend."""

    def add_seeds(self, seeds):
        """Push the stored request for every seed onto the heap.

        A seed is pushed whether or not its fingerprint was already known.
        """
        for seed in seeds:
            stored = self._get_or_create_request(seed)[0]
            self.heap.push(stored)

    def get_next_requests(self, max_next_requests, **kwargs):
        """Return the result of popping ``max_next_requests`` from the heap.

        Extra keyword arguments are accepted and ignored.
        """
        batch = self.heap.pop(max_next_requests)
        return batch

    def page_crawled(self, response, links):
        """Schedule links that have not been seen before.

        Each new link gets a depth one greater than the depth recorded on
        ``response.request`` (treated as 0 when absent). Links whose
        fingerprint is already known are skipped.
        """
        for link in links:
            stored, is_new = self._get_or_create_request(link)
            if not is_new:
                continue
            stored.meta['depth'] = response.request.meta.get('depth', 0) + 1
            self.heap.push(stored)

    def request_error(self, request, error):
        """Errors are ignored by this backend."""

    def _get_or_create_request(self, request):
        """Look up ``request`` by fingerprint, registering it when unknown.

        Returns a ``(stored_request, created)`` tuple; ``created`` is True
        only when the fingerprint had not been seen before.
        """
        key = request.meta['fingerprint']
        if key in self.requests:
            known = self.requests[key]
            self.manager.logger.backend.debug('Request exists %s' % request)
            return known, False

        fresh = self._create_request(request)
        self.requests[key] = fresh
        self.manager.logger.backend.debug('Creating request %s' % fresh)
        return fresh, True

    def _create_request(self, request):
        """Copy ``request`` and stamp the copy with a creation time and depth 0."""
        clone = request.copy()
        clone.meta['created_at'] = datetime.datetime.utcnow()
        clone.meta['depth'] = 0
        return clone

    def _compare_pages(self, first, second):
        """Comparator handed to the heap; subclasses must implement it."""
        raise NotImplementedError
def test_heap_obj(self):
    """Check that ``Heap`` orders arbitrary objects via a custom comparator.

    Three objects with scores 3, 1 and 2 are pushed; they must come back
    sorted by ascending ``score``, after which the heap is empty.
    """
    # This test used to be defined twice with identical bodies, so the
    # second definition silently replaced the first; one copy is kept.
    obj = type('obj', (object,), {})

    def make(score):
        # Build a bare instance carrying only the attribute being compared.
        item = obj()
        item.score = score
        return item

    a, b, c = make(3), make(1), make(2)

    # (x > y) - (x < y) stands in for the ``cmp`` builtin, which no longer
    # exists on Python 3.
    heap = Heap(lambda x, y: (x.score > y.score) - (x.score < y.score))
    heap.push(a)
    heap.push(b)
    heap.push(c)

    assert heap.pop(3) == [b, c, a]
    assert heap.pop(1) == []