class LoopingCallTest(unittest.TestCase):
    '''Exercise LoopingCall scheduling against a manually advanced Clock.'''

    default_args = (10, 'hello')
    default_kwargs = {'a': 47, 'b': 'c'}

    def setUp(self):
        '''Build a fresh clock, call target and LoopingCall for every test.'''
        clock = Clock()
        target = ModifiedObject()
        self.clock = clock
        self.obj = target
        self.sc = LoopingCall(target.func, clock=clock,
                              *self.default_args, **self.default_kwargs)

    def _check(self, args, kwargs):
        '''Compare the arguments recorded on ``self.obj`` with the expected ones.

        Passing ``None`` for ``args`` or ``kwargs`` asserts that the
        corresponding recorded attribute is ``None``.
        '''
        recorded = self.obj
        if args is not None:
            self.assertTupleEqual(recorded.args, args)
        else:
            self.assertIsNone(recorded.args)
        if kwargs is not None:
            self.assertEqual(recorded.kwargs, kwargs)
        else:
            self.assertIsNone(recorded.kwargs)

    def test_init(self):
        '''LoopingCall can be built, scheduled and cancelled with no clock given.'''
        # test initializing LoopingCall without overriding its clock
        looping_call = LoopingCall(self.obj.func,
                                   *self.default_args, **self.default_kwargs)
        looping_call.schedule()
        looping_call.cancel()

    def test_basic(self):
        '''Two delayed calls fire on time, then the call unschedules itself.'''
        sc, clock, obj = self.sc, self.clock, self.obj

        # scheduling
        self.assertFalse(sc.is_scheduled())
        sc.schedule(2, count=2, now=False)

        # before the first call
        self.assertTrue(sc.is_scheduled())
        self.assertEqual(sc.get_time(), 2)
        self.assertEqual(sc.calls_left(), 2)
        clock.advance(1)
        self.assertEqual(obj.num_calls, 0)

        # after the first call
        clock.advance(1)
        self.assertEqual(obj.num_calls, 1)
        self.assertEqual(sc.calls_left(), 1)
        self._check(self.default_args, self.default_kwargs)

        # after the second call
        clock.advance(2)
        self.assertEqual(obj.num_calls, 2)
        self.assertEqual(sc.calls_left(), 0)
        self.assertFalse(sc.is_scheduled())

        # no more calls
        clock.advance(20)
        self.assertEqual(obj.num_calls, 2)

    def test_now(self):
        '''With ``now=True`` the first call fires on a zero-length advance.'''
        clock, obj = self.clock, self.obj
        self.sc.schedule(2, count=2, now=True)
        self.assertEqual(obj.num_calls, 0)
        clock.advance(0)
        self.assertEqual(obj.num_calls, 1)
        clock.advance(20)
        self.assertEqual(obj.num_calls, 2)

    def test_infinite(self):
        '''Scheduling without ``count`` keeps firing and reports no calls left.'''
        sc = self.sc
        sc.schedule(2)
        self.clock.pump([2] * 100)
        self.assertEqual(self.obj.num_calls, 100)
        self.assertTrue(sc.is_scheduled())
        self.assertIsNone(sc.calls_left())

    def test_cancel(self):
        '''Cancelling before the first delay elapses prevents every call.'''
        clock = self.clock
        self.sc.schedule(2)
        clock.advance(1)
        self.sc.cancel()
        clock.advance(20)
        self.assertEqual(self.obj.num_calls, 0)

    def test_reschedule(self):
        '''Scheduling again restarts the delay from the moment of rescheduling.'''
        sc, clock, obj = self.sc, self.clock, self.obj
        sc.schedule(2)
        clock.advance(1)
        sc.schedule(5)
        clock.advance(4)
        self.assertEqual(obj.num_calls, 0)
        clock.advance(1)
        self.assertEqual(obj.num_calls, 1)

    def test_no_delay(self):
        '''With no delay given, each zero-length advance triggers one call.'''
        clock, obj = self.clock, self.obj
        self.sc.schedule()
        self.assertEqual(obj.num_calls, 0)
        clock.advance(0)
        self.assertEqual(obj.num_calls, 1)
        clock.advance(0)
        self.assertEqual(obj.num_calls, 2)

    def test_nested_schedule(self):
        '''A callback that reschedules its own LoopingCall still fires once per advance.'''
        def record_then_reschedule(*args, **kwargs):
            # Record the call first, then reschedule from inside the callback.
            self.obj.func(*args, **kwargs)
            self.sc.schedule()

        clock, obj = self.clock, self.obj
        self.sc.func = record_then_reschedule
        self.sc.schedule()
        self.assertEqual(obj.num_calls, 0)
        clock.advance(0)
        self.assertEqual(obj.num_calls, 1)
        clock.advance(0)
        self.assertEqual(obj.num_calls, 2)
class DownloaderTest(unittest.TestCase):
    '''Tests for Downloader, driven by a manual Clock and a mock handler.

    NOTE(review): another class named ``DownloaderTest`` is defined later in
    this file. The later definition rebinds the module-level name, so
    presumably only that one gets collected -- confirm whether this copy is
    stale.
    '''

    default_settings = {
        'CONCURRENT_REQUESTS': 2,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 0,
        'RANDOMIZE_DOWNLOAD_DELAY': False,
    }

    def setUp(self):
        '''Create the queues, the clock and a Downloader using ``default_settings``.'''
        self.clock = Clock()
        self.request_queue = MemoryQueue()
        self.response_queue = ResponseQueue()
        self.dwn = Downloader(
            Settings(self.default_settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def _update_dwn(self, **kwargs):
        '''Update downloader with the new settings.

        ``kwargs`` override entries of ``default_settings``. The processing
        call of the current downloader is cancelled, then a new Downloader is
        built on the same queues and clock.
        '''
        new_settings = self.default_settings.copy()
        new_settings.update(**kwargs)
        self.dwn.processing.cancel()
        self.dwn = Downloader(
            Settings(new_settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def test_concurrency(self):
        '''Check the concurrency attributes derived from various settings.'''
        # standard situation
        self._update_dwn()
        self.assertEqual(self.dwn.total_concurrency, 2)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertTrue(self.dwn.use_domain_specific)

        # delay set
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5,
                         DOWNLOAD_DELAY=5)
        self.assertEqual(self.dwn.total_concurrency, 1)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is 0
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=0)
        self.assertEqual(self.dwn.total_concurrency, 10)
        self.assertEqual(self.dwn.domain_concurrency, 10)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is too big
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=10)
        self.assertEqual(self.dwn.total_concurrency, 5)
        self.assertEqual(self.dwn.domain_concurrency, 5)
        self.assertFalse(self.dwn.use_domain_specific)

        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5)
        self.assertFalse(self.dwn.use_domain_specific)

    def test_get_slot(self):
        '''Check the slot key and slot identity returned by ``_get_slot``.'''
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, 'www.github.com')

        key2, slot2 = self.dwn._get_slot(
            Request('http://www.github.com/hello/world#bla'))
        self.assertEqual(key2, 'www.github.com')
        self.assertIs(slot2, slot)

        key3, slot3 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertEqual(key3, 'sites.github.com')
        self.assertIsNot(slot3, slot)

        self.assertEqual(len(self.dwn.slots), 2)

        # don't use domain specific slots
        self.dwn.use_domain_specific = False
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, '')
        key2, slot2 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertIs(slot2, slot)

    def test_basic(self):
        '''Walk five requests through the downloader, checking slot usage.'''
        # create 5 requests with slot ids: a, b, a, a, c
        requests = [get_request(slot_id)[0] for slot_id in 'abaac']
        # Explicit loop: map() is lazy on Python 3 and would push nothing.
        for request in requests:
            self.request_queue.push(request)
        self.assertEqual(self.dwn.free_slots, 2)
        self.assertTrue(self.dwn.is_idle())

        # start downloading first two requests
        self.clock.advance(0)
        self.assertEqual(self.dwn.free_slots, 0)
        self.assertFalse(self.dwn.is_idle())

        # no more requests are scheduled, until download is finished
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 3)

        # download the first request
        self.handler.call(requests[0], Response('hello'))
        self.assertEqual(self.dwn.free_slots, 1)  # slot is immediately available
        # result is also available
        result = self.response_queue.peek()
        self.assertIs(result.request, requests[0])
        self.assertEqual(result.url, 'hello')

        # enqueue third request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # download second request
        self.handler.call(requests[1], Response(''))

        # enqueue fourth request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # fourth request should not begin download, until 3rd request is done
        self.assertRaises(KeyError, self.handler.call, requests[3],
                          Response(''))

        # finish
        self.handler.call(requests[2], Response(''))
        self.handler.call(requests[3], Response(''))
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.handler.call(requests[4], Response(''))

        # final checks
        self.clock.pump([1] * 10)
        self.assertEqual(len(self.response_queue), 5)
        self.assertTrue(self.dwn.is_idle())

    def test_close(self):
        '''Check downloader state and queue contents around ``close()``.'''
        req1 = get_request('a')[0]
        req2 = get_request('b')[0]
        self.request_queue.push(req1)
        self.clock.advance(20)
        self.request_queue.push(req2)

        # test basic attributes, before and after closing
        self.assertTrue(self.dwn.running)
        self.assertTrue(self.dwn.processing.is_scheduled())
        self.dwn.close()
        self.assertFalse(self.dwn.running)
        self.assertFalse(self.dwn.processing.is_scheduled())

        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 1)  # request 2 remains unqueued

        # downloader behavior after closing
        self.assertEqual(len(self.response_queue), 0)
        self.handler.call(req1, Response(''))
        self.assertEqual(len(self.response_queue), 0)

    def test_fail(self):
        '''Failed and successful downloads both land in the response queue.'''
        self._update_dwn(CONCURRENT_REQUESTS=3,
                         CONCURRENT_REQUESTS_PER_DOMAIN=2)
        requests = [get_request(slot_id)[0] for slot_id in 'aab']
        # Explicit loop: map() is lazy on Python 3 and would push nothing.
        for request in requests:
            self.request_queue.push(request)

        # enqueue requests
        self.clock.advance(0)

        # fail 1st request
        err = ValueError('my bad')
        self.handler.fail(requests[0], err)
        self.assertEqual(self.dwn.free_slots, 1)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[0])
        self.assertIs(fail.value, err)

        # fail 3rd request
        self.handler.fail(requests[2], err)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[2])
        self.assertIs(fail.value, err)

        # succeed 2nd request
        self.handler.call(requests[1], Response('nice!', request=requests[1]))
        resp = self.response_queue.pop()
        self.assertIs(resp.request, requests[1])
        self.assertEqual(resp.url, 'nice!')

    def test_clear_slots(self):
        '''The number of slots stays bounded after 30 sequential downloads.'''
        # range() yields the same ids as xrange() and also exists on Python 3.
        requests = [get_request(index)[0] for index in range(30)]
        for request in requests:
            self.request_queue.push(request)
            self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
            self.handler.call(request, Response(''))
        # NOTE(review): assumed to run once after the loop -- confirm.
        self.assertLessEqual(len(self.dwn.slots),
                             2 * self.dwn.total_concurrency)
class DownloaderTest(unittest.TestCase):
    '''Tests for Downloader, driven by a manual Clock and a mock handler.

    NOTE(review): a class with the same name is also defined earlier in this
    file. This definition rebinds the module-level name, so presumably the
    earlier copy is never collected -- confirm and remove the stale one.
    '''

    default_settings = {
        'CONCURRENT_REQUESTS': 2,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 0,
        'RANDOMIZE_DOWNLOAD_DELAY': False,
    }

    def setUp(self):
        '''Create the queues, the clock and a Downloader using ``default_settings``.'''
        self.clock = Clock()
        self.request_queue = MemoryQueue()
        self.response_queue = ResponseQueue()
        self.dwn = Downloader(
            Settings(self.default_settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def _update_dwn(self, **kwargs):
        '''Update downloader with the new settings.

        ``kwargs`` override entries of ``default_settings``. The processing
        call of the current downloader is cancelled, then a new Downloader is
        built on the same queues and clock.
        '''
        new_settings = self.default_settings.copy()
        new_settings.update(**kwargs)
        self.dwn.processing.cancel()
        self.dwn = Downloader(
            Settings(new_settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def test_concurrency(self):
        '''Check the concurrency attributes derived from various settings.'''
        # standard situation
        self._update_dwn()
        self.assertEqual(self.dwn.total_concurrency, 2)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertTrue(self.dwn.use_domain_specific)

        # delay set
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5,
                         DOWNLOAD_DELAY=3.14)
        self.assertEqual(self.dwn.download_delay, 3.14)
        self.assertEqual(self.dwn.total_concurrency, 1)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is 0
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=0)
        self.assertEqual(self.dwn.total_concurrency, 10)
        self.assertEqual(self.dwn.domain_concurrency, 10)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is too big
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=10)
        self.assertEqual(self.dwn.total_concurrency, 5)
        self.assertEqual(self.dwn.domain_concurrency, 5)
        self.assertFalse(self.dwn.use_domain_specific)

        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5)
        self.assertFalse(self.dwn.use_domain_specific)

    def test_get_slot(self):
        '''Check the slot key and slot identity returned by ``_get_slot``.'''
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, 'www.github.com')

        key2, slot2 = self.dwn._get_slot(
            Request('http://www.github.com/hello/world#bla'))
        self.assertEqual(key2, 'www.github.com')
        self.assertIs(slot2, slot)

        key3, slot3 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertEqual(key3, 'sites.github.com')
        self.assertIsNot(slot3, slot)

        self.assertEqual(len(self.dwn.slots), 2)

        # don't use domain specific slots
        self.dwn.use_domain_specific = False
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, '')
        key2, slot2 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertIs(slot2, slot)

    def test_basic(self):
        '''Walk five requests through the downloader, checking slot usage.'''
        # create 5 requests with slot ids: a, b, a, a, c
        requests = [get_request(slot_id)[0] for slot_id in 'abaac']
        # Explicit loop: map() is lazy on Python 3 and would push nothing.
        for request in requests:
            self.request_queue.push(request)
        self.assertEqual(self.dwn.free_slots, 2)
        self.assertTrue(self.dwn.is_idle())

        # start downloading first two requests
        self.clock.advance(0)
        self.assertEqual(self.dwn.free_slots, 0)
        self.assertFalse(self.dwn.is_idle())

        # no more requests are scheduled, until download is finished
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 3)

        # download the first request
        self.handler.call(requests[0], Response('hello'))
        self.assertEqual(self.dwn.free_slots, 1)  # slot is immediately available
        # result is also available
        result = self.response_queue.peek()
        self.assertIs(result.request, requests[0])
        self.assertEqual(result.url, 'hello')

        # enqueue third request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # download second request
        self.handler.call(requests[1], Response(''))

        # enqueue fourth request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # fourth request should not begin download, until 3rd request is done
        self.assertRaises(KeyError, self.handler.call, requests[3],
                          Response(''))

        # finish
        self.handler.call(requests[2], Response(''))
        self.handler.call(requests[3], Response(''))
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.handler.call(requests[4], Response(''))

        # final checks
        self.clock.pump([1] * 10)
        self.assertEqual(len(self.response_queue), 5)
        self.assertTrue(self.dwn.is_idle())

    def test_close(self):
        '''Check downloader state and queue contents around ``close()``.'''
        req1 = get_request('a')[0]
        req2 = get_request('b')[0]
        self.request_queue.push(req1)
        self.clock.advance(20)
        self.request_queue.push(req2)

        # test basic attributes, before and after closing
        self.assertTrue(self.dwn.running)
        self.assertTrue(self.dwn.processing.is_scheduled())
        self.dwn.close()
        self.assertFalse(self.dwn.running)
        self.assertFalse(self.dwn.processing.is_scheduled())

        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 1)  # request 2 remains unqueued

        # downloader behavior after closing
        self.assertEqual(len(self.response_queue), 0)
        self.handler.call(req1, Response(''))
        self.assertEqual(len(self.response_queue), 0)

    def test_fail(self):
        '''Failed and successful downloads both land in the response queue.'''
        self._update_dwn(CONCURRENT_REQUESTS=3,
                         CONCURRENT_REQUESTS_PER_DOMAIN=2)
        requests = [get_request(slot_id)[0] for slot_id in 'aab']
        # Explicit loop: map() is lazy on Python 3 and would push nothing.
        for request in requests:
            self.request_queue.push(request)

        # enqueue requests
        self.clock.advance(0)

        # fail 1st request
        err = ValueError('my bad')
        self.handler.fail(requests[0], err)
        self.assertEqual(self.dwn.free_slots, 1)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[0])
        self.assertIs(fail.value, err)

        # fail 3rd request
        self.handler.fail(requests[2], err)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[2])
        self.assertIs(fail.value, err)

        # succeed 2nd request
        self.handler.call(requests[1], Response('nice!', request=requests[1]))
        resp = self.response_queue.pop()
        self.assertIs(resp.request, requests[1])
        self.assertEqual(resp.url, 'nice!')

    def test_clear_slots(self):
        '''The number of slots stays bounded after 30 sequential downloads.'''
        # range() yields the same ids as xrange() and also exists on Python 3.
        requests = [get_request(index)[0] for index in range(30)]
        for request in requests:
            self.request_queue.push(request)
            self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
            self.handler.call(request, Response(''))
        # NOTE(review): assumed to run once after the loop -- confirm.
        self.assertLessEqual(len(self.dwn.slots),
                             2 * self.dwn.total_concurrency)