class LogStatsTest(unittest.TestCase):
    '''Checks the periodic statistics line written by LogStats.

    The extension runs against a manually advanced clock and its output is
    read back through a LogWrapper.
    '''

    def setUp(self):
        self.clock = Clock()
        engine = get_engine(LOG_STATS_INTERVAL=30)
        engine.signals = SignalManager(engine)
        self.engine = engine
        self.ls = LogStats(engine, clock=self.clock)
        # log capturing is switched on only after the extension was created
        wrapper = LogWrapper()
        self.lw = wrapper
        wrapper.setUp()

    def tearDown(self):
        self.lw.tearDown()

    def _assert_first_line(self, expected):
        '''Assert that the first captured log line equals `expected`.'''
        self.assertEqual(self.lw.get_first_line(), expected)

    def test_config(self):
        # an interval of 0 makes the extension refuse to be configured
        disabled_engine = get_engine(LOG_STATS_INTERVAL=0)
        self.assertRaises(NotConfigured, LogStats, disabled_engine)

    def test_basic(self):
        send = self.engine.signals.send
        advance = self.clock.advance

        # the engine has not been started: no output, however long we wait
        advance(60)
        self._assert_first_line('')

        # after the start the first report shows up exactly at the interval
        send(signals.engine_started)
        advance(29)
        self._assert_first_line('')
        advance(1)
        self._assert_first_line(
            '[crawlmi] INFO: Crawled 0 pages (at 0 pages/min).')

        # two response_downloaded signals and one response_received signal;
        # the expected line reports 2 pages
        for _ in range(2):
            send(signals.response_downloaded, response=Response(url=''))
        send(signals.response_received, response=Response(url=''))
        advance(30)
        self._assert_first_line(
            '[crawlmi] INFO: Crawled 2 pages (at 4 pages/min).')

        # once the engine is stopped the reports stop as well
        send(signals.engine_stopped)
        advance(60)
        self._assert_first_line('')
class ScheduledCallTest(unittest.TestCase):
    '''Exercises ScheduledCall against a manually advanced clock.

    The scheduled function is `self.obj.func`; the tests inspect the
    `args`, `kwargs` and `num_calls` attributes of `self.obj` afterwards.
    '''

    default_args = (10, 'hello')
    default_kwargs = {'a': 47, 'b': 'c'}

    def setUp(self):
        self.clock = Clock()
        self.obj = ModifiedObject()
        self.sc = ScheduledCall(self.obj.func, *self.default_args,
                                clock=self.clock, **self.default_kwargs)

    def _check(self, args, kwargs):
        '''Compare the arguments stored on `self.obj` with the expected ones.

        Passing None for `args` or `kwargs` asserts that the corresponding
        attribute of `self.obj` is None.
        '''
        seen_args = self.obj.args
        seen_kwargs = self.obj.kwargs
        if args is not None:
            self.assertTupleEqual(seen_args, args)
        else:
            self.assertIsNone(seen_args)
        if kwargs is not None:
            self.assertEqual(seen_kwargs, kwargs)
        else:
            self.assertIsNone(seen_kwargs)

    def _assert_unscheduled(self):
        '''Assert that nothing is pending and the reported time is 0.'''
        self.assertFalse(self.sc.is_scheduled())
        self.assertEqual(self.sc.get_time(), 0)

    def test_init(self):
        # construction without an explicit clock has to work as well
        call = ScheduledCall(self.obj.func, *self.default_args,
                             **self.default_kwargs)
        call.schedule()
        call.cancel()

    def test_get_time_and_is_scheduled(self):
        self.clock.advance(10)
        self._assert_unscheduled()

        # scheduled at clock time 10 with delay 5 -> reported time is 15
        self.sc.schedule(5)
        self.assertTrue(self.sc.is_scheduled())
        self.assertEqual(self.sc.get_time(), 15)

        self.clock.advance(5)
        self._assert_unscheduled()

    def test_no_delay(self):
        self.sc.schedule()
        # nothing is called before the clock is advanced
        self._check(None, None)
        self.clock.advance(0)
        self._check(self.default_args, self.default_kwargs)

    def test_default(self):
        first_attempt = self.sc.schedule(5)
        self.assertTrue(first_attempt)
        self._check(None, None)

        # a second schedule() while one is pending is rejected
        self.clock.advance(1)
        second_attempt = self.sc.schedule(1)
        self.assertFalse(second_attempt)
        self.clock.advance(2)
        self._check(None, None)

        # the originally scheduled call fires with the default arguments
        self.clock.advance(3)
        self._check(self.default_args, self.default_kwargs)

    def test_cancel(self):
        self.sc.schedule(5)
        self.clock.advance(3)
        self.sc.cancel()
        self.clock.advance(3)
        self._check(None, None)

        # after cancelling, scheduling is possible again
        rescheduled = self.sc.schedule(1)
        self.assertTrue(rescheduled)
        self.clock.advance(1)
        self._check(self.default_args, self.default_kwargs)

    def test_overwrite(self):
        new_args = ('crawlmi',)
        new_kwargs = {'a': 50, 'd': 'e'}
        self.sc.schedule(5, *new_args, **new_kwargs)
        self.clock.advance(5)
        self._check(new_args, new_kwargs)

    def test_partial_overwrite(self):
        new_args = ('crawlmi',)
        self.sc.schedule(5, *new_args)
        self.clock.advance(5)
        # only positional arguments were given: kwargs end up empty
        self._check(new_args, {})

    def test_nested_schedule(self):
        def call_and_reschedule(*args, **kwargs):
            self.obj.func(*args, **kwargs)
            self.sc.schedule()

        self.sc.func = call_and_reschedule
        self.sc.schedule()
        self.assertEqual(self.obj.num_calls, 0)
        # every zero-length step of the clock runs exactly one call
        for expected_calls in (1, 2):
            self.clock.advance(0)
            self.assertEqual(self.obj.num_calls, expected_calls)

    def test_nested_schedule_delay(self):
        first_args, first_kwargs = ('a',), {'a': 'b'}
        second_args, second_kwargs = ('b',), {'b': 'c'}

        def call_and_reschedule(*args, **kwargs):
            self.obj.func(*args, **kwargs)
            self.sc.schedule(4, *second_args, **second_kwargs)

        self.sc.func = call_and_reschedule
        self.sc.schedule(3, *first_args, **first_kwargs)

        # the first call ran and left a new pending call behind
        self.clock.advance(3)
        self.assertIsNotNone(self.sc._call)
        self._check(first_args, first_kwargs)

        # 3 of the 4 units of the nested delay: still the first arguments
        self.clock.advance(3)
        self._check(first_args, first_kwargs)

        self.clock.advance(1)
        self._check(second_args, second_kwargs)
class LoopingCallTest(unittest.TestCase):
    '''Exercises LoopingCall against a manually advanced clock.

    The looped function is `self.obj.func`; the tests inspect the
    `args`, `kwargs` and `num_calls` attributes of `self.obj` afterwards.
    '''

    default_args = (10, 'hello')
    default_kwargs = {'a': 47, 'b': 'c'}

    def setUp(self):
        self.clock = Clock()
        self.obj = ModifiedObject()
        self.sc = LoopingCall(self.obj.func, *self.default_args,
                              clock=self.clock, **self.default_kwargs)

    def _check(self, args, kwargs):
        '''Compare the arguments stored on `self.obj` with the expected ones.

        Passing None for `args` or `kwargs` asserts that the corresponding
        attribute of `self.obj` is None.
        '''
        seen_args = self.obj.args
        seen_kwargs = self.obj.kwargs
        if args is not None:
            self.assertTupleEqual(seen_args, args)
        else:
            self.assertIsNone(seen_args)
        if kwargs is not None:
            self.assertEqual(seen_kwargs, kwargs)
        else:
            self.assertIsNone(seen_kwargs)

    def _advance_expecting(self, delta, calls):
        '''Advance the clock by `delta`, then assert the number of calls.'''
        self.clock.advance(delta)
        self.assertEqual(self.obj.num_calls, calls)

    def test_init(self):
        # construction without an explicit clock has to work as well
        loop = LoopingCall(self.obj.func, *self.default_args,
                           **self.default_kwargs)
        loop.schedule()
        loop.cancel()

    def test_basic(self):
        loop = self.sc

        # scheduling
        self.assertFalse(loop.is_scheduled())
        loop.schedule(2, count=2, now=False)

        # before the first call
        self.assertTrue(loop.is_scheduled())
        self.assertEqual(loop.get_time(), 2)
        self.assertEqual(loop.calls_left(), 2)
        self._advance_expecting(1, calls=0)

        # after the first call
        self._advance_expecting(1, calls=1)
        self.assertEqual(loop.calls_left(), 1)
        self._check(self.default_args, self.default_kwargs)

        # after the second call
        self._advance_expecting(2, calls=2)
        self.assertEqual(loop.calls_left(), 0)
        self.assertFalse(loop.is_scheduled())

        # no more calls
        self._advance_expecting(20, calls=2)

    def test_now(self):
        self.sc.schedule(2, count=2, now=True)
        # even with now=True nothing runs until the clock is stepped
        self.assertEqual(self.obj.num_calls, 0)
        self._advance_expecting(0, calls=1)
        self._advance_expecting(20, calls=2)

    def test_infinite(self):
        self.sc.schedule(2)
        self.clock.pump([2] * 100)
        self.assertEqual(self.obj.num_calls, 100)
        # without a count the loop stays scheduled and reports no limit
        self.assertTrue(self.sc.is_scheduled())
        self.assertIsNone(self.sc.calls_left())

    def test_cancel(self):
        self.sc.schedule(2)
        self.clock.advance(1)
        self.sc.cancel()
        self._advance_expecting(20, calls=0)

    def test_reschedule(self):
        self.sc.schedule(2)
        self.clock.advance(1)
        # the new delay is counted from the moment of rescheduling
        self.sc.schedule(5)
        self._advance_expecting(4, calls=0)
        self._advance_expecting(1, calls=1)

    def test_no_delay(self):
        self.sc.schedule()
        self.assertEqual(self.obj.num_calls, 0)
        self._advance_expecting(0, calls=1)
        self._advance_expecting(0, calls=2)

    def test_nested_schedule(self):
        def call_and_reschedule(*args, **kwargs):
            self.obj.func(*args, **kwargs)
            self.sc.schedule()

        self.sc.func = call_and_reschedule
        self.sc.schedule()
        self.assertEqual(self.obj.num_calls, 0)
        self._advance_expecting(0, calls=1)
        self._advance_expecting(0, calls=2)
class ScheduledCallTest(unittest.TestCase):
    '''Exercises ScheduledCall against a manually advanced clock.

    The scheduled function is `self.obj.func`; the tests inspect the
    `args`, `kwargs` and `num_calls` attributes of `self.obj` afterwards.
    '''

    default_args = (10, 'hello')
    default_kwargs = {'a': 47, 'b': 'c'}

    def setUp(self):
        self.clock = Clock()
        self.obj = ModifiedObject()
        self.sc = ScheduledCall(self.obj.func, *self.default_args,
                                clock=self.clock, **self.default_kwargs)

    def _check(self, args, kwargs):
        '''Compare the arguments stored on `self.obj` with the expected ones.

        Passing None for `args` or `kwargs` asserts that the corresponding
        attribute of `self.obj` is None.
        '''
        recorded_args = self.obj.args
        recorded_kwargs = self.obj.kwargs
        if args is not None:
            self.assertTupleEqual(recorded_args, args)
        else:
            self.assertIsNone(recorded_args)
        if kwargs is not None:
            self.assertEqual(recorded_kwargs, kwargs)
        else:
            self.assertIsNone(recorded_kwargs)

    def _assert_unscheduled(self):
        '''Assert that nothing is pending and the reported time is 0.'''
        self.assertFalse(self.sc.is_scheduled())
        self.assertEqual(self.sc.get_time(), 0)

    def test_init(self):
        # construction without an explicit clock has to work as well
        plain_call = ScheduledCall(self.obj.func, *self.default_args,
                                   **self.default_kwargs)
        plain_call.schedule()
        plain_call.cancel()

    def test_get_time_and_is_scheduled(self):
        self.clock.advance(10)
        self._assert_unscheduled()

        # scheduled at clock time 10 with delay 5 -> reported time is 15
        self.sc.schedule(5)
        self.assertTrue(self.sc.is_scheduled())
        self.assertEqual(self.sc.get_time(), 15)

        self.clock.advance(5)
        self._assert_unscheduled()

    def test_no_delay(self):
        self.sc.schedule()
        # nothing is called before the clock is advanced
        self._check(None, None)
        self.clock.advance(0)
        self._check(self.default_args, self.default_kwargs)

    def test_default(self):
        accepted = self.sc.schedule(5)
        self.assertTrue(accepted)
        self._check(None, None)

        # a second schedule() while one is pending is rejected
        self.clock.advance(1)
        accepted = self.sc.schedule(1)
        self.assertFalse(accepted)
        self.clock.advance(2)
        self._check(None, None)

        # the originally scheduled call fires with the default arguments
        self.clock.advance(3)
        self._check(self.default_args, self.default_kwargs)

    def test_cancel(self):
        self.sc.schedule(5)
        self.clock.advance(3)
        self.sc.cancel()
        self.clock.advance(3)
        self._check(None, None)

        # after cancelling, scheduling is possible again
        accepted = self.sc.schedule(1)
        self.assertTrue(accepted)
        self.clock.advance(1)
        self._check(self.default_args, self.default_kwargs)

    def test_overwrite(self):
        replacement_args = ('crawlmi',)
        replacement_kwargs = {'a': 50, 'd': 'e'}
        self.sc.schedule(5, *replacement_args, **replacement_kwargs)
        self.clock.advance(5)
        self._check(replacement_args, replacement_kwargs)

    def test_partial_overwrite(self):
        replacement_args = ('crawlmi',)
        self.sc.schedule(5, *replacement_args)
        self.clock.advance(5)
        # only positional arguments were given: kwargs end up empty
        self._check(replacement_args, {})

    def test_nested_schedule(self):
        def run_then_schedule(*args, **kwargs):
            self.obj.func(*args, **kwargs)
            self.sc.schedule()

        self.sc.func = run_then_schedule
        self.sc.schedule()
        self.assertEqual(self.obj.num_calls, 0)
        # every zero-length step of the clock runs exactly one call
        for expected_calls in (1, 2):
            self.clock.advance(0)
            self.assertEqual(self.obj.num_calls, expected_calls)

    def test_nested_schedule_delay(self):
        outer_args, outer_kwargs = ('a',), {'a': 'b'}
        inner_args, inner_kwargs = ('b',), {'b': 'c'}

        def run_then_schedule(*args, **kwargs):
            self.obj.func(*args, **kwargs)
            self.sc.schedule(4, *inner_args, **inner_kwargs)

        self.sc.func = run_then_schedule
        self.sc.schedule(3, *outer_args, **outer_kwargs)

        # the first call ran and left a new pending call behind
        self.clock.advance(3)
        self.assertIsNotNone(self.sc._call)
        self._check(outer_args, outer_kwargs)

        # 3 of the 4 units of the nested delay: still the first arguments
        self.clock.advance(3)
        self._check(outer_args, outer_kwargs)

        self.clock.advance(1)
        self._check(inner_args, inner_kwargs)
class DownloaderSlotTest(unittest.TestCase):
    '''Tests of a single downloader Slot fed through a mock handler.

    Downloads are finished (or failed) by hand through `self.handler`, and
    time only moves when the test advances `self.clock`.
    '''

    default_concurrency = 2
    default_delay = 0
    default_randomize_delay = False

    def setUp(self):
        self.handler = MockDownloaderHandler(Settings())
        self.clock = Clock()
        self.slot = Slot(self.handler,
                         self.default_concurrency,
                         self.default_delay,
                         self.default_randomize_delay,
                         clock=self.clock)

    def _assert_slot_state(self, in_progress=None, transferring=None,
                           free_slots=None):
        '''Assert the slot counters; arguments left as None are not checked.'''
        if in_progress is not None:
            self.assertEqual(len(self.slot.in_progress), in_progress)
        if transferring is not None:
            self.assertEqual(len(self.slot.transferring), transferring)
        if free_slots is not None:
            self.assertEqual(self.slot.free_slots, free_slots)

    def _enqueue(self, name, **kwargs):
        '''Create a request via get_request() and enqueue it right away.

        Returns the (request, deferred) pair produced by get_request().
        '''
        request, dfd = get_request(name, **kwargs)
        self.slot.enqueue(request, dfd)
        return request, dfd

    def test_basic(self):
        results = []

        def collect(result):
            results.append(result)

        # enqueue 3 requests
        first, _ = self._enqueue('1', func=collect)
        self._assert_slot_state(in_progress=1, transferring=1)
        second, second_dfd = get_request('2', func=collect)
        third, third_dfd = get_request('3', func=collect)
        self.slot.enqueue(second, second_dfd)
        self.slot.enqueue(third, third_dfd)
        self._assert_slot_state(in_progress=3, transferring=2, free_slots=0)

        # download r2: its place is taken by the waiting request
        self.handler.call(second, Response(''))
        self.assertIs(results[-1].request, second)
        self._assert_slot_state(in_progress=2, transferring=2, free_slots=0)

        # download r1 and r3
        self.handler.call(third, Response(''))
        self.handler.call(first, Response(''))
        self.assertIs(results[-2].request, third)
        self.assertIs(results[-1].request, first)
        self._assert_slot_state(free_slots=2)

        # nothing happens now
        self.clock.advance(5)
        self._assert_slot_state(in_progress=0, free_slots=2)

    def test_delay(self):
        self.slot.concurrency = 1
        self.slot.delay = 5
        self.clock.advance(10)  # so we don't start on time 0

        # enqueue 3 requests
        first, _ = self._enqueue('1')
        self._enqueue('2')
        self._enqueue('3')
        self._assert_slot_state(in_progress=3, transferring=1)
        self.assertEqual(self.slot.last_download_time, 10)
        self.assertEqual(self.slot.delayed_processing.get_time(), 15)

        # download the 1st request
        self.handler.call(first, Response(''))
        self._assert_slot_state(in_progress=2, transferring=0, free_slots=1)

        # we should still wait
        self.clock.advance(3)
        self._assert_slot_state(in_progress=2, transferring=0, free_slots=1)

        # make the 2nd request downloading
        self.clock.advance(3)
        self._assert_slot_state(in_progress=2, transferring=1, free_slots=0)
        self.assertEqual(self.slot.last_download_time, 16)
        self.assertEqual(self.slot.delayed_processing.get_time(), 21)

        # wait and nothing happens
        self.clock.advance(10)
        self._assert_slot_state(in_progress=2, transferring=1)
        self.assertFalse(self.slot.delayed_processing.is_scheduled())

    def test_random_delay(self):
        slot = self.slot
        slot.delay = 5

        # without randomization every delay is exactly the configured one
        slot.randomize_delay = False
        fixed = [slot.get_download_delay() for _ in xrange(10)]
        self.assertTrue(all(delay == 5 for delay in fixed))

        # with randomization the delays stay within 0.5x - 1.5x ...
        slot.randomize_delay = True
        lower = 0.5 * 5
        upper = 1.5 * 5
        randomized = [slot.get_download_delay() for _ in xrange(100)]
        self.assertTrue(all(lower <= delay <= upper for delay in randomized))

        # ... and each third of that interval is hit at least once
        third1 = (2 * lower + upper) / 3
        third2 = (lower + 2 * upper) / 3
        self.assertTrue(any(delay <= third1 for delay in randomized))
        self.assertTrue(any(third1 <= delay <= third2 for delay in randomized))
        self.assertTrue(any(third2 <= delay for delay in randomized))

    def test_fail(self):
        results = []

        def collect(result):
            results.append(result)

        # enqueue 3 requests
        first, _ = self._enqueue('1', func=collect)
        second, _ = self._enqueue('2', func=collect)
        third, _ = self._enqueue('3', func=collect)

        # fail the first request
        err = ValueError('my bad')
        self.handler.fail(first, err)
        self.assertEqual(results[-1].value, err)

        # other requests should be ok
        self._assert_slot_state(in_progress=2, transferring=2)
        for request in (second, third):
            self.handler.call(request, Response(''))
            self.assertEqual(results[-1].request, request)
        self._assert_slot_state(in_progress=0, transferring=0)

    def test_exception(self):
        self.slot.download_handler = ExceptionDownloaderHandler(Settings())
        _, dfd = self._enqueue('1')
        return self.assertFailure(dfd, Exception)

    def test_failure(self):
        self.slot.download_handler = FailureDownloaderHandler(Settings())
        outcomes = []

        def record(value):
            outcomes.append(value)

        for name in ('0', '1'):
            request, dfd = get_request(name)
            dfd.addBoth(record)
            self.slot.enqueue(request, dfd)

        self.assertEqual(len(outcomes), 2)
        first, second = outcomes
        self.assertIsInstance(first, Failure)
        self.assertIsInstance(second, Failure)
        # every request gets a failure object of its own
        self.assertIsNot(first, second)
        self.assertIsInstance(first.value, ValueError)
class DownloaderTest(unittest.TestCase):
    '''Tests of Downloader wired to in-memory queues and a mock handler.

    Requests are pushed to `self.request_queue`, downloads are finished (or
    failed) by hand through `self.handler`, and time only moves when the
    test advances `self.clock`.
    '''

    default_settings = {
        'CONCURRENT_REQUESTS': 2,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 0,
        'RANDOMIZE_DOWNLOAD_DELAY': False,
    }

    def setUp(self):
        self.clock = Clock()
        self.request_queue = MemoryQueue()
        self.response_queue = ResponseQueue()
        self._make_downloader(self.default_settings)

    def _make_downloader(self, settings):
        '''Create `self.dwn` from the `settings` dict and refresh
        `self.handler`.
        '''
        self.dwn = Downloader(
            Settings(settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def _update_dwn(self, **kwargs):
        '''Update downloader with the new settings.

        The keyword arguments override entries of `default_settings`. The
        processing call of the previous downloader is cancelled before the
        replacement is created.
        '''
        new_settings = self.default_settings.copy()
        new_settings.update(**kwargs)
        self.dwn.processing.cancel()
        self._make_downloader(new_settings)

    def _push_all(self, requests):
        '''Push every request to the request queue, in order.'''
        # A plain loop rather than map(): the pushes are side effects, and
        # a loop does not rely on map() being evaluated eagerly.
        for request in requests:
            self.request_queue.push(request)

    def test_concurrency(self):
        # standard situation
        self._update_dwn()
        self.assertEqual(self.dwn.total_concurrency, 2)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertTrue(self.dwn.use_domain_specific)

        # delay set
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5,
                         DOWNLOAD_DELAY=5)
        self.assertEqual(self.dwn.total_concurrency, 1)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is 0
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=0)
        self.assertEqual(self.dwn.total_concurrency, 10)
        self.assertEqual(self.dwn.domain_concurrency, 10)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is too big
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=10)
        self.assertEqual(self.dwn.total_concurrency, 5)
        self.assertEqual(self.dwn.domain_concurrency, 5)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency equal to the total one
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5)
        self.assertFalse(self.dwn.use_domain_specific)

    def test_get_slot(self):
        # the slot key is the host of the request url
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, 'www.github.com')

        # same host, different path: the very same slot
        key2, slot2 = self.dwn._get_slot(
            Request('http://www.github.com/hello/world#bla'))
        self.assertEqual(key2, 'www.github.com')
        self.assertIs(slot2, slot)

        # different host: a slot of its own
        key3, slot3 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertEqual(key3, 'sites.github.com')
        self.assertIsNot(slot3, slot)
        self.assertEqual(len(self.dwn.slots), 2)

        # don't use domain specific slots
        self.dwn.use_domain_specific = False
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, '')
        key2, slot2 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertIs(slot2, slot)

    def test_basic(self):
        # create 5 requests with slot ids: a, b, a, a, c
        requests = [get_request(slot_id)[0] for slot_id in 'abaac']
        self._push_all(requests)
        self.assertEqual(self.dwn.free_slots, 2)
        self.assertTrue(self.dwn.is_idle())

        # start downloading first two requests
        self.clock.advance(0)
        self.assertEqual(self.dwn.free_slots, 0)
        self.assertFalse(self.dwn.is_idle())

        # no more requests are scheduled, until download is finished
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 3)

        # download the first request
        self.handler.call(requests[0], Response('hello'))
        self.assertEqual(self.dwn.free_slots, 1)  # slot is immediately available

        # result is also available
        result = self.response_queue.peek()
        self.assertIs(result.request, requests[0])
        self.assertEqual(result.url, 'hello')

        # enqueue third request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # download second request
        self.handler.call(requests[1], Response(''))

        # enqueue fourth request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # fourth request should not begin download, until 3rd request is done
        self.assertRaises(KeyError, self.handler.call, requests[3],
                          Response(''))

        # finish
        self.handler.call(requests[2], Response(''))
        self.handler.call(requests[3], Response(''))
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.handler.call(requests[4], Response(''))

        # final checks
        self.clock.pump([1] * 10)
        self.assertEqual(len(self.response_queue), 5)
        self.assertTrue(self.dwn.is_idle())

    def test_close(self):
        req1 = get_request('a')[0]
        req2 = get_request('b')[0]
        self.request_queue.push(req1)
        self.clock.advance(20)
        self.request_queue.push(req2)

        # test basic attributes, before and after closing
        self.assertTrue(self.dwn.running)
        self.assertTrue(self.dwn.processing.is_scheduled())
        self.dwn.close()
        self.assertFalse(self.dwn.running)
        self.assertFalse(self.dwn.processing.is_scheduled())
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 1)  # request 2 remains unqueued

        # downloader behavior after closing
        self.assertEqual(len(self.response_queue), 0)
        self.handler.call(req1, Response(''))
        self.assertEqual(len(self.response_queue), 0)

    def test_fail(self):
        self._update_dwn(CONCURRENT_REQUESTS=3,
                         CONCURRENT_REQUESTS_PER_DOMAIN=2)
        requests = [get_request(slot_id)[0] for slot_id in 'aab']
        self._push_all(requests)

        # enqueue requests
        self.clock.advance(0)

        # fail 1st request
        err = ValueError('my bad')
        self.handler.fail(requests[0], err)
        self.assertEqual(self.dwn.free_slots, 1)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[0])
        self.assertIs(fail.value, err)

        # fail 3rd request
        self.handler.fail(requests[2], err)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[2])
        self.assertIs(fail.value, err)

        # succeed 2nd request
        self.handler.call(requests[1], Response('nice!', request=requests[1]))
        resp = self.response_queue.pop()
        self.assertIs(resp.request, requests[1])
        self.assertEqual(resp.url, 'nice!')

    def test_clear_slots(self):
        # 30 requests, each created with a different id
        requests = [get_request(slot_id)[0] for slot_id in xrange(30)]
        for request in requests:
            self.request_queue.push(request)
            self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
            self.handler.call(request, Response(''))
        # NOTE(review): the flattened source does not show whether this
        # assertion was part of the loop body; it is checked once, after
        # all requests were processed - confirm against the history.
        self.assertLessEqual(len(self.dwn.slots),
                             2 * self.dwn.total_concurrency)
class DownloaderTest(unittest.TestCase):
    '''Tests of Downloader wired to in-memory queues and a mock handler.

    Requests are pushed to `self.request_queue`, downloads are finished (or
    failed) by hand through `self.handler`, and time only moves when the
    test advances `self.clock`.
    '''

    default_settings = {
        'CONCURRENT_REQUESTS': 2,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 1,
        'DOWNLOAD_DELAY': 0,
        'RANDOMIZE_DOWNLOAD_DELAY': False,
    }

    def setUp(self):
        self.clock = Clock()
        self.request_queue = MemoryQueue()
        self.response_queue = ResponseQueue()
        self._make_downloader(self.default_settings)

    def _make_downloader(self, settings):
        '''Create `self.dwn` from the `settings` dict and refresh
        `self.handler`.
        '''
        self.dwn = Downloader(
            Settings(settings),
            self.request_queue,
            self.response_queue,
            download_handler=MockDownloaderHandler(Settings()),
            clock=self.clock)
        self.handler = self.dwn.download_handler

    def _update_dwn(self, **kwargs):
        '''Update downloader with the new settings.

        The keyword arguments override entries of `default_settings`. The
        processing call of the previous downloader is cancelled before the
        replacement is created.
        '''
        new_settings = self.default_settings.copy()
        new_settings.update(**kwargs)
        self.dwn.processing.cancel()
        self._make_downloader(new_settings)

    def _push_all(self, requests):
        '''Push every request to the request queue, in order.'''
        # A plain loop rather than map(): the pushes are side effects, and
        # a loop does not rely on map() being evaluated eagerly.
        for request in requests:
            self.request_queue.push(request)

    def test_concurrency(self):
        # standard situation
        self._update_dwn()
        self.assertEqual(self.dwn.total_concurrency, 2)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertTrue(self.dwn.use_domain_specific)

        # delay set
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5,
                         DOWNLOAD_DELAY=3.14)
        self.assertEqual(self.dwn.download_delay, 3.14)
        self.assertEqual(self.dwn.total_concurrency, 1)
        self.assertEqual(self.dwn.domain_concurrency, 1)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is 0
        self._update_dwn(CONCURRENT_REQUESTS=10,
                         CONCURRENT_REQUESTS_PER_DOMAIN=0)
        self.assertEqual(self.dwn.total_concurrency, 10)
        self.assertEqual(self.dwn.domain_concurrency, 10)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency is too big
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=10)
        self.assertEqual(self.dwn.total_concurrency, 5)
        self.assertEqual(self.dwn.domain_concurrency, 5)
        self.assertFalse(self.dwn.use_domain_specific)

        # domain concurrency equal to the total one
        self._update_dwn(CONCURRENT_REQUESTS=5,
                         CONCURRENT_REQUESTS_PER_DOMAIN=5)
        self.assertFalse(self.dwn.use_domain_specific)

    def test_get_slot(self):
        # the slot key is the host of the request url
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, 'www.github.com')

        # same host, different path: the very same slot
        key2, slot2 = self.dwn._get_slot(
            Request('http://www.github.com/hello/world#bla'))
        self.assertEqual(key2, 'www.github.com')
        self.assertIs(slot2, slot)

        # different host: a slot of its own
        key3, slot3 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertEqual(key3, 'sites.github.com')
        self.assertIsNot(slot3, slot)
        self.assertEqual(len(self.dwn.slots), 2)

        # don't use domain specific slots
        self.dwn.use_domain_specific = False
        key, slot = self.dwn._get_slot(Request('http://www.github.com/'))
        self.assertEqual(key, '')
        key2, slot2 = self.dwn._get_slot(Request('http://sites.github.com/'))
        self.assertIs(slot2, slot)

    def test_basic(self):
        # create 5 requests with slot ids: a, b, a, a, c
        requests = [get_request(slot_id)[0] for slot_id in 'abaac']
        self._push_all(requests)
        self.assertEqual(self.dwn.free_slots, 2)
        self.assertTrue(self.dwn.is_idle())

        # start downloading first two requests
        self.clock.advance(0)
        self.assertEqual(self.dwn.free_slots, 0)
        self.assertFalse(self.dwn.is_idle())

        # no more requests are scheduled, until download is finished
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 3)

        # download the first request
        self.handler.call(requests[0], Response('hello'))
        self.assertEqual(self.dwn.free_slots, 1)  # slot is immediately available

        # result is also available
        result = self.response_queue.peek()
        self.assertIs(result.request, requests[0])
        self.assertEqual(result.url, 'hello')

        # enqueue third request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # download second request
        self.handler.call(requests[1], Response(''))

        # enqueue fourth request
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.assertEqual(self.dwn.free_slots, 0)

        # fourth request should not begin download, until 3rd request is done
        self.assertRaises(KeyError, self.handler.call, requests[3],
                          Response(''))

        # finish
        self.handler.call(requests[2], Response(''))
        self.handler.call(requests[3], Response(''))
        self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
        self.handler.call(requests[4], Response(''))

        # final checks
        self.clock.pump([1] * 10)
        self.assertEqual(len(self.response_queue), 5)
        self.assertTrue(self.dwn.is_idle())

    def test_close(self):
        req1 = get_request('a')[0]
        req2 = get_request('b')[0]
        self.request_queue.push(req1)
        self.clock.advance(20)
        self.request_queue.push(req2)

        # test basic attributes, before and after closing
        self.assertTrue(self.dwn.running)
        self.assertTrue(self.dwn.processing.is_scheduled())
        self.dwn.close()
        self.assertFalse(self.dwn.running)
        self.assertFalse(self.dwn.processing.is_scheduled())
        self.clock.advance(20)
        self.assertEqual(len(self.request_queue), 1)  # request 2 remains unqueued

        # downloader behavior after closing
        self.assertEqual(len(self.response_queue), 0)
        self.handler.call(req1, Response(''))
        self.assertEqual(len(self.response_queue), 0)

    def test_fail(self):
        self._update_dwn(CONCURRENT_REQUESTS=3,
                         CONCURRENT_REQUESTS_PER_DOMAIN=2)
        requests = [get_request(slot_id)[0] for slot_id in 'aab']
        self._push_all(requests)

        # enqueue requests
        self.clock.advance(0)

        # fail 1st request
        err = ValueError('my bad')
        self.handler.fail(requests[0], err)
        self.assertEqual(self.dwn.free_slots, 1)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[0])
        self.assertIs(fail.value, err)

        # fail 3rd request
        self.handler.fail(requests[2], err)
        fail = self.response_queue.pop()
        self.assertIs(fail.request, requests[2])
        self.assertIs(fail.value, err)

        # succeed 2nd request
        self.handler.call(requests[1], Response('nice!', request=requests[1]))
        resp = self.response_queue.pop()
        self.assertIs(resp.request, requests[1])
        self.assertEqual(resp.url, 'nice!')

    def test_clear_slots(self):
        # 30 requests, each created with a different id
        requests = [get_request(slot_id)[0] for slot_id in xrange(30)]
        for request in requests:
            self.request_queue.push(request)
            self.clock.advance(Downloader.QUEUE_CHECK_FREQUENCY)
            self.handler.call(request, Response(''))
        # NOTE(review): the flattened source does not show whether this
        # assertion was part of the loop body; it is checked once, after
        # all requests were processed - confirm against the history.
        self.assertLessEqual(len(self.dwn.slots),
                             2 * self.dwn.total_concurrency)