예제 #1
0
    def __init__(self, settings, request_queue, response_queue,
                 download_handler=None, clock=None):
        self.request_queue = request_queue
        self.response_queue = response_queue  # queue of responses
        self.download_handler = download_handler or GeneralHandler(settings)
        self.slots = {}
        self.num_in_progress = 0
        self.clock = clock or reactor
        self.processing = LoopingCall(self.process, clock=self.clock)
        self.processing.schedule(self.QUEUE_CHECK_FREQUENCY, now=True)
        self.running = True

        self.download_delay = settings.get_float('DOWNLOAD_DELAY')
        self.randomize_delay = settings.get_int(
            'RANDOMIZE_DOWNLOAD_DELAY')
        if self.download_delay:
            self.total_concurrency = self.domain_concurrency = 1
            self.use_domain_specific = False
        else:
            self.total_concurrency = settings.get_int(
                'CONCURRENT_REQUESTS')
            self.domain_concurrency = settings.get_int(
                'CONCURRENT_REQUESTS_PER_DOMAIN')
            if (not self.domain_concurrency or
                    self.domain_concurrency >= self.total_concurrency):
                self.use_domain_specific = False
                self.domain_concurrency = self.total_concurrency
            else:
                self.use_domain_specific = True
 def setUp(self):
     self.settings = Settings({
         'DOWNLOAD_HANDLERS': {
             'file':
             'crawlmi.core.handlers.FileDownloadHandler',
             'http':
             'crawlmi.core.handlers.HttpDownloadHandler',
             'https':
             'crawlmi.tests.test_downloader_handlers.NonConfiguredHandler',
         }
     })
     self.handler = GeneralHandler(self.settings)