Ejemplo n.º 1
0
 def setUp(self):
     """Create the Downloader under test together with its collaborators.

     A fresh ``Clock``, request queue and response queue are stored on the
     test case, then a ``Downloader`` is built from ``default_settings``
     with a ``MockDownloaderHandler`` as its download handler.
     """
     self.clock = Clock()
     self.request_queue = MemoryQueue()
     self.response_queue = ResponseQueue()

     # Build the two argument objects up front so the constructor call
     # below stays short.
     downloader_settings = Settings(self.default_settings)
     mock_handler = MockDownloaderHandler(Settings())

     self.dwn = Downloader(
         downloader_settings,
         self.request_queue,
         self.response_queue,
         download_handler=mock_handler,
         clock=self.clock
     )

     # Shortcut to whatever handler the downloader exposes.
     self.handler = self.dwn.download_handler
Ejemplo n.º 2
0
 def __init__(self, download_handler, concurrency, delay, randomize_delay,
              clock=None):
     """Store the download configuration and start with empty bookkeeping.

     ``download_handler``, ``concurrency``, ``delay`` and
     ``randomize_delay`` are kept as given.  ``clock`` replaces the
     reactor for scheduling when supplied (used in unittests).
     """
     # --- configuration supplied by the caller ---
     self.download_handler = download_handler
     self.concurrency = concurrency
     self.delay = delay
     self.randomize_delay = randomize_delay

     # --- bookkeeping, initially empty ---
     # requests waiting to be downloaded
     self.in_progress = set()
     # requests being downloaded (subset of `in_progress`)
     self.transferring = set()
     self.last_download_time = 0
     # queue of (request, deferred)
     self.queue = MemoryQueue()

     # --- scheduling ---
     # Fall back to the reactor when no (truthy) clock was passed in.
     if not clock:
         clock = reactor
     self.clock = clock
     self.delayed_processing = ScheduledCall(self._process,
                                             clock=self.clock)
Ejemplo n.º 3
0
    def setup(self):
        """Initialize the engine's components and attach it to its spider.

        Steps, in order: optional logging, signals, stats, downloader (with
        its request/response queues), extensions, pipeline.  Afterwards
        ``self.initialized`` is set to True and the spider is handed this
        engine via ``set_engine``.

        Raises AssertionError if ``self.spider`` is None.
        """
        assert self.spider is not None, 'Spider is not set in Engine.'

        # IMPORTANT: order of the following initializations is very important
        # so please, think twice about any changes to it
        # (each manager below is constructed with `self`, so it can see
        # whatever attributes have already been set at that point)

        # initialize logging (only when the LOG_ENABLED setting is true)
        if self.settings.get_bool('LOG_ENABLED'):
            log.start(self.settings['LOG_FILE'], self.settings['LOG_LEVEL'],
                      self.settings['LOG_STDOUT'],
                      self.settings['LOG_ENCODING'])

        # initialize signals
        self.signals = SignalManager(self)

        # initialize stats; the collector class is looked up from the
        # STATS_CLASS setting
        stats_cls = load_object(self.settings.get('STATS_CLASS'))
        self.stats = stats_cls(self)

        # initialize downloader
        # The queue factory ignores its argument and always returns a fresh
        # MemoryQueue (presumably one per priority level -- TODO confirm
        # against PriorityQueue).
        self.request_queue = PriorityQueue(lambda _: MemoryQueue())
        # The response queue is built with the RESPONSE_ACTIVE_SIZE_LIMIT
        # setting read as an int.
        self.response_queue = ResponseQueue(
            self.settings.get_int('RESPONSE_ACTIVE_SIZE_LIMIT'))
        self.downloader = Downloader(self.settings,
                                     self.request_queue,
                                     self.response_queue,
                                     clock=self.clock)

        # initialize extensions
        self.extensions = ExtensionManager(self)
        # initialize downloader pipeline
        self.pipeline = PipelineManager(self)

        self.initialized = True

        # now that everything is ready, set the spider's engine
        self.spider.set_engine(self)
Ejemplo n.º 4
0
def qfactory(priority):
    """Return a new, empty ``MemoryQueue``; ``priority`` is ignored."""
    queue = MemoryQueue()
    return queue