コード例 #1
0
    def test_no_limit(self):
        # A queue built as ResponseQueue(0) must not report backout, either
        # while empty or after two responses with 50-character bodies have
        # been pushed.
        queue = ResponseQueue(0)
        responses = [Response('', body=char * 50) for char in ('x', 'y')]

        self.assertFalse(queue.needs_backout())
        for response in responses:
            queue.push(response)
        self.assertFalse(queue.needs_backout())
コード例 #2
0
 def setUp(self):
     # Build a fresh Downloader fixture for every test: its own clock, its
     # own request/response queues and a mock download handler.
     self.clock = Clock()
     self.request_queue = MemoryQueue()
     self.response_queue = ResponseQueue()

     downloader_settings = Settings(self.default_settings)
     mock_handler = MockDownloaderHandler(Settings())
     self.dwn = Downloader(
         downloader_settings,
         self.request_queue,
         self.response_queue,
         download_handler=mock_handler,
         clock=self.clock,
     )

     # Read the handler back from the downloader rather than reusing the
     # local, so tests talk to whatever object the downloader exposes.
     self.handler = self.dwn.download_handler
コード例 #3
0
    def test_limit(self):
        # With ResponseQueue(10) and two responses whose bodies are 5
        # characters each: no backout while empty or after the first push,
        # backout after the second push, and no backout again after one pop.
        queue = ResponseQueue(10)
        first = Response('', body='x' * 5)
        second = Response('', body='y' * 5)

        # Each step pushes one response and then checks needs_backout()
        # with the paired assertion.
        steps = (
            (first, self.assertFalse),
            (second, self.assertTrue),
        )

        self.assertFalse(queue.needs_backout())
        for response, check in steps:
            queue.push(response)
            check(queue.needs_backout())

        queue.pop()
        self.assertFalse(queue.needs_backout())
コード例 #4
0
ファイル: engine.py プロジェクト: dahuangfeng123/crawlmi
    def setup(self):
        """Initialize the engine's components and attach it to the spider.

        In order, this optionally starts logging, then creates the signal
        manager, the stats collector, the request/response queues with the
        downloader, the extension manager and the pipeline manager. Finally
        it sets ``self.initialized`` and passes the engine to the spider
        via ``spider.set_engine``.

        Settings read: ``LOG_ENABLED``, ``LOG_FILE``, ``LOG_LEVEL``,
        ``LOG_STDOUT``, ``LOG_ENCODING``, ``STATS_CLASS`` and
        ``RESPONSE_ACTIVE_SIZE_LIMIT``.

        Raises:
            AssertionError: if ``self.spider`` is None. Note that ``assert``
                statements are skipped when Python runs with ``-O``.
        """
        assert self.spider is not None, 'Spider is not set in Engine.'

        # IMPORTANT: order of the following initializations is very important
        # so please, think twice about any changes to it

        # initialize logging
        if self.settings.get_bool('LOG_ENABLED'):
            log.start(self.settings['LOG_FILE'], self.settings['LOG_LEVEL'],
                      self.settings['LOG_STDOUT'],
                      self.settings['LOG_ENCODING'])

        # initialize signals
        self.signals = SignalManager(self)

        # initialize stats
        # The collector class is resolved from the STATS_CLASS setting and
        # instantiated with this engine.
        stats_cls = load_object(self.settings.get('STATS_CLASS'))
        self.stats = stats_cls(self)

        # initialize downloader
        # The factory passed to PriorityQueue ignores its argument and always
        # returns a new MemoryQueue (presumably one queue per priority level
        # -- confirm against PriorityQueue).
        self.request_queue = PriorityQueue(lambda _: MemoryQueue())
        self.response_queue = ResponseQueue(
            self.settings.get_int('RESPONSE_ACTIVE_SIZE_LIMIT'))
        self.downloader = Downloader(self.settings,
                                     self.request_queue,
                                     self.response_queue,
                                     clock=self.clock)

        # initialize extensions
        self.extensions = ExtensionManager(self)
        # initialize downloader pipeline
        self.pipeline = PipelineManager(self)

        # Set only after every component above has been created.
        self.initialized = True

        # now that everything is ready, set the spider's engine
        self.spider.set_engine(self)