def engine_opened(self):
    """Log the ``para`` attribute and register three fixed downloader slots.

    Each slot is stored in ``self.crawler.engine.downloader.slots`` under its
    host name and is built from two numeric arguments plus ``self.settings``.
    """
    log.msg('para:' + self.para)

    # (host, first Slot argument, second Slot argument), in registration order.
    # presumably the two numbers are concurrency and delay -- confirm against
    # the Slot class this project imports.
    per_host_limits = (
        ('s.taobao.com', 1, 1),
        ('detailskip.taobao.com', 5, 0.1),
        ('count.tbcdn.cn', 5, 0.1),
    )
    for host, concurrency, delay in per_host_limits:
        self.crawler.engine.downloader.slots[host] = Slot(
            concurrency, delay, self.settings)
def _get_slot(self, request, spider): key = self._get_slot_key(request, spider) if key not in self.slots: conc = self.ip_concurrency if self.ip_concurrency else self.domain_concurrency if 'facebook' in key: conc, delay = _get_concurrency_delay(conc, spider, self.settings) else: conc, delay = 10, 0 self.slots[key] = Slot(conc, delay, self.randomize_delay) return key, self.slots[key]
def _get_slot(self, request=None, spider=None): key = self._get_slot_key(request, spider) if key not in self.slots.keys(): conc = self.ip_concurrency if self.ip_concurrency else self.domain_concurrency conc, delay = _get_concurrency_delay(conc, spider, self.settings[spider.name]) self.slots[key] = Slot(conc, delay, self.settings[spider.name]) if self.rememberseq.__sizeof__() != 0 and spider.name in self.rememberseq.keys(): logger.info("change it!") self.slots[key].concurrency = self.rememberseq[spider.name] del self.rememberseq[spider.name] else: logger.info("nothing") return key, self.slots[key]
def spider_opened(self, spider):
    """Register the downloader slots described by the ``SLOTS`` setting.

    ``self.downloader_slots`` is always bound to the engine downloader's
    ``slots`` mapping. If the ``SLOTS`` setting is present and non-empty,
    each ``name -> config`` entry is turned into a ``Slot`` stored under
    that name.

    Each config must provide ``concurrency`` and ``delay``.
    ``randomize_delay`` is optional and falls back to
    ``self.randomize_delay`` only when it is absent (``None``), so an
    explicit ``False`` in the config is honoured.

    Raises:
        Exception: if a slot config lacks ``concurrency`` or ``delay``.
    """
    self.downloader_slots = self.crawler.engine.downloader.slots

    slots_config = self.settings.get('SLOTS')
    if not slots_config:
        return

    for slot_name, slot_config in slots_config.items():
        concurrency = slot_config.get('concurrency')
        delay = slot_config.get('delay')
        randomize_delay = slot_config.get('randomize_delay')
        # ``value or default`` would discard an explicit False; only fall
        # back when the key is missing.
        if randomize_delay is None:
            randomize_delay = self.randomize_delay

        if concurrency is None:
            message = 'slot {name} should have concurrency in config'.format(
                name=slot_name)
            raise Exception(message)
        if delay is None:
            message = 'slot {name} should have delay in config'.format(
                name=slot_name)
            raise Exception(message)

        self.downloader_slots[slot_name] = Slot(
            concurrency, delay, randomize_delay=randomize_delay)
        logger.info('ADDED YOUR SLOTS `%s`', slot_name)
def engine_opened(self):
    """Log the ``para`` attribute and register the ``sou.zhaopin.com`` slot.

    The slot is built as ``Slot(5, 5, self.settings)`` and stored in
    ``self.crawler.engine.downloader.slots`` under the host name.
    """
    log.msg('para:' + self.para)
    zhaopin_slot = Slot(5, 5, self.settings)
    self.crawler.engine.downloader.slots['sou.zhaopin.com'] = zhaopin_slot
def test_repr(self):
    """Check that ``repr`` of a Slot lists its three constructor settings.

    A delay of ``0.1`` is expected to be rendered as ``0.10``.
    """
    expected = 'Slot(concurrency=8, delay=0.10, randomize_delay=True)'
    actual = repr(Slot(concurrency=8, delay=0.1, randomize_delay=True))
    self.assertEqual(actual, expected)
def engine_started(self):
    """Log the ``para`` attribute and register the ``zhaopingou.com`` slot.

    The slot is built from ``self.download_concurrence``,
    ``self.download_delay`` and ``self.settings`` and stored in
    ``self.crawler.engine.downloader.slots`` under the host name.
    """
    log.msg('[para] %s' % self.para)
    zhaopingou_slot = Slot(
        self.download_concurrence,
        self.download_delay,
        self.settings,
    )
    self.crawler.engine.downloader.slots['zhaopingou.com'] = zhaopingou_slot