Esempio n. 1
0
 def engine_opened(self):
     """Log ``self.para`` and install fixed downloader slots for three hosts.

     Each entry is ``(slot key, (first Slot argument, second Slot argument))``;
     ``self.settings`` is always passed as the third ``Slot`` argument.
     """
     log.msg('para:' + self.para)
     host_limits = (
         ('s.taobao.com', (1, 1)),
         ('detailskip.taobao.com', (5, 0.1)),
         ('count.tbcdn.cn', (5, 0.1)),
     )
     for host, (first_arg, second_arg) in host_limits:
         slot = Slot(first_arg, second_arg, self.settings)
         self.crawler.engine.downloader.slots[host] = slot
Esempio n. 2
0
    def _get_slot(self, request, spider):
        """Return ``(key, slot)`` for *request*, creating the slot on first use.

        A new slot whose key contains ``'facebook'`` takes its concurrency and
        delay from ``_get_concurrency_delay``; every other new slot is created
        with concurrency 10 and delay 0.
        """
        slot_key = self._get_slot_key(request, spider)
        if slot_key in self.slots:
            return slot_key, self.slots[slot_key]

        # Per-IP concurrency takes precedence over per-domain when it is set.
        base_concurrency = self.ip_concurrency or self.domain_concurrency
        if 'facebook' not in slot_key:
            limit, pause = 10, 0
        else:
            limit, pause = _get_concurrency_delay(
                base_concurrency, spider, self.settings)
        self.slots[slot_key] = Slot(limit, pause, self.randomize_delay)

        return slot_key, self.slots[slot_key]
Esempio n. 3
0
 def _get_slot(self, request=None, spider=None):
     """Return ``(key, slot)`` for *request*, creating the slot on first use.

     A new slot is configured from the per-spider settings stored under
     ``self.settings[spider.name]``. If ``self.rememberseq`` holds an entry
     for the spider, that value replaces the new slot's ``concurrency`` and
     the entry is removed, so it is applied only once.

     Despite the ``None`` defaults, *spider* must provide a ``name`` whenever
     a slot has to be created, because the settings lookup depends on it.
     """
     key = self._get_slot_key(request, spider)
     if key not in self.slots:
         spider_settings = self.settings[spider.name]
         conc = self.ip_concurrency if self.ip_concurrency else self.domain_concurrency
         conc, delay = _get_concurrency_delay(conc, spider, spider_settings)
         self.slots[key] = Slot(conc, delay, spider_settings)
         # The former ``__sizeof__() != 0`` guard measured the container's
         # memory footprint, which is never zero; membership alone decides.
         if spider.name in self.rememberseq:
             logger.info("change it!")
             self.slots[key].concurrency = self.rememberseq.pop(spider.name)
         else:
             logger.info("nothing")
     return key, self.slots[key]
Esempio n. 4
0
 def spider_opened(self, spider):
     """Register the downloader slots declared in the ``SLOTS`` setting.

     ``SLOTS`` maps a slot name to a dict that must contain ``concurrency``
     and ``delay`` and may contain ``randomize_delay``. When
     ``randomize_delay`` is absent (or ``None``) the instance-wide
     ``self.randomize_delay`` is used; an explicit ``False`` is honoured.

     Raises:
         ValueError: if a slot config lacks ``concurrency`` or ``delay``.
     """
     self.downloader_slots = self.crawler.engine.downloader.slots
     slot_configs = self.settings.get('SLOTS')
     if not slot_configs:
         return
     for slot_name, slot_config in slot_configs.items():
         concurrency = slot_config.get('concurrency')
         delay = slot_config.get('delay')
         if concurrency is None:
             raise ValueError(
                 'slot {name} should have concurrency in config'.format(
                     name=slot_name))
         if delay is None:
             raise ValueError(
                 'slot {name} should have delay in config'.format(
                     name=slot_name))
         # Fall back only when the option is missing: using ``or`` here
         # would silently discard an explicit ``False``.
         randomize_delay = slot_config.get('randomize_delay')
         if randomize_delay is None:
             randomize_delay = self.randomize_delay
         self.downloader_slots[slot_name] = Slot(
             concurrency, delay, randomize_delay=randomize_delay)
         logger.info('ADDED YOUR SLOTS `{NAME}`'.format(NAME=slot_name))
Esempio n. 5
0
 def engine_opened(self):
     """Log ``self.para`` and install a fixed slot for ``sou.zhaopin.com``."""
     log.msg('para:' + self.para)
     zhaopin_slot = Slot(5, 5, self.settings)
     self.crawler.engine.downloader.slots['sou.zhaopin.com'] = zhaopin_slot
 def test_repr(self):
     # The repr is expected to list the three constructor arguments, with
     # the delay rendered to two decimal places.
     expected = 'Slot(concurrency=8, delay=0.10, randomize_delay=True)'
     slot = Slot(concurrency=8, delay=0.1, randomize_delay=True)
     actual = repr(slot)
     self.assertEqual(actual, expected)
Esempio n. 7
0
 def engine_started(self):
     """Log ``self.para`` and install a slot for ``zhaopingou.com``.

     The slot is built from ``self.download_concurrence``,
     ``self.download_delay`` and ``self.settings``.
     """
     message = '[para] %s' % self.para
     log.msg(message)
     zhaopingou_slot = Slot(
         self.download_concurrence, self.download_delay, self.settings)
     self.crawler.engine.downloader.slots['zhaopingou.com'] = zhaopingou_slot