コード例 #1
0
    def open(self, spider):
        """Bind the scheduler to ``spider`` and create its queue and dupefilter.

        For the spider named ``'fgilt'`` the queue key is prefixed with
        ``self.time`` and an underscore; the same prefix is applied to the
        dupefilter key when ``need_dupefilter`` is set.  When
        ``need_dupefilter`` is false an ``ItemRFPDupeFilter`` is created with
        the unprefixed key, whatever the spider name.
        """
        self.spider = spider
        is_fgilt = spider.name == 'fgilt'

        # Request queue: only the 'fgilt' spider gets a time-prefixed key.
        queue_key = self.queue_key
        if is_fgilt:
            queue_key = self.time + '_' + queue_key
        self.queue = self.queue_cls(self.server, spider, queue_key)

        # Duplicate filter: the key template is filled with the spider name.
        filter_key = self.dupefilter_key % {'spider': spider.name}
        if not self.need_dupefilter:
            self.df = ItemRFPDupeFilter(self.server, filter_key)
        else:
            if is_fgilt:
                filter_key = self.time + '_' + filter_key
            self.df = RFPDupeFilter(self.server, filter_key)

        # A negative idle timeout is replaced by zero.
        if self.idle_before_close < 0:
            self.idle_before_close = 0

        # Nothing pending in the queue means there is no crawl to resume.
        if not len(self.queue):
            return
        message = "Resuming crawl (%d requests scheduled)" % len(self.queue)
        spider.log(message)
コード例 #2
0
def test_log_dupes():
    """Check how many debug messages a dupefilter emits for repeated requests.

    A filter built without the ``debug`` argument is expected to log once for
    five logged requests; a filter built with ``debug=True`` is expected to
    log all five.
    """
    def check_debug_calls(dupefilter, attempts, expected_calls):
        # Wrap the real debug method so calls are counted but still forwarded.
        spy = mock.Mock(wraps=dupefilter.logger.debug)
        dupefilter.logger.debug = spy
        for _ in range(attempts):
            dupefilter.log(Request('http://example'), spider=mock.Mock())
        assert dupefilter.logger.debug.call_count == expected_calls

    redis_mock = get_redis_mock()

    # (extra constructor keyword arguments, expected number of debug calls)
    scenarios = (
        ({}, 1),
        ({'debug': True}, 5),
    )
    for extra_kwargs, expected_calls in scenarios:
        dupefilter = RFPDupeFilter(redis_mock, 'foo', **extra_kwargs)
        check_debug_calls(dupefilter, 5, expected_calls)
コード例 #3
0
 def open(self, spider):
     """Bind the scheduler to ``spider`` and create its queue and dupefilter.

     The queue is a ``SpiderQueue`` on ``self.queue_key``; the dupefilter key
     comes from the ``DUPEFILTER_KEY`` template filled with the spider name.
     """
     self.spider = spider
     self.queue = SpiderQueue(self.server, spider, self.queue_key)

     dupefilter_key = DUPEFILTER_KEY % {'spider': spider.name}
     self.df = RFPDupeFilter(self.server, dupefilter_key)

     # Only report when the queue already holds requests.
     if not len(self.queue):
         return
     message = "Resuming crawl (%d requests scheduled)" % len(self.queue)
     spider.log(message)
コード例 #4
0
ファイル: scheduler.py プロジェクト: xuemy/scrapy-redis
 def open(self, spider):
     """Bind the scheduler to ``spider`` and create its queue and dupefilter.

     The queue is built with ``self.queue_cls`` on ``self.queue_key``; the
     dupefilter key is ``self.dupefilter_key`` filled with the spider name.
     """
     self.spider = spider
     self.queue = self.queue_cls(self.server, spider, self.queue_key)

     dupefilter_key = self.dupefilter_key % {'spider': spider.name}
     self.df = RFPDupeFilter(self.server, dupefilter_key)

     # Requests already in the queue mean an earlier crawl is being resumed.
     if not len(self.queue):
         return
     message = "Resuming crawl (%d requests scheduled)" % len(self.queue)
     spider.log(message)
コード例 #5
0
 def open(self, spider):
     """Bind the scheduler to ``spider`` and create its queue and dupefilter.

     The dupefilter key is ``self.dupefilter_key`` formatted with a mapping
     whose single entry is ``'spider.domain'`` -> ``spider.domain``.
     """
     self.spider = spider
     self.queue = self.queue_cls(self.server, spider, self.queue_key)

     key_values = {'spider.domain': spider.domain}
     self.df = RFPDupeFilter(self.server, self.dupefilter_key % key_values)

     # A negative idle timeout is replaced by zero.
     if self.idle_before_close < 0:
         self.idle_before_close = 0

     # Requests already in the queue mean an earlier crawl is being resumed.
     if not len(self.queue):
         return
     message = "Resuming crawl (%d requests scheduled)" % len(self.queue)
     spider.log(message)
コード例 #6
0
    def open(self, spider):
        """Bind the scheduler to ``spider`` and create its queue and dupefilter.

        Both the queue key and the dupefilter key are templates filled with
        the spider name.
        """
        self.spider = spider
        name_values = {'spider': spider.name}

        queue_key = self.queue_key % name_values
        self.queue = self.queue_cls(self.server, spider, queue_key)

        # NOTE(review): the queue uses self.server while the dupefilter uses
        # self.redis_server -- confirm that the two attributes are intended.
        dupefilter_key = self.dupefilter_key % name_values
        self.df = RFPDupeFilter(self.redis_server, dupefilter_key)

        # A negative idle timeout is replaced by zero.
        if self.idle_before_close < 0:
            self.idle_before_close = 0

        # Requests already in the queue mean an earlier crawl is being resumed.
        if not len(self.queue):
            return
        message = "Resuming crawl (%d requests scheduled)" % len(self.queue)
        spider.log(message)
コード例 #7
0
 def open(self, spider):
     """Bind the scheduler to ``spider`` and create its queue and dupefilter.

     The queue is built with ``self.queue_cls`` on ``self.queue_key``; the
     dupefilter key is ``self.dupefilter_key`` filled with the spider name.
     A negative ``idle_before_close`` is replaced by zero.  A three-line
     "scheduler open" banner is written to standard output at the end.
     """
     self.spider = spider
     self.queue = self.queue_cls(self.server, spider, self.queue_key)
     self.dupefilter = RFPDupeFilter(
         self.server, self.dupefilter_key % {'spider': spider.name})
     if self.idle_before_close < 0:
         self.idle_before_close = 0
     # Read the queue length once so the check and the logged count agree
     # and the backing store is only queried a single time.
     pending = len(self.queue)
     if pending:
         spider.log("Resuming crawl (%d requests scheduled)" % pending)
     # Call-style print with one string argument produces the same output on
     # Python 2 and Python 3; the statement form fails to parse on Python 3.
     print("++++++++++++++++++++++++")
     print("scheduler open")
     print("++++++++++++++++++++++++")
コード例 #8
0
 def setUp(self):
     """Create the dupefilter under test on ``self.server`` with the test key."""
     dupefilter_key = 'scrapy_redis:tests:dupefilter:'
     self.key = dupefilter_key
     self.df = RFPDupeFilter(self.server, dupefilter_key)
コード例 #9
0
 def setUp(self):
     """Create a Redis client and a dupefilter bound to the test key."""
     connection = redis.Redis(REDIS_HOST, REDIS_PORT)
     dupefilter_key = 'scrapy_redis:tests:dupefilter:'

     self.server = connection
     self.key = dupefilter_key
     self.df = RFPDupeFilter(connection, dupefilter_key)
コード例 #10
0
 def setup(self):
     """Create a mocked Redis server and a dupefilter bound to ``'dupefilter:1'``."""
     redis_mock = get_redis_mock()
     dupefilter_key = 'dupefilter:1'

     self.server = redis_mock
     self.key = dupefilter_key
     self.df = RFPDupeFilter(redis_mock, dupefilter_key)