Ejemplo n.º 1
0
 def process_request(self, request, spider):
     """Route the outgoing request through a proxy obtained from ``Proxy``.

     A fresh ``Proxy`` instance is asked for two addresses; the first one is
     discarded and the second is stored, prefixed with ``http://``, under
     ``request.meta["proxy"]``. Any exception raised while building or
     storing the URL is printed instead of being propagated.

     NOTE(review): the signature matches a Scrapy downloader-middleware
     hook -- confirm against the enclosing class.

     Args:
         request: Object whose ``meta`` mapping receives the proxy URL.
         spider: Not used by this method.

     Returns:
         None (implicitly), whether or not the proxy could be set.
     """
     pool = Proxy()
     # The first address is fetched and thrown away; only the second is used.
     # NOTE(review): presumably this advances a rotation -- confirm in Proxy.
     pool.get()
     address = pool.get()
     try:
         proxy_url = "http://" + address
         request.meta["proxy"] = proxy_url
     except Exception as e:
         # Best-effort: report the problem and leave the request as it is.
         print(e)
Ejemplo n.º 2
0
    def _retry(self, request, reason, spider):
        """Build the request to reschedule after a failed download.

        The attempt number is ``request.meta['retry_times']`` (default 0)
        plus one. The allowed maximum is ``self.max_retry_times`` unless
        ``request.meta['max_retry_times']`` overrides it.

        While the attempt number is within the limit, a copy of the request
        is returned with ``retry_times`` updated, ``dont_filter`` enabled,
        the priority shifted by ``self.priority_adjust`` and a proxy of the
        form ``http://<ip>:9990``. The ip comes from ``Proxy.get_ip`` with a
        random ``server_id`` between 20 and 50 inclusive. The ``retry/count``
        and ``retry/reason_count/<reason>`` stats are incremented.

        Once the limit is exceeded, the ORIGINAL request is modified in
        place and returned. Its proxy is the fourth address handed out by a
        fresh ``Proxy``; ``dont_filter`` and the priority are updated the
        same way. No stats are recorded and ``retry_times`` is not updated
        in this branch.

        NOTE(review): because ``retry_times`` is left untouched in the
        fallback branch, a request that keeps failing never gives up --
        confirm this is intended.

        Args:
            request: The failed request.
            reason: Failure description; an ``Exception`` instance is
                converted with ``global_object_name`` for the stats key.
            spider: Spider owning the request; supplies ``crawler.stats``
                and is attached to the log record.

        Returns:
            The request to schedule again (a copy, or the original in the
            fallback branch).
        """
        meta = request.meta
        attempt = meta.get('retry_times', 0) + 1
        allowed = meta.get('max_retry_times', self.max_retry_times)

        # Looked up before branching, although only the retry path uses it.
        stats = spider.crawler.stats

        if attempt <= allowed:
            log_args = {
                'request': request,
                'retries': attempt,
                'reason': reason
            }
            logger.debug(
                "Retrying %(request)s (failed %(retries)d times): %(reason)s",
                log_args,
                extra={'spider': spider})

            retry_copy = request.copy()
            retry_copy.meta['retry_times'] = attempt
            retry_copy.dont_filter = True
            retry_copy.priority = request.priority + self.priority_adjust

            pool = Proxy()
            server_id = random.randint(20, 50)
            # get_ip's result is decoded before use, so it is expected to be
            # bytes-like.
            host = pool.get_ip(server_id=server_id).decode()
            retry_copy.meta["proxy"] = "http://" + host + ":9990"

            # Exceptions are reported in the stats by their class name.
            if isinstance(reason, Exception):
                reason_label = global_object_name(reason.__class__)
            else:
                reason_label = reason

            stats.inc_value('retry/count')
            stats.inc_value('retry/reason_count/%s' % reason_label)
            return retry_copy

        # Retry budget exhausted: switch the original request to another
        # proxy and hand it back instead of giving up.
        pool = Proxy()
        # Three addresses are fetched and discarded; the fourth one is used.
        for _ in range(3):
            pool.get()
        request.meta["proxy"] = "http://" + pool.get()
        request.dont_filter = True
        request.priority = request.priority + self.priority_adjust
        return request