def process_exception(self, request, exception, spider):
        """Count a failure against the proxy of a request that raised.

        The ``http://`` / ``https://`` scheme is stripped from
        ``request.meta['proxy']`` and the text before the first ``:`` is
        reported to the proxy pool worker through ``plus_proxy_faild_time``.
        This hook is best-effort: any error raised while doing so (for
        example a request that carries no ``'proxy'`` entry) is logged at
        DEBUG level and swallowed.

        Args:
            request: Request being processed; its ``meta`` and ``url`` are read.
            exception: The exception that triggered this hook; not inspected.
            spider: Unused.

        Returns:
            None.
        """
        try:
            proxy = request.meta['proxy']
            scheme = 'http://' if 'http://' in proxy else 'https://'
            # Text before the first ':' is what the pool worker is keyed on
            # here (presumably the proxy host/IP -- confirm against the pool).
            host = proxy.replace(scheme, '').split(':')[0]

            getProxyPoolWorker().plus_proxy_faild_time(host)
        except Exception as e:
            # The URL is passed as a logging argument rather than concatenated
            # into the format string: URLs routinely contain '%' (percent
            # encoding), which would otherwise be parsed as format directives
            # and break the log record.
            logging.debug("===  访问页面: %s 出现异常。\n %s", request.url, e)
    def process_responce(self, request, response, spider):
        """Count a failure against the request's proxy on a bad status code.

        A response whose status lies outside ``200 <= status < 400`` is
        treated as a failure: the ``http://`` / ``https://`` scheme is
        stripped from ``request.meta['proxy']`` and the text before the first
        ``:`` is reported to the proxy pool worker through
        ``plus_proxy_faild_time``.  A request without a ``'proxy'`` entry is
        only logged at DEBUG level.

        NOTE(review): the method name is spelled ``process_responce``.  If
        this class is a Scrapy downloader middleware, the framework looks for
        ``process_response`` and will not call this method -- confirm and
        rename together with any callers.  The name is left unchanged here to
        keep the existing interface.

        Args:
            request: Request that produced the response; its ``meta`` is read.
            response: Response object; its ``status`` and ``url`` are read.
            spider: Unused.

        Returns:
            The ``response`` object, unchanged.
        """
        # The attribute was previously misspelled ``staus``, which raised
        # AttributeError on every call.
        if not (200 <= response.status < 400):
            try:
                proxy = request.meta['proxy']
                scheme = 'http://' if 'http://' in proxy else 'https://'
                host = proxy.replace(scheme, '').split(':')[0]

                getProxyPoolWorker().plus_proxy_faild_time(host)
            except KeyError:
                logging.debug("===  无法正常访问到的页面: %s ===", response.url)
        return response
 def process_request(self, request, spider):
     """Attach a proxy obtained from the proxy pool worker to the request.

     The value returned by ``select_proxy_data()`` is logged at DEBUG level
     and stored under ``request.meta['proxy']``.

     Args:
         request: Outgoing request whose ``meta`` mapping is updated in place.
         spider: Unused.

     Returns:
         None.
     """
     # Ask the pool for the proxy to use (the log text calls it a random
     # proxy; the selection policy lives in the pool worker).
     chosen_proxy = getProxyPoolWorker().select_proxy_data()
     message = "=====  ProxyMiddleware get a random_proxy:【 {} 】 =====".format(
         chosen_proxy)
     logging.debug(message)
     request.meta['proxy'] = chosen_proxy
Exemple #4
0
 def stop(self):
     """Clear the running flag and stop the proxy pool worker."""
     self.isRunning = False
     # Release resources held by the pool worker.
     pool_worker = getProxyPoolWorker()
     pool_worker.stopWork()
Exemple #5
0
 def start_proxyPool(self):
     """Start the proxy pool worker."""
     # Start the proxy pool.
     pool_worker = getProxyPoolWorker()
     pool_worker.startWork()