Esempio n. 1
0
    def process_exception(self, request, exception, spider):
        """Report the proxy used by a failed request to the proxy-pool worker.

        The proxy URL is read from ``request.meta['proxy']``, its
        ``http://`` / ``https://`` scheme is stripped, and the part before
        the first ``:`` (the host) is handed to
        ``plus_proxy_faild_time`` (presumably a failure counter -- confirm
        against the proxy-pool worker).

        Any error raised while doing so (e.g. no ``'proxy'`` key in
        ``request.meta``) is logged at DEBUG level and swallowed, so this
        hook never raises by itself.

        Args:
            request: The request that failed; must expose ``meta`` and ``url``.
            exception: The exception raised while processing the request
                (not used here).
            spider: The spider that issued the request (not used here).

        Returns:
            None.
        """
        try:
            proxy = request.meta['proxy']
            # Drop the scheme so that only "host:port" is left.
            if 'http://' in proxy:
                proxy = proxy.replace('http://', '')
            else:
                proxy = proxy.replace('https://', '')

            # Only the host part (before the first ':') identifies the proxy.
            get_proxy_pool_worker().plus_proxy_faild_time(proxy.split(':')[0])
        except Exception as e:
            # Pass the URL as a logging argument instead of concatenating it
            # into the format string: URLs frequently contain '%' (percent
            # encoding), which would otherwise break message formatting.
            logging.debug("===  访问页面: %s 出现异常。\n %s", request.url, e)
Esempio n. 2
0
    def process_responce(self, request, response, spider):
        """Report the proxy of a request whose response status is not 2xx/3xx.

        When ``response.status`` is below 200 or at least 400, the proxy URL
        is read from ``request.meta['proxy']``, its scheme is stripped, and
        the host part (before the first ``:``) is handed to
        ``plus_proxy_faild_time`` on the proxy-pool worker. A ``KeyError``
        (e.g. the request carries no ``'proxy'`` entry) is logged at DEBUG
        level and ignored.

        Args:
            request: The request that produced ``response``; must expose ``meta``.
            response: The received response; must expose ``status`` and ``url``.
            spider: The spider that issued the request (not used here).

        Returns:
            The ``response`` object, unchanged.
        """
        # NOTE(review): the method name is spelled "process_responce"; if this
        # is meant to be a Scrapy downloader-middleware hook, the framework
        # looks for "process_response" -- confirm and rename at the class level.
        if response.status < 200 or response.status >= 400:
            try:
                proxy = request.meta['proxy']
                # Drop the scheme so that only "host:port" is left.
                if 'http://' in proxy:
                    proxy = proxy.replace('http://', '')
                else:
                    proxy = proxy.replace('https://', '')

                # Only the host part (before the first ':') identifies the proxy.
                get_proxy_pool_worker().plus_proxy_faild_time(proxy.split(':')[0])
            except KeyError:
                logging.debug("===  无法正常访问到的页面: %s ===", response.url)
        return response
Esempio n. 3
0
 def process_request(self, request, spider):
     """Attach a proxy selected by the proxy-pool worker to the request.

     The value returned by ``select_proxy_data()`` is logged at DEBUG level
     and stored under ``request.meta['proxy']``.

     Args:
         request: The outgoing request; its ``meta`` mapping is modified.
         spider: The spider issuing the request (not used here).

     Returns:
         None.
     """
     chosen_proxy = get_proxy_pool_worker().select_proxy_data()
     message = "=====  ProxyMiddleware get a random_proxy:【 {} 】 =====".format(
         chosen_proxy)
     logging.debug(message)
     # Store the selected proxy on the request's meta mapping.
     request.meta['proxy'] = chosen_proxy
Esempio n. 4
0
 def stop(self):
     """Clear the running flag and tell the proxy-pool worker to stop.

     Sets ``self.isRunning`` to ``False`` first, then calls ``stop_work()``
     on the object returned by ``get_proxy_pool_worker()``.
     """
     self.isRunning = False
     # Release resources held by the proxy pool.
     pool_worker = get_proxy_pool_worker()
     pool_worker.stop_work()
Esempio n. 5
0
 def start_proxy_pool(self):
     """Start the proxy pool by calling ``start_work()`` on its worker."""
     pool_worker = get_proxy_pool_worker()
     pool_worker.start_work()
Esempio n. 6
0
 def process_request(self, request, spider):
     """Attach a proxy selected by the proxy-pool worker to the request.

     The value returned by ``select_proxy_data()`` is logged at DEBUG level
     and stored under ``request.meta['proxy']``.

     Args:
         request: The outgoing request; its ``meta`` mapping is modified.
         spider: The spider issuing the request (not used here).

     Returns:
         None.
     """
     chosen_proxy = get_proxy_pool_worker().select_proxy_data()
     message = f"===== ProxyMiddleware get a random_proxy: 【 {chosen_proxy} 】"
     logging.debug(message)
     # Store the selected proxy on the request's meta mapping.
     request.meta['proxy'] = chosen_proxy