def process_exception(self, request, exception, spider):
    """Record a failure against the proxy that was used for ``request``.

    Reads ``request.meta['proxy']``, strips the ``http://`` (or otherwise
    ``https://``) scheme text, and passes the part before the first ``:`` to
    ``get_proxy_pool_worker().plus_proxy_faild_time``.

    This is best-effort: any error raised while doing that bookkeeping
    (for example a request without a ``'proxy'`` entry in ``meta``) is
    logged at debug level and swallowed.

    Args:
        request: Object exposing ``meta`` (mapping) and ``url`` (str).
        exception: The error reported for the request. It is not inspected.
        spider: Unused.

    Returns:
        None.
    """
    try:
        proxy = request.meta['proxy']
        scheme = 'http://' if 'http://' in proxy else 'https://'
        host = proxy.replace(scheme, '').split(':')[0]
        get_proxy_pool_worker().plus_proxy_faild_time(host)
    except Exception as e:
        # The URL is passed as a logging argument rather than concatenated
        # into the format string: URLs routinely contain '%' (percent
        # encoding), which would otherwise be parsed as format directives
        # and make the record impossible to format.
        # NOTE(review): this logs the bookkeeping error ``e``, not the
        # ``exception`` argument -- confirm that is the intent.
        logging.debug("=== 访问页面: %s 出现异常。\n %s", request.url, e)
def process_responce(self, request, response, spider):
    """Penalise the request's proxy when the response status is not 2xx/3xx.

    For a status below 200 or at/above 400, reads ``request.meta['proxy']``,
    strips the ``http://`` (or otherwise ``https://``) scheme text, and
    passes the part before the first ``:`` to
    ``get_proxy_pool_worker().plus_proxy_faild_time``. A ``KeyError`` raised
    along the way (e.g. no ``'proxy'`` entry in ``meta``) is logged at debug
    level and ignored.

    Args:
        request: Object exposing ``meta`` (mapping).
        response: Object exposing ``status`` (int) and ``url`` (str).
        spider: Unused.

    Returns:
        The ``response`` object, unchanged.
    """
    # NOTE(review): the method name is spelled "responce". If this class is
    # a Scrapy downloader middleware, the hook Scrapy looks up is
    # ``process_response``, so this method would never be invoked. The name
    # is left as-is here to keep the existing interface -- confirm and
    # rename at the call/registration site.
    #
    # Fixed: the original read ``response.staus`` (typo), which raised
    # AttributeError on every call.
    if response.status < 200 or response.status >= 400:
        try:
            proxy = request.meta['proxy']
            scheme = 'http://' if 'http://' in proxy else 'https://'
            host = proxy.replace(scheme, '').split(':')[0]
            get_proxy_pool_worker().plus_proxy_faild_time(host)
        except KeyError:
            logging.debug("=== 无法正常访问到的页面: " + response.url + " ===")
    return response
def process_request(self, request, spider):
    """Attach a proxy chosen by the pool worker to the outgoing request.

    Asks ``get_proxy_pool_worker()`` for a proxy via ``select_proxy_data()``,
    logs the selection at debug level, and stores it under
    ``request.meta['proxy']``.

    Args:
        request: Object exposing a mutable ``meta`` mapping.
        spider: Unused.

    Returns:
        None.
    """
    chosen = get_proxy_pool_worker().select_proxy_data()
    message = "===== ProxyMiddleware get a random_proxy:【 {} 】 =====".format(
        chosen)
    logging.debug(message)
    # Set the location of the proxy
    request.meta['proxy'] = chosen
def stop(self):
    """Clear the running flag, then tell the proxy pool worker to stop.

    Sets ``self.isRunning`` to ``False`` before calling
    ``get_proxy_pool_worker().stop_work()``.
    """
    self.isRunning = False
    # Close resources
    pool_worker = get_proxy_pool_worker()
    pool_worker.stop_work()
def start_proxy_pool(self):
    """Start the proxy pool by calling ``start_work()`` on the pool worker."""
    # Start the proxy
    pool_worker = get_proxy_pool_worker()
    pool_worker.start_work()
def process_request(self, request, spider):
    """Select a proxy from the pool worker and assign it to the request.

    The value returned by ``get_proxy_pool_worker().select_proxy_data()`` is
    logged at debug level and then written to ``request.meta['proxy']``.

    Args:
        request: Object exposing a mutable ``meta`` mapping.
        spider: Unused.

    Returns:
        None.
    """
    pool_worker = get_proxy_pool_worker()
    proxy_address = pool_worker.select_proxy_data()
    log_line = f"===== ProxyMiddleware get a random_proxy: 【 {proxy_address} 】"
    logging.debug(log_line)
    request.meta['proxy'] = proxy_address