def process_request(self, request, spider):
    """Attach a freshly fetched HTTP proxy to ``request``.

    A new ``Proxy`` client is created for each call. The proxy address it
    returns is prefixed with ``http://`` and stored in
    ``request.meta["proxy"]``. When no usable address comes back, the
    request is left untouched and a warning is logged, so a proxy outage
    never aborts the request here.

    Args:
        request: The outgoing request; its ``meta`` dict is mutated in place.
        spider: The spider that issued the request (not used by this method).

    Returns:
        None.
    """
    import logging

    log = logging.getLogger(__name__)

    proxy_source = Proxy()
    # NOTE(review): the original code fetched two addresses and used the
    # second one. The first fetch is kept (its result discarded) in case
    # ``get()`` advances some rotation state -- confirm whether it is needed.
    proxy_source.get()
    proxy = proxy_source.get()

    if not proxy:
        # None / empty string: do not install a bare "http://" proxy URL.
        log.warning("No proxy available for %s; leaving request unchanged", request)
        return None

    try:
        proxy_url = "http://" + proxy
    except TypeError:
        # get() returned something that is not text (e.g. bytes).
        log.warning("Unusable proxy value %r for %s; leaving request unchanged",
                    proxy, request)
        return None

    request.meta["proxy"] = proxy_url
    return None
def _retry(self, request, reason, spider):
    """Return a request to re-schedule after a failure, using a new proxy.

    While the retry budget is not exhausted, a copy of ``request`` is
    returned with its ``retry_times`` counter incremented, the duplicate
    filter disabled, its priority adjusted, and a freshly fetched proxy
    stored in its ``meta["proxy"]``. Retry statistics are recorded on the
    crawler's stats collector.

    Once the budget is exhausted, the request is NOT dropped. The original
    ``request`` object is handed back with a new proxy, the duplicate
    filter disabled and its priority adjusted. Its ``retry_times`` value is
    not changed in that branch, so later failures of the same request take
    this branch again.

    Args:
        request: The request that failed.
        reason: Why it failed; an exception instance or a printable value.
        spider: The running spider; ``spider.crawler.stats`` receives the
            retry counters.

    Returns:
        The request object to schedule again (a copy inside the retry
        budget, the original object afterwards).
    """
    retries = request.meta.get('retry_times', 0) + 1
    # A per-request 'max_retry_times' in meta overrides the middleware default.
    retry_times = request.meta.get('max_retry_times', self.max_retry_times)
    stats = spider.crawler.stats

    if retries <= retry_times:
        logger.debug(
            "Retrying %(request)s (failed %(retries)d times): %(reason)s",
            {'request': request, 'retries': retries, 'reason': reason},
            extra={'spider': spider},
        )
        retryreq = request.copy()
        retryreq.meta['retry_times'] = retries
        retryreq.dont_filter = True
        retryreq.priority = request.priority + self.priority_adjust

        proxy_source = Proxy()
        # NOTE(review): the original fetched three addresses and used the
        # third. The two extra fetches are kept (results discarded) in case
        # ``get()`` advances rotation state -- confirm whether they matter.
        proxy_source.get()
        proxy_source.get()
        # Fix: the proxy must go on the copy that is returned. The copy has
        # its own meta dict, so writing to ``request.meta`` (as the original
        # code did) never reached the retried request.
        retryreq.meta["proxy"] = "http://" + proxy_source.get()

        if isinstance(reason, Exception):
            # Record exceptions in the stats by their class name.
            reason = global_object_name(reason.__class__)
        stats.inc_value('retry/count')
        stats.inc_value('retry/reason_count/%s' % reason)
        return retryreq

    # Retry budget exhausted: re-submit the original request through yet
    # another proxy instead of giving up.
    proxy_source = Proxy()
    # NOTE(review): the original fetched four addresses here and used the
    # fourth; the three extra fetches are preserved for the same reason.
    proxy_source.get()
    proxy_source.get()
    proxy_source.get()
    request.meta["proxy"] = "http://" + proxy_source.get()
    request.dont_filter = True
    request.priority = request.priority + self.priority_adjust
    return request
from afanti_tiku_lib.dbs import html_archive
from afanti_tiku_lib.dbs.execute import execute
from adsl_server.proxy import Proxy
from user_agent_lib.user_agent import UserAgent

# Log record layout: timestamp, level name, message.
LOGGING_FORMAT = '%(asctime)-15s:%(levelname)s: %(message)s'

# Append INFO-and-above records to the crawler's log file.
logging.basicConfig(
    filename='working/achihuo_mini_vko.log',
    filemode='a',
    level=logging.INFO,
    format=LOGGING_FORMAT,
)

# Module-wide MySQL handle for the 'html_archive2' database and one open
# connection obtained from it.
mysql = CommonMysql('html_archive2')
mysql_conn = mysql.connection()

# Module-wide proxy and user-agent providers.
_proxy = Proxy()
user_agent = UserAgent()

# Default HTTP headers; the Referer and X-Requested-With values make the
# request look like an XMLHttpRequest issued from tiku.vko.cn.
HEADERS = {
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36',
    'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
    'Referer': 'http://tiku.vko.cn/',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
}