def _retry(self, request, reason, spider):
    """Retry a failed request through another proxy; retire listings that 404.

    Two paths are handled before delegating to ``RetryMiddleware._retry``:

    * If ``reason`` mentions ``404`` and the request's callback belongs to
      ``CarDetailSpider``, the matching ``CarInfo`` row (looked up by the
      ``seqid`` of the object stored in ``request.cookies``) is marked
      offline, and ``retry_times`` is set to ``self.max_retry_times`` before
      delegating.
    * Otherwise, while retries remain, a copy of the request is re-issued
      through the next proxy from ``get_valid_proxy`` (or without a proxy
      when none is available).

    :param request: the request that failed.
    :param reason: the failure; an exception instance or a string.
    :param spider: the running spider, used for logging.
    :returns: whatever ``RetryMiddleware._retry`` returns, or ``None`` when
        the retry budget is already used up.
    """
    if isinstance(reason, TCPTimedOutError):
        # Overwrite the timeout's message with a short placeholder.
        reason.args = (u'...',)
    retries = request.meta.get('retry_times', 0)

    # request.callback may be None or a plain function, neither of which
    # has ``im_class``; getattr avoids an AttributeError in that case.
    is_404 = '404' in str(reason)
    if is_404 and getattr(request.callback, 'im_class', None) == CarDetailSpider:
        ci = request.cookies[FetchConstant.CarInfo]
        fs = FetchSession()
        try:
            ci_exist = fs.query(CarInfo).filter(CarInfo.seqid == ci.seqid).first()
            if ci_exist:
                try:
                    ci_exist.statustype = CarInfoValueConst.offline
                    ci_exist.offlinedatetime = datetime.datetime.today()
                    fs.commit()
                    msg = (u'[404] seqid: %s ,url not exist %s') % (ci.seqid, request.url,)
                    spider.log(msg, log.INFO)
                except Exception:
                    # Best effort: undo the partial update but leave a trace
                    # instead of failing silently.
                    fs.rollback()
                    spider.log(u'[404] seqid: %s ,failed to mark offline %s'
                               % (ci.seqid, request.url), log.WARNING)
        finally:
            # Close the session whether or not a matching row was found.
            fs.close()
        # Use up the retry budget before handing over to the stock middleware.
        request.meta['retry_times'] = self.max_retry_times
        return RetryMiddleware._retry(self, request, reason, spider)

    if retries <= self.max_retry_times - 1:
        next_proxy = get_valid_proxy.next()
        rs = request.copy()
        if next_proxy:
            proxy_str = next_proxy.build_literal()
            rs = rs.replace(dont_filter=True)
            rs.meta['proxy'] = proxy_str
            msg = (u'use to %s access %s ') % (proxy_str, rs.url)
            spider.log(msg, log.DEBUG)
        else:
            # No proxy available: drop any proxy left on the copied request.
            try:
                del rs.meta[u'proxy']
            except KeyError:
                # The request was already proxy-less; nothing to report.
                pass
            else:
                msg = (u'use self ip asscess %s') % (rs.url,)
                spider.log(msg, log.DEBUG)
        return RetryMiddleware._retry(self, rs, reason, spider)

    # Retry budget already used up: nothing is rescheduled.
    return None
def _retry(self, request, reason, spider):
    """Send a ``newnym`` signal over the Tor control connection, then retry.

    Opens a TorCtl connection, sends ``signal newnym``, waits three seconds
    and delegates to ``RetryMiddleware._retry``.

    :param request: the request that failed.
    :param reason: the failure passed through to the stock middleware.
    :param spider: the running spider.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    # NOTE(review): the control passphrase is hard-coded; consider reading it
    # from settings.
    log.message('Changing proxy')
    conn = TorCtl.connect(passphrase="1234")
    if conn is None:
        # NOTE(review): presumably connect() yields None when the control
        # port cannot be reached or authenticated - confirm against TorCtl.
        # Skip the signal rather than crash, and still retry the request.
        log.message('Tor control connection unavailable, proxy not changed')
        return RetryMiddleware._retry(self, request, reason, spider)
    try:
        conn.sendAndRecv('signal newnym\r\n')
    finally:
        # Always release the control connection, even if the signal fails.
        conn.close()
    # NOTE(review): time.sleep blocks the calling thread; confirm this is
    # acceptable where the middleware runs.
    time.sleep(3)
    log.message("renewed")
    return RetryMiddleware._retry(self, request, reason, spider)
def _retry(self, request, reason, spider):
    """Attach the spider's next proxy to the request before retrying it.

    While retries remain, asks ``spider.get_next_proxy(request.cookies)`` for
    a proxy, logs the switch and stores it in ``request.meta['proxy']``. If
    the spider cannot supply a proxy, a warning is logged and the request is
    retried unchanged.

    :param request: the request that failed.
    :param reason: the failure passed through to the stock middleware.
    :param spider: the running spider; must provide ``get_next_proxy``.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    retries = request.meta.get('retry_times', 0)
    proxy = request.meta.get(u'proxy')
    if retries <= self.max_retry_times - 1:
        try:
            next_proxy = spider.get_next_proxy(request.cookies)
        except Exception:
            # Interpolate the cookies into the message; a one-element tuple
            # keeps a dict operand from being treated as a format mapping.
            msg = (u'there is no proxy list in cookies %s ,please check') % (
                request.cookies,)
            spider.log(msg, log.WARNING)
            return RetryMiddleware._retry(self, request, reason, spider)
        if proxy:
            msg = (u'proxy %s fail, use %s for the %srd time '
                   'retry') % (proxy, next_proxy, retries)
        else:
            msg = (u'request without proxy , use %s for the %srd time '
                   'retry') % (next_proxy, retries)
        spider.log(msg, log.INFO)
        request.meta[u'proxy'] = next_proxy
    # Always delegate so the stock middleware decides whether to reschedule.
    return RetryMiddleware._retry(self, request, reason, spider)
def _retry(self, request, reason, spider):
    """Send ``NEWNYM`` to the local Tor control port over telnet, then retry.

    Connects to 127.0.0.1:9051, authenticates, sends ``signal NEWNYM``, waits
    three seconds and delegates to ``RetryMiddleware._retry``.

    :param request: the request that failed.
    :param reason: the failure passed through to the stock middleware.
    :param spider: the running spider.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    # NOTE(review): host, port and control password are hard-coded; consider
    # moving them to settings.
    log.msg('Changing-proxy')
    tn = telnetlib.Telnet('127.0.0.1', 9051)
    try:
        # NOTE(review): this banner looks like the telnet CLI's own output;
        # if the control port never sends it, this waits the full 2 s timeout.
        tn.read_until("Escape character is '^]'.", 2)
        tn.write('AUTHENTICATE "267765"\r\n')
        # Replies are read (up to 2 s each) but their content is not checked.
        tn.read_until("250 OK", 2)
        tn.write("signal NEWNYM\r\n")
        tn.read_until("250 OK", 2)
        tn.write("quit\r\n")
    finally:
        # Close the connection even when a read or write fails.
        tn.close()
    # NOTE(review): time.sleep blocks the calling thread; confirm this is
    # acceptable where the middleware runs.
    time.sleep(3)
    log.msg('Proxychanged')
    return RetryMiddleware._retry(self, request, reason, spider)
def _retry(self, request, reason, spider):
    """Signal Tor for a new identity (rate limited), then retry the request.

    A single TorCtl connection is stored on ``RetryChangeProxyMiddleware``
    and created on first use from the spider settings ``TOR_HOST``,
    ``TOR_PORT`` and ``TOR_PASSW``. When ``reason`` is a string and more than
    ``TOR_CHANGE_LIMIT`` seconds have passed since the last signal, a
    ``NEWNYM`` signal is sent and the timestamp is updated.

    :param request: the request that failed.
    :param reason: the failure; only string reasons trigger a signal.
    :param spider: the running spider, source of the Tor settings.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    settings = spider.settings
    shared = RetryChangeProxyMiddleware  # class-level state holder

    if shared.conn is None:
        # First use (or no connection yet): connect and reset the limiter.
        shared.conn = TorCtl.connect(
            controlAddr=settings.get('TOR_HOST'),
            controlPort=settings.get('TOR_PORT'),
            passphrase=settings.get('TOR_PASSW'),
        )
        shared.last = 0
        shared.timelimit = settings.get('TOR_CHANGE_LIMIT')

    if isinstance(reason, basestring):
        now = time.time()
        elapsed = now - shared.last
        if shared.conn and elapsed > shared.timelimit:
            TorCtl.Connection.send_signal(shared.conn, "NEWNYM")
            shared.last = now
            stamp = time.strftime("%H:%M:%S")
            log.msg('Proxy changed for reason %s. New last: %s' % (reason, stamp),
                    log.INFO)

    return RetryMiddleware._retry(self, request, reason, spider)
def _retry_proxy(self, request, reason, spider):
    """Call ``change_proxy`` with logging enabled, then retry the request.

    ``change_proxy`` is defined elsewhere; presumably it switches the
    outgoing proxy - verify against its definition.

    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    change_proxy(log_msg=True)
    outcome = RetryMiddleware._retry(self, request, reason, spider)
    return outcome
def _retry(self, request, exception, spider):
    """Refresh the Tor circuit on a refused connection, then retry.

    When ``exception`` is a ``ConnectionRefusedError``, the ``TorManager``
    singleton is asked to refresh its circuit, execution pauses for three
    seconds and a message is logged. Every request is then handed to
    ``RetryMiddleware._retry``.

    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    refused = isinstance(exception, ConnectionRefusedError)
    if refused:
        manager = TorManager.get_instance()
        manager.refresh_circuit()
        time.sleep(3)
        log.msg('Connection refused and tor circuit refreshed')
    outcome = RetryMiddleware._retry(self, request, exception, spider)
    return outcome