Exemple #1
0
    def _retry(self, request, reason, spider):
        """Retry a failed request, handling 404s and proxy rotation first.

        Two special cases are handled before delegating to
        ``RetryMiddleware._retry``:

        * If the reason text contains ``404`` and the request's callback
          belongs to ``CarDetailSpider``, the ``CarInfo`` row referenced by
          the request cookies is flagged offline and ``retry_times`` is set
          to ``self.max_retry_times`` before delegating.
        * Otherwise, while retries remain, a copy of the request is routed
          through the next proxy from ``get_valid_proxy`` (or has its proxy
          removed when no proxy is returned).

        :param request: the request that failed.
        :param reason: failure reason (an exception or a status message).
        :param spider: the spider that issued the request.
        :returns: whatever ``RetryMiddleware._retry`` returns.
        """
        if isinstance(reason, TCPTimedOutError):
            # Overwrite the timeout's args with a short placeholder text.
            reason.args = (u'...',)

        retries = request.meta.get('retry_times', 0)

        # A request without an explicit callback has ``callback`` set to None,
        # so read ``im_class`` defensively instead of raising AttributeError.
        callback_class = getattr(request.callback, 'im_class', None)

        if '404' in str(reason) and callback_class == CarDetailSpider:
            ci = request.cookies[FetchConstant.CarInfo]

            fs = FetchSession()
            try:
                ci_exist = fs.query(CarInfo).filter(CarInfo.seqid == ci.seqid).first()
                if ci_exist:
                    try:
                        ci_exist.statustype = CarInfoValueConst.offline
                        ci_exist.offlinedatetime = datetime.datetime.today()
                        fs.commit()
                        msg = (u'[404] seqid: %s ,url not exist %s') % (ci.seqid, request.url,)
                        spider.log(msg, log.INFO)
                    except Exception:
                        # Best effort: undo the partial update, but leave a
                        # trace instead of failing silently.
                        fs.rollback()
                        msg = (u'[404] seqid: %s ,failed to mark offline %s') % (ci.seqid, request.url,)
                        spider.log(msg, log.ERROR)
            finally:
                # Always release the session, including when no row matched
                # or the query itself raised.
                fs.close()

            # Presumably makes the base middleware treat the request as out
            # of retries -- TODO confirm against RetryMiddleware._retry.
            request.meta['retry_times'] = self.max_retry_times

            return RetryMiddleware._retry(self, request, reason, spider)

        # Fall back to the untouched request when no retries remain, so the
        # final delegation never sees an unbound name.
        rs = request
        if retries <= self.max_retry_times - 1:
            next_proxy = get_valid_proxy.next()
            rs = request.copy()
            if next_proxy:
                proxy_str = next_proxy.build_literal()
                rs = rs.replace(dont_filter=True)
                rs.meta['proxy'] = proxy_str
                msg = (u'use to %s access %s ') % (proxy_str, rs.url)
                spider.log(msg, log.DEBUG)
            elif u'proxy' in rs.meta:
                # No proxy available: drop the stale one and go direct.
                del rs.meta[u'proxy']
                msg = (u'use self ip asscess %s') % (rs.url)
                spider.log(msg, log.DEBUG)

        return RetryMiddleware._retry(self, rs, reason, spider)
Exemple #2
0
 def _retry(self, request, reason, spider):
     """Send a ``newnym`` signal over the Tor control connection, then retry.

     :param request: the request that failed.
     :param reason: failure reason forwarded to the base middleware.
     :param spider: the spider that issued the request.
     :returns: whatever ``RetryMiddleware._retry`` returns.
     """
     # NOTE(review): ``log.message`` is kept as in the original; confirm the
     # imported ``log`` module really exposes ``message`` (Scrapy's legacy
     # logger uses ``msg``).
     log.message('Changing proxy')
     conn = TorCtl.connect(passphrase="1234")
     try:
         conn.sendAndRecv('signal newnym\r\n')
     finally:
         # Close the control connection even if sending the signal fails.
         conn.close()
     # NOTE(review): a blocking 3-second pause after the signal; presumably
     # to let Tor switch circuits -- confirm it is acceptable here.
     time.sleep(3)
     log.message("renewed")

     return RetryMiddleware._retry(self, request, reason, spider)
Exemple #3
0
    def _retry(self, request, reason, spider):
        """Assign the spider's next proxy to the request, then retry it.

        While retries remain, ``spider.get_next_proxy`` is asked for a proxy
        based on the request cookies and the result is stored in
        ``request.meta['proxy']``. If that call raises, a warning is logged
        and the request is retried unchanged.

        :param request: the request that failed.
        :param reason: failure reason forwarded to the base middleware.
        :param spider: spider providing ``get_next_proxy`` and ``log``.
        :returns: whatever ``RetryMiddleware._retry`` returns.
        """
        retries = request.meta.get('retry_times', 0)
        proxy = request.meta.get(u'proxy')
        if retries <= self.max_retry_times - 1:
            try:
                next_proxy = spider.get_next_proxy(request.cookies)
            except Exception:
                # Interpolate the cookies so the warning shows what was
                # inspected instead of a literal "%s".
                msg = (u'there is no proxy list in cookies %s ,please '
                       'check') % (request.cookies,)
                spider.log(msg, log.WARNING)
                return RetryMiddleware._retry(self, request, reason, spider)
            if proxy:
                msg = (u'proxy %s fail, use %s for the %srd time '
                       'retry') % (proxy, next_proxy, retries)
            else:
                msg = (u'request without proxy , use %s for the %srd time '
                       'retry') % (next_proxy, retries)
            spider.log(msg, log.INFO)
            request.meta[u'proxy'] = next_proxy

        return RetryMiddleware._retry(self, request, reason, spider)
 def _retry(self, request, reason, spider):
     """Assign the spider's next proxy to the request, then retry it.

     While retries remain, ``spider.get_next_proxy`` is asked for a proxy
     based on the request cookies and the result is stored in
     ``request.meta['proxy']``. If that call raises, a warning is logged
     and the request is retried unchanged.

     :param request: the request that failed.
     :param reason: failure reason forwarded to the base middleware.
     :param spider: spider providing ``get_next_proxy`` and ``log``.
     :returns: whatever ``RetryMiddleware._retry`` returns.
     """
     retries = request.meta.get('retry_times', 0)
     proxy = request.meta.get(u'proxy')
     if retries <= self.max_retry_times - 1:
         try:
             next_proxy = spider.get_next_proxy(request.cookies)
         except Exception:
             # Interpolate the cookies so the warning shows what was
             # inspected instead of a literal "%s".
             msg = (u'there is no proxy list in cookies %s ,please '
                    'check') % (request.cookies,)
             spider.log(msg, log.WARNING)
             return RetryMiddleware._retry(self, request, reason, spider)
         if proxy:
             msg = (u'proxy %s fail, use %s for the %srd time '
                    'retry') % (proxy, next_proxy, retries)
         else:
             msg = (u'request without proxy , use %s for the %srd time '
                    'retry') % (next_proxy, retries)
         spider.log(msg, log.INFO)
         request.meta[u'proxy'] = next_proxy

     return RetryMiddleware._retry(self, request, reason, spider)
Exemple #5
0
 def _retry(self, request, reason, spider):
     """Send ``signal NEWNYM`` to 127.0.0.1:9051 over Telnet, then retry.

     :param request: the request that failed.
     :param reason: failure reason forwarded to the base middleware.
     :param spider: the spider that issued the request.
     :returns: whatever ``RetryMiddleware._retry`` returns.
     """
     log.msg('Changing-proxy')
     tn = telnetlib.Telnet('127.0.0.1', 9051)
     try:
         # NOTE(review): this banner is printed by the interactive telnet
         # client, so this read presumably just waits out its 2 s timeout --
         # confirm before removing.
         tn.read_until("Escape character is '^]'.", 2)
         tn.write('AUTHENTICATE "267765"\r\n')
         tn.read_until("250 OK", 2)
         tn.write("signal NEWNYM\r\n")
         tn.read_until("250 OK", 2)
         tn.write("quit\r\n")
     finally:
         # Release the socket even if a read or write above fails.
         tn.close()
     # NOTE(review): a blocking 3-second pause after the signal; presumably
     # to let Tor switch circuits -- confirm it is acceptable here.
     time.sleep(3)
     log.msg('Proxychanged')
     return RetryMiddleware._retry(self, request, reason, spider)
Exemple #6
0
    def _retry(self, request, reason, spider):
        """Send a rate-limited NEWNYM signal to Tor, then retry the request.

        The Tor control connection, the time of the last signal and the
        minimum interval between signals are stored as class attributes on
        ``RetryChangeProxyMiddleware`` and set up on first use from the
        spider settings (``TOR_HOST``, ``TOR_PORT``, ``TOR_PASSW``,
        ``TOR_CHANGE_LIMIT``).

        :param request: the request that failed.
        :param reason: failure reason; only string reasons are retried.
        :param spider: spider whose ``settings`` configure the connection.
        :returns: whatever ``RetryMiddleware._retry`` returns for string
            reasons, otherwise ``None``.
        """
        settings = spider.settings
        shared = RetryChangeProxyMiddleware

        if shared.conn is None:
            host = settings.get('TOR_HOST')
            port = settings.get('TOR_PORT')
            password = settings.get('TOR_PASSW')
            shared.conn = TorCtl.connect(controlAddr=host,
                                         controlPort=port,
                                         passphrase=password)
            shared.last = 0
            shared.timelimit = settings.get('TOR_CHANGE_LIMIT')

        if not isinstance(reason, basestring):
            # NOTE(review): non-string reasons (e.g. exception instances) are
            # not retried at all -- confirm this is intended.
            return None

        now = time.time()
        elapsed = now - shared.last
        # Only signal when a connection exists and the configured interval
        # has passed since the previous signal.
        if shared.conn and elapsed > shared.timelimit:
            TorCtl.Connection.send_signal(shared.conn, "NEWNYM")
            shared.last = now
            text = 'Proxy changed for reason %s. New last: %s' % (reason, time.strftime("%H:%M:%S"))
            log.msg(text, log.INFO)
        return RetryMiddleware._retry(self, request, reason, spider)
Exemple #7
0
def _retry_proxy(self, request, reason, spider):
    """Call ``change_proxy`` with logging on, then run the base retry.

    :param request: the request that failed.
    :param reason: failure reason forwarded to the base middleware.
    :param spider: the spider that issued the request.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    change_proxy(log_msg=True)
    outcome = RetryMiddleware._retry(self, request, reason, spider)
    return outcome
Exemple #8
0
 def _retry(self, request, exception, spider):
     """Refresh the Tor circuit on a refused connection, then retry.

     :param request: the request that failed.
     :param exception: failure forwarded to the base middleware.
     :param spider: the spider that issued the request.
     :returns: whatever ``RetryMiddleware._retry`` returns.
     """
     refused = isinstance(exception, ConnectionRefusedError)
     if refused:
         manager = TorManager.get_instance()
         manager.refresh_circuit()
         # Blocking 3-second pause after the refresh, before logging.
         time.sleep(3)
         log.msg('Connection refused and tor circuit refreshed')
     outcome = RetryMiddleware._retry(self, request, exception, spider)
     return outcome
 def _retry(self, request, exception, spider):
     """Refresh the Tor circuit on a refused connection, then retry.

     :param request: the request that failed.
     :param exception: failure forwarded to the base middleware.
     :param spider: the spider that issued the request.
     :returns: whatever ``RetryMiddleware._retry`` returns.
     """
     refused = isinstance(exception, ConnectionRefusedError)
     if refused:
         manager = TorManager.get_instance()
         manager.refresh_circuit()
         # Blocking 3-second pause after the refresh, before logging.
         time.sleep(3)
         log.msg('Connection refused and tor circuit refreshed')
     outcome = RetryMiddleware._retry(self, request, exception, spider)
     return outcome
Exemple #10
0
def _retry_proxy(self, request, reason, spider):
    """Call ``change_proxy`` with logging on, then run the base retry.

    :param request: the request that failed.
    :param reason: failure reason forwarded to the base middleware.
    :param spider: the spider that issued the request.
    :returns: whatever ``RetryMiddleware._retry`` returns.
    """
    change_proxy(log_msg=True)
    outcome = RetryMiddleware._retry(self, request, reason, spider)
    return outcome