コード例 #1
0
ファイル: main.py プロジェクト: pijiupapa/crawler
def setup_adsl(enginer):
    """Start the PPPoE client when ADSL is enabled in the settings.

    Does nothing unless ``settings.USE_ADSL`` is truthy.

    Args:
        enginer: Accepted for the caller's signature; not used here.
    """
    if settings.USE_ADSL:
        # Imported inside the branch so the adsl package is only loaded
        # when ADSL is actually switched on.
        from adsl.ppoe.client import ppoe_client

        logger.debug(u"配置adsl")
        ppoe_client.start()
コード例 #2
0
ファイル: main.py プロジェクト: pijiupapa/crawler
def setup_vpn(enginer):
    """Start the PPTP VPN client when VPN use is enabled in the settings.

    Does nothing unless ``settings.USING_VPN`` is truthy.

    Args:
        enginer: Accepted for the caller's signature; not used here.
    """
    if settings.USING_VPN:
        # Imported inside the branch so the adsl package is only loaded
        # when the VPN is actually switched on.
        from adsl.pptp.client import vpn_client

        logger.debug(u"配置vpn")
        vpn_client.start()
コード例 #3
0
 def log_response(self, response):
     """Send one ``request`` measurement line for *response* to influx_logger.

     The line carries host, project, url_id and status as comma-separated
     key=value pairs, followed by a single ``cost`` value.

     Args:
         response: Response object exposing ``request.url``,
             ``status_code`` and ``cost_time``.
     """
     logger.debug("log response into influxdb")
     influx_url_id = self.get_influx_url_id(response.request.url)
     # cost_time is scaled by 1000 and truncated to an int
     # (milliseconds, if cost_time is expressed in seconds).
     cost_scaled = int(response.cost_time * 1000)
     template = "request,host=%s,project=%s,url_id=%s,status=%s cost=%s"
     values = (
         settings.host,
         settings.project,
         influx_url_id,
         response.status_code,
         cost_scaled,
     )
     influx_logger.log(template % values)
コード例 #4
0
ファイル: scheduler.py プロジェクト: pijiupapa/crawler
 def next_task(self, queue):
     """Block until *queue* yields a message and return it as a task.

     Calls ``self.channel.basic_get`` repeatedly, sleeping one second
     after every empty result, then builds the task from the message
     body using the task class registered for *queue* in ``self.tasks``.

     Args:
         queue: Queue name; also the key into ``self.tasks``.

     Returns:
         The task instance, with ``message`` and ``scheduler`` attached.
     """
     while True:
         logger.debug("get task")
         method, header, body = self.channel.basic_get(queue)
         if method:
             break
         # A falsy method frame means nothing was fetched; wait and retry.
         time.sleep(1)

     envelope = Message(method, header, body)
     new_task = self.tasks[queue].from_jsondata(body)
     new_task.message = envelope
     new_task.scheduler = self
     return new_task
コード例 #5
0
    def request(self, *args, **kwargs):
        """Send a request through the parent session, retrying on failure.

        ``max_retry`` and ``content_type`` are consumed from *kwargs* and
        never forwarded to the parent ``request``; everything else is
        passed through unchanged.

        Args:
            *args: Positional arguments for the parent ``request``.
            **kwargs: Keyword arguments for the parent ``request``, plus:
                timeout: Falls back to ``self.timeout`` when absent.
                max_retry: Attempt limit; a missing or falsy value falls
                    back to ``self.max_retry``.
                content_type: Stored on the response as ``content_type``
                    (default ``"text/html"``).

        Returns:
            The first response that passes ``check_error_page`` and
            ``check_ban``, with ``content_type`` and ``cost_time`` set.

        Raises:
            ExitWithoutDone: When every attempt has failed.
        """
        if 'timeout' not in kwargs:
            kwargs['timeout'] = self.timeout

        attempts = kwargs.pop('max_retry', None)
        if not attempts:
            attempts = self.max_retry
        expected_type = kwargs.pop("content_type", "text/html")

        # Per-category failure tallies, reported once retries run out.
        timeouts = 0
        conn_failures = 0
        bad_responses = 0

        for _ in range(attempts):
            try:
                started_at = time.time()
                response = super(BaseSession, self).request(*args, **kwargs)
                response.content_type = expected_type
                elapsed = time.time() - started_at
                logger.debug("requst cost time: %s" % elapsed)
                response.cost_time = elapsed
                self.log_response(response)
                # NOTE(review): these checks presumably raise ResponseError /
                # BanError respectively, which the handlers below catch.
                self.check_error_page(response)
                self.check_ban(response)
                return response

            except requests.exceptions.ConnectionError:
                conn_failures += 1
                logger.debug("request session connection error")
                self.connection_error()

            except requests.exceptions.ReadTimeout:
                timeouts += 1
                logger.debug("request session receive data timeout")

            except requests.exceptions.ChunkedEncodingError:
                # Retried like the others, but not tallied.
                logger.debug("request session chunked encoding error")

            except BanError:
                logger.debug("ip has been banned by server")
                self.on_ban(response)

            except ResponseError:
                bad_responses += 1
                logger.info("response error page")
                self.on_error(response)

        # Reached only when the loop ran out of attempts: a success returns
        # from inside the loop and the loop never breaks.
        logger.debug("request session connect times greater than %s" %
                     attempts)
        self.error_times(timeouts, conn_failures, bad_responses, attempts)
        raise ExitWithoutDone()