Exemple #1
0
def tunnel_request_data(host, port, proxy_auth_header=None):
    """Build the raw bytes of an HTTP ``CONNECT`` request for a proxy tunnel.

    The request consists of the ``CONNECT host:port HTTP/1.1`` request line,
    a matching ``Host`` header, an optional ``Proxy-Authorization`` header
    and the terminating blank line.

    Args:
        host: Target host; converted with ``to_bytes(..., encoding='ascii')``.
        port: Target port; stringified with ``str()`` before conversion.
        proxy_auth_header: Value for the ``Proxy-Authorization`` header. It is
            concatenated directly with bytes, so it must already be
            bytes-like. The header is omitted when this is falsy.

    Returns:
        The complete request as ``bytes``, every line terminated by CRLF.
    """
    authority = b':'.join((to_bytes(host, encoding='ascii'),
                           to_bytes(str(port))))
    request_lines = [
        b'CONNECT ' + authority + b' HTTP/1.1',
        b'Host: ' + authority,
    ]
    if proxy_auth_header:
        request_lines.append(b'Proxy-Authorization: ' + proxy_auth_header)
    # One CRLF ends the last header line, the second one ends the header block.
    return b'\r\n'.join(request_lines) + b'\r\n\r\n'
Exemple #2
0
    def download_request(self, request):
        """Issue ``request`` through an agent and return the resulting deferred.

        The timeout and the redirect switch are read from ``request.meta``
        (keys ``download_timeout`` and ``download_redirect``); a missing or
        falsy value falls back to ``self._connectTimeout`` / ``self._redirect``.

        Side effects: ``request`` is stored on ``self.request`` and the
        scheduled cancel call is stored on ``self._timeout_cl``.

        Args:
            request: The request to download. This method reads its ``meta``,
                ``url``, ``method``, ``headers`` and ``body`` attributes.

        Returns:
            The object returned by ``agent.request(...)`` with the latency,
            body and timeout callbacks attached.

        Raises:
            Exception: Any error raised before ``agent.request(...)`` returned
                is logged and then re-raised. Previously such an error was
                swallowed and the method failed on ``return d`` with an
                ``UnboundLocalError`` that hid the real cause.
        """
        # How long the download may take before it is cancelled.
        timeout = request.meta.get('download_timeout') or self._connectTimeout
        redirect = request.meta.get('download_redirect') or self._redirect
        self.request = request
        # Stays None until agent.request() has handed something back, so the
        # except branch can tell whether there is anything to return.
        d = None
        try:
            logger.debug(*self.lfm.crawled('Request', request,
                                           '执行download_request,超时时间:',
                                           {'time': timeout}))
            if redirect:
                agent = self._getRedirectAgent(timeout)
            else:
                agent = self._getAgent(timeout)
            #  URL layout: protocol :// hostname[:port] / path / [;parameters][?query]#fragment
            #  urldefrag() strips the fragment.
            url = urldefrag(request.url)[0]
            method = to_bytes(request.method)
            headers = request.headers
            if request.body:
                bodyproducer = _RequestBodyProducer(request.body)
            elif method == b'POST':
                # A POST without a body still gets an (empty) body producer.
                bodyproducer = _RequestBodyProducer(b'')
            else:
                bodyproducer = None
            # NOTE(review): time.clock() was removed in Python 3.8. Switching
            # to time.perf_counter() must be done together with whatever
            # consumes start_time (presumably _cb_latency) -- confirm.
            start_time = time.clock()

            d = agent.request(method, to_bytes(url), headers, bodyproducer)
            d.addCallback(self._cb_latency, request, start_time)

            #  Fetch the body.
            d.addCallback(self._cb_body_get, request)
            d.addCallback(self._cb_body_done, request, url)
            #  Timeout guard: if no result has arrived within `timeout`
            #  seconds, cancel the deferred.
            #  A callLater call cannot fire while a callback keeps running;
            #  it only fires once control returns to the reactor main loop.
            #  Per the original author's note, receiving data yields back to
            #  the reactor while waiting, so d.cancel runs once the total
            #  time exceeds `timeout`.
            self._timeout_cl = reactor.callLater(timeout, d.cancel)
            d.addBoth(self._cb_timeout, url, timeout)
        except Exception as e:
            logger.error(*self.lfm.error("Request", request, DownloadAgent,
                                         '下载过程中出现错误:'),
                         extra={
                             'exception': e,
                         },
                         exc_info=True)
            if d is None:
                # Nothing was created that could be returned: surface the
                # real error instead of failing on an unbound `d` below.
                raise

        return d
Exemple #3
0
 def body(self, body):
     """Store ``body`` on ``self._body`` as bytes.

     ``None`` is stored as ``b''``; any other value is converted with
     ``to_bytes(body, self.encoding)``.
     """
     self._body = b'' if body is None else to_bytes(body, self.encoding)