def _request(self, sock, method, path, protocol_version, headers, payload, bufsize=8192):
    """Send one HTTP request over *sock* and return the response after its head is parsed.

    Args:
        sock: Connected socket-like object. It must provide ``setsockopt``,
            ``sendall``, an ``xip`` attribute, and whatever ``HTTPResponse``
            needs to read from it.
        method: HTTP method placed on the request line.
        path: Request target placed on the request line.
        protocol_version: Protocol token placed on the request line.
        headers: Mapping of header names to values. Names are title-cased
            when serialized.
        payload: Either a bytes body that is appended to the request head,
            or an object with a ``read`` method whose content is relayed
            unchanged, up to the ``Content-Length`` value found in *headers*.
        bufsize: Maximum number of bytes requested per ``payload.read`` call.

    Returns:
        An ``HTTPResponse`` on which ``begin()`` has been called, with
        ``xip`` and ``sock`` attributes attached.
    """
    request_lines = ['%s %s %s' % (method, path, protocol_version)]
    request_lines.extend('%s: %s' % (k.title(), v) for k, v in headers.items())
    # NOTE: Proxy-Authorization injection is currently disabled here.
    # The trailing element yields the blank line that terminates the head
    # once everything is joined with CRLF.
    request_lines.append('\r\n')
    request_head = '\r\n'.join(request_lines).encode()

    if hasattr(payload, 'read'):
        # Avoid sending many small packets while the body is streamed.
        sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, False)
        sock.sendall(request_head)
        readed = 0
        try:
            # Relay the body unchanged.
            left_size = int(headers.get('Content-Length', 0))
            while left_size > 0:
                data = payload.read(min(bufsize, left_size))
                if not data:
                    # Premature EOF: the source holds fewer bytes than
                    # Content-Length announced. Stop instead of spinning
                    # forever on empty reads.
                    break
                sock.sendall(data)
                left_size -= len(data)
                readed += len(data)
        finally:
            # Record how much of the payload was consumed, even on failure.
            if payload.__class__.__name__ == '_PaddedFile':
                payload.file.readed = readed
            else:
                payload.readed = readed
        # Restore no-delay sending for the next request.
        sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
    else:
        sock.sendall(request_head + payload)

    response = HTTPResponse(sock, method=method)
    response.begin()

    response.xip = sock.xip
    response.sock = sock
    return response
def _request(self, sock, method, path, protocol_version, headers, payload, bufsize=8192):
    """Send one HTTP request over *sock* and return the response after its head is parsed.

    Args:
        sock: Connected socket-like object. It must provide ``setsockopt``,
            ``sendall``, an ``xip`` attribute, and whatever ``HTTPResponse``
            needs to read from it.
        method: HTTP method placed on the request line.
        path: Request target placed on the request line.
        protocol_version: Protocol token placed on the request line.
        headers: Mapping of header names to values. Names are title-cased
            when serialized.
        payload: Either a bytes body that is appended to the request head,
            or an object with a ``read`` method whose content is relayed
            unchanged. If *headers* contains ``Transfer-Encoding`` the
            stream is relayed until it is exhausted; otherwise at most
            ``Content-Length`` bytes are relayed.
        bufsize: Maximum number of bytes requested per ``payload.read`` call.

    Returns:
        An ``HTTPResponse`` on which ``begin()`` has been called, with
        ``xip`` and ``sock`` attributes attached.
    """
    request_lines = ['%s %s %s' % (method, path, protocol_version)]
    request_lines.extend('%s: %s' % (k.title(), v) for k, v in headers.items())
    # NOTE: Proxy-Authorization injection is currently disabled here.
    # The trailing element yields the blank line that terminates the head
    # once everything is joined with CRLF.
    request_lines.append('\r\n')
    request_head = '\r\n'.join(request_lines).encode()

    if hasattr(payload, 'read'):
        # Avoid sending many small packets while the body is streamed.
        sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, False)
        sock.sendall(request_head)
        readed = 0
        # Relay the body unchanged.
        if 'Transfer-Encoding' in headers:
            # The encoded stream is forwarded verbatim until the source is
            # exhausted; chunk framing is neither parsed nor validated here.
            while True:
                chunk = payload.read(bufsize)
                if not chunk:
                    break
                sock.sendall(chunk)
                readed += len(chunk)
        else:
            left_size = int(headers.get('Content-Length', 0))
            while left_size > 0:
                data = payload.read(min(bufsize, left_size))
                if not data:
                    # Premature EOF: the source holds fewer bytes than
                    # Content-Length announced. Stop instead of spinning
                    # forever on empty reads.
                    break
                sock.sendall(data)
                left_size -= len(data)
                readed += len(data)
        # Record how much of the payload was consumed.
        payload.readed = readed
        # Restore no-delay sending for the next request.
        sock.setsockopt(socket.SOL_TCP, socket.TCP_NODELAY, True)
    else:
        sock.sendall(request_head + payload)

    response = HTTPResponse(sock, method=method)
    response.begin()

    response.xip = sock.xip
    response.sock = sock
    return response
def gae_urlfetch(method, url, headers, payload, appid, getfast=None, **kwargs):
    """Relay a request through the GAE app *appid* and return the unpacked response.

    The request line, the headers and ``gae_options`` are serialized,
    deflate-compressed (with the zlib header and checksum sliced off),
    prefixed with their length as a 2-byte big-endian signed integer and
    followed by the optional body. The result is sent via
    ``http_gws.request``.

    Args:
        method: HTTP method of the real request.
        url: URL of the real request.
        headers: Mapping of real request headers. A ``Host`` entry is
            removed from this mapping in place.
        payload: Real request body (``bytes`` or ``str``), or a falsy value
            for no body.
        appid: Key used to look up the request parameters in
            ``gae_params_dict``.
        getfast: Passed through to ``http_gws.request``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``None`` when ``http_gws.request`` yields no response or when the
        embedded status line is malformed. Otherwise a response object with
        ``app_status`` set: the outer response itself when its status is not
        200, a new ``HTTPResponse`` wrapping ``GzipSock(response)`` when the
        outer response carries ``Transfer-Encoding``, or the outer response
        rewritten with the embedded status, reason and headers.
    """
    # GAE proxy requests must not carry a Host header.
    if 'Host' in headers:
        del headers['Host']
    metadata = '%s %s HTTP/1.1\r\n' % (method, url)
    metadata += ''.join('%s: %s\r\n' % (k, v) for k, v in headers.items())
    metadata += gae_options
    if not isinstance(metadata, bytes):
        metadata = metadata.encode()
    # Drop the 2-byte zlib header and 4-byte checksum, leaving raw deflate.
    metadata = zlib.compress(metadata)[2:-4]
    if payload:
        if not isinstance(payload, bytes):
            payload = payload.encode()
        payload = struct.pack('!h', len(metadata)) + metadata + payload
    else:
        payload = struct.pack('!h', len(metadata)) + metadata
    request_headers = {
        'User-Agent': 'Mozilla/5.0',
        'Accept-Encoding': 'gzip',
        'Content-Length': str(len(payload)),
    }
    request_params = gae_params_dict[appid]
    realurl = 'GAE-' + url
    # Take one item from qGAE before issuing the request
    # (original note: "get start from Queue").
    qGAE.get()
    while True:
        response = http_gws.request(request_params, payload, request_headers,
                                    connection_cache_key='google_gae|:443',
                                    getfast=getfast,
                                    realmethod=method,
                                    realurl=realurl)
        if response is None:
            return
        if response.status not in (200, 404):
            break
        app_server = response.headers.get('Server')
        if app_server == 'Google Frontend':
            break
        if GC.GAE_ENABLEPROXY:
            logging.warning('GAE 前置代理 [%s:%d] 无法正常工作', *response.xip)
            continue
        if test_ip_gae(response.xip[0]):
            break
        logging.warning('发现并移除非 GAE IP:%s,Server:%s', response.xip[0], app_server)
    response.app_status = response.status
    if response.status != 200:
        return response
    # Unwrap and parse a chunked & gzipped response.
    if 'Transfer-Encoding' in response.headers:
        responseg = HTTPResponse(GzipSock(response), method=method)
        responseg.begin()
        responseg.app_status = 200
        responseg.xip = response.xip
        responseg.sock = response.sock
        return responseg
    # Read the length prefix of the compressed header block.
    data = response.read(2)
    if len(data) < 2:
        response.status = 502
        make_errinfo(response, b'connection aborted. too short leadtype data=' + data)
        return response
    headers_length, = struct.unpack('!h', data)
    data = response.read(headers_length)
    if len(data) < headers_length:
        response.status = 502
        make_errinfo(response, b'connection aborted. too short headers data=' + data)
        return response
    # Decompress and parse the header block.
    raw_response_line, headers_data = zlib.decompress(data, -zlib.MAX_WBITS).split(b'\r\n', 1)
    raw_response_line = str(raw_response_line, 'iso-8859-1')
    raw_response_list = raw_response_line.split(None, 2)
    raw_response_length = len(raw_response_list)
    if raw_response_length == 3:
        _, status, reason = raw_response_list
        response.reason = reason.strip()
    elif raw_response_length == 2:
        _, status = raw_response_list
        response.reason = ''
    else:
        return
    response.status = int(status)
    # Flag server-side error information. partition() splits on the first
    # blank line only, so a missing separator or blank lines inside the
    # message no longer raise ValueError on unpacking.
    headers_data, _, app_msg = headers_data.partition(b'\r\n\r\n')
    if app_msg:
        response.app_status = response.status
        response.reason = 'debug error'
        response.app_msg = app_msg
    response.headers = response.msg = parse_headers(BytesIO(headers_data))
    if response.app_status == 200:
        response._method = method
        if response.status in (204, 205, 304) or 100 <= response.status < 200:
            response.length = 0
        else:
            try:
                response.length = int(response.headers.get('Content-Length'))
            except (TypeError, ValueError):
                # Header missing (None) or not a valid integer.
                response.length = None
    return response
def _gae_urlfetch(appid, payload, getfast, method, realurl):
    """Send an already packed GAE request and return the unpacked response.

    Args:
        appid: Passed to ``_get_request_params`` to obtain the request
            parameters, the HTTP utility to use and the connection cache key.
        payload: Packed request body (bytes); its length becomes the
            ``Content-Length`` header.
        getfast: Passed through to the HTTP utility's ``request``.
        method: HTTP method of the real request.
        realurl: Passed through to the HTTP utility's ``request``.

    Returns:
        ``None`` when the HTTP utility yields no response or when the
        embedded status line is malformed. Otherwise a response object with
        ``app_status`` set (and ``http_util`` / ``connection_cache_key``
        attached): the outer response itself when its status is not 200, a
        new ``HTTPResponse`` wrapping ``GzipSock(response)`` when the outer
        response carries ``Transfer-Encoding``, or the outer response
        rewritten with the embedded status, reason and headers.
    """
    request_params, http_util, connection_cache_key = _get_request_params(appid)
    if http_util is http_gws:
        request_headers = {
            'User-Agent': 'Mozilla/5.0',
            'Accept-Encoding': 'gzip',
            'Content-Length': str(len(payload)),
        }
    else:
        # Disable the GAE chunked mechanism, which is incompatible with CDNs.
        request_headers = {
            'User-Agent': '',
            'Content-Length': str(len(payload)),
        }
    while True:
        response = http_util.request(request_params, payload, request_headers,
                                     connection_cache_key=connection_cache_key,
                                     getfast=getfast,
                                     realmethod=method,
                                     realurl=realurl)
        if response is None:
            return
        if response.status not in (200, 404):
            break
        if http_util is http_nor:
            break
        app_server = response.headers.get('Server')
        if app_server == 'Google Frontend':
            break
        if GC.GAE_ENABLEPROXY:
            logging.warning('GAE 前置代理 [%s:%d] 无法正常工作', *response.xip)
            continue
        if test_ip_gae(response.xip[0]):
            break
        logging.warning('发现并移除非 GAE IP:%s,Server:%s', response.xip[0], app_server)
    response.http_util = http_util
    response.connection_cache_key = connection_cache_key
    response.app_status = response.status
    if response.status != 200:
        return response
    # Unwrap and parse a chunked & gzipped response.
    if 'Transfer-Encoding' in response.headers:
        responseg = HTTPResponse(GzipSock(response), method=method)
        responseg.begin()
        responseg.app_status = 200
        responseg.xip = response.xip
        responseg.sock = response.sock
        responseg.http_util = http_util
        responseg.connection_cache_key = connection_cache_key
        return responseg
    # Read the length prefix of the compressed header block.
    data = response.read(2)
    if len(data) < 2:
        response.status = 502
        make_errinfo(response, 'connection aborted. too short leadtype data=%r' % data)
        return response
    headers_length, = struct.unpack('!h', data)
    data = response.read(headers_length)
    if len(data) < headers_length:
        response.status = 502
        make_errinfo(response, 'connection aborted. too short headers data=%r' % data)
        return response
    # Decompress and parse the header block.
    raw_response_line, headers_data = zlib.decompress(data, -zlib.MAX_WBITS).split(b'\r\n', 1)
    raw_response_line = str(raw_response_line, 'iso-8859-1')
    raw_response_list = raw_response_line.split(None, 2)
    raw_response_length = len(raw_response_list)
    if raw_response_length == 3:
        _, status, reason = raw_response_list
        response.reason = reason.strip()
    elif raw_response_length == 2:
        _, status = raw_response_list
        response.reason = ''
    else:
        return
    response.status = int(status)
    # Flag server-side error information. partition() splits on the first
    # blank line only, so a missing separator or blank lines inside the
    # message no longer raise ValueError on unpacking.
    headers_data, _, app_msg = headers_data.partition(b'\r\n\r\n')
    if app_msg:
        response.app_status = response.status
        response.reason = 'debug error'
        response.app_msg = app_msg
    response.headers = response.msg = parse_headers(BytesIO(headers_data))
    if response.app_status == 200:
        response._method = method
        if response.status in (204, 205, 304) or 100 <= response.status < 200:
            response.length = 0
        else:
            try:
                response.length = int(response.headers.get('Content-Length'))
            except (TypeError, ValueError):
                # Header missing (None) or not a valid integer.
                response.length = None
    return response