def _parse_headers(self, data): data = native_str(data.decode('latin1')) eol = data.find("\r\n") start_line = data[:eol] try: headers = httputil.HTTPHeaders.parse(data[eol:]) except ValueError: # probably form split() if there was no ':' in the line raise httputil.HTTPInputError("Malformed HTTP headers: %r" % data[eol:100]) return start_line, headers
def _read_body(self, headers, delegate): content_length = headers.get("Content-Length") if content_length: content_length = int(content_length) if content_length > self._max_body_size: raise httputil.HTTPInputError("Content-Length too long") return self._read_fixed_body(content_length, delegate) if headers.get("Transfer-Encoding") == "chunked": return self._read_chunked_body(delegate) if self.is_client: return self._read_body_until_close(delegate) return None
def _parse_headers(self, data): data = native_str(data.decode('latin1')).lstrip("\r\n") # RFC 7230 section allows for both CRLF and bare LF. eol = data.find("\n") start_line = data[:eol].rstrip("\r") try: headers = httputil.HTTPHeaders.parse(data[eol:]) except ValueError: # probably form split() if there was no ':' in the line raise httputil.HTTPInputError("Malformed HTTP headers: %r" % data[eol:100]) return start_line, headers
def _read_body(self, code, headers, delegate): if "Content-Length" in headers: if "," in headers["Content-Length"]: # Proxies sometimes cause Content-Length headers to get # duplicated. If all the values are identical then we can # use them but if they differ it's an error. pieces = re.split(r',\s*', headers["Content-Length"]) if any(i != pieces[0] for i in pieces): raise httputil.HTTPInputError( "Multiple unequal Content-Lengths: %r" % headers["Content-Length"]) headers["Content-Length"] = pieces[0] content_length = int(headers["Content-Length"]) if content_length > self._max_body_size: raise httputil.HTTPInputError("Content-Length too long") else: content_length = None # 204 No Content,表示服务器已经完成了请求,但是返回的信息不包括 message-body,但是可以通过 # header fields 返回一些用于更新的元数据。 if code == 204: # This response code is not allowed to have a non-empty body, # and has an implicit length of zero instead of read-until-close. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3 if ("Transfer-Encoding" in headers or content_length not in (None, 0)): raise httputil.HTTPInputError( "Response with code %d should not have body" % code) content_length = 0 # 持久连接: Content-Length or Transfer-Encoding if content_length is not None: return self._read_fixed_body(content_length, delegate) if headers.get("Transfer-Encoding") == "chunked": return self._read_chunked_body(delegate) # 非持久连接 if self.is_client: return self._read_body_until_close(delegate) return None
def _parse_headers(self, data): # The lstrip removes newlines that some implementations sometimes # insert between messages of a reused connection. Per RFC 7230, # we SHOULD ignore at least one empty line before the request. # http://tools.ietf.org/html/rfc7230#section-3.5 data = native_str(data.decode('latin1')).lstrip("\r\n") eol = data.find("\r\n") start_line = data[:eol] try: headers = httputil.HTTPHeaders.parse(data[eol:]) except ValueError: # probably form split() if there was no ':' in the line raise httputil.HTTPInputError("Malformed HTTP headers: %r" % data[eol:100]) return start_line, headers
async def data_received(self, chunk: bytes) -> None: if self._decompressor: compressed_data = chunk while compressed_data: decompressed = self._decompressor.decompress( compressed_data, self._chunk_size) if decompressed: ret = self._delegate.data_received(decompressed) if ret is not None: await ret compressed_data = self._decompressor.unconsumed_tail if compressed_data and not decompressed: raise httputil.HTTPInputError( "encountered unconsumed gzip data without making progress" ) else: ret = self._delegate.data_received(chunk) if ret is not None: await ret
def _parse_headers(self, data): # HTTP 消息包括 Request(c2s)和 Response(s2c),消息格式为: # 起始行(Request-Line/Status-Line) + 0 个或多个头域(((general-header | # (request-header | response-header) | entity-header)CRLF)) + # 头域结束行(\r\n,CRLF) + 可选的消息体(message-body),所以读取消息头时以 # r"\r\n\r\n" 作为匹配字符。 # 每个头域由一个头域名称(name) + 冒号(:) + 域值(value), 三部分组成, # name 是大小写无关的,value 前可以添加任何数量的空格符,头域可以被扩展为 # 多行,在每行开始处,使用至少一个空格或制表符。相关 RFC: # 1. Request:http://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html#sec5 # 2. Response:http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6 data = native_str(data.decode('latin1')) eol = data.find("\r\n") start_line = data[:eol] try: headers = httputil.HTTPHeaders.parse(data[eol:]) except ValueError: # probably form split() if there was no ':' in the line raise httputil.HTTPInputError("Malformed HTTP headers: %r" % data[eol:100]) return start_line, headers
def _read_chunked_body(self, delegate): # TODO: "chunk extensions" http://tools.ietf.org/html/rfc2616#section-3.6.1 total_size = 0 while True: chunk_len = yield self.stream.read_until(b"\r\n", max_bytes=64) chunk_len = int(chunk_len.strip(), 16) if chunk_len == 0: return total_size += chunk_len if total_size > self._max_body_size: raise httputil.HTTPInputError("chunked body too large") bytes_to_read = chunk_len while bytes_to_read: chunk = yield self.stream.read_bytes( min(bytes_to_read, self.params.chunk_size), partial=True) bytes_to_read -= len(chunk) if not self._write_finished or self.is_client: with _ExceptionLoggingContext(app_log): yield gen.maybe_future(delegate.data_received(chunk)) # chunk ends with \r\n crlf = yield self.stream.read_bytes(2) assert crlf == b"\r\n"
def _read_message(self, delegate): need_delegate_close = False try: header_future = self.stream.read_until_regex( b"\r?\n\r?\n", max_bytes=self.params.max_header_size) if self.params.header_timeout is None: header_data = yield header_future else: try: header_data = yield gen.with_timeout( self.stream.io_loop.time() + self.params.header_timeout, header_future, io_loop=self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: self.close() raise gen.Return(False) start_line, headers = self._parse_headers(header_data) if self.is_client: start_line = httputil.parse_response_start_line(start_line) self._response_start_line = start_line else: start_line = httputil.parse_request_start_line(start_line) self._request_start_line = start_line self._request_headers = headers self._disconnect_on_finish = not self._can_keep_alive( start_line, headers) need_delegate_close = True with _ExceptionLoggingContext(app_log): header_future = delegate.headers_received(start_line, headers) if header_future is not None: yield header_future if self.stream is None: # We've been detached. need_delegate_close = False raise gen.Return(False) skip_body = False if self.is_client: if (self._request_start_line is not None and self._request_start_line.method == 'HEAD'): skip_body = True code = start_line.code if code == 304: # 304 responses may include the content-length header # but do not actually have a body. # http://tools.ietf.org/html/rfc7230#section-3.3 skip_body = True if code >= 100 and code < 200: # 1xx responses should never indicate the presence of # a body. if ('Content-Length' in headers or 'Transfer-Encoding' in headers): raise httputil.HTTPInputError( "Response code %d cannot have body" % code) # TODO: client delegates will get headers_received twice # in the case of a 100-continue. Document or change? yield self._read_message(delegate) else: if (headers.get("Expect") == "100-continue" and not self._write_finished): self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n") if not skip_body: body_future = self._read_body( start_line.code if self.is_client else 0, headers, delegate) if body_future is not None: if self._body_timeout is None: yield body_future else: try: yield gen.with_timeout( self.stream.io_loop.time() + self._body_timeout, body_future, self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: gen_log.info("Timeout reading body from %s", self.context) self.stream.close() raise gen.Return(False) self._read_finished = True if not self._write_finished or self.is_client: need_delegate_close = False with _ExceptionLoggingContext(app_log): delegate.finish() # If we're waiting for the application to produce an asynchronous # response, and we're not detached, register a close callback # on the stream (we didn't need one while we were reading) if (not self._finish_future.done() and self.stream is not None and not self.stream.closed()): self.stream.set_close_callback(self._on_connection_close) yield self._finish_future if self.is_client and self._disconnect_on_finish: self.close() if self.stream is None: raise gen.Return(False) except httputil.HTTPInputError as e: gen_log.info("Malformed HTTP message from %s: %s", self.context, e) self.close() raise gen.Return(False) finally: if need_delegate_close: with _ExceptionLoggingContext(app_log): delegate.on_connection_close() self._clear_callbacks() raise gen.Return(True)
async def _read_message(self, delegate: httputil.HTTPMessageDelegate) -> bool: need_delegate_close = False try: header_future = self.stream.read_until_regex( b"\r?\n\r?\n", max_bytes=self.params.max_header_size ) if self.params.header_timeout is None: header_data = await header_future else: try: header_data = await gen.with_timeout( self.stream.io_loop.time() + self.params.header_timeout, header_future, quiet_exceptions=iostream.StreamClosedError, ) except gen.TimeoutError: self.close() return False start_line_str, headers = self._parse_headers(header_data) if self.is_client: resp_start_line = httputil.parse_response_start_line(start_line_str) self._response_start_line = resp_start_line start_line = ( resp_start_line ) # type: Union[httputil.RequestStartLine, httputil.ResponseStartLine] # TODO: this will need to change to support client-side keepalive self._disconnect_on_finish = False else: req_start_line = httputil.parse_request_start_line(start_line_str) self._request_start_line = req_start_line self._request_headers = headers start_line = req_start_line self._disconnect_on_finish = not self._can_keep_alive( req_start_line, headers ) need_delegate_close = True with _ExceptionLoggingContext(app_log): header_recv_future = delegate.headers_received(start_line, headers) if header_recv_future is not None: await header_recv_future if self.stream is None: # We've been detached. need_delegate_close = False return False skip_body = False if self.is_client: assert isinstance(start_line, httputil.ResponseStartLine) if ( self._request_start_line is not None and self._request_start_line.method == "HEAD" ): skip_body = True code = start_line.code if code == 304: # 304 responses may include the content-length header # but do not actually have a body. # http://tools.ietf.org/html/rfc7230#section-3.3 skip_body = True if 100 <= code < 200: # 1xx responses should never indicate the presence of # a body. if "Content-Length" in headers or "Transfer-Encoding" in headers: raise httputil.HTTPInputError( "Response code %d cannot have body" % code ) # TODO: client delegates will get headers_received twice # in the case of a 100-continue. Document or change? await self._read_message(delegate) else: if headers.get("Expect") == "100-continue" and not self._write_finished: self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n") if not skip_body: body_future = self._read_body( resp_start_line.code if self.is_client else 0, headers, delegate ) if body_future is not None: if self._body_timeout is None: await body_future else: try: await gen.with_timeout( self.stream.io_loop.time() + self._body_timeout, body_future, quiet_exceptions=iostream.StreamClosedError, ) except gen.TimeoutError: gen_log.info("Timeout reading body from %s", self.context) self.stream.close() return False self._read_finished = True if not self._write_finished or self.is_client: need_delegate_close = False with _ExceptionLoggingContext(app_log): delegate.finish() # If we're waiting for the application to produce an asynchronous # response, and we're not detached, register a close callback # on the stream (we didn't need one while we were reading) if ( not self._finish_future.done() and self.stream is not None and not self.stream.closed() ): self.stream.set_close_callback(self._on_connection_close) await self._finish_future if self.is_client and self._disconnect_on_finish: self.close() if self.stream is None: return False except httputil.HTTPInputError as e: gen_log.info("Malformed HTTP message from %s: %s", self.context, e) if not self.is_client: await self.stream.write(b"HTTP/1.1 400 Bad Request\r\n\r\n") self.close() return False finally: if need_delegate_close: with _ExceptionLoggingContext(app_log): delegate.on_connection_close() header_future = None # type: ignore self._clear_callbacks() return True
def _read_message(self, delegate): need_delegate_close = False try: # 消息头与消息体之间由一个空行分开 header_future = self.stream.read_until_regex( b"\r?\n\r?\n", max_bytes=self.params.max_header_size) if self.params.header_timeout is None: header_data = yield header_future else: try: header_data = yield gen.with_timeout( self.stream.io_loop.time() + self.params.header_timeout, header_future, io_loop=self.stream.io_loop) except gen.TimeoutError: self.close() raise gen.Return(False) # 解析消息头,分离头字段和首行(request-line/status-line) start_line, headers = self._parse_headers(header_data) # 作为 client 解析的是 server 的 response,作为 server 解析的是 client 的 request。 # response 与 request 的 start_line(status-line/request-line) 的字段内容不同: # 1. response's status-line: HTTP-Version SP Status-Code SP Reason-Phrase CRLF # 2. request's request-line:Method SP Request-URI SP HTTP-Version CRLF # start_line 的值是一个 namedtuple。 if self.is_client: start_line = httputil.parse_response_start_line(start_line) self._response_start_line = start_line else: start_line = httputil.parse_request_start_line(start_line) self._request_start_line = start_line self._request_headers = headers # 非 keep-alive 的请求或响应处理完成后要关闭连接。 self._disconnect_on_finish = not self._can_keep_alive( start_line, headers) need_delegate_close = True with _ExceptionLoggingContext(app_log): header_future = delegate.headers_received(start_line, headers) if header_future is not None: # 如果 header_future 是一个 `Future` 实例,则要等到完成才读取 body。 yield header_future # websocket ??? if self.stream is None: # We've been detached. need_delegate_close = False raise gen.Return(False) skip_body = False if self.is_client: # 作为 client 如果发起的是 HEAD 请求,那么 server response 应该无消息体 if (self._request_start_line is not None and self._request_start_line.method == 'HEAD'): skip_body = True code = start_line.code if code == 304: # 304 responses may include the content-length header # but do not actually have a body. # http://tools.ietf.org/html/rfc7230#section-3.3 skip_body = True if code >= 100 and code < 200: # 1xx responses should never indicate the presence of # a body. if ('Content-Length' in headers or 'Transfer-Encoding' in headers): raise httputil.HTTPInputError( "Response code %d cannot have body" % code) # TODO: client delegates will get headers_received twice # in the case of a 100-continue. Document or change? yield self._read_message(delegate) else: # 100-continue 这个状态码是在 HTTP/1.1 中为了提高传输效率而设置的。当 # client 需要 POST 较大数据给 WebServer 时,可以在发送 HTTP 请求时带上 # Expect: 100-continue,WebServer 如果接受这个请求则应答一个 # ``HTTP/1.1 100 (Continue)``,那么 client 就继续传输 request body, # 否则应答 ``HTTP/1.1 417 Expectation Failed`` client 就放弃传输剩余 # 的数据。(注:Expect 头部域,用于指出客户端要求的特殊服务器行为采用扩展语法 # 定义,以方便扩展。) if (headers.get("Expect") == "100-continue" and not self._write_finished): self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n") if not skip_body: body_future = self._read_body( start_line.code if self.is_client else 0, headers, delegate) if body_future is not None: if self._body_timeout is None: yield body_future else: try: yield gen.with_timeout( self.stream.io_loop.time() + self._body_timeout, body_future, self.stream.io_loop) except gen.TimeoutError: gen_log.info("Timeout reading body from %s", self.context) self.stream.close() raise gen.Return(False) self._read_finished = True # 对 client mode ,response 解析完成就调用 HTTPMessageDelegate.finish() 方法是合适的; # 对 server mode ,_write_finished 表示 response 是否发送完成,未完成前调用 # HTTPMessageDelegate.finish() 方法让 delegate 执行请求响应; if not self._write_finished or self.is_client: need_delegate_close = False with _ExceptionLoggingContext(app_log): delegate.finish() # If we're waiting for the application to produce an asynchronous # response, and we're not detached, register a close callback # on the stream (we didn't need one while we were reading) # # NOTE:_finish_future resolves when all data has been written and flushed # to the IOStream. # # hold 住执行流程,直到异步响应完成,所有数据都写入 fd,才继续后续处理,通常调用方执行 `finish` 方法 # 设置 `_finish_future` 完成,详细见 `finish` 和 `_finish_request` 方法实现。 if (not self._finish_future.done() and self.stream is not None and not self.stream.closed()): self.stream.set_close_callback(self._on_connection_close) yield self._finish_future # 对于 client mode,处理完响应后如果不是 keep-alive 就断开连接。 # 对于 server mode,需要在 response 完成后才断开连接,详细见 _finish_request/finish 方法实现。 if self.is_client and self._disconnect_on_finish: self.close() if self.stream is None: raise gen.Return(False) except httputil.HTTPInputError as e: gen_log.info("Malformed HTTP message from %s: %s", self.context, e) self.close() raise gen.Return(False) finally: # 连接 “关闭” 前还没能结束处理请求(call HTTPMessageDelegate.finish()),则 # call HTTPMessageDelegate.on_connection_close() if need_delegate_close: with _ExceptionLoggingContext(app_log): delegate.on_connection_close() self._clear_callbacks() raise gen.Return(True)
def _read_message(self, delegate): need_delegate_close = False try: # 这一句会从 iostream 里异步地读取 header # 的内容,返回的是一个将会填充 # header 内容的值的 future 对象。 # HTTP 协议中,header 和 body 之间使用类似于 \r\n\r\n # 这样的被称为 CRLF 的字符串来划分, # 所以作者传入了下面的正则来保证刚好读完 header 的信息 header_future = self.stream.read_until_regex( b"\r?\n\r?\n", max_bytes=self.params.max_header_size) if self.params.header_timeout is None: header_data = yield header_future else: # 如果设置了 timeout,则将 Future 对象转化为一个 # gen._Timeout 对象,让 iostream 在 # 限定时间内去填充 future 对象内的 result,否则会报超时的错。 try: header_data = yield gen.with_timeout( self.stream.io_loop.time() + self.params.header_timeout, header_future, io_loop=self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: self.close() raise gen.Return(False) # 将 header 信息转化为保存有响应信息的对象。因为客户端和服务端中 # http 头信息的形式是不同的,所以要根据 connection 是客户端方还 # 是服务端方来作不同的处理。 # 客户端得到的为 start line 被叫做 status-line,结构类似于: # HTTP/1.1 200 OK # 服务端得到的 start line 被叫做 # request-line,结构类似于:GET / HTTP/1.1 start_line, headers = self._parse_headers(header_data) if self.is_client: start_line = httputil.parse_response_start_line(start_line) self._response_start_line = start_line else: start_line = httputil.parse_request_start_line(start_line) self._request_start_line = start_line self._request_headers = headers # 根据头信息来判断连接的保持规则。 self._disconnect_on_finish = not self._can_keep_alive( start_line, headers) need_delegate_close = True # 在这个 Context 之下,如果代码抛出异常,将会把 exception 的信息 # 用 app_log 当做日志输出 with _ExceptionLoggingContext(app_log): # 将解析好的头信息交给 delegate,这时对于服务端来说,会根据 # start line 中的 path 信息来匹配处理请求的那个 Handler。 header_future = delegate.headers_received(start_line, headers) # 在 stream 的情况下,header_future 则不会是 None。 if header_future is not None: yield header_future if self.stream is None: # We've been detached. need_delegate_close = False raise gen.Return(False) skip_body = False if self.is_client: # 对于客户端而言如果使用这个连接的对象是客户端类型的,则进 # 行头信息以及 HTTP 状态码来决定是否继续读取 body 数据。 if (self._request_start_line is not None and self._request_start_line.method == 'HEAD'): skip_body = True code = start_line.code if code == 304: # 304 responses may include the content-length header # but do not actually have a body. # http://tools.ietf.org/html/rfc7230#section-3.3 skip_body = True if code >= 100 and code < 200: # 1xx responses should never indicate the presence of # a body. if ('Content-Length' in headers or 'Transfer-Encoding' in headers): raise httputil.HTTPInputError( "Response code %d cannot have body" % code) # TODO: client delegates will get headers_received twice # in the case of a 100-continue. Document or change? yield self._read_message(delegate) else: # 对于服务端而言,不存在需要跳过 body 数据的情况。 if (headers.get("Expect") == "100-continue" and # 处理头中包含『Expect: 100-continue』的客户端请求。 not self._write_finished): self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n") if not skip_body: body_future = self._read_body( start_line.code if self.is_client else 0, headers, delegate) if body_future is not None: if self._body_timeout is None: yield body_future else: try: yield gen.with_timeout( self.stream.io_loop.time() + self._body_timeout, body_future, self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: gen_log.info("Timeout reading body from %s", self.context) self.stream.close() raise gen.Return(False) self._read_finished = True if not self._write_finished or self.is_client: # 服务端在这里便会调用到匹配到的 handler # 来向请求方写回返回数据。 # 这时 _RequestDispatcher 对象中已经保有了 header 和 body 的数 # 据,可以稍微处理一下(例如将表单数据 parse 一下)作为参数交给 # Handler 处理了。 need_delegate_close = False with _ExceptionLoggingContext(app_log): delegate.finish() # If we're waiting for the application to produce an asynchronous # response, and we're not detached, register a close callback # on the stream (we didn't need one while we were reading) if (not self._finish_future.done() and self.stream is not None and not self.stream.closed()): self.stream.set_close_callback(self._on_connection_close) yield self._finish_future if self.is_client and self._disconnect_on_finish: self.close() if self.stream is None: raise gen.Return(False) except httputil.HTTPInputError as e: gen_log.info("Malformed HTTP message from %s: %s", self.context, e) self.close() raise gen.Return(False) finally: if need_delegate_close: with _ExceptionLoggingContext(app_log): delegate.on_connection_close() self._clear_callbacks() raise gen.Return(True)
def _read_message(self, delegate): need_delegate_close = False try: header_future = self.stream.read_until_regex( b"\r?\n\r?\n", max_bytes=self.params.max_header_size) if self.params.header_timeout is None: header_data = yield header_future else: try: header_data = yield gen.with_timeout( self.stream.io_loop.time() + self.params.header_timeout, header_future, io_loop=self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: self.close() raise gen.Return(False) start_line, headers = self._parse_headers(header_data) if self.is_client: start_line = httputil.parse_response_start_line(start_line) self._response_start_line = start_line else: start_line = httputil.parse_request_start_line(start_line) self._request_start_line = start_line self._request_headers = headers self._disconnect_on_finish = not self._can_keep_alive( start_line, headers) need_delegate_close = True with _ExceptionLoggingContext(): header_future = delegate.headers_received(start_line, headers) if header_future is not None: yield header_future if self.stream is None: # We've been detached. need_delegate_close = False raise gen.Return(False) skip_body = False if self.is_client: if (self._request_start_line is not None and self._request_start_line.method == 'HEAD'): skip_body = True code = start_line.code if code == 304: skip_body = True if code >= 100 and code < 200: if ('Content-Length' in headers or 'Transfer-Encoding' in headers): raise httputil.HTTPInputError( "Response code %d cannot have body" % code) yield self._read_message(delegate) else: if (headers.get("Expect") == "100-continue" and not self._write_finished): self.stream.write(b"HTTP/1.1 100 (Continue)\r\n\r\n") if not skip_body: body_future = self._read_body( start_line.code if self.is_client else 0, headers, delegate) if body_future is not None: if self._body_timeout is None: yield body_future else: try: yield gen.with_timeout( self.stream.io_loop.time() + self._body_timeout, body_future, self.stream.io_loop, quiet_exceptions=iostream.StreamClosedError) except gen.TimeoutError: print("Timeout reading body from %s" % self.context) self.stream.close() raise gen.Return(False) self._read_finished = True if not self._write_finished or self.is_client: need_delegate_close = False with _ExceptionLoggingContext(): delegate.finish() if (not self._finish_future.done() and self.stream is not None and not self.stream.closed()): self.stream.set_close_callback(self._on_connection_close) yield self._finish_future if self.is_client and self._disconnect_on_finish: self.close() if self.stream is None: raise gen.Return(False) except httputil.HTTPInputError as e: print("Malformed HTTP message from %s: %s", (self.context, e)) self.close() raise gen.Return(False) finally: if need_delegate_close: with _ExceptionLoggingContext(): delegate.on_connection_close() self._clear_callbacks() raise gen.Return(True)