def _read_request_line( line: bytes) -> Tuple[str, int, bytes, bytes, bytes, bytes, bytes]: try: method, target, http_version = line.split() port: Optional[int] if target == b"*" or target.startswith(b"/"): scheme, authority, path = b"", b"", target host, port = "", 0 elif method == b"CONNECT": scheme, authority, path = b"", target, b"" host, port = url.parse_authority(authority, check=True) if not port: raise ValueError else: scheme, rest = target.split(b"://", maxsplit=1) authority, path_ = rest.split(b"/", maxsplit=1) path = b"/" + path_ host, port = url.parse_authority(authority, check=True) port = port or url.default_port(scheme) if not port: raise ValueError # TODO: we can probably get rid of this check? url.parse(target) raise_if_http_version_unknown(http_version) except ValueError as e: raise ValueError(f"Bad HTTP request line: {line!r}") from e return host, port, method, scheme, authority, path, http_version
def _read_request_line(rfile):
    """
    Read the first line from *rfile* and parse it as an HTTP/1 request line.

    Three shapes of request target are distinguished:

    - ``*`` or a target starting with ``/``: host is ``""`` and port is ``0``,
      scheme and authority are empty and the target is used as the path.
    - ``CONNECT`` requests: the target is the authority and must carry a port.
    - anything else is treated as ``scheme://authority/path``; a missing port
      falls back to ``url.default_port(scheme)``.

    Returns:
        ``(host, port, method, scheme, authority, path, http_version)``

    Raises:
        exceptions.HttpReadDisconnect: if reading the first line reported a
            disconnect (re-raised with a clearer message).
        exceptions.HttpSyntaxException: if the line cannot be parsed.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect as e:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected") from e

    try:
        method, target, http_version = line.split()

        if target == b"*" or target.startswith(b"/"):
            scheme, authority, path = b"", b"", target
            host, port = "", 0
        elif method == b"CONNECT":
            scheme, authority, path = b"", target, b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            scheme, rest = target.split(b"://", maxsplit=1)
            authority, path_ = rest.split(b"/", maxsplit=1)
            path = b"/" + path_
            host, port = url.parse_authority(authority, check=True)
            port = port or url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        _check_http_version(http_version)
    except ValueError as e:
        # Use !r: formatting bytes without a conversion goes through
        # str(bytes), which triggers BytesWarning under `python -b`.
        # The resulting text is the same for a bytes line.
        raise exceptions.HttpSyntaxException(
            f"Bad HTTP request line: {line!r}"
        ) from e

    return host, port, method, scheme, authority, path, http_version
def test_parse_authority(authority: AnyStr, valid: bool, out):
    """
    Check `parse_authority` in both lenient and strict mode.

    Lenient mode (``check=False``) must always yield *out*. Strict mode
    (``check=True``) must yield *out* for valid input and raise
    ``ValueError`` otherwise.
    """
    lenient_result = parse_authority(authority, False)
    assert lenient_result == out

    if not valid:
        with pytest.raises(ValueError):
            parse_authority(authority, True)
        return

    assert parse_authority(authority, True) == out
def parse_h2_request_headers(
        h2_headers: Sequence[Tuple[bytes, bytes]]
) -> Tuple[str, int, bytes, bytes, bytes, bytes, http.Headers]:
    """
    Separate the HTTP/2 pseudo-headers from the regular headers and parse them.

    ``:method``, ``:scheme`` and ``:path`` are required; ``:authority`` is
    optional and defaults to ``b""``. Any pseudo-header left over afterwards is
    rejected.

    Returns:
        ``(host, port, method, scheme, authority, path, headers)``. Without an
        authority, host is ``""`` and port is ``0``. With an authority that has
        no port, the port is 80 for scheme ``b"http"`` and 443 otherwise.

    Raises:
        ValueError: if a required pseudo-header is missing, if an unknown
            pseudo-header is present, or if the authority fails the checked
            parse.
    """
    pseudo, regular_headers = split_pseudo_headers(h2_headers)

    try:
        method: bytes = pseudo.pop(b":method")
        scheme: bytes = pseudo.pop(b":scheme")  # this raises for HTTP/2 CONNECT requests
        path: bytes = pseudo.pop(b":path")
        authority: bytes = pseudo.pop(b":authority", b"")
    except KeyError as e:
        raise ValueError(f"Required pseudo header is missing: {e}")

    if pseudo:
        raise ValueError(f"Unknown pseudo headers: {pseudo}")

    if not authority:
        # No destination information available from the pseudo-headers.
        return "", 0, method, scheme, authority, path, regular_headers

    host, port = url.parse_authority(authority, check=True)
    if port is None:
        port = 80 if scheme == b'http' else 443
    return host, port, method, scheme, authority, path, regular_headers
def read_request_headers(self, flow):
    """
    Wait for the request message to arrive and build an HTTP/2 request from it.

    The pseudo-headers are removed from the message headers. ``:authority``
    is optional (defaults to ``""``), the other three are popped without a
    default. If this stream was pushed, ``flow.metadata['h2-pushed-stream']``
    is set to ``True``.

    Returns:
        An ``http.HTTPRequest`` with version ``b"HTTP/2.0"``, no content and
        no trailers. The port falls back to ``url.default_port(scheme)`` and
        finally to ``0``.
    """
    self.request_message.arrived.wait()
    self.raise_zombie()

    if self.pushed:
        flow.metadata['h2-pushed-stream'] = True

    message_headers = self.request_message.headers

    # pseudo header must be present, see https://http2.github.io/http2-spec/#rfc.section.8.1.2.3
    authority = message_headers.pop(':authority', "")
    method = message_headers.pop(':method')
    scheme = message_headers.pop(':scheme')
    path = message_headers.pop(':path')

    host, port = url.parse_authority(authority, check=True)
    if not port:
        port = url.default_port(scheme) or 0

    return http.HTTPRequest(
        host,
        port,
        method.encode(),
        scheme.encode(),
        authority.encode(),
        path.encode(),
        b"HTTP/2.0",
        message_headers,
        None,
        None,
        self.timestamp_start,
        self.timestamp_end,
    )
def pretty_host(self) -> str:
    """
    Like :py:attr:`host`, but the host/:authority header is consulted first
    and preferred when it is set.

    This helps in transparent mode, where :py:attr:`host` is only an IP
    address. Note that the result may not reflect the actual destination,
    because the Host header could be spoofed.
    """
    host_header = self.host_header
    if not host_header:
        return self.host
    parsed = url.parse_authority(host_header, check=False)
    return parsed[0]
def read_request(
    self,
    __rfile,
    include_body=True,
    body_size_limit=None,
    allow_empty=False,
):
    """
    Receive one HTTP/2 transmission and turn it into a request object.

    ``__rfile`` and ``allow_empty`` are accepted but not used by this
    implementation. Passing a ``body_size_limit`` is not supported.

    Returns:
        A ``mitmproxy.net.http.Request`` with version ``b"HTTP/2.0"`` and an
        extra ``stream_id`` attribute. Missing pseudo-headers default to
        ``""``. The port falls back to ``url.default_port(scheme)`` and
        finally to ``0``.

    Raises:
        NotImplementedError: if ``body_size_limit`` is not ``None``.
    """
    if body_size_limit is not None:
        raise NotImplementedError()

    self.perform_connection_preface()

    started_at = time.time()
    if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
        self.tcp_handler.rfile.reset_timestamps()

    stream_id, headers, content = self._receive_transmission(
        include_body=include_body,
    )

    if hasattr(self.tcp_handler.rfile, "first_byte_timestamp"):
        # more accurate timestamp_start
        started_at = self.tcp_handler.rfile.first_byte_timestamp

    finished_at = time.time()

    # pseudo header must be present, see https://http2.github.io/http2-spec/#rfc.section.8.1.2.3
    authority = headers.pop(':authority', "")
    method = headers.pop(':method', "")
    scheme = headers.pop(':scheme', "")
    path = headers.pop(':path', "")

    host, port = url.parse_authority(authority, check=False)
    if not port:
        port = url.default_port(scheme) or 0

    parsed_request = mitmproxy.net.http.Request(
        host=host,
        port=port,
        method=method.encode(),
        scheme=scheme.encode(),
        authority=authority.encode(),
        path=path.encode(),
        http_version=b"HTTP/2.0",
        headers=headers,
        content=content,
        trailers=None,
        timestamp_start=started_at,
        timestamp_end=finished_at,
    )
    parsed_request.stream_id = stream_id
    return parsed_request
def pretty_host(self) -> str:
    """
    *Read-only:* Like `Request.host`, but `Request.host_header` is consulted
    first and preferred when it is set.

    This helps in transparent mode, where `Request.host` is only an IP
    address.

    *Warning:* In adversarial environments the result may not reflect the
    actual destination, because the Host header could be spoofed.
    """
    host_header = self.host_header
    if not host_header:
        return self.host
    parsed = url.parse_authority(host_header, check=False)
    return parsed[0]
def pretty_url(self) -> str:
    """
    Like :py:attr:`url`, but built from the host header (as
    :py:attr:`pretty_host` does) instead of :py:attr:`host`.

    For authority-form requests the authority is returned as-is. Without a
    host header this is simply :py:attr:`url`. If the host header carries no
    port, ``url.default_port(self.scheme)`` is used, falling back to 443.
    """
    if self.first_line_format == "authority":
        return self.authority

    host_header = self.host_header
    if host_header:
        display_host, display_port = url.parse_authority(host_header, check=False)
        if not display_port:
            display_port = url.default_port(self.scheme) or 443
        return url.unparse(self.scheme, display_host, display_port, self.path)

    return self.url
class HttpStream(layer.Layer):
    """
    Layer that processes the events of one HTTP stream.

    The stream is driven by two state attributes, `client_state` and
    `server_state`. Each holds the bound method that will handle the next
    request-side or response-side event respectively (see `_handle_event`).

    NOTE(review): several methods referenced below (`state_uninitialized`,
    `handle_protocol_error`, `send_response`, `handle_connect`, `check_killed`,
    `make_server_connection`, and the remaining `state_*` handlers) are not
    part of this excerpt; they are presumably defined further down in the
    class or inherited.
    """
    # Request body buffer; starts out empty (see __init__).
    request_body_buf: bytes
    # Response body buffer; starts out empty (see __init__).
    response_body_buf: bytes
    # The flow for this stream; assigned once request headers arrive.
    flow: http.HTTPFlow
    # Identifier of this stream; passed to ResponseHeaders when replying.
    stream_id: StreamId
    # Not touched anywhere in this excerpt; defaults to None.
    child_layer: Optional[layer.Layer] = None

    @property
    def mode(self):
        """
        Return the `mode` of the layer that directly precedes this one in
        `self.context.layers` (annotated as an `HttpLayer`).
        """
        i = self.context.layers.index(self)
        parent: HttpLayer = self.context.layers[i - 1]
        return parent.mode

    def __init__(self, context: Context, stream_id: int):
        """
        Create the stream with empty body buffers and both state handlers set
        to `state_uninitialized`.
        """
        super().__init__(context)
        self.request_body_buf = b""
        self.response_body_buf = b""
        self.client_state = self.state_uninitialized
        self.server_state = self.state_uninitialized
        self.stream_id = stream_id

    def __repr__(self):
        """Show the stream id and the names of the current state handlers."""
        return (f"HttpStream("
                f"id={self.stream_id}, "
                f"client_state={self.client_state.__name__}, "
                f"server_state={self.server_state.__name__}"
                f")")

    @expect(events.Start, HttpEvent)
    def _handle_event(self, event: events.Event) -> layer.CommandGenerator[None]:
        """
        Dispatch an incoming event.

        - `events.Start` arms the client side to wait for request headers.
        - Request/response protocol errors go to `handle_protocol_error`.
        - Request events (headers, data, end of message) go to the current
          `client_state` handler.
        - Everything else goes to the current `server_state` handler.
        """
        if isinstance(event, events.Start):
            self.client_state = self.state_wait_for_request_headers
        elif isinstance(event, (RequestProtocolError, ResponseProtocolError)):
            yield from self.handle_protocol_error(event)
        elif isinstance(event, (RequestHeaders, RequestData, RequestEndOfMessage)):
            yield from self.client_state(event)
        else:
            yield from self.server_state(event)

    @expect(RequestHeaders)
    def state_wait_for_request_headers(
            self, event: RequestHeaders) -> layer.CommandGenerator[None]:
        """
        Handle the request headers of this stream.

        Steps, in order:

        1. Create the flow (or reuse `event.replay_flow`) and attach the request.
        2. Validate the request; on failure respond with a 502 and enter the
           errored state.
        3. Hand CONNECT requests to `handle_connect`.
        4. Fill in host, port and scheme: from the server address in
           transparent mode, otherwise from the host header when the request
           has no host. A host header that fails the checked parse yields a
           400 response.
        5. Adjust the request target (regular mode, non-HTTP/2) and the host
           header (reverse mode without `keep_host_header`).
        6. Fire the request headers hook and stop if `check_killed` reports so.
        7. Answer `Expect: 100-continue` with a 100 response and drop the
           header.
        8. If the request is streamed, fire the request hook, connect to the
           server and forward the headers; otherwise switch to consuming the
           request body. In both cases the server side then waits for
           response headers.
        """
        if not event.replay_flow:
            self.flow = http.HTTPFlow(self.context.client, self.context.server)
        else:
            self.flow = event.replay_flow
        self.flow.request = event.request

        # A truthy validation result is treated as an error description.
        if err := validate_request(self.mode, self.flow.request):
            self.flow.response = http.HTTPResponse.make(502, str(err))
            self.client_state = self.state_errored
            return (yield from self.send_response())

        if self.flow.request.method == "CONNECT":
            return (yield from self.handle_connect())

        if self.mode is HTTPMode.transparent:
            # Determine .scheme, .host and .port attributes for transparent requests
            assert self.context.server.address
            self.flow.request.data.host = self.context.server.address[0]
            self.flow.request.data.port = self.context.server.address[1]
            self.flow.request.scheme = "https" if self.context.server.tls else "http"
        elif not self.flow.request.host:
            # We need to extract destination information from the host header.
            try:
                host, port = url.parse_authority(self.flow.request.host_header or "", check=True)
            except ValueError:
                self.flow.response = http.HTTPResponse.make(
                    400, "HTTP request has no host header, destination unknown.")
                self.client_state = self.state_errored
                return (yield from self.send_response())
            else:
                # No explicit port in the host header: derive it from whether
                # the client connection uses TLS.
                if port is None:
                    port = 443 if self.context.client.tls else 80
                self.flow.request.data.host = host
                self.flow.request.data.port = port
                self.flow.request.scheme = "https" if self.context.client.tls else "http"
        if self.mode is HTTPMode.regular and not self.flow.request.is_http2:
            # Set the request target to origin-form for HTTP/1, some servers don't support absolute-form requests.
            # see https://github.com/mitmproxy/mitmproxy/issues/1759
            self.flow.request.authority = ""

        # update host header in reverse proxy mode
        if self.context.options.mode.startswith(
                "reverse:") and not self.context.options.keep_host_header:
            assert self.context.server.address
            self.flow.request.host_header = url.hostport(
                "https" if self.context.server.tls else "http",
                self.context.server.address[0],
                self.context.server.address[1],
            )

        yield HttpRequestHeadersHook(self.flow)
        if (yield from self.check_killed(True)):
            return

        if self.flow.request.headers.get("expect", "").lower() == "100-continue":
            # Reply to the client with a header-less 100 response and remove
            # the Expect header from the request.
            continue_response = http.HTTPResponse.make(100)
            continue_response.headers.clear()
            yield SendHttp(ResponseHeaders(self.stream_id, continue_response), self.context.client)
            self.flow.request.headers.pop("expect")

        if self.flow.request.stream:
            if self.flow.response:
                raise NotImplementedError(
                    "Can't set a response and enable streaming at the same time."
                )
            yield HttpRequestHook(self.flow)
            ok = yield from self.make_server_connection()
            if not ok:
                return
            # Forward the request headers event to the server before any
            # body data is handled.
            yield SendHttp(event, self.context.server)
            self.client_state = self.state_stream_request_body
        else:
            self.client_state = self.state_consume_request_body
        self.server_state = self.state_wait_for_response_headers