def host(self, val: Union[str, bytes]) -> None:
    """
    Set the request host and keep dependent fields in sync.

    The value is stored on ``self.data.host`` after being passed through
    ``always_str(val, "idna", "strict")``. If the message already carries a
    ``Host`` header, that header is overwritten with the raw ``val``. If an
    authority is currently set, it is rebuilt from the scheme, the new host
    and the port.
    """
    data = self.data
    data.host = always_str(val, "idna", "strict")

    # Only touch the Host header when one is already present.
    headers = data.headers
    if "Host" in headers:
        headers["Host"] = val

    # Without an authority there is nothing left to keep consistent.
    if not data.authority:
        return
    self.authority = url.hostport(self.scheme, self.host, self.port)
def _set_request_url(self, flow, postreq=''):
    """
    Resolve the upstream URL for ``flow`` and rewrite the request to target it.

    Returns ``False`` when the request was either re-pointed at the configured
    forward host (requests addressed to ``self.proxy_magic``) or answered by
    one of the homepage handlers; returns ``True`` once scheme, host, port and
    path of the request have been replaced with the resolved upstream URL.
    """
    request = flow.request
    client_address = flow.client_conn.address

    # Prefer the Host header; fall back to the host of the request itself.
    host = request.headers.get('host') or request.host

    homepage_redirect = None
    if host == self.proxy_magic:
        if request.path in (H_REFRESH_PATH, H_REDIR_PATH):
            # Homepage helper paths are resolved first and dispatched below.
            homepage_redirect = request.path
        else:
            # Any other request to the magic host is forwarded unchanged.
            request.host = self.fwd_host
            request.scheme = self.fwd_scheme
            request.port = self.fwd_port
            request.headers['X-Proxy-For'] = str(client_address.host)
            return False

    if host:
        origin = request.scheme + '://' + host
    else:
        origin = hostport(request.scheme, request.host, request.port)

    req_url = origin + request.path

    # Remember the original URL and scheme on the request object.
    request.req_url = req_url
    request.req_scheme = request.scheme

    full_url, extra_data = self.upstream_url_resolver(
        url=quote_plus(req_url),
        headers=request.headers,
        address=client_address,
        postreq=postreq,
    )

    if homepage_redirect:
        target = extra_data.get('url')
        if target:
            if homepage_redirect == H_REFRESH_PATH:
                self.homepage_refresh(flow, target)
            elif homepage_redirect == H_REDIR_PATH:
                self.homepage_redir(flow, target)
            # NOTE(review): nesting of this return was reconstructed from
            # flattened source; it is assumed to belong to the `if url:`
            # branch -- confirm against the original file.
            return False

    # Assigned left to right: scheme, host, port, path.
    (request.scheme,
     request.host,
     request.port,
     request.path) = parse(full_url)

    flow.extra_data = extra_data
    return True
class HttpStream(layer.Layer):
    """
    Layer tracking a single HTTP stream, identified by ``stream_id``.

    Client-side and server-side progress are kept as two independent state
    functions (``client_state`` / ``server_state``); incoming events are
    dispatched to whichever of the two is responsible for them.

    NOTE(review): only part of the class is visible here. States such as
    ``state_uninitialized`` and helpers such as ``send_response``,
    ``handle_connect``, ``check_killed`` and ``make_server_connection`` are
    referenced below but defined elsewhere.
    """
    # Body buffers, initialised to empty bytes in __init__.
    request_body_buf: bytes
    response_body_buf: bytes
    # Set once request headers arrive (see state_wait_for_request_headers).
    flow: http.HTTPFlow
    stream_id: StreamId
    child_layer: Optional[layer.Layer] = None

    @property
    def mode(self):
        """Return the ``mode`` of the layer directly preceding this one in ``context.layers``."""
        i = self.context.layers.index(self)
        parent: HttpLayer = self.context.layers[i - 1]
        return parent.mode

    def __init__(self, context: Context, stream_id: int):
        """Create the stream with empty body buffers and both sides uninitialized."""
        super().__init__(context)
        self.request_body_buf = b""
        self.response_body_buf = b""
        self.client_state = self.state_uninitialized
        self.server_state = self.state_uninitialized
        self.stream_id = stream_id

    def __repr__(self):
        """Show the stream id and the names of the current client/server state functions."""
        return (f"HttpStream("
                f"id={self.stream_id}, "
                f"client_state={self.client_state.__name__}, "
                f"server_state={self.server_state.__name__}"
                f")")

    @expect(events.Start, HttpEvent)
    def _handle_event(self, event: events.Event) -> layer.CommandGenerator[None]:
        """
        Route an event to the matching handler.

        ``Start`` arms the client side to wait for request headers, protocol
        errors go to ``handle_protocol_error``, request events go to the
        current client state and everything else to the current server state.
        """
        if isinstance(event, events.Start):
            self.client_state = self.state_wait_for_request_headers
        elif isinstance(event, (RequestProtocolError, ResponseProtocolError)):
            yield from self.handle_protocol_error(event)
        elif isinstance(event, (RequestHeaders, RequestData, RequestEndOfMessage)):
            yield from self.client_state(event)
        else:
            yield from self.server_state(event)

    @expect(RequestHeaders)
    def state_wait_for_request_headers(
            self, event: RequestHeaders) -> layer.CommandGenerator[None]:
        """
        Handle the request headers of a new stream.

        Steps, in order:

        1. Create a new flow, or adopt ``event.replay_flow`` if one is given,
           and attach the request.
        2. Answer with 502 if ``validate_request`` reports an error.
        3. Hand CONNECT requests to ``handle_connect``.
        4. Fill in host/port/scheme for transparent mode, or from the Host
           header when the request has no host (400 if that cannot be parsed).
        5. Clear the authority for non-HTTP/2 requests in regular mode and
           rewrite the Host header in reverse mode unless ``keep_host_header``.
        6. Fire the request-headers hook and stop if the flow was killed.
        7. Answer ``Expect: 100-continue`` directly.
        8. Either start streaming the request body to the server or switch to
           consuming it, then wait for response headers on the server side.

        Raises:
            NotImplementedError: if streaming is enabled while a response has
                already been set on the flow.
        """
        if not event.replay_flow:
            self.flow = http.HTTPFlow(self.context.client, self.context.server)
        else:
            self.flow = event.replay_flow
        self.flow.request = event.request

        if err := validate_request(self.mode, self.flow.request):
            self.flow.response = http.HTTPResponse.make(502, str(err))
            self.client_state = self.state_errored
            return (yield from self.send_response())

        if self.flow.request.method == "CONNECT":
            return (yield from self.handle_connect())

        if self.mode is HTTPMode.transparent:
            # Determine .scheme, .host and .port attributes for transparent requests
            assert self.context.server.address
            self.flow.request.data.host = self.context.server.address[0]
            self.flow.request.data.port = self.context.server.address[1]
            self.flow.request.scheme = "https" if self.context.server.tls else "http"
        elif not self.flow.request.host:
            # We need to extract destination information from the host header.
            try:
                host, port = url.parse_authority(self.flow.request.host_header or "", check=True)
            except ValueError:
                self.flow.response = http.HTTPResponse.make(
                    400, "HTTP request has no host header, destination unknown.")
                self.client_state = self.state_errored
                return (yield from self.send_response())
            else:
                if port is None:
                    # No explicit port: derive it from whether the client side uses TLS.
                    port = 443 if self.context.client.tls else 80
                self.flow.request.data.host = host
                self.flow.request.data.port = port
                self.flow.request.scheme = "https" if self.context.client.tls else "http"

        if self.mode is HTTPMode.regular and not self.flow.request.is_http2:
            # Set the request target to origin-form for HTTP/1, some servers don't support absolute-form requests.
            # see https://github.com/mitmproxy/mitmproxy/issues/1759
            self.flow.request.authority = ""

        # update host header in reverse proxy mode
        if self.context.options.mode.startswith(
                "reverse:") and not self.context.options.keep_host_header:
            assert self.context.server.address
            self.flow.request.host_header = url.hostport(
                "https" if self.context.server.tls else "http",
                self.context.server.address[0],
                self.context.server.address[1],
            )

        yield HttpRequestHeadersHook(self.flow)
        if (yield from self.check_killed(True)):
            return

        if self.flow.request.headers.get("expect", "").lower() == "100-continue":
            # Reply with a header-less 100 response ourselves and drop the
            # Expect header from the request.
            continue_response = http.HTTPResponse.make(100)
            continue_response.headers.clear()
            yield SendHttp(ResponseHeaders(self.stream_id, continue_response), self.context.client)
            self.flow.request.headers.pop("expect")

        if self.flow.request.stream:
            if self.flow.response:
                raise NotImplementedError(
                    "Can't set a response and enable streaming at the same time."
                )
            yield HttpRequestHook(self.flow)
            ok = yield from self.make_server_connection()
            if not ok:
                # Returns without arming the server state below.
                return
            # Forward the original headers event to the server.
            yield SendHttp(event, self.context.server)
            self.client_state = self.state_stream_request_body
        else:
            self.client_state = self.state_consume_request_body
        self.server_state = self.state_wait_for_response_headers
def replay(self, f):  # pragma: no cover
    """
    Replay the request of flow ``f`` against a server and record the response.

    The "request" hook is asked first; if it returns an ``http.HTTPResponse``
    that response is used and no connection is made. Otherwise a new server
    connection is opened (through the upstream proxy when the mode starts with
    ``"upstream:"``, directly to ``(r.host, r.port)`` otherwise), the request
    is written with the HTTP/1 assembler and the response is read back. The
    "response" hook is asked afterwards.

    Replay/netlib errors are stored on ``f.error`` and reported via the
    "error" hook; kills and any other exception are only logged. In all cases
    the request's authority is restored, ``f.live`` is reset and a still
    connected server connection is finished and closed.
    """
    f.live = True
    r = f.request
    bsl = human.parse_size(self.options.body_size_limit)
    # r.authority is modified below for the wire format and restored in `finally`.
    authority_backup = r.authority
    server = None
    try:
        f.response = None

        # If we have a channel, run script hooks.
        request_reply = self.channel.ask("request", f)
        if isinstance(request_reply, http.HTTPResponse):
            f.response = request_reply

        if not f.response:
            # In all modes, we directly connect to the server displayed
            if self.options.mode.startswith("upstream:"):
                server_address = server_spec.parse_with_mode(
                    self.options.mode)[1].address
                server = connections.ServerConnection(server_address)
                server.connect()
                if r.scheme == "https":
                    # Tunnel through the upstream proxy with CONNECT before
                    # starting TLS.
                    connect_request = http.make_connect_request(
                        (r.data.host, r.port))
                    server.wfile.write(
                        http1.assemble_request(connect_request))
                    server.wfile.flush()
                    resp = http1.read_response(server.rfile,
                                               connect_request,
                                               body_size_limit=bsl)
                    if resp.status_code != 200:
                        raise exceptions.ReplayException(
                            "Upstream server refuses CONNECT request")
                    server.establish_tls(
                        sni=f.server_conn.sni,
                        **tls.client_arguments_from_options(self.options))
                    # NOTE(review): bytes here vs. str "" in the direct
                    # branch below -- presumably both are accepted by the
                    # authority setter; confirm.
                    r.authority = b""
                else:
                    # Plain HTTP via the upstream proxy: send host:port as authority.
                    r.authority = hostport(r.scheme, r.host, r.port)
            else:
                server_address = (r.host, r.port)
                server = connections.ServerConnection(server_address)
                server.connect()
                if r.scheme == "https":
                    server.establish_tls(
                        sni=f.server_conn.sni,
                        **tls.client_arguments_from_options(self.options))
                # NOTE(review): nesting reconstructed from flattened source;
                # assumed to apply to every direct connection, not only
                # https -- confirm.
                r.authority = ""

            server.wfile.write(http1.assemble_request(r))
            server.wfile.flush()
            r.timestamp_start = r.timestamp_end = time.time()

            # Replace the flow's previous server connection with the new one.
            if f.server_conn:
                f.server_conn.close()
            f.server_conn = server

            f.response = http1.read_response(server.rfile,
                                             r,
                                             body_size_limit=bsl)
        response_reply = self.channel.ask("response", f)
        if response_reply == exceptions.Kill:
            raise exceptions.Kill()
    except (exceptions.ReplayException, exceptions.NetlibException) as e:
        f.error = flow.Error(str(e))
        self.channel.ask("error", f)
    except exceptions.Kill:
        self.channel.tell("log",
                          log.LogEntry(flow.Error.KILLED_MESSAGE, "info"))
    except Exception as e:
        # Anything unexpected is only logged, not re-raised.
        self.channel.tell("log", log.LogEntry(repr(e), "error"))
    finally:
        r.authority = authority_backup
        f.live = False
        if server and server.connected():
            server.finish()
            server.close()
def test_hostport():
    """Check that ``url.hostport`` renders bytes scheme/host plus port 8080 as ``b"foo.com:8080"``."""
    expected = b"foo.com:8080"
    rendered = url.hostport(b"https", b"foo.com", 8080)
    assert rendered == expected
def _process_flow(self, f):
    """
    Process one request/response exchange for flow ``f``.

    Phase 1 (first ``try``): read the request headers, handle CONNECT
    (authority-form) requests, validate the request form, fire the
    "requestheaders" hook, answer ``Expect: 100-continue`` and read the
    request body unless streaming is enabled. HTTP errors here produce a 400
    response and ``False``.

    Phase 2: normalise the request (authority, Host header, transparent-mode
    destination) and fire the "request" hook.

    Phase 3 (second ``try``): unless a hook already set a response, connect to
    the server, send the request (retrying once after a reconnect on netlib
    errors), read the response, fire the response hooks and relay the response
    to the client. A 101 response hands over to a WebSocket layer or the next
    layer.

    Returns:
        ``True`` if the exchange ran to the end of the method; ``False`` on
        every early exit (client disconnect, rejected CONNECT, protocol
        error, closed connection, protocol switch). CONNECT requests in
        regular/upstream mode return whatever the respective handler returns.
        NOTE(review): presumably the caller uses this to decide whether to
        keep serving requests on the connection -- confirm.

    Raises:
        exceptions.ProtocolException: for an HTTP/2 failure on the first
            attempt, or for errors that occur after a response exists.
    """
    try:
        try:
            request: http.HTTPRequest = self.read_request_headers(f)
        except exceptions.HttpReadDisconnect:
            # don't throw an error for disconnects that happen
            # before/between requests.
            return False

        f.request = request

        if request.first_line_format == "authority":
            # The standards are silent on what we should do with a CONNECT
            # request body, so although it's not common, it's allowed.
            f.request.data.content = b"".join(
                self.read_request_body(f.request))
            f.request.data.trailers = self.read_request_trailers(f.request)
            f.request.timestamp_end = time.time()
            self.channel.ask("http_connect", f)

            if self.mode is HTTPMode.regular:
                return self.handle_regular_connect(f)
            elif self.mode is HTTPMode.upstream:
                return self.handle_upstream_connect(f)
            else:
                msg = "Unexpected CONNECT request."
                self.send_error_response(400, msg)
                return False

        if not self.config.options.relax_http_form_validation:
            validate_request_form(self.mode, request)
        self.channel.ask("requestheaders", f)
        # Re-validate request form in case the user has changed something.
        if not self.config.options.relax_http_form_validation:
            validate_request_form(self.mode, request)

        if request.headers.get("expect", "").lower() == "100-continue":
            # TODO: We may have to use send_response_headers for HTTP2
            # here.
            self.send_response(http.make_expect_continue_response())
            request.headers.pop("expect")

        if f.request.stream:
            # Streaming: the body is read later, while it is forwarded.
            f.request.data.content = None
        else:
            f.request.data.content = b"".join(
                self.read_request_body(request))

        f.request.data.trailers = self.read_request_trailers(f.request)

        request.timestamp_end = time.time()
    except exceptions.HttpException as e:
        # We optimistically guess there might be an HTTP client on the
        # other end
        self.send_error_response(400, repr(e))
        # Request may be malformed at this point, so we unset it.
        f.request = None
        f.error = flow.Error(str(e))
        self.channel.ask("error", f)
        self.log("request", "warn",
                 [f"HTTP protocol error in client request: {e}"])
        return False

    self.log("request", "debug", [repr(request)])

    # set first line format to relative in regular mode,
    # see https://github.com/mitmproxy/mitmproxy/issues/1759
    if self.mode is HTTPMode.regular and request.first_line_format == "absolute":
        request.authority = ""

    # update host header in reverse proxy mode
    if self.config.options.mode.startswith(
            "reverse:") and not self.config.options.keep_host_header:
        f.request.host_header = url.hostport(
            self.config.upstream_server.scheme,
            *self.config.upstream_server.address)

    # Determine .scheme, .host and .port attributes for inline scripts. For
    # absolute-form requests, they are directly given in the request. For
    # authority-form requests, we only need to determine the request
    # scheme. For relative-form requests, we need to determine host and
    # port as well.
    if self.mode is HTTPMode.transparent:
        # Setting request.host also updates the host header, which we want
        # to preserve
        f.request.data.host = self.__initial_server_address[0]
        f.request.data.port = self.__initial_server_address[1]
        f.request.data.scheme = b"https" if self.__initial_server_tls else b"http"

    self.channel.ask("request", f)

    try:
        valid = (websocket.check_handshake(request.headers)
                 and websocket.check_client_version(request.headers))
        if valid:
            f.metadata['websocket'] = True
            # We only support RFC6455 with WebSocket version 13
            # allow inline scripts to manipulate the client handshake
            self.channel.ask("websocket_handshake", f)

        if not f.response:
            self.establish_server_connection(f.request.host,
                                             f.request.port,
                                             f.request.scheme)

            # Sends headers, body and trailers upstream, then reads the
            # response headers into f.response. Called again after a
            # reconnect if the first attempt fails.
            def get_response():
                self.send_request_headers(f.request)
                if f.request.stream:
                    chunks = self.read_request_body(f.request)
                    if callable(f.request.stream):
                        chunks = f.request.stream(chunks)
                    self.send_request_body(f.request, chunks)
                else:
                    self.send_request_body(f.request,
                                           [f.request.data.content])
                self.send_request_trailers(f.request)

                f.response = self.read_response_headers()

            try:
                get_response()
            except exceptions.NetlibException as e:
                self.log("server communication error: %s" % repr(e),
                         level="debug")
                # In any case, we try to reconnect at least once. This is
                # necessary because it might be possible that we already
                # initiated an upstream connection after clientconnect that
                # has already been expired, e.g consider the following event
                # log:
                # > clientconnect (transparent mode destination known)
                # > serverconnect (required for client tls handshake)
                # > read n% of large request
                # > server detects timeout, disconnects
                # > read (100-n)% of large request
                # > send large request upstream

                if isinstance(e, exceptions.Http2ProtocolException):
                    # do not try to reconnect for HTTP2
                    raise exceptions.ProtocolException(
                        "First and only attempt to get response via HTTP2 failed."
                    )
                elif f.request.stream:
                    # We may have already consumed some request chunks already,
                    # so all we can do is signal downstream that upstream closed the connection.
                    self.send_error_response(408, "Request Timeout")
                    f.error = flow.Error(repr(e))
                    self.channel.ask("error", f)
                    return False

                self.disconnect()
                self.connect()
                get_response()

            # call the appropriate script hook - this is an opportunity for
            # an inline script to set f.stream = True
            self.channel.ask("responseheaders", f)

            if f.response.stream:
                f.response.data.content = None
            else:
                f.response.data.content = b"".join(
                    self.read_response_body(f.request, f.response))
            f.response.timestamp_end = time.time()

            # no further manipulation of self.server_conn beyond this point
            # we can safely set it as the final attribute value here.
            f.server_conn = self.server_conn
        else:
            # response was set by an inline script.
            # we now need to emulate the responseheaders hook.
            self.channel.ask("responseheaders", f)

        f.response.data.trailers = self.read_response_trailers(
            f.request, f.response)

        self.log("response", "debug", [repr(f.response)])
        self.channel.ask("response", f)

        if not f.response.stream:
            # no streaming:
            # we already received the full response from the server and can
            # send it to the client straight away.
            self.send_response(f.response)
        else:
            # streaming:
            # First send the headers and then transfer the response incrementally
            self.send_response_headers(f.response)
            chunks = self.read_response_body(f.request, f.response)
            if callable(f.response.stream):
                chunks = f.response.stream(chunks)
            self.send_response_body(f.response, chunks)
            f.response.timestamp_end = time.time()

        if self.check_close_connection(f):
            return False

        # Handle 101 Switching Protocols
        if f.response.status_code == 101:
            # Handle a successful HTTP 101 Switching Protocols Response,
            # received after e.g. a WebSocket upgrade request.
            # Check for WebSocket handshake
            is_websocket = (websocket.check_handshake(f.request.headers)
                            and websocket.check_handshake(
                                f.response.headers))
            if is_websocket and not self.config.options.websocket:
                self.log(
                    "Client requested WebSocket connection, but the protocol is disabled.",
                    "info")

            if is_websocket and self.config.options.websocket:
                layer = WebSocketLayer(self, f)
            else:
                layer = self.ctx.next_layer(self)
            layer()
            return False  # should never be reached

    except (exceptions.ProtocolException, exceptions.NetlibException) as e:
        if not f.response:
            # No response yet: report the failure to the client as a 502.
            self.send_error_response(502, repr(e))
            f.error = flow.Error(str(e))
            self.channel.ask("error", f)
            return False
        else:
            raise exceptions.ProtocolException(
                "Error in HTTP connection: %s" % repr(e))
    finally:
        # Runs on every exit path of the second try, including the returns above.
        if f:
            f.live = False

    return True