Beispiel #1
0
    def host(self, val: Union[str, bytes]) -> None:
        self.data.host = always_str(val, "idna", "strict")

        # Update host header
        if "Host" in self.data.headers:
            self.data.headers["Host"] = val
        # Update authority
        if self.data.authority:
            self.authority = url.hostport(self.scheme, self.host, self.port)
Beispiel #2
0
    def _set_request_url(self, flow, postreq=''):
        host = flow.request.headers.get('host')
        if not host:
            host = flow.request.host

        homepage_redirect = None

        if (host == self.proxy_magic
                and (flow.request.path in (H_REFRESH_PATH, H_REDIR_PATH))):
            homepage_redirect = flow.request.path

        elif host == self.proxy_magic:
            flow.request.host = self.fwd_host
            flow.request.scheme = self.fwd_scheme
            flow.request.port = self.fwd_port
            flow.request.headers['X-Proxy-For'] = str(
                flow.client_conn.address.host)
            return False

        if host:
            host = flow.request.scheme + '://' + host
        else:
            host = hostport(flow.request.scheme, flow.request.host,
                            flow.request.port)

        req_url = host + flow.request.path

        flow.request.req_url = req_url
        flow.request.req_scheme = flow.request.scheme

        result = self.upstream_url_resolver(url=quote_plus(req_url),
                                            headers=flow.request.headers,
                                            address=flow.client_conn.address,
                                            postreq=postreq)

        full_url, extra_data = result

        if homepage_redirect:
            url = extra_data.get('url')
            if url:
                if homepage_redirect == H_REFRESH_PATH:
                    self.homepage_refresh(flow, url)
                elif homepage_redirect == H_REDIR_PATH:
                    self.homepage_redir(flow, url)

                return False

        scheme, host, port, path = parse(full_url)

        flow.request.scheme = scheme
        flow.request.host = host
        flow.request.port = port
        flow.request.path = path

        flow.extra_data = extra_data
        return True
Beispiel #3
0
class HttpStream(layer.Layer):
    request_body_buf: bytes
    response_body_buf: bytes
    flow: http.HTTPFlow
    stream_id: StreamId
    child_layer: Optional[layer.Layer] = None

    @property
    def mode(self):
        i = self.context.layers.index(self)
        parent: HttpLayer = self.context.layers[i - 1]
        return parent.mode

    def __init__(self, context: Context, stream_id: int):
        super().__init__(context)
        self.request_body_buf = b""
        self.response_body_buf = b""
        self.client_state = self.state_uninitialized
        self.server_state = self.state_uninitialized
        self.stream_id = stream_id

    def __repr__(self):
        return (f"HttpStream("
                f"id={self.stream_id}, "
                f"client_state={self.client_state.__name__}, "
                f"server_state={self.server_state.__name__}"
                f")")

    @expect(events.Start, HttpEvent)
    def _handle_event(self,
                      event: events.Event) -> layer.CommandGenerator[None]:
        if isinstance(event, events.Start):
            self.client_state = self.state_wait_for_request_headers
        elif isinstance(event, (RequestProtocolError, ResponseProtocolError)):
            yield from self.handle_protocol_error(event)
        elif isinstance(event,
                        (RequestHeaders, RequestData, RequestEndOfMessage)):
            yield from self.client_state(event)
        else:
            yield from self.server_state(event)

    @expect(RequestHeaders)
    def state_wait_for_request_headers(
            self, event: RequestHeaders) -> layer.CommandGenerator[None]:
        if not event.replay_flow:
            self.flow = http.HTTPFlow(self.context.client, self.context.server)

        else:
            self.flow = event.replay_flow
        self.flow.request = event.request

        if err := validate_request(self.mode, self.flow.request):
            self.flow.response = http.HTTPResponse.make(502, str(err))
            self.client_state = self.state_errored
            return (yield from self.send_response())

        if self.flow.request.method == "CONNECT":
            return (yield from self.handle_connect())

        if self.mode is HTTPMode.transparent:
            # Determine .scheme, .host and .port attributes for transparent requests
            assert self.context.server.address
            self.flow.request.data.host = self.context.server.address[0]
            self.flow.request.data.port = self.context.server.address[1]
            self.flow.request.scheme = "https" if self.context.server.tls else "http"
        elif not self.flow.request.host:
            # We need to extract destination information from the host header.
            try:
                host, port = url.parse_authority(self.flow.request.host_header
                                                 or "",
                                                 check=True)
            except ValueError:
                self.flow.response = http.HTTPResponse.make(
                    400,
                    "HTTP request has no host header, destination unknown.")
                self.client_state = self.state_errored
                return (yield from self.send_response())
            else:
                if port is None:
                    port = 443 if self.context.client.tls else 80
                self.flow.request.data.host = host
                self.flow.request.data.port = port
                self.flow.request.scheme = "https" if self.context.client.tls else "http"

        if self.mode is HTTPMode.regular and not self.flow.request.is_http2:
            # Set the request target to origin-form for HTTP/1, some servers don't support absolute-form requests.
            # see https://github.com/mitmproxy/mitmproxy/issues/1759
            self.flow.request.authority = ""

        # update host header in reverse proxy mode
        if self.context.options.mode.startswith(
                "reverse:") and not self.context.options.keep_host_header:
            assert self.context.server.address
            self.flow.request.host_header = url.hostport(
                "https" if self.context.server.tls else "http",
                self.context.server.address[0],
                self.context.server.address[1],
            )

        yield HttpRequestHeadersHook(self.flow)
        if (yield from self.check_killed(True)):
            return

        if self.flow.request.headers.get("expect",
                                         "").lower() == "100-continue":
            continue_response = http.HTTPResponse.make(100)
            continue_response.headers.clear()
            yield SendHttp(ResponseHeaders(self.stream_id, continue_response),
                           self.context.client)
            self.flow.request.headers.pop("expect")

        if self.flow.request.stream:
            if self.flow.response:
                raise NotImplementedError(
                    "Can't set a response and enable streaming at the same time."
                )
            yield HttpRequestHook(self.flow)
            ok = yield from self.make_server_connection()
            if not ok:
                return
            yield SendHttp(event, self.context.server)
            self.client_state = self.state_stream_request_body
        else:
            self.client_state = self.state_consume_request_body
        self.server_state = self.state_wait_for_response_headers
    def replay(self, f):  # pragma: no cover
        f.live = True
        r = f.request
        bsl = human.parse_size(self.options.body_size_limit)
        authority_backup = r.authority
        server = None
        try:
            f.response = None

            # If we have a channel, run script hooks.
            request_reply = self.channel.ask("request", f)
            if isinstance(request_reply, http.HTTPResponse):
                f.response = request_reply

            if not f.response:
                # In all modes, we directly connect to the server displayed
                if self.options.mode.startswith("upstream:"):
                    server_address = server_spec.parse_with_mode(
                        self.options.mode)[1].address
                    server = connections.ServerConnection(server_address)
                    server.connect()
                    if r.scheme == "https":
                        connect_request = http.make_connect_request(
                            (r.data.host, r.port))
                        server.wfile.write(
                            http1.assemble_request(connect_request))
                        server.wfile.flush()
                        resp = http1.read_response(server.rfile,
                                                   connect_request,
                                                   body_size_limit=bsl)
                        if resp.status_code != 200:
                            raise exceptions.ReplayException(
                                "Upstream server refuses CONNECT request")
                        server.establish_tls(
                            sni=f.server_conn.sni,
                            **tls.client_arguments_from_options(self.options))
                        r.authority = b""
                    else:
                        r.authority = hostport(r.scheme, r.host, r.port)
                else:
                    server_address = (r.host, r.port)
                    server = connections.ServerConnection(server_address)
                    server.connect()
                    if r.scheme == "https":
                        server.establish_tls(
                            sni=f.server_conn.sni,
                            **tls.client_arguments_from_options(self.options))
                    r.authority = ""

                server.wfile.write(http1.assemble_request(r))
                server.wfile.flush()
                r.timestamp_start = r.timestamp_end = time.time()

                if f.server_conn:
                    f.server_conn.close()
                f.server_conn = server

                f.response = http1.read_response(server.rfile,
                                                 r,
                                                 body_size_limit=bsl)
            response_reply = self.channel.ask("response", f)
            if response_reply == exceptions.Kill:
                raise exceptions.Kill()
        except (exceptions.ReplayException, exceptions.NetlibException) as e:
            f.error = flow.Error(str(e))
            self.channel.ask("error", f)
        except exceptions.Kill:
            self.channel.tell("log",
                              log.LogEntry(flow.Error.KILLED_MESSAGE, "info"))
        except Exception as e:
            self.channel.tell("log", log.LogEntry(repr(e), "error"))
        finally:
            r.authority = authority_backup
            f.live = False
            if server and server.connected():
                server.finish()
                server.close()
Beispiel #5
0
def test_hostport():
    assert url.hostport(b"https", b"foo.com", 8080) == b"foo.com:8080"
Beispiel #6
0
    def _process_flow(self, f):
        try:
            try:
                request: http.HTTPRequest = self.read_request_headers(f)
            except exceptions.HttpReadDisconnect:
                # don't throw an error for disconnects that happen
                # before/between requests.
                return False

            f.request = request

            if request.first_line_format == "authority":
                # The standards are silent on what we should do with a CONNECT
                # request body, so although it's not common, it's allowed.
                f.request.data.content = b"".join(
                    self.read_request_body(f.request))
                f.request.data.trailers = self.read_request_trailers(f.request)
                f.request.timestamp_end = time.time()
                self.channel.ask("http_connect", f)

                if self.mode is HTTPMode.regular:
                    return self.handle_regular_connect(f)
                elif self.mode is HTTPMode.upstream:
                    return self.handle_upstream_connect(f)
                else:
                    msg = "Unexpected CONNECT request."
                    self.send_error_response(400, msg)
                    return False

            if not self.config.options.relax_http_form_validation:
                validate_request_form(self.mode, request)
            self.channel.ask("requestheaders", f)
            # Re-validate request form in case the user has changed something.
            if not self.config.options.relax_http_form_validation:
                validate_request_form(self.mode, request)

            if request.headers.get("expect", "").lower() == "100-continue":
                # TODO: We may have to use send_response_headers for HTTP2
                # here.
                self.send_response(http.make_expect_continue_response())
                request.headers.pop("expect")

            if f.request.stream:
                f.request.data.content = None
            else:
                f.request.data.content = b"".join(
                    self.read_request_body(request))

            f.request.data.trailers = self.read_request_trailers(f.request)

            request.timestamp_end = time.time()
        except exceptions.HttpException as e:
            # We optimistically guess there might be an HTTP client on the
            # other end
            self.send_error_response(400, repr(e))
            # Request may be malformed at this point, so we unset it.
            f.request = None
            f.error = flow.Error(str(e))
            self.channel.ask("error", f)
            self.log("request", "warn",
                     [f"HTTP protocol error in client request: {e}"])
            return False

        self.log("request", "debug", [repr(request)])

        # set first line format to relative in regular mode,
        # see https://github.com/mitmproxy/mitmproxy/issues/1759
        if self.mode is HTTPMode.regular and request.first_line_format == "absolute":
            request.authority = ""

        # update host header in reverse proxy mode
        if self.config.options.mode.startswith(
                "reverse:") and not self.config.options.keep_host_header:
            f.request.host_header = url.hostport(
                self.config.upstream_server.scheme,
                *self.config.upstream_server.address)

        # Determine .scheme, .host and .port attributes for inline scripts. For
        # absolute-form requests, they are directly given in the request. For
        # authority-form requests, we only need to determine the request
        # scheme. For relative-form requests, we need to determine host and
        # port as well.
        if self.mode is HTTPMode.transparent:
            # Setting request.host also updates the host header, which we want
            # to preserve
            f.request.data.host = self.__initial_server_address[0]
            f.request.data.port = self.__initial_server_address[1]
            f.request.data.scheme = b"https" if self.__initial_server_tls else b"http"
        self.channel.ask("request", f)

        try:
            valid = (websocket.check_handshake(request.headers)
                     and websocket.check_client_version(request.headers))
            if valid:
                f.metadata['websocket'] = True
                # We only support RFC6455 with WebSocket version 13
                # allow inline scripts to manipulate the client handshake
                self.channel.ask("websocket_handshake", f)

            if not f.response:
                self.establish_server_connection(f.request.host,
                                                 f.request.port,
                                                 f.request.scheme)

                def get_response():
                    self.send_request_headers(f.request)

                    if f.request.stream:
                        chunks = self.read_request_body(f.request)
                        if callable(f.request.stream):
                            chunks = f.request.stream(chunks)
                        self.send_request_body(f.request, chunks)
                    else:
                        self.send_request_body(f.request,
                                               [f.request.data.content])

                    self.send_request_trailers(f.request)

                    f.response = self.read_response_headers()

                try:
                    get_response()
                except exceptions.NetlibException as e:
                    self.log("server communication error: %s" % repr(e),
                             level="debug")
                    # In any case, we try to reconnect at least once. This is
                    # necessary because it might be possible that we already
                    # initiated an upstream connection after clientconnect that
                    # has already been expired, e.g consider the following event
                    # log:
                    # > clientconnect (transparent mode destination known)
                    # > serverconnect (required for client tls handshake)
                    # > read n% of large request
                    # > server detects timeout, disconnects
                    # > read (100-n)% of large request
                    # > send large request upstream

                    if isinstance(e, exceptions.Http2ProtocolException):
                        # do not try to reconnect for HTTP2
                        raise exceptions.ProtocolException(
                            "First and only attempt to get response via HTTP2 failed."
                        )
                    elif f.request.stream:
                        # We may have already consumed some request chunks already,
                        # so all we can do is signal downstream that upstream closed the connection.
                        self.send_error_response(408, "Request Timeout")
                        f.error = flow.Error(repr(e))
                        self.channel.ask("error", f)
                        return False

                    self.disconnect()
                    self.connect()
                    get_response()

                # call the appropriate script hook - this is an opportunity for
                # an inline script to set f.stream = True
                self.channel.ask("responseheaders", f)

                if f.response.stream:
                    f.response.data.content = None
                else:
                    f.response.data.content = b"".join(
                        self.read_response_body(f.request, f.response))
                f.response.timestamp_end = time.time()

                # no further manipulation of self.server_conn beyond this point
                # we can safely set it as the final attribute value here.
                f.server_conn = self.server_conn
            else:
                # response was set by an inline script.
                # we now need to emulate the responseheaders hook.
                self.channel.ask("responseheaders", f)

            f.response.data.trailers = self.read_response_trailers(
                f.request, f.response)

            self.log("response", "debug", [repr(f.response)])
            self.channel.ask("response", f)

            if not f.response.stream:
                # no streaming:
                # we already received the full response from the server and can
                # send it to the client straight away.
                self.send_response(f.response)
            else:
                # streaming:
                # First send the headers and then transfer the response incrementally
                self.send_response_headers(f.response)
                chunks = self.read_response_body(f.request, f.response)
                if callable(f.response.stream):
                    chunks = f.response.stream(chunks)
                self.send_response_body(f.response, chunks)
                f.response.timestamp_end = time.time()

            if self.check_close_connection(f):
                return False

            # Handle 101 Switching Protocols
            if f.response.status_code == 101:
                # Handle a successful HTTP 101 Switching Protocols Response,
                # received after e.g. a WebSocket upgrade request.
                # Check for WebSocket handshake
                is_websocket = (websocket.check_handshake(f.request.headers)
                                and websocket.check_handshake(
                                    f.response.headers))
                if is_websocket and not self.config.options.websocket:
                    self.log(
                        "Client requested WebSocket connection, but the protocol is disabled.",
                        "info")

                if is_websocket and self.config.options.websocket:
                    layer = WebSocketLayer(self, f)
                else:
                    layer = self.ctx.next_layer(self)
                layer()
                return False  # should never be reached

        except (exceptions.ProtocolException, exceptions.NetlibException) as e:
            if not f.response:
                self.send_error_response(502, repr(e))
                f.error = flow.Error(str(e))
                self.channel.ask("error", f)
                return False
            else:
                raise exceptions.ProtocolException(
                    "Error in HTTP connection: %s" % repr(e))
        finally:
            if f:
                f.live = False

        return True