Ejemplo n.º 1
0
def _read_request_line(
        line: bytes) -> Tuple[str, int, bytes, bytes, bytes, bytes, bytes]:
    """
    Split an HTTP/1 request line into its individual components.

    Three shapes of request target are told apart:

    - ``*`` or anything starting with ``/``: scheme and authority are empty,
      host is ``""`` and port is ``0``.
    - any other target when the method is ``CONNECT``: the whole target is
      the authority and it must carry a port.
    - everything else: the target is split as ``scheme://authority/path``
      and a missing port falls back to ``url.default_port(scheme)``.

    Returns:
        ``(host, port, method, scheme, authority, path, http_version)``.

    Raises:
        ValueError: ``Bad HTTP request line: ...`` whenever any parsing step
            raises a ValueError; the original error is attached as the cause.
    """
    try:
        method, target, http_version = line.split()
        port: Optional[int]

        if target.startswith(b"/") or target == b"*":
            # The target itself is the path; there is no destination in it.
            host, port = "", 0
            scheme = authority = b""
            path = target
        elif method == b"CONNECT":
            # The target is nothing but the authority.
            authority = target
            scheme = path = b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            scheme, remainder = target.split(b"://", maxsplit=1)
            authority, tail = remainder.split(b"/", maxsplit=1)
            path = b"/" + tail
            host, port = url.parse_authority(authority, check=True)
            if not port:
                port = url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        raise_if_http_version_unknown(http_version)
    except ValueError as err:
        raise ValueError(f"Bad HTTP request line: {line!r}") from err

    return host, port, method, scheme, authority, path, http_version
Ejemplo n.º 2
0
def _read_request_line(rfile):
    """
    Read the first line from ``rfile`` and parse it as an HTTP/1 request line.

    Three shapes of request target are told apart:

    - ``*`` or anything starting with ``/``: scheme and authority are empty,
      host is ``""`` and port is ``0``.
    - any other target when the method is ``CONNECT``: the whole target is
      the authority and it must carry a port.
    - everything else: the target is split as ``scheme://authority/path``
      and a missing port falls back to ``url.default_port(scheme)``.

    Returns:
        ``(host, port, method, scheme, authority, path, http_version)``.

    Raises:
        exceptions.HttpReadDisconnect: if reading the first line raised
            HttpReadDisconnect; re-raised with a clearer message.
        exceptions.HttpSyntaxException: if any parsing step raises a
            ValueError.
    """
    try:
        line = _get_first_line(rfile)
    except exceptions.HttpReadDisconnect as e:
        # We want to provide a better error message.
        raise exceptions.HttpReadDisconnect("Client disconnected") from e

    try:
        method, target, http_version = line.split()

        if target == b"*" or target.startswith(b"/"):
            scheme, authority, path = b"", b"", target
            host, port = "", 0
        elif method == b"CONNECT":
            scheme, authority, path = b"", target, b""
            host, port = url.parse_authority(authority, check=True)
            if not port:
                raise ValueError
        else:
            scheme, rest = target.split(b"://", maxsplit=1)
            authority, path_ = rest.split(b"/", maxsplit=1)
            path = b"/" + path_
            host, port = url.parse_authority(authority, check=True)
            port = port or url.default_port(scheme)
            if not port:
                raise ValueError
            # TODO: we can probably get rid of this check?
            url.parse(target)

        _check_http_version(http_version)
    except ValueError as e:
        # `line` is bytes: format it with an explicit !r. The resulting text
        # is the same as an implicit str(), but it does not trigger
        # BytesWarning when the interpreter runs with -b / -bb.
        raise exceptions.HttpSyntaxException(
            f"Bad HTTP request line: {line!r}") from e

    return host, port, method, scheme, authority, path, http_version
Ejemplo n.º 3
0
def test_parse_authority(authority: AnyStr, valid: bool, out):
    """
    Check ``parse_authority`` with and without validation.

    Without validation the call must return ``out`` for every input. With
    validation it must return ``out`` for valid inputs and raise ValueError
    for invalid ones.
    """
    assert parse_authority(authority, False) == out

    if not valid:
        with pytest.raises(ValueError):
            parse_authority(authority, True)
        return

    assert parse_authority(authority, True) == out
Ejemplo n.º 4
0
def parse_h2_request_headers(
    h2_headers: Sequence[Tuple[bytes, bytes]]
) -> Tuple[str, int, bytes, bytes, bytes, bytes, http.Headers]:
    """
    Separate the HTTP/2 pseudo-headers from the regular headers and parse them.

    ``:method``, ``:scheme`` and ``:path`` are mandatory, ``:authority`` is
    optional. Any other pseudo-header is rejected.

    Returns:
        ``(host, port, method, scheme, authority, path, headers)``. Without an
        authority, host is ``""`` and port is ``0``. With an authority that
        has no port, the port is 80 for the ``http`` scheme and 443 otherwise.

    Raises:
        ValueError: if a mandatory pseudo-header is missing or an unknown
            pseudo-header is present.
    """
    pseudo_headers, headers = split_pseudo_headers(h2_headers)

    try:
        method: bytes = pseudo_headers.pop(b":method")
        # this raises for HTTP/2 CONNECT requests
        scheme: bytes = pseudo_headers.pop(b":scheme")
        path: bytes = pseudo_headers.pop(b":path")
        authority: bytes = pseudo_headers.pop(b":authority", b"")
    except KeyError as missing:
        raise ValueError(f"Required pseudo header is missing: {missing}")

    # Everything we know has been popped, so whatever is left is unknown.
    if pseudo_headers:
        raise ValueError(f"Unknown pseudo headers: {pseudo_headers}")

    if not authority:
        return "", 0, method, scheme, authority, path, headers

    host, port = url.parse_authority(authority, check=True)
    if port is None:
        port = 443 if scheme != b'http' else 80

    return host, port, method, scheme, authority, path, headers
Ejemplo n.º 5
0
    def read_request_headers(self, flow):
        """
        Wait for the request message and turn its headers into an HTTP/2 request.

        Blocks on ``self.request_message.arrived``, calls ``self.raise_zombie()``
        and marks ``flow`` as a pushed stream when ``self.pushed`` is set. The
        pseudo-headers are removed from the message headers; host and port are
        derived from ``:authority``, with the port falling back to the scheme's
        default port and finally to ``0``.

        Returns:
            An ``http.HTTPRequest`` with HTTP version ``HTTP/2.0`` and the
            remaining message headers.
        """
        self.request_message.arrived.wait()
        self.raise_zombie()

        if self.pushed:
            flow.metadata['h2-pushed-stream'] = True

        # pseudo header must be present, see https://http2.github.io/http2-spec/#rfc.section.8.1.2.3
        message_headers = self.request_message.headers
        authority = message_headers.pop(':authority', "")
        method = message_headers.pop(':method')
        scheme = message_headers.pop(':scheme')
        path = message_headers.pop(':path')

        host, port = url.parse_authority(authority, check=True)
        if not port:
            port = url.default_port(scheme) or 0

        return http.HTTPRequest(
            host,
            port,
            method.encode(),
            scheme.encode(),
            authority.encode(),
            path.encode(),
            b"HTTP/2.0",
            self.request_message.headers,
            None,
            None,
            self.timestamp_start,
            self.timestamp_end,
        )
Ejemplo n.º 6
0
 def pretty_host(self) -> str:
     """
     Return the host part of the host header if one is set, otherwise :py:attr:`host`.

     This is useful in transparent mode where :py:attr:`host` is only an IP address.
     The result may not reflect the actual destination, as the Host header could be spoofed.
     """
     host_header = self.host_header
     if not host_header:
         return self.host
     # Lenient parsing (check=False); only the host part of the pair is used.
     return url.parse_authority(host_header, check=False)[0]
    def read_request(
        self,
        __rfile,
        include_body=True,
        body_size_limit=None,
        allow_empty=False,
    ):
        """
        Receive one transmission and wrap it in an HTTP/2 ``Request`` object.

        Data is received through ``self._receive_transmission``; the
        ``__rfile`` and ``allow_empty`` arguments are not used here. Missing
        pseudo-headers default to the empty string, and the port falls back to
        the scheme's default port and finally to ``0``.

        Returns:
            A ``mitmproxy.net.http.Request`` whose ``stream_id`` attribute is
            set to the id of the received stream.

        Raises:
            NotImplementedError: if ``body_size_limit`` is not ``None``.
        """
        if body_size_limit is not None:
            raise NotImplementedError()

        self.perform_connection_preface()

        started_at = time.time()
        if hasattr(self.tcp_handler.rfile, "reset_timestamps"):
            self.tcp_handler.rfile.reset_timestamps()

        stream_id, headers, body = self._receive_transmission(
            include_body=include_body)

        # If the reader tracked the first byte, that is a more accurate start time.
        started_at = getattr(
            self.tcp_handler.rfile, "first_byte_timestamp", started_at)

        finished_at = time.time()

        # pseudo header must be present, see https://http2.github.io/http2-spec/#rfc.section.8.1.2.3
        authority, method, scheme, path = (
            headers.pop(name, "")
            for name in (':authority', ':method', ':scheme', ':path')
        )

        host, port = url.parse_authority(authority, check=False)
        if not port:
            port = url.default_port(scheme) or 0

        request = mitmproxy.net.http.Request(
            host=host,
            port=port,
            method=method.encode(),
            scheme=scheme.encode(),
            authority=authority.encode(),
            path=path.encode(),
            http_version=b"HTTP/2.0",
            headers=headers,
            content=body,
            trailers=None,
            timestamp_start=started_at,
            timestamp_end=finished_at,
        )
        request.stream_id = stream_id

        return request
Ejemplo n.º 8
0
    def pretty_host(self) -> str:
        """
        *Read-only:* The host part of `Request.host_header` if that header is set,
        otherwise `Request.host`.
        This is useful in transparent mode where `Request.host` is only an IP address.

        *Warning:* When working in adversarial environments, this may not reflect the actual destination
        as the Host header could be spoofed.
        """
        host_header = self.host_header
        # Lenient parsing (check=False); only the host part of the pair is used.
        return (
            url.parse_authority(host_header, check=False)[0]
            if host_header
            else self.host
        )
Ejemplo n.º 9
0
    def pretty_url(self) -> str:
        """
        Build the URL from the host header instead of :py:attr:`host`.

        Requests whose first line format is ``"authority"`` yield
        :py:attr:`authority`, and requests without a host header yield
        :py:attr:`url`. Otherwise the host header supplies host and port; a
        missing port falls back to the scheme's default port and then to 443.
        """
        if self.first_line_format == "authority":
            return self.authority

        authority = self.host_header
        if authority:
            host, port = url.parse_authority(authority, check=False)
            if not port:
                port = url.default_port(self.scheme) or 443
            return url.unparse(self.scheme, host, port, self.path)

        return self.url
Ejemplo n.º 10
0
class HttpStream(layer.Layer):
    request_body_buf: bytes
    response_body_buf: bytes
    flow: http.HTTPFlow
    stream_id: StreamId
    child_layer: Optional[layer.Layer] = None

    @property
    def mode(self):
        """The ``mode`` of the layer that sits directly before this one in ``context.layers``."""
        layers = self.context.layers
        parent: HttpLayer = layers[layers.index(self) - 1]
        return parent.mode

    def __init__(self, context: Context, stream_id: int):
        """
        Set up a stream with empty body buffers and both state handlers
        pointing at ``state_uninitialized``.
        """
        super().__init__(context)
        self.stream_id = stream_id
        # Nothing has been buffered in either direction yet.
        self.request_body_buf = self.response_body_buf = b""
        self.client_state = self.state_uninitialized
        self.server_state = self.state_uninitialized

    def __repr__(self):
        """Show the stream id and the names of the current client and server state handlers."""
        fields = ", ".join((
            f"id={self.stream_id}",
            f"client_state={self.client_state.__name__}",
            f"server_state={self.server_state.__name__}",
        ))
        return f"HttpStream({fields})"

    @expect(events.Start, HttpEvent)
    def _handle_event(self,
                      event: events.Event) -> layer.CommandGenerator[None]:
        """
        Route an incoming event to the handler responsible for it.

        ``Start`` only arms the client state. Protocol errors go to
        ``handle_protocol_error``, request headers/data/end-of-message go to
        the current client state, and everything else goes to the current
        server state.
        """
        if isinstance(event, events.Start):
            self.client_state = self.state_wait_for_request_headers
            return

        request_events = (RequestHeaders, RequestData, RequestEndOfMessage)
        if isinstance(event, (RequestProtocolError, ResponseProtocolError)):
            handler = self.handle_protocol_error
        elif isinstance(event, request_events):
            handler = self.client_state
        else:
            handler = self.server_state

        yield from handler(event)

    @expect(RequestHeaders)
    def state_wait_for_request_headers(
            self, event: RequestHeaders) -> layer.CommandGenerator[None]:
        """
        Handle the request headers of a new request on this stream.

        Creates the flow (or reuses ``event.replay_flow``), validates the
        request, fills in destination information, fires the request headers
        hook and finally selects the next client and server states.

        The method returns early, after sending or delegating a response, when:

        - ``validate_request`` reports an error (502 response),
        - the request method is ``CONNECT`` (delegated to ``handle_connect``),
        - no destination can be derived from the host header (400 response),
        - ``check_killed`` returns a truthy value after the headers hook,
        - streaming is enabled and ``make_server_connection`` returns a falsy value.

        Raises:
            NotImplementedError: if request streaming is enabled while a
                response is already set on the flow.
        """
        # Use a fresh flow unless the event carries a flow to replay.
        if not event.replay_flow:
            self.flow = http.HTTPFlow(self.context.client, self.context.server)

        else:
            self.flow = event.replay_flow
        self.flow.request = event.request

        # A validation error is answered with a 502 carrying the error text.
        if err := validate_request(self.mode, self.flow.request):
            self.flow.response = http.HTTPResponse.make(502, str(err))
            self.client_state = self.state_errored
            return (yield from self.send_response())

        if self.flow.request.method == "CONNECT":
            return (yield from self.handle_connect())

        if self.mode is HTTPMode.transparent:
            # Determine .scheme, .host and .port attributes for transparent requests
            # from the server connection's address and TLS state.
            assert self.context.server.address
            self.flow.request.data.host = self.context.server.address[0]
            self.flow.request.data.port = self.context.server.address[1]
            self.flow.request.scheme = "https" if self.context.server.tls else "http"
        elif not self.flow.request.host:
            # We need to extract destination information from the host header.
            try:
                host, port = url.parse_authority(self.flow.request.host_header
                                                 or "",
                                                 check=True)
            except ValueError:
                # Any ValueError from parsing the (possibly empty) host header
                # ends up here and is answered with a 400.
                self.flow.response = http.HTTPResponse.make(
                    400,
                    "HTTP request has no host header, destination unknown.")
                self.client_state = self.state_errored
                return (yield from self.send_response())
            else:
                # Without an explicit port, pick the default that matches the
                # TLS state of the client connection.
                if port is None:
                    port = 443 if self.context.client.tls else 80
                self.flow.request.data.host = host
                self.flow.request.data.port = port
                self.flow.request.scheme = "https" if self.context.client.tls else "http"

        if self.mode is HTTPMode.regular and not self.flow.request.is_http2:
            # Set the request target to origin-form for HTTP/1, some servers don't support absolute-form requests.
            # see https://github.com/mitmproxy/mitmproxy/issues/1759
            self.flow.request.authority = ""

        # update host header in reverse proxy mode
        # (skipped when the keep_host_header option is set)
        if self.context.options.mode.startswith(
                "reverse:") and not self.context.options.keep_host_header:
            assert self.context.server.address
            self.flow.request.host_header = url.hostport(
                "https" if self.context.server.tls else "http",
                self.context.server.address[0],
                self.context.server.address[1],
            )

        # Fire the request headers hook, then stop if check_killed reports
        # a truthy result.
        yield HttpRequestHeadersHook(self.flow)
        if (yield from self.check_killed(True)):
            return

        # "Expect: 100-continue": send a header-less 100 response to the
        # client and drop the expect header from the request.
        if self.flow.request.headers.get("expect",
                                         "").lower() == "100-continue":
            continue_response = http.HTTPResponse.make(100)
            continue_response.headers.clear()
            yield SendHttp(ResponseHeaders(self.stream_id, continue_response),
                           self.context.client)
            self.flow.request.headers.pop("expect")

        if self.flow.request.stream:
            # Streaming: the request hook fires right away and the headers
            # event is sent to the server once a connection is available.
            if self.flow.response:
                raise NotImplementedError(
                    "Can't set a response and enable streaming at the same time."
                )
            yield HttpRequestHook(self.flow)
            ok = yield from self.make_server_connection()
            if not ok:
                return
            yield SendHttp(event, self.context.server)
            self.client_state = self.state_stream_request_body
        else:
            # Not streaming: the next client state consumes the request body.
            self.client_state = self.state_consume_request_body
        self.server_state = self.state_wait_for_response_headers