def test_read_http_body():
    """
    Exercise http.read_http_body through the positional call shape
    (500, stream, headers, flag, limit) used by every call in this test.
    """
    def stream(payload="testing"):
        # Each call reads from its own, unread stream.
        return cStringIO.StringIO(payload)

    headers = odict.ODictCaseless()

    # No headers and the flag set to False: an empty body is expected.
    assert http.read_http_body(500, stream(), headers, False, None) == ""

    # A non-numeric content-length must be reported as an HttpError.
    headers["content-length"] = ["foo"]
    tutils.raises(
        http.HttpError,
        http.read_http_body, 500, stream(), headers, False, None
    )

    # content-length of 5 and no limit: exactly five bytes come back ...
    headers["content-length"] = [5]
    body = http.read_http_body(500, stream(), headers, False, None)
    assert len(body) == 5
    # ... while the same header combined with a limit of 4 is an error.
    tutils.raises(
        http.HttpError,
        http.read_http_body, 500, stream(), headers, False, 4
    )

    # Fresh, empty headers with the flag set to True: the returned body is
    # as long as the limit (4) or the whole 7-byte payload (limit 100).
    headers = odict.ODictCaseless()
    assert len(http.read_http_body(500, stream(), headers, True, 4)) == 4
    assert len(http.read_http_body(500, stream(), headers, True, 100)) == 7

    # Chunked transfer encoding: a single 5-byte chunk is decoded.
    headers = odict.ODictCaseless()
    headers["transfer-encoding"] = ["chunked"]
    chunked = stream("5\r\naaaaa\r\n0\r\n\r\n")
    assert http.read_http_body(500, chunked, headers, True, 100) == "aaaaa"
def test_read_chunked():
    """
    Feed well-formed and broken chunked bodies to http.read_http_body and
    check the decoded result or the expected error.
    """
    headers = odict.ODictCaseless()
    headers["transfer-encoding"] = ["chunked"]

    def decode(payload):
        # Successful read with no size limit.
        rfile = cStringIO.StringIO(payload)
        return http.read_http_body(rfile, headers, None, "GET", None, True)

    def fails(expected, payload, limit=None):
        # `expected` is handed to tutils.raises unchanged (message or class).
        rfile = cStringIO.StringIO(payload)
        tutils.raises(
            expected,
            http.read_http_body, rfile, headers, limit, "GET", None, True
        )

    # Terminating chunk without the final CRLF.
    fails("malformed chunked body", "1\r\na\r\n0\r\n")

    # Complete body, with and without leading blank lines.
    assert decode("1\r\na\r\n0\r\n\r\n") == "a"
    assert decode("\r\n\r\n1\r\na\r\n0\r\n\r\n") == "a"

    # Stream ends before any chunk header arrives.
    fails("closed prematurely", "\r\n")

    # Chunk data not matching the announced size / missing terminator.
    fails("malformed chunked body", "1\r\nfoo")

    # Chunk size line that is not a valid size.
    fails(http.HttpError, "foo\r\nfoo")

    # A 5-byte chunk with a limit of 2.
    fails("too large", "5\r\naaaaa\r\n0\r\n\r\n", limit=2)
def test_read_http_body():
    """
    Check http.read_http_body for content-length, no-length and chunked
    bodies, always called as (rfile, headers, limit, "GET", 200, False).
    """
    def raw(payload="testing"):
        # Plain in-memory stream, fresh for every call.
        return cStringIO.StringIO(payload)

    def wrapped(payload="testing"):
        # Same data, but behind a tcp.Reader.
        return tcp.Reader(cStringIO.StringIO(payload))

    def fails(rfile, headers, limit):
        tutils.raises(
            http.HttpError,
            http.read_http_body, rfile, headers, limit, "GET", 200, False
        )

    # default case: content-length matches the payload
    headers = odict.ODictCaseless()
    headers["content-length"] = [7]
    body = http.read_http_body(raw(), headers, None, "GET", 200, False)
    assert body == "testing"

    # invalid content-length: not a number
    headers["content-length"] = ["foo"]
    fails(raw(), headers, None)

    # invalid content-length: negative
    headers["content-length"] = [-1]
    fails(raw(), headers, None)

    # content-length of 5 combined with a limit of 4
    headers["content-length"] = [5]
    fails(raw(), headers, 4)

    # content-length of 5 is shorter than the 7-byte payload, no limit
    body = http.read_http_body(raw(), headers, None, "GET", 200, False)
    assert len(body) == 5

    # no content-length: limit (100) above the payload size
    headers = odict.ODictCaseless()
    body = http.read_http_body(wrapped(), headers, 100, "GET", 200, False)
    assert len(body) == 7

    # no content-length: limit (4) below the payload size
    fails(wrapped(), headers, 4)

    # chunked transfer encoding
    headers = odict.ODictCaseless()
    headers["transfer-encoding"] = ["chunked"]
    chunked = wrapped("5\r\naaaaa\r\n0\r\n\r\n")
    assert http.read_http_body(chunked, headers, 100, "GET", 200, False) == "aaaaa"
class HTTPHandler(ProtocolHandler):
    """
    HTTPHandler implements mitmproxy's understanding of the HTTP protocol.
    """

    def __init__(self, c):
        """
        Set up the handler for connection handler `c`.

        The expected inbound/outbound request forms are taken from
        c.config.mode; authentication is not skipped initially.
        """
        super(HTTPHandler, self).__init__(c)
        self.expected_form_in = c.config.mode.http_form_in
        self.expected_form_out = c.config.mode.http_form_out
        self.skip_authentication = False

    def handle_messages(self):
        """Keep handling flows until handle_flow() returns a falsy value."""
        while self.handle_flow():
            pass

    def get_response_from_server(self, flow):
        """
        Send flow.request upstream and populate flow.response.

        The response headers are read first, the "responseheaders" hook is
        asked, and the body is only read afterwards unless the response has
        been marked for streaming (content is then CONTENT_MISSING).

        A tcp.NetLibDisconnect or http.HttpErrorConnClosed on the first
        attempt triggers one reconnect; on the second attempt it is re-raised.
        """
        self.c.establish_server_connection()
        request_raw = flow.request.assemble()

        for attempt in (0, 1):
            try:
                self.c.server_conn.send(request_raw)
                # Only get the headers at first...
                flow.response = HTTPResponse.from_stream(
                    self.c.server_conn.rfile,
                    flow.request.method,
                    body_size_limit=self.c.config.body_size_limit,
                    include_body=False)
                break
            except (tcp.NetLibDisconnect, http.HttpErrorConnClosed) as v:
                self.c.log("error in server communication: %s" % repr(v), level="debug")
                if attempt == 0:
                    # In any case, we try to reconnect at least once.
                    # This is necessary because it might be possible that we already initiated an upstream connection
                    # after clientconnect that has already been expired, e.g consider the following event log:
                    # > clientconnect (transparent mode destination known)
                    # > serverconnect
                    # > read n% of large request
                    # > server detects timeout, disconnects
                    # > read (100-n)% of large request
                    # > send large request upstream
                    self.c.server_reconnect()
                else:
                    raise

        # call the appropriate script hook - this is an opportunity for an
        # inline script to set flow.stream = True
        self.c.channel.ask("responseheaders", flow)

        # now get the rest of the response body, if body still needs to be
        # read but not streaming this response
        if flow.response.stream:
            flow.response.content = CONTENT_MISSING
        else:
            flow.response.content = http.read_http_body(
                self.c.server_conn.rfile,
                flow.response.headers,
                self.c.config.body_size_limit,
                flow.request.method,
                flow.response.code,
                False)
        flow.response.timestamp_end = utils.timestamp()
def _read_request_origin_form(self, client_conn, scheme, host, port):
    """
    Read a HTTP request with regular (origin-form) request line.
    An example origin-form request line would be:
        GET /foo.html HTTP/1.1

    The request destination is already known from one of the following
    sources:
        1) transparent proxy: destination provided by platform resolver
        2) reverse proxy: fixed destination
        3) regular proxy: known from CONNECT command.

    Returns None when an empty request line is read, otherwise the
    flow.Request built from the request line, headers and body.
    Raises ProxyError(400) when the request line cannot be parsed.
    """
    if scheme.lower() == "https":
        # Upgrade the client connection first if that has not happened yet.
        if not self.ssl_established:
            self.establish_ssl(client_conn, host, port)

    request_line = self.get_line(self.rfile)
    if request_line == "":
        return None

    parsed = http.parse_init_http(request_line)
    if not parsed:
        raise ProxyError(400, "Bad HTTP request line: %s"%repr(request_line))
    method, path, httpversion = parsed

    headers = self.read_headers(authenticate=False)
    self.handle_expect_header(headers, httpversion)
    body = http.read_http_body(
        self.rfile, headers, self.config.body_size_limit, True
    )

    started = self.rfile.first_byte_timestamp
    finished = utils.timestamp()
    request = flow.Request(
        client_conn, httpversion, host, port, scheme, method, path,
        headers, body, started, finished
    )
    request.set_live(self.rfile, self.wfile)
    return request
def from_stream(cls, rfile, include_content=True, body_size_limit=None):
    """
    Parse an HTTP request from a file stream

    The request line decides the request form ("asterisk", "origin",
    "authority" or "absolute"). The body is only read when include_content
    is true; otherwise content and timestamp_end stay None.

    Raises http.HttpError(400) for an unparsable request line or headers.
    """
    # Fields that are not filled in by every request form / code path.
    host = port = scheme = None
    content = timestamp_end = None

    def bad_request_line():
        # Build the error for the request line captured below.
        return http.HttpError(
            400, "Bad HTTP request line: %s" % repr(request_line))

    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    request_line = get_line(rfile)

    if not hasattr(rfile, "first_byte_timestamp"):
        timestamp_start = utils.timestamp()
    else:
        timestamp_start = rfile.first_byte_timestamp

    parts = http.parse_init(request_line)
    if not parts:
        raise bad_request_line()
    method, path, httpversion = parts

    if path == '*':
        form_in = "asterisk"
    elif path.startswith("/"):
        form_in = "origin"
        if not netlib.utils.isascii(path):
            raise bad_request_line()
    elif method.upper() == 'CONNECT':
        form_in = "authority"
        connect_parts = http.parse_init_connect(request_line)
        if not connect_parts:
            raise bad_request_line()
        host, port, _ = connect_parts
        path = None
    else:
        form_in = "absolute"
        proxy_parts = http.parse_init_proxy(request_line)
        if not proxy_parts:
            raise bad_request_line()
        _, scheme, host, port, path, _ = proxy_parts

    headers = http.read_headers(rfile)
    if headers is None:
        raise http.HttpError(400, "Invalid headers")

    if include_content:
        content = http.read_http_body(rfile, headers, body_size_limit, True)
        timestamp_end = utils.timestamp()

    return HTTPRequest(
        form_in, method, scheme, host, port, path, httpversion,
        headers, content, timestamp_start, timestamp_end
    )
def test_read_http_body():
    """
    Check http.read_http_body for content-length, no-length and chunked
    bodies read from plain cStringIO streams.
    """
    # Trailing positional arguments shared by every call in this test.
    get_200 = ("GET", 200, False)
    reader = http.read_http_body

    # default case: content-length equals the payload length
    hdrs = odict.ODictCaseless()
    hdrs["content-length"] = [7]
    src = cStringIO.StringIO("testing")
    assert reader(src, hdrs, None, *get_200) == "testing"

    # invalid content-length values: non-numeric, then negative
    for bogus in ("foo", -1):
        hdrs["content-length"] = [bogus]
        src = cStringIO.StringIO("testing")
        tutils.raises(http.HttpError, reader, src, hdrs, None, *get_200)

    # content-length of 5 together with a limit of 4
    hdrs["content-length"] = [5]
    src = cStringIO.StringIO("testing")
    tutils.raises(http.HttpError, reader, src, hdrs, 4, *get_200)

    # content-length of 5 is shorter than the 7-byte payload, no limit
    src = cStringIO.StringIO("testing")
    assert len(reader(src, hdrs, None, *get_200)) == 5

    # no content-length: limit (100) above the payload size
    hdrs = odict.ODictCaseless()
    src = cStringIO.StringIO("testing")
    assert len(reader(src, hdrs, 100, *get_200)) == 7

    # no content-length: limit (4) below the payload size
    src = cStringIO.StringIO("testing")
    tutils.raises(http.HttpError, reader, src, hdrs, 4, *get_200)

    # chunked transfer encoding
    hdrs = odict.ODictCaseless()
    hdrs["transfer-encoding"] = ["chunked"]
    src = cStringIO.StringIO("5\r\naaaaa\r\n0\r\n\r\n")
    assert reader(src, hdrs, 100, *get_200) == "aaaaa"
def from_stream(cls, rfile, include_content=True, body_size_limit=None):
    """
    Parse an HTTP request from a file stream

    Classifies the request line as "asterisk", "origin", "authority" or
    "absolute" form, reads the headers and, when include_content is true,
    the body. Returns an HTTPRequest; raises http.HttpError(400) when the
    request line or the headers are invalid.
    """
    def reject_line():
        # All request-line failures share one error.
        raise http.HttpError(
            400, "Bad HTTP request line: %s" % repr(request_line))

    # Defaults for values that some request forms never provide.
    scheme = None
    host = None
    port = None
    content = None
    timestamp_end = None

    if hasattr(rfile, "reset_timestamps"):
        rfile.reset_timestamps()

    request_line = get_line(rfile)

    if hasattr(rfile, "first_byte_timestamp"):
        timestamp_start = rfile.first_byte_timestamp
    else:
        timestamp_start = utils.timestamp()

    init = http.parse_init(request_line)
    if not init:
        reject_line()
    method, path, httpversion = init

    if path == '*':
        form_in = "asterisk"
    elif path.startswith("/"):
        form_in = "origin"
        if not netlib.utils.isascii(path):
            reject_line()
    elif method.upper() == 'CONNECT':
        form_in = "authority"
        authority = http.parse_init_connect(request_line)
        if not authority:
            reject_line()
        host, port, _ = authority
        path = None
    else:
        form_in = "absolute"
        absolute = http.parse_init_proxy(request_line)
        if not absolute:
            reject_line()
        _, scheme, host, port, path, _ = absolute

    headers = http.read_headers(rfile)
    if headers is None:
        raise http.HttpError(400, "Invalid headers")

    if include_content:
        content = http.read_http_body(rfile, headers, body_size_limit, True)
        timestamp_end = utils.timestamp()

    return HTTPRequest(form_in, method, scheme, host, port, path,
                       httpversion, headers, content,
                       timestamp_start, timestamp_end)
def _read_request_absolute_form(self, client_conn, line):
    """
    When making a request to a proxy (other than CONNECT or OPTIONS),
    a client must send the target uri in absolute-form.
    An example absolute-form request line would be:
        GET http://www.example.com/foo.html HTTP/1.1

    Parses `line`, then reads headers (with authentication) and body from
    self.rfile and returns the resulting flow.Request.
    Raises ProxyError(400) when the request line cannot be parsed.
    """
    parsed = http.parse_init_proxy(line)
    if not parsed:
        raise ProxyError(400, "Bad HTTP request line: %s"%repr(line))
    method, scheme, host, port, path, httpversion = parsed

    headers = self.read_headers(authenticate=True)
    self.handle_expect_header(headers, httpversion)
    body = http.read_http_body(
        self.rfile, headers, self.config.body_size_limit, True
    )

    first_byte = self.rfile.first_byte_timestamp
    finished = utils.timestamp()
    request = flow.Request(
        client_conn, httpversion, host, port, scheme, method, path,
        headers, body, first_byte, finished
    )
    request.set_live(self.rfile, self.wfile)
    return request
def test_read_http_body_response():
    """
    With empty headers, code 200 and the final flag False, the whole
    stream content is expected back.
    """
    stream = cStringIO.StringIO("testing")
    no_headers = odict.ODictCaseless()
    body = http.read_http_body(stream, no_headers, None, "GET", 200, False)
    assert body == "testing"
def test_read_http_body_request():
    """
    With empty headers, no response code and the final flag True, an
    empty body is expected even though the stream holds data.
    """
    stream = cStringIO.StringIO("testing")
    no_headers = odict.ODictCaseless()
    body = http.read_http_body(stream, no_headers, None, "GET", None, True)
    assert body == ""
def handle_request(self):
    """
    Read one request from self.rfile and serve it.

    Returns a (again, log) tuple.

    again: True if request handling should continue.
    log: A dictionary, or None
    """
    def reject(message):
        # Log the problem and build the error result shared by all failures.
        self.info(message)
        return False, dict(type="error", msg=message)

    first_line = self.rfile.readline()
    if first_line in ("\r\n", "\n"):
        # Possible leftover from previous message
        first_line = self.rfile.readline()
    if first_line == "":
        # Normal termination
        return False, None

    parsed = utils.MemBool()

    if parsed(http.parse_init_connect(first_line)):
        # CONNECT: consume the headers, acknowledge, optionally switch to SSL.
        http.read_headers(self.rfile)
        self.wfile.write(
            'HTTP/1.1 200 Connection established\r\n' +
            ('Proxy-agent: %s\r\n' % version.NAMEVERSION) +
            '\r\n'
        )
        self.wfile.flush()
        ssloptions = self.server.ssloptions
        if not ssloptions.not_after_connect:
            try:
                cert, key, _chain_file = ssloptions.get_cert(parsed.v[0])
                self.convert_to_ssl(
                    cert,
                    key,
                    handle_sni=self.handle_sni,
                    request_client_cert=self.server.ssloptions.request_client_cert,
                    cipher_list=self.server.ssloptions.ciphers,
                    method=self.server.ssloptions.sslversion,
                )
            except tcp.NetLibError as err:
                return reject(str(err))
        return True, None

    if parsed(http.parse_init_proxy(first_line)):
        method, _, _, _, path, httpversion = parsed.v
    elif parsed(http.parse_init_http(first_line)):
        method, path, httpversion = parsed.v
    else:
        return reject("Invalid first line: %s" % repr(first_line))

    headers = http.read_headers(self.rfile)
    if headers is None:
        return reject("Invalid headers")

    if self.clientcert:
        clientcert = dict(
            cn=self.clientcert.cn,
            subject=self.clientcert.subject,
            serial=self.clientcert.serial,
            notbefore=self.clientcert.notbefore.isoformat(),
            notafter=self.clientcert.notafter.isoformat(),
            keyinfo=self.clientcert.keyinfo,
        )
    else:
        clientcert = None

    retlog = {
        "type": "crafted",
        "request": {
            "path": path,
            "method": method,
            "headers": headers.lst,
            "httpversion": httpversion,
            "sni": self.sni,
            "remote_address": self.address(),
            "clientcert": clientcert,
        },
        "cipher": None,
    }
    if self.ssl_established:
        retlog["cipher"] = self.get_current_cipher()

    try:
        content = http.read_http_body(
            self.rfile, headers, None, method, None, True
        )
    except http.HttpError as err:
        return reject(str(err))

    # Anchors take precedence over crafting specs and the web app.
    for anchor in self.server.anchors:
        if anchor[0].match(path):
            self.info("crafting anchor: %s" % path)
            again, retlog["response"] = self.serve_crafted(anchor[1])
            return again, retlog

    if not self.server.nocraft and path.startswith(self.server.craftanchor):
        spec = urllib.parse.unquote(path)[len(self.server.craftanchor):]
        self.info("crafting spec: %s" % spec)
        try:
            crafted = language.parse_response(spec)
        except language.ParseException as err:
            self.info("Parse error: %s" % err.msg)
            crafted = language.make_error_response(
                "Parse Error",
                "Error parsing response spec: %s\n" % err.msg + err.marked()
            )
        again, retlog["response"] = self.serve_crafted(crafted)
        return again, retlog

    if self.server.noweb:
        denied = language.make_error_response("Access Denied")
        language.serve(denied, self.wfile, self.server.request_settings)
        return False, dict(
            type="error",
            msg="Access denied: web interface disabled"
        )

    # Everything else is handed to the WSGI application.
    self.info("app: %s %s" % (method, path))
    wsgi_request = wsgi.Request("http", method, path, headers, content)
    wsgi_flow = wsgi.Flow(self.address, wsgi_request)
    sockname = self.connection.getsockname()
    adaptor = wsgi.WSGIAdaptor(
        self.server.app,
        sockname[0],
        self.server.address.port,
        version.NAMEVERSION
    )
    adaptor.serve(wsgi_flow, self.wfile)
    return True, None
request=dict( path=path, method=method, headers=headers.lst, httpversion=httpversion, sni=self.sni, remote_address=self.address(), clientcert=clientcert, ), cipher=None, ) if self.ssl_established: retlog["cipher"] = self.get_current_cipher() try: content = http.read_http_body(self.rfile, headers, None, method, None, True) except http.HttpError, s: s = str(s) self.info(s) return False, dict(type="error", msg=s) for i in self.server.anchors: if i[0].match(path): self.info("crafting anchor: %s" % path) aresp = language.parse_response(self.server.request_settings, i[1]) again, retlog["response"] = self.serve_crafted(aresp) return again, retlog if not self.server.nocraft and path.startswith( self.server.craftanchor):
def handle_flow(self):
    """
    Handle one request/response exchange on the client connection.

    Returns True when the flow completed (or was answered without going
    upstream) and the connection can be reused, False when the flow was
    killed or the connection has to be closed. When one of the handled
    errors occurs it is passed to self.handle_error and the method falls
    through, returning None.
    """
    flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.change_server)
    try:
        req = HTTPRequest.from_stream(
            self.c.client_conn.rfile,
            body_size_limit=self.c.config.body_size_limit)
        self.c.log("request", "debug", [req._assemble_first_line(req.form_in)])
        send_request_upstream = self.process_request(flow, req)
        if not send_request_upstream:
            return True

        # Be careful NOT to assign the request to the flow before
        # process_request completes. This is because the call can raise an
        # exception. If the request object is already attached, this results
        # in an Error object that has an attached request that has not been
        # sent through to the Master.
        flow.request = req
        request_reply = self.c.channel.ask("request", flow.request)
        self.determine_server_address(flow, flow.request)
        flow.server_conn = self.c.server_conn  # Update server_conn attribute on the flow

        if request_reply is None or request_reply == KILL:
            return False

        if isinstance(request_reply, HTTPResponse):
            flow.response = request_reply
        else:
            # read initially in "stream" mode, so we can get the headers separately
            flow.response = self.get_response_from_server(
                flow.request, include_body=False)

            # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True
            self.c.channel.ask("responseheaders", flow.response)

            # now get the rest of the response body, if body still needs to be read but not streaming this response
            if flow.response.stream:
                flow.response.content = CONTENT_MISSING
            else:
                flow.response.content = http.read_http_body(
                    self.c.server_conn.rfile,
                    flow.response.headers,
                    self.c.config.body_size_limit,
                    flow.request.method,
                    flow.response.code,
                    False)

        # no further manipulation of self.c.server_conn beyond this point
        # we can safely set it as the final attribute value here.
        flow.server_conn = self.c.server_conn

        self.c.log("response", "debug", [flow.response._assemble_first_line()])
        response_reply = self.c.channel.ask("response", flow.response)
        if response_reply is None or response_reply == KILL:
            return False

        if not flow.response.stream:
            # no streaming:
            # we already received the full response from the server and can send it to the client straight away.
            self.c.client_conn.send(flow.response._assemble())
        else:
            # streaming:
            # First send the response head and then transfer the body incrementally:
            h = flow.response._assemble_head(
                preserve_transfer_encoding=True)
            self.c.client_conn.send(h)
            for chunk in http.read_http_body_chunked(
                    self.c.server_conn.rfile,
                    flow.response.headers,
                    self.c.config.body_size_limit,
                    flow.request.method,
                    flow.response.code,
                    False,
                    4096):
                for part in chunk:
                    self.c.client_conn.wfile.write(part)
                self.c.client_conn.wfile.flush()

        flow.timestamp_end = utils.timestamp()

        close_connection = (
            http.connection_close(flow.request.httpversion, flow.request.headers) or
            http.connection_close(flow.response.httpversion, flow.response.headers) or
            http.expected_http_body_size(
                flow.response.headers, False,
                flow.request.method, flow.response.code) == -1)
        if close_connection:
            if flow.request.form_in == "authority" and flow.response.code == 200:
                # Workaround for https://github.com/mitmproxy/mitmproxy/issues/313:
                # Some proxies (e.g. Charles) send a CONNECT response with HTTP/1.0 and no Content-Length header
                pass
            else:
                return False

        if flow.request.form_in == "authority" and flow.response.code == 200:
            # TODO: Eventually add headers (space/usefulness tradeoff)
            # Make sure to add state info before the actual upgrade happens.
            # During the upgrade, we may receive an SNI indication from the client,
            # which resets the upstream connection. If this is the case, we must
            # already re-issue the CONNECT request at this point.
            self.c.server_conn.state.append(("http", {
                "state": "connect",
                "host": flow.request.host,
                "port": flow.request.port
            }))
            self.ssl_upgrade()

        # If the user has changed the target server on this connection,
        # restore the original target server
        self.restore_server()
        return True
    except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError) as e:
        self.handle_error(e, flow)
path = path, method = method, headers = headers.lst, httpversion = httpversion, sni = self.sni, remote_address = self.address(), clientcert = clientcert, ), cipher = None, ) if self.ssl_established: retlog["cipher"] = self.get_current_cipher() try: content = http.read_http_body( self.rfile, headers, None, True ) except http.HttpError, s: s = str(s) self.info(s) return False, dict(type = "error", msg = s) for i in self.server.anchors: if i[0].match(path): self.info("crafting anchor: %s"%path) aresp = language.parse_response(self.server.request_settings, i[1]) again, retlog["response"] = self.serve_crafted(aresp) return again, retlog if not self.server.nocraft and path.startswith(self.server.craftanchor): spec = urllib.unquote(path)[len(self.server.craftanchor):]
def handle_flow(self):
    """
    Handle one request/response exchange on the client connection.

    Returns True when the flow completed (or was answered without going
    upstream) and the connection can be reused, False when the flow was
    killed or the connection has to be closed. When one of the handled
    errors occurs it is passed to self.handle_error and the method falls
    through, returning None.
    """
    flow = HTTPFlow(self.c.client_conn, self.c.server_conn, self.live)
    try:
        req = HTTPRequest.from_stream(
            self.c.client_conn.rfile,
            body_size_limit=self.c.config.body_size_limit)
        self.c.log("request", "debug", [req._assemble_first_line(req.form_in)])
        send_request_upstream = self.process_request(flow, req)
        if not send_request_upstream:
            return True

        # Be careful NOT to assign the request to the flow before
        # process_request completes. This is because the call can raise an
        # exception. If the request object is already attached, this results
        # in an Error object that has an attached request that has not been
        # sent through to the Master.
        flow.request = req
        request_reply = self.c.channel.ask("request", flow.request)
        self.determine_server_address(flow, flow.request)
        flow.server_conn = self.c.server_conn  # Update server_conn attribute on the flow

        if request_reply is None or request_reply == KILL:
            return False

        if isinstance(request_reply, HTTPResponse):
            flow.response = request_reply
        else:
            # read initially in "stream" mode, so we can get the headers separately
            flow.response = self.get_response_from_server(
                flow.request, include_body=False)

            # call the appropriate script hook - this is an opportunity for an inline script to set flow.stream = True
            self.c.channel.ask("responseheaders", flow.response)

            # now get the rest of the response body, if body still needs to be read but not streaming this response
            if flow.response.stream:
                flow.response.content = CONTENT_MISSING
            else:
                flow.response.content = http.read_http_body(
                    self.c.server_conn.rfile,
                    flow.response.headers,
                    self.c.config.body_size_limit,
                    flow.request.method,
                    flow.response.code,
                    False)

        # no further manipulation of self.c.server_conn beyond this point
        # we can safely set it as the final attribute value here.
        flow.server_conn = self.c.server_conn

        self.c.log("response", "debug", [flow.response._assemble_first_line()])
        response_reply = self.c.channel.ask("response", flow.response)
        if response_reply is None or response_reply == KILL:
            return False

        if not flow.response.stream:
            # no streaming:
            # we already received the full response from the server and can send it to the client straight away.
            self.c.client_conn.send(flow.response._assemble())
        else:
            # streaming:
            # First send the response head and then transfer the body incrementally:
            h = flow.response._assemble_head(preserve_transfer_encoding=True)
            self.c.client_conn.send(h)
            for chunk in http.read_http_body_chunked(
                    self.c.server_conn.rfile,
                    flow.response.headers,
                    self.c.config.body_size_limit,
                    flow.request.method,
                    flow.response.code,
                    False,
                    4096):
                for part in chunk:
                    self.c.client_conn.wfile.write(part)
                self.c.client_conn.wfile.flush()
            # The streamed response is only complete once the last chunk
            # has been forwarded.
            flow.response.timestamp_end = utils.timestamp()

        flow.timestamp_end = utils.timestamp()

        close_connection = (
            http.connection_close(flow.request.httpversion, flow.request.headers) or
            http.connection_close(flow.response.httpversion, flow.response.headers) or
            http.expected_http_body_size(
                flow.response.headers, False,
                flow.request.method, flow.response.code) == -1)
        if close_connection:
            if flow.request.form_in == "authority" and flow.response.code == 200:
                # Workaround for https://github.com/mitmproxy/mitmproxy/issues/313:
                # Some proxies (e.g. Charles) send a CONNECT response with HTTP/1.0 and no Content-Length header
                pass
            else:
                return False

        if flow.request.form_in == "authority" and flow.response.code == 200:
            # TODO: Eventually add headers (space/usefulness tradeoff)
            # Make sure to add state info before the actual upgrade happens.
            # During the upgrade, we may receive an SNI indication from the client,
            # which resets the upstream connection. If this is the case, we must
            # already re-issue the CONNECT request at this point.
            self.c.server_conn.state.append(("http", {
                "state": "connect",
                "host": flow.request.host,
                "port": flow.request.port
            }))
            self.ssl_upgrade()

        # If the user has changed the target server on this connection,
        # restore the original target server
        flow.live.restore_server()
        flow.live = None

        return True
    except (HttpAuthenticationError, http.HttpError, proxy.ProxyError, tcp.NetLibError) as e:
        self.handle_error(e, flow)