def __call__(self, data, **metadata):
    """
    Pick a suitable content view for ``data`` and delegate to it.

    Candidates are tried in this order:

    1. the first entry registered in ``content_types_map`` for the
       message's content type,
    2. the "XML" view, if a content type is present and the data looks
       like XML,
    3. the "Query" view, if the metadata carries a truthy ``query``,
    4. the "Hex" view, if the data is mostly binary,
    5. the "Raw" view.

    Args:
        data: The message body to render; may be empty or None.
        **metadata: Extra context. ``headers`` and ``query`` are inspected
            here; all of it is forwarded to the content-type, XML and
            Query views.

    Returns:
        Whatever the selected view returns, or ``("No content", [])`` when
        there is no data and no query.
    """
    headers = metadata.get("headers", {})
    ctype = headers.get("content-type")

    if data and ctype:
        # A malformed Content-Type header may not be parseable (mitmproxy's
        # parser returns None in that case); fall back to sniffing the data
        # instead of crashing while indexing the parse result.
        parsed = http.parse_content_type(ctype)
        ct = "%s/%s" % (parsed[0], parsed[1]) if parsed else None

        if ct and ct in content_types_map:
            return content_types_map[ct][0](data, **metadata)
        elif strutils.is_xml(data):
            return get("XML")(data, **metadata)

    if metadata.get("query"):
        return get("Query")(data, **metadata)

    if data and strutils.is_mostly_bin(data):
        return get("Hex")(data)

    if not data:
        return "No content", []

    return get("Raw")(data)
def test_is_mostly_bin():
    """Check strutils.is_mostly_bin on a short, a binary-heavy and an empty input."""
    one_binary_byte = b"foo\xFF"
    binary_heavy = b"foo" + b"\xFF" * 10
    empty_text = ""

    # A single non-text byte after "foo" is not enough to count as binary.
    assert not strutils.is_mostly_bin(one_binary_byte)
    # Ten non-text bytes after "foo" are.
    assert strutils.is_mostly_bin(binary_heavy)
    # Empty input is never binary.
    assert not strutils.is_mostly_bin(empty_text)
def response(flow):
    """
    Called when a server response has been received.

    Builds a HAR entry from the flow's request, response and server
    connection timestamps and appends it to ``HAR["log"]["entries"]``.

    Args:
        flow: The completed flow; its ``request``, ``response`` and
            ``server_conn`` attributes are read.
    """
    # -1 indicates that these values do not apply to current request
    ssl_time = -1
    connect_time = -1

    # Connection setup timings are only reported for the first flow seen on
    # a given server connection.
    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_ssl_setup is not None:
            ssl_time = (flow.server_conn.timestamp_ssl_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    # Calculate raw timings from timestamps. DNS timings can not be calculated
    # for lack of a way to measure it. The same goes for HAR blocked.
    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. So, the time spent waiting is actually
    # spent waiting between request.timestamp_end and response.timestamp_start
    # thus it correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that
    # format. The -1 sentinel must stay -1 (not -1000) to remain a valid
    # "does not apply" marker.
    timings = {
        k: int(1000 * v) if v != -1 else -1
        for k, v in timings_raw.items()
    }

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = format_datetime(
        datetime.utcfromtimestamp(flow.request.timestamp_start))

    # Response body size and encoding. A missing body (None) counts as 0
    # bytes instead of raising in len().
    raw_body = flow.response.raw_content
    decoded_body = flow.response.content
    response_body_size = len(raw_body) if raw_body else 0
    response_body_decoded_size = len(decoded_body) if decoded_body else 0
    response_body_compression = response_body_decoded_size - response_body_size

    request_body = flow.request.content
    request_body_size = len(request_body) if request_body else 0

    entry = {
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_request_cookies(flow.request.cookies.fields),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": request_body_size,
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_response_cookies(flow.response.cookies.fields),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    }

    # Store binary data as base64
    if strutils.is_mostly_bin(flow.response.content):
        entry["response"]["content"]["text"] = base64.b64encode(
            flow.response.content).decode()
        entry["response"]["content"]["encoding"] = "base64"
    else:
        entry["response"]["content"]["text"] = flow.response.get_text(strict=False)

    if flow.request.method in ["POST", "PUT", "PATCH"]:
        # Form names and values are decoded from bytes; surrogateescape keeps
        # undecodable bytes instead of raising.
        params = [
            {"name": a.decode("utf8", "surrogateescape"),
             "value": b.decode("utf8", "surrogateescape")}
            for a, b in flow.request.urlencoded_form.items(multi=True)
        ]
        entry["request"]["postData"] = {
            "mimeType": flow.request.headers.get("Content-Type", ""),
            "text": flow.request.get_text(strict=False),
            "params": params
        }

    if flow.server_conn.connected():
        entry["serverIPAddress"] = str(flow.server_conn.ip_address.address[0])

    HAR["log"]["entries"].append(entry)
def response(flow):
    """
    Called when a server response has been received.

    Builds a HAR entry from the flow's request, response and server
    connection timestamps and appends it to ``HAR["log"]["entries"]``.

    Args:
        flow: The completed flow; its ``request``, ``response`` and
            ``server_conn`` attributes are read.
    """
    # -1 indicates that these values do not apply to current request
    ssl_time = -1
    connect_time = -1

    # Connection setup timings are only reported for the first flow seen on
    # a given server connection.
    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_tls_setup is not None:
            ssl_time = (flow.server_conn.timestamp_tls_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    # Calculate raw timings from timestamps. DNS timings can not be calculated
    # for lack of a way to measure it. The same goes for HAR blocked.
    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. So, the time spent waiting is actually
    # spent waiting between request.timestamp_end and response.timestamp_start
    # thus it correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that
    # format; the -1 sentinel is passed through unchanged.
    timings = {
        k: int(1000 * v) if v != -1 else -1
        for k, v in timings_raw.items()
    }

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = datetime.fromtimestamp(
        flow.request.timestamp_start, timezone.utc).isoformat()

    # Response body size and encoding. A missing body counts as 0 bytes.
    response_body_size = len(
        flow.response.raw_content) if flow.response.raw_content else 0
    response_body_decoded_size = len(
        flow.response.content) if flow.response.content else 0
    response_body_compression = response_body_decoded_size - response_body_size

    # Apply the same guard to the request body, so a request without content
    # is reported as 0 bytes rather than raising in len().
    request_body_size = len(
        flow.request.content) if flow.request.content else 0

    entry = {
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_request_cookies(flow.request.cookies.fields),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": request_body_size,
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_response_cookies(flow.response.cookies.fields),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    }

    # Store binary data as base64
    if strutils.is_mostly_bin(flow.response.content):
        entry["response"]["content"]["text"] = base64.b64encode(
            flow.response.content).decode()
        entry["response"]["content"]["encoding"] = "base64"
    else:
        entry["response"]["content"]["text"] = flow.response.get_text(
            strict=False)

    if flow.request.method in ["POST", "PUT", "PATCH"]:
        params = [{
            "name": a,
            "value": b
        } for a, b in flow.request.urlencoded_form.items(multi=True)]
        entry["request"]["postData"] = {
            "mimeType": flow.request.headers.get("Content-Type", ""),
            "text": flow.request.get_text(strict=False),
            "params": params
        }

    if flow.server_conn.connected():
        entry["serverIPAddress"] = str(flow.server_conn.ip_address[0])

    HAR["log"]["entries"].append(entry)
def update_har_data_from_response(self, flow):
    """
    Build a HAR entry for a completed flow and record it.

    The entry is assembled from the flow's request, response and server
    connection timestamps and appended to
    ``self.har_data['log']['entries']``.

    Args:
        flow: The completed flow; its ``request``, ``response`` and
            ``server_conn`` attributes are read.
    """
    # -1 indicates that these values do not apply to the current request.
    ssl_time = -1
    connect_time = -1

    # Connection setup timings are only reported for the first flow seen on
    # a given server connection.
    if flow.server_conn and flow.server_conn not in self.servers_seen:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_tls_setup is not None:
            ssl_time = (flow.server_conn.timestamp_tls_setup -
                        flow.server_conn.timestamp_tcp_setup)

        self.servers_seen.add(flow.server_conn)

    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # Convert to integer milliseconds, but keep the -1 sentinel as -1
    # (scaling it would emit -1000, which is no longer the "does not apply"
    # marker).
    timings = {
        k: int(1000 * v) if v != -1 else -1
        for k, v in timings_raw.items()
    }

    # Sum of all applicable timings; sentinel values are skipped.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = datetime.fromtimestamp(
        flow.request.timestamp_start, timezone.utc).isoformat()

    # A missing body (None) counts as 0 bytes instead of raising in len().
    response_body_size = len(
        flow.response.raw_content) if flow.response.raw_content else 0
    response_body_decoded_size = len(
        flow.response.content) if flow.response.content else 0
    response_body_compression = response_body_decoded_size - response_body_size
    request_body_size = len(
        flow.request.content) if flow.request.content else 0

    entry = {
        'startedDateTime': started_date_time,
        'time': full_time,
        'request': {
            'method': flow.request.method,
            'url': flow.request.url,
            'httpVersion': flow.request.http_version,
            'cookies': format_request_cookies(flow.request.cookies.fields),
            'headers': name_value(flow.request.headers),
            'queryString': name_value(flow.request.query or {}),
            'headersSize': len(str(flow.request.headers)),
            'bodySize': request_body_size,
        },
        'response': {
            'status': flow.response.status_code,
            'statusText': flow.response.reason,
            'httpVersion': flow.response.http_version,
            'cookies': format_response_cookies(flow.response.cookies.fields),
            'headers': name_value(flow.response.headers),
            'content': {
                'size': response_body_size,
                'compression': response_body_compression,
                'mimeType': flow.response.headers.get('Content-Type', '')
            },
            'redirectURL': flow.response.headers.get('Location', ''),
            'headersSize': len(str(flow.response.headers)),
            'bodySize': response_body_size,
        },
        'cache': {},
        'timings': timings,
    }

    # Binary bodies are stored base64-encoded, everything else as text.
    if strutils.is_mostly_bin(flow.response.content):
        entry['response']['content']['text'] = base64.b64encode(
            flow.response.content).decode()
        entry['response']['content']['encoding'] = 'base64'
    else:
        entry['response']['content']['text'] = flow.response.get_text(
            strict=False)

    # NOTE: disabled prototype for stripping request/response content based
    # on self.adjust_flow(flow); kept for reference.
    # adjustment_method = self.adjust_flow(flow)
    # if adjustment_method not in ['strip_content_both', 'strip_content_response']:
    #     if strutils.is_mostly_bin(flow.response.content):
    #         entry['response']['content']['text'] = base64.b64encode(flow.response.content).decode()
    #         entry['response']['content']['encoding'] = 'base64'
    #     else:
    #         entry['response']['content']['text'] = flow.response.get_text(strict=False)
    # else:
    #     entry['response']['content']['_adjustment_method'] = adjustment_method
    # if adjustment_method not in ['strip_content_both', 'strip_content_request']:
    #     entry['request']['_content'] = {}
    #     if strutils.is_mostly_bin(flow.request.content):
    #         entry['request']['_content']['text'] = base64.b64encode(flow.request.content).decode()
    #         entry['request']['_content']['encoding'] = 'base64'
    #     else:
    #         entry['request']['_content']['text'] = flow.request.get_text(strict=False)
    # else:
    #     entry['request']['_content']['adjustment_method'] = adjustment_method

    if flow.request.method in ['POST', 'PUT', 'PATCH']:
        params = [{
            'name': a,
            'value': b
        } for a, b in flow.request.urlencoded_form.items(multi=True)]
        entry['request']['postData'] = {
            'mimeType': flow.request.headers.get('Content-Type', ''),
            'text': flow.request.get_text(strict=False),
            'params': params,
        }

    if flow.server_conn.connected():
        entry['serverIPAddress'] = str(flow.server_conn.ip_address[0])

    self.har_data['log']['entries'].append(entry)
def response(self, flow):
    """
    Fill in the response part of the flow's HAR entry.

    Looks up the HAR entry for ``flow`` via ``self.get_har_entry``, then
    stores the response data, total time, page reference and timings on it.
    Which response details are captured (cookies, headers, content, binary
    content) depends on ``self.har_capture_types``. Flows whose metadata
    carries ``WhiteListFiltered`` or ``BlackListFiltered`` are left
    untouched.

    Args:
        flow: The flow whose response has been received; its ``request``,
            ``response``, ``metadata`` and ``server_conn`` are read.
    """
    har_entry = self.get_har_entry(flow)

    ctx.log.debug('Incoming response for request to url: {}'.format(
        flow.request.url))

    if 'WhiteListFiltered' in flow.metadata or 'BlackListFiltered' in flow.metadata:
        ctx.log.debug('Black/White list filtered, return nothing.')
        return

    self.get_or_create_har(DEFAULT_PAGE_REF, DEFAULT_PAGE_TITLE, True)

    # -1 indicates that these values do not apply to current request
    ssl_time = -1
    connect_time = -1

    # Connection setup timings are only reported for the first flow seen on
    # a given server connection.
    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_tls_setup is not None:
            ssl_time = (flow.server_conn.timestamp_tls_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    timings = self.calculate_timings(connect_time, flow, ssl_time)
    # Carry over the DNS timing already recorded on the entry.
    timings['dnsNanos'] = int(har_entry['timings']['dnsNanos'])

    # Sum of all applicable timings; values at or below -1 are skipped.
    full_time = sum(v for v in timings.values() if v > -1)

    # Response body size and encoding
    response_body_size = len(
        flow.response.raw_content) if flow.response.raw_content else 0
    response_body_decoded_size = len(
        flow.response.content) if flow.response.content else 0
    response_body_compression = response_body_decoded_size - response_body_size

    har_response = self.generate_har_entry_response()
    har_response["status"] = flow.response.status_code
    har_response["statusText"] = flow.response.reason
    har_response["httpVersion"] = flow.response.http_version

    if HarCaptureTypes.RESPONSE_COOKIES in self.har_capture_types:
        har_response["cookies"] = \
            self.format_response_cookies(flow.response.cookies.fields)

    if HarCaptureTypes.RESPONSE_HEADERS in self.har_capture_types:
        har_response["headers"] = self.name_value(flow.response.headers)

    content = har_response['content']
    content['size'] = response_body_size
    content['compression'] = response_body_compression
    content['mimeType'] = flow.response.headers.get('Content-Type', '')

    if HarCaptureTypes.RESPONSE_CONTENT in self.har_capture_types:
        if strutils.is_mostly_bin(flow.response.content):
            # Binary bodies are only stored (base64-encoded) when binary
            # capture is enabled as well.
            if HarCaptureTypes.RESPONSE_BINARY_CONTENT in self.har_capture_types:
                content["text"] = base64.b64encode(
                    flow.response.content).decode()
                content["encoding"] = "base64"
        else:
            content["text"] = flow.response.get_text(strict=False)

    # redirectURL is set once, for every status code. A redirect response
    # without a Location header yields '' instead of raising KeyError.
    har_response["redirectURL"] = flow.response.headers.get('Location', '')
    har_response["headersSize"] = len(str(flow.response.headers))
    har_response["bodySize"] = response_body_size

    har_entry['response'] = har_response
    har_entry['time'] = self.nano_to_ms(full_time)
    har_entry['pageref'] = self.get_current_page_ref()
    har_entry['timings'] = timings

    if flow.server_conn.connected():
        har_entry["serverIPAddress"] = str(flow.server_conn.ip_address[0])

    ctx.log.debug('Populated har entry for response: {}, entry: {}'.format(
        flow.request.url, str(har_entry)))