Ejemplo n.º 1
0
 def __call__(self, data, **metadata):
     """
     Pick a content view for ``data`` and return that view's result.

     Checks are tried in this order:

     1. With non-empty data and a ``content-type`` header: the view
        registered in ``content_types_map`` for ``type/subtype``, else the
        "XML" view when ``strutils.is_xml(data)`` is true.
     2. The "Query" view when ``metadata`` carries a truthy ``query``.
     3. The "Hex" view when the data is mostly binary.
     4. ``("No content", [])`` for empty data.
     5. The "Raw" view.

     Args:
         data: The body to render.
         **metadata: Extra context. ``headers`` and ``query`` are read
             here; the whole mapping is forwarded to the content-type,
             XML and Query views.

     Returns:
         Whatever the selected view returns, or ``("No content", [])``.
     """
     headers = metadata.get("headers", {})
     ctype = headers.get("content-type")
     if data and ctype:
         # NOTE(review): parse_content_type is expected to return None for a
         # malformed header value; guard so that case falls through to the
         # sniffing below instead of raising TypeError on indexing.
         parsed = http.parse_content_type(ctype)
         if parsed is not None:
             ct = "%s/%s" % (parsed[0], parsed[1])
             if ct in content_types_map:
                 return content_types_map[ct][0](data, **metadata)
         if strutils.is_xml(data):
             return get("XML")(data, **metadata)
     if metadata.get("query"):
         return get("Query")(data, **metadata)
     if data and strutils.is_mostly_bin(data):
         return get("Hex")(data)
     if not data:
         return "No content", []
     return get("Raw")(data)
Ejemplo n.º 2
0
def test_is_mostly_bin():
    """Exercise strutils.is_mostly_bin on three representative inputs."""
    # A single high byte after ASCII text must not be reported as binary.
    one_high_byte = b"foo\xFF"
    assert not strutils.is_mostly_bin(one_high_byte)

    # Ten high bytes after three ASCII bytes must be reported as binary.
    many_high_bytes = b"foo" + b"\xFF" * 10
    assert strutils.is_mostly_bin(many_high_bytes)

    # An empty str must not be reported as binary.
    empty_text = ""
    assert not strutils.is_mostly_bin(empty_text)
Ejemplo n.º 3
0
def response(flow):
    """
    Record a completed flow as one entry in the global HAR log.

    Called when a server response has been received. Builds a HAR entry
    from ``flow.request``, ``flow.response`` and ``flow.server_conn`` and
    appends it to ``HAR["log"]["entries"]``. The server connection is added
    to ``SERVERS_SEEN`` the first time it is encountered so connect/TLS
    setup times are only reported once per connection.

    Args:
        flow: The flow whose request/response pair should be recorded.
    """

    # -1 indicates that these values do not apply to current request
    ssl_time = -1
    connect_time = -1

    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_ssl_setup is not None:
            ssl_time = (flow.server_conn.timestamp_ssl_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    # Calculate raw timings from timestamps. DNS timings can not be calculated
    # for lack of a way to measure it. The same goes for HAR blocked.
    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. So, the time spent waiting is actually
    # spent waiting between request.timestamp_end and response.timestamp_start
    # thus it correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that
    # format. The -1 "not applicable" sentinel must be passed through
    # unscaled; multiplying it would emit -1000 instead of -1.
    timings = {
        k: int(1000 * v) if v != -1 else -1
        for k, v in timings_raw.items()
    }

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = format_datetime(datetime.utcfromtimestamp(flow.request.timestamp_start))

    # Response body size and encoding. A missing body (None) counts as
    # zero bytes rather than raising TypeError in len().
    raw_body = flow.response.raw_content
    decoded_body = flow.response.content
    response_body_size = len(raw_body) if raw_body else 0
    response_body_decoded_size = len(decoded_body) if decoded_body else 0
    response_body_compression = response_body_decoded_size - response_body_size

    request_body = flow.request.content
    request_body_size = len(request_body) if request_body else 0

    entry = {
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_request_cookies(flow.request.cookies.fields),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": request_body_size,
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_response_cookies(flow.response.cookies.fields),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    }

    # Store binary data as base64
    if strutils.is_mostly_bin(flow.response.content):
        entry["response"]["content"]["text"] = base64.b64encode(flow.response.content).decode()
        entry["response"]["content"]["encoding"] = "base64"
    else:
        entry["response"]["content"]["text"] = flow.response.get_text(strict=False)

    if flow.request.method in ["POST", "PUT", "PATCH"]:
        # Form fields are decoded with surrogateescape so that bytes which
        # are not valid UTF-8 do not raise.
        params = [
            {"name": a.decode("utf8", "surrogateescape"), "value": b.decode("utf8", "surrogateescape")}
            for a, b in flow.request.urlencoded_form.items(multi=True)
        ]
        entry["request"]["postData"] = {
            "mimeType": flow.request.headers.get("Content-Type", ""),
            "text": flow.request.get_text(strict=False),
            "params": params
        }

    if flow.server_conn.connected():
        entry["serverIPAddress"] = str(flow.server_conn.ip_address.address[0])

    HAR["log"]["entries"].append(entry)
Ejemplo n.º 4
0
def response(flow):
    """
    Record a completed flow as one entry in the global HAR log.

    Called when a server response has been received. Builds a HAR entry
    from ``flow.request``, ``flow.response`` and ``flow.server_conn`` and
    appends it to ``HAR["log"]["entries"]``. The server connection is added
    to ``SERVERS_SEEN`` the first time it is encountered so connect/TLS
    setup times are only reported once per connection.

    Args:
        flow: The flow whose request/response pair should be recorded.
    """

    # -1 indicates that these values do not apply to current request
    ssl_time = -1
    connect_time = -1

    if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
        connect_time = (flow.server_conn.timestamp_tcp_setup -
                        flow.server_conn.timestamp_start)

        if flow.server_conn.timestamp_tls_setup is not None:
            ssl_time = (flow.server_conn.timestamp_tls_setup -
                        flow.server_conn.timestamp_tcp_setup)

        SERVERS_SEEN.add(flow.server_conn)

    # Calculate raw timings from timestamps. DNS timings can not be calculated
    # for lack of a way to measure it. The same goes for HAR blocked.
    # mitmproxy will open a server connection as soon as it receives the host
    # and port from the client connection. So, the time spent waiting is actually
    # spent waiting between request.timestamp_end and response.timestamp_start
    # thus it correlates to HAR wait instead.
    timings_raw = {
        'send': flow.request.timestamp_end - flow.request.timestamp_start,
        'receive': flow.response.timestamp_end - flow.response.timestamp_start,
        'wait': flow.response.timestamp_start - flow.request.timestamp_end,
        'connect': connect_time,
        'ssl': ssl_time,
    }

    # HAR timings are integers in ms, so we re-encode the raw timings to that
    # format; the -1 sentinel is passed through unscaled.
    timings = {
        k: int(1000 * v) if v != -1 else -1
        for k, v in timings_raw.items()
    }

    # full_time is the sum of all timings.
    # Timings set to -1 will be ignored as per spec.
    full_time = sum(v for v in timings.values() if v > -1)

    started_date_time = datetime.fromtimestamp(flow.request.timestamp_start,
                                               timezone.utc).isoformat()

    # Response body size and encoding. A missing body counts as zero bytes.
    response_body_size = len(
        flow.response.raw_content) if flow.response.raw_content else 0
    response_body_decoded_size = len(
        flow.response.content) if flow.response.content else 0
    response_body_compression = response_body_decoded_size - response_body_size

    # Guard the request body the same way as the response body above, so a
    # request without content does not raise TypeError in len().
    request_body_size = len(
        flow.request.content) if flow.request.content else 0

    entry = {
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": {
            "method": flow.request.method,
            "url": flow.request.url,
            "httpVersion": flow.request.http_version,
            "cookies": format_request_cookies(flow.request.cookies.fields),
            "headers": name_value(flow.request.headers),
            "queryString": name_value(flow.request.query or {}),
            "headersSize": len(str(flow.request.headers)),
            "bodySize": request_body_size,
        },
        "response": {
            "status": flow.response.status_code,
            "statusText": flow.response.reason,
            "httpVersion": flow.response.http_version,
            "cookies": format_response_cookies(flow.response.cookies.fields),
            "headers": name_value(flow.response.headers),
            "content": {
                "size": response_body_size,
                "compression": response_body_compression,
                "mimeType": flow.response.headers.get('Content-Type', '')
            },
            "redirectURL": flow.response.headers.get('Location', ''),
            "headersSize": len(str(flow.response.headers)),
            "bodySize": response_body_size,
        },
        "cache": {},
        "timings": timings,
    }

    # Store binary data as base64
    if strutils.is_mostly_bin(flow.response.content):
        entry["response"]["content"]["text"] = base64.b64encode(
            flow.response.content).decode()
        entry["response"]["content"]["encoding"] = "base64"
    else:
        entry["response"]["content"]["text"] = flow.response.get_text(
            strict=False)

    if flow.request.method in ["POST", "PUT", "PATCH"]:
        params = [{
            "name": a,
            "value": b
        } for a, b in flow.request.urlencoded_form.items(multi=True)]
        entry["request"]["postData"] = {
            "mimeType": flow.request.headers.get("Content-Type", ""),
            "text": flow.request.get_text(strict=False),
            "params": params
        }

    if flow.server_conn.connected():
        entry["serverIPAddress"] = str(flow.server_conn.ip_address[0])

    HAR["log"]["entries"].append(entry)
Ejemplo n.º 5
0
 def update_har_data_from_response(self, flow):
     """
     Build a HAR entry for ``flow`` and append it to ``self.har_data``.

     Reads timing, header, cookie and body information from
     ``flow.request``, ``flow.response`` and ``flow.server_conn``. The
     server connection is added to ``self.servers_seen`` the first time it
     is encountered so connect/TLS setup times are only reported once per
     connection.

     Args:
         flow: The flow whose request/response pair should be recorded.
     """
     # -1 marks a timing that does not apply to this request.
     ssl_time = -1
     connect_time = -1
     if flow.server_conn and flow.server_conn not in self.servers_seen:
         connect_time = (flow.server_conn.timestamp_tcp_setup -
                         flow.server_conn.timestamp_start)
         if flow.server_conn.timestamp_tls_setup is not None:
             ssl_time = (flow.server_conn.timestamp_tls_setup -
                         flow.server_conn.timestamp_tcp_setup)
         self.servers_seen.add(flow.server_conn)
     timings_raw = {
         'send': flow.request.timestamp_end - flow.request.timestamp_start,
         'receive':
         flow.response.timestamp_end - flow.response.timestamp_start,
         'wait': flow.response.timestamp_start - flow.request.timestamp_end,
         'connect': connect_time,
         'ssl': ssl_time,
     }
     # Convert seconds to integer milliseconds, but pass the -1 sentinel
     # through unscaled; multiplying it would emit -1000 instead of -1.
     timings = {
         k: int(1000 * v) if v != -1 else -1
         for k, v in timings_raw.items()
     }
     # Sentinel (-1) timings are excluded from the total.
     full_time = sum(v for v in timings.values() if v > -1)
     started_date_time = datetime.fromtimestamp(
         flow.request.timestamp_start, timezone.utc).isoformat()
     # A missing body (None) counts as zero bytes rather than raising
     # TypeError in len().
     raw_body = flow.response.raw_content
     decoded_body = flow.response.content
     response_body_size = len(raw_body) if raw_body else 0
     response_body_decoded_size = len(decoded_body) if decoded_body else 0
     response_body_compression = response_body_decoded_size - response_body_size
     request_body = flow.request.content
     request_body_size = len(request_body) if request_body else 0
     entry = {
         'startedDateTime': started_date_time,
         'time': full_time,
         'request': {
             'method': flow.request.method,
             'url': flow.request.url,
             'httpVersion': flow.request.http_version,
             'cookies': format_request_cookies(flow.request.cookies.fields),
             'headers': name_value(flow.request.headers),
             'queryString': name_value(flow.request.query or {}),
             'headersSize': len(str(flow.request.headers)),
             'bodySize': request_body_size,
         },
         'response': {
             'status': flow.response.status_code,
             'statusText': flow.response.reason,
             'httpVersion': flow.response.http_version,
             'cookies':
             format_response_cookies(flow.response.cookies.fields),
             'headers': name_value(flow.response.headers),
             'content': {
                 'size': response_body_size,
                 'compression': response_body_compression,
                 'mimeType': flow.response.headers.get('Content-Type', '')
             },
             'redirectURL': flow.response.headers.get('Location', ''),
             'headersSize': len(str(flow.response.headers)),
             'bodySize': response_body_size,
         },
         'cache': {},
         'timings': timings,
     }
     # Mostly-binary bodies are stored base64-encoded; others as text.
     if strutils.is_mostly_bin(flow.response.content):
         entry['response']['content']['text'] = base64.b64encode(
             flow.response.content).decode()
         entry['response']['content']['encoding'] = 'base64'
     else:
         entry['response']['content']['text'] = flow.response.get_text(
             strict=False)
     # NOTE(review): disabled variant that would skip body capture based on
     # self.adjust_flow(flow); kept as found.
     # adjustment_method = self.adjust_flow(flow)
     # if adjustment_method not in ['strip_content_both', 'strip_content_response']:
     #     if strutils.is_mostly_bin(flow.response.content):
     #         entry['response']['content']['text'] = base64.b64encode(flow.response.content).decode()
     #         entry['response']['content']['encoding'] = 'base64'
     #     else:
     #         entry['response']['content']['text'] = flow.response.get_text(strict=False)
     # else:
     #     entry['response']['content']['_adjustment_method'] = adjustment_method
     # if adjustment_method not in ['strip_content_both', 'strip_content_request']:
     #     entry['request']['_content'] = {}
     #     if strutils.is_mostly_bin(flow.request.content):
     #         entry['request']['_content']['text'] = base64.b64encode(flow.request.content).decode()
     #         entry['request']['_content']['encoding'] = 'base64'
     #     else:
     #         entry['request']['_content']['text'] = flow.request.get_text(strict=False)
     # else:
     #     entry['request']['_content']['adjustment_method'] = adjustment_method
     if flow.request.method in ['POST', 'PUT', 'PATCH']:
         params = [{
             'name': a,
             'value': b
         } for a, b in flow.request.urlencoded_form.items(multi=True)]
         entry['request']['postData'] = {
             'mimeType': flow.request.headers.get('Content-Type', ''),
             'text': flow.request.get_text(strict=False),
             'params': params,
         }
     if flow.server_conn.connected():
         entry['serverIPAddress'] = str(flow.server_conn.ip_address[0])
     self.har_data['log']['entries'].append(entry)
Ejemplo n.º 6
0
    def response(self, flow):
        """
        Fill in the response half of the HAR entry belonging to ``flow``.

        Fetches the entry via ``self.get_har_entry(flow)``, returns early
        for flows marked ``WhiteListFiltered`` / ``BlackListFiltered`` in
        ``flow.metadata``, and otherwise populates the entry's
        ``response``, ``time``, ``pageref``, ``timings`` and (when the
        server connection is still connected) ``serverIPAddress`` fields.
        Which response parts are captured depends on
        ``self.har_capture_types``.

        Args:
            flow: The flow for which a server response has been received.
        """
        har_entry = self.get_har_entry(flow)

        ctx.log.debug('Incoming response for request to url: {}'.format(
            flow.request.url))

        if 'WhiteListFiltered' in flow.metadata or 'BlackListFiltered' in flow.metadata:
            ctx.log.debug('Black/White list filtered, return nothing.')
            return

        # NOTE(review): presumably ensures a HAR with the default page
        # exists before the entry is updated - confirm against
        # get_or_create_har.
        self.get_or_create_har(DEFAULT_PAGE_REF, DEFAULT_PAGE_TITLE, True)

        # -1 indicates that these values do not apply to current request
        ssl_time = -1
        connect_time = -1

        if flow.server_conn and flow.server_conn not in SERVERS_SEEN:
            connect_time = (flow.server_conn.timestamp_tcp_setup -
                            flow.server_conn.timestamp_start)

            if flow.server_conn.timestamp_tls_setup is not None:
                ssl_time = (flow.server_conn.timestamp_tls_setup -
                            flow.server_conn.timestamp_tcp_setup)

            SERVERS_SEEN.add(flow.server_conn)

        timings = self.calculate_timings(connect_time, flow, ssl_time)
        # Carry over the DNS timing already stored on the entry.
        timings['dnsNanos'] = int(har_entry['timings']['dnsNanos'])

        # Timings set to -1 are excluded from the total.
        full_time = sum(v for v in timings.values() if v > -1)

        # Response body size and encoding
        response_body_size = len(
            flow.response.raw_content) if flow.response.raw_content else 0
        response_body_decoded_size = len(
            flow.response.content) if flow.response.content else 0
        response_body_compression = response_body_decoded_size - response_body_size

        har_response = self.generate_har_entry_response()
        har_response["status"] = flow.response.status_code
        har_response["statusText"] = flow.response.reason
        har_response["httpVersion"] = flow.response.http_version

        if HarCaptureTypes.RESPONSE_COOKIES in self.har_capture_types:
            har_response["cookies"] = \
                self.format_response_cookies(flow.response.cookies.fields)

        if HarCaptureTypes.RESPONSE_HEADERS in self.har_capture_types:
            har_response["headers"] = self.name_value(flow.response.headers)

        if flow.response.status_code in [300, 301, 302, 303, 307]:
            # A redirect status without a Location header must not raise
            # KeyError; fall back to '' like the unconditional assignment
            # further down.
            har_response['redirectURL'] = flow.response.headers.get(
                'Location', '')

        content = har_response['content']
        content['size'] = response_body_size
        content['compression'] = response_body_compression
        content['mimeType'] = flow.response.headers.get('Content-Type', '')

        if HarCaptureTypes.RESPONSE_CONTENT in self.har_capture_types:
            if strutils.is_mostly_bin(flow.response.content):
                # Binary bodies are only stored when explicitly requested.
                if HarCaptureTypes.RESPONSE_BINARY_CONTENT in self.har_capture_types:
                    har_response["content"]["text"] = base64.b64encode(
                        flow.response.content).decode()
                    har_response["content"]["encoding"] = "base64"
            else:
                har_response["content"]["text"] = flow.response.get_text(
                    strict=False)

        har_response["redirectURL"] = flow.response.headers.get('Location', '')
        har_response["headersSize"] = len(str(flow.response.headers))
        har_response["bodySize"] = response_body_size

        har_entry['response'] = har_response
        har_entry['time'] = self.nano_to_ms(full_time)
        har_entry['pageref'] = self.get_current_page_ref()

        har_entry['timings'] = timings

        if flow.server_conn.connected():
            har_entry["serverIPAddress"] = str(flow.server_conn.ip_address[0])

        ctx.log.debug('Populated har entry for response: {}, entry: {}'.format(
            flow.request.url, str(har_entry)))
Ejemplo n.º 7
0
def test_is_mostly_bin():
    """Table-driven check of strutils.is_mostly_bin."""
    # (input, whether is_mostly_bin is expected to be truthy)
    cases = (
        (b"foo\xFF", False),
        (b"foo" + b"\xFF" * 10, True),
        ("", False),
    )
    for sample, expect_binary in cases:
        assert bool(strutils.is_mostly_bin(sample)) == expect_binary