コード例 #1
0
def process_har(file_in):
    """Log the request and response details of every entry in a HAR file.

    For each entry the request URL, the request post data (when present),
    the presence of the ``did``, ``access_token``, ``t_token`` and ``t_time``
    request headers, and the response content are written to ``logger`` at
    INFO level.

    Args:
        file_in: Path of the HAR (JSON) file to read.

    Raises:
        KeyError: If the document has no top-level ``log`` key, or an entry
            lacks one of the ``request``/``response`` fields read below.
    """
    # Only parsing needs the file handle; release it before processing.
    with open(file_in) as json_file:
        har_data = json.load(json_file)

    # Use key 'log' to match HAR standard
    my_har = HAR.log(har_data['log'])

    for raw_entry in dict(my_har)['entries']:
        entry = dict(raw_entry)
        request = dict(entry['request'])

        logger.info("REQUEST URL \n {}".format(request['url']))
        if 'postData' in request:
            post_data = dict(request['postData'])
            # HAR allows postData to carry 'params' instead of 'text', so
            # fall back to the whole object rather than raising KeyError.
            if 'text' in post_data:
                logger.info("REQUEST DATA \n {}".format(post_data['text']))
            else:
                logger.info("REQUEST DATA \n {}".format(post_data))

        # Collect every header name first: testing each header in turn and
        # overwriting the flags would report only the last header seen.
        header_names = {dict(head)['name'] for head in request['headers']}
        has_did = 'did' in header_names
        has_access_token = 'access_token' in header_names
        has_t_token = 't_token' in header_names
        has_t_time = 't_time' in header_names

        logger.info(
            "REQUEST Headers include: did {} access token {} t_token {} t_time {}"
            .format(has_did, has_access_token, has_t_token, has_t_time))

        content = dict(entry['response']['content'])
        if 'text' in content:
            logger.info("RESPONSE CONTENT \n {}".format(content['text']))
        else:
            logger.info("RESPONSE CONTENT \n {}".format(content))
コード例 #2
0
ファイル: har_extractor.py プロジェクト: srinatar/mitmproxy
def response(context, flow):
    """
       Build a HAR entry for a completed flow and add it to context.HARLog.

       Runs once a server response has been received, i.e. when both
       flow.request and flow.response are present and finished. The entry is
       also linked to a HAR pages object through its 'pageref' when the
       request starts a new page or its referer is already known.
    """
    def name_value_list(pairs):
        # HAR represents headers and query parameters as name/value records.
        return [{"name": name, "value": value} for name, value in pairs]

    def cookie_list(cookies):
        # Cookies are shaped as tuples by MITMProxy; only the first element
        # is used as the value.
        return [{"name": name.strip(), "value": value[0]}
                for name, value in (cookies or {}).iteritems()]

    # Defaults are expressed in float seconds; the millisecond conversion
    # below turns them into -1.
    ssl_time = -.001
    connect_time = -.001
    server_conn = flow.server_conn
    if server_conn not in context.seen_server:
        # First entry on this server connection: record its connect time and
        # remember the connection so later entries reusing it do not report
        # the connect time again.
        connect_time = (server_conn.timestamp_tcp_setup -
                        server_conn.timestamp_start)
        context.seen_server.add(server_conn)

        if server_conn.timestamp_ssl_setup is not None:
            # ssl time is the span between the completed tcp setup and the
            # completed ssl setup. Without ssl setup the default is kept.
            ssl_time = (server_conn.timestamp_ssl_setup -
                        server_conn.timestamp_tcp_setup)

    req = flow.request
    resp = flow.response

    # Raw timings in seconds. DNS time is not measured here, and the waiting
    # period between the end of the request and the start of the response is
    # reported as HAR 'wait'.
    timings_raw = {
        'send': req.timestamp_end - req.timestamp_start,
        'wait': resp.timestamp_start - req.timestamp_end,
        'receive': resp.timestamp_end - resp.timestamp_start,
        'connect': connect_time,
        'ssl': ssl_time
    }

    # HAR timings are integer milliseconds.
    timings = dict((phase, int(1000 * seconds))
                   for phase, seconds in timings_raw.iteritems())

    # Total time is the sum of all timings except those left at -1.
    full_time = sum(ms for ms in timings.values() if ms > -1)

    started_date_time = datetime.fromtimestamp(
        req.timestamp_start, tz=utc).isoformat()

    request_query_string = name_value_list(req.get_query())
    request_http_version = ".".join(str(part) for part in req.httpversion)
    request_cookies = cookie_list(req.get_cookies())
    request_headers = name_value_list(req.headers)
    request_headers_size = len(str(req.headers))
    request_body_size = len(req.content)

    response_http_version = ".".join(str(part) for part in resp.httpversion)
    response_cookies = cookie_list(resp.get_cookies())
    response_headers = name_value_list(resp.headers)
    response_headers_size = len(str(resp.headers))
    response_body_size = len(resp.content)
    response_body_decoded_size = len(resp.get_decoded_content())
    response_body_compression = response_body_decoded_size - response_body_size
    response_mime_type = resp.headers.get_first('Content-Type', '')
    response_redirect_url = resp.headers.get_first('Location', '')

    har_request = {
        "method": req.method,
        "url": req.url,
        "httpVersion": request_http_version,
        "cookies": request_cookies,
        "headers": request_headers,
        "queryString": request_query_string,
        "headersSize": request_headers_size,
        "bodySize": request_body_size,
    }
    har_response = {
        "status": resp.code,
        "statusText": resp.msg,
        "httpVersion": response_http_version,
        "cookies": response_cookies,
        "headers": response_headers,
        "content": {
            "size": response_body_size,
            "compression": response_body_compression,
            "mimeType": response_mime_type,
        },
        "redirectURL": response_redirect_url,
        "headersSize": response_headers_size,
        "bodySize": response_body_size,
    }
    entry = HAR.entries({
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": har_request,
        "response": har_response,
        "cache": {},
        "timings": timings,
    })

    har_log = context.HARLog

    # A request whose url is already in the page list, or which carries no
    # referer, starts a new pages object.
    starts_new_page = (
        req.url in har_log.get_page_list()
        or req.headers.get('Referer', None) is None
    )
    if starts_new_page:
        page_id = har_log.create_page_id()
        new_page = HAR.pages({
            "startedDateTime": entry['startedDateTime'],
            "id": page_id,
            "title": req.url,
        })
        har_log.add(new_page)
        har_log.set_page_ref(req.url, page_id)
        entry['pageref'] = page_id
    else:
        # Otherwise look the referer up in the page references; when it is
        # known, point this entry at the same page and register the
        # reference again to build a reference tree.
        known_ref = har_log.get_page_ref(
            req.headers.get('Referer', (None, ))[0])
        if known_ref is not None:
            entry['pageref'] = har_log.get_page_ref(req.headers['Referer'][0])
            har_log.set_page_ref(req.headers['Referer'][0], entry['pageref'])

    har_log.add(entry)
コード例 #3
0
def response(context, flow):
    """
       Turn a finished flow into a HAR entry and store it in context.HARLog.

       Called when a server response has been received, at which point both
       the request and the response of the flow are complete. A 'pageref' is
       attached to the entry when the request opens a new page or when its
       referer already maps to a known page.
    """
    server_conn = flow.server_conn
    request = flow.request
    reply = flow.response

    # Both defaults are float seconds and end up as -1 once converted to
    # integer milliseconds below.
    connect_time = -.001
    ssl_time = -.001
    if server_conn not in context.seen_server:
        # Only the first entry on a server connection reports connect time;
        # the connection is remembered so reused connections skip this.
        connect_time = (
            server_conn.timestamp_tcp_setup - server_conn.timestamp_start
        )
        context.seen_server.add(server_conn)

        if server_conn.timestamp_ssl_setup is not None:
            # Time between the finished tcp setup and the finished ssl setup.
            # When no ssl setup happened the default above is kept.
            ssl_time = (
                server_conn.timestamp_ssl_setup -
                server_conn.timestamp_tcp_setup
            )

    # Raw timings in seconds. DNS time cannot be measured here; the gap
    # between request.timestamp_end and response.timestamp_start is reported
    # as HAR 'wait'.
    timings_raw = {
        'send': request.timestamp_end - request.timestamp_start,
        'wait': reply.timestamp_start - request.timestamp_end,
        'receive': reply.timestamp_end - reply.timestamp_start,
        'connect': connect_time,
        'ssl': ssl_time
    }

    # HAR expects integer milliseconds.
    timings = {
        phase: int(1000 * seconds)
        for phase, seconds in timings_raw.iteritems()
    }

    # Add up every timing that is not left at -1.
    full_time = 0
    for millis in timings.values():
        if millis > -1:
            full_time += millis

    started_date_time = datetime.utcfromtimestamp(
        request.timestamp_start).isoformat()

    request_query_string = []
    for name, value in request.query or {}:
        request_query_string.append({"name": name, "value": value})

    response_body_size = len(reply.content)
    response_body_decoded_size = len(reply.get_decoded_content())
    response_body_compression = response_body_decoded_size - response_body_size

    request_section = {
        "method": request.method,
        "url": request.url,
        "httpVersion": request.http_version,
        "cookies": format_cookies(request.cookies),
        "headers": format_headers(request.headers),
        "queryString": request_query_string,
        "headersSize": len(str(request.headers)),
        "bodySize": len(request.content),
    }
    response_section = {
        "status": reply.status_code,
        "statusText": reply.msg,
        "httpVersion": reply.http_version,
        "cookies": format_cookies(reply.cookies),
        "headers": format_headers(reply.headers),
        "content": {
            "size": response_body_size,
            "compression": response_body_compression,
            "mimeType": reply.headers.get('Content-Type', '')
        },
        "redirectURL": reply.headers.get('Location', ''),
        "headersSize": len(str(reply.headers)),
        "bodySize": response_body_size,
    }
    entry = HAR.entries({
        "startedDateTime": started_date_time,
        "time": full_time,
        "request": request_section,
        "response": response_section,
        "cache": {},
        "timings": timings,
    })

    har_log = context.HARLog

    # A url already present in the page list, or a request without a
    # referer, is recorded as a new pages object.
    opens_new_page = (
        request.url in har_log.get_page_list()
        or request.headers.get('Referer') is None
    )
    if opens_new_page:
        page_id = har_log.create_page_id()
        page = HAR.pages({
            "startedDateTime": entry['startedDateTime'],
            "id": page_id,
            "title": request.url,
        })
        har_log.add(page)
        har_log.set_page_ref(request.url, page_id)
        entry['pageref'] = page_id
    else:
        # Otherwise resolve the referer through the page references; when it
        # is known, reuse that page for this entry and register the
        # reference again to build a reference tree.
        referer_page = har_log.get_page_ref(request.headers.get('Referer'))
        if referer_page is not None:
            entry['pageref'] = har_log.get_page_ref(
                request.headers['Referer'])
            har_log.set_page_ref(request.headers['Referer'],
                                 entry['pageref'])

    har_log.add(entry)