예제 #1
0
파일: har.py 프로젝트: vfaronov/httpolice
def _process_request(data, creator, path):
    """Build a `Request` from the ``request`` object of a HAR entry.

    :param data:
        the HAR request dictionary. ``method``, ``url`` and ``bodySize``
        are always read; ``postData`` is read when present.
    :param creator:
        passed on to `_process_message`, and tested for membership
        in ``FIDDLER`` to enable a Fiddler-specific workaround.
    :param path:
        only used to build the ``remark`` of the resulting request.
    :return:
        the new `Request`, with its `is_to_proxy` forced to `None`.
    """
    version, header_entries = _process_message(data, creator)
    method = data['method']
    url = data['url']
    parts = urlparse(url)

    # HAR does not say whether the request went to a proxy or to a server,
    # so the request target has to be reconstructed from the URL.
    if method == m.CONNECT:
        target = parts.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # The host is visible in the ``Host`` header,
        # so the "origin form" (path plus optional query) is enough.
        if parts.query:
            target = parts.path + u'?' + parts.query
        else:
            target = parts.path
    else:
        # Without a ``Host`` header, the full URL ("absolute form")
        # is the only way to show the target host to the user.
        # `is_to_proxy` is reset to `None` below, so that this choice
        # does not influence the proxy logic.
        target = url

    size = data['bodySize']
    if size == 0:
        # Either no body or an empty one; the two are not distinguished.
        body = b''
    elif size > 0:
        # There was a body, but it cannot be recovered:
        # the message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown: there may or may not have been a body.
        body = None

    post = data.get('postData')
    text = (post.get('text') or None) if post else None
    if text is not None and creator in FIDDLER and \
            method == m.CONNECT and u'Fiddler' in text:
        # Fiddler's HAR export attaches debug information
        # as a body on CONNECT requests. Discard it.
        text = None
        body = b''

    req = Request(parts.scheme, method, target, version, header_entries,
                  body, remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # See the comment on the "absolute form" branch above.
    req.is_to_proxy = None
    return req
예제 #2
0
파일: har.py 프로젝트: jayvdb/httpolice
def _process_request(data, creator, path):
    """Build a `Request` from the ``request`` object of a HAR entry.

    Several exporter-specific workarounds are applied, selected by
    the ``is_chrome``, ``is_firefox``, ``is_insomnia`` and ``is_fiddler``
    flags of `creator`.

    :param data:
        the HAR request dictionary. ``method``, ``url`` and ``bodySize``
        are always read; ``postData`` is read when present.
    :param creator:
        describes the program that exported the HAR file;
        also passed on to `_process_message`.
    :param path:
        only used to build the ``remark`` of the resulting request.
    :return:
        the new `Request`, with its `is_to_proxy` forced to `None`.
    """
    (version, header_entries, pseudo_headers) = _process_message(data,
                                                                 creator)
    if creator.is_chrome and version == http11 and u':host' in pseudo_headers:
        # This is SPDY as exported by Chrome.
        version = None

    if creator.is_firefox and version == http2:
        # Firefox exports "Connection: keep-alive" on HTTP/2 requests
        # (which triggers notice 1244), even though it does not actually
        # send it (this can be verified with SSLKEYLOGFILE + Wireshark).
        # Drop such entries.
        unwanted = (h.connection, u'keep-alive')
        header_entries = [(name, value) for (name, value) in header_entries
                          if (name, value) != unwanted]

    method = data['method']
    header_names = set(name for (name, _) in header_entries)

    url = data['url']
    parts = urlparse(url)

    if creator.is_insomnia:
        # Fill in headers that are absent from Insomnia's export.
        # https://github.com/getinsomnia/insomnia/issues/840
        if h.host not in header_names:
            header_entries.insert(0, (h.host, parts.netloc))
        if h.user_agent not in header_names:
            # The ``User-Agent`` value is rebuilt from the version
            # that `creator` reconstructs.
            insomnia_version = creator.reconstruct_insomnia_version()
            header_entries.append(
                (h.user_agent, u'insomnia/%s' % insomnia_version))
        if h.accept not in header_names:
            header_entries.append((h.accept, u'*/*'))
        header_names = set(name for (name, _) in header_entries)

    # HAR does not say whether the request went to a proxy or to a server,
    # so the request target has to be reconstructed from the URL.
    if method == m.CONNECT:
        target = parts.netloc
    elif h.host in header_names:
        # The host is visible in the ``Host`` header,
        # so the "origin form" (path plus optional query) is enough.
        if parts.query:
            target = parts.path + u'?' + parts.query
        else:
            target = parts.path
    else:
        # Without a ``Host`` header, the full URL ("absolute form")
        # is the only way to show the target host to the user.
        # `is_to_proxy` is reset to `None` below, so that this choice
        # does not influence the proxy logic.
        target = url

    size = data['bodySize']
    if size == 0:
        # Either no body or an empty one; the two are not distinguished.
        body = b''
    elif size > 0:
        # There was a body, but it cannot be recovered:
        # the message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown: there may or may not have been a body.
        body = None

    text = None
    post = data.get('postData')
    if post and post.get('text'):
        text = post['text']

        if creator.is_firefox:
            if post['mimeType'] == media.application_x_www_form_urlencoded \
                    and u'\r\n' in text:
                # Firefox really does output this: everything before
                # the last CRLF is tried as a block of header fields,
                # and what follows it as the actual form data.
                (leading, trailing) = text.rsplit(u'\r\n', 1)
                try:
                    raw = leading.encode('iso-8859-1')
                    buf = io.BufferedReader(io.BytesIO(raw))
                    more_entries = framing1.parse_header_fields(Stream(buf))
                except (UnicodeError, ParseError):  # pragma: no cover
                    # Not header fields after all; keep the text as is.
                    pass
                else:
                    header_entries.extend(more_entries)
                    text = trailing

        if creator.is_fiddler and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export attaches debug information
            # as a body on CONNECT requests. Discard it.
            text = None
            body = b''

    req = Request(parts.scheme, method, target, version, header_entries,
                  body, remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # See the comment on the "absolute form" branch above.
    req.is_to_proxy = None
    return req
예제 #3
0
파일: har.py 프로젝트: garmann/httpolice
def _process_request(data, creator):
    """Build a `Request` from the ``request`` object of a HAR entry.

    Several exporter-specific workarounds are applied, selected by
    testing `creator` for membership in ``CHROME``, ``FIREFOX``
    and ``FIDDLER``.

    :param data:
        the HAR request dictionary. ``method``, ``url`` and ``bodySize``
        are always read; ``postData`` is read when present.
    :param creator:
        identifies the program that exported the HAR file;
        also passed on to `_process_message`.
    :return:
        the new `Request`, with its `is_to_proxy` forced to `None`.
    """
    (version, header_entries, pseudo_headers) = _process_message(data,
                                                                 creator)
    if creator in CHROME and version == http11 and u':host' in pseudo_headers:
        # This is SPDY as exported by Chrome.
        version = None

    if creator in FIREFOX and version == http2:
        # Firefox exports "Connection: keep-alive" on HTTP/2 requests
        # (which triggers notice 1244), even though it does not actually
        # send it (this can be verified with SSLKEYLOGFILE + Wireshark).
        # Drop such entries.
        unwanted = (h.connection, u'keep-alive')
        header_entries = [(name, value) for (name, value) in header_entries
                          if (name, value) != unwanted]

    method = data['method']
    url = data['url']
    parts = urlparse(url)

    # HAR does not say whether the request went to a proxy or to a server,
    # so the request target has to be reconstructed from the URL.
    if method == m.CONNECT:
        target = parts.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # The host is visible in the ``Host`` header,
        # so the "origin form" (path plus optional query) is enough.
        if parts.query:
            target = parts.path + u'?' + parts.query
        else:
            target = parts.path
    else:
        # Without a ``Host`` header, the full URL ("absolute form")
        # is the only way to show the target host to the user.
        # `is_to_proxy` is reset to `None` below, so that this choice
        # does not influence the proxy logic.
        target = url

    size = data['bodySize']
    if size == 0:
        # Either no body or an empty one; the two are not distinguished.
        body = b''
    elif size > 0:
        # There was a body, but it cannot be recovered:
        # the message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable
    else:
        # Unknown: there may or may not have been a body.
        body = None

    text = None
    post = data.get('postData')
    if post and post.get('text'):
        text = post['text']

        if creator in FIREFOX:
            if post['mimeType'] == media.application_x_www_form_urlencoded \
                    and u'\r\n' in text:
                # Firefox really does output this: everything before
                # the last CRLF is tried as a block of header fields,
                # and what follows it as the actual form data.
                (leading, trailing) = text.rsplit(u'\r\n', 1)
                try:
                    # The CRLF removed by the split is put back
                    # before the bytes are handed to the parser.
                    raw = (leading + u'\r\n').encode('iso-8859-1')
                    more_entries = framing1.parse_header_fields(Stream(raw))
                except (UnicodeError, ParseError):
                    # Not header fields after all; keep the text as is.
                    pass
                else:
                    header_entries.extend(more_entries)
                    text = trailing

        if creator in FIDDLER and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export attaches debug information
            # as a body on CONNECT requests. Discard it.
            text = None
            body = b''

    req = Request(parts.scheme, method, target, version, header_entries, body)
    if text is not None:
        req.unicode_body = text
    # See the comment on the "absolute form" branch above.
    req.is_to_proxy = None
    return req