예제 #1
0
def parse_combined(path):
    """Load a combined file and split it into inbound and outbound streams.

    The file is read as bytes and is expected to contain, in order:
    a preamble, the inbound marker line, the inbound data,
    the outbound marker line, and the outbound data.

    :param path: location of the combined file;
        it is passed through ``decode_path`` before any other use.
    :return: a tuple ``(inbound, outbound, scheme, preamble)`` where
        `inbound` and `outbound` are ``Stream`` objects over
        the respective data, `scheme` is ``u'https'`` if the decoded path
        ends with ``.https``, `None` if it ends with ``.noscheme``,
        and ``u'http'`` otherwise, and `preamble` is the text
        before the inbound marker, decoded as UTF-8.
    :raises InputError: if either marker is missing
        or the preamble is not valid UTF-8.
    """
    inbound_marker = b'======== BEGIN INBOUND STREAM ========\r\n'
    outbound_marker = b'======== BEGIN OUTBOUND STREAM ========\r\n'

    path = decode_path(path)

    # The scheme is implied by the file name suffix.
    scheme = u'http'
    for (suffix, implied) in ((u'.https', u'https'), (u'.noscheme', None)):
        if path.endswith(suffix):
            scheme = implied
            break

    with io.open(path, 'rb') as combined_file:
        contents = combined_file.read()

    # An empty separator from ``partition`` means the marker was not found.
    (raw_preamble, found, remainder) = contents.partition(inbound_marker)
    if not found:
        raise InputError(u'%s: bad combined file: no inbound marker' % path)
    try:
        preamble = raw_preamble.decode('utf-8')
    except UnicodeError as exc:  # pragma: no cover
        raise InputError(u'%s: invalid UTF-8 in preamble' % path) from exc

    (inbound_data, found, outbound_data) = remainder.partition(outbound_marker)
    if not found:  # pragma: no cover
        raise InputError(u'%s: bad combined file: no outbound marker' % path)

    def as_stream(payload, label):
        # Wrap in-memory bytes so they can be consumed like an open file.
        return Stream(io.BufferedReader(io.BytesIO(payload)),
                      name=path + label)

    inbound = as_stream(inbound_data, u' (inbound)')
    outbound = as_stream(outbound_data, u' (outbound)')
    return (inbound, outbound, scheme, preamble)
예제 #2
0
def _parse_paths(inbound_path, outbound_path, scheme=u'http'):
    """Yield what ``parse_streams`` produces for the files at the given paths.

    Each non-empty path is opened in binary mode and wrapped in a ``Stream``
    named after the decoded path. A falsy path leaves the corresponding
    stream as `None`.

    :param inbound_path: path to the inbound data, or a falsy value.
    :param outbound_path: path to the outbound data, or a falsy value.
    :param scheme: passed through to ``parse_streams`` unchanged.

    Every file opened here is closed in the ``finally`` clause,
    including when opening or wrapping a later file fails.
    """
    handles = []        # Files opened so far, in the order to close them.
    streams = []        # One entry per path: a ``Stream`` or `None`.

    try:
        for source_path in (inbound_path, outbound_path):
            if not source_path:
                streams.append(None)
                continue
            handle = io.open(source_path, 'rb')
            handles.append(handle)
            streams.append(Stream(handle, name=decode_path(source_path)))

        (inbound, outbound) = streams
        for exchange in parse_streams(inbound, outbound, scheme):
            yield exchange

    finally:
        for handle in handles:
            handle.close()
예제 #3
0
파일: har.py 프로젝트: jayvdb/httpolice
def _process_request(data, creator, path):
    """Build a ``Request`` from one request entry of a HAR file.

    :param data: mapping describing the request (presumably
        the ``request`` object of a HAR entry -- confirm against the caller).
        The keys ``method``, ``url`` and ``bodySize`` are read
        unconditionally; ``postData`` is optional.
    :param creator: object describing the program that produced the file.
        Its ``is_chrome``, ``is_firefox``, ``is_insomnia`` and ``is_fiddler``
        flags select the workarounds applied below.
    :param path: used only in the remark attached to the request.
    :return: the ``Request``, with ``is_to_proxy`` set to `None`
        and, when post text is available, ``unicode_body`` set to it.
    """
    (version, header_entries, pseudo_headers) = _process_message(data, creator)

    looks_like_spdy = (creator.is_chrome and version == http11 and
                       u':host' in pseudo_headers)
    if looks_like_spdy:
        # SPDY exported from Chrome.
        version = None

    # Firefox exports "Connection: keep-alive" on HTTP/2 requests
    # (which triggers notice 1244)
    # even though it does not actually send it
    # (this can be verified with SSLKEYLOGFILE + Wireshark).
    if creator.is_firefox and version == http2:
        bogus_entry = (h.connection, u'keep-alive')
        header_entries = [(name, value) for (name, value) in header_entries
                          if (name, value) != bogus_entry]

    method = data['method']
    header_names = set(name for (name, _) in header_entries)

    url_parts = urlparse(data['url'])
    scheme = url_parts.scheme

    if creator.is_insomnia:
        # Fill in headers that are missing from the export.
        # https://github.com/getinsomnia/insomnia/issues/840
        if h.host not in header_names:
            header_entries.insert(0, (h.host, url_parts.netloc))
        if h.user_agent not in header_names:
            # Synthesize a product token from the version
            # that the creator object reconstructs.
            insomnia_version = creator.reconstruct_insomnia_version()
            header_entries.append((h.user_agent,
                                   u'insomnia/%s' % insomnia_version))
        if h.accept not in header_names:
            header_entries.append((h.accept, u'*/*'))
        header_names = set(name for (name, _) in header_entries)

    if method == m.CONNECT:
        target = url_parts.netloc
    elif h.host not in header_names:
        # If the request has no ``Host`` header,
        # the user won't be able to see the target host
        # unless we set the full URL ("absolute form") as the target.
        # To prevent this from having an effect on the proxy logic,
        # we explicitly set `Request.is_to_proxy` to `None` later.
        target = data['url']
    elif url_parts.query:
        # With HAR, we can't tell if the request was to a proxy or to a server.
        # So we force most requests into the "origin form" of the target.
        target = url_parts.path + u'?' + url_parts.query
    else:
        # Origin form again, but there is no query string to append.
        target = url_parts.path

    body_size = data['bodySize']
    if body_size == 0:
        # No body, or a body of length 0 (which we do not distinguish).
        body = b''
    elif body_size > 0:
        # A message body was present, but we cannot recover it,
        # because message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown. Maybe there was a body, maybe there wasn't.
        body = None

    text = None
    post_data = data.get('postData')
    post_text = post_data.get('text') if post_data else None
    if post_text:
        text = post_text

        if creator.is_firefox:
            is_form = (post_data['mimeType'] ==
                       media.application_x_www_form_urlencoded)
            if is_form and u'\r\n' in text:
                # Firefox puts what looks like header fields
                # in front of the actual form data.
                # Everything before the last CRLF is tried as header fields;
                # if it does not parse, the text is left untouched.
                (header_blob, _, form_text) = text.rpartition(u'\r\n')
                try:
                    raw = io.BytesIO(header_blob.encode('iso-8859-1'))
                    extra_entries = framing1.parse_header_fields(
                        Stream(io.BufferedReader(raw)))
                except (UnicodeError, ParseError):  # pragma: no cover
                    pass
                else:
                    header_entries.extend(extra_entries)
                    text = form_text

        if creator.is_fiddler and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT requests.
            text = None
            body = b''

    req = Request(scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # Unknown, because the target form was forced above
    # (see the comments on the target selection).
    req.is_to_proxy = None
    return req