예제 #1
0
파일: har.py 프로젝트: vfaronov/httpolice
def _process_request(data, creator, path):
    """Convert the request object of one HAR entry into a `Request`.

    :param data: the HAR request object (a dict). The keys read here are
        ``method``, ``url``, ``bodySize`` and, optionally, ``postData``.
    :param creator: identifies the program that produced the HAR file;
        it is passed on to `_process_message` and checked against `FIDDLER`.
    :param path: where the data came from; only used in the remark.
    :return: the new `Request`, with `is_to_proxy` set to `None`.
    """
    version, header_entries = _process_message(data, creator)
    method = data['method']
    url = urlparse(data['url'])

    if method == m.CONNECT:
        target = url.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # HAR does not tell us whether the request went to a proxy
        # or to a server, so such requests get the "origin form" target.
        target = url.path
        if url.query:
            target = target + (u'?' + url.query)
    else:
        # Without a ``Host`` header, the only way to show the target host
        # to the user is to use the full URL ("absolute form").
        # `is_to_proxy` is reset to `None` at the end of this function
        # so that this choice does not affect the proxy logic.
        target = data['url']

    size = data['bodySize']
    if size > 0:
        # There was a body, but it cannot be recovered: the message body
        # is the body *with* ``Content-Encoding`` applied,
        # and HAR does not include that.
        body = Unavailable()
    elif size == 0:
        # Either no body or a zero-length body (not distinguished here).
        body = b''
    else:
        # Unknown: there may or may not have been a body.
        body = None

    post = data.get('postData') or {}
    text = post.get('text') or None
    if text is not None and creator in FIDDLER \
            and method == m.CONNECT and u'Fiddler' in text:
        # Fiddler's HAR export attaches a body with debug information
        # to CONNECT requests; discard it.
        text = None
        body = b''

    req = Request(url.scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # Undo any conclusion drawn from an absolute-form target (see above).
    req.is_to_proxy = None
    return req
예제 #2
0
def _parse_request_heading(stream, scheme=None):
    """Parse a request line and the header fields that follow it.

    :param stream: the stream to consume from.
    :param scheme: passed through to `Request` as its scheme.
    :return: a `Request` with ``body=None``, or `Unavailable`
        if a `ParseError` was raised while parsing.
    """
    start = stream.point
    try:
        with stream:
            verb = Method(stream.consume_regex(rfc7230.method))
            stream.consume_regex(SP)
            target = stream.consume_regex(b'[^\\s]+', u'request target')
            stream.consume_regex(SP)
            version = HTTPVersion(
                stream.consume_regex(rfc7230.HTTP_version))
            _parse_line_ending(stream)
            fields = parse_header_fields(stream)
    except ParseError as exc:
        # Flag the stream as no longer sane and report complaint 1006.
        stream.sane = False
        stream.complain(1006, error=exc)
        return Unavailable

    request = Request(
        scheme, verb, target, version, fields,
        body=None,
        remark=u'from %s, offset %d' % (stream.name, start),
    )
    # Hand over the complaints collected on the stream to the request.
    stream.dump_complaints(request.complain, place=u'request heading')
    return request
예제 #3
0
def test_effective_uri_1():
    """Origin-form target plus a ``Host`` header yields a full URI."""
    headers = [(h.host, b'www.example.org:8080')]
    request = Request(u'http', m.GET, u'/pub/WWW/TheProject.html', http11,
                      headers, b'')
    expected = u'http://www.example.org:8080/pub/WWW/TheProject.html'
    assert request.effective_uri == expected
예제 #4
0
def test_construct_exchange():
    """Build an `Exchange` from plain values and check reprs and types.

    The request and responses are constructed from plain strings and
    numbers; the assertions at the end check that the corresponding
    attributes are instances of the project's wrapper types.
    """
    request = Request(u'http', u'GET', u'/', u'HTTP/1.1',
                      [(u'Host', b'example.com')], None)
    assert repr(request) == '<Request GET>'

    first_resp = Response(u'HTTP/1.1', 123, u'Please wait', [], None)
    assert repr(first_resp) == '<Response 123>'

    second_resp = Response(u'HTTP/1.1', 200, u'OK',
                           [(u'Content-Length', b'14')],
                           b'Hello world!\r\n', None)
    exchange = Exchange(request, [first_resp, second_resp])
    expected_repr = \
        'Exchange(<Request GET>, [<Response 123>, <Response 200>])'
    assert repr(exchange) == expected_repr

    # Request attributes.
    assert isinstance(exchange.request.method, Method)
    assert isinstance(exchange.request.version, HTTPVersion)
    assert isinstance(exchange.request.header_entries[0].name, FieldName)

    # Response attributes.
    assert isinstance(exchange.responses[0].version, HTTPVersion)
    assert isinstance(exchange.responses[0].status, StatusCode)
    assert isinstance(exchange.responses[1].header_entries[0].name,
                      FieldName)
예제 #5
0
def _parse_request_heading(stream, scheme=None):
    """Read a request line and header fields from `stream`.

    The request line must consist of exactly three space-separated pieces,
    the last of which must match `HTTP_VERSION`; otherwise the error
    produced by ``stream.error`` is raised.

    :param stream: the stream to read from.
    :param scheme: passed through to `Request` as its scheme.
    :return: a `Request` with ``body=None``.
    """
    start = stream.tell()
    with stream.parsing(request_line):
        parts = stream.readline().split(u' ')
        well_formed = len(parts) == 3 and HTTP_VERSION.match(parts[2])
        if not well_formed:
            raise stream.error(start)

    method = Method(parts[0])
    target = parts[1]
    version = HTTPVersion(parts[2])
    fields = parse_header_fields(stream)
    with stream.parsing(HTTP_message):
        stream.readlineend()

    request = Request(
        scheme, method, target, version, fields,
        body=None,
        remark=u'from %s, offset %d' % (stream.name, start),
    )
    # Hand over the complaints collected on the stream to the request.
    stream.dump_complaints(request.complain, place=u'request heading')
    return request
예제 #6
0
파일: har.py 프로젝트: jayvdb/httpolice
def _process_request(data, creator, path):
    """Convert the request object of one HAR entry into a `Request`.

    Besides the plain conversion, this works around peculiarities
    of several HAR exporters (Chrome, Firefox, Insomnia, Fiddler),
    selected by the ``is_*`` flags of `creator`.

    :param data: the HAR request object (a dict). The keys read here are
        ``method``, ``url``, ``bodySize`` and, optionally, ``postData``.
    :param creator: describes the program that produced the HAR file.
    :param path: where the data came from; only used in the remark.
    :return: the new `Request`, with `is_to_proxy` set to `None`.
    """
    (version, header_entries, pseudo_headers) = \
        _process_message(data, creator)

    if creator.is_chrome and version == http11:
        if u':host' in pseudo_headers:
            # SPDY exported from Chrome.
            version = None

    if creator.is_firefox and version == http2:
        # Firefox exports "Connection: keep-alive" on HTTP/2 requests
        # (which triggers notice 1244) even though it does not actually
        # send it (this can be verified with SSLKEYLOGFILE + Wireshark).
        bogus = (h.connection, u'keep-alive')
        kept = []
        for (name, value) in header_entries:
            if (name, value) != bogus:
                kept.append((name, value))
        header_entries = kept

    method = data['method']
    header_names = set(name for (name, _) in header_entries)
    url = urlparse(data['url'])

    if creator.is_insomnia:
        # Add the headers below when they are absent from the export.
        # See https://github.com/getinsomnia/insomnia/issues/840
        if h.host not in header_names:
            header_entries.insert(0, (h.host, url.netloc))
        if h.user_agent not in header_names:
            # The product version comes from the HAR creator info.
            insomnia_version = creator.reconstruct_insomnia_version()
            header_entries.append(
                (h.user_agent, u'insomnia/%s' % insomnia_version))
        if h.accept not in header_names:
            header_entries.append((h.accept, u'*/*'))
        # The set of names must reflect the entries added above.
        header_names = set(name for (name, _) in header_entries)

    if method == m.CONNECT:
        target = url.netloc
    elif h.host in header_names:
        # HAR does not tell us whether the request went to a proxy
        # or to a server, so such requests get the "origin form" target.
        target = url.path
        if url.query:
            target = target + (u'?' + url.query)
    else:
        # Without a ``Host`` header, the only way to show the target host
        # to the user is to use the full URL ("absolute form").
        # `is_to_proxy` is reset to `None` at the end of this function
        # so that this choice does not affect the proxy logic.
        target = data['url']

    size = data['bodySize']
    if size > 0:
        # There was a body, but it cannot be recovered: the message body
        # is the body *with* ``Content-Encoding`` applied,
        # and HAR does not include that.
        body = Unavailable()
    elif size == 0:
        # Either no body or a zero-length body (not distinguished here).
        body = b''
    else:
        # Unknown: there may or may not have been a body.
        body = None

    post = data.get('postData') or {}
    text = post.get('text') or None
    if text is not None:
        if creator.is_firefox:
            is_form = (post['mimeType'] ==
                       media.application_x_www_form_urlencoded)
            if is_form and u'\r\n' in text:
                # Firefox can put header-like lines in front of the form
                # data. Try to parse everything before the last CRLF
                # as header fields; on success, move them to the headers
                # and keep only the remainder as the text.
                (prefix, _, remainder) = text.rpartition(u'\r\n')
                try:
                    raw = io.BytesIO(prefix.encode('iso-8859-1'))
                    reader = io.BufferedReader(raw)
                    extra = framing1.parse_header_fields(Stream(reader))
                except (UnicodeError, ParseError):  # pragma: no cover
                    pass
                else:
                    header_entries.extend(extra)
                    text = remainder

        if creator.is_fiddler and method == m.CONNECT \
                and u'Fiddler' in text:
            # Fiddler's HAR export attaches a body with debug information
            # to CONNECT requests; discard it.
            text = None
            body = b''

    req = Request(url.scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    # Undo any conclusion drawn from an absolute-form target (see above).
    req.is_to_proxy = None
    return req
예제 #7
0
def test_effective_uri_4():
    """An absolute-form target is used as the effective URI unchanged."""
    absolute_target = u'myproto://www.example.org/index.html'
    headers = [(h.host, b'www.example.org')]
    request = Request(u'http', m.GET, absolute_target, http11, headers, b'')
    assert request.effective_uri == u'myproto://www.example.org/index.html'
예제 #8
0
def test_effective_uri_3():
    """An ``OPTIONS *`` request yields the scheme and host only."""
    headers = [(h.host, b'www.example.org')]
    request = Request(u'https', m.OPTIONS, u'*', http11, headers, b'')
    expected = u'https://www.example.org'
    assert request.effective_uri == expected
예제 #9
0
def test_effective_uri_2():
    """With no header entries at all, the effective URI is `None`."""
    no_headers = []
    request = Request(u'http', m.GET, u'/pub/WWW/TheProject.html', http10,
                      no_headers, b'')
    uri = request.effective_uri
    assert uri is None
예제 #10
0
파일: har.py 프로젝트: garmann/httpolice
def _process_request(data, creator):
    """Convert the request object of one HAR entry into a `Request`.

    Besides the plain conversion, this works around peculiarities
    of several HAR exporters, selected by testing `creator` against
    `CHROME`, `FIREFOX` and `FIDDLER`.

    :param data: the HAR request object (a dict). The keys read here are
        ``method``, ``url``, ``bodySize`` and, optionally, ``postData``.
    :param creator: identifies the program that produced the HAR file.
    :return: the new `Request`, with `is_to_proxy` set to `None`.
    """
    (version, header_entries, pseudo_headers) = \
        _process_message(data, creator)

    if creator in CHROME and version == http11:
        if u':host' in pseudo_headers:
            # SPDY exported from Chrome.
            version = None

    if creator in FIREFOX and version == http2:
        # Firefox exports "Connection: keep-alive" on HTTP/2 requests
        # (which triggers notice 1244) even though it does not actually
        # send it (this can be verified with SSLKEYLOGFILE + Wireshark).
        bogus = (h.connection, u'keep-alive')
        kept = []
        for (name, value) in header_entries:
            if (name, value) != bogus:
                kept.append((name, value))
        header_entries = kept

    method = data['method']
    url = urlparse(data['url'])

    if method == m.CONNECT:
        target = url.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # HAR does not tell us whether the request went to a proxy
        # or to a server, so such requests get the "origin form" target.
        target = url.path
        if url.query:
            target = target + (u'?' + url.query)
    else:
        # Without a ``Host`` header, the only way to show the target host
        # to the user is to use the full URL ("absolute form").
        # `is_to_proxy` is reset to `None` at the end of this function
        # so that this choice does not affect the proxy logic.
        target = data['url']

    size = data['bodySize']
    if size > 0:
        # There was a body, but it cannot be recovered: the message body
        # is the body *with* ``Content-Encoding`` applied,
        # and HAR does not include that.
        body = Unavailable
    elif size == 0:
        # Either no body or a zero-length body (not distinguished here).
        body = b''
    else:
        # Unknown: there may or may not have been a body.
        body = None

    post = data.get('postData') or {}
    text = post.get('text') or None
    if text is not None:
        if creator in FIREFOX:
            is_form = (post['mimeType'] ==
                       media.application_x_www_form_urlencoded)
            if is_form and u'\r\n' in text:
                # Firefox can put header-like lines in front of the form
                # data. Try to parse everything before the last CRLF
                # as header fields; on success, move them to the headers
                # and keep only the remainder as the text.
                (prefix, _, remainder) = text.rpartition(u'\r\n')
                try:
                    raw = (prefix + u'\r\n').encode('iso-8859-1')
                    extra = framing1.parse_header_fields(Stream(raw))
                except (UnicodeError, ParseError):
                    pass
                else:
                    header_entries.extend(extra)
                    text = remainder

        if creator in FIDDLER and method == m.CONNECT \
                and u'Fiddler' in text:
            # Fiddler's HAR export attaches a body with debug information
            # to CONNECT requests; discard it.
            text = None
            body = b''

    req = Request(url.scheme, method, target, version, header_entries, body)
    if text is not None:
        req.unicode_body = text
    # Undo any conclusion drawn from an absolute-form target (see above).
    req.is_to_proxy = None
    return req