Exemple #1
0
def parse_header_fields(stream):
    """Read consecutive HTTP/1.x header fields off `stream`.

    Fields are consumed for as long as a field name can be matched at the
    current position. After the last field, one more line ending is parsed
    with ``_parse_line_ending``.

    :param stream: The :class:`~httpolice.parse.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    parsed = []
    field_name = stream.maybe_consume_regex(rfc7230.field_name)
    while field_name is not None:
        stream.consume_regex(b':')
        stream.consume_regex(rfc7230.OWS)

        # Collect the value as a sequence of byte chunks.
        pieces = []
        while True:
            content = stream.maybe_consume_regex(rfc7230.field_content)
            if content is not None:
                # The match is re-encoded so that the value stays in bytes.
                pieces.append(content.encode('iso-8859-1'))
                continue
            if not stream.maybe_consume_regex(rfc7230.obs_fold):
                # Neither content nor a fold: the value is finished.
                break
            # A fold was consumed: report notice 1016 and stand in
            # a single space for it.
            stream.complain(1016)
            pieces.append(b' ')
        value = b''.join(pieces)

        stream.consume_regex(rfc7230.OWS)
        _parse_line_ending(stream)
        parsed.append(HeaderEntry(FieldName(field_name), value))

        # Try to start the next field.
        field_name = stream.maybe_consume_regex(rfc7230.field_name)
    _parse_line_ending(stream)
    return parsed
Exemple #2
0
 def _from_record(self, record):
     """Build an entry dict from one registry `record` element.

     Returns ``None`` when the record's ``iana:protocol`` text is not
     ``'http'``. Otherwise returns a dict with ``'key'`` (a ``FieldName``
     built from the ``iana:value`` text) and ``'citation'`` (whatever
     ``self.extract_citation`` returns), plus ``'iana_status'`` when the
     record has an ``iana:status`` element with non-empty text.
     """
     protocol = record.find('iana:protocol', self.xmlns)
     if protocol.text != 'http':
         return None

     value = record.find('iana:value', self.xmlns)
     entry = {}
     entry['key'] = FieldName(value.text)
     entry['citation'] = self.extract_citation(record)

     status = record.find('iana:status', self.xmlns)
     if status is None or not status.text:
         # No usable status: the entry goes out without 'iana_status'.
         return entry
     entry['iana_status'] = status.text
     return entry
Exemple #3
0
def _process_message(data, creator):
    """Extract version, header entries and pseudo-headers from `data`.

    `data` is indexed by ``'headers'`` (an iterable of mappings with
    ``'name'`` and ``'value'``) and ``'httpVersion'``.

    Returns a tuple ``(version, header_entries, pseudo_headers)``, where
    `version` is ``None``, ``http2``, or the ``'httpVersion'`` value as is.
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pseudo_headers = pop_pseudo_headers(header_entries)

    if creator in EDGE:
        # Edge exports HTTP/2 messages as HTTP/1.1,
        # so its version is not looked at.
        return (None, header_entries, pseudo_headers)

    reported = data['httpVersion']
    if reported == u'HTTP/2.0':     # Used by Firefox.
        version = http2
    elif reported == u'unknown':    # Used by Chrome.
        version = None
    else:
        version = reported
    return (version, header_entries, pseudo_headers)
Exemple #4
0
def parse_header_fields(stream):
    """Parse a block of HTTP/1.x header fields, one line at a time.

    Parsing stops when the next byte is CR or LF, or when ``stream.peek()``
    returns an empty byte string. Lines that begin with a space or a tab are
    treated as continuations of the previous field; each one triggers
    notice 1016 and is joined to the value with a single space.

    :param stream: The :class:`~httpolice.stream.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    result = []
    while stream.peek() not in (b'\r', b'\n', b''):
        with stream.parsing(header_field):
            start = stream.tell()
            first_line = stream.readline(decode=False)
            raw_name, separator, remainder = first_line.partition(b':')
            if not separator:
                # A line without a colon cannot be a header field.
                raise stream.error(start)
            chunks = [remainder]
            while stream.peek() in (b' ', b'\t'):
                stream.complain(1016)
                continuation = stream.readline(decode=False)
                chunks.append(b' ' + continuation.lstrip(b' \t'))
        field_name = FieldName(raw_name.decode('iso-8859-1'))
        # Surrounding spaces and tabs are not part of the value.
        field_value = b''.join(chunks).strip(b' \t')
        result.append(HeaderEntry(field_name, field_value))
    return result
Exemple #5
0
def parse_header_fields(stream):
    """Parse HTTP/1.x header fields from `stream` until the block ends.

    The block is considered finished as soon as ``stream.peek()`` yields
    CR, LF, or an empty byte string. A field may span several lines: any
    following line that starts with a space or a tab is appended to the
    value (separated by one space) and reported as notice 1016.

    :param stream: The :class:`~httpolice.stream.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    collected = []
    while True:
        if stream.peek() in [b'\r', b'\n', b'']:
            break
        with stream.parsing(header_field):
            line_start = stream.tell()
            head = stream.readline(decode=False)
            (name_bytes, colon, tail) = head.partition(b':')
            if not colon:
                # No colon on the line: signal an error at its beginning.
                raise stream.error(line_start)
            value_parts = [tail]
            while stream.peek() in [b' ', b'\t']:
                stream.complain(1016)
                folded = stream.readline(decode=False).lstrip(b' \t')
                value_parts.append(b' ' + folded)
        decoded_name = name_bytes.decode('iso-8859-1')
        joined_value = b''.join(value_parts)
        collected.append(
            HeaderEntry(FieldName(decoded_name), joined_value.strip(b' \t')))
    return collected
Exemple #6
0
def _process_message(data, creator):
    """Extract version, header entries and pseudo-headers from `data`.

    `data` is indexed by ``'headers'`` (an iterable of mappings with
    ``'name'`` and ``'value'``) and ``'httpVersion'``; `creator` exposes
    the flags ``is_edge`` and ``is_insomnia``.

    Returns a tuple ``(version, header_entries, pseudo_headers)``, where
    `version` is ``None``, ``http2``, or the upper-cased ``'httpVersion'``.
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pseudo_headers = pop_pseudo_headers(header_entries)

    # The version is treated as unknown in three cases, checked in order:
    #   - Edge exports HTTP/2 messages as HTTP/1.1;
    #   - Insomnia's HAR export hardcodes HTTP/1.1;
    #   - u'unknown' is used by Chrome.
    if (creator.is_edge or creator.is_insomnia or
            data['httpVersion'] == u'unknown'):
        version = None
    else:
        reported = data['httpVersion'].upper()
        # u'HTTP/2.0' is used by Firefox, Chrome, ...
        version = http2 if reported == u'HTTP/2.0' else reported
    return (version, header_entries, pseudo_headers)
Exemple #7
0
def _process_message(data, creator):
    """Extract the version and header entries from `data`.

    `data` is indexed by ``'headers'`` (an iterable of mappings with
    ``'name'`` and ``'value'``) and ``'httpVersion'``. Pseudo-headers are
    removed from the entries with ``pop_pseudo_headers`` and discarded.

    Returns a tuple ``(version, header_entries)``; `version` is ``None``
    unless the declared version is trusted (see the comment below).
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pop_pseudo_headers(header_entries)

    # HAR files exported by Web browsers are a poor reflection of what
    # actually went over the wire. Their httpVersion can't be trusted, and
    # lower-level parts of the protocol are often mangled (for instance,
    # at the time of writing Chrome sometimes omits the Host header from
    # HTTP/1.1 requests). Treating the HTTP version of such exports as
    # always unknown makes a lot of this pain go away.
    declared = data['httpVersion']
    trusted = (declared.startswith(u'HTTP/') and
               creator not in CHROME + FIREFOX + EDGE)
    version = declared if trusted else None

    return version, header_entries
Exemple #8
0
#     the presence of this header in a ``Connection`` header
#     should trigger notice 1034.
#
#   ``bad_for_trailer``
#     You can set this to ``True`` if
#     the presence of this header in a trailer
#     should trigger notice 1026.
#
#   ``iana_status``
#     Filled by ``tools/iana.py``. You should not need to change it.

known = KnownDict(
    FieldName,
    [
        {
            '_': FieldName(u'A-IM'),
            '_citations': [RFC(4229)]
        },
        {
            '_': FieldName(u'Accept'),
            '_citations': [RFC(7231, section=(5, 3, 2))],
            'for_request': True,
            'for_response': False,
            'iana_status': u'standard',
            'parser': rfc7231.Accept,
            'precondition': False,
            'proactive_conneg': True,
            'rule': MULTI
        },
        {
            '_': FieldName(u'Accept-Additions'),
Exemple #9
0
def check_message(msg):
    """Apply the checks shared by requests and responses to `msg`.

    Every finding is reported by calling ``msg.complain`` with a numeric
    notice ID (and sometimes keyword context). Nothing is returned.
    """
    report = msg.complain
    http_version = msg.version
    hdrs = msg.headers

    for hdr in hdrs:
        # Reading the value forces this header to be parsed
        # according to its syntax rules.
        _ = hdr.value
        if header.deprecated(hdr.name):
            report(1197, header=hdr)
        # An ``X-`` name that is not in `h` (i.e. not a known header).
        if hdr.name.startswith(u'X-') and hdr.name not in h:
            report(1277, header=hdr)

    # Reading these attributes, in this order, forces the payload
    # to be checked according to various rules.
    for payload_attr in ('decoded_body', 'unicode_body', 'json_data',
                         'xml_data', 'multipart_data', 'url_encoded_data'):
        _ = getattr(msg, payload_attr)

    if (http_version == http11 and hdrs.trailer.is_present and
            tc.chunked not in hdrs.transfer_encoding):
        # Only for HTTP/1.1: HTTP/2 supports trailers
        # but has no notion of "chunked".
        report(1054)

    # Trailer fields that the Trailer header did not list.
    for entry in msg.trailer_entries:
        if entry.name not in hdrs.trailer:
            report(1030, header=entry)

    if (hdrs.transfer_encoding.is_present and
            hdrs.content_length.is_present):
        report(1020)

    for opt in hdrs.connection:
        if header.is_bad_for_connection(FieldName(opt)):
            report(1034, header=hdrs[FieldName(opt)])

    if hdrs.content_type.is_okay:
        if media_type.deprecated(hdrs.content_type.item):
            report(1035)
        for dupe in hdrs.content_type.param.duplicates():
            report(1042, param=dupe)

    if (hdrs.content_type == media.application_json and
            u'charset' in hdrs.content_type.param):
        report(1280, header=hdrs.content_type)

    if hdrs.upgrade.is_present and u'upgrade' not in hdrs.connection:
        report(1050)

    # A Date more than 10 seconds ahead of the current UTC time.
    if hdrs.date > datetime.utcnow() + timedelta(seconds=10):
        report(1109)

    for warning in hdrs.warning:
        if warning.code < 100 or warning.code > 299:
            report(1163, code=warning.code)
        if okay(warning.date) and hdrs.date != warning.date:
            report(1164, code=warning.code)

    if msg.transformed_by_proxy:
        if warn.transformation_applied not in hdrs.warning:
            report(1191)
        if hdrs.cache_control.no_transform:
            report(1192)

    for pragma in hdrs.pragma:
        if pragma != u'no-cache':
            report(1160, pragma=pragma.item)

    if http_version == http2:
        # Headers that get their own notices on an HTTP/2 message.
        for hdr in hdrs:
            if hdr.name in (h.connection, h.transfer_encoding, h.keep_alive):
                report(1244, header=hdr)
            elif hdr.name == h.upgrade:
                report(1245)

    for protocol in hdrs.upgrade:
        if protocol.item == u'h2':
            report(1228)
        if protocol.item == upgrade.h2c and msg.is_tls:
            report(1233)
Exemple #10
0
def check_message(msg):
    """Apply the checks shared by requests and responses to `msg`.

    Every finding is reported by calling ``msg.complain`` with a numeric
    notice ID (and sometimes keyword context). Nothing is returned.

    When `msg` has a ``status`` equal to ``st.early_hints``, the function
    returns early and the checks after that point are not run.
    """
    report = msg.complain
    http_version = msg.version
    hdrs = msg.headers

    # Unknown ``X-`` headers are gathered and reported together.
    unknown_x_headers = []
    for hdr in hdrs:
        # Validate the syntax of the header name.
        parse(hdr.name, rfc7230.field_name, report, 1293,
              header=hdr, place=u'field name')
        # Reading the value forces this header to be parsed
        # according to its syntax rules.
        _ = hdr.value
        if known.header.is_deprecated(hdr.name):
            report(1197, header=hdr)
        if hdr.name.startswith(u'X-') and hdr.name not in known.header:
            unknown_x_headers.append(hdr)
    if unknown_x_headers:
        report(1277, headers=unknown_x_headers)

    # Reading these attributes, in this order, forces the payload
    # to be checked according to various rules.
    for payload_attr in ('decoded_body', 'unicode_body', 'json_data',
                         'xml_data', 'multipart_data', 'url_encoded_data'):
        _ = getattr(msg, payload_attr)

    if (http_version == http11 and hdrs.trailer.is_present and
            tc.chunked not in hdrs.transfer_encoding):
        # Only for HTTP/1.1: HTTP/2 supports trailers
        # but has no notion of "chunked".
        report(1054)

    # Trailer fields that the Trailer header did not list.
    for entry in msg.trailer_entries:
        if entry.name not in hdrs.trailer:
            report(1030, header=entry)

    if (hdrs.transfer_encoding.is_present and
            hdrs.content_length.is_present):
        report(1020)

    for opt in hdrs.connection:
        if known.header.is_bad_for_connection(FieldName(opt)):
            report(1034, header=hdrs[FieldName(opt)])

    if hdrs.content_type.is_okay:
        if known.media_type.is_deprecated(hdrs.content_type.item):
            report(1035)
        for dupe in hdrs.content_type.param.duplicates():
            report(1042, param=dupe)

    if (hdrs.content_type == media.application_json and
            u'charset' in hdrs.content_type.param):
        report(1280, header=hdrs.content_type)

    # A Date more than 10 seconds ahead of the current UTC time.
    if hdrs.date > datetime.utcnow() + timedelta(seconds=10):
        report(1109)

    for warning in hdrs.warning:
        if warning.code < 100 or warning.code > 299:
            report(1163, code=warning.code)
        if okay(warning.date) and hdrs.date != warning.date:
            report(1164, code=warning.code)

    for pragma in hdrs.pragma:
        if pragma != u'no-cache':
            report(1160, pragma=pragma.item)

    for protocol in hdrs.upgrade:
        if protocol.item == u'h2':
            report(1228)
        if protocol.item == upgrade.h2c and msg.is_tls:
            report(1233)

    if getattr(msg, 'status', None) == st.early_hints:
        # The headers carried by a 103 (Early Hints) response do not apply
        # to that response itself (RFC 8297 Section 2), only to the final
        # response, and even then only speculatively. So for 103 we stop
        # here: the remaining checks rely on having a complete and
        # self-consistent message header block.
        return

    if hdrs.upgrade.is_present and u'upgrade' not in hdrs.connection:
        report(1050)

    if msg.transformed_by_proxy:
        if warn.transformation_applied not in hdrs.warning:
            report(1191)
        if hdrs.cache_control.no_transform:
            report(1192)

    if http_version == http2:
        # Headers that get their own notices on an HTTP/2 message.
        for hdr in hdrs:
            if hdr.name in (h.connection, h.transfer_encoding, h.keep_alive):
                report(1244, header=hdr)
            elif hdr.name == h.upgrade:
                report(1245)