def parse_header_fields(stream):
    """Parse a block of HTTP/1.x header fields.

    Fields are consumed one after another for as long as a field name
    can be matched at the current position.  After the last field,
    one more line ending is consumed.  Every ``obs-fold`` found inside
    a field value is reported as notice 1016 and contributes a single
    space to the value.

    :param stream:
        The :class:`~httpolice.parse.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    entries = []
    name = stream.maybe_consume_regex(rfc7230.field_name)
    while name is not None:
        stream.consume_regex(b':')
        stream.consume_regex(rfc7230.OWS)

        # The value is gathered piece by piece: runs of field content,
        # interleaved with replacements for line folding.
        pieces = []
        while True:
            content = stream.maybe_consume_regex(rfc7230.field_content)
            if content is not None:
                # Matched text is converted back to bytes.
                pieces.append(content.encode('iso-8859-1'))
                continue
            if not stream.maybe_consume_regex(rfc7230.obs_fold):
                break
            stream.complain(1016)
            pieces.append(b' ')
        value = b''.join(pieces)

        stream.consume_regex(rfc7230.OWS)
        _parse_line_ending(stream)
        entries.append(HeaderEntry(FieldName(name), value))
        name = stream.maybe_consume_regex(rfc7230.field_name)

    # The line ending that terminates the whole block.
    _parse_line_ending(stream)
    return entries
def _from_record(self, record):
    """Convert one XML ``record`` element into a registry entry.

    :param record:
        An element that is searched with ``find`` for the children
        ``iana:protocol``, ``iana:value`` and ``iana:status``,
        resolved against ``self.xmlns``.
    :return:
        ``None`` if the text of ``iana:protocol`` is not ``http``.
        Otherwise a dict with the field name under ``key``,
        the result of ``self.extract_citation`` under ``citation``,
        and (only when ``iana:status`` exists and has non-empty text)
        that text under ``iana_status``.
    """
    protocol = record.find('iana:protocol', self.xmlns)
    if protocol.text != 'http':
        return None

    name = record.find('iana:value', self.xmlns).text
    entry = {
        'key': FieldName(name),
        'citation': self.extract_citation(record),
    }

    status = record.find('iana:status', self.xmlns)
    if status is not None and status.text:
        entry['iana_status'] = status.text
    return entry
def _process_message(data, creator):
    """Extract the HTTP version and headers from one HAR message object.

    :param data:
        A mapping with a ``headers`` list (items having ``name`` and
        ``value``) and, unless `creator` is in ``EDGE``,
        an ``httpVersion`` entry.
    :param creator: Compared against ``EDGE`` to detect Edge exports.
    :return:
        A tuple ``(version, header_entries, pseudo_headers)``, where
        ``pseudo_headers`` is whatever ``pop_pseudo_headers`` returns
        for the list of header entries, and ``version`` is ``None``
        when the exported version is not usable.
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pseudo_headers = pop_pseudo_headers(header_entries)

    if creator in EDGE:
        # Edge exports HTTP/2 messages as HTTP/1.1.
        version = None
    else:
        reported = data['httpVersion']
        if reported == u'HTTP/2.0':
            # Used by Firefox.
            version = http2
        elif reported == u'unknown':
            # Used by Chrome.
            version = None
        else:
            version = reported

    return (version, header_entries, pseudo_headers)
def parse_header_fields(stream):
    """Parse a block of HTTP/1.x header fields.

    Lines are read until ``stream.peek()`` yields ``CR``, ``LF``
    or an empty bytestring.  Each field line is split at its first
    colon; a line without a colon raises the error produced by
    ``stream.error`` for the position where that line started.
    Following lines that begin with a space or a tab are treated as
    continuations of the value: each one triggers notice 1016 and is
    appended after a single space, minus its leading whitespace.

    :param stream:
        The :class:`~httpolice.stream.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    block_end = [b'\r', b'\n', b'']
    fold_start = [b' ', b'\t']

    entries = []
    while stream.peek() not in block_end:
        with stream.parsing(header_field):
            start = stream.tell()
            first_line = stream.readline(decode=False)
            (raw_name, colon, head) = first_line.partition(b':')
            if not colon:
                raise stream.error(start)
            chunks = [head]
            while stream.peek() in fold_start:
                stream.complain(1016)
                continuation = stream.readline(decode=False)
                chunks.append(b' ' + continuation.lstrip(b' \t'))
        entries.append(HeaderEntry(
            FieldName(raw_name.decode('iso-8859-1')),
            b''.join(chunks).strip(b' \t'),
        ))
    return entries
def parse_header_fields(stream):
    """Parse a block of HTTP/1.x header fields.

    Lines are read until ``stream.peek()`` yields ``CR``, ``LF``
    or an empty bytestring.  Each field line is split at its first
    colon; a line without a colon raises the error produced by
    ``stream.error`` for the position where that line started.
    Following lines that begin with a space or a tab are treated as
    continuations of the value: each one triggers notice 1016 and is
    appended after a single space, minus its leading whitespace.

    :param stream:
        The :class:`~httpolice.stream.Stream` from which to parse.
    :return: A list of :class:`HeaderEntry`.
    :raises: :class:`ParseError`
    """
    block_end = [b'\r', b'\n', b'']
    fold_start = [b' ', b'\t']

    entries = []
    while stream.peek() not in block_end:
        with stream.parsing(header_field):
            start = stream.tell()
            first_line = stream.readline(decode=False)
            (raw_name, colon, head) = first_line.partition(b':')
            if not colon:
                raise stream.error(start)
            chunks = [head]
            while stream.peek() in fold_start:
                stream.complain(1016)
                continuation = stream.readline(decode=False)
                chunks.append(b' ' + continuation.lstrip(b' \t'))
        entries.append(HeaderEntry(
            FieldName(raw_name.decode('iso-8859-1')),
            b''.join(chunks).strip(b' \t'),
        ))
    return entries
def _process_message(data, creator):
    """Extract the HTTP version and headers from one HAR message object.

    :param data:
        A mapping with a ``headers`` list (items having ``name`` and
        ``value``) and an ``httpVersion`` entry (not consulted when
        `creator` is flagged as Edge or Insomnia).
    :param creator:
        An object whose ``is_edge`` and ``is_insomnia`` attributes are
        inspected.
    :return:
        A tuple ``(version, header_entries, pseudo_headers)``, where
        ``pseudo_headers`` is whatever ``pop_pseudo_headers`` returns
        for the list of header entries.  ``version`` is ``None`` when
        the exported version is disregarded, ``http2`` when it is
        ``HTTP/2.0`` in any letter case, and otherwise the exported
        string converted to upper case.
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pseudo_headers = pop_pseudo_headers(header_entries)

    # The exported version is disregarded in these cases:
    # - Edge exports HTTP/2 messages as HTTP/1.1;
    # - Insomnia's HAR export hardcodes HTTP/1.1;
    # - ``unknown`` is used by Chrome.
    disregard = (creator.is_edge or creator.is_insomnia or
                 data['httpVersion'] == u'unknown')
    if disregard:
        version = None
    else:
        version = data['httpVersion'].upper()
        if version == u'HTTP/2.0':
            # Used by Firefox, Chrome, ...
            version = http2

    return (version, header_entries, pseudo_headers)
def _process_message(data, creator):
    """Extract the HTTP version and headers from one HAR message object.

    The header entries are passed through ``pop_pseudo_headers``,
    whose return value is discarded here.

    :param data:
        A mapping with a ``headers`` list (items having ``name`` and
        ``value``) and an ``httpVersion`` string.
    :param creator:
        Checked for membership in ``CHROME + FIREFOX + EDGE``.
    :return:
        A tuple ``(version, header_entries)``.  ``version`` is the
        exported ``httpVersion`` if it starts with ``HTTP/`` and
        `creator` is none of the listed browsers; otherwise ``None``.
    """
    header_entries = []
    for item in data['headers']:
        header_entries.append((FieldName(item['name']), item['value']))
    pop_pseudo_headers(header_entries)

    # Web browsers' HAR export poorly reflects the actual traffic on the wire.
    # Their httpVersion can't be trusted, and they often mangle lower-level
    # parts of the protocol, e.g. at the time of writing Chrome sometimes omits
    # the Host header from HTTP/1.1 requests. Just consider their HTTP version
    # to be always unknown, and a lot of this pain goes away.
    usable = (data['httpVersion'].startswith(u'HTTP/') and
              creator not in CHROME + FIREFOX + EDGE)
    if usable:
        version = data['httpVersion']
    else:
        version = None
    return version, header_entries
# the presence of this header in a ``Connection`` header # should trigger notice 1034. # # ``bad_for_trailer`` # You can set this to ``True`` if # the presence of this header in a trailer # should trigger notice 1026. # # ``iana_status`` # Filled by ``tools/iana.py``. You should not need to change it. known = KnownDict( FieldName, [ { '_': FieldName(u'A-IM'), '_citations': [RFC(4229)] }, { '_': FieldName(u'Accept'), '_citations': [RFC(7231, section=(5, 3, 2))], 'for_request': True, 'for_response': False, 'iana_status': u'standard', 'parser': rfc7231.Accept, 'precondition': False, 'proactive_conneg': True, 'rule': MULTI }, { '_': FieldName(u'Accept-Additions'),
def check_message(msg):
    """Run all checks that apply to any message (both request and response).

    Every problem found is reported by calling ``msg.complain`` with
    the number of the notice and its context.  Nothing is returned.

    :param msg: The message to check.
    """
    report = msg.complain
    http_version = msg.version
    fields = msg.headers

    for field in fields:
        # Merely reading the value forces every header present in the message
        # to be parsed according to its syntax rules.
        _ = field.value
        if header.deprecated(field.name):
            report(1197, header=field)
        # ``not in h`` here means: not in known.
        if field.name.startswith(u'X-') and field.name not in h:
            report(1277, header=field)

    # Reading these attributes forces the payload to be checked
    # according to various rules.
    for payload_attr in ('decoded_body', 'unicode_body', 'json_data',
                         'xml_data', 'multipart_data', 'url_encoded_data'):
        getattr(msg, payload_attr)

    if (http_version == http11 and fields.trailer.is_present and
            tc.chunked not in fields.transfer_encoding):
        # HTTP/2 supports trailers but has no notion of "chunked".
        report(1054)

    for trailer_field in msg.trailer_entries:
        if trailer_field.name in fields.trailer:
            continue
        report(1030, header=trailer_field)

    if (fields.transfer_encoding.is_present and
            fields.content_length.is_present):
        report(1020)

    for option in fields.connection:
        if not header.is_bad_for_connection(FieldName(option)):
            continue
        report(1034, header=fields[FieldName(option)])

    if fields.content_type.is_okay:
        if media_type.deprecated(fields.content_type.item):
            report(1035)
        for repeated in fields.content_type.param.duplicates():
            report(1042, param=repeated)

    if (fields.content_type == media.application_json and
            u'charset' in fields.content_type.param):
        report(1280, header=fields.content_type)

    if fields.upgrade.is_present and u'upgrade' not in fields.connection:
        report(1050)

    # A ``Date`` more than 10 seconds ahead of the current UTC time.
    if fields.date > datetime.utcnow() + timedelta(seconds=10):
        report(1109)

    for warning_value in fields.warning:
        if warning_value.code < 100 or warning_value.code > 299:
            report(1163, code=warning_value.code)
        if okay(warning_value.date) and fields.date != warning_value.date:
            report(1164, code=warning_value.code)

    if msg.transformed_by_proxy:
        if warn.transformation_applied not in fields.warning:
            report(1191)
        if fields.cache_control.no_transform:
            report(1192)

    for directive in fields.pragma:
        if directive != u'no-cache':
            report(1160, pragma=directive.item)

    if http_version == http2:
        for field in fields:
            if field.name in [h.connection, h.transfer_encoding,
                              h.keep_alive]:
                report(1244, header=field)
            elif field.name == h.upgrade:
                report(1245)

    for offered in fields.upgrade:
        if offered.item == u'h2':
            report(1228)
        if offered.item == upgrade.h2c and msg.is_tls:
            report(1233)
def check_message(msg):
    """Run all checks that apply to any message (both request and response).

    Every problem found is reported by calling ``msg.complain`` with
    the number of the notice and its context.  Nothing is returned.
    For a message whose ``status`` equals ``st.early_hints``, the checks
    placed after the early return below are not performed.

    :param msg: The message to check.
    """
    report = msg.complain
    http_version = msg.version
    fields = msg.headers

    unknown_x_fields = []
    for field in fields:
        # Check the header name syntax.
        parse(field.name, rfc7230.field_name, report, 1293,
              header=field, place=u'field name')
        # Merely reading the value forces every header present in the message
        # to be parsed according to its syntax rules.
        _ = field.value
        if known.header.is_deprecated(field.name):
            report(1197, header=field)
        if field.name.startswith(u'X-') and field.name not in known.header:
            unknown_x_fields.append(field)
    # All such headers are reported together, in a single notice.
    if unknown_x_fields:
        report(1277, headers=unknown_x_fields)

    # Reading these attributes forces the payload to be checked
    # according to various rules.
    for payload_attr in ('decoded_body', 'unicode_body', 'json_data',
                         'xml_data', 'multipart_data', 'url_encoded_data'):
        getattr(msg, payload_attr)

    if (http_version == http11 and fields.trailer.is_present and
            tc.chunked not in fields.transfer_encoding):
        # HTTP/2 supports trailers but has no notion of "chunked".
        report(1054)

    for trailer_field in msg.trailer_entries:
        if trailer_field.name in fields.trailer:
            continue
        report(1030, header=trailer_field)

    if (fields.transfer_encoding.is_present and
            fields.content_length.is_present):
        report(1020)

    for option in fields.connection:
        if not known.header.is_bad_for_connection(FieldName(option)):
            continue
        report(1034, header=fields[FieldName(option)])

    if fields.content_type.is_okay:
        if known.media_type.is_deprecated(fields.content_type.item):
            report(1035)
        for repeated in fields.content_type.param.duplicates():
            report(1042, param=repeated)

    if (fields.content_type == media.application_json and
            u'charset' in fields.content_type.param):
        report(1280, header=fields.content_type)

    # A ``Date`` more than 10 seconds ahead of the current UTC time.
    if fields.date > datetime.utcnow() + timedelta(seconds=10):
        report(1109)

    for warning_value in fields.warning:
        if warning_value.code < 100 or warning_value.code > 299:
            report(1163, code=warning_value.code)
        if okay(warning_value.date) and fields.date != warning_value.date:
            report(1164, code=warning_value.code)

    for directive in fields.pragma:
        if directive != u'no-cache':
            report(1160, pragma=directive.item)

    for offered in fields.upgrade:
        if offered.item == u'h2':
            report(1228)
        if offered.item == upgrade.h2c and msg.is_tls:
            report(1233)

    if getattr(msg, 'status', None) == st.early_hints:
        # 103 (Early Hints) responses are weird in that the headers they carry
        # do not apply to themselves (RFC 8297 Section 2) but only to the final
        # response (and then only speculatively). For such responses, we limit
        # ourselves to checks that do not rely on having a complete and
        # self-consistent message header block.
        return

    if fields.upgrade.is_present and u'upgrade' not in fields.connection:
        report(1050)

    if msg.transformed_by_proxy:
        if warn.transformation_applied not in fields.warning:
            report(1191)
        if fields.cache_control.no_transform:
            report(1192)

    if http_version == http2:
        for field in fields:
            if field.name in [h.connection, h.transfer_encoding,
                              h.keep_alive]:
                report(1244, header=field)
            elif field.name == h.upgrade:
                report(1245)