def comment(include_parens=False):
    """Build the ``comment`` rule of RFC 7230.

    A comment is an opening parenthesis, a run of ``ctext``, quoted pairs
    and nested comments, and a closing parenthesis. Because comments nest,
    the rule is tied together through a recursive symbol.

    :param include_parens:
        if false (the default), the first and the last character of the
        parsed string (the enclosing parentheses) are cut off the result.
    :return: the rule, named ``comment`` and citing RFC 7230.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    content = string(ctext | quoted_pair(sensible_for=u'()\\') | nested)
    nested.rec = '(' + content + ')'
    if include_parens:
        outer = nested
    else:
        # Slice away the enclosing parentheses.
        outer = (lambda s: s[1:-1]) << nested
    return outer > named(u'comment', RFC(7230))
def preference_parameter(head=False):
    """Build the ``preference-parameter`` rule (RFC 7240, errata 4439).

    The rule accepts either a full ``parameter`` or a bare ``token``;
    the parsed result is passed through `_normalize_empty_value`.

    :param head:
        if true, the parameter name is wrapped into `Preference`;
        otherwise into `CaseInsensitive`.
    """
    # The head (first) ``preference-parameter`` of a ``preference``
    # contains the actual preference name, which we want to annotate.
    if head:
        name_cls = Preference
    else:
        name_cls = CaseInsensitive
    with_value = parameter(name_cls=name_cls)
    bare_name = name_cls << token
    rule = _normalize_empty_value << (with_value | bare_name)
    return rule > named(
        u'preference-parameter', RFC(7240, errata=4439), is_pivot=True)
def process(self, raw):
    """Convert a raw mapping of strings into a mapping of typed values.

    Conversion rules, applied to every ``(field, value)`` pair in order:

    - falsy values (such as empty strings) are dropped;
    - the ``'key'`` field is passed through ``self.cls``;
    - strings of digits become `int`;
    - ``true`` / ``false`` (in any letter case) become `bool`;
    - if ``self`` has an attribute named like the field and that attribute
      is a class (an Enum, according to the original comment), the value
      is looked up in it by subscription;
    - anything else is kept as is.

    Afterwards, ``rfc`` (with optional ``rfc_section`` and
    ``rfc_appendix``) is folded into a ``citation`` entry holding an `RFC`,
    and ``cite_url`` (with optional ``cite_title``) into a ``citation``
    entry holding a `Citation`. If both are present, the latter wins.

    :param raw: mapping of field names to string values.
    :return: a new `dict` with the converted values.
    """
    processed = {}
    for (field, value) in raw.items():
        if not value:
            continue

        number = None
        if field != 'key' and value.isdigit():
            # ``isdigit`` also accepts characters (superscript digits,
            # circled digits...) that ``int`` refuses. Such values are not
            # numbers for our purposes, so let them fall through to the
            # remaining branches instead of crashing.
            try:
                number = int(value)
            except ValueError:
                number = None

        if field == 'key':
            processed[field] = self.cls(value)
        elif number is not None:
            processed[field] = number
        elif value.lower() in [u'true', u'false']:
            processed[field] = (value.lower() == u'true')
        elif isinstance(getattr(self, field, None), type):    # Enum
            processed[field] = getattr(self, field)[value]
        else:
            processed[field] = value

    if 'rfc' in processed:
        processed['citation'] = RFC(processed.pop('rfc'),
                                    processed.pop('rfc_section', None),
                                    processed.pop('rfc_appendix', None))
    if 'cite_url' in processed:
        processed['citation'] = Citation(processed.pop('cite_title', None),
                                         processed.pop('cite_url'))
    return processed
def transfer_coding(no_trailers=False, no_q=False):
    """Build the ``transfer-coding`` rule of RFC 7230.

    The rule is a ``transfer-extension`` that excludes the built-in coding
    names, alternated with one literal branch per built-in coding
    (each passed through `_empty_params`).

    :param no_trailers:
        if true, ``trailers`` is also excluded from the extension branch.
    :param no_q: passed on to `transfer_extension`.
    """
    if no_trailers:
        excluded = _built_in_codings + ['trailers']
    else:
        excluded = _built_in_codings
    rule = transfer_extension(excluded, no_q)
    for coding in _built_in_codings:
        builtin = TransferCoding << literal(coding)
        rule = rule | _empty_params << builtin
    return rule > named(u'transfer-coding', RFC(7230), is_pivot=True)
def extract_citations(self, record):
    """Yield a citation for every relevant ``iana:xref`` child of `record`.

    - ``type="rfc"``: when the element text looks like
      ``RFCnnnn, Section x.y`` or ``RFCnnnn, Appendix x.y``, an `RFC` with
      the corresponding ``section`` / ``appendix`` keyword is produced.
      Otherwise the number is read from the ``data`` attribute with its
      first three characters dropped (presumably an ``rfc`` prefix --
      verify against the registry files).
    - ``type="uri"``: a `Citation` built from the whitespace-normalized
      element text (``None`` if there is no text) and the ``data``
      attribute.

    Cross-references of any other type are ignored.
    """
    pattern = r'RFC(\d+), (Section|Appendix) ([A-Z0-9]+(\.[0-9]+)*)'
    for xref in record.findall('iana:xref', self.xmlns):
        kind = xref.get('type')
        if kind == 'rfc':
            found = re.search(pattern, xref.text or '')
            if not found:
                # No section or appendix is mentioned in the text.
                yield RFC(int(xref.get('data')[3:]))
                continue
            number, keyword, label = found.group(1, 2, 3)
            yield RFC(int(number),
                      **{keyword.lower(): RFC.parse_sect(label)})
        elif kind == 'uri':
            text = xref.text
            title = normalize_whitespace(text) if text else None
            yield Citation(title, xref.get('data'))
def quoted_pair(sensible_for):
    """Build a ``quoted-pair`` rule (RFC 7230) aware of its context.

    :param sensible_for:
        container of the characters whose escaping is considered sensible
        in the current context. Escaping any other character triggers
        notice 1017.
    :return:
        a rule that skips the backslash and yields the escaped character.
    """
    # In RFC 7230, ``<quoted-pair>`` is a single rule,
    # but we parametrize it to report no. 1017 depending on the context.

    @can_complain
    def check_sensible(complain, c):
        needless = c not in sensible_for
        if needless:
            complain(1017, char=c)
        return c

    escaped = skip('\\') * (HTAB | SP | VCHAR | obs_text)
    return (check_sensible << escaped) > named(u'quoted-pair', RFC(7230))
def extract_citations(self, record):
    """Generate citations from the ``iana:xref`` children of `record`.

    An xref of type ``rfc`` becomes an `RFC`. If its text matches
    ``RFCnnnn, Section x`` or ``RFCnnnn, Appendix x``, the section or
    appendix is included; otherwise only the number is used, taken from
    the ``data`` attribute after skipping its first three characters.

    An xref of type ``uri`` becomes a `Citation` made of the element text
    (as is) and the ``data`` attribute. Other types produce nothing.
    """
    rfc_with_sect = r'RFC(\d+), (Section|Appendix) ([A-Z0-9]+(\.[0-9]+)*)'
    for ref in record.findall('iana:xref', self.xmlns):
        if ref.get('type') == 'rfc':
            hit = re.search(rfc_with_sect, ref.text or '')
            if hit:
                rfc_number = int(hit.group(1))
                where = {hit.group(2).lower(): RFC.parse_sect(hit.group(3))}
                yield RFC(rfc_number, **where)
            else:
                yield RFC(int(ref.get('data')[3:]))
        elif ref.get('type') == 'uri':
            yield Citation(ref.text, ref.get('data'))
def link_extension(exclude_builtin):
    """Build the ``link-extension`` rule of RFC 5988.

    Two forms are accepted: a plain parameter name with an optional
    ``ptoken`` or ``quoted-string`` value, and a starred name with a
    mandatory ``ext-value``. Names are wrapped into `CaseInsensitive`.

    :param exclude_builtin:
        if true, the names listed in `_builtin_params` are excluded:
        those not ending with ``*`` from the plain form, and those ending
        with ``*`` (with the asterisks stripped) from the starred form.
    """
    if exclude_builtin:
        plain_excluded = []
        starred_excluded = []
        for name in _builtin_params:
            if name.endswith('*'):
                starred_excluded.append(name.rstrip('*'))
            else:
                plain_excluded.append(name)
    else:       # pragma: no cover
        plain_excluded = starred_excluded = None

    plain_form = (
        (CaseInsensitive << parmname__excluding(plain_excluded)) *
        maybe(skip(OWS * '=' * OWS) * (ptoken | quoted_string))
    )
    starred_form = (
        (CaseInsensitive << ext_name_star__excluding(starred_excluded)) *
        skip(OWS * '=' * OWS) * ext_value
    )
    return (plain_form | starred_form) > named(
        u'link-extension', RFC(5988), is_pivot=True)
def media_range(no_q=False):
    """Build the ``media-range`` rule of RFC 7231.

    A media range is ``*/*``, or ``type/*``, or a concrete ``type/subtype``
    (wrapped into `MediaType` and passed through `_check_media_type`),
    followed by any number of ``;``-separated parameters collected into a
    `MultiDict`. The whole is wrapped into `Parametrized`.

    :param no_q:
        if true, a parameter named ``q`` is excluded from the parameters.
    """
    excluded = ['q'] if no_q else []
    any_type = literal('*/*')
    any_subtype = type_ + '/' + '*'
    concrete = _check_media_type << (MediaType << type_ + '/' + subtype)
    params = MultiDict << many(
        skip(OWS * ';' * OWS) * parameter(exclude=excluded))
    return Parametrized << (
        (any_type | any_subtype | concrete) * params
    ) > named(u'media-range', RFC(7231), is_pivot=True)
from httpolice.syntax.rfc2616 import value
from httpolice.syntax.rfc5987 import ext_value
from httpolice.syntax.rfc7230 import OWS, token, token__excluding


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

# We have no need to special-case "inline" and "attachment", simplify.
# Any ``token`` is accepted and wrapped into `CaseInsensitive`.
disposition_type = CaseInsensitive << token > pivot

# Either ``filename`` with a plain ``value``,
# or ``filename*`` with an ``ext-value``.
# The ``=`` sign and the whitespace around it are skipped.
filename_parm = (
    (CaseInsensitive << literal('filename')) *
    skip(OWS * '=' * OWS) * value |
    (CaseInsensitive << literal('filename*')) *
    skip(OWS * '=' * OWS) * ext_value) > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` requires
# something more complex than our `string_excluding`.
# Until then, we can simplify a bit.
# Consequently, every extension parameter here takes a plain ``value``;
# only the two ``filename`` names are excluded.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) * value) > pivot

disposition_parm = filename_parm | disp_ext_parm > auto

# The disposition type followed by ``;``-separated parameters,
# which are collected into a `MultiDict`.
content_disposition = Parametrized << (
    disposition_type *
    (MultiDict << many(skip(OWS * ';' * OWS) * disposition_parm))) > pivot

# NOTE(review): presumably this assigns names and the RFC 6266 citation
# to the rules defined above, based on the module's global names --
# confirm against `httpolice.parse.fill_names`. Because it receives
# ``globals()``, be careful when adding module-level names to this file.
fill_names(globals(), RFC(6266))
# -*- coding: utf-8; -*- from httpolice.citation import RFC from httpolice.parse import auto, fill_names, many, maybe, pivot, skip, string1 from httpolice.structure import HSTSDirective, Parametrized from httpolice.syntax.common import DIGIT from httpolice.syntax.rfc7230 import OWS, quoted_string, token # This has been slightly adapted to the rules of RFC 7230. # The ``OWS`` are derived from the "implied ``*LWS``" requirement. directive_name = HSTSDirective << token > auto directive_value = token | quoted_string > auto directive = Parametrized << ( directive_name * maybe(skip(OWS * '=' * OWS) * directive_value)) > pivot def _collect_elements(xs): return [elem for elem in xs if elem is not None] Strict_Transport_Security = _collect_elements << ( maybe(directive) % many(skip(OWS * ';' * OWS) * maybe(directive))) > pivot max_age_value = int << string1(DIGIT) > pivot fill_names(globals(), RFC(6797))
from httpolice.known.base import KnownDict
from httpolice.structure import Preference
from httpolice.syntax import rfc7240


# Possible values of the ``argument`` key in the registry below.
NO = 0
OPTIONAL = 1
REQUIRED = 2


def argument_required(name):
    """Tell whether the ``argument`` info of preference `name` is REQUIRED."""
    info = known.get_info(name)
    return info.get('argument') == REQUIRED


def no_argument(name):
    """Tell whether the ``argument`` info of preference `name` is NO."""
    info = known.get_info(name)
    return info.get('argument') == NO


def parser_for(name):
    """Return the ``parser`` info of preference `name` (``None`` if unset)."""
    info = known.get_info(name)
    return info.get('parser')


# Registry of known preferences, with their citations,
# argument requirements, and parsers for their arguments.
known = KnownDict(Preference, [
    {'_': Preference(u'handling'),
     '_citations': [RFC(7240, section=(4, 4))],
     'argument': REQUIRED,
     'parser': rfc7240.handling},
    {'_': Preference(u'respond-async'),
     '_citations': [RFC(7240, section=(4, 1))],
     'argument': NO},
    {'_': Preference(u'return'),
     '_citations': [RFC(7240, section=(4, 2))],
     'argument': REQUIRED,
     'parser': rfc7240.return_},
    {'_': Preference(u'wait'),
     '_citations': [RFC(7240, section=(4, 3))],
     'argument': REQUIRED,
     'parser': rfc7240.wait}
], extra_info=['argument', 'parser'])
# -*- coding: utf-8; -*- from httpolice.citation import RFC, Citation from httpolice.known.base import KnownDict from httpolice.structure import ContentCoding known = KnownDict(ContentCoding, [ {'_': ContentCoding(u'br'), '_citations': [RFC(7932)]}, {'_': ContentCoding(u'compress'), '_citations': [RFC(7230, section=(4, 2, 1))]}, {'_': ContentCoding(u'deflate'), '_citations': [RFC(7230, section=(4, 2, 2))]}, {'_': ContentCoding(u'exi'), '_citations': [Citation(u'W3C Recommendation: ' u'Efficient XML Interchange (EXI) Format', u'http://www.w3.org/TR/exi/')]}, {'_': ContentCoding(u'gzip'), '_citations': [RFC(7230, section=(4, 2, 3))]}, {'_': ContentCoding(u'identity'), '_citations': [RFC(7231, section=(5, 3, 4))]}, {'_': ContentCoding(u'pack200-gzip'), '_citations': [Citation(u'JSR 200: Network Transfer Format for Java', u'http://www.jcp.org/en/jsr/detail?id=200')]}, {'_': ContentCoding(u'x-compress'), '_citations': [RFC(7230, section=(4, 2, 1))]}, {'_': ContentCoding(u'x-gzip'), '_citations': [RFC(7230, section=(4, 2, 3))]} ])
# # ``_title`` # The default reason phrase, usually filled by ``tools/iana.py``. # # ``cacheable`` # If the status code is defined as cacheable by default, # set this to ``BY_DEFAULT``. # If it is defined as never cacheable, set to ``NOT_AT_ALL``. # Otherwise, set to ``NOT_BY_DEFAULT``. known = KnownDict( StatusCode, [ { '_': StatusCode(100), '_citations': [RFC(7231, section=(6, 2, 1))], '_title': u'Continue', 'cacheable': NOT_BY_DEFAULT }, { '_': StatusCode(101), '_citations': [RFC(7231, section=(6, 2, 2))], '_title': u'Switching Protocols', 'cacheable': NOT_BY_DEFAULT }, { '_': StatusCode(102), '_citations': [RFC(2518)], '_title': u'Processing' }, {
from httpolice.parse import (auto, can_complain, fill_names, maybe, octet,
                             octet_range, pivot, string, subst)
from httpolice.structure import EntityTag
from httpolice.syntax.common import DQUOTE
from httpolice.syntax.rfc7230 import comma_list1, obs_text
from httpolice.syntax.rfc7231 import HTTP_date


# The two octets 0x57 0x2F are ``W/`` in ASCII;
# when they are present, the parsed value is replaced with ``True``.
weak = subst(True) << octet(0x57) * octet(0x2F) > auto

# 0x21 is ``!``, and 0x23 through 0x7E is ``#`` through ``~``,
# so the only octet left out between them is 0x22 (the double quote).
etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text > auto


@can_complain
def _no_backslashes(complain, s):
    """Report notice 1119 if `s` contains a backslash; return `s` as is."""
    if u'\\' in s:
        complain(1119)
    return s


# The quotes are concatenated (``+``) with the contents,
# so the string handed to `_no_backslashes` includes them.
opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE > auto

# ``maybe(weak, False)``: presumably ``False`` is the value used
# when the ``W/`` prefix is absent -- TODO confirm
# against `httpolice.parse.maybe`.
entity_tag = EntityTag << maybe(weak, False) * opaque_tag > pivot

ETag = entity_tag > pivot
Last_Modified = HTTP_date > pivot

# Either a literal ``*`` or a non-empty comma-separated list of entity tags.
If_Match = '*' | comma_list1(entity_tag) > pivot
If_None_Match = '*' | comma_list1(entity_tag) > pivot
If_Modified_Since = HTTP_date > pivot
If_Unmodified_Since = HTTP_date > pivot

# NOTE(review): presumably this assigns names and the RFC 7232 citation
# to the rules defined above, based on the module's global names --
# confirm against `httpolice.parse.fill_names`.
fill_names(globals(), RFC(7232))
def transfer_extension(exclude=None, no_q=False):
    """Build the ``transfer-extension`` rule of RFC 7230.

    The rule is a coding name (a ``token`` wrapped into `TransferCoding`)
    followed by any number of ``;``-separated transfer parameters
    collected into a `MultiDict`, all wrapped into `Parametrized`.

    :param exclude:
        names that must not be accepted as the coding name
        (nothing is excluded if this is ``None`` or empty).
    :param no_q: passed on to `transfer_parameter`.
    """
    excluded = exclude or []
    coding_name = TransferCoding << token__excluding(excluded)
    params = MultiDict << many(
        skip(OWS * ';' * OWS) * transfer_parameter(no_q))
    return Parametrized << (coding_name * params) > named(
        u'transfer-extension', RFC(7230), is_pivot=True)
def transfer_parameter(no_q=False):
    """Build the ``transfer-parameter`` rule of RFC 7230.

    The rule is a ``token`` name, an ``=`` sign surrounded by ``BWS``
    (skipped), and a value that is a ``token`` or a ``quoted-string``.

    :param no_q: if true, the name ``q`` is not accepted.
    """
    if no_q:
        param_name = token__excluding(['q'])
    else:
        param_name = token
    param_value = token | quoted_string
    return (param_name * skip(BWS * '=' * BWS) * param_value) > named(
        u'transfer-parameter', RFC(7230), is_pivot=True)
OPTIONAL = 1
REQUIRED = 2


def argument_required(name):
    """Tell whether the ``argument`` info of directive `name` is REQUIRED."""
    info = known.get_info(name)
    return info.get('argument') == REQUIRED


def no_argument(name):
    """Tell whether the ``argument`` info of directive `name` is NO."""
    info = known.get_info(name)
    return info.get('argument') == NO


def parser_for(name):
    """Return the ``parser`` info of directive `name` (``None`` if unset)."""
    info = known.get_info(name)
    return info.get('parser')


# Registry of known HSTS directives, with their citations,
# argument requirements, and parsers for their arguments.
known = KnownDict(HSTSDirective, [
    {
        '_': HSTSDirective(u'includeSubDomains'),
        '_citations': [RFC(6797, section=(6, 1, 2))],
        'argument': NO
    },
    {
        '_': HSTSDirective(u'max-age'),
        '_citations': [RFC(6797, section=(6, 1, 1))],
        'argument': REQUIRED,
        'parser': rfc6797.max_age_value
    },
], extra_info=['argument', 'parser'])
from httpolice.citation import RFC
from httpolice.parse import fill_names, pivot
from httpolice.syntax.rfc7235 import token68


# The value has the same syntax as ``token68`` from RFC 7235.
HTTP2_Settings = token68 > pivot

# NOTE(review): presumably this assigns names and the RFC 7540 citation
# to the rules defined above, based on the module's global names --
# confirm against `httpolice.parse.fill_names`.
fill_names(globals(), RFC(7540))
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.known.base import KnownDict
from httpolice.structure import AltSvcParam
from httpolice.syntax import rfc7838


def parser_for(name):
    """Return the ``parser`` info of parameter `name` (``None`` if unset)."""
    info = known.get_info(name)
    return info.get('parser')


# Registry of known ``Alt-Svc`` parameters,
# with their citations and parsers for their values.
known = KnownDict(AltSvcParam, [
    {
        '_': AltSvcParam(u'ma'),
        '_citations': [RFC(7838, section=(3, 1))],
        'parser': rfc7838.ma
    },
    {
        '_': AltSvcParam(u'persist'),
        '_citations': [RFC(7838, section=(3, 1))],
        'parser': rfc7838.persist
    }
], extra_info=['parser'])
def parameter(exclude=None):
    """Build the ``parameter`` rule of RFC 7231.

    The rule is a name (a ``token`` wrapped into `CaseInsensitive`),
    an ``=`` sign (skipped), and a value that is a ``token``
    or a ``quoted-string``.

    :param exclude:
        names that must not be accepted
        (nothing is excluded if this is ``None`` or empty).
    """
    param_name = CaseInsensitive << token__excluding(exclude or [])
    param_value = token | quoted_string
    return (param_name * skip('=') * param_value) > named(
        u'parameter', RFC(7231), is_pivot=True)
parmname = string(attr_char) > pivot # We don't need to special-case "UTF-8", simplify. mime_charsetc = (ALPHA | DIGIT | '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^' | '_' | '`' | '{' | '}' | '~') > auto mime_charset = string1(mime_charsetc) > auto charset = CaseInsensitive << mime_charset > pivot pct_encoded = '%' + HEXDIG + HEXDIG > auto value_chars = pct_decode << ( force_bytes << string(pct_encoded | attr_char)) > auto @can_complain def _check_ext_value(complain, val): if val.charset == u'UTF-8': try: val.value_bytes.decode(val.charset) except UnicodeError as e: complain(1254, charset=val.charset, error=e) else: complain(1253, charset=val.charset) return val ext_value = _check_ext_value << ( ExtValue << (charset * skip("'") * maybe(language) * skip("'") * value_chars)) > pivot fill_names(globals(), RFC(8187))
# should trigger notice 1034. # # ``bad_for_trailer`` # You can set this to ``True`` if # the presence of this header in a trailer # should trigger notice 1026. # # ``iana_status`` # Filled by ``tools/iana.py``. You should not need to change it. known = KnownDict( FieldName, [ { '_': FieldName(u'A-IM'), '_citations': [RFC(4229)] }, { '_': FieldName(u'Accept'), '_citations': [RFC(7231, section=(5, 3, 2))], 'for_request': True, 'for_response': False, 'iana_status': u'standard', 'parser': rfc7231.Accept, 'precondition': False, 'proactive_conneg': True, 'rule': MULTI }, { '_': FieldName(u'Accept-Additions'), '_citations': [RFC(4229)]
from httpolice.citation import RFC
from httpolice.parse import fill_names, pivot
from httpolice.syntax.rfc7230 import comma_list1
from httpolice.syntax.rfc7231 import media_type


# A non-empty comma-separated list of media types.
Accept_Patch = comma_list1(media_type) > pivot

# NOTE(review): presumably this assigns names and the RFC 5789 citation
# to the rules defined above, based on the module's global names --
# confirm against `httpolice.parse.fill_names`.
fill_names(globals(), RFC(5789))
def comma_list1(element):
    """Build the ``1#rule`` list rule (RFC 7230 section 7) for `element`.

    The list may begin with any number of stray commas. Then comes one
    mandatory element, followed by any number of commas, each of which
    may or may not be followed by another element. The parsed pieces are
    passed through `_collect_elements`.

    :param element: the rule for a single list element.
    """
    # Each leading comma (with its trailing whitespace) is replaced with
    # ``None``.
    leading_commas = many(subst(None) << ',' * OWS)
    # The mandatory element, wrapped into a one-item list so that
    # it can be concatenated with the lists on both sides.
    first = (lambda x: [x]) << group(element)
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    return _collect_elements << (leading_commas + first + rest) > named(
        u'1#rule', RFC(7230, section=u'7'))
# -*- coding: utf-8; -*- from httpolice.citation import RFC from httpolice.known.base import KnownDict from httpolice.structure import UpgradeToken known = KnownDict( UpgradeToken, [{ '_': UpgradeToken(u'HTTP'), '_citations': [RFC(7230, section=(2, 6))], '_title': u'Hypertext Transfer Protocol' }, { '_': UpgradeToken(u'TLS'), '_citations': [RFC(2817)], '_title': u'Transport Layer Security' }, { '_': UpgradeToken(u'WebSocket'), '_citations': [RFC(6455)], '_title': u'The Web Socket Protocol' }, { '_': UpgradeToken(u'h2c'), '_citations': [RFC(7540, section=(3, 2))], '_title': u'Hypertext Transfer Protocol version 2 (HTTP/2)' }])
protocol_id = _check_protocol_id << token > pivot


@can_complain
def _check_alt_authority(complain, value):
    """Parse `value` as an optional ``uri-host``, a colon, and a ``port``.

    Returns whatever `parse` returns.
    NOTE(review): notice 1257 is handed over to `parse` together with
    `complain` -- presumably it is reported when `value` does not match;
    confirm against the definition of `parse`.
    """
    return parse(value, maybe_str(uri_host) + ':' + port, complain, 1257,
                 authority=value)


# The authority is transmitted as a ``quoted-string``,
# whose parsed value is then checked by `_check_alt_authority`.
alt_authority = _check_alt_authority << quoted_string > pivot
alternative = protocol_id * skip('=') * alt_authority > pivot

# The parameter name is wrapped into `AltSvcParam`.
parameter = ((AltSvcParam << token) * skip('=') *
             (token | quoted_string)) > pivot

# An alternative followed by ``;``-separated parameters,
# which are collected into a `MultiDict`.
alt_value = Parametrized << (
    alternative *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot

# Either ``clear`` or a non-empty comma-separated list of alternatives.
Alt_Svc = clear | comma_list1(alt_value) > pivot

ma = delta_seconds > pivot

# Only the literal ``1`` is accepted; it is parsed into ``True``.
persist = subst(True) << literal('1') > pivot

Alt_Used = uri_host + maybe_str(':' + port) > pivot

# NOTE(review): presumably this assigns names and the RFC 7838 citation
# to the rules defined above, based on the module's global names --
# confirm against `httpolice.parse.fill_names`.
fill_names(globals(), RFC(7838))
def parser_for(name): return known.get_info(name).get('parser') # A few of these directives (such as ``no-cache``) actually have # different citations for requests and for responses; # but it's hard for us to know whether a given instance of ``CacheDirective`` # refers to a request directive or a response directive, # so we have to use one, more general citation. known = KnownDict(CacheDirective, [ { '_': CacheDirective(u'max-age'), '_citations': [RFC(7234, section=(5, 2))], '_no_sync': ['_citations'], 'argument': REQUIRED, 'argument_form': TOKEN_PREFERRED, 'for_request': True, 'for_response': True, 'parser': rfc7234.delta_seconds }, { '_': CacheDirective(u'max-stale'), '_citations': [RFC(7234, section=(5, 2, 1, 2))], 'argument': OPTIONAL, 'argument_form': TOKEN_PREFERRED, 'for_request': True, 'for_response': False, 'parser': rfc7234.delta_seconds
import re from httpolice.citation import RFC from httpolice.codings import decode_deflate, decode_gzip from httpolice.exchange import Exchange, complaint_box from httpolice.known import m, st, tc from httpolice.parse import ParseError, Symbol from httpolice.request import Request from httpolice.response import Response from httpolice.structure import (FieldName, HeaderEntry, HTTPVersion, Method, StatusCode, Unavailable, okay) # Create empty symbols just for referring to them in parse errors. HTTP_message = Symbol(u'HTTP-message', RFC(7230, section=u'3')) request_line = Symbol(u'request-line', RFC(7230, section=u'3.1.1')) status_line = Symbol(u'status-line', RFC(7230, section=u'3.1.2')) header_field = Symbol(u'header-field', RFC(7230, section=u'3.2')) chunked_body = Symbol(u'chunked-body', RFC(7230, section=u'4.1')) chunk = Symbol(u'chunk', RFC(7230, section=u'4.1')) chunk_size = Symbol(u'chunk-size', RFC(7230, section=u'4.1')) HTTP_VERSION = re.compile(u'^HTTP/[0-9]\\.[0-9]$') STATUS_CODE = re.compile(u'^[0-9]{3}$') MAX_BODY_SIZE = 1024 * 1024 * 1024 def parse_streams(inbound, outbound, scheme=None): """Parse one or two HTTP/1.x streams.
# (see RFC 5789 errata). # # ``is_json`` # Set this to ``True`` if the media type uses JSON syntax # but **does not end** with ``+json``. # # ``is_xml`` # Set this to ``True`` if the media type uses XML syntax # but **does not end** with ``+xml``. # # ``deprecated`` # Filled by ``tools/iana.py``. You should not need to change it. known = KnownDict(MediaType, [ {'_': MediaType(u'application/1d-interleaved-parityfec'), '_citations': [RFC(6015)]}, {'_': MediaType(u'application/alto-costmap+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-costmapfilter+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-directory+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-endpointprop+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-endpointpropparams+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-endpointcost+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-endpointcostparams+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-error+json'), '_citations': [RFC(7285)]}, {'_': MediaType(u'application/alto-networkmapfilter+json'),
def comma_list(element):
    """Build the ``#rule`` list rule (RFC 7230 section 7) for `element`.

    Every position of the list -- before the first comma and after each
    comma -- may hold an element (followed by optional whitespace) or be
    empty. The parsed pieces are passed through `_collect_elements`.

    :param element: the rule for a single list element.
    """
    # RFC Errata ID: 5257
    first = maybe(group(element) * skip(OWS))
    rest = many(
        skip(literal(',') * OWS) * maybe(group(element) * skip(OWS)))
    return _collect_elements << (first % rest) > named(
        u'#rule', RFC(7230, section=u'7'))