def test_parser_edge_cases():
    """Exercise parser branches that the regular tests do not reach.

    The parser implementation is general enough that some of its branches
    are only executed by the contrived grammars constructed here.
    """
    # Two alternatives that both start with ``1``.
    tchars = many(rfc7230.tchar) > named(u'p')
    one_first = ('1' * tchars) > named(u'p1')
    eleven_first = ('11' * tchars * skip('\n')) > named(u'p2')
    result = parse(one_first | eleven_first, b'11abc')
    assert result == (u'1', [u'1', u'a', u'b', u'c'])
    result = parse(one_first | eleven_first, b'11abc\n')
    assert result == (u'11', [u'a', u'b', u'c'])

    # A recursive symbol whose rule refers back to the symbol itself.
    chain = recursive() > named(u'p')
    chain.rec = ((rfc7230.tchar * chain) | (subst(None) << empty))
    result = parse(chain, b'abc')
    assert result == (u'a', (u'b', (u'c', None)))

    # An alternative between ``empty`` and a literal, used both directly
    # and inside ``string(...)``.
    ab = literal('ab') > named(u'p')
    empty_or_ab = ((subst(u'') << empty) | ab) > named(u'p0')
    xab_first = ('xab' * empty_or_ab) > named(u'p1')
    x_first = ('x' * string(empty_or_ab) * '!') > named(u'p2')
    result = parse(xab_first | x_first, b'xabab')
    assert result == (u'xab', u'ab')
    result = parse(xab_first | x_first, b'xabab!')
    assert result == (u'x', u'abab', u'!')

    # ``empty`` as the first alternative, followed by a literal.
    empty_or_a = (empty | literal('a')) > named(u'p')
    then_x = (empty_or_a * 'x') > named(u'x')
    result = parse(then_x, b'x')
    assert result == u'x'
def test_parser_edge_cases():
    """Exercise parser branches that the regular tests do not reach.

    The parser implementation is general enough that some of its branches
    are only executed by the contrived grammars constructed here.
    """
    # Two alternatives that both start with ``1``.
    tchars = many(rfc7230.tchar) > named(u'p')
    one_first = ('1' * tchars) > named(u'p1')
    eleven_first = ('11' * tchars * skip('\n')) > named(u'p2')
    result = parse(one_first | eleven_first, b'11abc')
    assert result == (u'1', [u'1', u'a', u'b', u'c'])
    result = parse(one_first | eleven_first, b'11abc\n')
    assert result == (u'11', [u'a', u'b', u'c'])

    # A recursive symbol whose rule refers back to the symbol itself.
    chain = recursive() > named(u'p')
    chain.rec = ((rfc7230.tchar * chain) | (subst(None) << empty))
    result = parse(chain, b'abc')
    assert result == (u'a', (u'b', (u'c', None)))

    # An alternative between ``empty`` and a literal, used both directly
    # and inside ``string(...)``.
    ab = literal('ab') > named(u'p')
    empty_or_ab = ((subst(u'') << empty) | ab) > named(u'p0')
    xab_first = ('xab' * empty_or_ab) > named(u'p1')
    x_first = ('x' * string(empty_or_ab) * '!') > named(u'p2')
    result = parse(xab_first | x_first, b'xabab')
    assert result == (u'xab', u'ab')
    result = parse(xab_first | x_first, b'xabab!')
    assert result == (u'x', u'abab', u'!')

    # ``empty`` as the first alternative, followed by a literal.
    empty_or_a = (empty | literal('a')) > named(u'p')
    then_x = (empty_or_a * 'x') > named(u'x')
    result = parse(then_x, b'x')
    assert result == u'x'
def media_range(no_q=False):
    """Build the ``media-range`` symbol, cited to RFC 7231.

    :param no_q: when true, ``'q'`` is passed in the ``exclude`` list of
        `parameter`; otherwise that list is empty.
    :return: the assembled symbol, named ``media-range`` with
        ``is_pivot=True``.
    """
    excluded_names = ['q'] if no_q else []
    # Three alternatives, tried in this order: ``*/*``, ``type/*``,
    # and a full ``type/subtype`` wrapped in `MediaType`.
    range_part = (
        literal('*/*')
        | (type_ + '/' + '*')
        | (MediaType << (type_ + '/' + subtype))
    )
    # Each parameter is introduced by ``OWS ";" OWS``; the whole run of
    # parameters is handed to `MultiDict`.
    one_parameter = skip(OWS * ';' * OWS) * parameter(exclude=excluded_names)
    parameters = MultiDict << many(one_parameter)
    symbol = Parametrized << (range_part * parameters)
    return symbol > named(u'media-range', RFC(7231), is_pivot=True)
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, many, maybe, pivot, skip, string1
from httpolice.structure import HSTSDirective, Parametrized
from httpolice.syntax.common import DIGIT
from httpolice.syntax.rfc7230 import OWS, quoted_string, token


# The grammar here is a slight adaptation to the rules of RFC 7230.
# The ``OWS`` come from the "implied ``*LWS``" requirement.
#
# Note on grouping: ``>`` binds more loosely than ``|``, ``<<``, ``*``
# and ``%``, so the trailing ``> auto`` / ``> pivot`` always applies to
# the entire expression on its left.  The parentheses make that explicit.

directive_name = (HSTSDirective << token) > auto
directive_value = (token | quoted_string) > auto
directive = (
    Parametrized << (
        directive_name *
        maybe(skip(OWS * '=' * OWS) * directive_value)
    )
) > pivot


def _collect_elements(xs):
    """Return a list of the items of `xs` that are not None, in order."""
    kept = []
    for item in xs:
        if item is not None:
            kept.append(item)
    return kept


Strict_Transport_Security = (
    _collect_elements << (
        maybe(directive) %
        many(skip(OWS * ';' * OWS) * maybe(directive))
    )
) > pivot

# The digits are converted with ``int``.
max_age_value = (int << string1(DIGIT)) > pivot

fill_names(globals(), RFC(6797))
correct_encoded_id += pct_encode(c, safe='').upper() if encoded_id != correct_encoded_id: complain(1256, actual=encoded_id, correct=correct_encoded_id) return decoded_id protocol_id = _check_protocol_id << token > pivot @can_complain def _check_alt_authority(complain, value): return parse(value, maybe_str(uri_host) + ':' + port, complain, 1257, authority=value) alt_authority = _check_alt_authority << quoted_string > pivot alternative = protocol_id * skip('=') * alt_authority > pivot parameter = ((AltSvcParam << token) * skip('=') * (token | quoted_string)) > pivot alt_value = Parametrized << ( alternative * (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot Alt_Svc = clear | comma_list1(alt_value) > pivot ma = delta_seconds > pivot persist = subst(True) << literal('1') > pivot Alt_Used = uri_host + maybe_str(':' + port) > pivot fill_names(globals(), RFC(7838))
# Note on grouping: ``>`` binds more loosely than ``|``, ``<<``, ``*``
# and ``+``, so each trailing ``> pivot`` applies to the entire
# expression on its left.  The parentheses make that explicit.

protocol_id = (_check_protocol_id << token) > pivot


@can_complain
def _check_alt_authority(complain, value):
    """Parse `value` as ``[uri-host] ":" port`` and return the result.

    `complain` and the notice number 1257 are passed on to `parse`,
    together with `value` itself as the ``authority`` keyword argument.
    """
    authority_syntax = maybe_str(uri_host) + ':' + port
    return parse(value, authority_syntax, complain, 1257, authority=value)


alt_authority = (_check_alt_authority << quoted_string) > pivot
alternative = (protocol_id * skip('=') * alt_authority) > pivot
parameter = (
    (AltSvcParam << token) * skip('=') * (token | quoted_string)
) > pivot
alt_value = (
    Parametrized << (
        alternative *
        (MultiDict << many(skip(OWS * ';' * OWS) * parameter))
    )
) > pivot
Alt_Svc = (clear | comma_list1(alt_value)) > pivot

ma = delta_seconds > pivot
# A literal ``1`` is substituted with ``True``.
persist = (subst(True) << literal('1')) > pivot

Alt_Used = (uri_host + maybe_str(':' + port)) > pivot

fill_names(globals(), RFC(7838))
def comma_list1(element):
    """Build the ``1#rule`` symbol for `element`, cited to RFC 7230.

    :param element: the symbol for one list member.
    :return: a symbol named ``1#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # ``"," OWS`` fragments ahead of the first element, each mapped
    # through ``subst(None)``.
    leading_commas = many(subst(None) << (',' * OWS))
    # The first element is not wrapped in ``maybe``; its value is put
    # into a one-item list.
    first = (lambda x: [x]) << group(element)
    # Everything after: ``OWS ","`` followed by an optional element.
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    symbol = _collect_elements << (leading_commas + first + rest)
    return symbol > named(u'1#rule', RFC(7230, section=(7,)))
def comma_list(element):
    """Build the ``#rule`` symbol for `element`, cited to RFC 7230.

    :param element: the symbol for one list member.
    :return: a symbol named ``#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # The list starts either with a bare comma (mapped through
    # ``subst([None, None])``) or with an element put into a one-item list.
    bare_comma = subst([None, None]) << literal(',')
    first = (lambda x: [x]) << group(element)
    # Everything after: ``OWS ","`` followed by an optional element.
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    # The entire list is wrapped in ``maybe``.
    symbol = _collect_elements << maybe((bare_comma | first) + rest)
    return symbol > named(u'#rule', RFC(7230, section=(7, )))
def media_range(no_q=False):
    """Build the ``media-range`` symbol, cited to RFC 7231.

    :param no_q: when true, ``'q'`` is passed in the ``exclude`` list of
        `parameter`; otherwise that list is empty.
    :return: the assembled symbol, named ``media-range`` with
        ``is_pivot=True``.
    """
    excluded_names = ['q'] if no_q else []
    # Three alternatives, tried in this order: ``*/*``, ``type/*``,
    # and a full ``type/subtype``.  Only the last one is wrapped in
    # `MediaType` and then passed through `_check_media_type`.
    range_part = (
        literal('*/*')
        | (type_ + '/' + '*')
        | (_check_media_type << (MediaType << (type_ + '/' + subtype)))
    )
    # Each parameter is introduced by ``OWS ";" OWS``; the whole run of
    # parameters is handed to `MultiDict`.
    one_parameter = skip(OWS * ';' * OWS) * parameter(exclude=excluded_names)
    parameters = MultiDict << many(one_parameter)
    symbol = Parametrized << (range_part * parameters)
    return symbol > named(u'media-range', RFC(7231), is_pivot=True)
def transfer_extension(exclude=None, no_q=False):
    """Build the ``transfer-extension`` symbol, cited to RFC 7230.

    :param exclude: passed to `token__excluding`; a falsy value
        (including the default None) is replaced with an empty list.
    :param no_q: passed on to `transfer_parameter`.
    :return: the assembled symbol, named ``transfer-extension`` with
        ``is_pivot=True``.
    """
    coding_name = TransferCoding << token__excluding(exclude or [])
    # Each parameter is introduced by ``OWS ";" OWS``; the whole run of
    # parameters is handed to `MultiDict`.
    one_parameter = skip(OWS * ';' * OWS) * transfer_parameter(no_q)
    parameters = MultiDict << many(one_parameter)
    symbol = Parametrized << (coding_name * parameters)
    return symbol > named(u'transfer-extension', RFC(7230), is_pivot=True)
':' | '<' | '=' | '>' | '?' | '@' | ALPHA | '[' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~') > auto ptoken = string1(ptokenchar) > auto media_type = MediaType << type_name + '/' + subtype_name > pivot quoted_mt = skip('"') * media_type * skip('"') > pivot reg_rel_type = RelationType << ( LOALPHA + string(LOALPHA | DIGIT | '.' | '-')) > auto ext_rel_type = URI > auto relation_type = reg_rel_type | ext_rel_type > pivot relation_types = ( (lambda x: [x]) << relation_type | skip('"' * OWS) * (relation_type % many(skip(string1(SP)) * relation_type)) * skip(OWS * '"')) > pivot def ext_name_star__excluding(exclude): return (parmname__excluding(exclude) + '*' > named(u'ext-name-star', RFC(5988))) _builtin_params = { 'rel': relation_types, 'anchor': skip('"' * OWS) * URI_Reference * skip(OWS * '"'), 'rev': relation_types, 'hreflang': Language_Tag, 'media': (_MediaDesc_no_delim | skip('"' * OWS) * _MediaDesc * skip(OWS * '"')), 'title': quoted_string, 'title*': ext_value,
# The grammar here is a slight adaptation to the rules of RFC 7230.
# The ``OWS`` come from the "implied ``*LWS``" requirement.
#
# Note on grouping: ``>`` binds more loosely than ``|``, ``<<`` and
# ``*``, so each trailing ``> pivot`` / ``> auto`` applies to the entire
# expression on its left.  The parentheses make that explicit.

# There is no need to special-case "inline" and "attachment",
# so this is simplified to any token.
disposition_type = (CaseInsensitive << token) > pivot

filename_parm = (
    (
        (CaseInsensitive << literal('filename')) *
        skip(OWS * '=' * OWS) *
        value
    ) | (
        (CaseInsensitive << literal('filename*')) *
        skip(OWS * '=' * OWS) *
        ext_value
    )
) > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` would require
# something more complex than our `string_excluding`.
# Until then, this is simplified a bit.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) *
    value
) > pivot

disposition_parm = (filename_parm | disp_ext_parm) > auto

content_disposition = (
    Parametrized << (
        disposition_type *
        (MultiDict << many(skip(OWS * ';' * OWS) * disposition_parm))
    )
) > pivot

fill_names(globals(), RFC(6266))
def comma_list(element):
    """Build the ``#rule`` symbol for `element`, cited to RFC 7230.

    Follows RFC Errata ID: 5257.

    :param element: the symbol for one list member.
    :return: a symbol named ``#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # An optional first element, with ``OWS`` after it skipped.
    head = maybe(group(element) * skip(OWS))
    # Then any number of ``"," OWS`` separators, each followed by
    # another optional element of the same shape.
    tail = many(
        skip(literal(',') * OWS) *
        maybe(group(element) * skip(OWS))
    )
    symbol = _collect_elements << (head % tail)
    return symbol > named(u'#rule', RFC(7230, section=u'7'))
from httpolice.citation import RFC
from httpolice.parse import (
    auto,
    fill_names,
    many,
    octet,
    octet_range,
    pivot,
    skip,
    string1,
)
from httpolice.syntax.common import SP
from httpolice.syntax.rfc3986 import URI_reference


# Note on grouping: ``>`` binds more loosely than ``|``, ``*`` and ``%``,
# so each trailing ``> auto`` / ``> pivot`` applies to the entire
# expression on its left.

# Octets 0x20-0x7E except 0x22 and 0x5C.
NQSCHAR = (
    octet_range(0x20, 0x21) |
    octet_range(0x23, 0x5B) |
    octet_range(0x5D, 0x7E)
) > auto

# Same as above, but additionally without 0x20.
NQCHAR = (
    octet(0x21) |
    octet_range(0x23, 0x5B) |
    octet_range(0x5D, 0x7E)
) > auto

scope_token = string1(NQCHAR) > pivot
# One ``scope_token``, then further ones each preceded by a skipped ``SP``.
scope = (scope_token % many(skip(SP) * scope_token)) > pivot

error = string1(NQSCHAR) > pivot
error_description = string1(NQSCHAR) > pivot
error_uri = URI_reference > pivot

fill_names(globals(), RFC(6749))
# RFC 7240 Section 2: "Empty or zero-length values on both # the preference token and within parameters are equivalent # to no value being specified at all." (name, value) = x if isinstance(x, tuple) else (x, None) return Parametrized(name, None if value == u'' else value) def preference_parameter(head=False): # The head (first) ``preference-parameter`` of a ``preference`` # contains the actual preference name, which we want to annotate. name_cls = Preference if head else CaseInsensitive return ( _normalize_empty_value << (parameter(name_cls=name_cls) | name_cls << token) ) > named(u'preference-parameter', RFC(7240, errata=4439), is_pivot=True) preference = Parametrized << ( preference_parameter(head=True) * many(skip(OWS * ';') * maybe(skip(OWS) * preference_parameter())) ) > named(u'preference', RFC(7240, errata=4439), is_pivot=True) Prefer = comma_list1(preference) > pivot Preference_Applied = comma_list1(preference_parameter(head=True)) > pivot return_ = CaseInsensitive << (literal('representation') | 'minimal') > pivot wait = delay_seconds > auto handling = CaseInsensitive << (literal('strict') | 'lenient') > pivot fill_names(globals(), RFC(7240))
':' | '<' | '=' | '>' | '?' | '@' | ALPHA | '[' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~') > auto ptoken = string1(ptokenchar) > auto media_type = MediaType << type_name + '/' + subtype_name > pivot quoted_mt = skip('"') * media_type * skip('"') > pivot reg_rel_type = RelationType << ( LOALPHA + string(LOALPHA | DIGIT | '.' | '-')) > auto ext_rel_type = URI > auto relation_type = reg_rel_type | ext_rel_type > pivot relation_types = ( (lambda x: [x]) << relation_type | skip('"' * OWS) * (relation_type % many(skip(string1(SP)) * relation_type)) * skip(OWS * '"')) > pivot def ext_name_star__excluding(exclude): return (parmname__excluding(exclude) + '*' > named(u'ext-name-star', RFC(5988))) _builtin_params = { 'rel': relation_types, 'anchor': skip('"' * OWS) * URI_Reference * skip(OWS * '"'), 'rev': relation_types, 'hreflang': Language_Tag, 'media': (_MediaDesc_no_delim | skip('"' * OWS) * _MediaDesc * skip(OWS * '"')), 'title': quoted_string, 'title*': ext_value,
from httpolice.citation import RFC
from httpolice.parse import (
    auto,
    fill_names,
    many,
    octet,
    octet_range,
    pivot,
    skip,
    string1,
)
from httpolice.syntax.common import SP
from httpolice.syntax.rfc3986 import URI_reference


# Note on grouping: ``>`` binds more loosely than ``|``, ``*`` and ``%``,
# so each trailing ``> auto`` / ``> pivot`` applies to the entire
# expression on its left.

# Octets 0x20-0x7E except 0x22 and 0x5C.
NQSCHAR = (
    octet_range(0x20, 0x21) |
    octet_range(0x23, 0x5B) |
    octet_range(0x5D, 0x7E)
) > auto

# Same as above, but additionally without 0x20.
NQCHAR = (
    octet(0x21) |
    octet_range(0x23, 0x5B) |
    octet_range(0x5D, 0x7E)
) > auto

scope_token = string1(NQCHAR) > pivot
# One ``scope_token``, then further ones each preceded by a skipped ``SP``.
scope = (scope_token % many(skip(SP) * scope_token)) > pivot

error = string1(NQSCHAR) > pivot
error_description = string1(NQSCHAR) > pivot
error_uri = URI_reference > pivot

fill_names(globals(), RFC(6749))
from httpolice.syntax.rfc2616 import value
from httpolice.syntax.rfc5987 import ext_value
from httpolice.syntax.rfc7230 import OWS, token, token__excluding


# The grammar here is a slight adaptation to the rules of RFC 7230.
# The ``OWS`` come from the "implied ``*LWS``" requirement.
#
# Note on grouping: ``>`` binds more loosely than ``|``, ``<<`` and
# ``*``, so each trailing ``> pivot`` / ``> auto`` applies to the entire
# expression on its left.  The parentheses make that explicit.

# There is no need to special-case "inline" and "attachment",
# so this is simplified to any token.
disposition_type = (CaseInsensitive << token) > pivot

filename_parm = (
    (
        (CaseInsensitive << literal('filename')) *
        skip(OWS * '=' * OWS) *
        value
    ) | (
        (CaseInsensitive << literal('filename*')) *
        skip(OWS * '=' * OWS) *
        ext_value
    )
) > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` would require
# something more complex than our `string_excluding`.
# Until then, this is simplified a bit.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) *
    value
) > pivot

disposition_parm = (filename_parm | disp_ext_parm) > auto

content_disposition = (
    Parametrized << (
        disposition_type *
        (MultiDict << many(skip(OWS * ';' * OWS) * disposition_parm))
    )
) > pivot

fill_names(globals(), RFC(6266))
string_times) from httpolice.structure import ForwardedParam from httpolice.syntax.common import ALPHA, DIGIT from httpolice.syntax.rfc3986 import IPv4address, IPv6address from httpolice.syntax.rfc7230 import comma_list1, quoted_string, token def _remove_empty(xs): return [x for x in xs if x is not None] obfnode = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-') > pivot nodename = (IPv4address | skip('[') * IPv6address * skip(']') | 'unknown' | obfnode) > pivot port = int << string_times(1, 5, DIGIT) > pivot obfport = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-') > pivot node_port = port | obfport > pivot node = nodename * maybe(skip(':') * node_port) > pivot value = token | quoted_string > pivot forwarded_pair = (ForwardedParam << token) * skip('=') * value > pivot forwarded_element = _remove_empty << ( maybe(forwarded_pair) % many(skip(';') * maybe(forwarded_pair))) > pivot Forwarded = comma_list1(forwarded_element) > pivot fill_names(globals(), RFC(7239))
if mtype in _BAD_MEDIA_TYPES: complain(1282, bad=mtype, good=_BAD_MEDIA_TYPES[mtype]) return mtype def parameter(exclude=None): return ((CaseInsensitive << token__excluding(exclude or [])) * skip('=') * (token | quoted_string)) > named( u'parameter', RFC(7231), is_pivot=True) type_ = token > pivot subtype = token > pivot media_type = Parametrized << ( (_check_media_type << (MediaType << type_ + '/' + subtype)) * (MultiDict << many(skip(OWS * ';' * OWS) * parameter()))) > pivot content_coding = ContentCoding << token > pivot product_version = token > pivot product = Versioned << ( (ProductName << token) * maybe(skip('/') * product_version)) > pivot User_Agent = product % many( skip(RWS) * (product | comment(include_parens=False))) > pivot Server = product % many(skip(RWS) * (product | comment(include_parens=False))) > pivot day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E) | subst(1) << octet(0x54) * octet(0x75) * octet(0x65) | subst(2) << octet(0x57) * octet(0x65) * octet(0x64) | subst(3) << octet(0x54) * octet(0x68) * octet(0x75)
# which brings in Unicode problems. _DAY_NAMES = [u'Monday', u'Tuesday', u'Wednesday', u'Thursday', u'Friday', u'Saturady', u'Sunday'] def parameter(exclude=None): return ( (CaseInsensitive << token__excluding(exclude or [])) * skip('=') * (token | quoted_string) ) > named(u'parameter', RFC(7231), is_pivot=True) type_ = token > pivot subtype = token > pivot media_type = Parametrized << ( (MediaType << type_ + '/' + subtype) * (MultiDict << many(skip(OWS * ';' * OWS) * parameter()))) > pivot content_coding = ContentCoding << token > pivot product_version = token > pivot product = Versioned << ((ProductName << token) * maybe(skip('/') * product_version)) > pivot User_Agent = product % many(skip(RWS) * (product | comment(include_parens=False))) > pivot Server = product % many(skip(RWS) * (product | comment(include_parens=False))) > pivot day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E) | subst(1) << octet(0x54) * octet(0x75) * octet(0x65) | subst(2) << octet(0x57) * octet(0x65) * octet(0x64) | subst(3) << octet(0x54) * octet(0x68) * octet(0x75) |
# Note on grouping: ``>`` binds more loosely than ``|``, ``<<``, ``*``
# and ``+``, so each trailing ``> pivot`` / ``> auto`` applies to the
# entire expression on its left.  The parentheses make that explicit.

Transfer_Encoding = comma_list1(transfer_coding()) > pivot

# Either ``0`` with up to three fractional digits, or ``1`` with up to
# three fractional zeros; the matched text is converted with ``float``.
rank = (
    (float << ('0' + maybe_str('.' + string_times(0, 3, DIGIT)))) |
    (float << ('1' + maybe_str('.' + string_times(0, 3, '0'))))
) > pivot
t_ranking = (skip(OWS * ';' * OWS * 'q=') * rank) > pivot
t_codings = (
    (CaseInsensitive << literal('trailers')) |
    (Parametrized << (
        transfer_coding(no_trailers=True, no_q=True) * maybe(t_ranking)
    ))
) > pivot
TE = comma_list(t_codings) > pivot

Trailer = comma_list1(field_name) > pivot

# The hexadecimal digits are converted with ``int(s, 16)``.
chunk_size = ((lambda s: int(s, 16)) << string1(HEXDIG)) > pivot
chunk_ext_name = token > auto
chunk_ext_val = (token | quoted_string) > auto
chunk_ext = many(
    skip(';') * chunk_ext_name * maybe(skip('=') * chunk_ext_val)
) > pivot

Host = (uri_host + maybe_str(':' + port)) > pivot

connection_option = (ConnectionOption << token) > pivot
Connection = comma_list1(connection_option) > pivot

protocol_name = token > pivot
protocol_version = token > pivot
protocol = (
    Versioned << (
        (UpgradeToken << protocol_name) *
        maybe(skip('/') * protocol_version)
    )
) > pivot
Upgrade = comma_list1(protocol) > pivot

# ``u'HTTP'`` is given to ``maybe`` as its second argument for the
# ``protocol-name "/"`` part.
received_protocol = (
    Versioned << (
        maybe(protocol_name * skip('/'), u'HTTP') *
        protocol_version
    )
) > pivot
pseudonym = token > pivot
def _normalize_empty_value(x):
    """Turn a parsed name or ``(name, value)`` tuple into a `Parametrized`.

    RFC 7240 Section 2: "Empty or zero-length values on both
    the preference token and within parameters are equivalent
    to no value being specified at all."  Accordingly, a missing value
    and a value equal to ``u''`` both become None.
    """
    if isinstance(x, tuple):
        (name, value) = x
    else:
        (name, value) = (x, None)
    if value == u'':
        value = None
    return Parametrized(name, value)


def preference_parameter(head=False):
    """Build the ``preference-parameter`` symbol (RFC 7240, errata 4439).

    :param head: true for the first ``preference-parameter`` of a
        ``preference``, which contains the actual preference name that
        we want to annotate; its name is then wrapped in `Preference`
        instead of `CaseInsensitive`.
    :return: the assembled symbol, named ``preference-parameter`` with
        ``is_pivot=True``; results pass through `_normalize_empty_value`.
    """
    if head:
        name_cls = Preference
    else:
        name_cls = CaseInsensitive
    # Either a full parameter, or just a bare name.
    with_or_without_value = (
        parameter(name_cls=name_cls) | (name_cls << token)
    )
    symbol = _normalize_empty_value << with_or_without_value
    return symbol > named(
        u'preference-parameter', RFC(7240, errata=4439), is_pivot=True)


# Note on grouping: ``>`` binds more loosely than ``|``, ``<<`` and
# ``*``, so each trailing ``> pivot`` / ``> auto`` / ``> named(...)``
# applies to the entire expression on its left.
preference = (
    Parametrized << (
        preference_parameter(head=True) *
        many(skip(OWS * ';') * maybe(skip(OWS) * preference_parameter()))
    )
) > named(u'preference', RFC(7240, errata=4439), is_pivot=True)

Prefer = comma_list1(preference) > pivot
Preference_Applied = comma_list1(preference_parameter(head=True)) > pivot

return_ = (
    CaseInsensitive << (literal('representation') | 'minimal')
) > pivot
wait = delay_seconds > auto
handling = (CaseInsensitive << (literal('strict') | 'lenient')) > pivot

fill_names(globals(), RFC(7240))
def comma_list(element):
    """Build the ``#rule`` symbol for `element`, cited to RFC 7230.

    :param element: the symbol for one list member.
    :return: a symbol named ``#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # The list starts either with a bare comma (mapped through
    # ``subst([None, None])``) or with an element put into a one-item list.
    bare_comma = subst([None, None]) << literal(',')
    first = (lambda x: [x]) << group(element)
    # Everything after: ``OWS ","`` followed by an optional element.
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    # The entire list is wrapped in ``maybe``.
    symbol = _collect_elements << maybe((bare_comma | first) + rest)
    return symbol > named(u'#rule', RFC(7230, section=(7,)))
def comma_list1(element):
    """Build the ``1#rule`` symbol for `element`, cited to RFC 7230.

    :param element: the symbol for one list member.
    :return: a symbol named ``1#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # ``"," OWS`` fragments ahead of the first element, each mapped
    # through ``subst(None)``.
    leading_commas = many(subst(None) << (',' * OWS))
    # The first element is not wrapped in ``maybe``; its value is put
    # into a one-item list.
    first = (lambda x: [x]) << group(element)
    # Everything after: ``OWS ","`` followed by an optional element.
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    symbol = _collect_elements << (leading_commas + first + rest)
    return symbol > named(u'1#rule', RFC(7230, section=u'7'))
def transfer_extension(exclude=None, no_q=False):
    """Build the ``transfer-extension`` symbol, cited to RFC 7230.

    :param exclude: passed to `token__excluding`; a falsy value
        (including the default None) is replaced with an empty list.
    :param no_q: passed on to `transfer_parameter`.
    :return: the assembled symbol, named ``transfer-extension`` with
        ``is_pivot=True``.
    """
    coding_name = TransferCoding << token__excluding(exclude or [])
    # Each parameter is introduced by ``OWS ";" OWS``; the whole run of
    # parameters is handed to `MultiDict`.
    one_parameter = skip(OWS * ';' * OWS) * transfer_parameter(no_q)
    parameters = MultiDict << many(one_parameter)
    symbol = Parametrized << (coding_name * parameters)
    return symbol > named(u'transfer-extension', RFC(7230), is_pivot=True)
| float << '1' + maybe_str('.' + string_times(0, 3, '0'))) > pivot t_ranking = skip(OWS * ';' * OWS * 'q=') * rank > pivot t_codings = ( CaseInsensitive << literal('trailers') | Parametrized << (transfer_coding(no_trailers=True, no_q=True) * maybe(t_ranking))) > pivot TE = comma_list(t_codings) > pivot Trailer = comma_list1(field_name) > pivot chunk_size = (lambda s: int(s, 16)) << string1(HEXDIG) > pivot chunk_ext_name = token > auto chunk_ext_val = token | quoted_string > auto # As updated by RFC 7230 errata ID: 4667. chunk_ext = many( skip(BWS * ';' * BWS) * chunk_ext_name * maybe(skip(BWS * '=' * BWS) * chunk_ext_val)) > pivot Host = uri_host + maybe_str(':' + port) > pivot connection_option = ConnectionOption << token > pivot Connection = comma_list1(connection_option) > pivot protocol_name = token > pivot protocol_version = token > pivot protocol = Versioned << ((UpgradeToken << protocol_name) * maybe(skip('/') * protocol_version)) > pivot Upgrade = comma_list1(protocol) > pivot received_protocol = Versioned << (maybe(protocol_name * skip('/'), u'HTTP') * protocol_version) > pivot
def comma_list(element):
    """Build the ``#rule`` symbol for `element`, cited to RFC 7230.

    Follows RFC Errata ID: 5257.

    :param element: the symbol for one list member.
    :return: a symbol named ``#rule`` whose parse result is passed
        through `_collect_elements`.
    """
    # An optional first element, with ``OWS`` after it skipped.
    head = maybe(group(element) * skip(OWS))
    # Then any number of ``"," OWS`` separators, each followed by
    # another optional element of the same shape.
    tail = many(
        skip(literal(',') * OWS) *
        maybe(group(element) * skip(OWS))
    )
    symbol = _collect_elements << (head % tail)
    return symbol > named(u'#rule', RFC(7230, section=u'7'))
from httpolice.syntax.common import ALPHA, DIGIT
from httpolice.syntax.rfc3986 import IPv4address, IPv6address
from httpolice.syntax.rfc7230 import comma_list1, quoted_string, token


def _remove_empty(xs):
    """Return a list of the items of `xs` that are not None, in order."""
    kept = []
    for item in xs:
        if item is not None:
            kept.append(item)
    return kept


# Note on grouping: ``>`` binds more loosely than ``|``, ``<<``, ``*``,
# ``+`` and ``%``, so each trailing ``> pivot`` applies to the entire
# expression on its left.  The parentheses make that explicit.

obfnode = ('_' + string1(ALPHA | DIGIT | '.' | '_' | '-')) > pivot
nodename = (
    IPv4address |
    (skip('[') * IPv6address * skip(']')) |
    'unknown' |
    obfnode
) > pivot

# One to five digits, converted with ``int``.
port = (int << string_times(1, 5, DIGIT)) > pivot
obfport = ('_' + string1(ALPHA | DIGIT | '.' | '_' | '-')) > pivot
node_port = (port | obfport) > pivot
node = (nodename * maybe(skip(':') * node_port)) > pivot

value = (token | quoted_string) > pivot
forwarded_pair = ((ForwardedParam << token) * skip('=') * value) > pivot
forwarded_element = (
    _remove_empty << (
        maybe(forwarded_pair) %
        many(skip(';') * maybe(forwarded_pair))
    )
) > pivot

Forwarded = comma_list1(forwarded_element) > pivot

fill_names(globals(), RFC(7239))