Exemple #1
0
def test_parser_edge_cases():
    # The parser is generic enough that the regular test suite
    # leaves some of its branches unexercised.
    # Each group below is an artificial grammar built to reach one of them.

    # Alternatives with a shared prefix: ``'1'`` wins on the plain input,
    # ``'11'`` (which also wants a trailing newline) wins when it is present.
    tchars = many(rfc7230.tchar)                       > named(u'p')
    short_alt = '1' * tchars                           > named(u'p1')
    long_alt = '11' * tchars * skip('\n')              > named(u'p2')
    outcome = parse(short_alt | long_alt, b'11abc')
    assert outcome == (u'1', [u'1', u'a', u'b', u'c'])
    outcome = parse(short_alt | long_alt, b'11abc\n')
    assert outcome == (u'11', [u'a', u'b', u'c'])

    # A rule that refers to itself on the right and bottoms out in ``empty``.
    chain = recursive()                                > named(u'p')
    chain.rec = rfc7230.tchar * chain | subst(None) << empty
    outcome = parse(chain, b'abc')
    assert outcome == (u'a', (u'b', (u'c', None)))

    # ``empty`` tried before ``'ab'``, both directly and inside `string`.
    ab = literal('ab')                                 > named(u'p')
    maybe_ab = subst(u'') << empty | ab                > named(u'p0')
    plain_alt = 'xab' * maybe_ab                       > named(u'p1')
    string_alt = 'x' * string(maybe_ab) * '!'          > named(u'p2')
    outcome = parse(plain_alt | string_alt, b'xabab')
    assert outcome == (u'xab', u'ab')
    outcome = parse(plain_alt | string_alt, b'xabab!')
    assert outcome == (u'x', u'abab', u'!')

    # ``empty`` as the first alternative, followed by a literal.
    opt_a = empty | literal('a')                       > named(u'p')
    opt_a_then_x = opt_a * 'x'                         > named(u'x')
    outcome = parse(opt_a_then_x, b'x')
    assert outcome == u'x'
Exemple #2
0
def test_parser_edge_cases():
    # Contrived grammars: the parser has general-purpose branches
    # that the ordinary tests never reach, so each group here
    # is constructed only to drive one of those branches.

    # Shared-prefix alternatives; only the second one demands a newline.
    token_chars = many(rfc7230.tchar)                  > named(u'p')
    after_one = '1' * token_chars                      > named(u'p1')
    after_eleven = '11' * token_chars * skip('\n')     > named(u'p2')
    parsed = parse(after_one | after_eleven, b'11abc')
    assert parsed == (u'1', [u'1', u'a', u'b', u'c'])
    parsed = parse(after_one | after_eleven, b'11abc\n')
    assert parsed == (u'11', [u'a', u'b', u'c'])

    # Self-referencing rule whose second alternative is ``empty``.
    nested = recursive()                               > named(u'p')
    nested.rec = rfc7230.tchar * nested | subst(None) << empty
    parsed = parse(nested, b'abc')
    assert parsed == (u'a', (u'b', (u'c', None)))

    # ``empty`` listed ahead of a literal, used bare and under `string`.
    lit_ab = literal('ab')                             > named(u'p')
    opt_ab = subst(u'') << empty | lit_ab              > named(u'p0')
    bare = 'xab' * opt_ab                              > named(u'p1')
    wrapped = 'x' * string(opt_ab) * '!'               > named(u'p2')
    parsed = parse(bare | wrapped, b'xabab')
    assert parsed == (u'xab', u'ab')
    parsed = parse(bare | wrapped, b'xabab!')
    assert parsed == (u'x', u'abab', u'!')

    # ``empty`` first, then a mandatory literal after the alternative.
    opt_a = empty | literal('a')                       > named(u'p')
    then_x = opt_a * 'x'                               > named(u'x')
    parsed = parse(then_x, b'x')
    assert parsed == u'x'
Exemple #3
0
def link_extension(exclude_builtin):
    # Build the ``link-extension`` rule (RFC 5988).
    # With `exclude_builtin`, the names from `_builtin_params` are split:
    # those without a trailing ``*`` are excluded from the plain form,
    # the others (trailing asterisks stripped) from the ``ext-name-star`` form.
    # Without it, ``None`` is passed as both exclusion lists.
    plain_excluded = starred_excluded = None
    if exclude_builtin:
        plain_excluded = [
            name for name in _builtin_params if not name.endswith('*')
        ]
        starred_excluded = [
            name.rstrip('*') for name in _builtin_params if name.endswith('*')
        ]

    plain_form = (
        (CaseInsensitive << parmname__excluding(plain_excluded)) *
        maybe(skip(OWS * '=' * OWS) * (ptoken | quoted_string))
    )
    starred_form = (
        (CaseInsensitive << ext_name_star__excluding(starred_excluded)) *
        skip(OWS * '=' * OWS) * ext_value
    )
    return (plain_form | starred_form) > named(
        u'link-extension', RFC(5988), is_pivot=True)
Exemple #4
0
def link_extension(exclude_builtin):
    # Build the ``link-extension`` rule (RFC 5988), optionally excluding
    # the parameter names listed in `_builtin_params`.
    if exclude_builtin:
        # Names ending in ``*`` go (with trailing asterisks stripped)
        # to the ``ext-name-star`` exclusions, all others to the plain ones.
        plain_excluded = []
        starred_excluded = []
        for name in _builtin_params:
            if name.endswith('*'):
                starred_excluded.append(name.rstrip('*'))
            else:
                plain_excluded.append(name)
    else:       # pragma: no cover
        plain_excluded = starred_excluded = None

    plain_form = (
        (CaseInsensitive << parmname__excluding(plain_excluded)) *
        maybe(skip(OWS * '=' * OWS) * (ptoken | quoted_string))
    )
    starred_form = (
        (CaseInsensitive << ext_name_star__excluding(starred_excluded)) *
        skip(OWS * '=' * OWS) * ext_value
    )
    return (plain_form | starred_form) > named(
        u'link-extension', RFC(5988), is_pivot=True)
Exemple #5
0
def _parse_chunk(stream):
    # Read one chunk from `stream`: the ``chunk_size`` (followed by an
    # optional ``chunk_ext`` wrapped in `skip`) and a line ending.
    # A size of 0 returns ``b''`` right away; otherwise `size` bytes
    # are consumed, another line ending is parsed, and the bytes are returned.
    size_rule = rfc7230.chunk_size * skip(maybe(rfc7230.chunk_ext))
    size = stream.parse(size_rule)
    _parse_line_ending(stream)
    if size == 0:
        return b''
    payload = stream.consume_n_bytes(size)
    _parse_line_ending(stream)
    return payload
Exemple #6
0
def _parse_chunk(stream):
    # Parse a single chunk: size (plus optional, skipped ``chunk_ext``),
    # line ending, and -- unless the size is 0 -- that many bytes of data
    # followed by one more line ending. Returns the data bytes,
    # or ``b''`` for a zero-sized chunk.
    header = rfc7230.chunk_size * skip(maybe(rfc7230.chunk_ext))
    size = stream.parse(header)
    _parse_line_ending(stream)
    if size == 0:
        return b''
    chunk_bytes = stream.consume_n_bytes(size)
    _parse_line_ending(stream)
    return chunk_bytes
Exemple #7
0
def quoted_pair(sensible_for):
    # RFC 7230 has just one ``<quoted-pair>`` rule. We build it per context
    # so that notice 1017 is reported for any escaped character
    # that is not listed in `sensible_for`.
    @can_complain
    def check_sensible(complain, c):
        if c in sensible_for:
            return c
        complain(1017, char=c)
        return c

    backslash = skip('\\')
    escaped = HTAB | SP | VCHAR | obs_text
    return (check_sensible << backslash * escaped) > named(
        u'quoted-pair', RFC(7230))
Exemple #8
0
def quoted_pair(sensible_for):
    # ``<quoted-pair>`` is a single rule in RFC 7230; here it is a factory,
    # so that each context can say which escaped characters make sense.
    # Anything else is still accepted but triggers notice 1017.
    @can_complain
    def check_sensible(complain, c):
        is_sensible = c in sensible_for
        if not is_sensible:
            complain(1017, char=c)
        return c

    backslash = skip('\\')
    following = HTAB | SP | VCHAR | obs_text
    pair = check_sensible << backslash * following
    return pair > named(u'quoted-pair', RFC(7230))
Exemple #9
0
def media_range(no_q=False):
    # Build the ``media-range`` rule (RFC 7231).
    # With `no_q`, the name ``q`` is excluded from the parameters.
    if no_q:
        excluded = ['q']
    else:
        excluded = []

    any_type = literal('*/*')
    any_subtype = type_ + '/' + '*'
    concrete = MediaType << type_ + '/' + subtype
    range_ = any_type | any_subtype | concrete

    one_param = skip(OWS * ';' * OWS) * parameter(exclude=excluded)
    params = MultiDict << many(one_param)

    return (Parametrized << range_ * params) > named(
        u'media-range', RFC(7231), is_pivot=True)
Exemple #10
0
# ``>`` has the lowest precedence of the operators used below,
# so the whole expression to its left is the left operand of ``> pivot``
# or ``> auto``.
parmname = string(attr_char) > pivot

# We don't need to special-case "UTF-8", simplify.
mime_charsetc = (ALPHA | DIGIT | '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^'
                 | '_' | '`' | '{' | '}' | '~') > auto
mime_charset = string1(mime_charsetc) > auto
# ``<<`` binds before ``>``: ``CaseInsensitive`` is combined
# with ``mime_charset`` first.
charset = CaseInsensitive << mime_charset > pivot

# A literal ``%`` and two ``HEXDIG``, combined with ``+``.
pct_encoded = '%' + HEXDIG + HEXDIG > auto
# NOTE(review): presumably the matched string goes through ``force_bytes``
# first and ``pct_decode`` second -- confirm against ``httpolice.parse``.
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char)) > auto


@can_complain
def _check_ext_value(complain, val):
    # Report problems with the charset of `val` and return `val` unchanged.
    # Any charset other than UTF-8 gets notice 1253. For UTF-8,
    # the raw bytes are trial-decoded, and a failure gets notice 1254.
    is_utf8 = (val.charset == u'UTF-8')
    if not is_utf8:
        complain(1253, charset=val.charset)
        return val
    try:
        val.value_bytes.decode(val.charset)
    except UnicodeError as exc:
        complain(1254, charset=val.charset, error=exc)
    return val


# ``charset``, a skipped ``'``, an optional ``language``, another skipped
# ``'``, and ``value_chars``; the sequence is combined with ``ExtValue``
# and that in turn with ``_check_ext_value``.
ext_value = _check_ext_value << (
    ExtValue <<
    (charset * skip("'") * maybe(language) * skip("'") * value_chars)) > pivot

# Hand this module's globals and the RFC 8187 citation to ``fill_names``.
# NOTE(review): presumably this names the rules after their variables --
# confirm against ``httpolice.parse``.
fill_names(globals(), RFC(8187))
Exemple #11
0
# -*- coding: utf-8; -*-

from httpolice.parse import (fill_names, literal, maybe, pivot, skip, string1,
                             subst)
from httpolice.syntax.common import DIGIT
from httpolice.syntax.rfc7230 import RWS, comma_list

# One or more ``DIGIT``, combined with ``int``.
notice_id = int << string1(DIGIT) > pivot
# The literal ``resp``, combined with ``subst(True)``.
resp = subst(True) << literal('resp') > pivot
# Comma-separated list whose elements are a ``notice_id``
# optionally followed by skipped ``RWS`` and ``resp``.
HTTPolice_Silence = comma_list(notice_id * maybe(skip(RWS) * resp)) > pivot

# These rules have no citation, hence ``citation=None``.
fill_names(globals(), citation=None)
Exemple #12
0
from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, literal, many, pivot, skip
from httpolice.structure import CaseInsensitive, MultiDict, Parametrized
from httpolice.syntax.rfc2616 import value
from httpolice.syntax.rfc5987 import ext_value
from httpolice.syntax.rfc7230 import OWS, token, token__excluding

# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

# We have no need to special-case "inline" and "attachment", simplify.
disposition_type = CaseInsensitive << token > pivot

# Two alternatives: ``filename`` with a ``value``,
# or ``filename*`` with an ``ext_value``.
# In both, the ``=`` and the ``OWS`` around it are wrapped in ``skip``.
filename_parm = (
    (CaseInsensitive << literal('filename')) * skip(OWS * '=' * OWS) * value |
    (CaseInsensitive << literal('filename*')) * skip(OWS * '=' * OWS) *
    ext_value) > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` requires
# something more complex than our `string_excluding`.
# Until then, we can simplify a bit.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) * value) > pivot

# ``|`` binds before ``>``, so both alternatives are on the left of ``> auto``.
disposition_parm = filename_parm | disp_ext_parm > auto

content_disposition = Parametrized << (
    disposition_type *
Exemple #13
0
def media_range(no_q=False):
    # Build the ``media-range`` rule (RFC 7231).
    # With `no_q`, the name ``q`` is excluded from the parameters.
    # Only the concrete ``type/subtype`` alternative
    # goes through `_check_media_type`.
    if no_q:
        excluded = ['q']
    else:
        excluded = []

    any_type = literal('*/*')
    any_subtype = type_ + '/' + '*'
    concrete = _check_media_type << (MediaType << type_ + '/' + subtype)
    range_ = any_type | any_subtype | concrete

    one_param = skip(OWS * ';' * OWS) * parameter(exclude=excluded)
    params = MultiDict << many(one_param)

    return (Parametrized << range_ * params) > named(
        u'media-range', RFC(7231), is_pivot=True)
Exemple #14
0
from httpolice.citation import RFC
from httpolice.parse import (auto, fill_names, many, octet, octet_range, pivot,
                             skip, string1)
from httpolice.syntax.common import SP
from httpolice.syntax.rfc3986 import URI_reference

# Octets 0x20-0x21, 0x23-0x5B and 0x5D-0x7E:
# the range 0x20-0x7E without 0x22 (``"``) and 0x5C (``\``).
NQSCHAR = (octet_range(0x20, 0x21) | octet_range(0x23, 0x5B)
           | octet_range(0x5D, 0x7E)) > auto
# Same as ``NQSCHAR`` but starting at 0x21, i.e. also without 0x20 (space).
NQCHAR = (octet(0x21) | octet_range(0x23, 0x5B)
          | octet_range(0x5D, 0x7E)) > auto

scope_token = string1(NQCHAR) > pivot
# A ``scope_token``, then any number of further ones,
# each preceded by a skipped ``SP``.
scope = scope_token % many(skip(SP) * scope_token) > pivot

error = string1(NQSCHAR) > pivot
error_description = string1(NQSCHAR) > pivot
error_uri = URI_reference > pivot

# Hand this module's globals and the RFC 6749 citation to ``fill_names``.
fill_names(globals(), RFC(6749))
Exemple #15
0
# A ``token`` combined with ``_check_protocol_id``
# (``<<`` binds before ``>``).
protocol_id = _check_protocol_id << token > pivot


@can_complain
def _check_alt_authority(complain, value):
    # Parse `value` as an optional ``uri_host``, a colon, and a ``port``.
    # `complain`, notice number 1257 and ``authority=value``
    # are forwarded to `parse`, whose result is returned.
    authority_rule = maybe_str(uri_host) + ':' + port
    return parse(value, authority_rule, complain, 1257, authority=value)


# A ``quoted_string`` combined with ``_check_alt_authority``.
alt_authority = _check_alt_authority << quoted_string > pivot

# ``protocol_id``, a skipped ``=``, and ``alt_authority``.
alternative = protocol_id * skip('=') * alt_authority > pivot
# The parameter name is a ``token`` combined with ``AltSvcParam``;
# the value is either a ``token`` or a ``quoted_string``.
parameter = ((AltSvcParam << token) * skip('=') *
             (token | quoted_string)) > pivot
# An ``alternative`` followed by any number of ``parameter``,
# each preceded by a skipped ``;`` with ``OWS`` on both sides.
alt_value = Parametrized << (
    alternative *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot

# Either ``clear`` or a ``comma_list1`` of ``alt_value``.
Alt_Svc = clear | comma_list1(alt_value) > pivot

ma = delta_seconds > pivot
persist = subst(True) << literal('1') > pivot

# ``uri_host`` with an optional ``:`` and ``port`` appended.
Alt_Used = uri_host + maybe_str(':' + port) > pivot

# Hand this module's globals and the RFC 7838 citation to ``fill_names``.
fill_names(globals(), RFC(7838))
Exemple #16
0
def extension_pragma(exclude_no_cache=False):
    # Build the ``extension-pragma`` rule (RFC 7234): a name with an optional
    # ``=`` and value. With `exclude_no_cache`, the name ``no-cache``
    # is excluded from the names.
    if exclude_no_cache:
        name = token__excluding(['no-cache'])
    else:
        name = token
    argument = maybe(skip('=') * (token | quoted_string))
    return (Parametrized << name * argument) > named(
        u'extension-pragma', RFC(7234), is_pivot=True)
Exemple #17
0
def comma_list1(element):
    # ``1#element`` (RFC 7230 Section 7), built from three parts joined
    # with ``+`` and then combined with `_collect_elements`:
    # any number of leading commas, one `element` wrapped into a list,
    # and any number of further commas each optionally followed by `element`.
    leading_commas = many(subst(None) << ',' * OWS)
    first = (lambda x: [x]) << group(element)
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    return (_collect_elements << leading_commas + first + rest) > named(
        u'1#rule', RFC(7230, section=(7,)))
Exemple #18
0
def comma_list(element):
    # RFC Errata ID: 5257
    # ``#element``: an optional `element`, then any number of commas,
    # each optionally followed by another `element`. Every `element`
    # may be followed by ``OWS``, and so may every comma.
    head = maybe(group(element) * skip(OWS))
    tail = many(
        skip(literal(',') * OWS) * maybe(group(element) * skip(OWS)))
    return (_collect_elements << head % tail) > named(
        u'#rule', RFC(7230, section=u'7'))
Exemple #19
0
from httpolice.parse import (fill_names, literal, mark, maybe, maybe_str,
                             named, pivot, skip, string1, string_times)
from httpolice.structure import (CacheDirective, CaseInsensitive, Parametrized,
                                 WarnCode, WarningValue)
from httpolice.syntax.common import DIGIT, DQUOTE, SP
from httpolice.syntax.rfc7230 import (comma_list, comma_list1, field_name,
                                      port, pseudonym, quoted_string, token,
                                      token__excluding, uri_host)
from httpolice.syntax.rfc7231 import HTTP_date

# One or more ``DIGIT``, combined with ``int``.
delta_seconds = int << string1(DIGIT) > pivot
Age = delta_seconds > pivot

# A ``token`` combined with ``CacheDirective``, optionally followed by
# a skipped ``=`` and either a ``token`` or a ``quoted_string``
# (each wrapped in ``mark``).
cache_directive = Parametrized << (
    (CacheDirective << token) *
    maybe(skip('=') * (mark(token) | mark(quoted_string)))) > pivot
Cache_Control = comma_list1(cache_directive) > pivot

# RFC 7234 does not, strictly speaking, define these productions:
no_cache = comma_list(field_name) > pivot
private = comma_list(field_name) > pivot

Expires = HTTP_date > pivot


def extension_pragma(exclude_no_cache=False):
    # ``extension-pragma`` of RFC 7234: a name, then optionally ``=``
    # and a ``token`` or ``quoted_string``.
    # `exclude_no_cache` excludes the name ``no-cache`` from the names.
    if exclude_no_cache:
        pragma_name = token__excluding(['no-cache'])
    else:
        pragma_name = token
    pragma_value = maybe(skip('=') * (token | quoted_string))
    rule = Parametrized << pragma_name * pragma_value
    return rule > named(u'extension-pragma', RFC(7234), is_pivot=True)
Exemple #20
0
def comma_list(element):
    # ``#element`` (RFC 7230 Section 7). The whole list is optional.
    # It starts with either a lone comma (substituted with two ``None``)
    # or one `element` wrapped into a list, and continues with any number
    # of commas, each optionally followed by another `element`.
    lone_comma = subst([None, None]) << literal(',')
    first = (lambda x: [x]) << group(element)
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    whole = maybe((lone_comma | first) + rest)
    return (_collect_elements << whole) > named(
        u'#rule', RFC(7230, section=(7, )))
Exemple #21
0
    # RFC 7240 Section 2: "Empty or zero-length values on both
    # the preference token and within parameters are equivalent
    # to no value being specified at all."
    (name, value) = x if isinstance(x, tuple) else (x, None)
    return Parametrized(name, None if value == u'' else value)

def preference_parameter(head=False):
    # The first (head) ``preference-parameter`` of a ``preference``
    # carries the actual preference name, which we want to annotate,
    # so its name gets wrapped in `Preference` rather than `CaseInsensitive`.
    if head:
        name_cls = Preference
    else:
        name_cls = CaseInsensitive
    with_value = parameter(name_cls=name_cls)
    bare_name = name_cls << token
    return (_normalize_empty_value << (with_value | bare_name)) > named(
        u'preference-parameter', RFC(7240, errata=4439), is_pivot=True)

# The head ``preference-parameter``, then any number of ``;``
# (with ``OWS`` before), each optionally followed by ``OWS``
# and a non-head ``preference-parameter``.
preference = Parametrized << (
    preference_parameter(head=True) *
    many(skip(OWS * ';') * maybe(skip(OWS) * preference_parameter()))
) > named(u'preference', RFC(7240, errata=4439), is_pivot=True)

Prefer = comma_list1(preference)                                        > pivot

# Every element here is built with ``head=True``.
Preference_Applied = comma_list1(preference_parameter(head=True))       > pivot


# ``representation`` or ``minimal``, combined with ``CaseInsensitive``.
return_ = CaseInsensitive << (literal('representation') | 'minimal')    > pivot
wait = delay_seconds                                                    > auto
# ``strict`` or ``lenient``, combined with ``CaseInsensitive``.
handling = CaseInsensitive << (literal('strict') | 'lenient')           > pivot

# Hand this module's globals and the RFC 7240 citation to ``fill_names``.
fill_names(globals(), RFC(7240))
Exemple #22
0
# Any ``token`` other than ``bytes``, combined with ``RangeUnit``.
other_range_unit = RangeUnit << token__excluding(['bytes'])             > auto
range_unit = bytes_unit | other_range_unit                              > pivot
# The literal ``none`` is substituted with an empty list;
# otherwise a ``comma_list1`` of ``range_unit``.
acceptable_ranges = (
    subst([]) << literal('none') |
    comma_list1(range_unit))                                            > pivot
Accept_Ranges = acceptable_ranges                                       > pivot

@can_complain
def _well_formed1(complain, first, last):
    # Report notice 1133 when `last` is given and is smaller than `first`.
    # The pair is returned as is in every case.
    if last is None:
        return (first, last)
    if first > last:
        complain(1133)
    return (first, last)

first_byte_pos = int << string1(DIGIT)                                  > auto
last_byte_pos = int << string1(DIGIT)                                   > auto
# ``first_byte_pos``, a skipped ``-``, and an optional ``last_byte_pos``,
# combined with ``_well_formed1``.
byte_range_spec = _well_formed1 << (first_byte_pos * skip('-') *
                                    maybe(last_byte_pos))               > pivot

suffix_length = int << string1(DIGIT)                                   > auto
# The lambda turns its argument ``x`` into ``(None, x)``.
suffix_byte_range_spec = \
    (lambda x: (None, x)) << skip('-') * suffix_length                  > pivot

byte_range_set = comma_list1(byte_range_spec | suffix_byte_range_spec)  > auto
# ``bytes_unit``, a skipped ``=``, and ``byte_range_set``.
byte_ranges_specifier = RangeSpecifier << (
    bytes_unit * skip('=') * byte_range_set)                            > pivot

other_range_set = string1(VCHAR)                                        > auto
# ``other_range_unit``, a skipped ``=``, and ``other_range_set``.
other_ranges_specifier = RangeSpecifier << (
    other_range_unit * skip('=') * other_range_set)                     > pivot

Range = byte_ranges_specifier | other_ranges_specifier                  > pivot
Exemple #23
0
def transfer_parameter(no_q=False):
    # Build the ``transfer-parameter`` rule (RFC 7230): name, ``=``
    # surrounded by ``BWS``, and a ``token`` or ``quoted_string`` value.
    # With `no_q`, the name ``q`` is excluded from the names.
    if no_q:
        name = token__excluding(['q'])
    else:
        name = token
    equals = skip(BWS * '=' * BWS)
    value = token | quoted_string
    return (name * equals * value) > named(
        u'transfer-parameter', RFC(7230), is_pivot=True)
Exemple #24
0
from httpolice.citation import RFC
from httpolice.parse import (
    auto,
    fill_names,
    many,
    octet,
    octet_range,
    pivot,
    skip,
    string1,
)
from httpolice.syntax.common import SP
from httpolice.syntax.rfc3986 import URI_reference


# Octets 0x20-0x21, 0x23-0x5B and 0x5D-0x7E:
# the range 0x20-0x7E without 0x22 (``"``) and 0x5C (``\``).
NQSCHAR = (octet_range(0x20, 0x21) | octet_range(0x23, 0x5B) |
           octet_range(0x5D, 0x7E))                                     > auto
# Same as ``NQSCHAR`` but starting at 0x21, i.e. also without 0x20 (space).
NQCHAR = (octet(0x21) | octet_range(0x23, 0x5B) |
          octet_range(0x5D, 0x7E))                                      > auto

scope_token = string1(NQCHAR)                                           > pivot
# A ``scope_token``, then any number of further ones,
# each preceded by a skipped ``SP``.
scope = scope_token % many(skip(SP) * scope_token)                      > pivot

error = string1(NQSCHAR)                                                > pivot
error_description = string1(NQSCHAR)                                    > pivot
error_uri = URI_reference                                               > pivot


# Hand this module's globals and the RFC 6749 citation to ``fill_names``.
fill_names(globals(), RFC(6749))
Exemple #25
0
        if symbol is not None:
            value = parse(value,
                          symbol,
                          complain,
                          1158,
                          name=name,
                          value=value)
        r.append((name, value))
        if name == u'rev':
            complain(1226)
    if u'rel' not in seen:
        complain(1309)
    return MultiDict(r)


# A ``token`` combined with ``CaseInsensitive``, skipped ``BWS``,
# and optionally a skipped ``=`` (with ``BWS`` after) followed by
# a ``token`` or ``quoted_string`` (each wrapped in ``mark``).
link_param = ((CaseInsensitive << token) * skip(BWS) * maybe(
    skip(literal('=') * BWS) * (mark(token) | mark(quoted_string)))) > pivot

# ``URI_Reference`` between skipped ``<`` and ``>``, followed by
# any number of ``;``-prefixed ``link_param``,
# which are combined with ``_process_params``.
link_value = Parametrized << (
    skip('<') * URI_Reference * skip('>') *
    (_process_params << many(skip(OWS * ';' * OWS) * link_param))) > pivot

Link = comma_list(link_value) > pivot

anchor = URI_Reference > auto

# One ``LOALPHA`` followed by a string of ``LOALPHA``, ``DIGIT``,
# ``.`` or ``-``.
reg_rel_type = CaseInsensitive << (LOALPHA +
                                   string(LOALPHA | DIGIT | '.' | '-')) > auto
ext_rel_type = URI > auto
relation_type = reg_rel_type | ext_rel_type > pivot
# ``rel`` and ``rev`` are bound to the very same object: one or more
# ``relation_type`` separated by skipped runs of ``SP``.
rel = rev = relation_type % many(skip(string1(SP)) * relation_type) > auto
Exemple #26
0
# A string of ``VCHAR``, ``HTAB`` or ``SP``, from which ``"``, ``;``
# and ``,`` are removed with ``-``.
_MediaDesc_no_delim = string((VCHAR | HTAB | SP) -
                             literal('"') - literal(';') - literal(','))


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

ptokenchar = (literal('!') | '#' | '$' | '%' | '&' | "'" | '(' |
              ')' | '*' | '+' | '-' | '.' | '/' | DIGIT |
              ':' | '<' | '=' | '>' | '?' | '@' | ALPHA |
              '[' | ']' | '^' | '_' | '`' | '{' | '|' |
              '}' | '~')                                                > auto
ptoken = string1(ptokenchar)                                            > auto

media_type = MediaType << type_name + '/' + subtype_name                > pivot
# A ``media_type`` between skipped double quotes.
quoted_mt = skip('"') * media_type * skip('"')                          > pivot

# One ``LOALPHA`` followed by a string of ``LOALPHA``, ``DIGIT``,
# ``.`` or ``-``.
reg_rel_type = RelationType << (
    LOALPHA + string(LOALPHA | DIGIT | '.' | '-'))                      > auto
ext_rel_type = URI                                                      > auto
relation_type = reg_rel_type | ext_rel_type                             > pivot
# Either a single unquoted ``relation_type`` (wrapped into a list by
# the lambda), or a double-quoted run of them separated by ``SP``.
relation_types = (
    (lambda x: [x]) << relation_type |
    skip('"' * OWS) *
    (relation_type % many(skip(string1(SP)) * relation_type)) *
    skip(OWS * '"'))                                                    > pivot

def ext_name_star__excluding(exclude):
    # ``ext-name-star`` (RFC 5988): a ``parmname`` built with the `exclude`
    # list, with a literal ``*`` appended.
    starred_name = parmname__excluding(exclude) + '*'
    return starred_name > named(u'ext-name-star', RFC(5988))
Exemple #27
0
# which brings in Unicode problems.
_DAY_NAMES = [u'Monday', u'Tuesday', u'Wednesday', u'Thursday', u'Friday',
              u'Saturday', u'Sunday']   # full English names, Monday first


def parameter(exclude=None):
    # Build the ``parameter`` rule (RFC 7231): a name combined with
    # `CaseInsensitive`, a skipped ``=``, and a ``token`` or
    # ``quoted_string`` value. Names listed in `exclude` are excluded;
    # a missing or empty `exclude` excludes nothing.
    excluded_names = exclude or []
    name = CaseInsensitive << token__excluding(excluded_names)
    name_eq = name * skip('=')
    value = token | quoted_string
    return (name_eq * value) > named(u'parameter', RFC(7231), is_pivot=True)

type_ = token                                                           > pivot
subtype = token                                                         > pivot
# ``type_``, ``/`` and ``subtype`` joined with ``+`` and combined with
# ``MediaType``, followed by any number of ``;``-prefixed parameters.
media_type = Parametrized << (
    (MediaType << type_ + '/' + subtype) *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter())))           > pivot

content_coding = ContentCoding << token                                 > pivot

product_version = token                                                 > pivot
# A ``token`` combined with ``ProductName``, optionally followed by
# a skipped ``/`` and a ``product_version``.
product = Versioned << ((ProductName << token) *
                        maybe(skip('/') * product_version))             > pivot
# ``User_Agent`` and ``Server`` share the same shape: a ``product``, then
# any number of ``RWS``-separated products or comments (without parens).
User_Agent = product % many(skip(RWS) *
                            (product | comment(include_parens=False)))  > pivot
Server = product % many(skip(RWS) *
                        (product | comment(include_parens=False)))      > pivot

day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E) |
            subst(1) << octet(0x54) * octet(0x75) * octet(0x65) |
            subst(2) << octet(0x57) * octet(0x65) * octet(0x64) |
            subst(3) << octet(0x54) * octet(0x68) * octet(0x75) |
Exemple #28
0
# -*- coding: utf-8; -*-

from httpolice.parse import (
    fill_names,
    literal,
    maybe,
    pivot,
    skip,
    string1,
    subst,
)
from httpolice.syntax.common import DIGIT
from httpolice.syntax.rfc7230 import RWS, comma_list


# One or more ``DIGIT``, combined with ``int``.
notice_id = int << string1(DIGIT)                                       > pivot
# The literal ``resp``, combined with ``subst(True)``.
resp = subst(True) << literal('resp')                                   > pivot
# Comma-separated list whose elements are a ``notice_id``
# optionally followed by skipped ``RWS`` and ``resp``.
HTTPolice_Silence = comma_list(notice_id * maybe(skip(RWS) * resp))     > pivot


# These rules have no citation, hence ``citation=None``.
fill_names(globals(), citation=None)
Exemple #29
0
        else:
            correct_encoded_id += pct_encode(c, safe='').upper()
    if encoded_id != correct_encoded_id:
        complain(1256, actual=encoded_id, correct=correct_encoded_id)
    return decoded_id

# A ``token`` combined with ``_check_protocol_id``
# (``<<`` binds before ``>``).
protocol_id = _check_protocol_id << token                               > pivot

@can_complain
def _check_alt_authority(complain, value):
    # Parse `value` as an optional ``uri_host``, a colon, and a ``port``.
    # `complain`, notice number 1257 and ``authority=value``
    # are forwarded to `parse`, whose result is returned.
    host_and_port = maybe_str(uri_host) + ':' + port
    return parse(value, host_and_port, complain, 1257, authority=value)

# A ``quoted_string`` combined with ``_check_alt_authority``.
alt_authority = _check_alt_authority << quoted_string                   > pivot

# ``protocol_id``, a skipped ``=``, and ``alt_authority``.
alternative = protocol_id * skip('=') * alt_authority                   > pivot
# The parameter name is a ``token`` combined with ``AltSvcParam``;
# the value is either a ``token`` or a ``quoted_string``.
parameter = ((AltSvcParam << token) *
             skip('=') * (token | quoted_string))                       > pivot
# An ``alternative`` followed by any number of ``parameter``,
# each preceded by a skipped ``;`` with ``OWS`` on both sides.
alt_value = Parametrized << (
    alternative *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter)))             > pivot

# Either ``clear`` or a ``comma_list1`` of ``alt_value``.
Alt_Svc = clear | comma_list1(alt_value)                                > pivot

ma = delta_seconds                                                      > pivot
persist = subst(True) << literal('1')                                   > pivot

# ``uri_host`` with an optional ``:`` and ``port`` appended.
Alt_Used = uri_host + maybe_str(':' + port)                             > pivot


# Hand this module's globals and the RFC 7838 citation to ``fill_names``.
fill_names(globals(), RFC(7838))
Exemple #30
0
from httpolice.structure import CaseInsensitive, MultiDict, Parametrized
from httpolice.syntax.rfc2616 import value
from httpolice.syntax.rfc5987 import ext_value
from httpolice.syntax.rfc7230 import OWS, token, token__excluding


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.


# We have no need to special-case "inline" and "attachment", simplify.
disposition_type = CaseInsensitive << token                             > pivot

# Two alternatives: ``filename`` with a ``value``,
# or ``filename*`` with an ``ext_value``.
# In both, the ``=`` and the ``OWS`` around it are wrapped in ``skip``.
filename_parm = (
    (CaseInsensitive << literal('filename')) *
    skip(OWS * '=' * OWS) * value |
    (CaseInsensitive << literal('filename*')) *
    skip(OWS * '=' * OWS) * ext_value)                                  > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` requires
# something more complex than our `string_excluding`.
# Until then, we can simplify a bit.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) * value)                                      > pivot

# ``|`` binds before ``>``, so both alternatives are on the left of ``> auto``.
disposition_parm = filename_parm | disp_ext_parm                        > auto

content_disposition = Parametrized << (
    disposition_type *
Exemple #31
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, many, maybe, pivot, skip, string1
from httpolice.structure import HSTSDirective, Parametrized
from httpolice.syntax.common import DIGIT
from httpolice.syntax.rfc7230 import OWS, quoted_string, token


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

directive_name = HSTSDirective << token                                 > auto
directive_value = token | quoted_string                                 > auto
# A ``directive_name``, optionally followed by a skipped ``=``
# (with ``OWS`` on both sides) and a ``directive_value``.
directive = Parametrized << (
    directive_name * maybe(skip(OWS * '=' * OWS) * directive_value))    > pivot

def _collect_elements(xs):
    # Return a new list with the items of `xs` in their original order,
    # leaving out every item that is ``None``.
    kept = []
    for item in xs:
        if item is None:
            continue
        kept.append(item)
    return kept

# An optional ``directive``, then any number of ``;`` (with ``OWS`` on both
# sides), each optionally followed by another ``directive``.
# ``_collect_elements`` then drops the ``None`` entries.
Strict_Transport_Security = _collect_elements << (
    maybe(directive) % many(skip(OWS * ';' * OWS) * maybe(directive)))  > pivot

max_age_value = int << string1(DIGIT)                                   > pivot

# Hand this module's globals and the RFC 6797 citation to ``fill_names``.
fill_names(globals(), RFC(6797))
Exemple #32
0
from httpolice.citation import RFC
from httpolice.parse import (fill_names, many, maybe, pivot, skip, string1,
                             string_times)
from httpolice.structure import ForwardedParam
from httpolice.syntax.common import ALPHA, DIGIT
from httpolice.syntax.rfc3986 import IPv4address, IPv6address
from httpolice.syntax.rfc7230 import comma_list1, quoted_string, token


def _remove_empty(xs):
    # Return a new list with the items of `xs` in their original order,
    # leaving out every item that is ``None``.
    present = []
    for x in xs:
        if x is not None:
            present.append(x)
    return present


# An underscore followed by one or more of ``ALPHA``, ``DIGIT``,
# ``.``, ``_`` or ``-``.
obfnode = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-') > pivot
# An IPv4 address, a bracketed IPv6 address (brackets skipped),
# the literal ``unknown``, or an ``obfnode``.
nodename = (IPv4address | skip('[') * IPv6address * skip(']') | 'unknown'
            | obfnode) > pivot

# One to five ``DIGIT``, combined with ``int``.
port = int << string_times(1, 5, DIGIT) > pivot
# Same shape as ``obfnode``.
obfport = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-') > pivot
node_port = port | obfport > pivot

# A ``nodename``, optionally followed by a skipped ``:`` and a ``node_port``.
node = nodename * maybe(skip(':') * node_port) > pivot

value = token | quoted_string > pivot
forwarded_pair = (ForwardedParam << token) * skip('=') * value > pivot

# An optional ``forwarded_pair``, then any number of ``;``, each optionally
# followed by another pair. ``_remove_empty`` then drops the ``None`` entries.
forwarded_element = _remove_empty << (
    maybe(forwarded_pair) % many(skip(';') * maybe(forwarded_pair))) > pivot

Forwarded = comma_list1(forwarded_element) > pivot
Exemple #33
0
    if mtype in _BAD_MEDIA_TYPES:
        complain(1282, bad=mtype, good=_BAD_MEDIA_TYPES[mtype])
    return mtype


def parameter(exclude=None):
    # ``parameter`` of RFC 7231: a case-insensitive name, a skipped ``=``,
    # and a ``token`` or ``quoted_string`` value.
    # `exclude` lists names to exclude; ``None`` (or empty) excludes nothing.
    excluded_names = exclude or []
    name = CaseInsensitive << token__excluding(excluded_names)
    name_eq = name * skip('=')
    value = token | quoted_string
    return (name_eq * value) > named(u'parameter', RFC(7231), is_pivot=True)


type_ = token > pivot
subtype = token > pivot
# ``type_``, ``/`` and ``subtype`` joined with ``+``, combined with
# ``MediaType`` and then ``_check_media_type``,
# followed by any number of ``;``-prefixed parameters.
media_type = Parametrized << (
    (_check_media_type << (MediaType << type_ + '/' + subtype)) *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter()))) > pivot

content_coding = ContentCoding << token > pivot

product_version = token > pivot
# A ``token`` combined with ``ProductName``, optionally followed by
# a skipped ``/`` and a ``product_version``.
product = Versioned << (
    (ProductName << token) * maybe(skip('/') * product_version)) > pivot
# ``User_Agent`` and ``Server`` share the same shape: a ``product``, then
# any number of ``RWS``-separated products or comments (without parens).
User_Agent = product % many(
    skip(RWS) * (product | comment(include_parens=False))) > pivot
Server = product % many(skip(RWS) *
                        (product | comment(include_parens=False))) > pivot

day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E)
            | subst(1) << octet(0x54) * octet(0x75) * octet(0x65)
            | subst(2) << octet(0x57) * octet(0x65) * octet(0x64)
            | subst(3) << octet(0x54) * octet(0x68) * octet(0x75)
Exemple #34
0
def _normalize_empty_value(x):
    # RFC 7240 Section 2: "Empty or zero-length values on both
    # the preference token and within parameters are equivalent
    # to no value being specified at all."
    # `x` is either a ``(name, value)`` tuple or just a name.
    if isinstance(x, tuple):
        (name, value) = x
    else:
        name = x
        value = None
    if value == u'':
        value = None
    return Parametrized(name, value)


def preference_parameter(head=False):
    # The head (first) ``preference-parameter`` of a ``preference``
    # holds the actual preference name, which we want to annotate:
    # its name is wrapped in `Preference` instead of `CaseInsensitive`.
    if head:
        name_cls = Preference
    else:
        name_cls = CaseInsensitive
    name_and_value = parameter(name_cls=name_cls)
    name_only = name_cls << token
    rule = _normalize_empty_value << (name_and_value | name_only)
    return rule > named(
        u'preference-parameter', RFC(7240, errata=4439), is_pivot=True)


# The head ``preference-parameter``, then any number of ``;``
# (with ``OWS`` before), each optionally followed by ``OWS``
# and a non-head ``preference-parameter``.
preference = Parametrized << (preference_parameter(head=True) * many(
    skip(OWS * ';') * maybe(skip(OWS) * preference_parameter()))) > named(
        u'preference', RFC(7240, errata=4439), is_pivot=True)

Prefer = comma_list1(preference) > pivot

# Every element here is built with ``head=True``.
Preference_Applied = comma_list1(preference_parameter(head=True)) > pivot

# ``representation`` or ``minimal``, combined with ``CaseInsensitive``.
return_ = CaseInsensitive << (literal('representation') | 'minimal') > pivot
wait = delay_seconds > auto
# ``strict`` or ``lenient``, combined with ``CaseInsensitive``.
handling = CaseInsensitive << (literal('strict') | 'lenient') > pivot

# Hand this module's globals and the RFC 7240 citation to ``fill_names``.
fill_names(globals(), RFC(7240))
Exemple #35
0
def parameter(exclude=None):
    # Build the ``parameter`` rule (RFC 7231): name, skipped ``=``, value.
    # The name is combined with `CaseInsensitive` and may not be one of
    # `exclude`; the value is a ``token`` or a ``quoted_string``.
    forbidden = exclude or []
    param_name = CaseInsensitive << token__excluding(forbidden)
    left = param_name * skip('=')
    param_value = token | quoted_string
    rule = left * param_value
    return rule > named(u'parameter', RFC(7231), is_pivot=True)
Exemple #36
0
def quoted_pair(sensible_for):
    # RFC 7230 has just one ``<quoted-pair>`` rule. We build it per context
    # so that notice 1017 is reported for any escaped character
    # that is not listed in `sensible_for`.
    @can_complain
    def check_sensible(complain, c):
        if c in sensible_for:
            return c
        complain(1017, char=c)
        return c

    backslash = skip('\\')
    escaped = HTAB | SP | VCHAR | obs_text
    return (check_sensible << backslash * escaped) > named(
        u'quoted-pair', RFC(7230))


# ``HTAB``, ``SP``, octets 0x21, 0x23-0x5B and 0x5D-0x7E, and ``obs_text``;
# the octet ranges leave out 0x22 (``"``) and 0x5C (``\``).
qdtext = (HTAB | SP | octet(0x21) | octet_range(0x23, 0x5B)
          | octet_range(0x5D, 0x7E) | obs_text) > auto
# Inside a quoted string, only ``"`` and ``\`` are listed as sensible
# to escape.
quoted_string = (skip(DQUOTE) *
                 string(qdtext | quoted_pair(sensible_for=u'"\\')) *
                 skip(DQUOTE)) > auto

# Like ``qdtext``, but the octet ranges leave out 0x28 (``(``),
# 0x29 (``)``) and 0x5C (``\``) instead.
ctext = (HTAB | SP | octet_range(0x21, 0x27) | octet_range(0x2A, 0x5B)
         | octet_range(0x5D, 0x7E) | obs_text) > auto


def comment(include_parens=False):
    # Build the (recursive) ``comment`` rule of RFC 7230: parentheses around
    # a string of ``ctext``, quoted pairs, or nested comments.
    # Unless `include_parens` is true, the first and last characters
    # of the outermost result are sliced off.
    node = recursive() > named(u'comment', RFC(7230))
    body_char = ctext | quoted_pair(sensible_for=u'()\\') | node
    node.rec = '(' + string(body_char) + ')'
    result = node
    if not include_parens:
        result = (lambda s: s[1:-1]) << node
    return result > named(u'comment', RFC(7230))
Exemple #37
0
range_unit = bytes_unit | other_range_unit > pivot
# Either the literal ``none`` (combined with ``CaseInsensitive``)
# or a ``comma_list1`` of ``range_unit``.
acceptable_ranges = (CaseInsensitive << literal('none')
                     | comma_list1(range_unit)) > pivot
Accept_Ranges = acceptable_ranges > pivot


@can_complain
def _well_formed1(complain, first, last):
    # Report notice 1133 when `last` is present but smaller than `first`.
    # The ``(first, last)`` pair is returned unchanged either way.
    inverted = (last is not None) and (first > last)
    if inverted:
        complain(1133)
    return (first, last)


first_byte_pos = int << string1(DIGIT) > auto
last_byte_pos = int << string1(DIGIT) > auto
# ``first_byte_pos``, a skipped ``-``, and an optional ``last_byte_pos``,
# combined with ``_well_formed1``.
byte_range_spec = _well_formed1 << (first_byte_pos * skip('-') *
                                    maybe(last_byte_pos)) > pivot

suffix_length = int << string1(DIGIT) > auto
# The lambda turns its argument ``x`` into ``(None, x)``.
suffix_byte_range_spec = \
    (lambda x: (None, x)) << skip('-') * suffix_length                  > pivot

byte_range_set = comma_list1(byte_range_spec | suffix_byte_range_spec) > auto
# ``bytes_unit``, a skipped ``=``, and ``byte_range_set``.
byte_ranges_specifier = RangeSpecifier << (bytes_unit * skip('=') *
                                           byte_range_set) > pivot

other_range_set = string1(VCHAR) > auto
# ``other_range_unit``, a skipped ``=``, and ``other_range_set``.
other_ranges_specifier = RangeSpecifier << (other_range_unit * skip('=') *
                                            other_range_set) > pivot

Range = byte_ranges_specifier | other_ranges_specifier > pivot
Exemple #38
0
def parameter(exclude=None):
    # ``parameter`` of RFC 7231: a case-insensitive name, a skipped ``=``,
    # and a ``token`` or ``quoted_string`` value.
    # `exclude` lists names to exclude; ``None`` (or empty) excludes nothing.
    excluded_names = exclude or []
    name = CaseInsensitive << token__excluding(excluded_names)
    name_eq = name * skip('=')
    value = token | quoted_string
    return (name_eq * value) > named(u'parameter', RFC(7231), is_pivot=True)
Exemple #39
0
    return string_excluding(tchar, [''] + list(excluding))

def quoted_pair(sensible_for):
    # ``<quoted-pair>`` is a single rule in RFC 7230; here it is a factory,
    # so that each context can say which escaped characters make sense.
    # Anything else is still accepted but triggers notice 1017.
    @can_complain
    def check_sensible(complain, c):
        is_sensible = c in sensible_for
        if not is_sensible:
            complain(1017, char=c)
        return c

    backslash = skip('\\')
    following = HTAB | SP | VCHAR | obs_text
    pair = check_sensible << backslash * following
    return pair > named(u'quoted-pair', RFC(7230))

# ``HTAB``, ``SP``, octets 0x21, 0x23-0x5B and 0x5D-0x7E, and ``obs_text``;
# the octet ranges leave out 0x22 (``"``) and 0x5C (``\``).
qdtext = (HTAB | SP | octet(0x21) | octet_range(0x23, 0x5B) |
          octet_range(0x5D, 0x7E) | obs_text)                           > auto
# Inside a quoted string, only ``"`` and ``\`` are listed as sensible
# to escape.
quoted_string = (skip(DQUOTE) *
                 string(qdtext | quoted_pair(sensible_for=u'"\\')) *
                 skip(DQUOTE))                                          > auto

# Like ``qdtext``, but the octet ranges leave out 0x28 (``(``),
# 0x29 (``)``) and 0x5C (``\``) instead.
ctext = (HTAB | SP | octet_range(0x21, 0x27) | octet_range(0x2A, 0x5B) |
         octet_range(0x5D, 0x7E) | obs_text)                            > auto

def comment(include_parens=False):
    """Build the (recursive) ``comment`` rule.

    A comment is ``(`` ... ``)`` containing ``ctext``, quoted pairs (where
    escaping ``(``, ``)`` and ``\\`` is sensible) and nested comments.

    When `include_parens` is false (the default), the first and last
    characters of the outermost result are sliced off; otherwise the result
    is returned as parsed.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    content = string(ctext | quoted_pair(sensible_for=u'()\\') | nested)
    nested.rec = '(' + content + ')'
    if include_parens:
        outer = nested
    else:
        # Only the outermost result is trimmed; nested comments refer to
        # ``nested`` itself and are not affected by this wrapper.
        outer = (lambda s: s[1:-1]) << nested
    return outer > named(u'comment', RFC(7230))

# Optional whitespace: a (possibly empty) run of SP and HTAB characters.
OWS = string(SP | HTAB)                                                 > auto
Exemple #40
0
    port,
    pseudonym,
    quoted_string,
    token,
    token__excluding,
    uri_host,
)
from httpolice.syntax.rfc7231 import HTTP_date


# A run of one or more DIGITs converted with ``int``.
delta_seconds = int << string1(DIGIT)                                   > pivot
Age = delta_seconds                                                     > pivot

# A directive name (a token wrapped in ``CacheDirective``) with an optional
# "=" argument, the pair being wrapped in ``Parametrized``. The argument
# alternatives are wrapped in ``mark`` -- presumably so that consumers can
# tell whether a token or a quoted string was matched (TODO confirm).
cache_directive = Parametrized << (
    (CacheDirective << token) *
    maybe(skip('=') * (mark(token) | mark(quoted_string))))             > pivot
# One or more comma-separated cache directives.
Cache_Control = comma_list1(cache_directive)                            > pivot

Expires = HTTP_date                                                     > pivot

def extension_pragma(exclude_no_cache=False):
    """Build the ``extension-pragma`` rule: a name with an optional value.

    The name is a token; when `exclude_no_cache` is true, ``no-cache`` is
    passed to ``token__excluding`` as the excluded name. The optional value
    follows ``=`` and is a token or a quoted string. The pair is wrapped in
    ``Parametrized``.
    """
    if exclude_no_cache:
        name = token__excluding(['no-cache'])
    else:
        name = token
    optional_value = maybe(skip('=') * (token | quoted_string))
    pragma = Parametrized << (name * optional_value)
    return pragma > named(u'extension-pragma', RFC(7234), is_pivot=True)

# Either the literal ``no-cache`` (wrapped in ``CaseInsensitive``) or an
# extension pragma built with ``exclude_no_cache=True``.
pragma_directive = (CaseInsensitive << literal('no-cache') |
                    extension_pragma(exclude_no_cache=True))            > pivot
# One or more comma-separated pragma directives.
Pragma = comma_list1(pragma_directive)                                  > pivot

# ``string_times(3, 3, DIGIT)`` -- presumably exactly three DIGITs (min and
# max both 3; confirm the argument order) -- wrapped in ``WarnCode``.
warn_code = WarnCode << string_times(3, 3, DIGIT)                       > pivot
Exemple #41
0
def transfer_extension(exclude=None, no_q=False):
    """Build the ``transfer-extension`` rule: a coding name plus parameters.

    The coding name is a token wrapped in ``TransferCoding``; names listed
    in `exclude` are passed on to ``token__excluding`` (an empty list is
    used when `exclude` is falsy). It is followed by zero or more
    ``;``-separated parameters (optional whitespace allowed around the
    ``;``), collected into a ``MultiDict``. `no_q` is forwarded to
    ``transfer_parameter``.
    """
    coding = TransferCoding << token__excluding(exclude or [])
    separator = skip(OWS * ';' * OWS)
    params = MultiDict << many(separator * transfer_parameter(no_q))
    extension = Parametrized << (coding * params)
    return extension > named(u'transfer-extension', RFC(7230), is_pivot=True)
Exemple #42
0
                                      quoted_string, token)


# The authentication scheme name: a token wrapped in ``AuthScheme``.
auth_scheme = AuthScheme << token                                       > pivot
# One or more characters from ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" /
# "/", followed by a (possibly empty) run of "=" characters. The ``+``
# presumably joins both parts into a single string -- TODO confirm.
token68 = (string1(ALPHA | DIGIT | '-' | '.' | '_' | '~' | '+' | '/') +
           string('='))                                                 > pivot

@can_complain
def _check_realm(complain, k, v):
    """Report notice 1196 if the ``realm`` value was not a quoted string.

    `v` arrives as a ``(symbol, value)`` pair, where ``symbol`` is the
    grammar symbol that matched the value. The pair is unwrapped and
    ``(k, value)`` is returned in all cases.
    """
    matched_symbol, value = v
    is_realm = (k == u'realm')
    if is_realm and matched_symbol is not quoted_string:
        complain(1196)
    return (k, value)

# An auth parameter: a name (token wrapped in ``CaseInsensitive``), "="
# surrounded by ``BWS``, and a value. The value alternatives are wrapped in
# ``mark`` so that ``_check_realm`` receives a (symbol, value) pair and can
# tell which alternative matched.
auth_param = _check_realm << ((CaseInsensitive << token) *
                              skip(BWS * '=' * BWS) *
                              (mark(token) | mark(quoted_string)))      > pivot

# A scheme, optionally followed by one or more spaces and then either a
# token68 or a comma-separated list of auth params collected into a
# ``MultiDict`` (``<<`` binds tighter than ``|``). When that optional part
# is absent, ``maybe`` is given an empty ``MultiDict()`` as ``default``.
challenge = Parametrized << (
    auth_scheme *
    maybe(skip(string1(SP)) * (token68 |
                               MultiDict << comma_list(auth_param)),
          default=MultiDict()))                                         > auto

# Both rules are one or more comma-separated challenges.
WWW_Authenticate = comma_list1(challenge)                               > pivot
Proxy_Authenticate = comma_list1(challenge)                             > pivot

credentials = Parametrized << (
    auth_scheme *
    maybe(skip(string1(SP)) * (token68 |
                               MultiDict << comma_list(auth_param)),
Exemple #43
0
def comma_list(element):
    """Build the ``#rule`` list rule (RFC 7230 section 7) for `element`.

    The whole list is optional. It starts with either a bare ``,`` (which
    is substituted by ``[None, None]``) or a first `element` wrapped in a
    one-item list, and continues with any number of ``,``-separated,
    optional further elements. The result is passed to
    ``_collect_elements``.
    """
    first = (subst([None, None]) << literal(',') |
             (lambda x: [x]) << group(element))
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    whole = _collect_elements << maybe(first + rest)
    return whole > named(u'#rule', RFC(7230, section=(7,)))
Exemple #44
0
def comma_list(element):
    """Build the ``#rule`` list rule (RFC 7230 section 7) for `element`.

    Follows the grammar as amended by RFC Errata ID: 5257: an optional
    first element (with trailing optional whitespace), then any number of
    ``,``-introduced optional elements. The result is passed to
    ``_collect_elements``.
    """
    # RFC Errata ID: 5257
    first = maybe(group(element) * skip(OWS))
    rest = many(
        skip(literal(',') * OWS) * maybe(group(element) * skip(OWS)))
    whole = _collect_elements << (first % rest)
    return whole > named(u'#rule', RFC(7230, section=u'7'))
Exemple #45
0
from httpolice.citation import RFC
from httpolice.parse import (fill_names, many, maybe, pivot, skip, string1,
                             string_times)
from httpolice.structure import ForwardedParam
from httpolice.syntax.common import ALPHA, DIGIT
from httpolice.syntax.rfc3986 import IPv4address, IPv6address
from httpolice.syntax.rfc7230 import comma_list1, quoted_string, token


def _remove_empty(xs):
    return [x for x in xs if x is not None]


# Grammar rules for the ``Forwarded`` rule and its parts.

# An obfuscated node name: "_" followed by one or more of
# ALPHA / DIGIT / "." / "_" / "-".
obfnode = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-')                > pivot
# An IPv4 address, an IPv6 address in (skipped) square brackets, the literal
# ``unknown``, or an obfuscated node name.
nodename = (IPv4address |
            skip('[') * IPv6address * skip(']') |
            'unknown' | obfnode)                                        > pivot

# ``string_times(1, 5, DIGIT)`` converted with ``int`` -- presumably between
# one and five DIGITs (confirm the argument order).
port = int << string_times(1, 5, DIGIT)                                 > pivot
# Same shape as ``obfnode``: "_" plus one or more of the listed characters.
obfport = '_' + string1(ALPHA | DIGIT | '.' | '_' | '-')                > pivot
node_port = port | obfport                                              > pivot

# A node name with an optional ":" port part.
node = nodename * maybe(skip(':') * node_port)                          > pivot

value = token | quoted_string                                           > pivot
# name "=" value, with the name (a token) wrapped in ``ForwardedParam``.
forwarded_pair = (ForwardedParam << token) * skip('=') * value          > pivot

# ";"-separated pairs where every pair is optional; the ``None`` entries
# left by missing pairs are filtered out by ``_remove_empty``.
forwarded_element = _remove_empty << (
    maybe(forwarded_pair) % many(skip(';') * maybe(forwarded_pair)))    > pivot

# One or more comma-separated forwarded elements.
Forwarded = comma_list1(forwarded_element)                              > pivot
Exemple #46
0
def comma_list1(element):
    """Build the ``1#rule`` list rule (RFC 7230 section 7) for `element`.

    Accepts any number of leading ``,`` (each substituted by ``None``),
    then one mandatory `element` wrapped in a one-item list, then any
    number of ``,``-separated optional further elements. The result is
    passed to ``_collect_elements``.
    """
    leading_commas = many(subst(None) << ',' * OWS)
    first = (lambda x: [x]) << group(element)
    head = leading_commas + first
    tail = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    whole = _collect_elements << (head + tail)
    return whole > named(u'1#rule', RFC(7230, section=u'7'))
Exemple #47
0
def transfer_parameter(no_q=False):
    """Build the ``transfer-parameter`` rule: name, ``=``, value.

    The name is a token; when `no_q` is true, ``q`` is passed to
    ``token__excluding`` as the excluded name. The ``=`` may be surrounded
    by ``BWS``. The value is a token or a quoted string.
    """
    if no_q:
        name = token__excluding(['q'])
    else:
        name = token
    name_and_equals = name * skip(BWS * '=' * BWS)
    rule = name_and_equals * (token | quoted_string)
    return rule > named(u'transfer-parameter', RFC(7230), is_pivot=True)
Exemple #48
0
# A (possibly empty) run of VCHAR / HTAB / SP characters; the ``-`` operator
# presumably removes the double quote, ";" and "," from that set -- TODO
# confirm the semantics of ``-`` in ``httpolice.parse``.
_MediaDesc_no_delim = string((VCHAR | HTAB | SP) -
                             literal('"') - literal(';') - literal(','))


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

# The characters a ``ptoken`` may be made of.
ptokenchar = (literal('!') | '#' | '$' | '%' | '&' | "'" | '(' |
              ')' | '*' | '+' | '-' | '.' | '/' | DIGIT |
              ':' | '<' | '=' | '>' | '?' | '@' | ALPHA |
              '[' | ']' | '^' | '_' | '`' | '{' | '|' |
              '}' | '~')                                                > auto
ptoken = string1(ptokenchar)                                            > auto

# type-name "/" subtype-name, wrapped in ``MediaType`` (``+`` binds tighter
# than ``<<``, so the wrapper receives the whole concatenation).
media_type = MediaType << type_name + '/' + subtype_name                > pivot
# A media type in double quotes; the quotes are skipped.
quoted_mt = skip('"') * media_type * skip('"')                          > pivot

# A registered relation type: a LOALPHA followed by any number of
# LOALPHA / DIGIT / "." / "-", wrapped in ``RelationType``.
reg_rel_type = RelationType << (
    LOALPHA + string(LOALPHA | DIGIT | '.' | '-'))                      > auto
ext_rel_type = URI                                                      > auto
relation_type = reg_rel_type | ext_rel_type                             > pivot
# Either a single bare relation type, wrapped in a one-item list, or a
# double-quoted sequence of relation types separated by one or more spaces.
relation_types = (
    (lambda x: [x]) << relation_type |
    skip('"' * OWS) *
    (relation_type % many(skip(string1(SP)) * relation_type)) *
    skip(OWS * '"'))                                                    > pivot

def ext_name_star__excluding(exclude):
    """Build the ``ext-name-star`` rule: a parameter name followed by ``*``.

    `exclude` is forwarded unchanged to ``parmname__excluding``.
    """
    starred_name = parmname__excluding(exclude) + '*'
    return starred_name > named(u'ext-name-star', RFC(5988))
Exemple #49
0
# We don't need to special-case "UTF-8" and "ISO-8859-1", simplify.
# The characters a charset name may be made of.
mime_charsetc = (ALPHA | DIGIT |
                 '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^' | '_' | '`' |
                 '{' | '}' | '~')                                       > auto
mime_charset = string1(mime_charsetc)                                   > auto
# The charset name is wrapped in ``CaseInsensitive``.
charset = CaseInsensitive << mime_charset                               > pivot

# "%" followed by two hexadecimal digits.
pct_encoded = '%' + HEXDIG + HEXDIG                                     > auto
# A (possibly empty) run of percent-encoded triplets and attr-chars, passed
# through ``force_bytes`` and then ``pct_decode`` (both defined outside this
# excerpt; presumably yielding the percent-decoded bytes -- TODO confirm).
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char))                     > auto

@can_complain
def _check_ext_value(complain, val):
    """Check the charset of an extended value and return `val` unchanged.

    Reports notice 1253 when the charset is neither ``UTF-8`` nor
    ``ISO-8859-1``. For those two charsets, tries to decode
    ``val.value_bytes`` and reports notice 1254 (with the decoding error)
    if that fails.
    """
    if val.charset not in [u'UTF-8', u'ISO-8859-1']:
        complain(1253, charset=val.charset)
        return val
    try:
        val.value_bytes.decode(val.charset)
    except UnicodeError as exc:
        complain(1254, charset=val.charset, error=exc)
    return val

# charset "'" [ language ] "'" value-chars, wrapped in ``ExtValue`` and then
# checked by ``_check_ext_value``; the single quotes are skipped.
ext_value = _check_ext_value << (
    ExtValue << (charset * skip("'") *
                 maybe(language) * skip("'") *
                 value_chars))                                          > pivot


# Presumably assigns names (citing RFC 5987) to the rules defined at module
# level above -- see ``httpolice.parse.fill_names`` to confirm.
fill_names(globals(), RFC(5987))
Exemple #50
0
def transfer_extension(exclude=None, no_q=False):
    """Build the ``transfer-extension`` rule: a coding name plus parameters.

    The coding name is a token wrapped in ``TransferCoding``; names listed
    in `exclude` are passed on to ``token__excluding`` (an empty list is
    used when `exclude` is falsy). It is followed by zero or more
    ``;``-separated parameters (optional whitespace allowed around the
    ``;``), collected into a ``MultiDict``. `no_q` is forwarded to
    ``transfer_parameter``.
    """
    coding = TransferCoding << token__excluding(exclude or [])
    separator = skip(OWS * ';' * OWS)
    params = MultiDict << many(separator * transfer_parameter(no_q))
    extension = Parametrized << (coding * params)
    return extension > named(u'transfer-extension', RFC(7230), is_pivot=True)
Exemple #51
0
def extension_pragma(exclude_no_cache=False):
    """Build the ``extension-pragma`` rule: a name with an optional value.

    The name is a token; when `exclude_no_cache` is true, ``no-cache`` is
    passed to ``token__excluding`` as the excluded name. The optional value
    follows ``=`` and is a token or a quoted string. The pair is wrapped in
    ``Parametrized``.
    """
    if exclude_no_cache:
        name = token__excluding(['no-cache'])
    else:
        name = token
    optional_value = maybe(skip('=') * (token | quoted_string))
    pragma = Parametrized << (name * optional_value)
    return pragma > named(u'extension-pragma', RFC(7234), is_pivot=True)
Exemple #52
0
        symbol = {
            u'anchor': URI_Reference, u'rel': rel, u'rev': rev,
            u'hreflang': hreflang, u'type': type_, u'title*': ext_value,
        }.get(name)
        if symbol is not None:
            value = parse(value, symbol, complain, 1158,
                          name=name, value=value)
        r.append((name, value))
        if name == u'rev':
            complain(1226)
    if u'rel' not in seen:
        complain(1309)
    return MultiDict(r)

# A link parameter: a name (token wrapped in ``CaseInsensitive``), optional
# ``BWS``, and an optional "=" value. The value alternatives are wrapped in
# ``mark`` -- presumably so that later processing can tell a token from a
# quoted string (TODO confirm).
link_param = (
    (CaseInsensitive << token) * skip(BWS) *
    maybe(skip(literal('=') * BWS) * (mark(token) |
                                      mark(quoted_string))))            > pivot

# "<" URI-Reference ">" followed by zero or more ";"-separated parameters
# (optional whitespace allowed around the ";"). The parameter list is passed
# through ``_process_params`` and the pair is wrapped in ``Parametrized``.
link_value = Parametrized << (
    skip('<') * URI_Reference * skip('>') *
    (_process_params << many(skip(OWS * ';' * OWS) * link_param)))      > pivot

# A comma-separated list of link values (built with ``comma_list``, not
# ``comma_list1``).
Link = comma_list(link_value)                                           > pivot


anchor = URI_Reference                                                  > auto

# A registered relation type: a LOALPHA followed by any number of
# LOALPHA / DIGIT / "." / "-", wrapped in ``CaseInsensitive``.
reg_rel_type = CaseInsensitive << (
    LOALPHA + string(LOALPHA | DIGIT | '.' | '-'))                      > auto
ext_rel_type = URI                                                      > auto