Example #1
0
def comment(include_parens=False):
    """Build the (recursive) ``comment`` rule, cited to RFC 7230.

    A comment is ``(`` ... ``)`` around any run of ``ctext``, quoted pairs
    and nested comments.  Unless `include_parens` is true, the result is
    additionally wrapped so that its first and last characters are
    sliced off.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    content = ctext | quoted_pair(sensible_for=u'()\\') | nested
    nested.rec = '(' + string(content) + ')'
    if include_parens:
        result = nested
    else:
        result = (lambda text: text[1:-1]) << nested
    return result > named(u'comment', RFC(7230))
Example #2
0
def test_parser_edge_cases():
    """Exercise parser branches that the regular tests never reach.

    The parser implementation is general enough that some of its branches
    are not hit by the ordinary tests, hence these contrived grammars.
    """
    # Two alternatives that split the same prefix differently.
    p = many(rfc7230.tchar) > named(u'p')
    p1 = ('1' * p) > named(u'p1')
    p2 = ('11' * p * skip('\n')) > named(u'p2')
    outcome = parse(p1 | p2, b'11abc')
    assert outcome == (u'1', [u'1', u'a', u'b', u'c'])
    outcome = parse(p1 | p2, b'11abc\n')
    assert outcome == (u'11', [u'a', u'b', u'c'])

    # A self-referencing rule with an empty alternative.
    p = recursive() > named(u'p')
    p.rec = (rfc7230.tchar * p) | (subst(None) << empty)
    outcome = parse(p, b'abc')
    assert outcome == (u'a', (u'b', (u'c', None)))

    # A rule that may match nothing, used directly and inside ``string``.
    p = literal('ab') > named(u'p')
    p0 = ((subst(u'') << empty) | p) > named(u'p0')
    p1 = ('xab' * p0) > named(u'p1')
    p2 = ('x' * string(p0) * '!') > named(u'p2')
    outcome = parse(p1 | p2, b'xabab')
    assert outcome == (u'xab', u'ab')
    outcome = parse(p1 | p2, b'xabab!')
    assert outcome == (u'x', u'abab', u'!')

    # ``empty`` listed as the first alternative, followed by a literal.
    p = (empty | literal('a')) > named(u'p')
    p0 = (p * 'x') > named(u'x')
    outcome = parse(p0, b'x')
    assert outcome == u'x'
Example #3
0
def test_parser_edge_cases():
    """Cover parser code paths that ordinary grammars do not trigger.

    Each group below builds a small, artificial grammar and checks the
    value that ``parse`` produces for it.
    """
    # Group 1: alternatives sharing a common prefix.
    p = many(rfc7230.tchar) > named(u'p')
    p1 = ('1' * p) > named(u'p1')
    p2 = ('11' * p * skip('\n')) > named(u'p2')
    expected = (u'1', [u'1', u'a', u'b', u'c'])
    assert parse(p1 | p2, b'11abc') == expected
    expected = (u'11', [u'a', u'b', u'c'])
    assert parse(p1 | p2, b'11abc\n') == expected

    # Group 2: a recursive rule whose second alternative is ``empty``.
    p = recursive() > named(u'p')
    p.rec = (rfc7230.tchar * p) | (subst(None) << empty)
    expected = (u'a', (u'b', (u'c', None)))
    assert parse(p, b'abc') == expected

    # Group 3: a possibly-empty rule, alone and repeated by ``string``.
    p = literal('ab') > named(u'p')
    p0 = ((subst(u'') << empty) | p) > named(u'p0')
    p1 = ('xab' * p0) > named(u'p1')
    p2 = ('x' * string(p0) * '!') > named(u'p2')
    expected = (u'xab', u'ab')
    assert parse(p1 | p2, b'xabab') == expected
    expected = (u'x', u'abab', u'!')
    assert parse(p1 | p2, b'xabab!') == expected

    # Group 4: ``empty`` as the first alternative before a literal.
    p = (empty | literal('a')) > named(u'p')
    p0 = (p * 'x') > named(u'x')
    expected = u'x'
    assert parse(p0, b'x') == expected
Example #4
0
def comment(include_parens=False):
    """Return the ``comment`` rule (RFC 7230), which may nest in itself.

    When `include_parens` is false (the default), the rule is wrapped in
    a function that slices off the first and last characters of the
    result.
    """
    rule = recursive() > named(u'comment', RFC(7230))
    piece = ctext | quoted_pair(sensible_for=u'()\\') | rule
    rule.rec = '(' + string(piece) + ')'
    if include_parens:
        return rule > named(u'comment', RFC(7230))
    unwrapped = (lambda text: text[1:-1]) << rule
    return unwrapped > named(u'comment', RFC(7230))
Example #5
0
    # In RFC 7230, ``<quoted-pair>`` is a single rule,
    # but we parametrize it to report no. 1017 depending on the context.
    @can_complain
    def check_sensible(complain, c):
        if c not in sensible_for:
            complain(1017, char=c)
        return c

    return (check_sensible << skip('\\') *
            (HTAB | SP | VCHAR | obs_text) > named(u'quoted-pair', RFC(7230)))


# ``qdtext``: a character that may appear unescaped in a quoted string.
# The octet ranges leave out 0x22 (``"``) and 0x5C (``\``).
qdtext = (HTAB | SP | octet(0x21) | octet_range(0x23, 0x5B)
          | octet_range(0x5D, 0x7E) | obs_text) > auto
# ``quoted_string``: ``qdtext`` and quoted pairs between two ``DQUOTE``.
# Only ``"`` and ``\`` are passed as sensible characters to escape here.
quoted_string = (skip(DQUOTE) *
                 string(qdtext | quoted_pair(sensible_for=u'"\\')) *
                 skip(DQUOTE)) > auto

# ``ctext``: a character that may appear unescaped in a comment.
# The octet ranges leave out 0x28 (``(``), 0x29 (``)``) and 0x5C (``\``).
ctext = (HTAB | SP | octet_range(0x21, 0x27) | octet_range(0x2A, 0x5B)
         | octet_range(0x5D, 0x7E) | obs_text) > auto


def comment(include_parens=False):
    """Build the (recursive) ``comment`` rule, cited to RFC 7230.

    A comment is ``(`` ... ``)`` around any run of ``ctext``, quoted pairs
    and nested comments.  Unless `include_parens` is true, the result is
    additionally wrapped so that its first and last characters are
    sliced off.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    content = ctext | quoted_pair(sensible_for=u'()\\') | nested
    nested.rec = '(' + string(content) + ')'
    if include_parens:
        result = nested
    else:
        result = (lambda text: text[1:-1]) << nested
    return result > named(u'comment', RFC(7230))

Example #6
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import (auto, can_complain, fill_names, mark, maybe,
                             pivot, skip, string, string1)
from httpolice.structure import (AuthScheme, CaseInsensitive, MultiDict,
                                 Parametrized)
from httpolice.syntax.common import ALPHA, DIGIT, SP
from httpolice.syntax.rfc7230 import (BWS, comma_list, comma_list1,
                                      quoted_string, token)


# ``auth_scheme``: a ``token`` wrapped with ``AuthScheme``.
auth_scheme = AuthScheme << token                                       > pivot
# ``token68``: one or more of the listed characters, then any number of ``=``.
token68 = (string1(ALPHA | DIGIT | '-' | '.' | '_' | '~' | '+' | '/') +
           string('='))                                                 > pivot

@can_complain
def _check_realm(complain, k, v):
    """Report notice 1196 for a ``realm`` that is not a quoted string.

    `v` is a pair of (the symbol associated with the value, the value).
    Returns the pair ``(k, value)`` with the symbol dropped.
    """
    matched_symbol, value = v
    is_realm = (k == u'realm')
    if is_realm and matched_symbol is not quoted_string:
        complain(1196)
    return k, value

# ``auth_param``: ``name = value`` with optional whitespace (``BWS``) around
# the ``=``.  The value is wrapped in ``mark`` for both alternatives;
# ``_check_realm`` unpacks its second argument as a (symbol, value) pair.
auth_param = _check_realm << ((CaseInsensitive << token) *
                              skip(BWS * '=' * BWS) *
                              (mark(token) | mark(quoted_string)))      > pivot

challenge = Parametrized << (
    auth_scheme *
    maybe(skip(string1(SP)) * (token68 |
                               MultiDict << comma_list(auth_param)),
Example #7
0
def quoted_pair(sensible_for):
    """Build a ``quoted-pair`` rule (RFC 7230) for a particular context.

    In RFC 7230, ``<quoted-pair>`` is a single rule, but here it is
    parametrized so that notice 1017 is reported whenever the escaped
    character is not contained in `sensible_for`.
    """
    @can_complain
    def check_sensible(complain, c):
        if c in sensible_for:
            return c
        complain(1017, char=c)
        return c

    escaped = skip('\\') * (HTAB | SP | VCHAR | obs_text)
    checked = check_sensible << escaped
    return checked > named(u'quoted-pair', RFC(7230))

# ``qdtext``: a character that may appear unescaped in a quoted string.
# The octet ranges leave out 0x22 (``"``) and 0x5C (``\``).
qdtext = (HTAB | SP | octet(0x21) | octet_range(0x23, 0x5B) |
          octet_range(0x5D, 0x7E) | obs_text)                           > auto
# ``quoted_string``: ``qdtext`` and quoted pairs between two ``DQUOTE``.
# Escaping anything other than ``"`` or ``\`` triggers notice 1017.
quoted_string = (skip(DQUOTE) *
                 string(qdtext | quoted_pair(sensible_for=u'"\\')) *
                 skip(DQUOTE))                                          > auto

# ``ctext``: a character that may appear unescaped in a comment.
# The octet ranges leave out 0x28 (``(``), 0x29 (``)``) and 0x5C (``\``).
ctext = (HTAB | SP | octet_range(0x21, 0x27) | octet_range(0x2A, 0x5B) |
         octet_range(0x5D, 0x7E) | obs_text)                            > auto

def comment(include_parens=False):
    """Return the ``comment`` rule (RFC 7230), which may nest in itself.

    When `include_parens` is false (the default), the rule is wrapped in
    a function that slices off the first and last characters of the
    result.
    """
    rule = recursive() > named(u'comment', RFC(7230))
    piece = ctext | quoted_pair(sensible_for=u'()\\') | rule
    rule.rec = '(' + string(piece) + ')'
    if include_parens:
        return rule > named(u'comment', RFC(7230))
    unwrapped = (lambda text: text[1:-1]) << rule
    return unwrapped > named(u'comment', RFC(7230))

# ``OWS``: a run of ``SP`` / ``HTAB`` characters.
# NOTE(review): ``string`` presumably also matches an empty run -- confirm.
OWS = string(SP | HTAB)                                                 > auto
Example #8
0
from urllib.parse import unquote_to_bytes as pct_decode

from httpolice.citation import RFC
from httpolice.parse import (auto, can_complain, fill_names, maybe, pivot,
                             skip, string, string1)
from httpolice.structure import CaseInsensitive, ExtValue
from httpolice.syntax.common import ALPHA, DIGIT, HEXDIG
from httpolice.syntax.rfc5646 import Language_Tag as language
from httpolice.util.text import force_bytes


# ``attr_char``: a character that may appear without percent-encoding.
attr_char = (ALPHA | DIGIT |
             '!' | '#' | '$' | '&' | '+' | '-' | '.' |
             '^' | '_' | '`' | '|' | '~')                               > auto
parmname = string(attr_char)                                            > pivot

# We don't need to special-case "UTF-8", simplify.
mime_charsetc = (ALPHA | DIGIT |
                 '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^' | '_' | '`' |
                 '{' | '}' | '~')                                       > auto
mime_charset = string1(mime_charsetc)                                   > auto
charset = CaseInsensitive << mime_charset                               > pivot

# ``pct_encoded``: ``%`` followed by two hex digits.
pct_encoded = '%' + HEXDIG + HEXDIG                                     > auto
# The matched text goes through ``force_bytes`` and then ``pct_decode``
# (``urllib.parse.unquote_to_bytes``).
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char))                     > auto

@can_complain
def _check_ext_value(complain, val):
    if val.charset == u'UTF-8':
Example #9
0
# ``complete_length``: one or more digits, converted with ``int``.
complete_length = int << string1(DIGIT) > auto
# ``byte_range_resp``: ``<byte-range>/<complete-length>``, or ``*`` in
# place of the length.  ``<<`` binds tighter than ``|``, so ``subst(None)``
# applies only to the ``*`` alternative.
byte_range_resp = (byte_range * skip('/') *
                   (complete_length | subst(None) << literal('*'))) > pivot

# ``unsatisfied_range``: the literal ``*/`` followed by the complete length.
unsatisfied_range = ((subst(None) << literal('*/')) * complete_length) > pivot


@can_complain
def _well_formed2(complain, r):
    """Report notice 1148 when the range in `r` is inconsistent.

    ``r.range`` is a pair of (bounds, complete length).  When bounds are
    present, the last position must not precede the first one, and a
    known complete length must be greater than the last position.
    `r` itself is returned unchanged.
    """
    bounds, complete = r.range
    if bounds is None:
        return r
    first, last = bounds
    backwards = last < first
    if backwards or (complete is not None and complete <= last):
        complain(1148)
    return r


# ``byte_content_range``: the bytes unit, a space, then either a satisfied
# or an unsatisfied range; wrapped in ``ContentRange`` and then checked
# by ``_well_formed2``.
byte_content_range = _well_formed2 << (
    ContentRange <<
    (bytes_unit * skip(SP) * (byte_range_resp | unsatisfied_range))) > pivot

# Ranges in other units are not interpreted: any run of ``CHAR``.
other_range_resp = string(CHAR) > pivot
other_content_range = ContentRange << (other_range_unit * skip(SP) *
                                       other_range_resp) > pivot

# ``>`` binds looser than ``|``, so each whole alternation is what gets
# sealed with ``pivot`` below.
Content_Range = byte_content_range | other_content_range > pivot

If_Range = entity_tag | HTTP_date > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 7233 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(7233))
Example #10
0
from httpolice.syntax.rfc2616 import LOALPHA
from httpolice.syntax.rfc3986 import URI, URI_reference as URI_Reference
from httpolice.syntax.rfc5646 import Language_Tag
from httpolice.syntax.rfc5987 import ext_value, parmname__excluding
from httpolice.syntax.rfc6838 import subtype_name, type_name
from httpolice.syntax.rfc7230 import OWS, comma_list, quoted_string


# RFC 5988 refers to HTML 4.01 for the ``MediaDesc`` rule,
# but HTML 4.01 doesn't actually define a grammar for that;
# it only gives a vague idea of what it is supposed to be.
# So we use a fairly permissive form.
# Also, from RFC 5988 Section 5.4:
# "its value MUST be quoted if it contains a semicolon (';') or comma (',')".

# NOTE(review): ``-`` presumably excludes the right-hand literals from the
# left-hand alternatives -- confirm against ``httpolice.parse``.
_MediaDesc = string((VCHAR | HTAB | SP) - literal('"'))
_MediaDesc_no_delim = string((VCHAR | HTAB | SP) -
                             literal('"') - literal(';') - literal(','))


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

ptokenchar = (literal('!') | '#' | '$' | '%' | '&' | "'" | '(' |
              ')' | '*' | '+' | '-' | '.' | '/' | DIGIT |
              ':' | '<' | '=' | '>' | '?' | '@' | ALPHA |
              '[' | ']' | '^' | '_' | '`' | '{' | '|' |
              '}' | '~')                                                > auto
ptoken = string1(ptokenchar)                                            > auto

# ``+`` binds tighter than ``<<``, so ``MediaType`` wraps the whole
# ``type/subtype`` concatenation.
media_type = MediaType << type_name + '/' + subtype_name                > pivot
Example #11
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, pivot, string, string_times
from httpolice.structure import CaseInsensitive
from httpolice.syntax.common import ALPHA, DIGIT

# ``>`` binds looser than ``|``, so this seals the whole alternation.
alphanum = ALPHA | DIGIT > auto
# ``language_range``: 1 to 8 letters followed by ``-``-prefixed groups of
# 1 to 8 alphanumerics, or the wildcard ``*``; wrapped in ``CaseInsensitive``.
language_range = CaseInsensitive << (string_times(
    1, 8, ALPHA) + string('-' + string_times(1, 8, alphanum)) | '*') > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 4647 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(4647))
Example #12
0
    # In RFC 7230, ``<quoted-pair>`` is a single rule,
    # but we parametrize it to report no. 1017 depending on the context.
    @can_complain
    def check_sensible(complain, c):
        if c not in sensible_for:
            complain(1017, char=c)
        return c

    return (check_sensible << skip('\\') *
            (HTAB | SP | VCHAR | obs_text) > named(u'quoted-pair', RFC(7230)))


# ``qdtext``: a character that may appear unescaped in a quoted string.
# The octet ranges leave out 0x22 (``"``) and 0x5C (``\``).
qdtext = (HTAB | SP | octet(0x21) | octet_range(0x23, 0x5B)
          | octet_range(0x5D, 0x7E) | obs_text) > auto
# ``quoted_string``: ``qdtext`` and quoted pairs between two ``DQUOTE``.
# Only ``"`` and ``\`` are passed as sensible characters to escape here.
quoted_string = (skip(DQUOTE) *
                 string(qdtext | quoted_pair(sensible_for=u'"\\')) *
                 skip(DQUOTE)) > auto

# ``ctext``: a character that may appear unescaped in a comment.
# The octet ranges leave out 0x28 (``(``), 0x29 (``)``) and 0x5C (``\``).
ctext = (HTAB | SP | octet_range(0x21, 0x27) | octet_range(0x2A, 0x5B)
         | octet_range(0x5D, 0x7E) | obs_text) > auto


def comment(include_parens=False):
    """Build the (recursive) ``comment`` rule, cited to RFC 7230.

    A comment is ``(`` ... ``)`` around any run of ``ctext``, quoted pairs
    and nested comments.  Unless `include_parens` is true, the result is
    additionally wrapped so that its first and last characters are
    sliced off.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    content = ctext | quoted_pair(sensible_for=u'()\\') | nested
    nested.rec = '(' + string(content) + ')'
    if include_parens:
        result = nested
    else:
        result = (lambda text: text[1:-1]) << nested
    return result > named(u'comment', RFC(7230))

Example #13
0
        if name == u'rev':
            complain(1226)
    if u'rel' not in seen:
        complain(1309)
    return MultiDict(r)


# ``link_param``: a case-insensitive name, optionally followed by ``=`` and
# a value that is either a token or a quoted string (wrapped in ``mark``).
link_param = ((CaseInsensitive << token) * skip(BWS) * maybe(
    skip(literal('=') * BWS) * (mark(token) | mark(quoted_string)))) > pivot

# ``link_value``: ``<URI-Reference>`` followed by ``;``-separated parameters,
# which are post-processed by ``_process_params``.
link_value = Parametrized << (
    skip('<') * URI_Reference * skip('>') *
    (_process_params << many(skip(OWS * ';' * OWS) * link_param))) > pivot

Link = comma_list(link_value) > pivot

anchor = URI_Reference > auto

# ``reg_rel_type``: a lowercase letter, then lowercase letters, digits,
# ``.`` or ``-``.
reg_rel_type = CaseInsensitive << (LOALPHA +
                                   string(LOALPHA | DIGIT | '.' | '-')) > auto
ext_rel_type = URI > auto
relation_type = reg_rel_type | ext_rel_type > pivot
# ``rel`` and ``rev`` are bound to the same rule object.
# NOTE(review): ``%`` is assumed to combine the first relation type with
# the space-separated rest -- confirm against ``httpolice.parse``.
rel = rev = relation_type % many(skip(string1(SP)) * relation_type) > auto

hreflang = Language_Tag > auto

# ``+`` binds tighter than ``<<``, so ``MediaType`` wraps the whole
# ``type/subtype`` concatenation before ``check_media_type`` is applied.
type_ = check_media_type << (
    MediaType << type_name + '/' + subtype_name) > auto

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 8288 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(8288))
Example #14
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import (auto, empty, fill_names, literal, maybe_str,
                             octet_range, pivot, string, string1, string_times,
                             subst)
from httpolice.syntax.common import ALPHA, DIGIT, HEXDIG


# Character classes.
pct_encoded = '%' + HEXDIG + HEXDIG                                     > auto
sub_delims = (literal('!') | '$' | '&' | "'" | '(' | ')' | '*' | '+' |
              ',' | ';' | '=')                                          > auto
unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~'                      > auto
pchar = unreserved | sub_delims | ':' | '@' | pct_encoded               > auto

# Path segments; ``segment_nz_nc`` is like ``segment_nz`` but without ``:``.
segment = string(pchar)                                                 > auto
segment_nz = string1(pchar)                                             > auto
segment_nz_nc = string1(unreserved | sub_delims | '@' | pct_encoded)    > auto

scheme = ALPHA + string(ALPHA | DIGIT | '+' | '-' | '.')                > pivot
userinfo = string(unreserved | sub_delims | ':' | pct_encoded)          > pivot
# ``dec_octet`` alternatives, in order: 0-9, 10-99, 100-199, 200-249, 250-255
# (0x31-0x39 is ``1``-``9``, 0x30-0x34 is ``0``-``4``, 0x30-0x35 is ``0``-``5``).
dec_octet = (DIGIT |
             octet_range(0x31, 0x39) + DIGIT |
             '1' + DIGIT + DIGIT |
             '2' + octet_range(0x30, 0x34) + DIGIT |
             '25' + octet_range(0x30, 0x35))                            > auto
IPv4address = (dec_octet + '.' + dec_octet + '.' +
               dec_octet + '.' + dec_octet)                             > pivot
# ``h16``: 1 to 4 hex digits; ``ls32``: two ``h16`` groups or an IPv4 address.
h16 = string_times(1, 4, HEXDIG)                                        > auto
ls32 = (h16 + ':' + h16) | IPv4address                                  > auto
IPv6address = (
Example #15
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, pivot, string, string_times
from httpolice.structure import CaseInsensitive
from httpolice.syntax.common import ALPHA, DIGIT


# ``>`` binds looser than ``|``, so this seals the whole alternation.
alphanum = ALPHA | DIGIT                                                > auto
# ``language_range``: 1 to 8 letters followed by ``-``-prefixed groups of
# 1 to 8 alphanumerics, or the wildcard ``*``; wrapped in ``CaseInsensitive``.
language_range = CaseInsensitive << (
    string_times(1, 8, ALPHA) + string('-' + string_times(1, 8, alphanum)) |
    '*')                                                                > pivot


# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 4647 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(4647))
Example #16
0
           'zh-min-nan'          |
           'zh-xiang')                                                  > auto

grandfathered = irregular | regular                                     > pivot
# ``privateuse``: ``x`` followed by one or more ``-``-prefixed groups of
# 1 to 8 alphanumerics.
privateuse = 'x' + string1('-' + string_times(1, 8, alphanum))          > pivot

# ``extlang``: three letters, then up to two more ``-``-prefixed groups of
# three letters.
extlang = (string_times(3, 3, ALPHA) +
           string_times(0, 2, '-' + string_times(3, 3, ALPHA)))         > pivot

# ``language``: 2-3 letters with an optional ``extlang``, or 4 letters,
# or 5-8 letters.
language = (string_times(2, 3, ALPHA) + maybe_str('-' + extlang) |
            string_times(4, 4, ALPHA) | string_times(5, 8, ALPHA))      > pivot
script = string_times(4, 4, ALPHA)                                      > pivot
region = string_times(2, 2, ALPHA) | string_times(3, 3, DIGIT)          > pivot
variant = (string_times(5, 8, alphanum) |
           (DIGIT + string_times(3, 3, alphanum)))                      > pivot
extension = (singleton + string1('-' + string_times(2, 8, alphanum)))   > pivot

# ``langtag``: the language followed by ``-``-prefixed subtags in this
# order: script, region, variants, extensions, private use.
langtag = (language +
           maybe_str('-' + script) +
           maybe_str('-' + region) +
           string('-' + variant) +
           string('-' + extension) +
           maybe_str('-' + privateuse))                                 > pivot

# Every alternative is wrapped in ``LanguageTag``.
Language_Tag = (LanguageTag << langtag |
                LanguageTag << privateuse |
                LanguageTag << grandfathered)                           > pivot


# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 5646 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(5646))
Example #17
0
from urllib.parse import unquote_to_bytes as pct_decode

from httpolice.citation import RFC
from httpolice.parse import (auto, can_complain, fill_names, maybe, pivot,
                             skip, string, string1)
from httpolice.structure import CaseInsensitive, ExtValue
from httpolice.syntax.common import ALPHA, DIGIT, HEXDIG
from httpolice.syntax.rfc5646 import Language_Tag as language
from httpolice.util.text import force_bytes

# ``attr_char``: a character that may appear without percent-encoding.
attr_char = (ALPHA | DIGIT | '!' | '#' | '$' | '&' | '+' | '-' | '.' | '^'
             | '_' | '`' | '|' | '~') > auto
parmname = string(attr_char) > pivot

# We don't need to special-case "UTF-8", simplify.
mime_charsetc = (ALPHA | DIGIT | '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^'
                 | '_' | '`' | '{' | '}' | '~') > auto
mime_charset = string1(mime_charsetc) > auto
charset = CaseInsensitive << mime_charset > pivot

# ``pct_encoded``: ``%`` followed by two hex digits.
pct_encoded = '%' + HEXDIG + HEXDIG > auto
# The matched text goes through ``force_bytes`` and then ``pct_decode``
# (``urllib.parse.unquote_to_bytes``).
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char)) > auto


@can_complain
def _check_ext_value(complain, val):
    if val.charset == u'UTF-8':
        try:
            val.value_bytes.decode(val.charset)
        except UnicodeError as e:
Example #18
0
             | 'i-enochian' | 'i-hak' | 'i-klingon' | 'i-lux' | 'i-mingo'
             | 'i-navajo' | 'i-pwn' | 'i-tao' | 'i-tay' | 'i-tsu' | 'sgn-BE-FR'
             | 'sgn-BE-NL' | 'sgn-CH-DE') > auto

# ``regular``: a fixed list of literal tags.
regular = (literal('art-lojban') | 'cel-gaulish' | 'no-bok' | 'no-nyn'
           | 'zh-guoyu' | 'zh-hakka' | 'zh-min' | 'zh-min-nan'
           | 'zh-xiang') > auto

grandfathered = irregular | regular > pivot
# ``privateuse``: ``x`` followed by one or more ``-``-prefixed groups of
# 1 to 8 alphanumerics.
privateuse = 'x' + string1('-' + string_times(1, 8, alphanum)) > pivot

# ``extlang``: three letters, then up to two more ``-``-prefixed groups of
# three letters.
extlang = (string_times(3, 3, ALPHA) +
           string_times(0, 2, '-' + string_times(3, 3, ALPHA))) > pivot

# ``language``: 2-3 letters with an optional ``extlang``, or 4 letters,
# or 5-8 letters.
language = (string_times(2, 3, ALPHA) + maybe_str('-' + extlang)
            | string_times(4, 4, ALPHA) | string_times(5, 8, ALPHA)) > pivot
script = string_times(4, 4, ALPHA) > pivot
region = string_times(2, 2, ALPHA) | string_times(3, 3, DIGIT) > pivot
variant = (string_times(5, 8, alphanum) |
           (DIGIT + string_times(3, 3, alphanum))) > pivot
extension = (singleton + string1('-' + string_times(2, 8, alphanum))) > pivot

# ``langtag``: the language followed by ``-``-prefixed subtags in this
# order: script, region, variants, extensions, private use.
langtag = (language + maybe_str('-' + script) + maybe_str('-' + region) +
           string('-' + variant) + string('-' + extension) +
           maybe_str('-' + privateuse)) > pivot

# Every alternative is wrapped in ``LanguageTag``.
Language_Tag = (LanguageTag << langtag | LanguageTag << privateuse
                | LanguageTag << grandfathered) > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 5646 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(5646))
Example #19
0
from httpolice.parse import (auto, can_complain, fill_names, maybe, octet,
                             octet_range, pivot, string, subst)
from httpolice.structure import EntityTag
from httpolice.syntax.common import DQUOTE
from httpolice.syntax.rfc7230 import comma_list1, obs_text
from httpolice.syntax.rfc7231 import HTTP_date

# ``weak``: the two octets 0x57 0x2F (``W/``).
# NOTE(review): ``subst(True)`` presumably makes the parse result ``True``.
weak = subst(True) << octet(0x57) * octet(0x2F) > auto
# ``etagc``: 0x21, 0x23-0x7E or ``obs_text``; 0x22 (``"``) is left out.
etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text > auto


@can_complain
def _no_backslashes(complain, s):
    """Report notice 1119 if `s` contains a backslash; return `s` as is."""
    has_backslash = u'\\' in s
    if has_backslash:
        complain(1119)
    return s


# ``+`` binds tighter than ``<<``, so ``_no_backslashes`` receives the whole
# ``DQUOTE ... DQUOTE`` concatenation.
opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE > auto
# ``entity_tag``: an optional ``weak`` marker (``False`` is passed as the
# second argument of ``maybe``) followed by the opaque tag.
entity_tag = EntityTag << maybe(weak, False) * opaque_tag > pivot

ETag = entity_tag > pivot
Last_Modified = HTTP_date > pivot

# ``>`` binds looser than ``|``, so each whole alternation is sealed.
If_Match = '*' | comma_list1(entity_tag) > pivot
If_None_Match = '*' | comma_list1(entity_tag) > pivot
If_Modified_Since = HTTP_date > pivot
If_Unmodified_Since = HTTP_date > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 7232 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(7232))
Example #20
0
# ``byte_range``: ``<first>-<last>``.
byte_range = first_byte_pos * skip('-') * last_byte_pos                 > auto
# ``complete_length``: one or more digits, converted with ``int``.
complete_length = int << string1(DIGIT)                                 > auto
# ``byte_range_resp``: ``<byte-range>/<complete-length>``, or ``*`` in
# place of the length.  ``<<`` binds tighter than ``|``, so ``subst(None)``
# applies only to the ``*`` alternative.
byte_range_resp = (
    byte_range * skip('/') *
    (complete_length | subst(None) << literal('*')))                    > pivot

# ``unsatisfied_range``: the literal ``*/`` followed by the complete length.
unsatisfied_range = (
    (subst(None) << literal('*/')) * complete_length)                   > pivot

@can_complain
def _well_formed2(complain, r):
    """Check the range carried by `r` and report notice 1148 if it is bad.

    ``r.range`` unpacks into (bounds, complete length).  With bounds
    present, notice 1148 is reported when the last position precedes the
    first, or when a known complete length does not exceed the last
    position.  `r` is always returned unchanged.
    """
    bounds, complete = r.range
    if bounds is None:
        return r
    first, last = bounds
    reversed_bounds = last < first
    if reversed_bounds or (complete is not None and complete <= last):
        complain(1148)
    return r

# ``byte_content_range``: the bytes unit, a space, then either a satisfied
# or an unsatisfied range; wrapped in ``ContentRange`` and then checked
# by ``_well_formed2``.
byte_content_range = _well_formed2 << (ContentRange << (
    bytes_unit * skip(SP) * (byte_range_resp | unsatisfied_range)))     > pivot

# Ranges in other units are not interpreted: any run of ``CHAR``.
other_range_resp = string(CHAR)                                         > pivot
other_content_range = ContentRange << (
    other_range_unit * skip(SP) * other_range_resp)                     > pivot

# ``>`` binds looser than ``|``, so each whole alternation is what gets
# sealed with ``pivot`` below.
Content_Range = byte_content_range | other_content_range                > pivot

If_Range = entity_tag | HTTP_date                                       > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 7233 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(7233))
Example #21
0
    string,
    subst,
)
from httpolice.structure import EntityTag
from httpolice.syntax.common import DQUOTE
from httpolice.syntax.rfc7230 import comma_list1, obs_text
from httpolice.syntax.rfc7231 import HTTP_date


# ``weak``: the two octets 0x57 0x2F (``W/``).
# NOTE(review): ``subst(True)`` presumably makes the parse result ``True``.
weak = subst(True) << octet(0x57) * octet(0x2F)                         > auto
# ``etagc``: 0x21, 0x23-0x7E or ``obs_text``; 0x22 (``"``) is left out.
etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text                > auto

@can_complain
def _no_backslashes(complain, s):
    """Pass `s` through, reporting notice 1119 when it has a backslash."""
    found = u'\\' in s
    if found:
        complain(1119)
    return s

# ``+`` binds tighter than ``<<``, so ``_no_backslashes`` receives the whole
# ``DQUOTE ... DQUOTE`` concatenation.
opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE         > auto
# ``entity_tag``: an optional ``weak`` marker (``False`` is passed as the
# second argument of ``maybe``) followed by the opaque tag.
entity_tag = EntityTag << maybe(weak, False) * opaque_tag               > pivot

ETag = entity_tag                                                       > pivot
Last_Modified = HTTP_date                                               > pivot

# ``>`` binds looser than ``|``, so each whole alternation is sealed.
If_Match = '*' | comma_list1(entity_tag)                                > pivot
If_None_Match = '*' | comma_list1(entity_tag)                           > pivot
If_Modified_Since = HTTP_date                                           > pivot
If_Unmodified_Since = HTTP_date                                         > pivot

# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 7232 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(7232))
Example #22
0
def parmname__excluding(exclude):
    """Build the ``parmname`` pivot rule (RFC 5987) with some values excluded.

    The empty string is always excluded, in addition to everything
    listed in `exclude`.
    """
    forbidden = [''] + exclude
    rule = string_excluding(attr_char, forbidden)
    return rule > named(u'parmname', RFC(5987), is_pivot=True)


# The plain ``parmname`` rule: nothing excluded beyond the empty string.
parmname = parmname__excluding([])

# We don't need to special-case "UTF-8" and "ISO-8859-1", simplify.
mime_charsetc = (ALPHA | DIGIT | '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^'
                 | '_' | '`' | '{' | '}' | '~') > auto
mime_charset = string1(mime_charsetc) > auto
charset = CaseInsensitive << mime_charset > pivot

# ``pct_encoded``: ``%`` followed by two hex digits.
pct_encoded = '%' + HEXDIG + HEXDIG > auto
# The matched text goes through ``force_bytes`` and then ``pct_decode``.
# NOTE(review): both helpers are imported outside this excerpt -- confirm
# what they do before relying on the result type.
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char)) > auto


@can_complain
def _check_ext_value(complain, val):
    """Validate the charset and encoded bytes of an extended value.

    Notice 1253 is reported when the charset is neither UTF-8 nor
    ISO-8859-1.  Otherwise the bytes are trial-decoded with that
    charset, and notice 1254 is reported if decoding fails.
    `val` is returned unchanged in every case.
    """
    supported = [u'UTF-8', u'ISO-8859-1']
    if val.charset not in supported:
        complain(1253, charset=val.charset)
        return val
    try:
        val.value_bytes.decode(val.charset)
    except UnicodeError as exc:
        complain(1254, charset=val.charset, error=exc)
    return val


ext_value = _check_ext_value << (
Example #23
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import (auto, empty, fill_names, literal, maybe_str,
                             octet_range, pivot, string, string1, string_times,
                             subst)
from httpolice.syntax.common import ALPHA, DIGIT, HEXDIG

# Character classes.
pct_encoded = '%' + HEXDIG + HEXDIG > auto
sub_delims = (literal('!') | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ','
              | ';' | '=') > auto
unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~' > auto
pchar = unreserved | sub_delims | ':' | '@' | pct_encoded > auto

# Path segments; ``segment_nz_nc`` is like ``segment_nz`` but without ``:``.
segment = string(pchar) > auto
segment_nz = string1(pchar) > auto
segment_nz_nc = string1(unreserved | sub_delims | '@' | pct_encoded) > auto

scheme = ALPHA + string(ALPHA | DIGIT | '+' | '-' | '.') > pivot
userinfo = string(unreserved | sub_delims | ':' | pct_encoded) > pivot
# ``dec_octet`` alternatives, in order: 0-9, 10-99, 100-199, 200-249, 250-255
# (0x31-0x39 is ``1``-``9``, 0x30-0x34 is ``0``-``4``, 0x30-0x35 is ``0``-``5``).
dec_octet = (DIGIT | octet_range(0x31, 0x39) + DIGIT | '1' + DIGIT + DIGIT
             | '2' + octet_range(0x30, 0x34) + DIGIT
             | '25' + octet_range(0x30, 0x35)) > auto
IPv4address = (dec_octet + '.' + dec_octet + '.' + dec_octet + '.' +
               dec_octet) > pivot
# ``h16``: 1 to 4 hex digits; ``ls32``: two ``h16`` groups or an IPv4 address.
h16 = string_times(1, 4, HEXDIG) > auto
ls32 = (h16 + ':' + h16) | IPv4address > auto
IPv6address = (
    string_times(6, 6, h16 + ':') + ls32
    | '::' + string_times(5, 5, h16 + ':') + ls32
    | maybe_str(h16) + '::' + string_times(4, 4, h16 + ':') + ls32
Example #24
0
from httpolice.syntax.rfc2616 import LOALPHA
from httpolice.syntax.rfc3986 import URI, URI_reference as URI_Reference
from httpolice.syntax.rfc5646 import Language_Tag
from httpolice.syntax.rfc5987 import ext_value, parmname__excluding
from httpolice.syntax.rfc6838 import subtype_name, type_name
from httpolice.syntax.rfc7230 import OWS, comma_list, quoted_string


# RFC 5988 refers to HTML 4.01 for the ``MediaDesc`` rule,
# but HTML 4.01 doesn't actually define a grammar for that;
# it only gives a vague idea of what it is supposed to be.
# So we use a fairly permissive form.
# Also, from RFC 5988 Section 5.4:
# "its value MUST be quoted if it contains a semicolon (';') or comma (',')".

# NOTE(review): ``-`` presumably excludes the right-hand literals from the
# left-hand alternatives -- confirm against ``httpolice.parse``.
_MediaDesc = string((VCHAR | HTAB | SP) - literal('"'))
_MediaDesc_no_delim = string((VCHAR | HTAB | SP) -
                             literal('"') - literal(';') - literal(','))


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

ptokenchar = (literal('!') | '#' | '$' | '%' | '&' | "'" | '(' |
              ')' | '*' | '+' | '-' | '.' | '/' | DIGIT |
              ':' | '<' | '=' | '>' | '?' | '@' | ALPHA |
              '[' | ']' | '^' | '_' | '`' | '{' | '|' |
              '}' | '~')                                                > auto
ptoken = string1(ptokenchar)                                            > auto

# ``+`` binds tighter than ``<<``, so ``MediaType`` wraps the whole
# ``type/subtype`` concatenation.
media_type = MediaType << type_name + '/' + subtype_name                > pivot
Example #25
0
def parmname__excluding(exclude):
    """Return the ``parmname`` pivot rule (RFC 5987) minus excluded values.

    Besides the entries of `exclude`, the empty string is always
    excluded as well.
    """
    excluded = [''] + exclude
    base = string_excluding(attr_char, excluded)
    return base > named(u'parmname', RFC(5987), is_pivot=True)

# The plain ``parmname`` rule: nothing excluded beyond the empty string.
parmname = parmname__excluding([])

# We don't need to special-case "UTF-8" and "ISO-8859-1", simplify.
mime_charsetc = (ALPHA | DIGIT |
                 '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^' | '_' | '`' |
                 '{' | '}' | '~')                                       > auto
mime_charset = string1(mime_charsetc)                                   > auto
charset = CaseInsensitive << mime_charset                               > pivot

# ``pct_encoded``: ``%`` followed by two hex digits.
pct_encoded = '%' + HEXDIG + HEXDIG                                     > auto
# The matched text goes through ``force_bytes`` and then ``pct_decode``.
# NOTE(review): both helpers are imported outside this excerpt -- confirm
# what they do before relying on the result type.
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char))                     > auto

@can_complain
def _check_ext_value(complain, val):
    """Check that an extended value uses a known charset and decodes.

    For a charset other than UTF-8 and ISO-8859-1, notice 1253 is
    reported.  For those two, the value bytes are trial-decoded and
    notice 1254 is reported on failure.  `val` is returned as is.
    """
    known_charsets = [u'UTF-8', u'ISO-8859-1']
    if val.charset not in known_charsets:
        complain(1253, charset=val.charset)
        return val
    try:
        val.value_bytes.decode(val.charset)
    except UnicodeError as exc:
        complain(1254, charset=val.charset, error=exc)
    return val

ext_value = _check_ext_value << (
    ExtValue << (charset * skip("'") *
                 maybe(language) * skip("'") *
Example #26
0
        complain(1309)
    return MultiDict(r)

# ``link_param``: a case-insensitive name, optionally followed by ``=`` and
# a value that is either a token or a quoted string (wrapped in ``mark``).
link_param = (
    (CaseInsensitive << token) * skip(BWS) *
    maybe(skip(literal('=') * BWS) * (mark(token) |
                                      mark(quoted_string))))            > pivot

# ``link_value``: ``<URI-Reference>`` followed by ``;``-separated parameters,
# which are post-processed by ``_process_params``.
link_value = Parametrized << (
    skip('<') * URI_Reference * skip('>') *
    (_process_params << many(skip(OWS * ';' * OWS) * link_param)))      > pivot

Link = comma_list(link_value)                                           > pivot


anchor = URI_Reference                                                  > auto

# ``reg_rel_type``: a lowercase letter, then lowercase letters, digits,
# ``.`` or ``-``.
reg_rel_type = CaseInsensitive << (
    LOALPHA + string(LOALPHA | DIGIT | '.' | '-'))                      > auto
ext_rel_type = URI                                                      > auto
relation_type = reg_rel_type | ext_rel_type                             > pivot
# ``rel`` and ``rev`` are bound to the same rule object.
# NOTE(review): ``%`` is assumed to combine the first relation type with
# the space-separated rest -- confirm against ``httpolice.parse``.
rel = rev = relation_type % many(skip(string1(SP)) * relation_type)     > auto

hreflang = Language_Tag                                                 > auto

# ``+`` binds tighter than ``<<``, so ``MediaType`` wraps the whole
# ``type/subtype`` concatenation before ``check_media_type`` is applied.
type_ = check_media_type << (
    MediaType << type_name + '/' + subtype_name)                        > auto


# NOTE(review): presumably names the rules defined in this module and cites
# them to RFC 8288 -- confirm against ``httpolice.parse.fill_names``.
fill_names(globals(), RFC(8288))