Ejemplo n.º 1
0
 def _assert_string(self, separator, expecteds, streams=STREAMS_3):
     """
     Build the test parser inside `separator`, then check every stream.

     Each entry of `streams` is parsed with `parse_string`; whether a
     result was returned (not None) is compared with the matching entry
     of `expecteds`.  All streams are tried before the final assertion
     so that, when PRINT is set, a complete table is displayed.
     """
     with separator:
         parser = And(Optional('a') & Optional('b') & 'c', Eos())
     parser.config.no_full_first_match()
     all_agree = True
     for (text, wanted) in zip(streams, expecteds):
         matched = parser.parse_string(text) is not None
         agrees = matched == wanted
         if PRINT:
             print('{0!r:9} : {1!r:5} {2!r:5}'
                   .format(text, matched, agrees))
         if not agrees:
             all_agree = False
     assert all_agree
Ejemplo n.º 2
0
 def left_token(self, contents=False):
     """
     Return a Delayed matcher defined as an optional reference to itself
     followed by a token.

     The token is `Token(Any())`; when `contents` is true it is further
     specialised by calling it with `Or('a', 'b')`.
     """
     recursive = Delayed()
     token = Token(Any())
     if contents:
         token = token(Or('a', 'b'))
     body = Optional(recursive) & token
     recursive += body
     return recursive
Ejemplo n.º 3
0
 def test_and(self):
     """
     With transforms composed, `append('x')` applied to an And of two
     matchers yields 'ax' for input 'a', and the parser's top-level
     matcher is still an And.
     """
     pair = Any() & Optional(Any())
     matcher = pair > append('x')
     matcher.config.clear().compose_transforms()
     parser = matcher.get_parse()
     first = parser('a')[0]
     assert first == 'ax', first
     top = parser.matcher
     assert is_child(top, And), type(top)
Ejemplo n.º 4
0
    def test_eos(self):
        matcher = Optional(Any('a'))
        matcher.config.full_first_match(eos=True)
        try:
            list(matcher.match('b'))
            assert False, 'expected error'
        except FullFirstMatchException as e:
            assert str(e) == """The match failed at 'b',
Line 1, character 0 of str: 'b'.""", str(e)
Ejemplo n.º 5
0
 def test_smart_spaces(self):
     """
     Check which inputs the grammar 'a' [b] 'c' Eos accepts when built
     under `SmartSeparator1(Space())`.
     """
     with SmartSeparator1(Space()):
         parser = 'a' & Optional('b') & 'c' & Eos()
     parser.config.no_full_first_match()

     # inputs for which parse() must return a true value
     for accepted in ('a b c', 'a c'):
         assert parser.parse(accepted)

     # inputs for which parse() must return a false value
     rejected_inputs = ('a b c ', 'a c ', 'a bc', 'ab c', 'abc', 'ac',
                        'a  c')
     for rejected in rejected_inputs:
         assert not parser.parse(rejected)
Ejemplo n.º 6
0
    def test_left1b(self):
        """
        A sequence defined as an optional self-reference followed by a
        letter, configured with left memoisation and stack tracing, must
        give two matches for 'ab': first both letters, then just 'a'.
        """
        #basicConfig(level=DEBUG)

        seq = Delayed()
        letter = Any()
        seq += Optional(seq) & letter

        seq.config.clear().left_memoize().trace_stack(True)
        match_string = seq.get_match_string()

        matches = list(match_string('ab'))
        assert len(matches) == 2, len(matches)
        for position, wanted in enumerate((['a', 'b'], ['a'])):
            found = matches[position][0]
            assert found == wanted, found
Ejemplo n.º 7
0
    def test_node(self):
        
        class Term(Node): pass

        number      = Any('1')                             > 'number'
        term        = number                               > Term
        factor      = term | Drop(Optional(term))
        
        factor.config.clear().compose_transforms()
        p = factor.get_parse_string()
        ast = p('1')[0]
        assert type(ast) == Term, type(ast)
        assert ast[0] == '1', ast[0]
        assert str26(ast) == """Term
 `- number '1'""", ast
Ejemplo n.º 8
0
    def test_right(self):
        """
        A sequence defined as a letter followed by an optional
        self-reference, configured with right memoisation and stack
        tracing, must give two matches for 'ab': first both letters,
        then just 'a'.
        """
        #basicConfig(level=DEBUG)

        seq = Delayed()
        letter = Any()
        seq += letter & Optional(seq)

        #print(seq.tree())
        seq.config.clear().right_memoize().trace_stack(True)
        #seq.config.clear().right_memoize()
        match_string = seq.get_match_string()
        #print(match_string.matcher.tree())

        matches = list(match_string('ab'))
        assert len(matches) == 2, len(matches)
        for position, wanted in enumerate((['a', 'b'], ['a'])):
            found = matches[position][0]
            assert found == wanted, found
Ejemplo n.º 9
0
 def left(self):
     """
     Return a Delayed matcher defined as an optional reference to itself
     followed by `Any()`.
     """
     recursive = Delayed()
     body = Optional(recursive) & Any()
     recursive += body
     return recursive
Ejemplo n.º 10
0
 def right(self):
     """
     Return a Delayed matcher defined as `Any()` followed by an optional
     reference to itself.
     """
     recursive = Delayed()
     body = Any() & Optional(recursive)
     recursive += body
     return recursive
Ejemplo n.º 11
0
    def make_binary_parser():
        '''
        Create a parser for binary data.

        The grammar is assembled from LEPL matchers and the value returned
        is `expr.get_parse_sequence()`, where `expr` is the recursive
        top-level expression (a parenthesised, comma-separated list of
        entries, optionally preceded by 'Node' or by a class name).
        '''

        # avoid import loops
        from lepl import Word, Letter, Digit, UnsignedInteger, \
            Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
            args, Trace, TraceVariables
        from lepl.bin.bits import BitString
        from lepl.support.node import Node

        # cache of dynamically created Node sub-classes, keyed by name
        classes = {}

        def named_class(name, *args):
            '''
            Given a name and some args, create (on first use) a sub-class of
            Node with that name and return an instance built from the args.

            Note that the `*args` parameter shadows the `args` helper
            imported from lepl, but only inside this function.
            '''
            if name not in classes:
                classes[name] = type(name, (Node, ), {})
            return classes[name](*args)

        with TraceVariables(False):

            # repeat the value l, n times; n is text converted with
            # int(n, 0), so the base is taken from any prefix it carries
            mult = lambda l, n: BitString.from_sequence([l] * int(n, 0))

            # an attribute or class name
            name = Word(Letter(), Letter() | Digit() | '_')

            # lengths can be integers (bits) or floats (bytes.bits)
            # but if we have a float, we do not want to parse as an int
            # (or we will get a conversion error due to too small length)
            length = First(
                UnsignedInteger() + '.' + Optional(UnsignedInteger()),
                UnsignedInteger())

            # a literal decimal
            decimal = UnsignedInteger()

            # a binary number (without pre/postfix)
            binary = Any('01')[1:]

            # an octal number (without pre/postfix)
            octal = Any('01234567')[1:]

            # a hex number (without pre/postfix)
            hex_ = Regexp('[a-fA-F0-9]')[1:]

            # the letters used for binary, octal and hex values
            #(eg the 'x' in 0xffee)
            # pylint: disable-msg=C0103
            b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')

            # a decimal with optional pre/postfix
            dec = '0' + d + decimal | decimal + d + '0' | decimal

            # little-endian literals have normal prefix syntax (eg 0xffee)
            little = decimal | '0' + (b + binary | o + octal | x + hex_)

            # big-endian literals have postfix (eg ffeex0)
            big = (binary + b | octal + o | hex_ + x) + '0'

            # optional spaces - will be ignored
            # (use DFA here because it's multi-line, so \n will match ok)
            spaces = Drop(DfaRegexp('[ \t\n\r]*'))

            with Separator(spaces):

                # the grammar is recursive - expressions can contain expressions -
                # so we use a delayed matcher here as a placeholder, so that we can
                # use them before they are defined.
                expr = Delayed()

                # an implicit length value can be big or little-endian
                # ('|' binds more tightly than '>', so the transform applies
                # to the whole alternation)
                ivalue = big | little > args(BitString.from_int)

                # a value with a length can also be decimal
                lvalue = (big | little | dec) & Drop('/') & length  \
                                                      > args(BitString.from_int)

                # values with an explicit length are tried first
                value = lvalue | ivalue

                # a value followed by '*' and a repeat count
                repeat = value & Drop('*') & little > args(mult)

                # a named value is also a tuple
                named = name & Drop('=') & (expr | value | repeat) > tuple

                # an entry in the expression could be any of these
                entry = named | value | repeat | expr

                # and an expression itself consists of a comma-separated list of
                # one or more entries, surrounded by parentheses
                entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')

                # the 'Node' keyword may be explicit or implicit; the list of
                # entries is passed to Node
                node = Optional(Drop('Node')) & entries > Node

                # alternatively, we can give a name and create a named sub-class
                other = name & entries > args(named_class)

                # and finally, we "tie the knot" by giving a definition for the
                # delayed matcher we introduced earlier, which is either a binary
                # node or a subclass
                expr += spaces & (node | other) & spaces

        #expr = Trace(expr)
        # this changes order, making 0800x0 parse as binary
        expr.config.no_compile_to_regexp()
        # use sequence to force regexp over multiple lines
        return expr.get_parse_sequence()
Ejemplo n.º 12
0
# A parameter value is either a bare token or a quoted string
# (both matchers are defined outside this section).
value = token | quoted_string

# Other charsets are forbidden, the spec reserves them
# for future evolutions.
charset = (CaseInsensitiveLiteral('UTF-8')
           | CaseInsensitiveLiteral('ISO-8859-1'))

# XXX See RFC 5646 for the correct definition
language = token

# Building blocks of an extended value: a single allowed character, or a
# '%' followed by two hex digits.
attr_char = Any(attr_chars)
hexdig = Any(hexdigits)
pct_encoded = '%' + hexdig + hexdig
value_chars = (pct_encoded | attr_char)[...]
# Extended value: charset, quote, optional language, quote, then the
# encoded characters; the pieces are handed to parse_ext_value.
ext_value = (charset & Drop("'") & Optional(language) & Drop("'")
             & value_chars) > parse_ext_value
# An extended parameter name is a token with a trailing '*'.
ext_token = token + '*'
# NOTE(review): presumably a token that is not followed by '*', using a
# negated lookahead - confirm against the parser library documentation.
noext_token = ~Lookahead(ext_token) & token

# Adapted from https://tools.ietf.org/html/rfc6266
# Mostly this was simplified to fold filename / filename*
# into the normal handling of ext_token / noext_token
with DroppedSpace():
    # A parameter is a (name, value) tuple; extended names take an
    # extended value, all other names take a plain value.
    disposition_parm = ((ext_token & Drop('=') & ext_value)
                        | (noext_token & Drop('=') & value)) > tuple
    # The two well-known disposition types are matched without regard to
    # case; any other token is accepted as well.
    disposition_type = (CaseInsensitiveLiteral('inline')
                        | CaseInsensitiveLiteral('attachment')
                        | token)
    # The full header value: a type followed by ';'-introduced parameters.
    content_disposition_value = (disposition_type
                                 & Star(Drop(';') & disposition_parm))
Ejemplo n.º 13
0
# One alternative per token produced for the symbol table's array element
# types.
array_element_type = Or(*generate_type_tokens(symboltable.array_element_types))

# Any type: the alternatives are tried in this order.
type_ = scalar_type | array_type | array_element_type

# Numeric literals, converted through Real / Integer.
real = Token(UnsignedReal()) >> Real
integer = Token(UnsignedInteger()) >> Integer
number = integer | real
# Boolean literals: yes / no / true / false, each passed through Bool
# ('>>' binds more tightly than '|', so every keyword has its own transform).
boolean = keyword('yes') >> Bool | keyword('no') >> Bool | keyword(
    'true') >> Bool | keyword('false') >> Bool

# The three size components are all plain integers.
width = integer
height = integer
depth = integer

# Error case: size components and a closing ']' with no opening '['.
# NOTE(review): symbol(']') appears both inside the parenthesised group and
# again after make_error(...) - confirm that the second one is intended.
unopened_size_block = (
    width & Optional(~comma & height & Optional(~comma & depth))
    & symbol(']'))**make_error('no [ before {out_rest!s}') & symbol(']')
# Error case: an opening '[' and size components with no closing ']'.
unclosed_size_block = (
    symbol('[') & width & Optional(~comma & height & Optional(~comma & depth))
)**make_error('Array size specification is missing a closing ]')

# A size is '[' width [, height [, depth]] ']' built via with_line(Size);
# the two error alternatives above are tried after the well-formed case.
size = Or((~symbol('[') & width
           & Optional(~comma & height
                      & Optional(~comma & depth))
           & ~symbol(']'))**with_line(Size), unopened_size_block,
          unclosed_size_block)

#### Expression Parsing ####
# Operator precedence, inside to outside
#  1 parentheses ()
#  2 not, unary minus (!, -)