def _assert_string(self, separator, expecteds, streams=STREAMS_3):
    """For each stream, check that parse success/failure matches *expecteds*
    when matchers are built under the given *separator* context."""
    with separator:
        parser = And(Optional('a') & Optional('b') & 'c', Eos())
    parser.config.no_full_first_match()
    all_ok = True
    for (text, want) in zip(streams, expecteds):
        got = parser.parse_string(text) is not None
        if PRINT:
            print('{0!r:9} : {1!r:5} {2!r:5}'
                  .format(text, got, got == want))
        all_ok = all_ok and (got == want)
    assert all_ok
def left_token(self, contents=False):
    """Build a left-recursive matcher over tokens; when *contents* is true
    the token is restricted to match 'a' or 'b'."""
    result = Delayed()
    tok = Token(Any())
    tok = tok(Or('a', 'b')) if contents else tok
    result += Optional(result) & tok
    return result
def test_and(self):
    """Composing transforms must preserve results and leave an And matcher."""
    combined = (Any() & Optional(Any())) > append('x')
    combined.config.clear().compose_transforms()
    parse = combined.get_parse()
    first = parse('a')[0]
    assert first == 'ax', first
    # after composition the top-level matcher should still be an And
    assert is_child(parse.matcher, And), type(parse.matcher)
def test_eos(self):
    """A full-first-match config with eos=True must raise when input remains.

    Optional(Any('a')) matches the empty string on 'b', leaving 'b'
    unconsumed, so the eos check fails with FullFirstMatchException.
    """
    matcher = Optional(Any('a'))
    matcher.config.full_first_match(eos=True)
    try:
        # force evaluation of the (lazy) match generator
        list(matcher.match('b'))
        assert False, 'expected error'
    except FullFirstMatchException as e:
        # NOTE(review): expected text reproduced verbatim from source; the
        # original literal may have spanned two lines — confirm against lepl.
        assert str(e) == """The match failed at 'b', Line 1, character 0 of str: 'b'.""", str(e)
def test_smart_spaces(self):
    """SmartSeparator1 accepts exactly one space between tokens and none
    at either end."""
    with SmartSeparator1(Space()):
        parser = 'a' & Optional('b') & 'c' & Eos()
    parser.config.no_full_first_match()
    # single separators between tokens are accepted
    assert parser.parse('a b c')
    assert parser.parse('a c')
    # trailing separator is rejected
    assert not parser.parse('a b c ')
    assert not parser.parse('a c ')
    # missing separators are rejected
    assert not parser.parse('a bc')
    assert not parser.parse('ab c')
    assert not parser.parse('abc')
    assert not parser.parse('ac')
    # FIX: was `assert not parser.parse('a c')`, which contradicts the
    # positive assertion above and makes the test unconditionally fail.
    # The intended input is a doubled space, which must be rejected.
    assert not parser.parse('a  c')
def test_left1b(self):
    """Left-recursive grammar with left memoisation yields both matches."""
    grammar = Delayed()
    grammar += Optional(grammar) & Any()
    grammar.config.clear().left_memoize().trace_stack(True)
    match = grammar.get_match_string()
    found = list(match('ab'))
    assert len(found) == 2, len(found)
    # longest match first, then the single-character prefix
    assert found[0][0] == ['a', 'b'], found[0][0]
    assert found[1][0] == ['a'], found[1][0]
def test_node(self):
    """Composing transforms must still build the Node-based AST correctly."""
    class Term(Node): pass
    # tag the matched digit, then wrap it in a Term node
    number = Any('1') > 'number'
    term = number > Term
    factor = term | Drop(Optional(term))
    factor.config.clear().compose_transforms()
    p = factor.get_parse_string()
    ast = p('1')[0]
    assert type(ast) == Term, type(ast)
    assert ast[0] == '1', ast[0]
    # NOTE(review): expected tree text reproduced verbatim from source; the
    # original literal likely contained a newline before the backquote —
    # confirm against the lepl test suite.
    assert str26(ast) == """Term `- number '1'""", ast
def test_right(self):
    """Right-recursive grammar with right memoisation yields both matches."""
    grammar = Delayed()
    grammar += Any() & Optional(grammar)
    grammar.config.clear().right_memoize().trace_stack(True)
    match = grammar.get_match_string()
    found = list(match('ab'))
    assert len(found) == 2, len(found)
    # longest match first, then the single-character prefix
    assert found[0][0] == ['a', 'b'], found[0][0]
    assert found[1][0] == ['a'], found[1][0]
def left(self):
    """Return a left-recursive matcher: zero or more self, then any char."""
    result = Delayed()
    result += Optional(result) & Any()
    return result
def right(self):
    """Return a right-recursive matcher: any char, then optionally self."""
    result = Delayed()
    result += Any() & Optional(result)
    return result
def make_binary_parser():
    '''
    Create a parser for binary data.

    Returns a sequence parser (from ``get_parse_sequence``) that reads a
    textual description of binary structures — parenthesised, comma-separated
    entries of literals, length-qualified values, repeats and named values —
    and builds Node-based results with BitString contents.
    '''
    # avoid import loops
    from lepl import Word, Letter, Digit, UnsignedInteger, \
        Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
        args, Trace, TraceVariables
    from lepl.bin.bits import BitString
    from lepl.support.node import Node

    # cache of dynamically created Node subclasses, keyed by class name
    classes = {}

    def named_class(name, *args):
        '''
        Given a name and some args, create a sub-class of Node (cached by
        name) and create an instance with the given content.
        '''
        if name not in classes:
            classes[name] = type(name, (Node, ), {})
        return classes[name](*args)

    with TraceVariables(False):
        # repeat a literal: n is a textual count, base inferred by int(n, 0)
        mult = lambda l, n: BitString.from_sequence([l] * int(n, 0))
        # an attribute or class name
        name = Word(Letter(), Letter() | Digit() | '_')
        # lengths can be integers (bits) or floats (bytes.bits)
        # but if we have a float, we do not want to parse as an int
        # (or we will get a conversion error due to too small length)
        length = First(
            UnsignedInteger() + '.' + Optional(UnsignedInteger()),
            UnsignedInteger())
        # a literal decimal
        decimal = UnsignedInteger()
        # a binary number (without pre/postfix)
        binary = Any('01')[1:]
        # an octal number (without pre/postfix)
        octal = Any('01234567')[1:]
        # a hex number (without pre/postfix)
        hex_ = Regexp('[a-fA-F0-9]')[1:]
        # the letters used for binary, octal and hex values
        # (eg the 'x' in 0xffee)
        # pylint: disable-msg=C0103
        b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')
        # a decimal with optional pre/postfix
        dec = '0' + d + decimal | decimal + d + '0' | decimal
        # little-endian literals have normal prefix syntax (eg 0xffee)
        little = decimal | '0' + (b + binary | o + octal | x + hex_)
        # big-endian literals have postfix (eg ffeex0)
        big = (binary + b | octal + o | hex_ + x) + '0'
        # optional spaces - will be ignored
        # (use DFA here because it's multi-line, so \n will match ok)
        spaces = Drop(DfaRegexp('[ \t\n\r]*'))
        with Separator(spaces):
            # the grammar is recursive - expressions can contain expressions -
            # so we use a delayed matcher here as a placeholder, so that we can
            # use them before they are defined.
            expr = Delayed()
            # an implicit length value can be big or little-endian
            ivalue = big | little > args(BitString.from_int)
            # a value with a length can also be decimal
            lvalue = (big | little | dec) & Drop('/') & length \
                > args(BitString.from_int)
            value = lvalue | ivalue
            # a value repeated via '*count'
            repeat = value & Drop('*') & little > args(mult)
            # a named value is also a tuple
            named = name & Drop('=') & (expr | value | repeat) > tuple
            # an entry in the expression could be any of these
            entry = named | value | repeat | expr
            # and an expression itself consists of a comma-separated list of
            # one or more entries, surrounded by parentheses
            entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')
            # the Binary node may be explicit or implicit and takes the list of
            # entries as an argument list
            node = Optional(Drop('Node')) & entries > Node
            # alternatively, we can give a name and create a named sub-class
            other = name & entries > args(named_class)
            # and finally, we "tie the knot" by giving a definition for the
            # delayed matcher we introduced earlier, which is either a binary
            # node or a subclass
            expr += spaces & (node | other) & spaces
            #expr = Trace(expr)
        # this changes order, making 0800x0 parse as binary
        expr.config.no_compile_to_regexp()
        # use sequence to force regexp over multiple lines
        return expr.get_parse_sequence()
value = token | quoted_string # Other charsets are forbidden, the spec reserves them # for future evolutions. charset = (CaseInsensitiveLiteral('UTF-8') | CaseInsensitiveLiteral('ISO-8859-1')) # XXX See RFC 5646 for the correct definition language = token attr_char = Any(attr_chars) hexdig = Any(hexdigits) pct_encoded = '%' + hexdig + hexdig value_chars = (pct_encoded | attr_char)[...] ext_value = (charset & Drop("'") & Optional(language) & Drop("'") & value_chars) > parse_ext_value ext_token = token + '*' noext_token = ~Lookahead(ext_token) & token # Adapted from https://tools.ietf.org/html/rfc6266 # Mostly this was simplified to fold filename / filename* # into the normal handling of ext_token / noext_token with DroppedSpace(): disposition_parm = ((ext_token & Drop('=') & ext_value) | (noext_token & Drop('=') & value)) > tuple disposition_type = (CaseInsensitiveLiteral('inline') | CaseInsensitiveLiteral('attachment') | token) content_disposition_value = (disposition_type & Star(Drop(';') & disposition_parm))
array_element_type = Or(*generate_type_tokens(symboltable.array_element_types)) type_ = scalar_type | array_type | array_element_type real = Token(UnsignedReal()) >> Real integer = Token(UnsignedInteger()) >> Integer number = integer | real boolean = keyword('yes') >> Bool | keyword('no') >> Bool | keyword( 'true') >> Bool | keyword('false') >> Bool width = integer height = integer depth = integer unopened_size_block = ( width & Optional(~comma & height & Optional(~comma & depth)) & symbol(']'))**make_error('no [ before {out_rest!s}') & symbol(']') unclosed_size_block = ( symbol('[') & width & Optional(~comma & height & Optional(~comma & depth)) )**make_error('Array size specification is missing a closing ]') size = Or((~symbol('[') & width & Optional(~comma & height & Optional(~comma & depth)) & ~symbol(']'))**with_line(Size), unopened_size_block, unclosed_size_block) #### Expression Parsing #### # Operator precedence, inside to outside # 1 parentheses () # 2 not, unary minus (!, -)