def test_complex_loop(self):
    '''
    Run the clone and relative checks on a matcher graph that refers to
    two Delayed matchers several times without ever binding them.
    '''
    first = Delayed()
    second = Delayed()
    alternatives = Any('a') | Any('b')[1:2,...] | first
    sequence = first & second
    # `>` binds more loosely than `|`, so it applies to the whole alternation
    matcher = (alternatives | sequence | first | second) > 'foo'
    self.assert_clone(matcher)
    self.assert_relative(matcher)
def test_loop(self):
    '''
    A Delayed matcher defined in terms of itself parses 'a' to 'ax', and
    the generated parser still has a Delayed as its matcher.
    '''
    looped = Delayed()
    looped += (Any() | looped) > append('x')
    looped.config.clear().compose_transforms()
    parse = looped.get_parse()
    first = parse('a')[0]
    assert first == 'ax', first
    assert isinstance(parse.matcher, Delayed)
def test_liberal(self):
    '''
    The first alternative of a self-referencing Delayed is a Delayed
    before the parser is generated and a TransformableWrapper afterwards.
    '''
    matcher = Delayed()
    matcher += matcher | Any()

    def first_alternative():
        # looked up afresh each time because building the parser is
        # expected to replace the child
        return matcher.matcher.matchers[0]

    assert isinstance(first_alternative(), Delayed)
    matcher.config.clear().optimize_or(False)
    matcher.get_parse_string()
    # TODO - better test
    assert isinstance(first_alternative(), TransformableWrapper)
def test_node(self):
    '''
    Parse an arithmetic expression into nested Node subclasses and compare
    the first result with the expected tree text.
    '''
    #basicConfig(level=DEBUG)

    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    expression = Delayed()
    digits = Digit()[1:, ...] > 'number'
    times_or_divide = Any('*/') > 'operator'
    plus_or_minus = Any('+-') > 'operator'
    term = (digits | '(' / expression / ')') > Term
    factor = (term / (times_or_divide / term)[0::]) > Factor
    expression += (factor / (plus_or_minus / factor)[0::]) > Expression

    parse = expression.get_parse_string()
    ast = parse('1 + 2 * (3 + 4 - 5)')
    # NOTE(review): the literal is reproduced exactly as it appears in this
    # copy (one line, single spaces); it looks like a rendered tree whose
    # line breaks were lost - confirm against the canonical test.
    expected = """Expression +- Factor | +- Term | | `- number '1' | `- ' ' +- operator '+' +- ' ' `- Factor +- Term | `- number '2' +- ' ' +- operator '*' +- ' ' `- Term +- '(' +- Expression | +- Factor | | +- Term | | | `- number '3' | | `- ' ' | +- operator '+' | +- ' ' | +- Factor | | +- Term | | | `- number '4' | | `- ' ' | +- operator '-' | +- ' ' | `- Factor | `- Term | `- number '5' `- ')'"""
    assert_str(ast[0], expected)
class TypeSpecParser:
    """
    Token-based grammar for type specifications.

    The grammar is held in class attributes; use the static methods
    `parse` and `print_parse` as entry points.  Results are built with
    the `Lst`, `Tup` and `Arr` constructors for bracketed types,
    parenthesised comma-separated types and `->` types respectively.
    """

    # keyword tokens
    int_tok = Token(r'int')
    float_tok = Token(r'float')
    str_tok = Token(r'str')
    unicode_tok = Token(r'unicode')
    bool_tok = Token(r'bool')
    unit_tok = Token(r'unit')
    # an apostrophe followed by alphanumerics
    # (presumably a type variable - confirm with the grammar's author)
    var_tok = Token(r"'[a-zA-Z0-9]+")

    # punctuation tokens
    list_start = Token(r'\[')
    list_end = Token(r'\]')
    tuple_start = Token(r'\(')
    tuple_div = Token(r',')
    tuple_end = Token(r'\)')
    arrow_div = Token(r'\->')

    # placeholders for the two mutually recursive rules, bound below
    tight_typ = Delayed()
    typ = Delayed()

    num_typ = int_tok | float_tok  # | long_tok | complex_tok
    str_typ = str_tok | unicode_tok
    base_typ = num_typ | str_typ | bool_tok | unit_tok | var_tok

    # `>` binds more loosely than `&`, so each constructor receives the
    # whole sequence to its left
    lst = ~list_start & typ & ~list_end > Lst
    empty_tup = ~tuple_start & ~tuple_end > Tup
    # one or more types, each followed by a comma
    comma_tup = ~tuple_start & (typ & ~tuple_div)[1:] & ~tuple_end > Tup
    # as above, but closed by a final type with no trailing comma
    no_comma_tup = ~tuple_start & (typ & ~tuple_div)[1:] & typ & ~tuple_end > Tup
    tup = empty_tup | comma_tup | no_comma_tup
    # the left side of an arrow must be a "tight" type, the right side any type
    arr = tight_typ & ~arrow_div & typ > Arr
    parens = ~tuple_start & typ & ~tuple_end

    tight_typ += base_typ | lst | tup | parens
    typ += arr | tight_typ

    @staticmethod
    def parse(s):
        """
        Parse the type specification `s` and return the first result.

        Raises TypeIncorrectlySpecifiedError (carrying `s`) when the lexer
        or the matcher rejects the input.
        """
        try:
            return TypeSpecParser.typ.parse(s)[0]
        except (RuntimeLexerError, FullFirstMatchException):
            raise TypeIncorrectlySpecifiedError(s)

    @staticmethod
    def print_parse(s):
        """
        Parse `s` as `parse` does and return the result passed through
        `better_sexpr_to_tree`.

        Raises TypeIncorrectlySpecifiedError under the same conditions as
        `parse`.
        """
        # Delegate so the error translation lives in exactly one place.
        return better_sexpr_to_tree(TypeSpecParser.parse(s))
def test_list(self):
    '''
    Parse an arithmetic expression with every rule collecting its results
    into a plain list, and compare against the expected nested lists.
    '''
    #basicConfig(level=DEBUG)
    expression = Delayed()
    digits = Digit()[1:,...] > 'number'
    times_or_divide = Any('*/') > 'operator'
    plus_or_minus = Any('+-') > 'operator'
    term = (digits | '(' / expression / ')') > list
    factor = (term / (times_or_divide / term)[0:]) > list
    expression += (factor / (plus_or_minus / factor)[0:]) > list

    expected = [[
        [[('number', '1')], ' '],
        ('operator', '+'), ' ',
        [[('number', '2')], ' ',
         ('operator', '*'), ' ',
         ['(',
          [[[('number', '3')], ' '],
           ('operator', '+'), ' ',
           [[('number', '4')], ' '],
           ('operator', '-'), ' ',
           [[('number', '5')]]],
          ')']]
    ]]
    ast = expression.parse_string('1 + 2 * (3 + 4 - 5)')
    assert ast == expected, ast
def test_left2(self):
    '''
    A left-recursive sequence, left-memoized with tracing enabled, gives
    six matches for 'abcdef', starting with ['a'] and ['a', 'b'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += letter | (seq & letter)
    seq.config.clear().left_memoize().trace(True)
    match = seq.get_match_string()
    results = list(match('abcdef'))
    total = len(results)
    assert total == 6, total
    first, second = results[0][0], results[1][0]
    assert first == ['a'], first
    assert second == ['a', 'b'], second
def test_complex(self):
    '''
    Count the matches of an ambiguous, self-referencing phrase grammar
    under left memoization; 392 are expected for the sample sentence.
    '''
    #basicConfig(level=DEBUG)

    class VerbPhrase(Node): pass
    class DetPhrase(Node): pass
    class SimpleTp(Node): pass
    class TermPhrase(Node): pass
    class Sentence(Node): pass

    # vocabulary
    verb = Literals('knows', 'respects', 'loves') > 'verb'
    join = Literals('and', 'or') > 'join'
    proper_noun = Literals('helen', 'john', 'pat') > 'proper_noun'
    determiner = Literals('every', 'some') > 'determiner'
    noun = Literals('boy', 'girl', 'man', 'woman') > 'noun'

    # phrases (the parentheses only spell out Python's own precedence)
    verbphrase = Delayed()
    verbphrase += (verb | (verbphrase // join // verbphrase)) > VerbPhrase
    det_phrase = (determiner // noun) > DetPhrase
    simple_tp = (proper_noun | det_phrase) > SimpleTp
    termphrase = Delayed()
    termphrase += (simple_tp | (termphrase // join // termphrase)) > TermPhrase
    sentence = ((termphrase // verbphrase // termphrase) & Eos()) > Sentence

    sentence.config.clear().left_memoize()
    p = sentence.get_match_string()
    print(p.matcher.tree())

    text = ('every boy or some girl and helen and john or pat knows '
            'and respects or loves every boy or some girl and pat or '
            'john and helen')
    # text = 'every boy loves helen'
    count = sum(1 for _meaning in p(text))
    assert count == 392, count
def test_safety(self):
    '''
    Check which Separator applies to matchers built in different scopes
    and threads.

    Six structurally identical matchers are built: outside any Separator
    block, inside a block that drops 'a', and inside a second thread both
    with and without that thread's own block that drops 'b'.  The asserts
    at the end state which characters each one is expected to return for
    the same input.
    '''
    # NOTE(review): block nesting was reconstructed from the expected
    # values below (matcher5 behaves like matcher2, matcher6 like
    # matcher1) - confirm against the canonical source.
    matcher3 = Delayed()
    matcher4 = Delayed()
    # built with no Separator block active
    matcher1 = Any()[::'b', ...] & Eos()
    with Separator(Drop(Any('a')[:])):
        matcher2 = Any()[::'b', ...] & Eos()
        # pylint: disable-msg=W0613
        def target(matcher3=matcher3, matcher4=matcher4):
            # runs in the worker thread: matcher3 is bound with no block
            # opened by that thread, matcher4 inside the thread's own block
            matcher3 += Any()[::'b', ...] & Eos()
            with Separator(Drop(Any('b')[:])):
                matcher4 += Any()[::'b', ...] & Eos()
        t = Thread(target=target)
        t.start()
        t.join()
        # built after the thread finished, still inside the 'a' block
        matcher5 = Any()[::'b', ...] & Eos()
    # built after the block has been left
    matcher6 = Any()[::'b', ...] & Eos()
    text = 'cababab'
    # the second expression in each assert is only the failure message
    assert text == matcher1.parse_string(text)[0], matcher1.parse_string(
        text)
    assert 'cbbb' == matcher2.parse_string(text)[0], matcher2.parse_string(
        text)
    assert text == matcher3.parse_string(text)[0], matcher3.parse_string(
        text)
    assert 'caaa' == matcher4.parse_string(text)[0], matcher4.parse_string(
        text)
    assert 'cbbb' == matcher5.parse_string(text)[0], matcher5.parse_string(
        text)
    assert text == matcher6.parse_string(text)[0], matcher6.parse_string(
        text)
def test_left2(self):
    '''
    A left-recursive sequence, left-memoized with stack tracing enabled,
    gives six matches for 'abcdef', starting with ['a'] and ['a', 'b'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += letter | (seq & letter)
    seq.config.clear().left_memoize().trace_stack(True)
    match = seq.get_match_string()
    results = list(match('abcdef'))
    total = len(results)
    assert total == 6, total
    first, second = results[0][0], results[1][0]
    assert first == ['a'], first
    assert second == ['a', 'b'], second
def test_left1b(self):
    '''
    An optional self-reference followed by a letter, left-memoized with
    stack tracing, gives two matches for 'ab': ['a', 'b'] then ['a'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += Optional(seq) & letter
    seq.config.clear().left_memoize().trace_stack(True)
    match = seq.get_match_string()
    results = list(match('ab'))
    total = len(results)
    assert total == 2, total
    longest, shortest = results[0][0], results[1][0]
    assert longest == ['a', 'b'], longest
    assert shortest == ['a'], shortest
def test_node(self):
    '''
    Build Term / Factor / Expression nodes for an arithmetic expression
    and compare the first result with the expected tree text.
    '''
    #basicConfig(level=DEBUG)

    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    expression = Delayed()
    digits = Digit()[1:,...] > 'number'
    times_or_divide = Any('*/') > 'operator'
    plus_or_minus = Any('+-') > 'operator'
    term = (digits | '(' / expression / ')') > Term
    factor = (term / (times_or_divide / term)[0::]) > Factor
    expression += (factor / (plus_or_minus / factor)[0::]) > Expression

    parse = expression.get_parse_string()
    ast = parse('1 + 2 * (3 + 4 - 5)')
    # NOTE(review): the literal is reproduced exactly as it appears in this
    # copy (one line, single spaces); it looks like a rendered tree whose
    # line breaks were lost - confirm against the canonical test.
    expected = """Expression +- Factor | +- Term | | `- number '1' | `- ' ' +- operator '+' +- ' ' `- Factor +- Term | `- number '2' +- ' ' +- operator '*' +- ' ' `- Term +- '(' +- Expression | +- Factor | | +- Term | | | `- number '3' | | `- ' ' | +- operator '+' | +- ' ' | +- Factor | | +- Term | | | `- number '4' | | `- ' ' | +- operator '-' | +- ' ' | `- Factor | `- Term | `- number '5' `- ')'"""
    assert_str(ast[0], expected)
def test_left1a(self):
    '''
    Same grammar as the string-based variant, but driven through
    `get_match`: 'ab' gives ['a', 'b'] then ['a'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += Optional(seq) & letter
    seq.config.clear().left_memoize().trace_stack(True)
    match = seq.get_match()
    #print(match.matcher)
    results = list(match('ab'))
    total = len(results)
    assert total == 2, total
    longest, shortest = results[0][0], results[1][0]
    assert longest == ['a', 'b'], longest
    assert shortest == ['a'], shortest
def left_token(self, contents=False):
    '''
    Return a Delayed matcher of the form `Optional(self) & token`.

    The token is `Token(Any())`; when `contents` is true it is further
    specialised by calling it with `Or('a', 'b')`.
    '''
    token = Token(Any())
    element = token(Or('a', 'b')) if contents else token
    recursive = Delayed()
    recursive += Optional(recursive) & element
    return recursive
def test_right(self):
    '''
    A letter followed by an optional self-reference, right-memoized with
    tracing, gives two matches for 'ab': ['a', 'b'] then ['a'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += letter & Optional(seq)
    #print(seq)
    seq.config.clear().right_memoize().trace(True)
    match = seq.get_match_string()
    #print(match.matcher)
    results = list(match('ab'))
    total = len(results)
    assert total == 2, total
    longest, shortest = results[0][0], results[1][0]
    assert longest == ['a', 'b'], longest
    assert shortest == ['a'], shortest
def test_right(self):
    '''
    A letter followed by an optional self-reference, right-memoized with
    stack tracing, gives two matches for 'ab': ['a', 'b'] then ['a'].
    '''
    #basicConfig(level=DEBUG)
    letter = Any()
    seq = Delayed()
    seq += letter & Optional(seq)
    #print(seq.tree())
    seq.config.clear().right_memoize().trace_stack(True)
    #seq.config.clear().right_memoize()
    match = seq.get_match_string()
    #print(match.matcher.tree())
    results = list(match('ab'))
    total = len(results)
    assert total == 2, total
    longest, shortest = results[0][0], results[1][0]
    assert longest == ['a', 'b'], longest
    assert shortest == ['a'], shortest
def test_expr_with_functions(self):
    '''
    Token-based expression grammar that includes function calls; the
    first parse result is compared with the expected tree text.
    '''
    #basicConfig(level=DEBUG)

    # pylint: disable-msg=C0111, C0321
    class Call(Node): pass
    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    # tokens
    value = Token(Float()) > 'value'
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')
    open_ = ~symbol('(')
    close = ~symbol(')')
    muldiv = symbol(Any('*/')) > 'operator'
    addsub = symbol(Any('+-')) > 'operator'

    # grammar (the parentheses only spell out Python's own precedence)
    expr = Delayed()
    funcn = name > 'name'
    call = (funcn & open_ & expr & close) > Call
    term = (call | value | (open_ & expr & close)) > Term
    factor = (term & (muldiv & term)[:]) > Factor
    expr += (factor & (addsub & factor)[:]) > Expression
    line = expr & Eos()

    line.config.trace(True).lexer()
    parser = line.get_parse_string()
    results = str26(parser('1 + 2*sin(3+ 4) - 5')[0])
    # NOTE(review): the literal is reproduced exactly as it appears in this
    # copy (one line, single spaces); it looks like a rendered tree whose
    # line breaks were lost - confirm against the canonical test.
    expected = """Expression +- Factor | `- Term | `- value '1' +- operator '+' +- Factor | +- Term | | `- value '2' | +- operator '*' | `- Term | `- Call | +- name 'sin' | `- Expression | +- Factor | | `- Term | | `- value '3' | +- operator '+' | `- Factor | `- Term | `- value '4' +- operator '-' `- Factor `- Term `- value '5'"""
    assert results == expected, '[' + results + ']'
def get_bracket_parser(self):
    '''
    Build and return a matcher for parenthesised phrases.

    A phrase is '(' ... ')' wrapped in a Node, containing a terminal, a
    label followed by one or more phrases, or just one or more phrases.
    '''
    phrase = Delayed()
    label = Regexp(r"[^ \t\n\r\(\)]+")
    word = label > Node
    #terminal = Word() | ( Word() & Drop(Space()) & word )
    terminal = label | (label & Drop(Space()) & word)
    with DroppedSpace():
        # everything combined with `&` or repetition stays inside the block
        opening, closing = Drop('('), Drop(')')
        content = terminal | (label & phrase[1:]) | phrase[1:]
        phrase += (opening & content & closing) > Node
    return phrase
def test_transformed_etc(self):
    '''
    Run the clone and relative checks on a transformed arithmetic grammar,
    then compare it with the matcher of a parser generated with several
    configuration options switched off.
    '''
    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    expression = Delayed()
    digits = Digit()[1:,...] > 'number'
    times_or_divide = Any('*/') > 'operator'
    plus_or_minus = Any('+-') > 'operator'
    term = (digits | '(' / expression / ')') > Term
    factor = (term / (times_or_divide / term)[0::]) > Factor
    expression += (factor / (plus_or_minus / factor)[0::]) > Expression

    self.assert_clone(expression)
    self.assert_relative(expression)

    expression.config.no_full_first_match().no_compile_to_regexp()
    expression.config.no_compose_transforms().no_direct_eval()
    expression.config.no_flatten()
    generated = expression.get_parse_string()
    self._assert_clone(expression, generated.matcher)
def test_list(self):
    '''
    Parse an arithmetic expression with every rule collecting its results
    into a plain list, and compare against the expected nested lists.
    '''
    #basicConfig(level=DEBUG)
    expression = Delayed()
    digits = Digit()[1:, ...] > 'number'
    times_or_divide = Any('*/') > 'operator'
    plus_or_minus = Any('+-') > 'operator'
    term = (digits | '(' / expression / ')') > list
    factor = (term / (times_or_divide / term)[0:]) > list
    expression += (factor / (plus_or_minus / factor)[0:]) > list

    expected = [[
        [[('number', '1')], ' '],
        ('operator', '+'), ' ',
        [[('number', '2')], ' ',
         ('operator', '*'), ' ',
         ['(',
          [[[('number', '3')], ' '],
           ('operator', '+'), ' ',
           [[('number', '4')], ' '],
           ('operator', '-'), ' ',
           [[('number', '5')]]],
          ')']]
    ]]
    ast = expression.parse_string('1 + 2 * (3 + 4 - 5)')
    assert ast == expected, ast
class ExtensionParser(object):
    """
    Grammar for extension calls such as `name` or `name(arg, key=value)`.

    The grammar is held in class attributes; `parser` returns a parse
    function generated from `EXTENSION`.
    """

    class ExtensionCall(Node):
        """
        Result node for one extension call.

        The underscore attributes are the raw values collected under the
        labels '_name', '_args' and '_kwargs'; the properties expose them
        in a friendlier shape.
        """

        _name = None
        _args = None
        _kwargs = None

        @property
        def name(self):
            """First collected name, or None when there is none."""
            if self._name:
                return self._name[0]
            return None

        @property
        def args(self):
            """Collected positional values as a tuple (empty if none)."""
            if self._args:
                return tuple(self._args)
            return tuple()

        @property
        def kwargs(self):
            """Collected (key, value) pairs as a dict (empty if none)."""
            if self._kwargs:
                return dict(self._kwargs)
            return {}

    COMMA = Drop(',')
    NONE = Literal('None') >> (lambda _text: None)
    BOOL = (Literal('True') | Literal('False')) >> (lambda text: text == 'True')
    IDENTIFIER = Word(Letter() | '_', Letter() | '_' | Digit())
    FLOAT = Real() >> float
    INTEGER = Integer() >> int
    STRING = String() | String("'")
    ITEM = STRING | INTEGER | FLOAT | NONE | BOOL | IDENTIFIER

    # NOTE(review): the extent of this block was reconstructed; every rule
    # that uses `&` or a separated repetition is assumed to belong inside
    # it - confirm against the canonical source.
    with Separator(~Regexp(r'\s*')):
        VALUE = Delayed()
        LIST = (Drop('[') & VALUE[:, COMMA] & Drop(']')) > list
        TUPLE = (Drop('(') & VALUE[:, COMMA] & Drop(')')) > tuple
        VALUE += LIST | TUPLE | ITEM
        ARGUMENT = VALUE >> '_args'
        KWARGUMENT = (IDENTIFIER & Drop('=') & VALUE > tuple) >> '_kwargs'
        ARGUMENTS = (KWARGUMENT | ARGUMENT)[:, COMMA]
        NAME = IDENTIFIER > '_name'
        # either a call with a parenthesised argument list or a bare name
        EXTENSION = (((NAME & Drop('(') & ARGUMENTS & Drop(')')) | NAME)
                     & Eos()) > ExtensionCall

    @property
    def parser(self):
        """Parse function obtained from `EXTENSION.get_parse_string()`."""
        return self.EXTENSION.get_parse_string()
def test_error(self):
    '''
    Malformed arithmetic expressions raise SyntaxError with the message
    produced by the matching error rule.
    '''
    #basicConfig(level=INFO)

    class Term(Node): pass
    class Factor(Node): pass
    class Expression(Node): pass

    expression = Delayed()
    number = Digit()[1:, ...] > 'number'

    # the alternatives of a term, in the order they are tried
    unexpected = (AnyBut(Space() | Digit() | '(')[1:, ...]
                  ^ 'unexpected text: {results[0]}')
    plain_number = number > Term
    missing_open = (number ** make_error("no ( before '{stream_out}'") / ')'
                    >> node_throw)
    bracketed = '(' / expression / ')' > Term
    missing_close = (('(' / expression / Eos())
                     ** make_error("no ) for '{stream_in}'") >> node_throw)
    term = Or(unexpected, plain_number, missing_open, bracketed,
              missing_close)

    muldiv = Any('*/') > 'operator'
    factor = (term / (muldiv / term)[0:, r'\s*']) > Factor
    addsub = Any('+-') > 'operator'
    expression += (factor / (addsub / factor)[0:, r'\s*']) > Expression
    line = expression / Eos()
    parser = line.get_parse_string()

    # closing bracket without an opening one
    try:
        parser('1 + 2 * 3 + 4 - 5)')[0]
    except SyntaxError as e:
        assert e.msg == "no ( before ')'", e.msg
    else:
        assert False, 'expected error'

    # opening bracket without a closing one
    try:
        parser('1 + 2 * (3 + 4 - 5')
    except SyntaxError as e:
        assert e.msg == "no ) for '(3 + 4 - 5'", e.msg
    else:
        assert False, 'expected error'

    # text that is neither a number nor a bracket
    try:
        parser('1 + 2 * foo')
    except SyntaxError as e:
        assert e.msg == "unexpected text: foo", e.msg
    else:
        assert False, 'expected error'
def create_parser(delimiter):
    """
    Build a parse function for a list of positional and keyword arguments.

    `delimiter` selects how arguments are separated and must be ',' or
    ' '.  Any other value fails the assertion below with the message
    'delimiter "<value>" not supported' (only while assertions are
    enabled).

    Positional values are labelled 'arg', keyword pairs 'karg', and the
    whole list is wrapped in a Node.  The value returned is
    `expr.get_parse()`.
    """
    space = Space()
    comma = Drop(',') | Drop(',') + space
    if delimiter == ',':
        # by comma
        separator = Separator(~Regexp(r'\s*'))
        delimiter = comma
    else:
        assert delimiter == ' ', 'delimiter "%s" not supported' % delimiter
        separator = DroppedSpace()
        delimiter = space

    none = Literal('None') >> (lambda x: None)
    # trailing underscore avoids shadowing the builtin `bool`
    bool_ = (Literal('True') | Literal('False')) >> (lambda x: x == 'True')
    ident = Word(Letter() | '_', Letter() | '_' | Digit())
    float_ = Float() >> float
    int_ = Integer() >> int
    str_ = String() | String("'")
    dict_key = str_ | int_ | float_ | Word()
    item = str_ | int_ | float_ | none | bool_ | ident | Word()

    # NOTE(review): the extent of this block was reconstructed; the
    # argument-level rules are assumed to belong inside it - confirm
    # against the canonical source.
    with separator:
        value = Delayed()
        list_ = Drop('[') & value[:, comma] & Drop(']') > list
        tuple_ = Drop('(') & value[:, comma] & Drop(')') > tuple
        dict_el = dict_key & Drop(':') & value > tuple
        dict_ = Drop('{') & dict_el[1:, Drop(',')] & Drop('}') > dict
        value += list_ | tuple_ | dict_ | item | space
        arg = value >> 'arg'
        karg = (ident & Drop('=') & value > tuple) >> 'karg'
        expr = (karg | arg)[:, delimiter] & Drop(Eos()) > Node
    return expr.get_parse()
def test_safety(self):
    '''
    Check which Separator applies to matchers built in different scopes
    and threads.

    Six structurally identical matchers are built: outside any Separator
    block, inside a block that drops 'a', and inside a second thread both
    with and without that thread's own block that drops 'b'.  The asserts
    at the end state which characters each one is expected to return for
    the same input.
    '''
    # NOTE(review): block nesting was reconstructed from the expected
    # values below (matcher5 behaves like matcher2, matcher6 like
    # matcher1) - confirm against the canonical source.
    matcher3 = Delayed()
    matcher4 = Delayed()
    # built with no Separator block active
    matcher1 = Any()[::'b',...] & Eos()
    with Separator(Drop(Any('a')[:])):
        matcher2 = Any()[::'b',...] & Eos()
        # pylint: disable-msg=W0613
        def target(matcher3=matcher3, matcher4=matcher4):
            # runs in the worker thread: matcher3 is bound with no block
            # opened by that thread, matcher4 inside the thread's own block
            matcher3 += Any()[::'b',...] & Eos()
            with Separator(Drop(Any('b')[:])):
                matcher4 += Any()[::'b',...] & Eos()
        t = Thread(target=target)
        t.start()
        t.join()
        # built after the thread finished, still inside the 'a' block
        matcher5 = Any()[::'b',...] & Eos()
    # built after the block has been left
    matcher6 = Any()[::'b',...] & Eos()
    text = 'cababab'
    # the second expression in each assert is only the failure message
    assert text == matcher1.parse_string(text)[0], matcher1.parse_string(text)
    assert 'cbbb' == matcher2.parse_string(text)[0], matcher2.parse_string(text)
    assert text == matcher3.parse_string(text)[0], matcher3.parse_string(text)
    assert 'caaa' == matcher4.parse_string(text)[0], matcher4.parse_string(text)
    assert 'cbbb' == matcher5.parse_string(text)[0], matcher5.parse_string(text)
    assert text == matcher6.parse_string(text)[0], matcher6.parse_string(text)
def full_first_match_exception_init(filename): def init(self, stream): super(FullFirstMatchException, self).__init__( s_fmt( s_deepest(stream), 'Chestnut stumbled at somewhere around {rest} ({location}). Check for syntax errors. (file: ' + str(filename) + ')')) self.deepest = s_deepest(stream) self.kargs = self.deepest[1].kargs(self.deepest[0]) return init # Delayed matchers can be used before they are defined group2, group3_product, group4_sum, group5, group6, group7, group8 = [ Delayed() for _ in range(7) ] variable_declaration = Delayed() expression = Delayed() primary = Delayed() function_call = Delayed() #Line Helpers def with_line(node): def wrapper(results, stream_in, stream_out, *kargs): start_line = s_delta(stream_in)[1] try: end_line = s_delta(stream_out)[1]
def test_calculation(self):
    '''
    Evaluate arithmetic by converting the parsed node tree to a float.

    Evaluation could happen directly in the parser actions; going
    through nodes leaves room to grow this into a full interpreter.
    '''

    # pylint: disable-msg=C0111, C0321
    class BinaryExpression(Node):
        op = lambda x, y: None
        def __float__(self):
            left, right = float(self[0]), float(self[1])
            return self.op(left, right)

    class Sum(BinaryExpression): op = add
    class Difference(BinaryExpression): op = sub
    class Product(BinaryExpression): op = mul
    class Ratio(BinaryExpression): op = truediv

    class Call(Node):
        funs = {'sin': sin, 'cos': cos}
        def __float__(self):
            function = self.funs[self[0]]
            return function(self[1])

    # we use unsigned float then handle negative values explicitly;
    # this lets us handle the ambiguity between subtraction and
    # negation which requires context (not available to the lexer)
    # to resolve correctly.
    number = Token(UnsignedReal())
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    expr = Delayed()
    factor = Delayed()

    negated = ~symbol('-') & number >> (lambda x: -float(x))
    real_ = Or(number >> float, negated)

    open_ = ~symbol('(')
    close = ~symbol(')')
    trig = name(Or('sin', 'cos'))
    call = (trig & open_ & expr & close) > Call
    parens = open_ & expr & close
    value = parens | call | real_

    ratio = (value & ~symbol('/') & factor) > Ratio
    prod = (value & ~symbol('*') & factor) > Product
    factor += prod | ratio | value

    diff = (factor & ~symbol('-') & expr) > Difference
    sum_ = (factor & ~symbol('+') & expr) > Sum
    expr += sum_ | diff | factor | value

    line = expr & Eos()
    parser = line.get_parse()

    def calculate(text):
        return float(parser(text)[0])

    cases = [
        (lambda: calculate('1'), '1.0'),
        (lambda: calculate('1 + 2*3'), '7.0'),
        (lambda: calculate('-1 - 4 / (3 - 1)'), '-3.0'),
        (lambda: calculate('1 -4 / (3 -1)'), '-1.0'),
        (lambda: str(calculate('1 + 2*sin(3+ 4) - 5'))[:5], '-2.68'),
    ]
    self.examples(cases)
def left(self):
    '''
    Return a Delayed matcher bound to `Optional(itself) & Any()`, so the
    self-reference comes first.
    '''
    recursive = Delayed()
    recursive += Optional(recursive) & Any()
    return recursive
def test_full_config_loop(self):
    '''
    Compare a self-referencing matcher with the matcher of the parser
    generated from it once full-first-match is disabled.
    '''
    looped = Delayed()
    looped += Any() & looped
    looped.config.no_full_first_match()
    generated = looped.get_parse_string()
    self._assert_clone(looped, generated.matcher)
def test_expression2(self):
    '''
    The expression grammar again, this time with evaluation.
    '''
    #basicConfig(level=DEBUG)

    # evaluation could happen directly in the parser actions; going
    # through nodes leaves room to grow this into a full interpreter

    # pylint: disable-msg=C0111, C0321
    class BinaryExpression(Node):
        op = lambda x, y: None
        def __float__(self):
            left, right = float(self[0]), float(self[1])
            return self.op(left, right)

    class Sum(BinaryExpression): op = add
    class Difference(BinaryExpression): op = sub
    class Product(BinaryExpression): op = mul
    class Ratio(BinaryExpression): op = truediv

    class Call(Node):
        funs = {'sin': sin, 'cos': cos}
        def __float__(self):
            function = self.funs[self[0]]
            return function(self[1])

    # we use unsigned float then handle negative values explicitly;
    # this lets us handle the ambiguity between subtraction and
    # negation which requires context (not available to the lexer)
    # to resolve correctly.
    number = Token(UnsignedFloat())
    name = Token('[a-z]+')
    symbol = Token('[^a-zA-Z0-9\\. ]')

    expr = Delayed()
    factor = Delayed()

    negated = ~symbol('-') & number >> (lambda x: -float(x))
    float_ = Or(number >> float, negated)

    open_ = ~symbol('(')
    close = ~symbol(')')
    trig = name(Or('sin', 'cos'))
    call = (trig & open_ & expr & close) > Call
    parens = open_ & expr & close
    value = parens | call | float_

    ratio = (value & ~symbol('/') & factor) > Ratio
    prod = (value & ~symbol('*') & factor) > Product
    factor += prod | ratio | value

    diff = (factor & ~symbol('-') & expr) > Difference
    sum_ = (factor & ~symbol('+') & expr) > Sum
    expr += sum_ | diff | factor | value

    line = expr & Eos()
    parser = line.get_parse()

    def myeval(text):
        return float(parser(text)[0])

    expectations = (
        ('1', 1),
        ('1 + 2*3', 7),
        ('1 - 4 / (3 - 1)', -1),
        ('1 -4 / (3 -1)', -1),
        ('1 + 2*sin(3+ 4) - 5', -2.68602680256),
    )
    for text, expected in expectations:
        self.assertAlmostEqual(myeval(text), expected)
from lepl import Any, Delayed, Node, Space

# A brace group is '{' ... '}' holding either a single character or one or
# more nested groups separated by optional spaces; each group becomes a Node.
expr = Delayed()
expr += '{' / (Any() | expr[1:, Space()[:]]) / '}' > Node

# Called as a function so the script runs on Python 3 as well as Python 2
# (with a single argument the output is the same on both).
print(expr.parse("{{a}{b}{{{c}}}}")[0])
def test_simple_loop(self):
    '''
    Run the clone and relative checks on an alternation whose last
    branch is a Delayed matcher that is never bound.
    '''
    placeholder = Delayed()
    alternatives = Any('a') | Any('b')[1:2,...] | placeholder
    self.assert_clone(alternatives)
    self.assert_relative(alternatives)
def right(self):
    '''
    Return a Delayed matcher bound to `Any() & Optional(itself)`, so the
    self-reference comes last.
    '''
    recursive = Delayed()
    recursive += Any() & Optional(recursive)
    return recursive
def make_binary_parser():
    '''
    Create a parser for binary data.

    The grammar accepts parenthesised, comma-separated entries that may be
    literal values (optionally with a length or a repeat count), named
    values, or nested expressions.  The value returned is
    `expr.get_parse_sequence()`.
    '''

    # NOTE(review): block nesting below was reconstructed from a copy whose
    # indentation was lost - confirm against the canonical source.

    # avoid import loops
    from lepl import Word, Letter, Digit, UnsignedInteger, \
        Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
        args, Trace, TraceVariables
    from lepl.bin.bits import BitString
    from lepl.support.node import Node

    # cache of dynamically created Node subclasses, keyed by name
    classes = {}

    def named_class(name, *args):
        '''
        Given a name and some args, create a sub-class of Node with that
        name (once per name) and return an instance built from the args.
        '''
        if name not in classes:
            classes[name] = type(name, (Node,), {})
        return classes[name](*args)

    with TraceVariables(False):

        # repeat bit string l n times; n is text, and base 0 lets int()
        # take the base from the literal's own prefix
        mult = lambda l, n: BitString.from_sequence([l] * int(n, 0))

        # an attribute or class name
        name = Word(Letter(), Letter() | Digit() | '_')

        # lengths can be integers (bits) or floats (bytes.bits)
        # but if we have a float, we do not want to parse as an int
        # (or we will get a conversion error due to too small length)
        length = First(UnsignedInteger() + '.' + Optional(UnsignedInteger()),
                       UnsignedInteger())

        # a literal decimal
        decimal = UnsignedInteger()

        # a binary number (without pre/postfix)
        binary = Any('01')[1:]

        # an octal number (without pre/postfix)
        octal = Any('01234567')[1:]

        # a hex number (without pre/postfix)
        hex_ = Regexp('[a-fA-F0-9]')[1:]

        # the letters used for binary, octal and hex values
        # (eg the 'x' in 0xffee)
        # pylint: disable-msg=C0103
        b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')

        # a decimal with optional pre/postfix
        dec = '0' + d + decimal | decimal + d + '0' | decimal

        # little-endian literals have normal prefix syntax (eg 0xffee)
        little = decimal | '0' + (b + binary | o + octal | x + hex_)

        # big-endian literals have postfix (eg ffeex0)
        big = (binary + b | octal + o | hex_ + x) + '0'

        # optional spaces - will be ignored
        # (use DFA here because it's multi-line, so \n will match ok)
        spaces = Drop(DfaRegexp('[ \t\n\r]*'))

        with Separator(spaces):

            # the grammar is recursive - expressions can contain
            # expressions - so we use a delayed matcher here as a
            # placeholder, so that we can use it before it is defined.
            expr = Delayed()

            # an implicit length value can be big or little-endian
            ivalue = big | little > args(BitString.from_int)

            # a value with a length can also be decimal
            lvalue = (big | little | dec) & Drop('/') & length \
                > args(BitString.from_int)

            value = lvalue | ivalue

            # a value followed by '*' and a count is repeated via mult
            repeat = value & Drop('*') & little > args(mult)

            # a named value is also a tuple
            named = name & Drop('=') & (expr | value | repeat) > tuple

            # an entry in the expression could be any of these
            entry = named | value | repeat | expr

            # and an expression itself consists of a comma-separated list
            # of one or more entries, surrounded by parentheses
            entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')

            # the leading 'Node' keyword may be explicit or omitted; either
            # way the list of entries becomes the arguments of a Node
            node = Optional(Drop('Node')) & entries > Node

            # alternatively, we can give a name and create a named sub-class
            other = name & entries > args(named_class)

            # and finally, we "tie the knot" by giving a definition for the
            # delayed matcher we introduced earlier, which is either a
            # plain node or a named subclass
            expr += spaces & (node | other) & spaces

    #expr = Trace(expr)
    # this changes order, making 0800x0 parse as binary
    expr.config.no_compile_to_regexp()
    # use sequence to force regexp over multiple lines
    return expr.get_parse_sequence()
def make_binary_parser():
    '''
    Create a parser for binary data.

    The grammar accepts parenthesised, comma-separated entries that may be
    literal values (optionally with a length or a repeat count), named
    values, or nested expressions.  The value returned is
    `expr.get_parse_sequence()`.
    '''

    # NOTE(review): block nesting below was reconstructed from a copy whose
    # indentation was lost - confirm against the canonical source.

    # avoid import loops
    from lepl import Word, Letter, Digit, UnsignedInteger, \
        Regexp, DfaRegexp, Drop, Separator, Delayed, Optional, Any, First, \
        args, Trace, TraceVariables
    from lepl.bin.bits import BitString
    from lepl.support.node import Node

    # cache of dynamically created Node subclasses, keyed by name
    classes = {}

    def named_class(name, *args):
        '''
        Given a name and some args, create a sub-class of Node with that
        name (once per name) and return an instance built from the args.
        '''
        if name not in classes:
            classes[name] = type(name, (Node, ), {})
        return classes[name](*args)

    with TraceVariables(False):

        # repeat bit string l n times; n is text, and base 0 lets int()
        # take the base from the literal's own prefix
        mult = lambda l, n: BitString.from_sequence([l] * int(n, 0))

        # an attribute or class name
        name = Word(Letter(), Letter() | Digit() | '_')

        # lengths can be integers (bits) or floats (bytes.bits)
        # but if we have a float, we do not want to parse as an int
        # (or we will get a conversion error due to too small length)
        length = First(
            UnsignedInteger() + '.' + Optional(UnsignedInteger()),
            UnsignedInteger())

        # a literal decimal
        decimal = UnsignedInteger()

        # a binary number (without pre/postfix)
        binary = Any('01')[1:]

        # an octal number (without pre/postfix)
        octal = Any('01234567')[1:]

        # a hex number (without pre/postfix)
        hex_ = Regexp('[a-fA-F0-9]')[1:]

        # the letters used for binary, octal and hex values
        # (eg the 'x' in 0xffee)
        # pylint: disable-msg=C0103
        b, o, x, d = Any('bB'), Any('oO'), Any('xX'), Any('dD')

        # a decimal with optional pre/postfix
        dec = '0' + d + decimal | decimal + d + '0' | decimal

        # little-endian literals have normal prefix syntax (eg 0xffee)
        little = decimal | '0' + (b + binary | o + octal | x + hex_)

        # big-endian literals have postfix (eg ffeex0)
        big = (binary + b | octal + o | hex_ + x) + '0'

        # optional spaces - will be ignored
        # (use DFA here because it's multi-line, so \n will match ok)
        spaces = Drop(DfaRegexp('[ \t\n\r]*'))

        with Separator(spaces):

            # the grammar is recursive - expressions can contain
            # expressions - so we use a delayed matcher here as a
            # placeholder, so that we can use it before it is defined.
            expr = Delayed()

            # an implicit length value can be big or little-endian
            ivalue = big | little > args(BitString.from_int)

            # a value with a length can also be decimal
            lvalue = (big | little | dec) & Drop('/') & length \
                > args(BitString.from_int)

            value = lvalue | ivalue

            # a value followed by '*' and a count is repeated via mult
            repeat = value & Drop('*') & little > args(mult)

            # a named value is also a tuple
            named = name & Drop('=') & (expr | value | repeat) > tuple

            # an entry in the expression could be any of these
            entry = named | value | repeat | expr

            # and an expression itself consists of a comma-separated list
            # of one or more entries, surrounded by parentheses
            entries = Drop('(') & entry[1:, Drop(',')] & Drop(')')

            # the leading 'Node' keyword may be explicit or omitted; either
            # way the list of entries becomes the arguments of a Node
            node = Optional(Drop('Node')) & entries > Node

            # alternatively, we can give a name and create a named sub-class
            other = name & entries > args(named_class)

            # and finally, we "tie the knot" by giving a definition for the
            # delayed matcher we introduced earlier, which is either a
            # plain node or a named subclass
            expr += spaces & (node | other) & spaces

    #expr = Trace(expr)
    # this changes order, making 0800x0 parse as binary
    expr.config.no_compile_to_regexp()
    # use sequence to force regexp over multiple lines
    return expr.get_parse_sequence()