def test_builder_semantics(self):
    # Parses the same input with two grammars whose rules carry ``::name``
    # annotations and checks the value produced under ModelBuilderSemantics:
    # 15 for the ``sum``/``int`` grammar, and '5.4.3.2.1' when a custom
    # ``dotted`` callable is registered through ``types=[...]``.
    import functools

    text = '5 4 3 2 1'

    # Raw literals: ``\d`` is not a valid Python string escape, and a non-raw
    # literal containing it triggers a SyntaxWarning on recent interpreters.
    # The string value is unchanged.
    grammar = r''' start::sum = {number}+ $ ; number::int = /\d+/ ; '''
    semantics = ModelBuilderSemantics()
    model = compile(grammar, 'test')
    ast = model.parse(text, semantics=semantics)
    self.assertEqual(15, ast)

    # type('') is the interpreter's type for an unprefixed string literal;
    # the partial joins its argument with '.'.  A partial object has no
    # __name__ of its own, so one is assigned explicitly.
    dotted = functools.partial(type('').join, '.')
    dotted.__name__ = 'dotted'

    grammar = r''' start::dotted = {number}+ $ ; number = /\d+/ ; '''
    semantics = ModelBuilderSemantics(types=[dotted])
    model = compile(grammar, 'test')
    ast = model.parse(text, semantics=semantics)
    self.assertEqual('5.4.3.2.1', ast)
def test_nested_left_recursion(self, trace=False):
    # Two grammars with @@left_recursion enabled; each input is parsed with
    # the listed model and compared against the expected AST, in order.
    grammar_a = ''' @@left_recursion :: True s = e $ ; e = [e '+'] t ; t = [t '*'] a ; a = ?/[0-9]/? ; '''
    grammar_b = ''' @@left_recursion :: True s = e $ ; e = [e '+'] a ; a = n | p ; n = ?/[0-9]/? ; p = '(' @:e ')' ; '''

    model_a = compile(grammar_a, "test")
    model_b = compile(grammar_b, "test")

    cases = (
        (model_a, "1*2+3*4", ['1', '*', '2', '+', ['3', '*', '4']]),
        (model_b, "(1+2)+(3+4)", ['1', '+', '2', '+', ['3', '+', '4']]),
        (model_a, "1*2*3", ['1', '*', '2', '*', '3']),
        (model_b, "(((1+2)))", ['1', '+', '2']),
    )
    for model, text, expected in cases:
        result = model.parse(text, trace=trace, colorize=True)
        self.assertEqual(expected, result)
def test_empty_match_token(self):
    # Compiling this grammar is expected to raise FailedParse; the test
    # fails with 'allowed empty token' if compilation succeeds.
    source = """ table = { row }+ ; row = (cell1:cell "|" cell2:cell) "\n"; cell = /[a-z]+/ ; """
    try:
        compile(source, "model")
    except FailedParse:
        pass
    else:
        self.fail('allowed empty token')
def test_optional_sequence(self):
    # An unnamed optional sequence: the whole parse is compared.
    unnamed = ''' start = '1' ['2' '3'] '4' $ ; '''
    result = compile(unnamed, "test").parse("1234", nameguard=False)
    self.assertEqual(['1', '2', '3', '4'], result)

    # The same optional sequence bound to the name ``foo``: only that
    # attribute of the result is compared.
    named = ''' start = '1' foo:['2' '3'] '4' $ ; '''
    result = compile(named, "test").parse("1234", nameguard=False)
    self.assertEqual(['2', '3'], result.foo)
def test_check_keywords(self):
    # Declares ``A`` as a keyword and marks ``id`` with @name, then checks
    # that ordinary words parse and that using ``A`` as an id raises
    # FailedParse with a "reserved word" message.

    # The stdlib ``parser`` module was removed in Python 3.10.  Use it where
    # it still exists (same check as before) and fall back to ``ast.parse``
    # to verify that the generated code is syntactically valid.
    try:
        import parser
    except ImportError:
        import ast as python_ast
        check_syntax = python_ast.parse
    else:
        check_syntax = parser.suite

    # Raw literal: ``\w`` is not a valid Python string escape.  Same value.
    grammar = r''' @@keyword :: A start = {id}+ $ ; @name id = /\w+/ ; '''
    model = compile(grammar, 'test')
    c = codegen(model)
    check_syntax(c)

    ast = model.parse('hello world')
    self.assertEqual(['hello', 'world'], ast)

    try:
        model.parse("hello A world")
    except FailedParse as e:
        self.assertIn('"A" is a reserved word', str(e))
    else:
        # Reaching this point means the keyword was accepted as a name.
        self.fail('accepted keyword as name')
def test_right_join(self):
    # Compiles a grammar using the ``(op)>{number}+`` form, checks that the
    # model's string form matches the trimmed grammar text, runs code
    # generation, and compares the parse of '1 + 2 - 3 + 4' with the expected
    # right-nested tuples.

    # Raw literal: ``\d`` is not a valid Python string escape.  Same value.
    grammar = r''' start = (op)>{number}+ $ ; op = '+' | '-' ; number = /\d+/ ; '''
    text = '1 + 2 - 3 + 4'

    model = compile(grammar, "test")
    self.assertEqual(trim(grammar).strip(), str(model).strip())

    codegen(model)

    ast = model.parse(text)
    self.assertEqual(('+', '1', ('-', '2', ('+', '3', '4'))), ast)
def test_pattern_concatenation(self):
    # Two ``?"..."`` patterns joined with ``+``: checks the parse of
    # 'abc123 def456' and that pretty() renders the patterns in /.../ form.
    source = ''' start = {letters_digits}+ ; letters_digits = ?"[a-z]+" + ?'[0-9]+' ; '''
    expected_pretty = ''' start = {letters_digits}+ ; letters_digits = /[a-z]+/ + /[0-9]+/ ; '''

    model = compile(grammar=source)
    result = model.parse('abc123 def456')
    self.assertEqual(['abc123', 'def456'], result)

    print(model.pretty())
    self.assertEqual(trim(expected_pretty), model.pretty())
def test_numbers_and_unicode(self):
    # Round-trips a grammar with numeric and non-ASCII rule parameters:
    # the model's text form must equal the trimmed grammar.
    grammar = ''' rúle(1, -23, 4.56, 7.89e-11, 0xABCDEF, Añez) = 'a' ; '''
    rule2 = ''' rulé::Añez = '\\xf1' ; '''
    rule3 = ''' rúlé::Añez = 'ñ' ; '''

    # A different second rule is appended depending on the PY3 flag.
    grammar += rule3 if PY3 else rule2

    model = compile(grammar, "test")
    self.assertEqual(trim(grammar), ustr(model))
def test_no_left_recursion(self, trace=False):
    # The grammar parses with its own @@left_recursion setting; passing
    # ``left_recursion=False`` to parse() is expected to raise FailedParse.
    grammar = ''' @@left_recursion :: True start = expre $ ; expre = expre '+' number | expre '*' number | number ; number = ?/[0-9]+/? ; '''
    model = compile(grammar, "test")
    model.parse("1*2+3*5", trace=trace, colorize=True)

    try:
        model.parse("1*2+3*5", left_recursion=False, trace=trace, colorize=True)
    except FailedParse:
        pass
    else:
        # TestCase has no ``Fail`` attribute; the original ``self.Fail(...)``
        # would have raised AttributeError instead of reporting a failure.
        self.fail('expected left recursion failure')
def test_direct_left_recursion(self, trace=False):
    # A directly left-recursive ``expre`` rule; the parse of "1*2+3*5" is
    # expected to be a flat list of tokens.
    source = ''' @@left_recursion :: True start = expre $ ; expre = expre '+' number | expre '*' number | number ; number = ?/[0-9]+/? ; '''
    expected = ['1', '*', '2', '+', '3', '*', '5']

    result = compile(source, "test").parse("1*2+3*5", trace=trace, colorize=True)
    self.assertEqual(expected, result)
def test_ast_assignment(self):
    # Exercises the ``@:`` and ``@+:`` assignment forms, alone and in pairs,
    # by parsing short inputs from each start rule with whitespace disabled.
    grammar = ''' n = @: {"a"}* $ ; f = @+: {"a"}* $ ; nn = @: {"a"}* @: {"b"}* $ ; nf = @: {"a"}* @+: {"b"}* $ ; fn = @+: {"a"}* @: {"b"}* $ ; ff = @+: {"a"}* @+: {"b"}* $ ; '''
    model = compile(grammar, "test")

    def parse_from(text, rule):
        return model.parse(text, start=rule, whitespace='')

    check = self.assertEqual

    # Single ``@:`` assignment.
    for text, expected in (('', []), ('a', ['a']), ('aa', ['a', 'a'])):
        check(expected, parse_from(text, 'n'))

    # Single ``@+:`` assignment.
    for text, expected in (('', [[]]), ('a', [['a']]), ('aa', [['a', 'a']])):
        check(expected, parse_from(text, 'f'))

    # All four two-assignment rules are expected to give the same results.
    paired = (
        ('', [[], []]),
        ('a', [['a'], []]),
        ('b', [[], ['b']]),
        ('aa', [['a', 'a'], []]),
        ('bb', [[], ['b', 'b']]),
        ('aab', [['a', 'a'], ['b']]),
    )
    for rule in ('nn', 'nf', 'fn', 'ff'):
        for text, expected in paired:
            check(expected, parse_from(text, rule))
def notest_left_recursion_bug(self, trace=False):
    # Compiles a left-recursive "Minus" grammar and parses '3 - (2 - 1)'.
    # Nothing is asserted: the method only passes if compile() and parse()
    # do not raise.
    #
    # The name does not start with ``test``, so unittest's default loader
    # will not collect this method.
    #
    # NOTE(review): the literal below begins with a backslash followed by a
    # space, which looks like a line continuation whose newline was lost --
    # confirm against the upstream source.
    grammar = '''\ @@grammar :: Minus @@left_recursion :: True start = expression $ ; expression = | paren_expression | minus_expression | value ; paren_expression = '(' expression ')' ; minus_expression = expression '-' value ; value = /[0-9]+/ ; '''
    model = compile(grammar=grammar)
    # Other inputs, currently disabled:
    # model.parse('3', trace=trace, colorize=True)
    # model.parse('3 - 2', trace=trace, colorize=True)
    # model.parse('(3 - 2)', trace=trace, colorize=True)
    # model.parse('(3 - 2) - 1', trace=trace, colorize=True)
    # model.parse('3 - 2 - 1', trace=trace, colorize=True)
    model.parse('3 - (2 - 1)', trace=trace, colorize=True)
def test_include_and_override(self):
    # Builds the same "rule followed by an @override of it" grammar twice:
    # once with the first rule written inline and once pulled in through an
    # ``#include`` directive served by a fake buffer.  Both must compile.
    include_name = 'included_grammar'
    included_grammar = "plu = 'aaaa';"
    template = "%s@override\nplu = 'plu';"

    include_directive = '#include :: %s.ebnf\n' % include_name
    grammar_with_include = template % (include_directive)
    grammar_inline = template % (included_grammar)

    class FakeIncludesBuffer(EBNFBuffer):
        # Serves the in-memory grammar text instead of reading a file.
        def get_include(self, source, filename):
            return included_grammar, source + '/' + filename

    compile(FakeIncludesBuffer(grammar_inline), "test")
    compile(FakeIncludesBuffer(grammar_with_include), "test")
def test_stateful(self):
    # Parser for mediawiki-style unordered lists.
    #
    # The semantic actions below keep a nesting counter in ``_state`` on the
    # shared context object and use it to validate how many '*' markers
    # follow each newline.  The HTML produced for several inputs is then
    # compared against expected strings.
    grammar = r''' document = @:ul [ nl ] $ ; ul = "*" ul_start el+:li { nl el:li } * ul_end ; li = ul | li_text ; (* Quirk: If a text line is followed by a sublist, the sublist does not get its own li. *) li_text = text:text [ ul:li_followed_by_ul ] ; li_followed_by_ul = nl @:ul ; text = ?/.*/? ; nl = ?/\n/? ul_marker ; (* The following rules are placeholders for state transitions. *) ul_start = () ; ul_end = () ; (* The following rules are placeholders for state validations and grammar rules. *) ul_marker = () ; '''

    class StatefulSemantics(object):
        # Semantic actions; method names match rule names in the grammar.

        def __init__(self, parser):
            # The object whose ``_state`` and ``buf`` attributes are used by
            # the actions below.
            self._context = parser

        def ul_start(self, ast):
            # Entering a list: start at 1 or increment the current level.
            ctx = self._context
            ctx._state = 1 if ctx._state is None else ctx._state + 1
            return ast

        def ul_end(self, ast):
            # Leaving a list: decrement, resetting to None at level <= 1.
            ctx = self._context
            ctx._state = None if ctx._state is None or ctx._state <= 1 else ctx._state - 1
            return ast

        def ul_marker(self, ast):
            # When a level is set, require exactly that many '*' characters
            # to match at the buffer's current position.
            ctx = self._context
            if ctx._state is not None:
                if not ctx.buf.match("*" * ctx._state):
                    raise FailedSemantics("not at correct level")
            return ast

        def ul(self, ast):
            # Concatenate the rendered items and wrap them in <ul>.
            return "<ul>" + "".join(ast.el) + "</ul>"

        def li(self, ast):
            return "<li>" + ast + "</li>"

        def li_text(self, ast):
            # Append the rendered sublist, if any, to the item text.
            return ast.text if ast.ul is None else ast.text + ast.ul

    model = compile(grammar, "test")
    # One context object is shared by every parse below and by the semantics
    # instances created for them.
    context = ModelContext(model.rules, whitespace='', nameguard=False)

    ast = model.parse('*abc', "document", context=context,
                      semantics=StatefulSemantics(context), whitespace='', nameguard=False)
    self.assertEqual(ast, "<ul><li>abc</li></ul>")

    ast = model.parse('*abc\n', "document", context=context,
                      semantics=StatefulSemantics(context), whitespace='', nameguard=False)
    self.assertEqual("<ul><li>abc</li></ul>", ast)

    ast = model.parse('*abc\n*def\n', "document", context=context,
                      semantics=StatefulSemantics(context), whitespace='', nameguard=False)
    self.assertEqual("<ul><li>abc</li><li>def</li></ul>", ast)

    ast = model.parse('**abc', "document", context=context,
                      semantics=StatefulSemantics(context), whitespace='', nameguard=False)
    self.assertEqual("<ul><li><ul><li>abc</li></ul></li></ul>", ast)

    ast = model.parse('*abc\n**def\n', "document", context=context,
                      semantics=StatefulSemantics(context), whitespace='', nameguard=False)
    self.assertEqual("<ul><li>abc<ul><li>def</li></ul></li></ul>", ast)
def test_group_ast(self):
    # A parenthesised group inside a sequence: the expected AST is a flat
    # list of the four tokens.
    source = ''' start = '1' ('2' '3') '4' $ ; '''
    result = compile(source, "test").parse("1234", nameguard=False)
    self.assertEqual(['1', '2', '3', '4'], result)
def test_36_unichars(self):
    # Rule parameters containing non-ASCII characters: parse with and without
    # semantics, then write the generated parser to a module file in the
    # current directory and import it to prove it is loadable.
    grammar = ''' start = { rule_positional | rule_keywords | rule_all }* $ ; rule_positional("ÄÖÜäöüß") = 'a' ; rule_keywords(k1='äöüÄÖÜß') = 'b' ; rule_all('ßÄÖÜäöü', k1="ßäöüÄÖÜ") = 'c' ; '''

    def _trydelete(pymodule):
        # Best-effort removal of the module source and compiled files that
        # sit next to it; files that are missing are ignored.
        import os
        for suffix in (".py", ".pyc", ".pyo"):
            try:
                os.unlink(pymodule + suffix)
            except EnvironmentError:
                pass

    def assert_equal(target, value):
        self.assertEqual(target, value)

    class UnicharsSemantics(object):
        """Check all rule parameters for expected types and values"""

        def rule_positional(self, ast, p1):
            assert_equal("ÄÖÜäöüß", p1)
            return ast

        # NOTE(review): the grammar rule is named ``rule_keywords`` while
        # this method is ``rule_keyword``; presumably it is never invoked --
        # confirm before renaming.
        def rule_keyword(self, ast, k1):
            assert_equal("äöüÄÖÜß", k1)
            return ast

        def rule_all(self, ast, p1, k1):
            assert_equal("ßÄÖÜäöü", p1)
            assert_equal("ßäöüÄÖÜ", k1)
            return ast

    m = compile(grammar, "UnicodeRuleArguments")
    ast = m.parse("a b c")
    self.assertEqual(['a', 'b', 'c'], ast)

    semantics = UnicharsSemantics()
    ast = m.parse("a b c", semantics=semantics)
    self.assertEqual(['a', 'b', 'c'], ast)

    code = codegen(m)
    import codecs
    try:
        with codecs.open("tc36unicharstest.py", "w", "utf-8") as f:
            f.write(code)
        # Importing the generated module is the check itself.
        import tc36unicharstest  # noqa: F401
    finally:
        # Clean up even when writing or importing fails, so a broken run
        # does not leave the generated file behind.
        _trydelete("tc36unicharstest")
def test_update_ast(self):
    # The name ``name`` is assigned twice in one rule; the attribute is
    # expected to hold both values.
    repeated_name = ''' foo = name:"1" [ name: bar ] ; bar = { "2" } * ; '''
    result = compile(repeated_name, 'Keywords').parse('1 2')
    self.assertEqual(['1', ['2']], result.name)

    # A closure of items, each item being an overridden closure of subitems;
    # the result is read through the ``items_`` attribute.
    nested_items = ''' start = items: { item } * $ ; item = @:{ subitem } * "0" ; subitem = ?/1+/? ; '''
    result = compile(nested_items, 'Update').parse("1101110100", nameguard=False)
    self.assertEqual([['11'], ['111'], ['1'], []], result.items_)
def test_empty_closure(self):
    # A grammar containing an empty closure ``{}``: code generation must
    # succeed and the empty closure is expected to show up as [] in the AST.
    source = ''' start = {'x'}+ {} 'y'$; '''
    parser_model = compile(source, "test")
    codegen(parser_model)

    result = parser_model.parse("xxxy", nameguard=False)
    self.assertEqual([['x', 'x', 'x'], [], 'y'], result)
def test_36_params_and_keyword_params(self):
    # Round-trip: a rule with one positional and one keyword parameter must
    # render back to the trimmed grammar text.
    source = ''' rule(A, kwdB=B) = 'a' ; '''
    rendered = ustr(compile(source, "test"))
    self.assertEqual(trim(source), rendered)
def test_define_keywords(self):
    # Compiles a grammar with two @@keyword directives, checks that the
    # generated code is syntactically valid, then recompiles the model's own
    # text form and checks that it is stable across the round trip.

    # The stdlib ``parser`` module was removed in Python 3.10.  Use it where
    # it still exists (same check as before) and fall back to ``ast.parse``.
    try:
        import parser
    except ImportError:
        import ast as python_ast
        check_syntax = python_ast.parse
    else:
        check_syntax = parser.suite

    grammar = ''' @@keyword :: B C @@keyword :: 'A' start = ('a' 'b').{'x'}+ ; '''
    model = compile(grammar, "test")
    c = codegen(model)
    check_syntax(c)

    grammar2 = str(model)
    model2 = compile(grammar2, "test")
    c2 = codegen(model2)
    check_syntax(c2)

    self.assertEqual(grammar2, str(model2))
def test_parseinfo(self):
    # With ``parseinfo=True`` the result and its ``head``, ``tail`` and
    # ``parseinfo`` attributes must all be non-None.
    source = ''' start = head:{'x'}+ {} tail:'y'$; '''
    result = compile(source, "test").parse("xxxy", nameguard=False, parseinfo=True)

    self.assertIsNotNone(result)
    for attribute in ('head', 'tail', 'parseinfo'):
        self.assertIsNotNone(getattr(result, attribute))
def test_rule_include(self):
    # Rule ``b`` refers to rule ``a`` through the ``>a`` form; parsing "abb"
    # is expected to give one flat list.
    source = ''' start = b $; a = @:'a' ; b = >a {@:'b'} ; '''
    result = compile(source, "test").parse("abb", nameguard=False)
    self.assertEqual(['a', 'b', 'b'], result)
def test_dot(self):
    # Draws a diagram of a small grammar to 'tmp/diagram.png'.  If the
    # diagrams module cannot be imported the test returns without doing
    # anything.
    try:
        from grako.diagrams import draw
    except ImportError:
        return

    source = ''' start = "foo\\nbar" $; '''
    diagram_model = compile(source, 'Diagram')
    draw('tmp/diagram.png', diagram_model)
def test_new_override(self):
    # ``@:`` used once outside and once inside a closure; the expected AST
    # is the flat list of all overridden values.
    source = ''' start = @:'a' {@:'b'} $ ; '''
    result = compile(source, "test").parse("abb", nameguard=False)
    self.assertEqual(['a', 'b', 'b'], result)
def test_group_join(self):
    # A join whose separator is a group ``('a' 'b')``: the generated code
    # must be syntactically valid, and the separator is expected to appear
    # in the AST as a nested list.

    # The stdlib ``parser`` module was removed in Python 3.10.  Use it where
    # it still exists (same check as before) and fall back to ``ast.parse``.
    try:
        import parser
    except ImportError:
        import ast as python_ast
        check_syntax = python_ast.parse
    else:
        check_syntax = parser.suite

    grammar = ''' start = ('a' 'b')%{'x'}+ ; '''
    model = compile(grammar, "test")
    c = codegen(model)
    check_syntax(c)

    ast = model.parse("x a b x", nameguard=False)
    self.assertEqual(['x', ['a', 'b'], 'x'], ast)
def test_check_unicode_name(self):
    # Parses input containing a non-ASCII word through an @name rule; the
    # test passes if no exception is raised.

    # Raw literal: ``\w`` is not a valid Python string escape, and a non-raw
    # literal containing it triggers a SyntaxWarning on recent interpreters.
    # The string value is unchanged.
    grammar = r''' @@keyword :: A start = {id}+ $ ; @name id = /\w+/ ; '''
    model = compile(grammar, 'test')
    model.parse("hello Øresund")
def test_indirect_left_recursion_with_cut(self, trace=False):
    # ``x`` and ``expr`` refer to each other, and ``expr`` contains a cut
    # (``~``); "5-87-32" is expected to parse to a flat token list.
    source = ''' @@left_recursion :: True start = x $ ; x = expr ; expr = x '-' ~ num | num; num = ?/[0-9]+/? ; '''
    expected = ['5', '-', '87', '-', '32']

    result = compile(source, "test").parse("5-87-32", trace=trace, colorize=True)
    self.assertEqual(expected, result)
def test_optional_closure(self):
    # Four ways of combining a named closure with an optional; every variant
    # is expected to give the same value for ``foo``.
    variants = (
        'start = foo+:"x" foo:{"y"}* {foo:"z"}* ;',
        'start = foo+:"x" [foo+:{"y"}*] {foo:"z"}* ;',
        'start = foo+:"x" foo:[{"y"}*] {foo:"z"}* ;',
        'start = foo+:"x" [foo:{"y"}*] {foo:"z"}* ;',
    )
    for source in variants:
        result = compile(source, "test").parse("xyyzz", nameguard=False)
        self.assertEqual(['x', ['y', 'y'], 'z', 'z'], result.foo)
def test_slashed_pattern(self):
    # A ``?"..."`` pattern that itself contains a '/': checks the parse of
    # 'abc/123' and that pretty() reproduces the trimmed grammar.
    source = ''' start = ?"[a-z]+/[0-9]+" $ ; '''
    parser_model = compile(grammar=source)

    result = parser_model.parse('abc/123')
    self.assertEqual('abc/123', result)

    print(parser_model.pretty())
    self.assertEqual(trim(source), parser_model.pretty())
def test_48_rule_override(self):
    # ``ab`` is defined twice, the second time under @override; parsing
    # "abb" is expected to follow the second definition.
    source = ''' start = ab $; ab = 'xyz' ; @override ab = @:'a' {@:'b'} ; '''
    result = compile(source, "test").parse("abb", nameguard=False)
    self.assertEqual(['a', 'b', 'b'], result)
def test_raw_string(self):
    # A grammar token written with an ``r'...'`` prefix is expected to be
    # pretty-printed as an ordinary quoted string with the backslash doubled.
    source = r''' start = r'am\nraw' ; '''
    expected_pretty = r''' start = 'am\\nraw' ; '''

    parser_model = compile(source, "start")
    print(parser_model.pretty())
    self.assertEqual(trim(expected_pretty), parser_model.pretty())
def test_indirect_left_recursion_complex(self, trace=False):
    # A grammar in which ``Primary`` is reached again through several
    # alternatives; each input is parsed in turn and compared with the
    # expected AST.
    source = ''' @@left_recursion :: True start = Primary $ ; Primary = PrimaryNoNewArray ; PrimaryNoNewArray = ClassInstanceCreationExpression | MethodInvocation | FieldAccess | ArrayAccess | 'this' ; ClassInstanceCreationExpression = 'new' ClassOrInterfaceType '(' ')' | Primary '.new' Identifier '()' ; MethodInvocation = Primary '.' MethodName '()' | MethodName '()' ; FieldAccess = Primary '.' Identifier | 'super.' Identifier ; ArrayAccess = Primary '[' Expression ']' | ExpressionName '[' Expression ']' ; ClassOrInterfaceType = ClassName | InterfaceTypeName ; ClassName = 'C' | 'D' ; InterfaceTypeName = 'I' | 'J' ; Identifier = 'x' | 'y' | ClassOrInterfaceType ; MethodName = 'm' | 'n' ; ExpressionName = Identifier ; Expression = 'i' | 'j' ; '''
    parser_model = compile(source, "test")

    cases = (
        ("this", 'this'),
        ("this.x", ['this', '.', 'x']),
        ("this.x.y", ['this', '.', 'x', '.', 'y']),
        ("this.x.m()", ['this', '.', 'x', '.', 'm', '()']),
        ("x[i][j].y", ['x', '[', 'i', ']', '[', 'j', ']', '.', 'y']),
    )
    for text, expected in cases:
        result = parser_model.parse(text, trace=trace, colorize=True)
        self.assertEqual(expected, result)
def test_keywords_in_rule_names(self):
    # A rule named ``whitespace`` must compile and parse without raising.
    source = ''' start = whitespace ; whitespace = {'x'}+ ; '''
    parser_model = compile(source, 'Keywords')
    parser_model.parse('x')
def test_positive_gather(self):
    # Checks the ``sep.{...}+`` form: it needs at least one element, and it
    # can be made optional either with an empty-closure alternative or by
    # wrapping it in ``[...]``.
    grammar = ''' start = ','.{'x' 'y'}+ ; '''
    grammar2 = ''' start = (','.{'x'}+|{}) ; '''
    grammar3 = ''' start = [','.{'x'}+] ; '''

    model = compile(grammar, "test")
    codegen(model)
    ast = model.parse("x y, x y", nameguard=False)
    self.assertEqual([['x', 'y'], ['x', 'y']], ast)
    ast = model.parse("x y x y", nameguard=False)
    self.assertEqual([['x', 'y']], ast)
    try:
        model.parse("y x", nameguard=False)
    except FailedParse:
        pass
    else:
        # TestCase has no ``Fail`` attribute; the original ``self.Fail(...)``
        # would have raised AttributeError instead of reporting a failure.
        self.fail('closure not positive')

    model = compile(grammar2, "test")
    ast = model.parse("y x", nameguard=False)
    self.assertEqual([], ast)
    ast = model.parse("x", nameguard=False)
    self.assertEqual(['x'], ast)
    ast = model.parse("x,x", nameguard=False)
    self.assertEqual(['x', 'x'], ast)

    model = compile(grammar3, "test")
    ast = model.parse("y x", nameguard=False)
    self.assertEqual(None, ast)
def test_normal_join(self):
    # The ``sep%{...}`` form followed by 'z': separators are expected to be
    # kept in the AST, and zero elements are allowed.
    source = ''' start = ','%{'x' 'y'} 'z' ; '''
    parser_model = compile(source, "test")
    codegen(parser_model)

    cases = (
        ("x y, x y z", [[['x', 'y'], ',', ['x', 'y']], 'z']),
        ("x y z", [[['x', 'y']], 'z']),
        ("z", [[], 'z']),
    )
    for text, expected in cases:
        result = parser_model.parse(text, nameguard=False)
        self.assertEqual(expected, result)
def test_python_keywords_in_rule_names(self):
    # This is a regression test for
    # https://bitbucket.org/neogeny/grako/issues/59
    # (semantic actions not called for rules with the same name as a python
    # keyword).
    grammar = ''' not = 'x' ; '''
    m = compile(grammar, 'Keywords')

    class Semantics(object):
        def __init__(self):
            self.called = False

        # The action is named ``not_`` because ``not`` itself cannot be used
        # as a Python method name.
        def not_(self, ast):
            self.called = True

    semantics = Semantics()
    m.parse('x', semantics=semantics)
    # A bare ``assert`` is stripped when Python runs with -O, which would
    # silently disable this check; use the TestCase assertion instead.
    self.assertTrue(semantics.called)
def test_patterns_with_newlines(self):
    # With @@whitespace restricted to space and tab, a pattern that matches
    # through the end of a line is applied to two empty lines.  The parse
    # runs with trace=True.
    source = ''' @@whitespace :: /[ \t]/ start = blanklines $ ; blanklines = blankline [blanklines] ; blankline = /^[^\\n]*\\n$/ ; '''
    parser_model = compile(source, "test")

    result = parser_model.parse('\n\n', trace=True)
    self.assertEqual(['\n', '\n'], result)
def test_synth_model(self):
    # The object built for ``start::ASeq`` under ModelBuilderSemantics must
    # be of a type named 'ASeq' and must survive a pickle round trip with
    # the same type name and the same ``_ast``.
    source = ''' start::ASeq = values:aseq $ ; aseq = {'a'}+ ; '''
    parser_model = compile(source, 'ASeq')

    node = parser_model.parse('a a a', semantics=ModelBuilderSemantics())
    self.assertEqual('ASeq', type(node).__name__)

    # Only data produced by this test is unpickled here.
    restored = pickle.loads(pickle.dumps(node))
    self.assertEqual('ASeq', type(restored).__name__)
    self.assertEqual(node._ast, restored._ast)
def test_36_param_combinations(self):
    # Rules with positional, keyword and mixed parameters: the grammar must
    # render to the expected text, the expected text must itself compile and
    # parse (with and without semantics), and code generation must succeed.
    expect = self.assertEqual

    class TC36Semantics(object):
        """Check all rule parameters for expected types and values"""

        def rule_positional(self, ast, p1, p2, p3, p4):
            expect("ABC", p1)
            expect(123, p2)
            expect('=', p3)
            expect("+", p4)
            return ast

        # NOTE(review): the grammar rule is named ``rule_keywords`` while
        # this method is ``rule_keyword``; presumably it is never invoked --
        # confirm before renaming.
        def rule_keyword(self, ast, k1, k2, k3, k4):
            expect("ABC", k1)
            expect(123, k2)
            expect('=', k3)
            expect('+', k4)
            return ast

        def rule_all(self, ast, p1, p2, p3, p4, k1, k2, k3, k4):
            expect("DEF", p1)
            expect(456, p2)
            expect('=', p3)
            expect("+", p4)
            expect("HIJ", k1)
            expect(789, k2)
            expect('=', k3)
            expect('+', k4)
            return ast

    source = ''' @@ignorecase::False @@nameguard start = {rule_positional | rule_keywords | rule_all} $ ; rule_positional('ABC', 123, '=', '+') = 'a' ; rule_keywords(k1=ABC, k3='=', k4='+', k2=123) = 'b' ; rule_all('DEF', 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789) = 'c' ; '''
    expected_pretty = ''' @@ignorecase :: False @@nameguard :: True start = {rule_positional | rule_keywords | rule_all} $ ; rule_positional(ABC, 123, '=', '+') = 'a' ; rule_keywords(k1=ABC, k3='=', k4='+', k2=123) = 'b' ; rule_all(DEF, 456, '=', '+', k1=HIJ, k3='=', k4='+', k2=789) = 'c' ; '''

    # First the original grammar is rendered and compared ...
    parser_model = compile(source, 'RuleArguments')
    self.assertEqual(trim(expected_pretty), ustr(parser_model))

    # ... then the rendered form is compiled and used for parsing.
    parser_model = compile(expected_pretty, 'RuleArguments')
    result = parser_model.parse("a b c")
    self.assertEqual(['a', 'b', 'c'], result)

    result = parser_model.parse("a b c", semantics=TC36Semantics())
    self.assertEqual(['a', 'b', 'c'], result)

    codegen(parser_model)
def test_pretty(self):
    # Compiles a small Lisp-like grammar and compares three renderings:
    # pretty() against the expected annotated text, str(model) against
    # pretty(), and pretty_lean() against the expected text without names
    # and ``::Type`` annotations.
    #
    # NOTE(review): each literal below begins with a backslash followed by a
    # space, which looks like a line continuation whose newline was lost, and
    # the comparisons depend on exact layout -- confirm against the upstream
    # source before editing these strings.
    grammar = '''\ start = lisp ; lisp = sexp | list | symbol; sexp::SExp = '(' cons:lisp '.' ~ cdr:lisp ')' ; list::List = '(' elements:{sexp}* ')' ; symbol::Symbol = value:/[^\s().]+/ ; '''
    pretty = trim('''\ start = lisp ; lisp = sexp | list | symbol ; sexp::SExp = '(' cons:lisp '.' ~ cdr:lisp ')' ; list::List = '(' elements:{sexp} ')' ; symbol::Symbol = value:/[^\s().]+/ ; ''')
    pretty_lean = trim('''\ start = lisp ; lisp = sexp | list | symbol ; sexp = '(' lisp '.' ~ lisp ')' ; list = '(' {sexp} ')' ; symbol = /[^\s().]+/ ; ''')
    model = compile(grammar=grammar)
    self.assertEqual(pretty, model.pretty())
    # The string form of the model is expected to be the same as pretty().
    self.assertEqual(str(model), model.pretty())
    self.assertEqual(pretty_lean, model.pretty_lean())