def test_simple():
    """Parse and evaluate small arithmetic expressions.

    The grammar carries two ``#`` comments; results are computed by an
    EvaluateVisitor subclass that overrides ``visit_primary``.
    """
    # One rule per physical line: a grammar comment starts at '#', so it has
    # to end at a line break or it would swallow the 'primary' rule after it.
    regexs, rules, transformer = parse_ebnf("""
IGNORE: " ";
DECIMAL: "0|[1-9][0-9]*";
additive: multitive "+" additive | multitive;
multitive: primary "*" multitive | primary; #nonsense!
# the following too!
primary: "(" additive ")" | DECIMAL;
""")

    class MyEvalVisitor(EvaluateVisitor):
        def visit_primary(self, node):
            # Three children means "(" additive ")": evaluate the middle one.
            if len(node.children) == 3:
                return node.children[1].visit(self)
            return int(node.children[0].additional_info)

    parse = make_parse_function(regexs, rules)
    tree = parse("0 + 10 + 999")
    assert tree.visit(MyEvalVisitor()) == 10 + 999
    # NOTE(review): the trailing ")" is unbalanced; presumably accepted
    # because the parse function is built without eof=True -- confirm.
    tree = parse("22 * 12 + 44)")
    r = tree.visit(MyEvalVisitor())
    assert r == 22 * 12 + 44
    tree = parse("2*(3+5*2*(2+61))")
    assert tree.visit(MyEvalVisitor()) == 2 * (3 + 5 * 2 * (2 + 61))
    tree = parse("12 + 4 * 5)")
def test_translate_ast_visitor():
    """Check that the generated ToAST visitor gives the same result when
    run directly and when run through Translation(...).compile_c().
    """
    from pypy.rlib.parsing.ebnfparse import parse_ebnf, make_parse_function
    # One rule per physical line: the '#nonsense!' comment has to end at a
    # line break or it would swallow the 'primary' rule after it.
    regexs, rules, ToAST = parse_ebnf("""
DECIMAL: "0|[1-9][0-9]*";
IGNORE: " ";
additive: multitive ["+!"] additive | <multitive>;
multitive: primary ["*!"] multitive | <primary>; #nonsense!
primary: "(" <additive> ")" | <DECIMAL>;
""")
    parse = make_parse_function(regexs, rules)

    def f():
        tree = parse("(0 +! 10) *! (999 +! 10) +! 1")
        tree = ToAST().visit_additive(tree)
        assert len(tree) == 1
        tree = tree[0]
        return tree.symbol + " " + "-&-".join(
            [c.symbol for c in tree.children])

    # Reference result from running f directly.
    res1 = f()
    t = Translation(f)
    t.annotate()
    t.rtype()
    t.backendopt()
    func = t.compile_c()
    # The compiled function must produce the same string.
    res2 = func()
    assert res1 == res2
def test_parse_python_args():
    """Parse a series of Python-style parameter lists and run each result
    through the generated ToAST visitor; only successful completion is
    checked.
    """
    # Grammar text is kept exactly as in the original literal.
    grammar = """ IGNORE: " "; NAME: "[a-zA-Z_]*"; NUMBER: "0|[1-9][0-9]*"; parameters: ["("] >varargslist<? [")"]; varargslist: (fpdef ("=" test)? [","])* star_or_starstarargs | fpdef ("=" test)? ([","] fpdef ("=" test)?)* [","]?; star_or_starstarargs: "*" NAME [","] "**" NAME | "*" NAME | "**" NAME; fpdef: <NAME> | "(" <fplist> ")"; fplist: fpdef ([","] fpdef)* [","]?; test: NUMBER; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    sources = [
        "(a)",
        "(a,)",
        "(a,b,c,d)",
        "(a,b,c,d,)",
        "((a, b, c),b,c,d,)",
        "((a, b, (d, e, (f, g))), b, *args, **kwargs)",
        "((a, b, c),b,c,d,*args)",
        "((a, b, c),b,c,d,**kwargs)",
        "((a, b, c),b,c,d,*args, **args)",
        "()",
        "(*args, **args)",
        "(a=1)",
        "(a=2,)",
        "(a,b,c,d=3)",
        "(a,b,c,d=4,)",
        "((a, b, c),b,c,(c, d)=1,)",
        "((a, b, c),b,c,d=1,*args)",
        "((a, b, c),b,c,d=2,**kwargs)",
        "((a, b, c),b,c,(c, d)=4,*args, **args)",
        "(self, a, b, args)",
    ]
    for source in sources:
        # A fresh visitor per input, as in the original statement list.
        t = parse(source).visit(ToAST())[0]
def make_parser_from_file(filename): try: t = py.path.local(filename).read(mode='U') regexs, rules, ToAST = parse_ebnf(t) except ParseError, e: print e.nice_error_message(filename=filename, source=t) raise
def test_eof():
    """Exercise a grammar whose start rule ends in EOF.

    A parse function built with ``eof=True`` must evaluate a valid input,
    raise ParseError on trailing input, and parse_ebnf must raise
    ParseError once garbage is appended to the grammar text.
    """
    # One rule per physical line: the '#nonsense!' comment has to end at a
    # line break or it would swallow the 'primary' rule after it.
    grammar = """
DECIMAL: "0|[1-9][0-9]*";
IGNORE: " ";
expr: additive0_9 EOF;
additive0_9: multitive "+!" additive0_9 | multitive;
multitive: primary "*!" multitive | primary; #nonsense!
primary: "(" additive0_9 ")" | DECIMAL;
"""
    regexs, rules, ToAST = parse_ebnf(grammar)

    class MyEvalVisitor(EvaluateVisitor):
        def visit_primary(self, node):
            # Three children means "(" additive0_9 ")".
            if len(node.children) == 3:
                return node.children[1].visit(self)
            return int(node.children[0].additional_info)

        def visit_expr(self, node):
            return node.children[0].visit(self)

        # Reuse the inherited handler under the renamed rule.
        visit_additive0_9 = EvaluateVisitor.visit_additive

    parse = make_parse_function(regexs, rules, eof=True)
    tree = parse("0 +! 10 +! 999")
    assert tree.visit(MyEvalVisitor()) == 10 + 999
    py.test.raises(ParseError, parse, "0 +! 10 +! 999 0000")
    grammar += "some garbage here"
    py.test.raises(ParseError, parse_ebnf, grammar)
def setup_class(cls):
    """Build the shared parsing fixtures from the ``grammar`` text and
    store them on ``cls``: a ToAST instance, a PackratParser starting at
    the first rule's nonterminal, the (name, regex) pairs, and a Lexer.
    """
    from pypy.rlib.parsing.parsing import PackratParser
    token_defs, rules, ToAST = parse_ebnf(grammar)
    cls.ToAST = ToAST()
    start_symbol = rules[0].nonterminal
    cls.parser = PackratParser(rules, start_symbol)
    # cls.regexs keeps the pairs exactly as parse_ebnf returned them.
    cls.regexs = token_defs
    names, patterns = zip(*token_defs)
    cls.lexer = Lexer(list(patterns), list(names))
def parse_js(path):
    """Parse the file at ``path`` with the ``ebnf`` grammar.

    Returns the tree produced by ``ToAST().transform`` on the parse result.
    The parse function is built with ``eof=True``.
    """
    regexs, rules, ToAST = parse_ebnf(ebnf)
    parse = make_parse_function(regexs, rules, eof=True)
    source_file = open(path, 'r')
    try:
        doc = source_file.read()
    finally:
        # Release the file handle even when reading fails.
        source_file.close()
    t = parse(doc)
    t = ToAST().transform(t)
    return t
def test_example2():
    """Parse a list of decimals with a ``>list<`` rule and transform it;
    only successful completion is checked.
    """
    grammar = """ IGNORE: " "; DECIMAL: "0|[1-9][0-9]*"; list: DECIMAL >list< | DECIMAL; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    tree = parse("1 2 3 4 5")
    tree = ToAST().transform(tree)
def test_leftrecursion():
    """make_parse_function must raise AssertionError for this grammar, in
    which expr1 and expr2 refer to each other in first position.
    """
    grammar = """ A: "a"; B: "b"; IGNORE: " |\n"; expr1: A | expr2 A; expr2: expr1 B; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    py.test.raises(AssertionError, make_parse_function,
                   token_defs, productions, True)
def test_example1():
    """Parse a nested ``n``/``m`` input and run the ToAST transformer;
    only successful completion is checked.
    """
    grammar = """ IGNORE: " "; n: "a" "b" "c" m; m: "(" <n> ")" | "d"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    tree = parse("a b c (a b c d)")
    tree = ToAST().transform(tree)
def test_nest_star_and_questionmark():
    """Parse and transform two inputs for a rule nesting ``?`` inside
    ``*``; only successful completion is checked.
    """
    grammar = """ IGNORE: " "; y: x "END"; x: "B" ("A" "B"?)*; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    for source in ["B A B A B END", "B A A A END"]:
        t = ToAST().transform(parse(source))
def test_escape_quotes():
    """Escaped double quotes inside grammar literals must come through as
    plain ``"`` characters in the parsed tokens.
    """
    grammar = """ QUOTE: "a\\""; IGNORE: " "; expr: QUOTE "\\"" EOF;"""
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)
    t = parse('a" "')
    quote_token = t.children[0]
    assert quote_token.additional_info == 'a"'
    literal_token = t.children[1]
    assert literal_token.additional_info == '"'
def test_mix_star_and_questionmark():
    """A ``(...)*`` group followed by an optional item: the transformed
    ``x`` node for "B A B END" must have three children.
    """
    grammar = """ IGNORE: " "; y: x "END"; x: "B" ("A" "B")* "A"?; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    parsed = parse("B A B END")
    t = ToAST().transform(parsed)
    x_node = t.children[0]
    assert len(x_node.children) == 3
def test_starred_star():
    """A starred group containing a starred item: after transformation the
    children's texts must equal the input tokens followed by "EOF".
    """
    grammar = """ IGNORE: " "; start: ("b"* "a")* EOF; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)
    for s in ["b b b b a b b a", "b a b a", "a a", ""]:
        t = ToAST().transform(parse(s))
        expected = (s + " EOF").split()
        found = [c.additional_info for c in t.children]
        assert found == expected
def test_clash_literal_nonterminal():
    """Literals "y" and "x" share their spelling with the nonterminals
    ``y`` and ``x``: "x END" must raise ParseError, "y END" must parse.
    """
    grammar = """ IGNORE: " "; y: x "END"; x: "y"; a: "x"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    py.test.raises(ParseError, parse, "x END")
    parse("y END")
def test_grouping_only_parens():
    """Parentheses used only for grouping: the transformed trees for
    "m a b c" (rule x) and "n a b c" (rule y) must have the same number of
    children.
    """
    grammar = """ IGNORE: " "; x: ["m"] ("a" "b") "c" | <y>; y: ["n"] "a" "b" "c"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    grouped = ToAST().transform(parse("m a b c"))
    plain = ToAST().transform(parse("n a b c"))
    assert len(grouped.children) == len(plain.children)
def test_double_star():
    """Two starred items in a row: after transformation the children's
    texts must equal the whitespace-split input.
    """
    grammar = """ IGNORE: " |\n"; start: "a"* "b"* "c"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)
    for s in ["a a a b b c", "a b b c", "a a c", "b b c", "c"]:
        t = ToAST().transform(parse(s))
        found = [c.additional_info for c in t.children]
        assert found == s.split()
def parse(code): GFILE = open_file_as_stream(abspath(join(dirname(__file__), "grammar.txt"))) t = None try: t = GFILE.read() regexs, rules, ToAST = parse_ebnf(t) except ParseError,e: print e.nice_error_message(filename=str(GFILE),source=t) raise
def test_quoting():
    """A quoted newline in the grammar must match a newline in the input
    and appear as its own child with text "\\n".
    """
    regexs, rules, ToAST = parse_ebnf(""" ATOM: "[a-z]*"; IGNORE: " "; list: ATOM "\n" ATOM; """)
    parse = make_parse_function(regexs, rules, eof=True)
    # The two atoms must be separated by a real newline: a space is an
    # IGNORE token and could never produce the "\n" child asserted below.
    t = parse("abc\nabd")
    assert len(t.children) == 3
    assert t.children[1].additional_info == "\n"
def test_check_for_missing_names():
    """make_parse_function must raise ValueError naming the undefined
    symbol ``primari`` used in the grammar.
    """
    # One rule per physical line: the two '#' comments have to end at a line
    # break or they would swallow the 'primary' rule after them.
    regexs, rules, ToAST = parse_ebnf("""
IGNORE: " ";
DECIMAL: "0|[1-9][0-9]*";
additive: multitive "+" additive | multitive;
multitive: primary "*" multitive | primari; # observe the typo
# the following too!
primary: "(" additive ")" | DECIMAL;
""")
    excinfo = py.test.raises(ValueError, make_parse_function, regexs, rules)
    assert "primari" in str(excinfo.value)
def test_lexer_end_string_corner_case():
    """For the input "2." the NUMBER token must be "2" and the ATOM token
    ".", although NUMBER's regex has an optional fractional part.
    """
    grammar = """ NUMBER: "[0-9]*(\.[0-9]+)?"; ATOM: "\."; IGNORE: " "; expr: NUMBER ATOM EOF; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)
    t = parse("2.")
    number_token = t.children[0]
    assert number_token.additional_info == "2"
    atom_token = t.children[1]
    assert atom_token.additional_info == "."
def test_bug():
    """Skipped test: ``>list<`` combined with ``<DECIMAL>`` in one rule."""
    # this could be seen as using the transformer in the wrong way
    # but I have no clue how to detect this situation
    py.test.skip("fix me somehow")
    grammar = """ IGNORE: " "; DECIMAL: "0|[1-9][0-9]*"; list: DECIMAL >list< | <DECIMAL>; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    tree = parse("1 2 3 4 5")
    tree = ToAST().transform(tree)
def test_plus():
    """The ``+`` repetition: "A A B" transforms to three children with
    texts A, A, B, and an input without any "A" raises ParseError.
    """
    grammar = """ IGNORE: " "; x: "A"+ "B"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    t = ToAST().transform(parse("A A B"))
    # One list comparison covers both the length and every child's text.
    assert [c.additional_info for c in t.children] == ["A", "A", "B"]
    py.test.raises(ParseError, parse, "B")
def test_transform_greater_than():
    """``>b<`` in rule x: after transformation the tree for "a A A c" has
    exactly the children A, A, c.
    """
    grammar = """ IGNORE: " "; x: ["a"] >b< "c"; b: "A" "A"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    t = ToAST().transform(parse("a A A c"))
    # One list comparison covers both the length and every child's text.
    assert [c.additional_info for c in t.children] == ["A", "A", "c"]
def print_dot( path ): regexs, rules, ToAST = parse_ebnf(ebnf) parse = make_parse_function(regexs, rules, eof=True) f = open(path, 'r') doc = f.read() t = parse(doc) t = ToAST().transform(t) print "digraph parsed {" print "\n".join(list(t.dot())) print "}"
def test_toast():
    """Run the generated ToAST visitor over an arithmetic parse tree and
    check the shape of the result.
    """
    # One rule per physical line: the '#nonsense!' comment has to end at a
    # line break or it would swallow the 'primary' rule after it.
    regexs, rules, ToAST = parse_ebnf("""
DECIMAL: "0|[1-9][0-9]*";
IGNORE: " ";
additive: multitive ["+!"] additive | <multitive>;
multitive: primary ["*!"] multitive | <primary>; #nonsense!
primary: "(" <additive> ")" | <DECIMAL>;
""")
    parse = make_parse_function(regexs, rules)
    tree = parse("(0 +! 10) *! (999 +! 10) +! 1")
    # visit() returns a sequence; its first element is the tree under test.
    tree = tree.visit(ToAST())[0]
    assert len(tree.children) == 2
    assert tree.children[0].children[0].symbol == "additive"
    assert tree.children[1].symbol == "DECIMAL"
def test_long_inline_quotes():
    """Multi-character inline literals ("+!" and "*!") must work as
    operators; the result is computed with an EvaluateVisitor subclass.
    """
    # One rule per physical line: the '#nonsense!' comment has to end at a
    # line break or it would swallow the 'primary' rule after it.
    regexs, rules, transformer = parse_ebnf("""
DECIMAL: "0|[1-9][0-9]*";
IGNORE: " ";
additive: multitive "+!" additive | multitive;
multitive: primary "*!" multitive | primary; #nonsense!
primary: "(" additive ")" | DECIMAL;
""")

    class MyEvalVisitor(EvaluateVisitor):
        def visit_primary(self, node):
            # Three children means "(" additive ")".
            if len(node.children) == 3:
                return node.children[1].visit(self)
            return int(node.children[0].additional_info)

    parse = make_parse_function(regexs, rules)
    tree = parse("0 +! 10 +! 999")
    assert tree.visit(MyEvalVisitor()) == 10 + 999
def test_questionmark():
    """An optional group: it may appear once or not at all, and a second
    occurrence raises ParseError.
    """
    grammar = """ IGNORE: " "; x: ["A"] ("B" ["C"] "D")? "E"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)

    with_group = parse("A B C D E")
    py.test.raises(ParseError, parse, "A B C D B C D E")
    t = ToAST().transform(with_group)
    # One list comparison covers both the length and every child's text.
    assert [c.additional_info for c in t.children] == ["B", "D", "E"]

    t = ToAST().transform(parse("A E"))
    assert [c.additional_info for c in t.children] == ["E"]
def test_starparse():
    """Bracketed lists of quoted strings: the transformed tree is a
    ``list`` node with only QUOTED_STRING children.
    """
    grammar = """ QUOTED_STRING: "'[^\\']*'"; IGNORE: " |\n"; list: ["["] (QUOTED_STRING [","])* QUOTED_STRING ["]"]; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)
    cases = [("""['a', 'b', 'c']""", 3), ("['a']", 1)]
    for source, count in cases:
        t = ToAST().transform(parse(source))
        assert t.symbol == "list"
        assert len(t.children) == count
        assert [c.symbol for c in t.children] == ["QUOTED_STRING"] * count
def test_toast_bigger():
    """Transform "x * floor + 1" with a larger expression grammar and
    check that the third child of the result is a NUMBER.
    """
    # One rule per physical line: each '#' comment has to end at a line
    # break or it would swallow the rules that follow it.
    regexs, rules, ToAST = parse_ebnf("""
BOOLCONST: "TRUE|FALSE";
IDENTIFIER: "[a-zA-Z_][a-zA-Z0-9_]*";
NUMBER: "0|[1-9][0-9]*";
IGNORE: " ";
# expression
expr: <intexpr> | <boolexpr>;
intexpr: multitive "+" intexpr | multitive "-" intexpr | <multitive>;
multitive: primary "*" unaryexpr | primary "/" unaryexpr | primary "%" unaryexpr | <unaryexpr>;
unaryexpr: "+" unaryexpr | "-" unaryexpr | <primary>;
primary: "(" <intexpr> ")" | <NUMBER> | <IDENTIFIER>;
boolexpr: <BOOLCONST>; #strange thing
""")
    parse = make_parse_function(regexs, rules)
    tree = parse("x * floor + 1")
    tree = ToAST().transform(tree)
    assert tree.children[2].symbol == "NUMBER"
def test_parse_funcdef():
    """Parse a minimal ``def`` statement and run it through the generated
    ToAST visitor; only successful completion is checked.
    """
    # Grammar text is kept exactly as in the original literal.
    grammar = """ IGNORE: " "; NAME: "[a-zA-Z_]*"; NUMBER: "0|[1-9][0-9]*"; funcdef: "def" NAME parameters ":" suite; parameters: ["("] >varargslist< [")"] | ["("] [")"]; varargslist: (fpdef ("=" test)? ",")* star_or_starstarargs | fpdef ("=" test)? ("," fpdef ("=" test)?)* ","?; star_or_starstarargs: "*" NAME "," "**" NAME | "*" NAME | "**" NAME; fpdef: NAME | "(" fplist ")"; fplist: fpdef ("," fpdef)* ","?; test: NUMBER; suite: simple_stmt | ["NEWLINE"] ["INDENT"] stmt+ ["DEDENT"]; simple_stmt: stmt; stmt: "pass"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    parsed = parse("def f(a): NEWLINE INDENT pass DEDENT")
    t = parsed.visit(ToAST())[0]
def test_empty_production():
    """A grammar with an empty production: blank input yields a childless
    Nonterminal, six "a" tokens yield six children, and a single "a"
    raises ParseError.
    """
    # this could be seen as using the transformer in the wrong way
    # but I have no clue how to detect this situation
    grammar = """ IGNORE: " "; DECIMAL: "0|[1-9][0-9]*"; file: <stuff> EOF; stuff: "a" >stuff< "a" | "y" | >empty<; empty: ; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions, eof=True)

    t = ToAST().transform(parse(" "))
    assert isinstance(t, Nonterminal)
    assert len(t.children) == 0

    t = ToAST().transform(parse(" a a a a a a "))
    assert len(t.children) == 6

    excinfo = py.test.raises(ParseError, parse, "a")
def test_prolog():
    """Parse two Prolog-like clauses and check that each produces a tree."""
    # Grammar text is kept exactly as in the original literal.
    grammar = """ ATOM: "[a-z]([a-zA-Z0-9]|_)*"; VAR: "[A-Z]([a-zA-Z0-9]|_)*|_"; NUMBER: "0|[1-9][0-9]*"; IGNORE: "[ \\n\\t]"; file: fact file | fact; fact: complexterm "." | complexterm ":-" compoundexpr "."; compoundexpr: complexterm "," compoundexpr | complexterm ";" compoundexpr | complexterm; complexterm: ATOM "(" exprlist ")" | ATOM; exprlist: expr "," exprlist | expr; expr: complexterm | ATOM | NUMBER | VAR; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)
    sources = [
        "prefix(\n\tlonger(and_nested(term(X))), Xya, _, X0, _).",
        """ foo(X, Y) :- bar(Y, X), bar(Y, X) ; foobar(X, Y, 1234, atom).""",
    ]
    for source in sources:
        tree = parse(source)
        assert tree is not None
def test_transform_star():
    """A starred nonterminal with zero repetitions: the ``list`` node for
    "()" keeps only the two parenthesis tokens.
    """
    #py.test.skip("This needs to be fixed - generated transformer is buggy")
    grammar = """ IGNORE: " "; ATOM: "[\+a-zA-Z_][a-zA-Z0-9_]*"; sexpr: ATOM | list; list: "(" sexpr* ")"; """
    token_defs, productions, ToAST = parse_ebnf(grammar)
    parse = make_parse_function(token_defs, productions)

    tree = parse("()")
    transformed = tree.visit(ToAST())[0]
    list_expr = transformed.children[0]
    assert list_expr.symbol == 'list'
    # should have two children, "(" and ")"
    assert len(list_expr.children) == 2
    opening, closing = list_expr.children[0], list_expr.children[1]
    assert opening.additional_info == '('
    assert closing.additional_info == ')'

    tree = parse("(a b c)")
    list_expr = ToAST().transform(tree)