def test_optional_literal() -> None:
    """An optional trailing literal ('+' ?) matches when present and yields None when absent."""
    grammar = """
    start: sum NEWLINE
    sum: term '+' ?
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    # '+' present: the optional slot holds the OP token.
    node = parse_string("1+\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"),
            TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
    ]
    # '+' absent: the optional slot is None rather than a parse failure.
    node = parse_string("1\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
            None
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
def test_repeat_1_simple() -> None:
    """thing+ requires at least one repetition; the repetitions collect into a list."""
    grammar = """
    start: thing thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    # First `thing` matches "1"; the following thing+ gathers "2" and "3" into a sub-list.
    assert node == [
        TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
        [
            TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"),
            TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
    ]
    # A single NUMBER satisfies the leading `thing` but leaves thing+ empty -> SyntaxError.
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)
def test_cut_early_exit() -> None:
    """A cut (~) commits to the first alternative, so the fallback is never tried."""
    grammar = """
    start: '(' ~ expr ')' | '(' name ')'
    expr: NUMBER
    name: NAME
    """
    parser = make_parser(grammar)
    # "(a)" fails inside the first alternative after the cut has fired;
    # backtracking into the second alternative is forbidden, so the parse fails.
    with pytest.raises(SyntaxError):
        parse_string("(a)", parser, verbose=True)
def test_forced() -> None:
    """A forced token (&&':') raises SyntaxError with a targeted message when missing."""
    grammar = """
    start: NAME &&':' | NAME
    """
    parser_class = make_parser(grammar)
    # The forced ':' is consumed normally when present.
    assert parse_string("number :", parser_class, verbose=True)
    with pytest.raises(SyntaxError) as e:
        parse_string("a", parser_class, verbose=True)
    # exconly() already returns a str; the previous str(...) wrapper was redundant.
    assert "expected ':'" in e.exconly()
def test_left_recursive() -> None:
    """Only the directly left-recursive rule is flagged; repetition rules are not."""
    grammar_source = """
    start: expr NEWLINE
    expr: ('-' term | expr '+' term | term)
    term: NUMBER
    foo: NAME+
    bar: NAME*
    baz: NAME?
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    parser_class = generate_parser(grammar)
    rules = grammar.rules
    # `expr` references itself in a left position; nothing else does.
    assert not rules["start"].left_recursive
    assert rules["expr"].left_recursive
    assert not rules["term"].left_recursive
    assert not rules["foo"].left_recursive
    assert not rules["bar"].left_recursive
    assert not rules["baz"].left_recursive
    # Left recursion produces a left-associative parse: ((1 + 2) + 3).
    node = parse_string("1 + 2 + 3\n", parser_class)
    assert node == [
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"),
                TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
                TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n"),
            ],
            TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
            TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
    ]
def test_forced_with_group() -> None:
    """A forced group (&&(':' | ';')) accepts either token and names the group on failure."""
    grammar = """
    start: NAME &&(':' | ';') | NAME
    """
    parser = make_parser(grammar)
    # Both members of the forced group parse successfully.
    for accepted in ("number :", "number ;"):
        assert parse_string(accepted, parser, verbose=True)
    with pytest.raises(SyntaxError) as excinfo:
        parse_string("a", parser, verbose=True)
    # The error message names the whole group, not a single token.
    assert "expected (':' | ';')" in excinfo.value.args[0]
def test_repeat_with_sep_simple() -> None:
    """','.thing+ gathers the items into one flat list, discarding the separators."""
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1, 2, 3\n", parser_class)
    # Only the NUMBER tokens appear; the ',' separators are consumed but dropped.
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"),
            TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"),
            TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
    ]
def test_locations_in_alt_action_and_group() -> None:
    """LOCATIONS in actions (including inside a group) must match ast.parse's attributes."""
    grammar = """
    start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
    term:
        | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
        | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
        | factor
    factor:
        | ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) }
          | n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) } )
    """
    parser_class = make_parser(grammar)
    source = "2*3\n"
    # Dump both ASTs with location attributes so any mismatch shows up in the diff.
    o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
    # ast.parse emits ` kind=None,` for Constant nodes; strip it to compare.
    p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(" kind=None,", "")
    diff = "\n".join(
        difflib.unified_diff(o.split("\n"), p.split("\n"), "cpython", "python-pegen"))
    if diff:
        print(diff)
    assert not diff
def test_repeat_1_complex() -> None:
    """('+' term)+ groups each operator/operand pair into its own sub-list."""
    grammar = """
    start: term ('+' term)+ NEWLINE
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2 + 3\n", parser_class)
    assert node == [
        TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"),
        [
            [
                TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
                TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n"),
            ],
            [
                TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
                TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"),
            ],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
    ]
    # The group is one-or-more, so a bare NUMBER is rejected.
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)
def check_nested_expr(nesting_depth: int, parser: Any, language: str) -> bool:
    """Try to parse a parenthesized 0 nested `nesting_depth` levels deep.

    Returns True on success; on failure prints a colored diagnostic and returns False.
    """
    expr = "(" * nesting_depth + "0" + ")" * nesting_depth
    try:
        # Python-generated parsers are driven via parse_string(); other
        # targets expose a parse_string method on the parser object itself.
        if language == "Python":
            parse_string(expr, parser)
        else:
            parser.parse_string(expr)
    except Exception as err:
        print(
            f"{FAIL}({language}) Failed with nesting depth of {nesting_depth}{ENDC}"
        )
        print(f"{FAIL}\t{err}{ENDC}")
        return False
    print(f"({language}) Nesting depth of {nesting_depth} is successful")
    return True
def test_gather() -> None:
    """Gather (','.thing+): str()/repr() round-trip and flat parse results."""
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    rules = parse_string(grammar, GrammarParser).rules
    # str() reproduces the original gather syntax.
    assert str(rules["start"]) == "start: ','.thing+ NEWLINE"
    print(repr(rules["start"]))
    # repr() exposes the Gather node wrapping separator and element.
    assert repr(rules["start"]).startswith(
        "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
    )
    assert str(rules["thing"]) == "thing: NUMBER"
    parser_class = make_parser(grammar)
    # A single item still produces a (one-element) list.
    node = parse_string("42\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
    ]
    # Separators are consumed but excluded from the result list.
    node = parse_string("1, 2\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"),
            TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
    ]
def test_parse_trivial_grammar() -> None:
    """A single rule with one string leaf yields exactly six grammar nodes."""
    grammar = """
    start: 'a'
    """
    tree = parse_string(grammar, GrammarParser)
    visitor = Visitor()
    visitor.visit(tree)
    # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6 nodes visited.
    assert visitor.n_nodes == 6
def test_alt_optional_operator() -> None:
    """An optional bracketed group ['+' term] yields a sub-list or None."""
    grammar = """
    start: sum NEWLINE
    sum: term ['+' term]
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    # Group present: it appears as a nested [OP, NUMBER] list.
    node = parse_string("1 + 2\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"),
            [
                TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
                TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"),
            ],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
    ]
    # Group absent: the optional slot is None.
    node = parse_string("1\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
            None
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
def test_unreachable_explicit() -> None:
    """An explicit UNREACHABLE action is rendered using the configured formatting."""
    source = """
    start: NAME { UNREACHABLE }
    """
    grammar = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    generator = PythonParserGenerator(
        grammar, buffer, unreachable_formatting="This is a test"
    )
    generator.generate("<string>")
    # The marker from unreachable_formatting must show up in the generated code.
    assert "This is a test" in buffer.getvalue()
def test_nullable() -> None:
    """Nullable computation: an optional-only rule is nullable; its user is not."""
    grammar_source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    # Constructing the generator runs the nullability analysis over the rules.
    PythonParserGenerator(grammar, io.StringIO())
    rules = grammar.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
def test_rule_with_no_collision(self) -> None:
    """Distinct alternatives in a sub-rule pass validation without raising."""
    grammar_source = """
    start: bad_rule
    sum:
      | NAME '-' NAME
      | NAME '+' NAME
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    checker = SubRuleValidator(grammar)
    # No alternative duplicates another's prefix, so no ValidationError is expected.
    for name, rule in grammar.rules.items():
        checker.validate_rule(name, rule)
def test_parse_repeat1_grammar() -> None:
    """Visiting a one-or-more repetition counts one extra node for the Repeat1 wrapper."""
    grammar = """
    start: 'a'+
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
    assert visitor.n_nodes == 7
def test_cut() -> None:
    """A cut inside a successful alternative does not affect the parse result."""
    grammar = """
    start: '(' ~ expr ')'
    expr: NUMBER
    """
    parser = make_parser(grammar)
    source = "(1)"
    node = parse_string(source, parser, verbose=True)
    expected = [
        TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line=source),
        TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line=source),
        TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line=source),
    ]
    assert node == expected
def test_unreachable_implicit3() -> None:
    """No unreachable formatting is emitted when the invalid rule has an explicit action."""
    source = """
    start: NAME | invalid_input { None }
    invalid_input: NUMBER
    """
    grammar = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    generator = PythonParserGenerator(
        grammar, buffer, unreachable_formatting="This is a test"
    )
    generator.generate("<string>")
    # The explicit `{ None }` action overrides the unreachable placeholder.
    assert "This is a test" not in buffer.getvalue()
def test_parse_grammar_with_types() -> None:
    """Rule return-type annotations (including C pointer types) survive parsing."""
    grammar_source = """
    start[ast.BinOp]: term ('+' term)* NEWLINE
    term[T[int]]: NUMBER
    c_rule[expr_ty*]: a=NUMBER? { _new_expr_ty(a) }
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rules = grammar.rules
    # Whitespace inside the stored type is not significant.
    assert rules["start"].type.replace(" ", "") == "ast.BinOp"
    assert rules["term"].type.replace(" ", "") == "T[int]"
    # C-style pointer types are kept verbatim.
    assert rules["c_rule"].type == "expr_ty*"
def test_rule_with_simple_collision(self) -> None:
    """Two alternatives where one is a prefix of the other trigger a ValidationError."""
    grammar_source = """
    start: bad_rule
    sum:
      | NAME '+' NAME
      | NAME '+' NAME ';'
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    checker = SubRuleValidator(grammar)
    with self.assertRaises(ValidationError):
        for name, rule in grammar.rules.items():
            checker.validate_rule(name, rule)
def test_expr_grammar() -> None:
    """A bare NUMBER parses through the `term`-only fallback alternative of sum."""
    grammar = """
    start: sum NEWLINE
    sum: term '+' term | term
    term: NUMBER
    """
    parser = make_parser(grammar)
    source = "42\n"
    node = parse_string(source, parser)
    assert node == [
        TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line=source),
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line=source),
    ]
def test_parse_optional_grammar() -> None:
    """An optional group adds an Opt node plus a full nested Rhs subtree."""
    grammar = """
    start: 'a' ['b']
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
    # NamedItem/Opt/Rhs/Alt/NamedItem/StringLeaf -> 6
    assert visitor.n_nodes == 12
def test_advanced_left_recursive() -> None:
    """Left-recursion detection with a nullable helper rule in the cycle."""
    grammar_source = """
    start: NUMBER | sign start
    sign: ['-']
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    # Constructing the generator computes nullability and left-recursion flags.
    PythonParserGenerator(grammar, io.StringIO())
    rules = grammar.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
    # `sign` is nullable, so `sign start` makes `start` left-recursive.
    assert rules["start"].left_recursive
    assert not rules["sign"].left_recursive
def test_typed_rules() -> None:
    """Rule types ([int]) are stored (visible in repr) but omitted from str()."""
    grammar = """
    start[int]: sum NEWLINE
    sum[int]: t1=term '+' t2=term { action } | term
    term[int]: NUMBER
    """
    rules = parse_string(grammar, GrammarParser).rules
    # Check the str() and repr() of a few rules; AST nodes don't support ==.
    # str() drops types, variable bindings (t1=, t2=) and actions.
    assert str(rules["start"]) == "start: sum NEWLINE"
    assert str(rules["sum"]) == "sum: term '+' term | term"
    # repr() keeps the type as the Rule's second argument.
    assert (
        repr(rules["term"]) == "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
    )
def test_nasty_mutually_left_recursive() -> None:
    """Mutual left recursion: PEG's ordered choice rejects 'x - + ='."""
    # This grammar does not recognize 'x - + =', much to my chagrin.
    # But that's the way PEG works.
    # [Breathlessly]
    # The problem is that the toplevel target call
    # recurses into maybe, which recognizes 'x - +',
    # and then the toplevel target looks for another '+',
    # which fails, so it retreats to NAME,
    # which succeeds, so we end up just recognizing 'x',
    # and then start fails because there's no '=' after that.
    grammar_source = """
    start: target '='
    target: maybe '+' | NAME
    maybe: maybe '-' | target
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")
    # Execute the generated module source to obtain the parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    parser_class = ns["GeneratedParser"]
    with pytest.raises(SyntaxError):
        parse_string("x - + =", parser_class)
def test_parse_or_grammar() -> None:
    """Each alternative contributes Alt/NamedItem/StringLeaf (3 nodes) to the count."""
    grammar = """
    start: rule
    rule: 'a' | 'b'
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf -> 6
    # Rule/Rhs/ -> 2
    # Alt/NamedItem/StringLeaf -> 3
    # Alt/NamedItem/StringLeaf -> 3
    assert visitor.n_nodes == 14
def test_long_rule_str() -> None:
    """A rule too long for one line is rendered with one alternative per line."""
    grammar_source = """
    start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
    """
    # The multi-line rendering indents every alternative under the rule name.
    expected = """
    start:
        | zero
        | one
        | one zero
        | one one
        | one zero zero
        | one zero one
        | one one zero
        | one one one
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rendered = str(grammar.rules["start"])
    assert rendered == textwrap.dedent(expected).strip()
def index() -> str:
    """Render the grammar playground; on submit, build a parser and run it on the source.

    Returns the rendered index.html page. Any exception raised while building
    the parser or parsing the program is captured into the page output instead
    of crashing the view. (Return annotation fixed: this view returns the
    rendered template, not None.)
    """
    # you must tell the variable 'form' what you named the class, above
    # 'form' is the variable name used in this template: index.html
    form = GrammarForm()
    form.grammar(class_="form-control")
    output_text = "\n"
    if form.validate_on_submit():
        grammar_source = form.grammar.data
        program_source = form.source.data
        output = io.StringIO()
        try:
            parser_class = make_parser(grammar_source)
            result = parse_string(program_source, parser_class, verbose=False)
            print(result, file=output)
        except Exception:
            # Best-effort: show the full traceback in the page output.
            # (Previously bound the exception to an unused variable `e`.)
            traceback.print_exc(file=output)
        output_text += output.getvalue()
    return render_template("index.html", form=form, output=output_text)
def test_parse_grammar() -> None:
    """str() of a parsed grammar round-trips, minus bindings and actions."""
    grammar_source = """
    start: sum NEWLINE
    sum: t1=term '+' t2=term { action } | term
    term: NUMBER
    """
    # Expected str(): variable bindings (t1=, t2=) and actions are dropped.
    expected = """
    start: sum NEWLINE
    sum: term '+' term | term
    term: NUMBER
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rules = grammar.rules
    assert str(grammar) == textwrap.dedent(expected).strip()
    # Check the str() and repr() of a few rules; AST nodes don't support ==.
    assert str(rules["start"]) == "start: sum NEWLINE"
    assert str(rules["sum"]) == "sum: term '+' term | term"
    expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
    assert repr(rules["term"]) == expected_repr