def test_left_recursive():
    """Left-recursion is detected per rule and a left-recursive grammar parses.

    Only ``expr`` mentions itself in leftmost position; ``start``/``term`` and
    the repetition rules must not be flagged.  Parsing "1 + 2 + 3" must
    associate to the left: ((1 + 2) + 3).
    """
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: expr NEWLINE
    expr: ('-' term | expr '+' term | term)
    term: NUMBER
    foo: NAME+
    bar: NAME*
    baz: NAME?
    """
    rules = parse_string(grammar, GrammarParser).rules
    parser_class = generate_parser(rules)
    assert not rules['start'].left_recursive
    assert rules['expr'].left_recursive
    assert not rules['term'].left_recursive
    assert not rules['foo'].left_recursive
    assert not rules['bar'].left_recursive
    assert not rules['baz'].left_recursive
    node = parse_string("1 + 2 + 3\n", parser_class)
    # Left-associative nesting: the '1 + 2' subtree is the left child.
    assert node == [
        [
            [
                [[TokenInfo(NUMBER, string='1', start=(1, 0), end=(1, 1), line='1 + 2 + 3\n')]],
                TokenInfo(OP, string='+', start=(1, 2), end=(1, 3), line='1 + 2 + 3\n'),
                [TokenInfo(NUMBER, string='2', start=(1, 4), end=(1, 5), line='1 + 2 + 3\n')],
            ],
            TokenInfo(OP, string='+', start=(1, 6), end=(1, 7), line='1 + 2 + 3\n'),
            [TokenInfo(NUMBER, string='3', start=(1, 8), end=(1, 9), line='1 + 2 + 3\n')],
        ],
        TokenInfo(NEWLINE, string='\n', start=(1, 9), end=(1, 10), line='1 + 2 + 3\n'),
    ]
def test_repeat_1_complex(self) -> None:
    """A one-or-more group collects every repetition and rejects zero matches."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: term ('+' term)+ NEWLINE
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2 + 3\n", parser_class)
    self.assertEqual(
        node,
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"),
            [
                [
                    TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
                    TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n"),
                ],
                [
                    TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
                    TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"),
                ],
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
        ],
    )
    # A bare term has no '+' group at all, so the '+' repetition fails.
    with self.assertRaises(SyntaxError):
        parse_string("1\n", parser_class)
def test_optional_operator(self) -> None:
    """An optional group yields its match list when present and None when absent."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: sum NEWLINE
    sum: term ('+' term)?
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"),
                [
                    TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
                    TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"),
                ],
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
        ],
    )
    # Without '+ term' the optional group contributes None, not a failure.
    node = parse_string("1\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                None,
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
        ],
    )
def test_repeat_1_complex():
    """One-or-more group: collects each '+ term' repetition; zero matches raise."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: term ('+' term)+ NEWLINE
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2 + 3\n", parser_class)
    assert node == [
        [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
        [
            [[
                TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
                [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
            ]],
            [[
                TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
                [TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
            ]],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
    ]
    # A bare term has no '+' group at all, so the '+' repetition fails.
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)
def test_gather(self) -> None:
    """Gather (','.thing+): str/repr of the rule and separator-stripped parses.

    Uses ``self.assertEqual`` throughout for consistent unittest-style
    failure messages (the original mixed bare ``assert`` in).
    """
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    rules = parse_string(grammar, GrammarParser).rules
    self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
    self.assertTrue(
        repr(rules["start"]).startswith(
            "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
        )
    )
    self.assertEqual(str(rules["thing"]), "thing: NUMBER")
    parser_class = make_parser(grammar)
    # Single element: no separators involved.
    node = parse_string("42\n", parser_class)
    self.assertEqual(
        node,
        [
            [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
            TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
        ],
    )
    # Two elements: the ',' separator tokens are not part of the result.
    node = parse_string("1, 2\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
                [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
        ],
    )
def test_with_stmt_with_paren(self) -> None:
    """A C-action grammar for parenthesized 'with' items builds the right AST."""
    # NOTE: each grammar rule must start on its own line; parenthesized
    # alternatives may span lines within a rule.
    grammar_source = """
    start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
    statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
    statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) }
    compound_stmt[stmt_ty]: with_stmt
    with_stmt[stmt_ty]: (
        a='with' '(' b=','.with_item+ ')' ':' c=block {
            _Py_With(b, _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
    )
    with_item[withitem_ty]: (
        e=NAME o=['as' t=NAME { t }] {
            _Py_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
    )
    block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
    pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
    """
    stmt = "with (\n    a as b,\n    c as d\n): pass"
    grammar = parse_string(grammar_source, GrammarParser)
    extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
    the_ast = extension.parse_string(stmt, mode=1)
    # Both items appear, each with Load context for the expression and
    # Store context for the 'as' target.
    self.assertTrue(
        ast_dump(the_ast).startswith(
            "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
            "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
        )
    )
def check_nested_expr(nesting_depth: int, parser: Any, language: str) -> bool:
    """Report whether *parser* survives an expression nested *nesting_depth* deep.

    Builds ``((...((0))...))`` with ``nesting_depth`` paren pairs, feeds it to
    the Python-level ``parse_string`` helper or the C extension's own
    ``parse_string`` depending on *language*, prints a colored pass/fail line,
    and returns True on success.
    """
    expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
    try:
        # Dispatch on the parser flavor under test.
        if language == "Python":
            parse_string(expr, parser)
        else:
            parser.parse_string(expr)
        print(f"({language}) Nesting depth of {nesting_depth} is successful")
        return True
    except Exception as exc:
        print(f"{FAIL}({language}) Failed with nesting depth of {nesting_depth}{ENDC}")
        print(f"{FAIL}\t{exc}{ENDC}")
        return False
def verify_ast_generation(self, source: str, stmt: str, tmp_path: PurePath) -> None:
    """Compile *source* into a C parser extension and assert that parsing
    *stmt* with it produces the same AST dump as ``ast.parse``."""
    parsed_grammar = parse_string(source, GrammarParser)
    extension = generate_parser_c_extension(parsed_grammar, Path(tmp_path))

    reference = ast.parse(stmt)
    produced = extension.parse_string(stmt, mode=1)
    self.assertEqual(ast_dump(reference), ast_dump(produced))
def verify_ast_generation(source: str, stmt: str, tmp_path: PurePath) -> None:
    """Compile *source* into a C parser extension and assert that parsing
    *stmt* with it matches ``ast.parse`` exactly (via ``ast.dump``)."""
    parsed_grammar = parse_string(source, GrammarParser)
    extension = generate_parser_c_extension(parsed_grammar, tmp_path)

    reference = ast.parse(stmt)
    produced = extension.parse_string(stmt)
    assert ast.dump(reference) == ast.dump(produced)
def test_repeat_with_sep_simple(self) -> None:
    """Gather (','.thing+) returns only the things; separators are dropped."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1, 2, 3\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"),
                TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"),
                TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"),
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
        ],
    )
def test_repeat_1_simple(self) -> None:
    """thing+ needs at least one match after the leading thing."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: thing thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    self.assertEqual(
        node,
        [
            [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
            [
                [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
                [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
        ],
    )
    # A single NUMBER satisfies only the leading thing, so thing+ fails.
    with self.assertRaises(SyntaxError):
        parse_string("1\n", parser_class)
def build_extension(self, grammar_source):
    """Parse *grammar_source* and compile it into a C parser extension.

    The extension is generated in the current directory: setUp() has already
    chdir'ed into the temporary path, and a relative output path keeps the
    compiler command lines from growing excessively long.
    """
    parsed = parse_string(grammar_source, GrammarParser)
    generate_parser_c_extension(parsed, Path('.'), library_dir=self.library_dir)
def test_repeat_0_simple(self) -> None:
    """thing* matches zero or more times; zero matches yield an empty list."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: thing thing* NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    self.assertEqual(
        node,
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
            [
                TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"),
                TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"),
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
        ],
    )
    # With only the leading thing, the star contributes an empty list.
    node = parse_string("1\n", parser_class)
    self.assertEqual(
        node,
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
            [],
            TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
        ],
    )
def test_soft_keywords(self) -> None:
    """A double-quoted literal is a soft keyword in the generated C parser.

    Uses ``self.assertIn`` for a consistent unittest-style failure message
    (the original used a bare ``assert``).
    """
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: "foo"
    """
    grammar = parse_string(grammar_source, GrammarParser)
    parser_source = generate_c_parser_source(grammar)
    self.assertIn("expect_soft_keyword", parser_source)
def test_alt_optional_operator():
    """Bracketed optional ['+' term] yields its match list or None."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: sum NEWLINE
    sum: term ['+' term]
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2\n", parser_class)
    assert node == [
        [
            [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
            [
                TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
                [TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
            ],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
    ]
    # The optional part contributes None when absent.
    node = parse_string("1\n", parser_class)
    assert node == [
        [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
def test_repeat_0_simple():
    """thing* matches zero or more times; zero matches yield an empty list."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: thing thing* NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    assert node == [
        [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
        [
            [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
            [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
    ]
    # With only the leading thing, the star contributes an empty list.
    node = parse_string("1\n", parser_class)
    assert node == [
        [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
        [],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
def test_parse_trivial_grammar(self) -> None:
    """Visiting a one-rule grammar touches the expected number of nodes."""
    grammar = """
    start: 'a'
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = self.Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6 nodes.
    self.assertEqual(visitor.n_nodes, 6)
def test_nullable(self) -> None:
    """compute_nullables marks only rules that can match the empty string."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rules = grammar.rules
    nullables = compute_nullables(rules)
    self.assertNotIn(rules["start"], nullables)  # Not None!
    self.assertIn(rules["sign"], nullables)
def test_mutually_left_recursive(self) -> None:
    """Mutually left-recursive rules are detected and parse correctly."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: foo 'E'
    foo: bar 'A' | 'B'
    bar: foo 'C' | 'D'
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    rules = grammar.rules
    # foo and bar recurse through each other; start itself does not.
    self.assertFalse(rules["start"].left_recursive)
    self.assertTrue(rules["foo"].left_recursive)
    self.assertTrue(rules["bar"].left_recursive)
    genr.generate("<string>")
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    parser_class: Type[Parser] = ns["GeneratedParser"]
    node = parse_string("D A C A E", parser_class)
    self.assertEqual(
        node,
        [
            [
                [
                    [
                        [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
                        TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
                    ],
                    TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
                ],
                TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
            ],
            TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
        ],
    )
    node = parse_string("B C A E", parser_class)
    self.assertIsNotNone(node)
    self.assertEqual(
        node,
        [
            [
                [
                    [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
                    TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
                ],
                TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
            ],
            TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
        ],
    )
def test_nullable():
    """Generating a parser computes per-rule nullability (old rules-based API)."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    rules = parse_string(grammar, GrammarParser).rules
    out = io.StringIO()
    genr = PythonParserGenerator(rules, out)
    assert rules['start'].nullable is False  # Not None!
    assert rules['sign'].nullable
def test_error_in_rules(self) -> None:
    """A C action that raises propagates out of the generated extension."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: NAME {PyTuple_New(-1)}
    """
    grammar = parse_string(grammar_source, GrammarParser)
    extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
    # PyTuple_New raises SystemError if an invalid argument was passed.
    with self.assertRaises(SystemError):
        extension.parse_string("a", mode=0)
def test_parse_trivial_grammar(self):
    """Visiting a one-rule grammar touches the expected number of nodes."""
    grammar = """
    start: 'a'
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = self.Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6 nodes.
    assert visitor.n_nodes == 6
def test_nullable():
    """Generating a parser computes per-rule nullability (Grammar-based API).

    The grammar text and the parsed Grammar get distinct names instead of the
    original's rebinding of ``grammar``.
    """
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    grammar = parse_string(grammar_source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    rules = grammar.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
def test_nullable(self) -> None:
    """Generating a parser computes per-rule nullability."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    rules = grammar.rules
    self.assertFalse(rules["start"].nullable)  # Not None!
    self.assertTrue(rules["sign"].nullable)
def test_repeat_with_separator_rules(self) -> None:
    """str() and repr() of a Gather rule round-trip the ','.thing+ notation."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    rules = parse_string(grammar, GrammarParser).rules
    self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
    self.assertTrue(
        repr(rules["start"]).startswith(
            "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
        )
    )
    self.assertEqual(str(rules["thing"]), "thing: NUMBER")
def test_rule_with_no_collision(self) -> None:
    """SubRuleValidator accepts alternatives with distinct first items."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar_source = """
    start: bad_rule
    sum:
        | NAME '-' NAME
        | NAME '+' NAME
    """
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    validator = SubRuleValidator(grammar)
    # Must not raise for any rule.
    for rule_name, rule in grammar.rules.items():
        validator.validate_rule(rule_name, rule)
def test_parse_repeat1_grammar(self):
    """Visiting a repeat-1 grammar touches the expected number of nodes."""
    grammar = """
    start: 'a'+
    """
    rules = parse_string(grammar, GrammarParser)
    visitor = self.Visitor()
    visitor.visit(rules)
    # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
    # (original comment said 6, but seven node types are visited).
    assert visitor.n_nodes == 7
def test_optional_literal(self) -> None:
    """An optional literal yields its token when present and None when absent."""
    # NOTE: each grammar rule must be on its own line for the meta-parser.
    grammar = """
    start: sum NEWLINE
    sum: term '+' ?
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1+\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
                TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
        ],
    )
    # Without the '+', the optional literal contributes None.
    node = parse_string("1\n", parser_class)
    self.assertEqual(
        node,
        [
            [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
            TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
        ],
    )
def test_extension_name(tmp_path: PurePath) -> None:
    """@modulename renames the generated C extension module."""
    # NOTE: the @modulename directive and each rule must be on their own lines.
    grammar_source = """
    @modulename 'alternative_name'
    start: expr+ NEWLINE? ENDMARKER
    expr: x=NAME
    """
    grammar = parse_string(grammar_source, GrammarParser)
    parser_source = generate_c_parser_source(grammar)
    # Both the init symbol and the module definition must use the new name.
    assert "PyInit_alternative_name" in parser_source
    assert '.m_name = "alternative_name"' in parser_source
def test_unreachable_explicit(self) -> None:
    """An explicit UNREACHABLE action is replaced by the configured text."""
    source = """
    start: NAME { UNREACHABLE }
    """
    grammar = parse_string(source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out, unreachable_formatting="This is a test")
    genr.generate("<string>")
    self.assertIn("This is a test", out.getvalue())