Exemple #1
0
def test_left_recursive():
    """Left-recursion detection: only ``expr`` (which names itself in first
    position of an alternative) is flagged; plain repetition operators
    (+, *, ?) do not make a rule left-recursive.  Also checks the generated
    parser handles the recursion at runtime.
    """
    grammar = """
    start: expr NEWLINE
    expr: ('-' term | expr '+' term | term)
    term: NUMBER
    foo: NAME+
    bar: NAME*
    baz: NAME?
    """
    rules = parse_string(grammar, GrammarParser).rules
    parser_class = generate_parser(rules)
    assert not rules['start'].left_recursive
    assert rules['expr'].left_recursive
    assert not rules['term'].left_recursive
    assert not rules['foo'].left_recursive
    assert not rules['bar'].left_recursive
    assert not rules['baz'].left_recursive
    # Left recursion parses left-associatively: ((1 + 2) + 3), mirrored by
    # the nesting depth of the token lists below.
    node = parse_string("1 + 2 + 3\n", parser_class)
    assert node == [[[[[
        TokenInfo(NUMBER,
                  string='1',
                  start=(1, 0),
                  end=(1, 1),
                  line='1 + 2 + 3\n')
    ]],
                      TokenInfo(OP,
                                string='+',
                                start=(1, 2),
                                end=(1, 3),
                                line='1 + 2 + 3\n'),
                      [
                          TokenInfo(NUMBER,
                                    string='2',
                                    start=(1, 4),
                                    end=(1, 5),
                                    line='1 + 2 + 3\n')
                      ]],
                     TokenInfo(OP,
                               string='+',
                               start=(1, 6),
                               end=(1, 7),
                               line='1 + 2 + 3\n'),
                     [
                         TokenInfo(NUMBER,
                                   string='3',
                                   start=(1, 8),
                                   end=(1, 9),
                                   line='1 + 2 + 3\n')
                     ]],
                    TokenInfo(NEWLINE,
                              string='\n',
                              start=(1, 9),
                              end=(1, 10),
                              line='1 + 2 + 3\n')]
Exemple #2
0
 def test_repeat_1_complex(self) -> None:
     """A one-or-more group ('+' term)+ yields one [op, term] pair per
     repetition, and input with zero repetitions is a syntax error."""
     grammar = """
     start: term ('+' term)+ NEWLINE
     term: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1 + 2 + 3\n", parser_class)
     self.assertEqual(
         node,
         [
             TokenInfo(NUMBER,
                       string="1",
                       start=(1, 0),
                       end=(1, 1),
                       line="1 + 2 + 3\n"),
             [
                 [
                     TokenInfo(OP,
                               string="+",
                               start=(1, 2),
                               end=(1, 3),
                               line="1 + 2 + 3\n"),
                     TokenInfo(
                         NUMBER,
                         string="2",
                         start=(1, 4),
                         end=(1, 5),
                         line="1 + 2 + 3\n",
                     ),
                 ],
                 [
                     TokenInfo(OP,
                               string="+",
                               start=(1, 6),
                               end=(1, 7),
                               line="1 + 2 + 3\n"),
                     TokenInfo(
                         NUMBER,
                         string="3",
                         start=(1, 8),
                         end=(1, 9),
                         line="1 + 2 + 3\n",
                     ),
                 ],
             ],
             TokenInfo(NEWLINE,
                       string="\n",
                       start=(1, 9),
                       end=(1, 10),
                       line="1 + 2 + 3\n"),
         ],
     )
     # A bare "1" contains no ('+' term) repetition, so parsing must fail.
     with self.assertRaises(SyntaxError):
         parse_string("1\n", parser_class)
 def test_optional_operator(self) -> None:
     """An optional group ('+' term)? matches when present and fills its
     slot with None when absent."""
     grammar = """
     start: sum NEWLINE
     sum: term ('+' term)?
     term: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1 + 2\n", parser_class)
     self.assertEqual(
         node,
         [
             [
                 TokenInfo(NUMBER,
                           string="1",
                           start=(1, 0),
                           end=(1, 1),
                           line="1 + 2\n"),
                 [
                     TokenInfo(OP,
                               string="+",
                               start=(1, 2),
                               end=(1, 3),
                               line="1 + 2\n"),
                     TokenInfo(NUMBER,
                               string="2",
                               start=(1, 4),
                               end=(1, 5),
                               line="1 + 2\n"),
                 ],
             ],
             TokenInfo(NEWLINE,
                       string="\n",
                       start=(1, 5),
                       end=(1, 6),
                       line="1 + 2\n"),
         ],
     )
     # Without the optional part, its slot in the result is None.
     node = parse_string("1\n", parser_class)
     self.assertEqual(
         node,
         [
             [
                 TokenInfo(NUMBER,
                           string="1",
                           start=(1, 0),
                           end=(1, 1),
                           line="1\n"),
                 None,
             ],
             TokenInfo(
                 NEWLINE, string="\n", start=(1, 1), end=(1, 2),
                 line="1\n"),
         ],
     )
Exemple #4
0
def test_repeat_1_complex():
    """Pytest variant: ('+' term)+ produces one entry per repetition and
    rejects input with no '+' at all."""
    grammar = """
    start: term ('+' term)+ NEWLINE
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2 + 3\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER,
                      string="1",
                      start=(1, 0),
                      end=(1, 1),
                      line="1 + 2 + 3\n")
        ],
        [
            [[
                TokenInfo(OP,
                          string="+",
                          start=(1, 2),
                          end=(1, 3),
                          line="1 + 2 + 3\n"),
                [
                    TokenInfo(NUMBER,
                              string="2",
                              start=(1, 4),
                              end=(1, 5),
                              line="1 + 2 + 3\n")
                ],
            ]],
            [[
                TokenInfo(OP,
                          string="+",
                          start=(1, 6),
                          end=(1, 7),
                          line="1 + 2 + 3\n"),
                [
                    TokenInfo(NUMBER,
                              string="3",
                              start=(1, 8),
                              end=(1, 9),
                              line="1 + 2 + 3\n")
                ],
            ]],
        ],
        TokenInfo(NEWLINE,
                  string="\n",
                  start=(1, 9),
                  end=(1, 10),
                  line="1 + 2 + 3\n"),
    ]
    # "1" alone has zero ('+' term) repetitions, so the parse must fail.
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)
Exemple #5
0
 def test_gather(self) -> None:
     """The gather construct ','.thing+ stringifies/reprs correctly and
     parses separated items into one list with the separators dropped."""
     grammar = """
     start: ','.thing+ NEWLINE
     thing: NUMBER
     """
     rules = parse_string(grammar, GrammarParser).rules
     self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
     self.assertTrue(
         repr(rules["start"]).startswith(
             "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
         ))
     self.assertEqual(str(rules["thing"]), "thing: NUMBER")
     parser_class = make_parser(grammar)
     node = parse_string("42\n", parser_class)
     # Use unittest assertions (not bare asserts, which are stripped under
     # python -O) for consistency with the rest of this test class.
     self.assertEqual(node, [
         [[
             TokenInfo(NUMBER,
                       string="42",
                       start=(1, 0),
                       end=(1, 2),
                       line="42\n")
         ]],
         TokenInfo(NEWLINE,
                   string="\n",
                   start=(1, 2),
                   end=(1, 3),
                   line="42\n"),
     ])
     # Two items separated by ',': the comma token does not appear in the
     # result, only the gathered things do.
     node = parse_string("1, 2\n", parser_class)
     self.assertEqual(node, [
         [
             [
                 TokenInfo(NUMBER,
                           string="1",
                           start=(1, 0),
                           end=(1, 1),
                           line="1, 2\n")
             ],
             [
                 TokenInfo(NUMBER,
                           string="2",
                           start=(1, 3),
                           end=(1, 4),
                           line="1, 2\n")
             ],
         ],
         TokenInfo(NEWLINE,
                   string="\n",
                   start=(1, 4),
                   end=(1, 5),
                   line="1, 2\n"),
     ])
Exemple #6
0
 def test_with_stmt_with_paren(self) -> None:
     """A parenthesised multi-item with-statement must yield one withitem
     per ``x as y`` pair in the AST produced by the generated C parser."""
     grammar_source = """
     start[mod_ty]: a=[statements] ENDMARKER { Module(a, NULL, p->arena) }
     statements[asdl_seq*]: a=statement+ { _PyPegen_seq_flatten(p, a) }
     statement[asdl_seq*]: a=compound_stmt { _PyPegen_singleton_seq(p, a) }
     compound_stmt[stmt_ty]: with_stmt
     with_stmt[stmt_ty]: (
         a='with' '(' b=','.with_item+ ')' ':' c=block {
             _Py_With(b, _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
     )
     with_item[withitem_ty]: (
         e=NAME o=['as' t=NAME { t }] { _Py_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
     )
     block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
     pass_stmt[stmt_ty]: a='pass' { _Py_Pass(EXTRA) }
     """
     # Items are spread over multiple lines inside the parentheses.
     stmt = "with (\n    a as b,\n    c as d\n): pass"
     grammar = parse_string(grammar_source, GrammarParser)
     extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
     the_ast = extension.parse_string(stmt, mode=1)
     self.assertTrue(
         ast_dump(the_ast).startswith(
             "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
             "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
         ))
Exemple #7
0
def check_nested_expr(nesting_depth: int, parser: Any, language: str) -> bool:
    """Try to parse a parenthesised expression nested *nesting_depth* deep.

    Prints a success or failure message and returns True when the parse
    succeeds, False on any error (best-effort probe, never raises).
    """
    nested = "(" * nesting_depth + "0" + ")" * nesting_depth
    try:
        # The Python-based parser and the C extension expose different APIs.
        if language == "Python":
            parse_string(nested, parser)
        else:
            parser.parse_string(nested)
        print(f"({language}) Nesting depth of {nesting_depth} is successful")
        return True
    except Exception as err:
        print(f"{FAIL}({language}) Failed with nesting depth of {nesting_depth}{ENDC}")
        print(f"{FAIL}\t{err}{ENDC}")
        return False
Exemple #8
0
    def verify_ast_generation(self, source: str, stmt: str, tmp_path: PurePath) -> None:
        """Build a C parser extension from *source* and check that parsing
        *stmt* with it matches the builtin compiler's AST."""
        grammar = parse_string(source, GrammarParser)
        extension = generate_parser_c_extension(grammar, Path(tmp_path))
        self.assertEqual(
            ast_dump(ast.parse(stmt)),
            ast_dump(extension.parse_string(stmt, mode=1)),
        )
Exemple #9
0
def verify_ast_generation(source: str, stmt: str, tmp_path: PurePath) -> None:
    """Assert the C extension generated from *source* parses *stmt* to the
    same AST as the stdlib ``ast`` module."""
    grammar = parse_string(source, GrammarParser)
    extension = generate_parser_c_extension(grammar, tmp_path)
    expected = ast.dump(ast.parse(stmt))
    actual = ast.dump(extension.parse_string(stmt))
    assert expected == actual
 def test_repeat_with_sep_simple(self) -> None:
     """','.thing+ gathers the separated items into one flat list, with the
     comma separators dropped from the result."""
     grammar = """
     start: ','.thing+ NEWLINE
     thing: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1, 2, 3\n", parser_class)
     self.assertEqual(
         node,
         [
             [
                 TokenInfo(NUMBER,
                           string="1",
                           start=(1, 0),
                           end=(1, 1),
                           line="1, 2, 3\n"),
                 TokenInfo(NUMBER,
                           string="2",
                           start=(1, 3),
                           end=(1, 4),
                           line="1, 2, 3\n"),
                 TokenInfo(NUMBER,
                           string="3",
                           start=(1, 6),
                           end=(1, 7),
                           line="1, 2, 3\n"),
             ],
             TokenInfo(NEWLINE,
                       string="\n",
                       start=(1, 7),
                       end=(1, 8),
                       line="1, 2, 3\n"),
         ],
     )
Exemple #11
0
 def test_repeat_1_simple(self) -> None:
     """thing+ requires at least one repetition after the first thing;
     zero repetitions is a syntax error."""
     grammar = """
     start: thing thing+ NEWLINE
     thing: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1 2 3\n", parser_class)
     self.assertEqual(node, [
         [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
         [
             [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
             [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
         ],
         TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
     ])
     # A single NUMBER leaves nothing for 'thing+' to match, so parsing fails.
     with self.assertRaises(SyntaxError):
         parse_string("1\n", parser_class)
Exemple #12
0
 def build_extension(self, grammar_source):
     """Compile *grammar_source* into a C parser extension in the cwd."""
     parsed = parse_string(grammar_source, GrammarParser)
     # setUp() already chdir'ed into the temporary path, so build with a
     # relative path to keep the compiler command lines short.
     generate_parser_c_extension(
         parsed, Path('.'), library_dir=self.library_dir)
 def test_repeat_0_simple(self) -> None:
     """thing* matches zero or more repetitions; zero repetitions fills the
     starred slot with an empty list rather than failing."""
     grammar = """
     start: thing thing* NEWLINE
     thing: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1 2 3\n", parser_class)
     self.assertEqual(
         node,
         [
             TokenInfo(NUMBER,
                       string="1",
                       start=(1, 0),
                       end=(1, 1),
                       line="1 2 3\n"),
             [
                 TokenInfo(NUMBER,
                           string="2",
                           start=(1, 2),
                           end=(1, 3),
                           line="1 2 3\n"),
                 TokenInfo(NUMBER,
                           string="3",
                           start=(1, 4),
                           end=(1, 5),
                           line="1 2 3\n"),
             ],
             TokenInfo(NEWLINE,
                       string="\n",
                       start=(1, 5),
                       end=(1, 6),
                       line="1 2 3\n"),
         ],
     )
     # With a single NUMBER, the starred part matches nothing -> [] slot.
     node = parse_string("1\n", parser_class)
     self.assertEqual(
         node,
         [
             TokenInfo(
                 NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
             [],
             TokenInfo(
                 NEWLINE, string="\n", start=(1, 1), end=(1, 2),
                 line="1\n"),
         ],
     )
 def test_soft_keywords(self) -> None:
     """A double-quoted string in a rule is a soft keyword; the generated C
     parser source must use the expect_soft_keyword helper for it."""
     grammar_source = """
     start: expr+ NEWLINE? ENDMARKER
     expr: "foo"
     """
     grammar = parse_string(grammar_source, GrammarParser)
     parser_source = generate_c_parser_source(grammar)
     # Use a unittest assertion rather than a bare assert: bare asserts are
     # stripped under python -O and give no diagnostic on failure.
     self.assertIn("expect_soft_keyword", parser_source)
Exemple #15
0
def test_alt_optional_operator():
    """A bracketed optional ['+' term] matches when present and yields None
    in its slot when omitted."""
    grammar = """
    start: sum NEWLINE
    sum: term ['+' term]
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 + 2\n", parser_class)
    assert node == [
        [
            [
                TokenInfo(NUMBER,
                          string="1",
                          start=(1, 0),
                          end=(1, 1),
                          line="1 + 2\n")
            ],
            [
                TokenInfo(OP,
                          string="+",
                          start=(1, 2),
                          end=(1, 3),
                          line="1 + 2\n"),
                [
                    TokenInfo(NUMBER,
                              string="2",
                              start=(1, 4),
                              end=(1, 5),
                              line="1 + 2\n")
                ],
            ],
        ],
        TokenInfo(NEWLINE,
                  string="\n",
                  start=(1, 5),
                  end=(1, 6),
                  line="1 + 2\n"),
    ]
    # Without '+', the optional slot is None.
    node = parse_string("1\n", parser_class)
    assert node == [
        [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
         None],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
Exemple #16
0
def test_repeat_0_simple():
    """Pytest variant: thing* accepts several repetitions as well as zero,
    where the starred slot becomes an empty list."""
    grammar = """
    start: thing thing* NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER,
                      string="1",
                      start=(1, 0),
                      end=(1, 1),
                      line="1 2 3\n")
        ],
        [
            [[
                TokenInfo(NUMBER,
                          string="2",
                          start=(1, 2),
                          end=(1, 3),
                          line="1 2 3\n")
            ]],
            [[
                TokenInfo(NUMBER,
                          string="3",
                          start=(1, 4),
                          end=(1, 5),
                          line="1 2 3\n")
            ]],
        ],
        TokenInfo(NEWLINE,
                  string="\n",
                  start=(1, 5),
                  end=(1, 6),
                  line="1 2 3\n"),
    ]
    # Zero repetitions: the starred slot is an empty list, not an error.
    node = parse_string("1\n", parser_class)
    assert node == [
        [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
        [],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]
Exemple #17
0
    def test_parse_trivial_grammar(self) -> None:
        """Visiting a single-rule grammar should touch exactly six nodes."""
        grammar = """
        start: 'a'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()
        visitor.visit(rules)
        # Grammar -> Rule -> Rhs -> Alt -> NamedItem -> StringLeaf
        self.assertEqual(visitor.n_nodes, 6)
Exemple #18
0
 def test_nullable(self) -> None:
     """compute_nullables() flags only rules that can match empty input."""
     grammar_source = """
     start: sign NUMBER
     sign: ['-' | '+']
     """
     grammar: Grammar = parse_string(grammar_source, GrammarParser)
     nullables = compute_nullables(grammar.rules)
     # 'start' always consumes a NUMBER token, so it cannot be nullable;
     # 'sign' is a bracketed optional and can match nothing.
     self.assertNotIn(grammar.rules["start"], nullables)
     self.assertIn(grammar.rules["sign"], nullables)
Exemple #19
0
 def test_mutually_left_recursive(self) -> None:
     """foo and bar reference each other in first position, so both are
     flagged left-recursive, and the generated Python parser still parses
     such input left-associatively."""
     grammar_source = """
     start: foo 'E'
     foo: bar 'A' | 'B'
     bar: foo 'C' | 'D'
     """
     grammar: Grammar = parse_string(grammar_source, GrammarParser)
     out = io.StringIO()
     genr = PythonParserGenerator(grammar, out)
     rules = grammar.rules
     self.assertFalse(rules["start"].left_recursive)
     self.assertTrue(rules["foo"].left_recursive)
     self.assertTrue(rules["bar"].left_recursive)
     genr.generate("<string>")
     # Execute the generated parser source and pull out the parser class.
     ns: Dict[str, Any] = {}
     exec(out.getvalue(), ns)
     parser_class: Type[Parser] = ns["GeneratedParser"]
     # D A C A E: the recursion bottoms out at 'D' and nests leftwards.
     node = parse_string("D A C A E", parser_class)
     self.assertEqual(node, [
         [
             [
                 [
                     [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
                     TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
                 ],
                 TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
             ],
             TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
         ],
         TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
     ])
     # Same shape when the recursion bottoms out via the 'B' alternative.
     node = parse_string("B C A E", parser_class)
     self.assertIsNotNone(node)
     self.assertEqual(node, [
         [
             [
                 [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
                 TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
             ],
             TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
         ],
         TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
     ])
Exemple #20
0
def test_nullable():
    """Only the bracketed-optional 'sign' rule should be nullable."""
    grammar = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    rules = parse_string(grammar, GrammarParser).rules
    out = io.StringIO()
    # Constructing the generator computes nullability as a side effect.
    genr = PythonParserGenerator(rules, out)
    assert rules['start'].nullable is False  # Not None!
    assert rules['sign'].nullable
Exemple #21
0
 def test_error_in_rules(self) -> None:
     """An error raised inside a rule action must propagate to the caller."""
     grammar_source = """
     start: expr+ NEWLINE? ENDMARKER
     expr: NAME {PyTuple_New(-1)}
     """
     grammar = parse_string(grammar_source, GrammarParser)
     extension = generate_parser_c_extension(grammar, Path(self.tmp_path))
     # PyTuple_New(-1) sets SystemError; the parser must surface it.
     with self.assertRaises(SystemError):
         extension.parse_string("a", mode=0)
Exemple #22
0
    def test_parse_trivial_grammar(self):
        """Visiting a one-rule grammar touches exactly six nodes."""
        grammar = """
        start: 'a'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()
        visitor.visit(rules)
        # Grammar -> Rule -> Rhs -> Alt -> NamedItem -> StringLeaf
        assert visitor.n_nodes == 6
Exemple #23
0
def test_nullable():
    """Only the bracketed-optional 'sign' rule is nullable; 'start' is not.

    Fix: the original rebound the name ``grammar`` from the grammar source
    string to the parsed Grammar object, shadowing it mid-function; use a
    distinct name for the source text.
    """
    grammar_source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    grammar = parse_string(grammar_source, GrammarParser)
    out = io.StringIO()
    # Constructing the generator computes rule nullability as a side effect.
    genr = PythonParserGenerator(grammar, out)
    rules = grammar.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
Exemple #24
0
 def test_nullable(self) -> None:
     """PythonParserGenerator should mark only 'sign' as nullable."""
     grammar_source = """
     start: sign NUMBER
     sign: ['-' | '+']
     """
     grammar: Grammar = parse_string(grammar_source, GrammarParser)
     out = io.StringIO()
     # Building the generator runs the nullability computation.
     genr = PythonParserGenerator(grammar, out)
     # 'start' always consumes a NUMBER; 'sign' may match empty input.
     self.assertFalse(grammar.rules["start"].nullable)  # Not None!
     self.assertTrue(grammar.rules["sign"].nullable)
Exemple #25
0
 def test_repeat_with_separator_rules(self) -> None:
     """The gather construct ','.thing+ round-trips through str() and is
     represented as a Gather node in repr()."""
     grammar = """
     start: ','.thing+ NEWLINE
     thing: NUMBER
     """
     rules = parse_string(grammar, GrammarParser).rules
     self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
     self.assertTrue(repr(rules["start"]).startswith(
         "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
     ))
     self.assertEqual(str(rules["thing"]), "thing: NUMBER")
Exemple #26
0
 def test_rule_with_no_collision(self) -> None:
     """Distinct alternatives in a rule must pass sub-rule validation."""
     grammar_source = """
     start: bad_rule
     sum:
         | NAME '-' NAME
         | NAME '+' NAME
     """
     grammar: Grammar = parse_string(grammar_source, GrammarParser)
     validator = SubRuleValidator(grammar)
     # No exception expected: the two alternatives of 'sum' differ.
     for name, rule in grammar.rules.items():
         validator.validate_rule(name, rule)
Exemple #27
0
    def test_parse_repeat1_grammar(self):
        """Visiting a grammar with a repeat-1 ('a'+) item counts 7 nodes."""
        grammar = """
        start: 'a'+
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
        assert visitor.n_nodes == 7
Exemple #28
0
 def test_optional_literal(self) -> None:
     """A trailing optional literal ('+' ?) is either the OP token when
     present or None when absent."""
     grammar = """
     start: sum NEWLINE
     sum: term '+' ?
     term: NUMBER
     """
     parser_class = make_parser(grammar)
     node = parse_string("1+\n", parser_class)
     self.assertEqual(node, [
         [
             [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
             TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
         ],
         TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
     ])
     # Without the '+', the optional slot is None.
     node = parse_string("1\n", parser_class)
     self.assertEqual(node, [
         [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
         TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
     ])
Exemple #29
0
def test_extension_name(tmp_path: PurePath) -> None:
    """@modulename must rename both the PyInit symbol and the module name
    in the generated C parser source."""
    grammar_source = """
    @modulename 'alternative_name'
    start: expr+ NEWLINE? ENDMARKER
    expr: x=NAME
    """
    grammar = parse_string(grammar_source, GrammarParser)
    parser_source = generate_c_parser_source(grammar)
    assert "PyInit_alternative_name" in parser_source
    assert '.m_name = "alternative_name"' in parser_source
 def test_unreachable_explicit(self) -> None:
     """An explicit UNREACHABLE action is replaced by the generator's
     unreachable_formatting text in the emitted parser source."""
     source = """
     start: NAME { UNREACHABLE }
     """
     grammar = parse_string(source, GrammarParser)
     out = io.StringIO()
     genr = PythonParserGenerator(
         grammar, out, unreachable_formatting="This is a test")
     genr.generate("<string>")
     self.assertIn("This is a test", out.getvalue())