Ejemplo n.º 1
0
def generate_parser(rules):
    """Build a parser class from *rules* and return it."""
    # Emit the generated parser source into an in-memory buffer.
    buffer = io.StringIO()
    generator = PythonParserGenerator(rules, buffer)
    generator.generate("<string>")

    # Execute the emitted source and pull the parser class out of it.
    namespace = {}
    exec(buffer.getvalue(), namespace)
    return namespace['GeneratedParser']
Ejemplo n.º 2
0
def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Generate and return a parser class for *grammar*."""
    # Write the generated parser source to an in-memory buffer.
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Execute the source and extract the generated class.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]
Ejemplo n.º 3
0
def test_unreachable_explicit() -> None:
    """An explicit UNREACHABLE action is rendered with the given formatting."""
    source = """
    start: NAME { UNREACHABLE }
    """
    grammar = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    generator = PythonParserGenerator(
        grammar, buffer, unreachable_formatting="This is a test"
    )
    generator.generate("<string>")
    assert "This is a test" in buffer.getvalue()
Ejemplo n.º 4
0
def test_unreachable_implicit3() -> None:
    """No unreachable formatting appears when the action is an explicit None."""
    source = """
    start: NAME | invalid_input { None }
    invalid_input: NUMBER
    """
    grammar = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    generator = PythonParserGenerator(
        grammar, buffer, unreachable_formatting="This is a test"
    )
    generator.generate("<string>")
    assert "This is a test" not in buffer.getvalue()
Ejemplo n.º 5
0
 def test_unreachable_implicit2(self) -> None:
     """Actions inside an invalid_... rule get the unreachable formatting."""
     source = """
     start: NAME | '(' invalid_input ')'
     invalid_input: NUMBER { None }
     """
     grammar = parse_string(source, GrammarParser)
     buffer = io.StringIO()
     generator = PythonParserGenerator(
         grammar, buffer, unreachable_formatting="This is a test")
     generator.generate("<string>")
     self.assertIn("This is a test", buffer.getvalue())
Ejemplo n.º 6
0
def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
) -> ParserGenerator:
    """Write a Python parser for *grammar* to *output_file*; return the generator."""
    gen: ParserGenerator
    with open(output_file, "w") as stream:
        gen = PythonParserGenerator(grammar, stream)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen
Ejemplo n.º 7
0
def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Generate a parser for *grammar*, choosing C or Python by file suffix.

    Optionally compiles the generated C source into an extension module.
    """
    gen: ParserGenerator
    with open(output_file, "w") as stream:
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, stream, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, stream)  # TODO: skip_actions
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(
            output_file,
            verbose=verbose_c_extension,
            keep_asserts=keep_asserts_in_extension,
        )

    return gen
Ejemplo n.º 8
0
def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Generate a C or Python parser, selected by *output_file*'s extension."""
    # TODO: Allow other extensions; pass the output type as an argument.
    if not output_file.endswith((".c", ".py")):
        raise RuntimeError("Your output file must either be a .c or .py file")
    gen: ParserGenerator
    with open(output_file, "w") as stream:
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, stream, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, stream)  # TODO: skip_actions
        else:
            assert False  # Should have been checked above
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(
            output_file,
            verbose=verbose_c_extension,
            keep_asserts=keep_asserts_in_extension,
        )

    return gen
Ejemplo n.º 9
0
def generate_parser(grammar: Grammar,
                    parser_path: Optional[str] = None,
                    parser_name: str = "GeneratedParser") -> Type[Parser]:
    """Generate a parser class for *grammar* and return it.

    When *parser_path* is given, the generated source is also written to
    that file and the class is imported from it; otherwise the source is
    exec'd in a fresh namespace.
    """
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")
    source = buffer.getvalue()

    if parser_path:
        # Persist the source and import the class through the file system.
        with open(parser_path, "w") as f:
            f.write(source)
        mod = import_file("py_parser", parser_path)
        return getattr(mod, parser_name)

    namespace: Dict[str, Any] = {}
    exec(source, namespace)
    return namespace[parser_name]
Ejemplo n.º 10
0
 def test_mutually_left_recursive(self) -> None:
     """Mutually left-recursive rules parse and build left-nested trees."""
     grammar_source = """
     start: foo 'E'
     foo: bar 'A' | 'B'
     bar: foo 'C' | 'D'
     """
     grammar: Grammar = parse_string(grammar_source, GrammarParser)
     out = io.StringIO()
     genr = PythonParserGenerator(grammar, out)
     rules = grammar.rules
     # 'foo' and 'bar' recurse through each other; 'start' only refers to them.
     self.assertFalse(rules["start"].left_recursive)
     self.assertTrue(rules["foo"].left_recursive)
     self.assertTrue(rules["bar"].left_recursive)
     genr.generate("<string>")
     # Load the generated parser class from the emitted source.
     ns: Dict[str, Any] = {}
     exec(out.getvalue(), ns)
     parser_class: Type[Parser] = ns["GeneratedParser"]
     node = parse_string("D A C A E", parser_class)
     # Expected left-nested shape: ((((D A) C) A) E).
     self.assertEqual(node, [
         [
             [
                 [
                     [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
                     TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
                 ],
                 TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
             ],
             TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
         ],
         TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
     ])
     node = parse_string("B C A E", parser_class)
     self.assertIsNotNone(node)
     # Same shape starting from the non-recursive 'B' alternative: (((B C) A) E).
     self.assertEqual(node, [
         [
             [
                 [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
                 TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
             ],
             TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
         ],
         TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
     ])
Ejemplo n.º 11
0
def test_nullable():
    """'sign' (an optional-only rule) is nullable; 'start' is not."""
    source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    rules = parse_string(source, GrammarParser).rules
    buffer = io.StringIO()
    # Constructing the generator is what triggers the nullability analysis.
    PythonParserGenerator(rules, buffer)
    assert rules['start'].nullable is False  # Not None!
    assert rules['sign'].nullable
Ejemplo n.º 12
0
def test_nullable():
    """'sign' (an optional-only rule) is nullable; 'start' is not."""
    source = """
    start: sign NUMBER
    sign: ['-' | '+']
    """
    parsed = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    # Constructing the generator is what triggers the nullability analysis.
    PythonParserGenerator(parsed, buffer)
    rules = parsed.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
Ejemplo n.º 13
0
 def test_nullable(self) -> None:
     """'sign' (an optional-only rule) is nullable; 'start' is not."""
     source = """
     start: sign NUMBER
     sign: ['-' | '+']
     """
     grammar: Grammar = parse_string(source, GrammarParser)
     buffer = io.StringIO()
     # Constructing the generator is what triggers the nullability analysis.
     PythonParserGenerator(grammar, buffer)
     rules = grammar.rules
     self.assertFalse(rules["start"].nullable)  # Not None!
     self.assertTrue(rules["sign"].nullable)
Ejemplo n.º 14
0
def test_advanced_left_recursive():
    """Left recursion through a nullable 'sign' prefix is detected on 'start'."""
    source = """
    start: NUMBER | sign start
    sign: ['-']
    """
    rules = parse_string(source, GrammarParser).rules
    buffer = io.StringIO()
    # Constructing the generator runs the nullability/recursion analysis.
    PythonParserGenerator(rules, buffer)
    assert rules['start'].nullable is False  # Not None!
    assert rules['sign'].nullable
    assert rules['start'].left_recursive
    assert not rules['sign'].left_recursive
Ejemplo n.º 15
0
def test_advanced_left_recursive():
    """Left recursion through a nullable 'sign' prefix is detected on 'start'."""
    source = """
    start: NUMBER | sign start
    sign: ['-']
    """
    parsed = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    # Constructing the generator runs the nullability/recursion analysis.
    PythonParserGenerator(parsed, buffer)
    rules = parsed.rules
    assert rules["start"].nullable is False  # Not None!
    assert rules["sign"].nullable
    assert rules["start"].left_recursive
    assert not rules["sign"].left_recursive
Ejemplo n.º 16
0
 def test_advanced_left_recursive(self) -> None:
     """Left recursion through a nullable 'sign' prefix is detected on 'start'."""
     source = """
     start: NUMBER | sign start
     sign: ['-']
     """
     grammar: Grammar = parse_string(source, GrammarParser)
     buffer = io.StringIO()
     # Constructing the generator runs the nullability/recursion analysis.
     PythonParserGenerator(grammar, buffer)
     rules = grammar.rules
     self.assertFalse(rules["start"].nullable)  # Not None!
     self.assertTrue(rules["sign"].nullable)
     self.assertTrue(rules["start"].left_recursive)
     self.assertFalse(rules["sign"].left_recursive)
Ejemplo n.º 17
0
def test_nasty_mutually_left_recursive():
    """A grammar whose PEG semantics reject 'x - + =' raises SyntaxError."""
    # This grammar does not recognize 'x - + =', much to my chagrin.
    # But that's the way PEG works.
    # [Breathlessly]
    # The problem is that the toplevel target call
    # recurses into maybe, which recognizes 'x - +',
    # and then the toplevel target looks for another '+',
    # which fails, so it retreats to NAME,
    # which succeeds, so we end up just recognizing 'x',
    # and then start fails because there's no '=' after that.
    source = """
    start: target '='
    target: maybe '+' | NAME
    maybe: maybe '-' | target
    """
    parsed = parse_string(source, GrammarParser)
    buffer = io.StringIO()
    generator = PythonParserGenerator(parsed, buffer)
    generator.generate("<string>")
    namespace = {}
    exec(buffer.getvalue(), namespace)
    parser_class = namespace["GeneratedParser"]
    with pytest.raises(SyntaxError):
        parse_string("x - + =", parser_class)
Ejemplo n.º 18
0
def build_generator(
    tokenizer,
    rules,
    grammar_file,
    output_file,
    compile_extension=False,
    verbose_c_extension=False,
):
    """Write a parser for *rules* to *output_file* (C for .c, Python for .py)."""
    gen: ParserGenerator
    with open(output_file, "w") as stream:
        if output_file.endswith(".py"):
            gen = PythonParserGenerator(rules.rules, stream)
        elif output_file.endswith(".c"):
            gen = CParserGenerator(rules.rules, stream)
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(output_file, verbose=verbose_c_extension)

    return gen
Ejemplo n.º 19
0
def test_mutually_left_recursive():
    """Mutually left-recursive rules ('foo'/'bar') produce left-nested trees."""
    source = """
    start: foo 'E'
    foo: bar 'A' | 'B'
    bar: foo 'C' | 'D'
    """
    grammar = parse_string(source, GrammarParser)
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    rules = grammar.rules
    # 'foo' and 'bar' recurse through each other; 'start' only refers to them.
    assert not rules["start"].left_recursive
    assert rules["foo"].left_recursive
    assert rules["bar"].left_recursive
    genr.generate("<string>")
    # Load the generated parser class from the emitted source.
    ns = {}
    exec(out.getvalue(), ns)
    parser_class = ns["GeneratedParser"]
    node = parse_string("D A C A E", parser_class)
    # Expected left-nested shape: ((((D A) C) A) E).
    assert node == [
        [
            [
                [
                    [TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
                    TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
                ],
                TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
            ],
            TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
        ],
        TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
    ]
    node = parse_string("B C A E", parser_class)
    # Fix: compare against None with 'is not' (PEP 8 / E711), not '!='.
    assert node is not None
    # Same shape starting from the non-recursive 'B' alternative: (((B C) A) E).
    assert node == [
        [
            [
                [TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
                TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
            ],
            TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
        ],
        TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
    ]
Ejemplo n.º 20
0
def main() -> None:
    """Command-line entry point: parse a grammar file and emit a parser.

    Reads the grammar file named on the command line, generates either a C
    or a Python parser (selected by --cpython), optionally compiles the C
    extension, and prints diagnostics at higher verbosity levels.
    """
    args = argparser.parse_args()
    verbose = args.verbose
    # Verbosity: level 2 traces the parser, level 3 the tokenizer, 4+ both.
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    # Parse the grammar file into rule objects.
    with open(args.filename) as file:
        tokenizer = Tokenizer(grammar_tokenizer(
            tokenize.generate_tokens(file.readline)),
                              verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            # Report the grammar's syntax error like a traceback and bail out.
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    # Default the output filename to match the target language.
    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"
    with open(output, 'w') as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        # Dump the FIRST-set graph and its strongly connected components;
        # an SCC of size > 1 marks indirect (mutual) left recursion.
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        # Timing / throughput / cache-size diagnostics.
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        # Don't count the trailing ENDMARKER as a source line.
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")
Ejemplo n.º 21
0
def test_mutually_left_recursive():
    """Mutually left-recursive rules ('foo'/'bar') produce left-nested trees."""
    source = """
    start: foo 'E'
    foo: bar 'A' | 'B'
    bar: foo 'C' | 'D'
    """
    rules = parse_string(source, GrammarParser).rules
    out = io.StringIO()
    genr = PythonParserGenerator(rules, out)
    # 'foo' and 'bar' recurse through each other; 'start' only refers to them.
    assert not rules['start'].left_recursive
    assert rules['foo'].left_recursive
    assert rules['bar'].left_recursive
    genr.generate("<string>")
    # Load the generated parser class from the emitted source.
    ns = {}
    exec(out.getvalue(), ns)
    parser_class = ns['GeneratedParser']
    node = parse_string("D A C A E", parser_class)
    # Expected left-nested shape: ((((D A) C) A) E).
    assert node == [
        [
            [
                [
                    [TokenInfo(type=NAME, string='D', start=(1, 0), end=(1, 1), line='D A C A E')],
                    TokenInfo(type=NAME, string='A', start=(1, 2), end=(1, 3), line='D A C A E'),
                ],
                TokenInfo(type=NAME, string='C', start=(1, 4), end=(1, 5), line='D A C A E'),
            ],
            TokenInfo(type=NAME, string='A', start=(1, 6), end=(1, 7), line='D A C A E'),
        ],
        TokenInfo(type=NAME, string='E', start=(1, 8), end=(1, 9), line='D A C A E'),
    ]
    node = parse_string("B C A E", parser_class)
    # Fix: compare against None with 'is not' (PEP 8 / E711), not '!='.
    assert node is not None
    # Same shape starting from the non-recursive 'B' alternative: (((B C) A) E).
    assert node == [
        [
            [
                [TokenInfo(type=NAME, string='B', start=(1, 0), end=(1, 1), line='B C A E')],
                TokenInfo(type=NAME, string='C', start=(1, 2), end=(1, 3), line='B C A E'),
            ],
            TokenInfo(type=NAME, string='A', start=(1, 4), end=(1, 5), line='B C A E'),
        ],
        TokenInfo(type=NAME, string='E', start=(1, 6), end=(1, 7), line='B C A E'),
    ]