Example 1
def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    # TODO: Allow other extensions; pass the output type as an argument.
    if not output_file.endswith((".c", ".py")):
        raise RuntimeError("Your output file must either be a .c or .py file")
    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        else:
            assert False  # Should have been checked above
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(output_file,
                            verbose=verbose_c_extension,
                            keep_asserts=keep_asserts_in_extension)

    return gen
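
A minimal invocation sketch for context (the import paths are assumptions, since pegen's module layout has varied across versions, and the grammar file name is a placeholder; the parsing steps mirror Example 9 below):

import tokenize

# Assumed import locations -- not confirmed by the examples on this page.
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.tokenizer import Tokenizer

with open("data/simple.gram") as file:  # placeholder grammar file
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))
    grammar = GrammarParser(tokenizer).start()

# Writes the generated parser to parse.py and returns the generator object.
gen = build_generator(tokenizer, grammar, "data/simple.gram", "parse.py")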
Example 2
def generate_parser_c_extension(grammar: Grammar,
                                path: pathlib.PurePath,
                                debug: bool = False) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(grammar,
                                ALL_TOKENS,
                                EXACT_TOKENS,
                                NON_EXACT_TOKENS,
                                file,
                                debug=debug)
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source),
                                         build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
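
A usage sketch for this variant (assuming `grammar` was parsed beforehand, as in the sketch under Example 1); a fresh temporary directory satisfies the empty-directory assertion:

import pathlib
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    # A newly created temporary directory is guaranteed to be empty.
    extension = generate_parser_c_extension(grammar, pathlib.Path(tmp))
    # The docstring promises a parse_string() method; its exact signature
    # is not shown in this excerpt.
    tree = extension.parse_string("1 + 2")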
Example 3
def build_generator(
    tokenizer: Tokenizer,
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(
            output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
        )

    return gen
Example 4
def generate_parser_c_extension(rules, path):
    """Generate a parser c extension for the given rules in the given path"""
    source = path / "parse.c"
    with open(source, "w") as file:
        genr = CParserGenerator(rules, file)
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source),
                                         build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
Example 5
def generate_parser_c_extension(grammar: Grammar,
                                path: pathlib.PurePath) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    source = path / "parse.c"
    with open(source, "w") as file:
        genr = CParserGenerator(grammar, file)
        genr.generate("parse.c")
    extension_path = compile_c_extension(str(source),
                                         build_dir=str(path / "build"))
    extension = import_file("parse", extension_path)
    return extension
Example 6
def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen
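
A hypothetical call for this variant; both input paths are placeholders (the tokens file is assumed to follow CPython's Grammar/Tokens format), and `grammar` is again assumed to be parsed already:

gen = build_c_generator(
    grammar,
    "Grammar/python.gram",   # grammar_file: placeholder path
    "Grammar/Tokens",        # tokens_file: placeholder path
    "parse.c",
    compile_extension=True,  # compiles in a throwaway temporary build directory
)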
Example 7
def build_generator(
    tokenizer,
    rules,
    grammar_file,
    output_file,
    compile_extension=False,
    verbose_c_extension=False,
):
    with open(output_file, "w") as file:
        gen: ParserGenerator
        if output_file.endswith(".c"):
            gen = CParserGenerator(rules.rules, file)
        elif output_file.endswith(".py"):
            gen = PythonParserGenerator(rules.rules, file)
        else:
            raise Exception("Your output file must either be a .c or .py file")
        gen.generate(grammar_file)

    if compile_extension and output_file.endswith(".c"):
        compile_c_extension(output_file, verbose=verbose_c_extension)

    return gen
Example 8
def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, out)
    genr.generate("<string>")
    return out.getvalue()
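
This variant keeps everything in memory. A one-line sketch, again assuming a pre-parsed `grammar`:

# The generated C translation unit is returned as a string rather than
# written to disk; write it out or inspect it as needed.
c_source = generate_c_parser_source(grammar)
print(c_source[:100])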
Example 9
def main() -> None:
    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    with open(args.filename) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"
    with open(output, 'w') as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")