Example #1
def run_parser(file, parser_class, *, verbose=False):
    # Run a parser on a file (stream).
    # Note that this always recognizes {...} as CURLY_STUFF.
    tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result
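
run_parser only wires existing pieces together: a token stream from the standard tokenize module, wrapped by grammar_tokenizer, fed to a Tokenizer, and then handed to the parser class. The self-contained snippet below illustrates the first of those pieces, the raw stdlib token stream that everything else consumes; it uses only the standard library, nothing from the project.

import io
import token
import tokenize

# tokenize.generate_tokens(readline) yields TokenInfo tuples; this is the raw
# stream that grammar_tokenizer and Tokenizer wrap in the example above.
source = "x = 1 + 2\n"
for tok in tokenize.generate_tokens(io.StringIO(source).readline):
    print(token.tok_name[tok.type], repr(tok.string))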
Example #2
def build_parser(grammar_file, verbose_tokenizer=False, verbose_parser=False):
    with open(grammar_file) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()

        if not rules:
            raise parser.make_syntax_error(grammar_file)

    return rules, parser, tokenizer
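
A hedged usage sketch for build_parser: the grammar path is a placeholder, the function is assumed to be importable from wherever it is defined, and the rules.rules mapping is iterated the same way Example #4 does.

# Sketch only: "metagrammar.gram" is a placeholder path, and build_parser is
# assumed to be in scope (it is the function defined just above).
rules, parser, tokenizer = build_parser("metagrammar.gram", verbose_parser=True)
for rule in rules.rules.values():
    print(rule)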
Example #3
def main() -> None:
    args = argparser.parse_args()
    with open(args.filename) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)))
        parser = GrammarParser(tokenizer)
        rules = parser.start()

    if rules is None:
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
        sys.exit(1)

    visitor = ASTGrammarPrinter()
    visitor.print_grammar_ast(rules)
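
The same printing step can also be driven without this command-line wrapper. Below is a hedged sketch that reuses build_parser from Example #2; the grammar file name is a placeholder, and the import locations of build_parser and ASTGrammarPrinter are assumptions.

# Sketch only: "my_grammar.gram" is a placeholder; both helpers are assumed
# to be importable from the same project as the examples.
rules, parser, tokenizer = build_parser("my_grammar.gram")
ASTGrammarPrinter().print_grammar_ast(rules)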
Example #4
def main() -> None:
    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    with open(args.filename) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"
    with open(output, 'w') as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")
Example #5
def simple_parser_main(parser_class):
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        '-v', '--verbose',
        action='count', default=0,
        help="Print timing stats; repeat for more debug output")
    argparser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Don't print the parsed program")
    argparser.add_argument(
        '-G', '--grammar-parser',
        action='store_true',
        help="Recognize { ... } stuff; use for meta-grammar")
    argparser.add_argument('filename', help="Input file ('-' to use stdin)")

    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4

    t0 = time.time()

    filename = args.filename
    if filename == '' or filename == '-':
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)
    try:
        tokengen = tokenize.generate_tokens(file.readline)
        if args.grammar_parser:
            tokengen = grammar_tokenizer(tokengen)
        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
        parser = parser_class(tokenizer, verbose=verbose_parser)
        tree = parser.start()
        try:
            if file.isatty():
                endpos = 0
            else:
                endpos = file.tell()
        except IOError:
            endpos = 0
    finally:
        if file is not sys.stdin:
            file.close()

    t1 = time.time()

    if not tree:
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        sys.exit(1)

    if not args.quiet:
        print(tree)

    if verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")