# Stdlib imports used by the excerpts below. The pegen-internal names
# (Tokenizer, grammar_tokenizer, GrammarParser, ASTGrammarPrinter,
# ParserGenerator, CParserGenerator, PythonParserGenerator,
# compile_c_extension, print_memstats) come from the pegen package in the
# full sources; their exact module paths are omitted here.
import argparse
import sys
import time
import token
import tokenize
import traceback


def run_parser(file, parser_class, *, verbose=False):
    # Run a parser on a file (stream).
    # Note that this always recognizes {...} as CURLY_STUFF.
    tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result

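def _demo_run_parser():
    # Usage sketch for run_parser (a hypothetical helper, not part of the
    # original sources). Assumes GrammarParser is importable and that
    # "grammar.gram" exists; on a bad grammar, run_parser raises the error
    # built by parser.make_syntax_error().
    with open("grammar.gram") as file:
        rules = run_parser(file, GrammarParser, verbose=True)
    print(rules)
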
def build_parser(grammar_file, verbose_tokenizer=False, verbose_parser=False):
    with open(grammar_file) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            raise parser.make_syntax_error(grammar_file)
    return rules, parser, tokenizer

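def _demo_build_parser():
    # Usage sketch for build_parser (a hypothetical helper, not part of the
    # original sources). All three objects are returned so the caller can
    # inspect the tokenizer and parser caches after the parse; the filename
    # "grammar.gram" is an assumption.
    rules, parser, tokenizer = build_parser("grammar.gram", verbose_parser=True)
    print(f"{len(rules.rules)} rules; {len(tokenizer._tokens)} tokens cached")
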
def main() -> None:
    args = argparser.parse_args()
    with open(args.filename) as file:
        tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
        parser = GrammarParser(tokenizer)
        rules = parser.start()
        if rules is None:
            print("ERROR: Failed to parse grammar file", file=sys.stderr)
            sys.exit(1)
    visitor = ASTGrammarPrinter()
    visitor.print_grammar_ast(rules)

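# The main() above reads a module-level argparser that is not shown in this
# excerpt. A minimal sketch of what it needs (the description string is an
# assumption; only the filename argument is actually accessed):
argparser = argparse.ArgumentParser(
    description="Print the AST for a given grammar file")
argparser.add_argument("filename", help="Grammar file to parse")
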
def main() -> None:
    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    with open(args.filename) as file:
        tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)),
                              verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"

    with open(output, 'w') as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")

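# Likewise, this main() assumes a richer module-level argparser. The flag
# names below are inferred from the attribute accesses in the code
# (args.verbose, args.quiet, args.output, args.cpython,
# args.compile_extension, args.filename); the help strings are assumptions:
argparser = argparse.ArgumentParser(description="Generate a parser from a grammar")
argparser.add_argument("-v", "--verbose", action="count", default=0,
                       help="Print timing stats; repeat for more debug output")
argparser.add_argument("-q", "--quiet", action="store_true",
                       help="Don't print the parsed grammar")
argparser.add_argument("-o", "--output", metavar="OUT",
                       help="Where to write the generated parser (default: parse.py or parse.c)")
argparser.add_argument("--cpython", action="store_true",
                       help="Generate C code instead of Python")
argparser.add_argument("--compile-extension", action="store_true",
                       help="Compile the generated C code into an extension module")
argparser.add_argument("filename", help="Grammar description file")
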
def simple_parser_main(parser_class):
    argparser = argparse.ArgumentParser()
    argparser.add_argument('-v', '--verbose', action='count', default=0,
                           help="Print timing stats; repeat for more debug output")
    argparser.add_argument('-q', '--quiet', action='store_true',
                           help="Don't print the parsed program")
    argparser.add_argument('-G', '--grammar-parser', action='store_true',
                           help="Recognize { ... } stuff; use for meta-grammar")
    argparser.add_argument('filename', help="Input file ('-' to use stdin)")

    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4
    t0 = time.time()

    filename = args.filename
    if filename == '' or filename == '-':
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)
    try:
        tokengen = tokenize.generate_tokens(file.readline)
        if args.grammar_parser:
            tokengen = grammar_tokenizer(tokengen)
        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
        parser = parser_class(tokenizer, verbose=verbose_parser)
        tree = parser.start()
        try:
            if file.isatty():
                endpos = 0
            else:
                endpos = file.tell()
        except IOError:
            endpos = 0
    finally:
        if file is not sys.stdin:
            file.close()

    t1 = time.time()

    if not tree:
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        sys.exit(1)

    if not args.quiet:
        print(tree)

    if verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")

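if __name__ == "__main__":
    # Example entry point (a sketch): exercise a generated parser class from
    # the command line. "parse" and "GeneratedParser" are hypothetical names
    # for the module and class emitted by PythonParserGenerator above.
    from parse import GeneratedParser
    simple_parser_main(GeneratedParser)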