def test_last_non_whitespace():
    source = io.StringIO("\n1\n2")
    t = Tokenizer(generate_tokens(source.readline))
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1\n")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1\n")
    assert t.getnext() == TokenInfo(NEWLINE, "\n", (2, 1), (2, 2), "1\n")
    assert t.get_last_non_whitespace_token() == TokenInfo(
        NUMBER, "1", (2, 0), (2, 1), "1\n"
    )
def test_mark_reset():
    source = io.StringIO("\n1 2")
    t = Tokenizer(generate_tokens(source.readline))
    index = t.mark()
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    t.reset(index)
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
def compile(
    source,
    filename="<unknown>",
    symbol="file",
    verbose_tokenizer=False,
    verbose_parser=False,
    py_version=None,
):
    start_time = time.time_ns()
    token_list = merge_operators(generate_tokens(StringIO(source).readline))
    tokenizer = Tokenizer(iter(token_list), verbose=verbose_tokenizer)
    parser = PythonParser(
        tokenizer, filename=filename, verbose=verbose_parser, py_version=py_version
    )
    try:
        return parser.parse(symbol)
    except SyntaxError as syntax_error:
        if parser._exception is None and str(syntax_error) == "invalid syntax":
            raise parser.make_syntax_error("unknown syntax error") from None
        else:
            raise
    finally:
        end_time = time.time_ns()
        log.debug(f"Compile {filename} took {(end_time - start_time) / 1e6:.2f} ms")
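A minimal usage sketch for the compile() helper above (which shadows the
builtin of the same name). The source string and filename are illustrative
only; nothing beyond the definitions in this module is assumed.

def example_compile_usage():
    # Hypothetical driver: parse a tiny program and print the resulting tree.
    tree = compile("x = 1 + 2\n", filename="<example>")
    print(tree)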
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("grammar_file")
    parser.add_argument("tokens_file")
    parser.add_argument("output_file")
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    with open(args.grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=args.verbose)
        parser = GrammarParser(tokenizer, verbose=args.verbose)
        grammar = parser.start()
    if not grammar:
        sys.exit("Failed to generate grammar")

    with open(args.tokens_file, "r") as tok_file:
        all_tokens, exact_tokens, non_exact_tokens = generate_token_definitions(tok_file)

    with open(args.output_file, "w") as file:
        gen = JavaParserGenerator(
            grammar, all_tokens, exact_tokens, non_exact_tokens, file, debug=args.debug
        )
        gen.generate(os.path.relpath(args.grammar_file, os.path.dirname(args.output_file)))
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    # Run a parser on a file (stream).
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result
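A hedged example of driving run_parser() with an in-memory stream. Despite the
IO[bytes] annotation (worked around above), a text stream is what the
tokenizer consumes at runtime. GeneratedParser is an assumed name standing in
for whatever Parser subclass the generator emitted.

def example_run_parser():
    import io  # the surrounding module may already import this

    with io.StringIO("1 + 2\n") as file:
        tree = run_parser(file, GeneratedParser, verbose=False)
    print(tree)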
def test_async_comprehension(python_parser_cls):
    temp = io.StringIO("""[a async for a in b if c]""")
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 5))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Async comprehensions are" in e.exconly()
def test_generic_decorators(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 8))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Generic decorator are" in e.exconly()
def run_parser(file, parser_class, *, verbose=False):
    # Run a parser on a file (stream).
    # Note that this always recognizes {...} as CURLY_STUFF.
    tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error()
    return result
def test_assignment_operator(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 7))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "The ':=' operator is" in e.exconly()
def test_pos_only_args(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 7))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Positional only arguments are" in e.exconly()
def test_variable_annotation(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 5))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Variable annotation syntax is" in e.exconly()
def test_async(python_parser_cls, source, message):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 4))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert message in e.exconly()
def test_await(python_parser_cls):
    temp = io.StringIO("await b")
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 4))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Await expressions are" in e.exconly()
def test_match_statement(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 9))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Pattern matching is" in e.exconly()
def test_parenthesized_with_items(python_parser_cls, source):
    temp = io.StringIO(source)
    tokengen = tokenize.generate_tokens(temp.readline)
    tokenizer = Tokenizer(tokengen, verbose=False)
    pp = python_parser_cls(tokenizer, py_version=(3, 8))
    with pytest.raises(SyntaxError) as e:
        pp.parse("file")
    assert "Parenthesized with items" in e.exconly()
def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()
        if not grammar:
            raise parser.make_syntax_error(grammar_file)
    return grammar, parser, tokenizer
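A sketch of calling build_parser(); the grammar path is hypothetical. Grammar
objects expose their rules as a name-to-rule mapping, as the generator code
further down relies on.

def example_build_parser():
    grammar, parser, tokenizer = build_parser("data/example.gram")
    print(sorted(grammar.rules))  # rule names, in sorted order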
def test_peek_getnext():
    source = io.StringIO("# test\n1")
    t = Tokenizer(generate_tokens(source.readline))
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1")
    assert t.peek() == TokenInfo(NEWLINE, "", (2, 1), (2, 2), "")
    assert t.getnext() == TokenInfo(NEWLINE, "", (2, 1), (2, 2), "")
def build_parser(grammar_file, verbose_tokenizer=False, verbose_parser=False):
    with open(grammar_file) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            raise parser.make_syntax_error(grammar_file)
    return rules, parser, tokenizer
def try_our_parser(source: str) -> Tuple[Optional[Exception], Parser]:
    file = io.StringIO(source)
    tokengen = tokenize.generate_tokens(file.readline)
    tokenizer = Tokenizer(tokengen)
    parser = GeneratedParser(tokenizer)
    try:
        tree = parser.start()
    except Exception as err:
        return err, parser
    if tree:
        ## import pprint; pprint.pprint(tree)
        return None, parser
    else:
        return make_improved_syntax_error(parser, "<string>"), parser
def main() -> None:
    args = argparser.parse_args()
    with open(args.filename) as file:
        tokenizer = Tokenizer(grammar_tokenizer(tokenize.generate_tokens(file.readline)))
        parser = GrammarParser(tokenizer)
        rules = parser.start()
    if rules is None:
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
        sys.exit(1)
    visitor = ASTGrammarPrinter()
    visitor.print_grammar_ast(rules)
def main():
    if len(sys.argv) == 3:
        grammar_files = map(lambda f: open(f), sys.argv[1:])
    elif len(sys.argv) == 2 and not sys.stdin.isatty():
        grammar_files = [sys.stdin, open(sys.argv[1])]
    else:
        sys.exit("\n".join([
            "Usage:",
            f"\t\t{sys.argv[0]} GRAMMAR_FILE_OLD GRAMMAR_FILE_NEW",
            "\tor",
            f"\t\tcat GRAMMAR_FILE_OLD | {sys.argv[0]} GRAMMAR_FILE_NEW",
        ]))

    grammars = []
    for grammar_file in grammar_files:
        with grammar_file as file:
            tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))
            parser = GrammarParser(tokenizer)
            grammar = parser.start()
            if not grammar:
                # Report the file name rather than the file object's repr.
                sys.exit(f"Failed to parse {grammar_file.name}")
            grammars.append(grammar)

    DiffVisitor(*grammars).diff()
def parse(filepath):
    with open(filepath) as f:
        tokengen = tokenize.generate_tokens(f.readline)
        tokenizer = Tokenizer(tokengen, verbose=False)
        # verbose=False here too; the original referenced an undefined
        # `verbose` name, which would raise NameError.
        parser = GeneratedParser(tokenizer, verbose=False)
        return parser.start()
def simple_parser_main(parser_class: Type[Parser]) -> None:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Print timing stats; repeat for more debug output",
    )
    argparser.add_argument(
        "-q", "--quiet", action="store_true", help="Don't print the parsed program"
    )
    argparser.add_argument("filename", help="Input file ('-' to use stdin)")

    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4

    t0 = time.time()

    filename = args.filename
    if filename == "" or filename == "-":
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)
    try:
        tokengen = tokenize.generate_tokens(file.readline)
        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
        parser = parser_class(tokenizer, verbose=verbose_parser)
        tree = parser.start()
        try:
            if file.isatty():
                endpos = 0
            else:
                endpos = file.tell()
        except IOError:
            endpos = 0
    finally:
        if file is not sys.stdin:
            file.close()

    t1 = time.time()

    if not tree:
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        sys.exit(1)

    if not args.quiet:
        print(tree)

    if verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
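Generated parser modules typically expose a command-line entry point by
delegating to simple_parser_main(); a minimal sketch, assuming the generated
class is named GeneratedParser:

if __name__ == "__main__":
    # Run the generated parser as a script: python parse.py [-v] [-q] FILENAME
    simple_parser_main(GeneratedParser)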
def make_parser(source: str) -> Parser:
    file = io.StringIO(source)
    tokengen = tokenize.generate_tokens(file.readline)
    tokenizer = Tokenizer(tokengen)
    return GeneratedParser(tokenizer)
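Usage sketch for make_parser(): calling start() on the returned parser yields
the parse tree, or None when the input does not match the grammar. The
expression below is illustrative only.

def example_make_parser():
    parser = make_parser("1 + 2\n")
    tree = parser.start()
    assert tree is not None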
def main() -> None:
    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4

    t0 = time.time()

    with open(args.filename) as file:
        tokenizer = Tokenizer(
            grammar_tokenizer(tokenize.generate_tokens(file.readline)),
            verbose=verbose_tokenizer,
        )
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        rules = parser.start()
        if not rules:
            err = parser.make_syntax_error(args.filename)
            traceback.print_exception(err.__class__, err, None)
            sys.exit(1)
        endpos = file.tell()

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for rule in rules.rules.values():
                print(" ", repr(rule))
        print("Clean Grammar:")
        for rule in rules.rules.values():
            print(" ", rule)

    output = args.output
    if not output:
        if args.cpython:
            output = "parse.c"
        else:
            output = "parse.py"
    with open(output, "w") as file:
        gen: ParserGenerator
        if args.cpython:
            gen = CParserGenerator(rules.rules, file)
        else:
            gen = PythonParserGenerator(rules.rules, file)
        gen.generate(args.filename)

    if args.cpython and args.compile_extension:
        compile_c_extension(output, verbose=args.verbose)

    if args.verbose:
        print("First Graph:")
        for src, dsts in gen.first_graph.items():
            print(f"  {src} -> {', '.join(dsts)}")
        print("First SCCS:")
        for scc in gen.first_sccs:
            print(" ", scc, end="")
            if len(scc) > 1:
                print("  # Indirectly left-recursive")
            else:
                name = next(iter(scc))
                if name in gen.first_graph[name]:
                    print("  # Left-recursive")
                else:
                    print()

    t1 = time.time()

    if args.verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Caches sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")
    # (Excerpt: the opening of the outer `if` that this `else:` pairs with,
    # which selects the -c command branch, precedes this excerpt.)
    if args.filename:
        argparser.error("Use either -c or filename, not both")
    filename = "<string>"
    file: TextIO = io.StringIO(command)
else:
    filename = args.filename
    if not filename:
        argparser.error("Exactly one of -c and filename is required")
    if filename == "" or filename == "-":
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)

try:
    tokengen = tokenize.generate_tokens(file.readline)
    tokenizer = Tokenizer(tokengen)
    parser = GeneratedParser(tokenizer)
    try:
        tree = parser.start()
    except Exception as e:
        traceback.print_exception(e.__class__, e, None)
        sys.exit(1)
    if not tree:
        print("----- raw error -----")
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        print("----- improved error -----")
        err = make_improved_syntax_error(parser, filename)
        traceback.print_exception(err.__class__, err, None)
        print("----- raw error correction by insertion -----")
        got, reach, expected, howfar = recovery_by_insertions(parser)