def make_messy_parse_function(regexes, rules, eof=False, post_lexer=None,
                              extra_names=()):
    """
    NOT_RPYTHON: This function is only called to process the grammar file,
    which occurs before translation, so it does not need to be RPython.

    The parse function it returns parses Fundy code into a "messy" AST,
    which can be cleaned up using the ToAST object obtained from
    parse_ebnf(grammar).
    """
    names, regexes = zip(*regexes)

    if "IGNORE" in names:
        ignore = ["IGNORE"]
    else:
        ignore = []

    check_for_missing_names(names + extra_names, regexes, rules)
    lexer = Lexer(list(regexes), list(names), ignore=ignore)
    parser = PackratParser(rules, rules[0].nonterminal)

    def parse(s):
        tokens = lexer.tokenize(s, eof=eof)
        if post_lexer is not None:
            tokens = post_lexer(tokens)
        s = parser.parse(tokens)
        return s

    return parse
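
# Usage sketch, not part of the original module: it assumes a grammar source
# string GRAMMAR and that parse_ebnf (the same helper used by _get_parse_tools
# below) is in scope; parse_ebnf returns the (name, regex) pairs, rules, and
# generated ToAST class that make_messy_parse_function expects.
def _example_make_parser(GRAMMAR, source_text):
    regexes, rules, ToAST = parse_ebnf(GRAMMAR)
    parse = make_messy_parse_function(regexes, rules, eof=True)
    messy_tree = parse(source_text)        # raw ("messy") parse tree
    return ToAST().transform(messy_tree)   # cleaned-up AST
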
def _get_parse_tools():
    grammar = _get_grammar_file()
    try:
        regexs, rules, transformer_class = parse_ebnf(grammar)
    except ParseError as e:
        print e.nice_error_message('grammar.ebnf', grammar)
        raise e

    names, regexs = zip(*regexs)
    check_for_missing_names(names, regexs, rules)
    lexer = PieLexer(list(regexs), list(names))
    parser = PackratParser(rules, rules[0].nonterminal)

    def parse(s):
        tokens = lexer.tokenize(s)
        s = parser.parse(tokens)
        return s

    return parse, transformer_class()
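
# Usage sketch, not part of the original module: shows how the pair returned
# by _get_parse_tools is typically combined. The name _example_parse and the
# `source` argument are hypothetical; it assumes the transformer instance
# exposes transform(), as the ToAST classes generated by parse_ebnf do.
def _example_parse(source):
    parse, transformer = _get_parse_tools()
    tree = parse(source)                 # "messy" parse tree from the PackratParser
    return transformer.transform(tree)   # cleaned-up AST
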