def get_compiler(tokens_filename, rules_filename, reducible_node_names=None):
    """Build a callable that compiles program text into an abstract syntax tree.

    arguments:
        tokens_filename - filename of the file containing the regexes that
            match literal token values.
        rules_filename - filename of the file containing the language rules
            in Backus Naur Form.
        reducible_node_names - optional. a list of strings, indicating which
            tail-recursive nodes should be reduced. See
            `reduce_tail_recursive_nodes` for more information.

    returns:
        function construct_ast(program_text), which returns the root Node of
        the constructed ast.
    """
    node_names = reducible_node_names if reducible_node_names else []

    # Grammar and lexer setup happens once, up front; the returned closure
    # reuses these for every program it compiles.
    rules_text = slurp(rules_filename)
    rules = parse_rules(rules_text)
    lex_text = slurp(tokens_filename)
    token_rules = gen_token_rules(lex_text, rules)
    parser = construct_parser(rules_text)

    def construct_ast(program_text):
        tokens = lex(program_text, token_rules)
        # todo: this kind of post-lexing processing should be specified by the caller somehow.
        tokens = [tok for tok in tokens if tok.klass.name != "whitespace"]
        derivation = parser.parse(tokens)
        tree = construct_parse_tree(derivation, rules, tokens)
        remove_literal_tokens(tree)
        for node_name in node_names:
            reduce_tail_recursive_nodes(tree, node_name)
        return tree

    return construct_ast
def is_valid(rules_text, program_text):
    """Return True if program_text can be parsed under the given grammar.

    arguments:
        rules_text - the language rules in Backus Naur Form.
        program_text - the token stream / program input to validate
            (passed directly to `LRParser.parse`).

    returns:
        True if an LR parse succeeds, False if the parser raises.
    """
    rules = parse_rules(rules_text)
    table = ParseTable(rules)
    parser = LRParser(rules, table.action_table(), table.goto_table())
    try:
        # The derivation itself is not needed; only parse success matters.
        parser.parse(program_text)
    except Exception:
        # A parse failure is the expected signal for an invalid program.
        # Catch Exception rather than using a bare `except:`, which would
        # also swallow KeyboardInterrupt and SystemExit.
        return False
    return True
def construct_parser(rules_text, is_verbose=False):
    """Construct an LRParser for the grammar described by rules_text.

    arguments:
        rules_text - the language rules in Backus Naur Form.
        is_verbose - when True, print progress and the intermediate rules
            and parse table to stdout.

    returns:
        an LRParser built from the grammar's action and goto tables.
    """
    def log(message=""):
        # Progress output is suppressed entirely unless verbosity was requested.
        if is_verbose:
            print(message)

    log("Parsing rules...")
    rules = parse_rules(rules_text)
    log("\n".join(str(rule) for rule in rules))
    log()

    log("Constructing parse tables...")
    table = ParseTable(rules)
    log(table)

    return LRParser(rules, table.action_table(), table.goto_table())