def main():
    global input, grammar
    g = Grammar(grammar)
    g.parse()
    gotos = GotoGenerator(g)
    gotos.generate()
    gotos.display()
    g.first_follow.display()
    parsing_table = Table(g, gotos)
    parsing_table.generate()
    lr_parser = LR_Parser(g, parsing_table, input)
    lr_parser.parse()
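# Usage sketch (an assumption, not shown in the source): this variant of main()
# reads the module-level globals `grammar` and `input`, so both must be assigned
# before it is called, e.g.:
#
#   grammar = open('expr.grammar').read()   # hypothetical grammar file
#   input = 'id + id * id'                  # hypothetical input string
#   main()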
def main(*argv):
    global input, grammar
    if len(argv[0]) == 2:
        grammar = """"""
        with open(argv[0][0], 'r') as content_file:
            grammar = content_file.read()
        input = argv[0][1]
    g = Grammar(grammar)
    g.parse()
    gotos = GotoGenerator(g)
    gotos.generate()
    gotos.display()
    g.first_follow.display()
    parsing_table = Table(g, gotos)
    parsing_table.generate()
    lr_parser = LR_Parser(g, parsing_table, input)
    lr_parser.parse()
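# Possible command-line entry point (an assumption; the original call site is
# not shown). main(*argv) indexes argv[0] as a two-element list, which is
# consistent with passing sys.argv[1:]: a grammar file path and an input string.
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])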
class Pseudocode_Parser:
    def __init__(self, file_base):
        self.file_base = file_base
        grammar_string = open(
            f"{os.path.dirname(__file__)}/{file_base}.grammar",
            'r', encoding='utf-8').read()
        self.productions = convert_grammar_string_to_productions(
            grammar_string)

        simple_prods = [(prod.lhs_s, prod.rhs_pieces)
                        for prod in self.productions]

        # LR
        self.lr_parser = LR_Parser('SLR(1)', simple_prods, 'silent')

        # # Earley (attempt, doesn't work)
        # self.eparser = Earley.Parser(simple_prods, '*EOI*')

        if shared.g_outdir:
            self.f_errors = shared.open_for_output(file_base + '_errors')
            self.f_ambig = shared.open_for_output(file_base + '_ambig')
            self.f_parsed = shared.open_for_output(file_base + '_parsed')
        else:
            # testing
            self.f_ambig = sys.stdout

        self.error_count = 0
        self.group_errors_by_expectation = True
        if self.group_errors_by_expectation:
            self.error_posns = collections.defaultdict(list)

    def parse_and_handle_errors(self, start_posn, end_posn, goal):
        entry_lhs = '{_' + goal.lower() + '_}'
        entry_rhs = ''
        entry_prod = Production(True, entry_lhs, entry_rhs)
        entry_token = (entry_prod, start_posn, start_posn, '')  # hm

        # Find the start of 'this line'
        # (the line that contains start_posn)
        for posn in range(start_posn, -1, -1):
            if posn == 0 or shared.spec_text[posn - 1] == '\n':
                line_start_posn = posn
                break
        else:
            assert 0

        # And then find the end of this line's indentation
        for posn in range(line_start_posn, start_posn + 1):
            if shared.spec_text[posn] != ' ':
                line_indent_end_posn = posn
                break
        else:
            assert 0

        #
        this_line_indentation = line_indent_end_posn - line_start_posn

        token_generator = tokenizer_for_pseudocode.tokenize(
            shared.spec_text, start_posn, end_posn, True, this_line_indentation)
        tokens = [entry_token] + [token_info for token_info in token_generator]

        def matcher_for_gparse(curr_tind, terminals):
            assert curr_tind < len(tokens)
            (tok_prod, tok_s_posn, tok_e_posn, tok_text) = tokens[curr_tind]
            matching_terminals = []
            for terminal in terminals:
                assert isinstance(terminal, str)
                match_token = False
                if terminal.startswith('{') and terminal.endswith('}'):
                    if tok_prod.lhs_s == terminal:
                        if terminal in ['{nlai}', '{_indent_}', '{_outdent_}']:
                            match_token = None
                        else:
                            match_token = ANode(
                                tok_prod, [tok_text], tok_s_posn, tok_e_posn)
                else:
                    if terminal == 'an?':
                        if tok_text in ['a', 'an']:
                            match_token = None
                    else:
                        if tok_text == terminal:
                            match_token = None
                if match_token is not False:
                    matching_terminals.append(
                        (terminal, curr_tind + 1, match_token))
            return matching_terminals

        def reducer(pi, reductands, s_tind, e_tind):
            prod = self.productions[pi]
            prod.n_reductions += 1
            assert len(reductands) == len(prod.rhs_pieces)
            if prod.lhs_s.startswith('{_'):
                # We're not interested in the details.
                return None
            node_children = []
            for red in reductands:
                if red is None:
                    # rhs_piece is a regex with no capturing group
                    # or is an uninteresting nonterminal
                    continue
                if red.prod.lhs_s == '{space}':
                    continue
                node_children.append(red)
            (_, s_posn, _, _) = tokens[s_tind]
            (_, e_posn, _, _) = tokens[e_tind]
            node = ANode(prod, node_children, s_posn, e_posn)
            return node

        try:
            results = self.lr_parser.gparse(matcher_for_gparse, reducer, 0)
        except ParsingError as e:
            self.error_count += 1
            (_, tok_s_posn, _, _) = tokens[e.posn]
            if self.group_errors_by_expectation:
                self.error_posns[tuple(e.expecting)].append(tok_s_posn)
            else:
                print(
                    '\n'
                    + shared.source_line_with_caret_marking_column(tok_s_posn)
                    + '\n'
                    + "Expecting: " + ' '.join(e.expecting),
                    file=self.f_errors)
            print('(Error)', file=self.f_parsed)
            return None
        except TooManyHeadsError as e:
            (_, tok_s_posn, _, _) = tokens[e.posn]
            print(shared.source_line_with_caret_marking_column(tok_s_posn))
            raise

        if len(results) != 1:
            print('-------------------------------', file=self.f_ambig)
            for result in results:
                result.printTree(self.f_ambig)

        result = results[0]
        result.set_parent_links()

        def count(node):
            if isinstance(node, str):
                return
            assert isinstance(node, ANode)
            if not hasattr(node.prod, 'n_delivered_instances'):
                return
            node.prod.n_delivered_instances += 1
            for child in node.children:
                count(child)
        count(result)

        [entry_node, goal_node] = result.children
        assert entry_node.prod is entry_prod
        assert goal_node.prod.lhs_s == '{' + goal + '}'
        goal_node.printTree(self.f_parsed)
        return goal_node

    def report(self):
        report_file_base = self.file_base + '_prod_counts'
        shared.stderr(f"generating new {report_file_base} ...")

        if self.group_errors_by_expectation:
            # This approach is better when I'm developing a grammar,
            # as it tends to group similar cases.
            def err(x):
                print(x, file=self.f_errors)
            err("%d parsing errors:" % self.error_count)
            err('')
            for (expecting, posns) in sorted(self.error_posns.items()):
                # err('')
                err('X' * 80)
                # err('')
                err("Expecting:")
                for e in expecting:
                    err(" %r" % e)
                for posn in posns:
                    err(shared.source_line_with_caret_marking_column(
                        math.ceil(posn)))

        f = shared.open_for_output(report_file_base)
        for prod in self.productions:
            print("%5d %s" % (prod.n_delivered_instances, prod), file=f)
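# Minimal usage sketch for Pseudocode_Parser (assumptions, not from the source):
# the file_base 'alg', the goal symbol 'ALG', and the span positions below are
# hypothetical, and shared.spec_text / shared.g_outdir must already be set up
# by the caller before parsing.
#
#   parser = Pseudocode_Parser('alg')   # reads alg.grammar from this module's directory
#   node = parser.parse_and_handle_errors(start_posn, end_posn, 'ALG')
#   if node is not None:
#       ...                             # walk the returned ANode (the goal node of the parse)
#   parser.report()                     # writes grouped parse errors and alg_prod_counts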