def _finish_error_reporting(self):
    """
    Leave error reporting mode: compute the set of expected symbols,
    register a ParseError for the failure point and reset the
    error-reporting state.
    """
    # Expected symbols are only those that can cause active heads
    # to shift.
    self._expected = set(h.token_ahead.symbol for h, _ in self._for_shifter)
    if self.debug:
        a_print("*** LEAVING ERROR REPORTING MODE.", new_line=True)
        h_print("Tokens expected:",
                ', '.join([t.name for t in self._expected]), level=1)
        h_print("Tokens found:", self._tokens_ahead, level=1)

    # After leaving error reporting mode, register error and try
    # recovery if enabled
    context = self._last_shifted_heads[0]
    self.errors.append(
        self._create_error(
            context, self._expected,
            tokens_ahead=self._tokens_ahead,
            symbols_before=list({h.state.symbol
                                 for h in self._last_shifted_heads}),
            last_heads=self._last_shifted_heads))

    # FIX: was `self.for_shifter = []` (no underscore), which created a
    # brand-new attribute and left the real `_for_shifter` list uncleared.
    self._for_shifter = []
    self._in_error_reporting = False
def check_get_grammar_table(grammar_file, debug, colors):
    """
    Load the grammar from `grammar_file`, build the LR table and report
    grammar health (S/R and R/R conflicts).

    Exits the process with status 1 on grammar or parse errors.

    Returns:
        (grammar, table) tuple.
    """
    try:
        g = Grammar.from_file(grammar_file, _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_table(g)
        if debug:
            table.print_debug()

        h_print("Grammar OK.")
        if table.sr_conflicts:
            a_print("There are {} Shift/Reduce conflicts.".format(
                len(table.sr_conflicts)))
            # FIX: removed a no-op `.format(len(...))` call on a string
            # that contains no placeholders.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually or use GLR parsing.")
        if table.rr_conflicts:
            a_print("There are {} Reduce/Reduce conflicts.".format(
                len(table.rr_conflicts)))
            prints("Try to resolve manually or use GLR parsing.")

        if (table.sr_conflicts or table.rr_conflicts) and not debug:
            prints("Run in debug mode to print all the states.")

    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
def print_debug(self):
    """Dump the grammar's terminals, non-terminals and productions."""
    a_print("*** GRAMMAR ***", new_line=True)
    h_print("Terminals:")
    prints(" ".join(text(term) for term in self.terminals))
    h_print("NonTerminals:")
    prints(" ".join(text(nonterm) for nonterm in self.nonterminals))
    h_print("Productions:")
    for production in self.productions:
        prints(text(production))
def _actor(self, head):
    """
    Process every parser action registered for this head's lookahead
    symbol: queue shifts, perform reductions, or accept the head.
    """
    debug = self.debug
    lookahead_actions = head.state.actions.get(head.token_ahead.symbol, [])
    for action in lookahead_actions:
        kind = action.action
        if kind == SHIFT:
            # Shifts are deferred and performed together per frontier.
            self._for_shifter.append((head, action.state))
        elif kind == REDUCE:
            self._do_reductions(head, action.prod)
        elif not self._in_error_reporting:
            # ACCEPT: record the finished head unless we are only
            # probing for expected tokens in error reporting mode.
            self._accepted_heads.append(head)
            if debug:
                a_print('**ACCEPTING HEAD: ', str(head))
                if self.debug_trace:
                    self._trace_step_finish(head)
def _do_error_recovery(self):
    """
    If recovery is enabled, does error recovery for the heads in
    _last_shifted_heads.
    """
    debug = self.debug
    if debug:
        a_print("*** STARTING ERROR RECOVERY.", new_line=True)
    error = self.errors[-1]

    self._active_heads = {}
    for head in self._last_shifted_heads:
        if debug:
            input_str = head.input_str
            symbols = head.state.actions.keys()
            h_print("Recovery initiated for head {}.".format(head),
                    level=1, new_line=True)
            h_print("Symbols expected: ",
                    [s.name for s in symbols], level=1)

        use_default = type(self.error_recovery) is bool
        if use_default:
            # Default recovery
            if debug:
                prints("\tDoing default error recovery.")
            successful = self.default_error_recovery(head)
        else:
            # Custom recovery provided during parser construction
            if debug:
                prints("\tDoing custom error recovery.")
            successful = self.error_recovery(head, error)

        if not successful:
            if debug:
                a_print("Killing head: ", head, level=1)
                if self.debug_trace:
                    self._trace_step_kill(head)
            continue

        # Recovery succeeded: extend the error span and keep this head.
        error.location.context.end_position = head.position
        if debug:
            a_print("New position is ",
                    pos_to_line_col(input_str, head.position), level=1)
            a_print("New lookahead token is ", head.token_ahead, level=1)
        self._active_heads[head.state.state_id] = head
        if debug:
            a_print("*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                    new_line=True)
def _create_error(self, context, symbols_expected, tokens_ahead=None,
                  symbols_before=None, last_heads=None):
    """
    Build a ParseError located at the given context and, in debug mode,
    report it.
    """
    location = Location(context=ErrorContext(context))
    error = ParseError(location,
                       symbols_expected,
                       tokens_ahead,
                       symbols_before=symbols_before,
                       last_heads=last_heads,
                       grammar=self.grammar)
    if self.debug:
        a_print("Error: ", error, level=1)
    return error
def compile_get_grammar_table(grammar_file, debug, colors, prefer_shifts,
                              prefer_shifts_over_empty):
    """
    Load the grammar, build (and cache) the LR table and report conflicts.

    Exits the process with status 1 on grammar or parse errors.

    Returns:
        (grammar, table) tuple.
    """
    try:
        g = Grammar.from_file(grammar_file, _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_load_table(
            g, prefer_shifts=prefer_shifts,
            prefer_shifts_over_empty=prefer_shifts_over_empty,
            force_create=True)
        if debug or table.sr_conflicts or table.rr_conflicts:
            table.print_debug()
        if not table.sr_conflicts and not table.rr_conflicts:
            h_print("Grammar OK.")
        if table.sr_conflicts:
            if len(table.sr_conflicts) == 1:
                message = 'There is 1 Shift/Reduce conflict.'
            else:
                message = 'There are {} Shift/Reduce conflicts.'\
                    .format(len(table.sr_conflicts))
            a_print(message)
            # FIX: removed a no-op `.format(len(...))` call on a string
            # that contains no placeholders.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually, or use GLR parsing.")
        if table.rr_conflicts:
            if len(table.rr_conflicts) == 1:
                message = 'There is 1 Reduce/Reduce conflict.'
            else:
                message = 'There are {} Reduce/Reduce conflicts.'\
                    .format(len(table.rr_conflicts))
            a_print(message)
            prints("Try to resolve manually or use GLR parsing.")
    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
def print_debug(self):
    """
    Dump every table state with its GOTO and ACTION entries, followed
    by any Shift/Reduce and Reduce/Reduce conflicts.
    """
    a_print("*** STATES ***", new_line=True)
    for state in self.states:
        state.print_debug()

        if state.gotos:
            h_print("GOTO:", level=1, new_line=True)
            goto_entries = [("%s" + s_emph("->") + "%d") % (sym, st.state_id)
                            for sym, st in state.gotos.items()]
            prints("\t" + ", ".join(goto_entries))

        h_print("ACTIONS:", level=1, new_line=True)
        action_entries = []
        for sym, acts in state.actions.items():
            if len(acts) == 1:
                acts_str = str(acts[0])
            else:
                acts_str = "[{}]".format(",".join(str(a) for a in acts))
            action_entries.append(
                ("%s" + s_emph("->") + "%s") % (sym, acts_str))
        prints("\t" + ", ".join(action_entries))

    if self.sr_conflicts:
        a_print("*** S/R conflicts ***", new_line=True)
        message = ('There is {} S/R conflict.'
                   if len(self.sr_conflicts) == 1
                   else 'There are {} S/R conflicts.')
        h_print(message.format(len(self.sr_conflicts)))
        for conflict in self.sr_conflicts:
            print(conflict.message)

    if self.rr_conflicts:
        a_print("*** R/R conflicts ***", new_line=True)
        message = ('There is {} R/R conflict.'
                   if len(self.rr_conflicts) == 1
                   else 'There are {} R/R conflicts.')
        h_print(message.format(len(self.rr_conflicts)))
        for conflict in self.rr_conflicts:
            print(conflict.message)
def _do_recovery(self):
    """
    Run error recovery for the head on top of the parse stack.

    Uses the built-in recovery when `error_recovery` is a bool, otherwise
    calls the user-supplied recovery callable with the head and the last
    registered error.

    Returns:
        bool: True if recovery succeeded and parsing may continue.
    """
    debug = self.debug
    if debug:
        a_print("**Recovery initiated.**")

    head = self.parse_stack[-1]
    error = self.errors[-1]

    if type(self.error_recovery) is bool:
        # Default recovery
        if debug:
            prints("\tDoing default error recovery.")
        successful = self.default_error_recovery(head)
    else:
        # Custom recovery provided during parser construction
        if debug:
            prints("\tDoing custom error recovery.")
        successful = self.error_recovery(head, error)

    # The recovery may either decide to skip erroneous part of
    # the input and resume at the place that can continue or it
    # might decide to fill in missing tokens.
    if successful:
        if debug:
            # FIX: the original printed the truncated message "Recovery ".
            h_print("Recovery successful.")
        error.location.context.end_position = head.position
        if debug:
            a_print("New position is ",
                    pos_to_line_col(head.input_str, head.position), level=1)
            a_print("New lookahead token is ", head.token_ahead, level=1)
    return successful
def _call_dynamic_filter(self, context, from_state, to_state, action,
                         production=None, subresults=None):
    """
    Invoke the user-supplied dynamic disambiguation filter for the given
    action.

    Returns:
        bool: True if the action is accepted. Non-dynamic actions are
        accepted without calling the filter.
    """
    if context.token is None:
        context.token = context.token_ahead

    # Only consult the filter for symbols/productions marked dynamic.
    if (action is SHIFT and not to_state.symbol.dynamic) \
            or (action is REDUCE and not production.dynamic):
        return True

    if self.debug:
        # FIX: the original reassigned the `production`/`subresults`
        # parameters to display strings here, so with debug enabled the
        # filter below received those strings instead of the real values.
        # Use separate locals for the debug message.
        if action is SHIFT:
            act_str = "SHIFT"
            token = context.token
            prod_str = ""
            subresults_str = ""
        else:
            act_str = "REDUCE"
            token = context.token_ahead
            prod_str = ", prod={}".format(context.production)
            subresults_str = ", subresults={}".format(subresults)
        h_print("Calling filter for action:",
                " {}, token={}{}{}".format(act_str, token,
                                           prod_str, subresults_str),
                level=2)

    accepted = self.dynamic_filter(context, from_state, to_state,
                                   action, production, subresults)
    if self.debug:
        if accepted:
            a_print("Action accepted.", level=2)
        else:
            a_print("Action rejected.", level=2)

    return accepted
def parse(self, input_str, position=0, file_name=None, extra=None):
    """
    Parses the given input string.

    Args:
    input_str(str): A string to parse.
    position(int): Position to start from.
    file_name(str): File name if applicable. Used in error reporting.
    extra: An object that keeps custom parsing state. If not given
        initialized to dict.
    """
    if self.debug:
        a_print("*** PARSING STARTED\n")
        self.debug_frontier = 0
        self.debug_step = 0
        if self.debug_trace:
            self._dot_trace = ''
            self._dot_trace_ranks = ''
            self._trace_frontier_heads = []
            self._trace_frontier_steps = []

    self.input_str = input_str
    self.file_name = file_name
    self.extra = {} if extra is None else extra

    # Error reporting and recovery
    self.errors = []
    self._in_error_reporting = False
    self._expected = set()
    self._tokens_ahead = []
    self._last_shifted_heads = []
    self._for_shifter = []

    # We start with a single parser head in state 0.
    start_head = GSSNode(self, self.table.states[0], 0, position,
                         ambiguity=1)
    self._init_dynamic_disambiguation(start_head)

    # Accepted (finished) heads
    self._accepted_heads = []

    if self.debug and self.debug_trace:
        self._trace_head(start_head)

    # The main loop. Keeps iterating while there are live heads, or while
    # error reporting mode is still probing for expected tokens.
    self._active_heads = {0: start_head}
    while self._active_heads or self._in_error_reporting:
        if self.debug:
            a_print("** REDUCING - frontier {}".format(
                self.debug_frontier), new_line=True)
            self._debug__active_heads(self._active_heads.values())
        if not self._in_error_reporting:
            # Remember the heads of this frontier; they are the anchor
            # points for error reporting/recovery if all heads die.
            self._last_shifted_heads = list(self._active_heads.values())
        self._find_lookaheads()
        # Heads are processed grouped by lookahead symbol
        # (presumably to handle lexical ambiguity — see _find_lookaheads).
        while self._active_heads_per_symbol:
            _, self._active_heads = self._active_heads_per_symbol.popitem()
            self._for_actor = list(self._active_heads.values())
            # Used to optimize revisiting only heads that will
            # traverse newly added paths.
            # state_id -> set(state_id)
            self._states_traversed = {}
            while self._for_actor:
                head = self._for_actor.pop()
                self._actor(head)
            if self._in_error_reporting:
                self._finish_error_reporting()
                if self.error_recovery:
                    self._do_error_recovery()
                    self._for_shifter = []
                    continue
                break
            self._do_shifts()
        if not self._active_heads and not self._accepted_heads:
            # All heads died without acceptance: switch to error
            # reporting mode to collect expected tokens.
            if self.debug:
                a_print("*** ENTERING ERROR REPORTING MODE.",
                        new_line=True)
            self._enter_error_reporting()

    if self.debug and self.debug_trace:
        self._trace_finish()
        self._export__dot_trace()

    if self._accepted_heads:
        # Return results
        forest = Forest(self)
        if self.debug:
            a_print(f'*** {forest.solutions} successful parse(s).')
        self._remove_transient_state()
        return forest
    else:
        # Report error
        self._remove_transient_state()
        raise self.errors[-1]
def _do_shifts(self):
    """
    Perform all shifts queued in `_for_shifter` for the current frontier,
    producing the next frontier of active heads.
    """
    debug = self.debug
    if debug:
        self.debug_frontier += 1
        self.debug_step = 0
        a_print("** SHIFTING - frontier {}".format(self.debug_frontier),
                new_line=True)
        self._debug__active_heads(self._active_heads.values())
        if self.debug_trace:
            self._trace_frontier()

    self._active_heads = {}

    # Due to lexical ambiguity heads might be at different positions.
    # We must order heads by position before shift to process them in
    # the right order. Only shift heads with minimal position during
    # a single frontier processing.
    self._for_shifter.sort(key=lambda x: x[0].token_ahead.end_position,
                           reverse=True)
    end_position = None
    while self._for_shifter:
        head, to_state = self._for_shifter.pop()
        if end_position is not None \
                and head.token_ahead.end_position > end_position:
            # This head ends further along; push it back and leave it
            # for a later frontier.
            self._for_shifter.append((head, to_state))
            break
        end_position = head.token_ahead.end_position
        if debug:
            self.debug_step += 1
            a_print("{}. SHIFTING head: ".format(self._debug_step_str()),
                    head, new_line=True)
        shifted_head = self._active_heads.get(to_state.state_id, None)
        if shifted_head:
            # If this token has already been shifted connect shifted head to
            # this head.
            parent = next(iter(
                shifted_head.parents.values())).clone_with_root(head)
            if self.dynamic_filter and \
                    not self._call_dynamic_filter(parent, head.state,
                                                  to_state, SHIFT):
                continue
        else:
            # We need to create new shifted head
            if debug:
                self._debug_context(head.position,
                                    lookahead_tokens=head.token_ahead,
                                    expected_symbols=None)
            end_position = head.position + len(head.token_ahead)
            shifted_head = GSSNode(self, to_state, end_position,
                                   head.frontier + 1, ambiguity=1,
                                   layout_content=head.layout_content_ahead)
            parent = Parent(shifted_head, head, head.position, end_position,
                            token=head.token_ahead)
            if self.dynamic_filter and \
                    not self._call_dynamic_filter(parent, head.state,
                                                  to_state, SHIFT):
                continue
            if self.debug:
                a_print("New shifted head ", shifted_head, level=1)
                if self.debug_trace:
                    self._trace_head(shifted_head)
            self._active_heads[to_state.state_id] = shifted_head
        # Link the (new or merged) shifted head back into the GSS.
        shifted_head.create_link(parent, head)
def _reduce(self, head, root_head, production, node_nonterm,
            start_position, end_position):
    """
    Executes the given reduction.

    Creates (or merges into) the head reached by GOTO from `root_head`
    over the reduced non-terminal, and revisits already-processed heads
    whose reductions may traverse the newly created GSS path.
    """
    if start_position is None:
        # Empty reduction: the span collapses to the root head position.
        start_position = end_position = root_head.position
    state = root_head.state.gotos[production.symbol]

    if self.debug:
        self.debug_step += 1
        a_print('{} REDUCING head '.format(self._debug_step_str()),
                str(head), new_line=True)
        a_print('by prod ', production, level=1)
        a_print('to state {}:{}'.format(state.state_id, state.symbol),
                level=1)
        a_print('root is ', root_head, level=1)
        a_print('Position span: {} - {}'.format(start_position,
                                                end_position), level=1)

    new_head = GSSNode(self, state, head.position, head.frontier,
                       token_ahead=head.token_ahead,
                       layout_content=root_head.layout_content,
                       layout_content_ahead=head.layout_content_ahead)
    parent = Parent(new_head, root_head, start_position, end_position,
                    production=production, possibilities=[node_nonterm])

    if self.dynamic_filter and \
            not self._call_dynamic_filter(parent, head.state, state, REDUCE,
                                          production, list(node_nonterm)):
        # Action rejected by dynamic filter
        return

    active_head = self._active_heads.get(state.state_id, None)
    if active_head:
        # A head in the target state already exists on this frontier:
        # merge by adding a new parent link.
        created = active_head.create_link(parent, head)
        # Calculate heads to revisit with the new path. Only those heads
        # that are already processed (not in _for_actor) and are
        # traversing this new head state on the current frontier should
        # be considered.
        if created and state.state_id in self._states_traversed:
            to_revisit = self._states_traversed[
                state.state_id].intersection(
                    self._active_heads.keys()) - set(
                        h.state.state_id for h in self._for_actor)
            if to_revisit:
                if self.debug:
                    h_print('Revisiting reductions for processed '
                            'active heads in states {}'.format(to_revisit),
                            level=1)
                for r_head_state in to_revisit:
                    r_head = self._active_heads[r_head_state]
                    for action in [
                            a for a in r_head.state.actions.get(
                                head.token_ahead.symbol, [])
                            if a.action == REDUCE]:
                        self._do_reductions(r_head, action.prod, parent)
    else:
        # No cycles. Do the reduction.
        new_head.create_link(parent, head)
        self._for_actor.append(new_head)
        self._active_heads[new_head.state.state_id] = new_head
        if self.debug:
            a_print("New head: ", new_head, level=1, new_line=True)
            if self.debug_trace:
                self._trace_head(new_head)
def create_load_table(grammar, itemset_type=LR_1, start_production=1,
                      prefer_shifts=False, prefer_shifts_over_empty=True,
                      force_create=False, force_load=False, in_layout=False,
                      debug=False, **kwargs):
    """
    Construct table by loading from file if present and newer than the
    grammar. If table file is older than the grammar or non-existent
    calculate the table and save to file.

    Arguments: see create_table
    force_create(bool): If set to True table will be created even if
        table file exists.
    force_load(bool): If set to True table will be loaded if exists even
        if it's not newer than the grammar, i.e. modification time will
        not be checked.
    """
    if in_layout:
        # For layout grammars always calculate table.
        # Those are usually very small grammars so there is no point in
        # using cached tables.
        if debug:
            a_print("** Calculating LR table for the layout parser...",
                    new_line=True)
        return create_table(grammar, itemset_type, start_production,
                            prefer_shifts, prefer_shifts_over_empty)

    if debug:
        a_print("** Calculating LR table...", new_line=True)

    # FIX: the table file name was computed twice by two identical code
    # blocks; compute it once up front.
    table_file_name = None
    if grammar.file_path:
        file_basename, _ = os.path.splitext(grammar.file_path)
        table_file_name = "{}.pgt".format(file_basename)

    create_table_file = True
    if not force_create and not force_load \
            and table_file_name and os.path.exists(table_file_name):
        create_table_file = False
        table_mtime = os.path.getmtime(table_file_name)
        # Check if older than any of the grammar files
        for g_file_name in grammar.imported_files.keys():
            if os.path.getmtime(g_file_name) > table_mtime:
                create_table_file = True
                break

    if (create_table_file or force_create) and not force_load:
        table = create_table(grammar, itemset_type, start_production,
                             prefer_shifts, prefer_shifts_over_empty,
                             debug=debug, **kwargs)
        if table_file_name:
            try:
                save_table(table_file_name, table)
            except PermissionError:
                # Read-only location: silently skip caching; the table
                # is still returned.
                pass
    else:
        if debug:
            h_print("Loading LR table from '{}'".format(table_file_name))
        table = load_table(table_file_name, grammar)

    return table
def parse(self, input_str, position=0, file_name=None, extra=None):
    """
    Parses the given input string.

    Args:
    input_str(str): A string to parse.
    position(int): Position to start from.
    file_name(str): File name if applicable. Used in error reporting.
    extra: An object that keeps custom parsing state. If not given
        initialized to dict.
    """
    if self.debug:
        a_print("*** PARSING STARTED", new_line=True)

    self.input_str = input_str
    self.file_name = file_name
    self.extra = {} if extra is None else extra
    self.errors = []
    self.in_error_recovery = False
    self.accepted_head = None

    next_token = self._next_token
    debug = self.debug

    start_head = LRStackNode(self, self.table.states[0], 0, position)
    self._init_dynamic_disambiguation(start_head)
    self.parse_stack = parse_stack = [start_head]

    # Classic LR driver loop: shift/reduce until ACCEPT or
    # unrecoverable error.
    while True:
        head = parse_stack[-1]
        cur_state = head.state
        if debug:
            a_print("Current state:", str(cur_state.state_id),
                    new_line=True)

        if head.token_ahead is None:
            if not self.in_layout:
                self._skipws(head, input_str)
                if self.debug:
                    h_print("Layout content:",
                            "'{}'".format(head.layout_content), level=1)
            head.token_ahead = next_token(head)

        if debug:
            h_print("Context:",
                    position_context(head.input_str, head.position),
                    level=1)
            h_print("Tokens expected:",
                    expected_symbols_str(cur_state.actions.keys()), level=1)
            h_print("Token ahead:", head.token_ahead, level=1)

        actions = None
        if head.token_ahead is not None:
            actions = cur_state.actions.get(head.token_ahead.symbol)
        if not actions and not self.consume_input:
            # If we don't have any action for the current token ahead
            # see if we can finish without consuming the whole input.
            actions = cur_state.actions.get(STOP)

        if not actions:
            # No action for the lookahead: register an error and attempt
            # recovery if configured; otherwise stop parsing.
            symbols_expected = list(cur_state.actions.keys())
            tokens_ahead = self._get_all_possible_tokens_ahead(head)
            self.errors.append(self._create_error(
                head, symbols_expected, tokens_ahead,
                symbols_before=[cur_state.symbol]))

            if self.error_recovery:
                if self.debug:
                    a_print("*** STARTING ERROR RECOVERY.", new_line=True)
                if self._do_recovery():
                    # Error recovery succeeded
                    if self.debug:
                        a_print(
                            "*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                            new_line=True)
                    continue
                else:
                    break
            else:
                break

        # Dynamic disambiguation
        if self.dynamic_filter:
            actions = self._dynamic_disambiguation(head, actions)
            # If after dynamic disambiguation we still have at least one
            # shift and non-empty reduction or multiple non-empty
            # reductions raise exception.
            if len([
                    a for a in actions
                    if (a.action is SHIFT)
                    or ((a.action is REDUCE) and len(a.prod.rhs))]) > 1:
                raise DynamicDisambiguationConflict(head, actions)

        # If dynamic disambiguation is disabled either globaly by not
        # giving disambiguation function or localy by not marking
        # any production dynamic for this state take the first action.
        # First action is either SHIFT while there might be empty
        # reductions, or it is the only reduction.
        # Otherwise, parser construction should raise an error.
        act = actions[0]

        if act.action is SHIFT:
            cur_state = act.state
            if debug:
                a_print(
                    "Shift:",
                    "{} \"{}\"".format(cur_state.state_id,
                                       head.token_ahead.value)
                    + " at position "
                    + str(pos_to_line_col(self.input_str, head.position)),
                    level=1)
            new_position = head.position + len(head.token_ahead)
            new_head = LRStackNode(
                self,
                state=act.state,
                frontier=head.frontier + 1,
                token=head.token_ahead,
                layout_content=head.layout_content_ahead,
                position=new_position,
                start_position=head.position,
                end_position=new_position)
            new_head.results = self._call_shift_action(new_head)
            parse_stack.append(new_head)
            self.in_error_recovery = False

        elif act.action is REDUCE:
            # if this is EMPTY reduction try to take another if
            # exists.
            if len(act.prod.rhs) == 0:
                if len(actions) > 1:
                    act = actions[1]
            production = act.prod
            if debug:
                a_print("Reducing", "by prod '{}'.".format(production),
                        level=1)
            r_length = len(production.rhs)
            if r_length:
                # Non-empty reduction: pop RHS-many heads and go to the
                # GOTO state of the uncovered stack top.
                start_reduction_head = parse_stack[-r_length]
                results = [x.results for x in parse_stack[-r_length:]]
                del parse_stack[-r_length:]
                next_state = parse_stack[-1].state.gotos[production.symbol]
                new_head = LRStackNode(
                    self,
                    state=next_state,
                    frontier=head.frontier,
                    position=head.position,
                    production=production,
                    start_position=start_reduction_head.start_position,
                    end_position=head.end_position,
                    token_ahead=head.token_ahead,
                    layout_content=start_reduction_head.layout_content,
                    layout_content_ahead=head.layout_content_ahead)
            else:
                # Empty reduction
                results = []
                next_state = cur_state.gotos[production.symbol]
                new_head = LRStackNode(
                    self,
                    state=next_state,
                    frontier=head.frontier,
                    position=head.position,
                    production=production,
                    start_position=head.end_position,
                    end_position=head.end_position,
                    token_ahead=head.token_ahead,
                    layout_content='',
                    layout_content_ahead=head.layout_content_ahead)

            # Calling reduce action
            new_head.results = self._call_reduce_action(new_head, results)
            parse_stack.append(new_head)

        elif act.action is ACCEPT:
            self.accepted_head = head
            break

    if self.accepted_head:
        if debug:
            a_print("SUCCESS!!!")
        if self.return_position:
            return parse_stack[1].results, parse_stack[1].position
        else:
            return parse_stack[1].results
    else:
        raise self.errors[-1]
def print_debug(self):
    """Announce the layout parser (when applicable) and dump the table."""
    if self.in_layout and self.debug_layout:
        a_print('*** LAYOUT parser ***', new_line=True)
    self.table.print_debug()