class IncParser(object):
    """Incremental LR parser with error recovery and out-of-context analysis.

    The parser works on a versioned parse tree (``self.previous_version``)
    and re-parses only the parts that changed since ``self.prev_version``.
    When a syntax error is found, a ``RecoveryManager`` is asked for a
    subtree to isolate; the subtree is then refined (``refine``) so that
    unaffected parts of it can be retained.
    """

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create a parser, optionally building its tables from ``grammar``.

        grammar     -- grammar source handed to ``Parser``; if falsy no state
                       graph or syntax table is built (they can be supplied
                       later, e.g. through ``from_dict``)
        lr_type     -- table type; ``LALR`` additionally converts the graph
        whitespaces -- forwarded to ``Parser`` and stored on the instance
        startsymbol -- accepted for interface compatibility; unused here
        """
        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"])
            # NOTE(review): the cache is (un)pickled from a local file; only
            # safe as long as that directory is trusted. File modes are kept
            # as text ("r"/"w") to stay compatible with existing cache files;
            # binary mode would be required on Python 3.
            try:
                logging.debug("Try to unpickle former stategraph")
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end-start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornodes_by_version = {}
        self.indentation_based = False

        self.previous_version = None
        self.prev_version = 0
        # Set to a (temp_eos, node, expected_state) tuple while this parser
        # is used for out-of-context analysis; None for a normal parse.
        self.ooc = None

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Initialise the syntax table from already parsed grammar rules.

        If ``pickle_id`` is truthy a cached syntax table is loaded from the
        pickle directory when available; otherwise the table is built from
        ``rules`` and (again only with a ``pickle_id``) written to the cache.
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"])
            try:
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # No usable cache file: fall through and build the table.
                pass
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces

    def init_ast(self, magic_parent=None):
        """Create the initial tree ``Root(BOS, EOS)`` and save it as version 0."""
        bos = BOS(Terminal(""), 0, [])
        eos = EOS(FinishSymbol(), 0, [])
        bos.magic_parent = magic_parent
        eos.magic_parent = magic_parent
        bos.next_term = eos
        eos.prev_term = bos
        root = Node(Nonterminal("Root"), 0, [bos, eos])
        self.previous_version = AST(root)
        root.save(0)
        bos.save(0)
        eos.save(0)

    def reparse(self):
        """Run ``inc_parse`` with ``needs_reparse`` set, i.e. without reusing subtrees."""
        self.inc_parse([], True)

    def inc_parse(self, line_indents=None, needs_reparse=False, state=0, stack=None):
        """Incrementally parse the current tree.

        line_indents  -- unused; kept for interface compatibility
        needs_reparse -- if true, every nonterminal is broken down instead of
                         being reused
        state         -- parser state to start in (non-zero for OOC parses)
        stack         -- initial parse stack; when empty/None the stack starts
                         with the tree's EOS node

        Returns True if the input was accepted without errors, False
        otherwise. The result is also stored in ``self.last_status``.
        """
        logging.debug("============ NEW %s PARSE ================= ", "OOC" if self.ooc else "INCREMENTAL")
        logging.debug("= starting in state %s ", state)
        self.validating = False
        self.reused_nodes = set()
        self.current_state = state
        self.previous_version.parent.isolated = None
        bos = self.previous_version.parent.children[0]
        eos = self.previous_version.parent.children[-1]
        if not stack:
            self.stack = [eos]
        else:
            self.stack = stack
        eos.state = 0
        self.loopcount = 0
        self.needs_reparse = needs_reparse
        self.error_nodes = []
        self.error_pres = []
        if self.ooc:
            rmroot = self.ooc[1]
        else:
            rmroot = self.previous_version.parent
        self.rm = RecoveryManager(self.prev_version, rmroot, self.stack, self.syntaxtable)

        # Developer toggle: optimistic shifting of unchanged nonterminals.
        USE_OPT = True

        la = self.pop_lookahead(bos)
        while True:
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
            self.loopcount += 1

            # Abort condition for out-of-context analysis. If we reached the
            # state of the node that is being analysed and the lookahead
            # matches the node's lookahead from the previous parse, we are
            # done.
            if self.ooc:
                logging.debug("ooc %s %s", self.ooc, id(self.ooc))
                logging.debug("la %s", la)
                logging.debug("cs %s", self.current_state)
                if la is self.ooc[0]:
                    if isinstance(la.symbol, Nonterminal):
                        # if OOC is Nonterminal, use first terminal to apply
                        # reductions
                        first_term = la.find_first_terminal(self.prev_version)
                        lookup = self.get_lookup(first_term)
                    else:
                        lookup = self.get_lookup(la)
                    while True:
                        # OOC is complete if we reached the expected state and
                        # there are no more reductions left to do
                        if self.current_state == self.ooc[2] and len(self.stack) == 2:
                            logging.debug("======= OOC parse successfull =========")
                            self.last_status = True
                            return True
                        # Otherwise apply more reductions to reach the wanted
                        # state or an error occurs
                        element = self.syntaxtable.lookup(self.current_state, lookup)
                        if not isinstance(element, Reduce):
                            logging.debug("No more reductions")
                            break
                        self.reduce(element)
                    logging.debug("======= OOC parse failed =========")
                    self.last_status = False
                    return False

            if isinstance(la.symbol, (Terminal, FinishSymbol)) or la.symbol == Epsilon():
                lookup_symbol = self.get_lookup(la)
                result = self.parse_terminal(la, lookup_symbol)
                if result == "Accept":
                    logging.debug("============ INCREMENTAL PARSE END (ACCEPT) ================= ")
                    # With error recovery we can end up in the accepting
                    # state despite errors occurring during the parse.
                    if not self.error_nodes:
                        self.last_status = True
                        return True
                    self.last_status = False
                    return False
                elif result == "Error":
                    logging.debug("============ INCREMENTAL PARSE END (ERROR) ================= ")
                    self.last_status = False
                    return False
                elif result is not None:
                    la = result
                # result is None: same lookahead is processed again

            else:  # Nonterminal
                if la.has_changes() or needs_reparse or la.has_errors() or self.iso_context_changed(la):
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        # Only opt-shift if the nonterminal has children to
                        # avoid a bug in the retainability algorithm. See
                        # test/test_eco.py::Test_RetainSubtree::test_bug1
                        if goto and la.children:  # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.deleted = False
                            la.state = follow_id  # XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                            la.exists = True
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s", self.current_state)
                            if la.isolated:
                                # When skipping previously isolated subtrees,
                                # traverse their children to find the error
                                # nodes and report them back to the editor.
                                self.find_nested_error(la)
                            la = self.pop_lookahead(la)
                            self.validating = True
                            continue
                        else:
                            # XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal(self.prev_version)

                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                logging.debug("OPT Reduce: %s", element)
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)

    def parse_terminal(self, la, lookup_symbol):
        """Lookup the current lookahead symbol in the syntax table and apply
        the received action.

        Returns the next lookahead node, the string "Accept" or "Error", or
        None when a validation phase was rolled back (the caller then
        processes the same lookahead again).
        """
        element = None
        if la.deleted:
            # Nodes are no longer removed from the tree. Instead "deleted" nodes
            # are skipped during parsing so they won't end up in the next parse
            # tree. This allows to revert deleted nodes on undo.
            la = self.pop_lookahead(la)
            return la

        # XXX if temporary EOS symbol, check lookup
        #        if accept: return accept
        #        if nothing: try normal EOS instead (e.g. to reduce things)

        if isinstance(la, EOS):
            # This is needed so we can finish single line comments at the end of
            # the file
            element = self.syntaxtable.lookup(self.current_state, Terminal("<eos>"))
            if isinstance(element, Shift):
                self.current_state = element.action
                return la
        if element is None:
            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s", lookup_symbol, self.current_state, element)
        if isinstance(element, Accept):
            # XXX change parse so that stack is [bos, startsymbol, eos]
            bos = self.previous_version.parent.children[0]
            eos = self.previous_version.parent.children[-1]

            bos.changed = False
            eos.changed = False
            self.previous_version.parent.set_children([bos, self.stack[1], eos])
            self.previous_version.parent.changed = True
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug ("\x1b[32mAccept\x1b[0m")
            return "Accept"
        elif isinstance(element, Shift):
            self.validating = False
            self.shift(la, element)
            la.local_error = la.nested_errors = False
            return self.pop_lookahead(la)

        elif isinstance(element, Reduce):
            logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, element)
            self.reduce(element)
            return la  # self.parse_terminal(la, lookup_symbol)
        elif element is None:
            if self.validating:
                logging.debug("Was validating: Right breakdown and return to normal")
                logging.debug("Before breakdown: %s", self.stack[-1])
                self.right_breakdown()
                logging.debug("After breakdown: %s", self.stack[-1])
                self.validating = False
            else:
                self.error_nodes.append(la)
                if self.rm.recover(la):
                    # recovered, continue parsing
                    self.refine(self.rm.iso_node, self.rm.iso_offset, self.rm.error_offset)
                    self.current_state = self.rm.new_state
                    self.rm.iso_node.isolated = la
                    self.rm.iso_node.deleted = False
                    self.stack.append(self.rm.iso_node)
                    logging.debug("Recovered. Continue after %s", self.rm.iso_node)
                    return self.pop_lookahead(self.rm.iso_node)
                logging.debug("Couldn't find a subtree to recover. Recovering the whole tree.")
                logging.debug("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term, la.next_term)
                logging.debug("loopcount: %s", self.loopcount)

                error_offset = self.rm.offset(la, self.rm.previous_version)
                iso_node = self.previous_version.parent
                self.refine(iso_node, 0, error_offset)
                iso_node.isolated = la
                return "Error"

    def get_lookup(self, la):
        """Get the lookup symbol of a node. If no such lookup symbol exists use
        the nodes symbol instead."""
        if la.lookup != "":
            lookup_symbol = Terminal(la.lookup)
        else:
            lookup_symbol = la.symbol

        if isinstance(lookup_symbol, IndentationTerminal):
            # XXX hack: change parsing table to accept IndentationTerminals
            lookup_symbol = Terminal(lookup_symbol.name)

        return lookup_symbol

    def isolate(self, node):
        """Revert a changed subtree to ``self.prev_version`` and flag the
        reverted nodes as erroneous (``local_error`` / ``nested_errors``)."""
        if node.has_changes():  # or node.has_errors():
            node.load(self.prev_version)
            if node.nested_changes:
                node.nested_errors = True
            if node.changed:
                node.local_error = True
            for c in node.children:
                self.isolate(c)

    def discard_changes(self, node):
        """Revert a single node (not its children) to ``self.prev_version``
        and record a presentation error for it if applicable."""
        if node.has_changes():
            node.load(self.prev_version)
            if node.nested_changes:
                node.nested_errors = True
            if node.changed:
                node.local_error = True
        # NOTE(review): evaluated for every node passed in, changed or not
        # -- confirm this is the intended scope.
        self.compute_presention(node)

    def compute_presention(self, node):
        """Append ``(node, previous_name)`` to ``self.error_pres`` if the
        terminal's symbol name differs from its name in the reference version.

        Only nodes whose symbol is exactly of type ``Terminal`` are checked.
        """
        if type(node.symbol) is not Terminal:
            return
        # NOTE(review): reference_version is not initialised in __init__;
        # presumably assigned by the owner of this parser. A missing
        # attribute is swallowed by the AttributeError handler below.
        try:
            prev_name = node.get_attr("symbol.name", self.reference_version)
        except AttributeError:
            prev_name = None
        if prev_name != node.symbol.name:
            self.error_pres.append((node, prev_name))

    def refine(self, node, offset, error_offset):
        """Refine the isolated subtree ``node``.

        Pass 1 collects retainable subtrees left of ``error_offset``; the
        node is then reverted to the previous version and pass 2 retains,
        discards or out-of-context analyses its children.
        """
        # for all children that come after the detection offset, we need
        # to analyse them using the normal incparser
        logging.debug(" Refine %s Offset: %s Error Offset: %s", node, offset, error_offset)
        retain_set = set()
        self.pass1(node, offset, error_offset, retain_set)
        node.load(self.prev_version)
        node.set_children(node.children)  # reset sibling pointers
        node.local_error = node.nested_errors = False
        self.pass2(node, offset, error_offset, retain_set)

    def pass1 (self, node, offset, error_offset, retain_set):
        """Walk the previous-version children of ``node`` and add retainable
        subtrees that end at or before ``error_offset`` to ``retain_set``."""
        if offset > error_offset:
            # We don't have to check any other children
            # that come after the error node
            return

        for child in node.get_attr("children", self.prev_version):
            if offset + child.textlength() <= error_offset:
                self.find_retainable_subtrees(child, retain_set)
            else:
                self.pass1(child, offset, error_offset, retain_set)
            offset += child.textlength()

    def pass2(self, node, offset, error_offset, retain_set):
        """Process the (reverted) children of ``node``: children right of the
        error get an out-of-context analysis, children left of it are
        retained or discarded, and the child containing the error offset is
        discarded and refined recursively."""
        for c in node.children:
            if self.ooc and c is self.ooc[0]:
                logging.debug(" Don't refine TempEOS nodes")
                return
            if offset > error_offset:
                # XXX check if following terminal requires analysis
                self.out_of_context_analysis(c)
            elif offset + c.textlength() <= error_offset:
                self.retain_or_discard(c, node, retain_set)
            else:
                assert offset <= error_offset
                assert offset + c.textlength() > error_offset
                self.discard_changes(c)
                self.pass2(c, offset, error_offset, retain_set)
            offset += c.textlength()

    def find_retainable_subtrees(self, node, retain_set):
        """Add the topmost retainable subtrees below (and including) ``node``
        to ``retain_set``."""
        if self.is_retainable_subtree(node):
            retain_set.add(node)
            return

        for child in node.get_attr("children", self.prev_version):
            self.find_retainable_subtrees(child, retain_set)

    def is_retainable_subtree(self, node):
        """Return True if ``node`` is not new, still exists and either has no
        changes or kept its text length and position since the previous
        version."""
        if node.new:
            return False

        if not node.does_exist():
            return False

        if not node.has_changes():
            # if no changes, discarding doesn't do anything anyways so why check?
            return True

        # This is equivalent to Wagner's `same_pos` function.
        if node.textlength(self.prev_version) == node.textlength() and \
           node.get_attr("position", self.prev_version) == node.position:
            return True

        return False

    def retain_or_discard(self, node, parent, retain_set):
        """Keep ``node`` if it is in ``retain_set`` (re-attaching it to
        ``parent``); otherwise discard its changes and recurse into its
        children."""
        if node in retain_set:
            retain_set.remove(node)
            logging.debug(" Retaining %s (%s). Set parent to %s (%s) (%s)", node, id(node), parent, id(parent), "SAME" if parent is node.parent else "DIFF")
            # Might have been assigned to a different parent in current version
            # that was removed during refinement. This makes sure this node is
            # assigned to the right parent. See test_eco.py:Test_RetainSubtree
            node.parent = parent
            # Also need to update siblings as they might have been changed by
            # the parser before nodes parent was reset
            node.update_siblings()
            if node.has_changes():
                parent.mark_changed()
            return

        self.discard_changes(node)
        for c in node.children:
            self.retain_or_discard(c, node, retain_set)
        node.set_children(node.children)  # reset links between children

    def out_of_context_analysis(self, node):
        """Try to reparse the subtree ``node`` in isolation.

        A temporary parser sharing this parser's syntax table parses
        ``TempRoot(temp_bos, node, temp_eos)``, starting in the state of
        ``node``'s left sibling. If that fails or yields a different symbol
        the subtree is isolated; otherwise the resulting node replaces (or
        is) ``node`` in the original tree.
        """
        logging.debug(" Attempting out of context analysis on %s (%s)", node, id(node))

        if not node.children:
            logging.debug(" Failed: Node has no children")
            self.isolate(node)
            return

        if not node.has_changes():
            if node.has_errors():
                self.find_nested_error(node)
            logging.debug(" Failed: Node has no changes")
            return

        # check if subtree is followed by terminal requiring analysis
        # (includes deleted terminals)
        follow = self.next_terminal(node)
        if follow.deleted:  # or follow.changed:
            # XXX This should also include `follow.changed`, but since currently nodes
            # are marked as changed even if just their siblings or next_terms
            # are updated, this would fail for most out-of-context analyses
            logging.debug(" Failed: Surrounding context has changed")
            self.isolate(node)
            return

        temp_parser = IncParser()
        temp_parser.syntaxtable = self.syntaxtable
        temp_parser.prev_version = self.prev_version
        # NOTE(review): requires self.reference_version to have been assigned
        # by the owner of this parser -- it is not set in __init__.
        temp_parser.reference_version = self.reference_version

        oldname = node.symbol.name
        oldleft = node.left
        oldright = node.right
        oldparent = node.parent

        saved_left = node.get_attr("left", self.prev_version)
        saved_right = node.get_attr("right", self.prev_version)
        saved_parent = node.get_attr("parent", self.prev_version)

        def restore_history():
            # Undo the temporary history entries written below.
            node.log[("left", self.prev_version)] = saved_left
            node.log[("right", self.prev_version)] = saved_right
            node.log[("parent", self.prev_version)] = saved_parent

        temp_bos = BOS(Terminal(""), 0, [])
        temp_eos = self.pop_lookahead(node)
        while isinstance(temp_eos.symbol, Terminal) and temp_eos.deleted:
            # We can't use a deleted node as a temporary EOS since the deleted
            # node can pass the temp EOS reduction check but is then immediately
            # skipped by parse_terminal. This causes the parser to continue
            # parsing past the temp_eos resulting in faulty sub parse trees.
            temp_eos = self.pop_lookahead(temp_eos)

        eos_parent = temp_eos.parent
        eos_left = temp_eos.left
        eos_right = temp_eos.right

        # During out-of-context analysis we need to calculate offsets of
        # isolation nodes. Without this change we would calculate the offset
        # within the original parse tree and not the offset within the temporary
        # parse tree
        node.log[("left", self.prev_version)] = temp_bos
        node.log[("right", self.prev_version)] = temp_eos

        logging.debug(" TempEOS: %s", temp_eos)
        temp_root = Node(Nonterminal("TempRoot"), 0, [temp_bos, node, temp_eos])
        node.log[("parent", self.prev_version)] = temp_root
        temp_root.save(self.prev_version)
        temp_bos.next_term = node
        temp_bos.state = oldleft.state
        temp_bos.save(node.version)
        temp_parser.previous_version = AST(temp_root)
        temp_parser.ooc = (temp_eos, node, node.state)
        temp_parser.root = temp_root
        dummy_stack_eos = EOS(Terminal(""), oldleft.state, [])
        try:
            temp_parser.inc_parse(state=oldleft.state, stack=[dummy_stack_eos])
        except IndexError:
            temp_parser.last_status = False

        # Building temp_root re-linked temp_eos; put it back into its tree.
        temp_eos.parent = eos_parent
        temp_eos.left = eos_left
        temp_eos.right = eos_right

        # pass on errors to the outer parser
        self.error_nodes.extend(temp_parser.error_nodes)
        self.error_pres.extend(temp_parser.error_pres)

        if not temp_parser.last_status:
            # isolate
            logging.debug("OOC analysis of %s failed. Error on %s.", node, temp_parser.error_nodes)
            restore_history()
            self.isolate(node)  # revert changes done during OOC
            if temp_parser.previous_version.parent.isolated:
                # if during OOC parsing error recovery isolated the entire
                # tree (due to not finding an appropriate isolation node) we
                # need to move the isolation reference over to the actual node
                # being reparsed as the root is thrown away after this
                node.isolated = temp_parser.previous_version.parent.isolated
            return

        newnode = temp_parser.stack[-1]

        if newnode.symbol.name != oldname:
            logging.debug("OOC analysis resulted in different symbol: %s", newnode.symbol.name)
            # node is not the same: revert all changes!
            restore_history()
            self.isolate(node)
            return

        if newnode is not node:
            restore_history()
            logging.debug("OOC analysis resulted in different node but same symbol: %s", newnode.symbol.name)
            assert len(temp_parser.stack) == 2  # should only contain [EOS, node]
            i = oldparent.children.index(node)
            oldparent.children[i] = newnode
            newnode.parent = oldparent
            newnode.left = oldleft
            if oldleft:
                oldleft.right = newnode
                oldleft.mark_changed()
            newnode.right = oldright
            if oldright:
                oldright.left = newnode
                oldright.mark_changed()
            newnode.mark_changed()  # why did I remove this?
            return

        logging.debug("Subtree resulted in the same parse as before %s %s", newnode, node)
        assert len(temp_parser.stack) == 2  # should only contain [EOS, node]
        node.parent = oldparent
        node.left = oldleft
        node.right = oldright
        restore_history()

    def reduce(self, element):
        """Reduce elements on the stack to a non-terminal.

        Pops ``element.amount()`` nodes, looks up the goto for the
        production's left-hand side and pushes either a reused parent node
        or a freshly created one. Raises ``Exception`` if the syntax table
        has no goto entry.
        """
        # Pop the right-hand side; popping yields it right-to-left.
        children = []
        for _ in range(element.amount()):
            children.append(self.stack.pop())
        children.reverse()

        logging.debug(" Element on stack: %s(%s)", self.stack[-1].symbol, self.stack[-1].state)
        self.current_state = self.stack[-1].state  # XXX don't store on nodes, but on stack
        logging.debug(" Reduce: set state to %s (%s)", self.current_state, self.stack[-1].symbol)

        goto = self.syntaxtable.lookup(self.current_state, element.action.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state))

        # save childrens parents state
        has_errors = False
        for c in children:
            if c.has_errors() or c.isolated:
                has_errors = True
            if not c.new:
                # just marking changed is not enough. If we encounter an error
                # during reduction the path from the root down to this node is
                # incomplete and thus can't be reverted/isolate properly
                c.mark_changed()

        reuse_parent = self.ambig_reuse_check(element.action.left, children)
        if not self.needs_reparse and reuse_parent:
            logging.debug(" Reusing parent: %s (%s)", reuse_parent, id(reuse_parent))
            new_node = reuse_parent
            new_node.changed = False
            new_node.deleted = False
            new_node.isolated = None
            new_node.local_error = False
            new_node.set_children(children)
            new_node.state = goto.action  # XXX need to save state using history service
            new_node.mark_changed()
        else:
            new_node = Node(element.action.left.copy(), goto.action, children)
            logging.debug(" No reuse parent. Make new %s (%s)", new_node, id(new_node))
        new_node.nested_errors = has_errors
        new_node.calc_textlength()
        new_node.position = self.stack[-1].position + self.stack[-1].textlen
        logging.debug(" Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        new_node.exists = True
        self.current_state = new_node.state  # = goto.action
        logging.debug("Reduce: set state to %s (%s)", self.current_state, new_node.symbol)
        if getattr(element.action.annotation, "interpret", None):
            # eco grammar annotations
            self.interpret_annotation(new_node, element.action)

    def ambig_reuse_check(self, prod, children):
        """Return a previous-version parent of one of ``children`` that has
        symbol ``prod`` and was not reused yet in this parse, or None.

        Parents with more than one previous child are remembered in
        ``self.reused_nodes`` so they are not handed out twice.
        """
        for c in children:
            if c.parent and not c.new:  # not a new node
                old_parent = c.get_attr('parent', self.prev_version)
                if old_parent.symbol == prod and old_parent not in self.reused_nodes:
                    if len(old_parent.get_attr("children", self.prev_version)) > 1:
                        # if node is the only child, reuse is unambiguous so
                        # we don't need to remember we've reused this node
                        # (which allows us to reuse it after error recovery)
                        self.reused_nodes.add(old_parent)
                    return old_parent
        return None

    def top_down_reuse(self):
        """Run the top-down node reuse pass over the whole tree."""
        main = self.previous_version.parent
        self.top_down_traversal(main)

    def top_down_traversal(self, node):
        """Descend to changed, non-new nodes and try to reuse their previous
        structure."""
        if node.changed and not node.new:
            self.reuse_isomorphic_structure(node)
        elif node.nested_changes or node.new:
            for c in node.children:
                self.top_down_traversal(c)

    def reuse_isomorphic_structure(self, node):
        """Replace new children of ``node`` by their no-longer-existing
        previous-version counterparts at the same index when the symbol
        names match."""
        for i in range(len(node.children)):
            current_child = node.children[i]
            try:
                previous_child = node.get_attr("children", self.prev_version)[i]
            except IndexError:
                # No counterpart at this index in the previous version.
                self.top_down_traversal(current_child)
                continue
            if current_child.new and not previous_child.exists and \
               current_child.symbol.name == previous_child.get_attr("symbol.name", self.prev_version):
                self.replace_child(node, i, current_child, previous_child)
                self.reuse_isomorphic_structure(previous_child)
            elif current_child.nested_changes or current_child.new:
                self.top_down_traversal(current_child)

    def replace_child(self, parent, i, current, previous):
        """Substitute ``previous`` for ``current`` as the ``i``-th child of
        ``parent``, copying ``current``'s children, state, position, sibling
        links and annotations onto ``previous``.

        Terminals are never replaced (see comment below), so no terminal
        specific attributes (lookup, prev_term/next_term) need to be copied.
        """
        if isinstance(current.symbol, Terminal):
            # Newly inserted terminals have already been saved to the history
            # (previous_version) before we reach this. Reusing terminals
            # here would thus give no memory benefit as the old terminal can't
            # be garbage collected
            return
        parent.children[i] = previous
        previous.parent = parent  # in case previous was moved before being deleted
        previous.children = list(current.children)
        for c in current.children:
            c.parent = previous
        previous.symbol.name = current.symbol.name
        previous.changed = False
        previous.deleted = False
        previous.isolated = False
        previous.local_error = False
        previous.state = current.state
        previous.mark_changed()
        previous.calc_textlength()
        previous.position = current.position
        previous.exists = True
        previous.nested_errors = current.nested_errors
        previous.right = current.right
        previous.left = current.left
        previous.alternate = current.alternate
        if previous.right:
            previous.right.left = previous
        if previous.left:
            previous.left.right = previous

    def interpret_annotation(self, node, production):
        """Evaluate the production's annotation for ``node`` and store the
        result in ``node.alternate`` unless the old value can be reused."""
        annotation = production.annotation
        if annotation:
            astnode = annotation.interpret(node)
            if not self.is_reusable_astnode(node.alternate, astnode):
                node.alternate = astnode

    def is_reusable_astnode(self, old, new):
        """Return True if both arguments are exactly ``AstNode`` instances
        with the same name and every child of ``old`` is the identical
        object in ``new``."""
        from grammar_parser.bootstrap import AstNode
        if type(old) is not AstNode or type(new) is not AstNode:
            return False
        if old.name != new.name:
            return False
        for key in old.children:
            if old.children.get(key) is not new.children.get(key):
                return False
        return True

    def left_breakdown(self, la):
        """Mark ``la`` as non-existing and return its first child, or the
        next lookahead if it has no children."""
        la.exists = False
        if len(la.children) > 0:
            return la.children[0]
        else:
            return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo an optimistic shift by breaking the top of the stack down to
        its right-most terminal and shifting that terminal properly."""
        node = self.stack.pop()  # optimistically shifted Nonterminal
        # after the breakdown, we need to properly shift the left over terminal
        # using the (correct) current state from before the optimistic shift of
        # its parent tree
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown(%s): set state to %s", node.symbol.name, self.current_state)
        while isinstance(node.symbol, Nonterminal):
            # Right_breakdown reverts wrong optimistic shifts including
            # subsequent reductions. These reductions may contain nodes that
            # have been reused. Reverting the reduction also means we need to
            # undo the reusing of that node to free it up for future reusing.
            node.exists = False
            self.reused_nodes.discard(node)

            # This bit of code is necessary to avoid a bug that occurs with the
            # default Wagner implementation if we isolate a subtree and
            # optimistically shift an empty Nonterminal, and then run into an
            # error. The verifying parts of the incremental parser then try to
            # undo wrong optimistic shifts by breaking them down to their most
            # right terminal. Since the optimistic shift happened on an empty
            # Nonterminal, the algorithm tries to break down the isolated
            # subtree to the left of it. Since this subtree contains an error in
            # form of an unshiftable terminal, the algorithm fails and throws an
            # exception. The following code fixes this by ignoring already
            # isolated subtrees.
            if node.isolated:
                self.stack.append(node)
                self.current_state = node.state
                return

            for c in node.children:
                self.shift(c, rb=True)
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop"
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol back onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                logging.debug("right breakdown else: set state to %s", self.stack[-1].state)
                self.current_state = self.stack[-1].state
        self.shift(node, rb=True)  # pushes previously popped terminal back on stack

    def shift(self, la, element=None, rb=False):
        """Push ``la`` onto the stack and move to the state given by
        ``element`` (looked up from the syntax table when not supplied).

        ``rb`` only affects the debug output (marks shifts done during a
        right breakdown).
        """
        if not element:
            lookup_symbol = self.get_lookup(la)
            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[32m" + "%sShift(%s)" + "\x1b[0m" + ": %s -> %s", "rb" if rb else "", self.current_state, la, element)
        la.state = element.action
        la.exists = True
        la.position = self.stack[-1].position + self.stack[-1].textlen
        self.stack.append(la)
        self.current_state = la.state

        if not la.lookup == "<ws>":
            # last_shift_state is used to predict next symbol
            # whitespace destroys correct behaviour
            self.last_shift_state = element.action

    def pop_lookahead(self, la):
        """Return the next node after ``la``: its right sibling, or the right
        sibling of the closest (previous-version) ancestor that has one."""
        while True:
            sibling = self.right_sibling(la)
            if sibling is not None:
                return sibling
            la = la.get_attr("parent", self.prev_version)

    def right_sibling(self, node):
        """Return ``node``'s right sibling as of ``self.prev_version``."""
        return node.right_sibling(self.prev_version)

    def shiftable(self, la):
        """Return True if the syntax table has an entry for ``la.symbol`` in
        the current state."""
        return bool(self.syntaxtable.lookup(self.current_state, la.symbol))

    def has_changed(self, node):
        """Return True if ``node`` is contained in ``self.all_changes``."""
        return node in self.all_changes

    def prepare_input(self, _input):
        """Turn a space separated string into a list of ``Terminal`` objects
        followed by a ``FinishSymbol``. An empty string yields only the
        ``FinishSymbol``."""
        l = []
        # XXX need an additional lexer to do this right
        if _input != "":
            for i in _input.split(" "):
                l.append(Terminal(i))
        l.append(FinishSymbol())
        return l

    def get_ast(self):
        """Wrap ``self.ast_stack[0]`` between fresh bos/eos nodes and return
        it as an ``AST``."""
        bos = Node(Terminal("bos"), 0, [])
        eos = Node(FinishSymbol(), 0, [])
        root = Node(Nonterminal("Root"), 0, [bos, self.ast_stack[0], eos])
        return AST(root)

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a syntax table entry in state
        ``state_id``."""
        return set(symbol for (state, symbol) in self.syntaxtable.table if state == state_id)

    def get_next_symbols_list(self, state = -1):
        """Return the names of the symbols accepted in ``state`` (defaults to
        the state after the last non-whitespace shift)."""
        if state == -1:
            state = self.last_shift_state
        lookahead = self.get_next_possible_symbols(state)

        return [symbol.name for symbol in lookahead]

    def get_next_symbols_string(self, state = -1):
        """Like ``get_next_symbols_list`` but joined with ``", "``."""
        l = self.get_next_symbols_list(state)
        return ", ".join(l)

    def get_expected_symbols(self, state_id):
        """Return the non-whitespace symbols expected in ``state_id`` according
        to the state graph; an empty list for ``state_id == -1``."""
        # XXX if state of a symbol is nullable, return next symbol as well
        # XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id != -1:
            stateset = self.graph.state_sets[state_id]
            symbols = stateset.get_next_symbols_no_ws()
            return symbols
        return []

    def reset(self):
        """Reset the parse state and start over with an empty tree."""
        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore ``last_status`` and ``error_nodes`` saved for ``version``;
        missing entries are logged and leave the current value untouched."""
        try:
            self.last_status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        try:
            self.error_nodes = list(self.errornodes_by_version[version])
        except KeyError:
            logging.warning("Could not find errornodes for version %s", version)

    def save_status(self, version):
        """Remember ``last_status`` and a copy of ``error_nodes`` for
        ``version``."""
        self.status_by_version[version] = self.last_status
        self.errornodes_by_version[version] = list(self.error_nodes)

    def find_nested_error(self, node):
        """Find errors within isolated subtrees."""
        self.compute_presention(node)
        if node.isolated:
            self.error_nodes.append(node.isolated)
        elif not node.nested_errors:
            return
        for c in node.children:
            self.find_nested_error(c)

    def iso_context_changed(self, node):
        """Return True if ``node`` is isolated and the node following it has
        changes."""
        # Currently catches more cases than necessary. Could be made more
        # accurate by finding the next terminal reachable from node (including
        # deleted ones)
        if not node.isolated:
            return False
        la = self.pop_lookahead(node)
        return la.has_changes()

    def next_terminal(self, node):
        """Return the first node after ``node`` whose symbol is not exactly a
        ``Nonterminal``, descending into non-empty nonterminals and skipping
        empty ones."""
        n = self.pop_lookahead(node)
        while type(n.symbol) is Nonterminal:
            if len(n.children) > 0:
                n = n.children[0]
            else:
                n = self.pop_lookahead(n)
        return n
class LRParser(object):
    """Plain (non-incremental) LR recogniser built from a grammar.

    The parse stack alternates symbols and state ids:
    ``[FinishSymbol, 0, sym, state, sym, state, ...]``.
    """

    def __init__(self, grammar, lr_type=LR0):
        """Parse ``grammar`` and build state graph and syntax table.

        lr_type -- table type; ``LALR`` additionally converts the graph.
        """
        parser = Parser(grammar)
        parser.parse()

        self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
        self.graph.build()

        if lr_type == LALR:
            self.graph.convert_lalr()

        self.syntaxtable = SyntaxTable(lr_type)
        self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []

    def check(self, _input):
        """Return True if the space separated token string ``_input`` is
        accepted by the grammar, False otherwise.

        An empty string is treated as an empty token sequence (only the
        finish symbol), matching ``IncParser.prepare_input``.
        """
        self.reset()

        # XXX need an additional lexer to do this right
        tokens = []
        if _input != "":
            # "".split(" ") would yield [""] and thus a bogus empty terminal.
            tokens = [Terminal(t) for t in _input.split(" ")]
        tokens.append(FinishSymbol())

        self.stack.append(FinishSymbol())
        self.stack.append(0)

        i = 0
        while i < len(tokens):
            c = tokens[i]
            state_id = self.stack[-1]
            element = self.syntaxtable.lookup(state_id, c)
            if element is None:
                return False
            if isinstance(element, Shift):
                self.stack.append(c)
                self.stack.append(element.action)
                i += 1
            elif isinstance(element, Reduce):
                # self.add_to_ast(element)
                # Each stack entry is a (symbol, state) pair, hence 2 pops
                # per right-hand side symbol.
                for _ in range(2*element.amount()):
                    self.stack.pop()
                state_id = self.stack[-1]
                self.stack.append(element.action.left)

                goto = self.syntaxtable.lookup(state_id, element.action.left)
                assert isinstance(goto, Goto)
                self.stack.append(goto.action)
            elif isinstance(element, Accept):
                return True
        # Input exhausted without reaching an accepting action.
        return False

    def add_to_ast(self, element):
        """Build an AST node for the reduced production ``element.action``
        and push it onto ``self.ast_stack`` (currently not called by
        ``check``)."""
        # NOTE(review): Node is called with two arguments here but with
        # (symbol, state, children) elsewhere in this file -- confirm the
        # constructor signature before re-enabling this method.
        l = []
        # action = Production
        for e in element.action.right:
            if isinstance(e, Nonterminal):
                l.append(self.ast_stack.pop())
            if isinstance(e, Terminal):
                l.append(Node(e, []))
        l.reverse()
        n = Node(element.action.left, l)
        self.ast_stack.append(n)

    def get_ast(self):
        """Return the first entry of ``self.ast_stack`` wrapped in an ``AST``."""
        return AST(self.ast_stack[0])

    def reset(self):
        """Clear the parse stack and the AST stack."""
        self.stack = []
        self.ast_stack = []
class IncParser(object):
    """The incremental parser.

    Re-parses a previously built parse tree (``self.previous_version``),
    reusing unchanged subtrees where the syntax table allows it.  Parser
    states are stored on the nodes themselves (``node.state``); the parse
    stack (``self.stack``) holds nodes only.
    """

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create the parser and, if ``grammar`` is given, its syntax table.

        When no grammar is passed, ``from_dict`` has to be called to set up
        the syntax table.

        :param grammar: grammar source; falsy to skip table construction
        :param lr_type: table flavour; ``LALR`` additionally converts the graph
        :param whitespaces: forwarded to ``Parser`` and stored on the instance
        :param startsymbol: unused in this method
        """
        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([
                os.path.dirname(__file__), "/../pickle/",
                str(hash(grammar) ^ hash(whitespaces)), ".pcl"
            ])
            # NOTE(review): unpickling executes arbitrary code; the pickle
            # directory must only ever contain files written by this program.
            # NOTE(review): text mode only suits Python 2's default ASCII
            # pickle protocol; Python 3 would need "rb"/"wb" -- confirm before porting.
            try:
                logging.debug("Try to unpickle former stategraph")
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end - start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []  # (node, attribute, old value) triples, see do_undo
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornode_by_version = {}
        self.indentation_based = False

        self.pm = PluginManager()
        self.pm.loadplugins(self)
        self.pm.do_incparse_init()

        self.previous_version = None
        logging.debug("Incremental parser done")

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Set up the syntax table from already parsed grammar rules.

        If ``pickle_id`` is truthy, a cached syntax table is loaded from the
        pickle directory when available and written there after a rebuild.

        :param rules: grammar rules, also passed on to the plugins
        :param startsymbol: start symbol of the grammar
        :param lr_type: table flavour
        :param whitespaces: stored on the instance; part of the cache key
        :param pickle_id: integer cache key, or a falsy value to disable caching
        :param precedences: forwarded to ``SyntaxTable.build``
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([
                os.path.dirname(__file__), "/../pickle/",
                str(pickle_id ^ hash(whitespaces)), ".pcl"
            ])
            # NOTE(review): unpickling executes arbitrary code; only trusted
            # cache files may live in the pickle directory.
            try:
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # A missing/unreadable cache is expected; rebuild below.
                logging.debug("could not unpickle syntax table from %s", filename)
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces
        self.pm.do_incparse_from_dict(rules)

    def init_ast(self, magic_parent=None):
        """Create an empty tree ``Root -> [BOS, EOS]`` as the previous version.

        :param magic_parent: stored as ``magic_parent`` on both BOS and EOS
        """
        bos = BOS(Terminal(""), 0, [])
        eos = EOS(FinishSymbol(), 0, [])
        bos.magic_parent = magic_parent
        eos.magic_parent = magic_parent
        bos.next_term = eos
        eos.prev_term = bos
        root = Node(Nonterminal("Root"), 0, [bos, eos])
        self.previous_version = AST(root)
        root.save(0)
        bos.save(0)
        eos.save(0)

    def reparse(self):
        """Run a parse that breaks down every nonterminal (no subtree reuse)."""
        self.inc_parse([], True)

    def inc_parse(self, line_indents=[], reparse=False):
        """Incrementally parse the tree stored in ``self.previous_version``.

        :param line_indents: not read by this method (and never mutated)
        :param reparse: if True, every nonterminal is broken down instead of
            being reused
        :return: True if the input was accepted, False on a syntax error;
            the same value is stored in ``self.last_status``
        :raises AssertionError: if a changed terminal reaches the parser
        """
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        self.validating = False
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        self.stack.append(Node(FinishSymbol(), 0, []))
        bos = self.previous_version.parent.children[0]
        self.loopcount = 0

        USE_OPT = True

        self.pm.do_incparse_inc_parse_top()

        la = self.pop_lookahead(bos)
        while True:
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
            self.loopcount += 1
            if isinstance(la.symbol, Terminal) or isinstance(
                    la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:
                    # with prelexing you should never end up here!
                    # Raised explicitly so the check survives `python -O`
                    # (a stripped `assert False` would loop forever here).
                    raise AssertionError(
                        "changed terminal reached the parser: %s" % (la,))
                lookup_symbol = self.get_lookup(la)
                result = self.parse_terminal(la, lookup_symbol)
                if result == "Accept":
                    self.last_status = True
                    return True
                elif result == "Error":
                    self.last_status = False
                    return False
                elif result != None:
                    la = result
                # result is None: validation was aborted by a right
                # breakdown; retry the same lookahead in the restored state.

            else:  # Nonterminal
                if la.changed or reparse:
                    # deconstruct the subtree
                    # la.changed is deliberately not reset: as all
                    # nonterminals that have changed are being rebuild, there
                    # is no need to change this flag (this also solves
                    # problems with comments)
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        # Follow parsing/syntax table
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        if goto:  # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            self.pm.do_incparse_optshift(la)
                            follow_id = goto.action
                            self.stack.append(la)
                            # XXX this fixed goto error (I should think about
                            # storing the states on the stack instead of
                            # inside the elements)
                            la.state = follow_id
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s", self.current_state)
                            la = self.pop_lookahead(la)
                            # the shift was optimistic; parse_terminal undoes
                            # it via right_breakdown if the next terminal fails
                            self.validating = True
                            continue
                        else:
                            # XXX can be made faster by providing more
                            # information in syntax tables
                            first_term = la.find_first_terminal()
                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(
                                self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(
                            self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        logging.debug("============ INCREMENTAL PARSE END ================= ")

    def parse_terminal(self, la, lookup_symbol):
        """Process one terminal in the current parser state.

        :param la: lookahead node
        :param lookup_symbol: symbol used to query the syntax table for ``la``
        :return: ``"Accept"`` if the input was accepted; ``"Error"`` if the
            syntax table has no entry and the parser was not validating; the
            next lookahead node after a shift (``la`` itself when an EOS node
            shifts ``<eos>``); ``None`` after an optimistic shift was undone
            by a right breakdown, in which case the caller retries ``la``
        """
        element = None
        if isinstance(la, EOS):
            element = self.syntaxtable.lookup(self.current_state, Terminal("<eos>"))
            if isinstance(element, Shift):
                self.current_state = element.action
                return la
        if element is None:
            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s", lookup_symbol, self.current_state, element)
        if isinstance(element, Accept):
            # XXX change parse so that stack is [bos, startsymbol, eos]
            bos = self.previous_version.parent.children[0]
            eos = self.previous_version.parent.children[-1]
            self.previous_version.parent.set_children(
                [bos, self.stack[1], eos])
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug("\x1b[32mAccept\x1b[0m")
            return "Accept"
        elif isinstance(element, Shift):
            self.validating = False
            self.shift(la, element)
            return self.pop_lookahead(la)
        elif isinstance(element, Reduce):
            logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, element)
            self.reduce(element)
            # same lookahead, new state after the reduction
            return self.parse_terminal(la, lookup_symbol)
        elif element is None:
            if self.validating:
                logging.debug(
                    "Was validating: Right breakdown and return to normal")
                logging.debug("Before breakdown: %s", self.stack[-1])
                self.right_breakdown()
                logging.debug("After breakdown: %s", self.stack[-1])
                self.validating = False
            else:
                return self.do_undo(la)

    def get_lookup(self, la):
        """Return the symbol used to look up ``la`` in the syntax table.

        If the node has a non-empty ``lookup`` (the name of the regular
        expression that matched the token during lexing) it is wrapped in a
        ``Terminal``; otherwise the node's own symbol is used.

        Note: indentation terminals are converted to plain ``Terminal``s of
        the same name.

        :param la: node to find the lookup of
        :return: the lookup symbol
        """
        if la.lookup != "":
            lookup_symbol = Terminal(la.lookup)
        else:
            lookup_symbol = la.symbol

        if isinstance(lookup_symbol, IndentationTerminal):
            # XXX hack: change parsing table to accept IndentationTerminals
            lookup_symbol = Terminal(lookup_symbol.name)
        return lookup_symbol

    def do_undo(self, la):
        """Roll back the recorded node changes and flag ``la`` as error node.

        ``self.undo`` is emptied newest-first; for every triple
        ``(node, attribute, value)`` the attribute is set back to ``value``.

        :param la: node at which the error occurred
        :return: the string ``"Error"``
        """
        while self.undo:
            node, attribute, value = self.undo.pop()
            setattr(node, attribute, value)
        self.error_node = la
        logging.debug("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term, la.next_term)
        logging.debug("loopcount: %s", self.loopcount)
        return "Error"

    def reduce(self, element):
        """Apply a reduction to the parse stack.

        Pops ``element.amount()`` nodes from the stack, makes them the
        children of a new nonterminal node and pushes that node in the state
        given by the goto table.  The previous ``parent``/``left``/``right``/
        ``log`` of each child is recorded in ``self.undo`` first.

        :type element: Reduce
        :param element: reduction to apply
        :raises Exception: if the syntax table has no goto entry for the
            reduced nonterminal in the uncovered state
        """
        # Fill a children array with nodes that are on the stack
        amount = element.amount()
        children = []
        for i in range(amount):
            c = self.stack.pop()
            # apply folding information from grammar to tree nodes
            c.symbol.folding = element.action.right[amount - i - 1].folding
            children.append(c)
        children.reverse()  # popped right-to-left

        logging.debug(" Element on stack: %s(%s)", self.stack[-1].symbol, self.stack[-1].state)
        self.current_state = self.stack[-1].state  # XXX don't store on nodes, but on stack
        logging.debug(" Reduce: set state to %s (%s)", self.current_state, self.stack[-1].symbol)

        goto = self.syntaxtable.lookup(self.current_state, element.action.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" %
                            (element, self.current_state))

        # save childrens parents state
        for c in children:
            self.undo.append((c, 'parent', c.parent))
            self.undo.append((c, 'left', c.left))
            self.undo.append((c, 'right', c.right))
            self.undo.append((c, 'log', c.log.copy()))
            # XXX with node reuse we only have to do this if the parent changes
            c.mark_version()

        new_node = Node(element.action.left.copy(), goto.action, children)
        self.pm.do_incparse_reduce(new_node)
        logging.debug(" Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        self.current_state = new_node.state  # = goto.action
        logging.debug("Reduce: set state to %s (%s)", self.current_state, new_node.symbol)
        if getattr(element.action.annotation, "interpret", None):
            # eco grammar annotations
            self.interpret_annotation(new_node, element.action)
        else:
            # johnstone annotations
            self.add_alternate_version(new_node, element.action)

    def interpret_annotation(self, node, production):
        """Store the interpreted annotation of ``production`` as ``node.alternate``.

        Does nothing if the production has no (truthy) annotation.
        """
        annotation = production.annotation
        if annotation:
            astnode = annotation.interpret(node)
            node.alternate = astnode

    def add_alternate_version(self, node, production):
        """Attach an alternate (folded) version of ``node`` as ``node.alternate``.

        Children are copied according to their ``symbol.folding`` marker:
        ``"^^^"`` children are set aside and re-inserted at the positions
        named in ``production.inserts``; ``"^^"`` replaces the alternate's
        symbol and splices in the child's children; ``"^"`` only splices in
        the child's children; anything else is appended unchanged.
        """
        alternate = TextNode(node.symbol.__class__(node.symbol.name), node.state, [])
        alternate.children = []
        teared = []
        for i, c in enumerate(node.children):
            # NOTE(review): assumes production.inserts supports `in` like a
            # dict (replaces the Python-2-only has_key) -- confirm its type.
            if i in production.inserts:
                # insert teared nodes at right position
                value = production.inserts[i]
                for t in teared:
                    if t.symbol.name == value.name:
                        alternate.children.append(t)
            if c.symbol.folding == "^^^":
                c.symbol.folding = None
                teared.append(c)
                continue
            elif c.symbol.folding == "^^":
                while c.alternate is not None:
                    c = c.alternate
                alternate.symbol = c.symbol
                for child in c.children:
                    alternate.children.append(child)
            elif c.symbol.folding == "^":
                while c.alternate is not None:
                    c = c.alternate
                for child in c.children:
                    alternate.children.append(child)
            else:
                alternate.children.append(c)
        node.alternate = alternate

    def left_breakdown(self, la):
        """Return the first child of ``la``, or the next lookahead if it has none."""
        if len(la.children) > 0:
            return la.children[0]
        else:
            return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo an optimistic nonterminal shift on top of the stack.

        The nonterminal is replaced by its children, recursively along its
        right edge, until a terminal is on top of the stack again.
        """
        node = self.stack.pop()  # optimistically shifted Nonterminal
        # after the breakdown, we need to properly shift the left over terminal
        # using the (correct) current state from before the optimistic shift of
        # it's parent tree
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown(%s): set state to %s", node.symbol.name, self.current_state)
        while isinstance(node.symbol, Nonterminal):
            for c in node.children:
                self.shift(c, rb=True)
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop"
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol pack onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                logging.debug("right breakdown else: set state to %s", self.stack[-1].state)
                self.current_state = self.stack[-1].state
        self.shift(node, rb=True)  # pushes previously popped terminal back on stack

    def shift(self, la, element=None, rb=False):
        """Push ``la`` onto the stack and enter the state given by ``element``.

        :param la: node to shift
        :param element: table entry to follow; looked up from the current
            state if not given
        :param rb: True when called from ``right_breakdown`` (only used for
            logging and passed on to the plugins)
        """
        if not element:
            lookup_symbol = self.get_lookup(la)
            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[32m" + "%sShift(%s)" + "\x1b[0m" + ": %s -> %s", "rb" if rb else "", self.current_state, la, element)
        la.state = element.action
        self.stack.append(la)
        self.current_state = la.state

        if not la.lookup == "<ws>":
            # last_shift_state is used to predict next symbol
            # whitespace destroys correct behaviour
            self.last_shift_state = element.action

        self.pm.do_incparse_shift(la, rb)

    def pop_lookahead(self, la):
        """Return the next node to the right of ``la``.

        Climbs towards the root until a node with a right sibling is found
        and returns that sibling.

        :rtype: Node
        :param la: node to start from
        :return: the right sibling of ``la`` or of its closest ancestor that
            has one
        """
        org = la
        # NOTE(review): assumes right_sibling() is a side-effect-free lookup,
        # so its result is fetched once per node instead of up to three times.
        sibling = la.right_sibling()
        while sibling is None:
            la = la.parent
            sibling = la.right_sibling()
        logging.debug("pop_lookahead(%s): %s", org.symbol, sibling.symbol)
        return sibling

    def shiftable(self, la):
        """Return True if the table has an entry for ``la.symbol`` in the current state."""
        if self.syntaxtable.lookup(self.current_state, la.symbol):
            return True
        return False

    def has_changed(self, node):
        """Return True if ``node`` is listed in ``self.all_changes``."""
        return node in self.all_changes

    def prepare_input(self, _input):
        """Split ``_input`` at single spaces into Terminals plus a FinishSymbol.

        An empty string yields only the ``FinishSymbol``.
        """
        l = []
        # XXX need an additional lexer to do this right
        if _input != "":
            for i in _input.split(" "):
                l.append(Terminal(i))
        l.append(FinishSymbol())
        return l

    def get_ast(self):
        """Wrap ``ast_stack[0]`` between fresh bos/eos nodes under a Root node."""
        bos = Node(Terminal("bos"), 0, [])
        eos = Node(FinishSymbol(), 0, [])
        root = Node(Nonterminal("Root"), 0, [bos, self.ast_stack[0], eos])
        return AST(root)

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a table entry in ``state_id``."""
        l = set()
        for (state, symbol) in self.syntaxtable.table.keys():
            if state == state_id:
                l.add(symbol)
        return l

    def get_next_symbols_list(self, state=-1):
        """Return the names of the symbols possible in ``state``.

        :param state: state id; ``-1`` means the state after the last
            non-whitespace shift (``self.last_shift_state``)
        """
        if state == -1:
            state = self.last_shift_state
        lookahead = self.get_next_possible_symbols(state)

        return [symbol.name for symbol in lookahead]

    def get_next_symbols_string(self, state=-1):
        """Return ``get_next_symbols_list(state)`` joined with ``", "``."""
        l = self.get_next_symbols_list(state)
        return ", ".join(l)

    def get_expected_symbols(self, state_id):
        """Return the non-whitespace symbols expected in ``state_id``.

        Returns an empty list for ``state_id == -1``.
        """
        # XXX if state of a symbol is nullable, return next symbol as well
        # XXX if at end of state, find state we came from (reduce, stack) and
        # get next symbols from there
        if state_id != -1:
            stateset = self.graph.state_sets[state_id]
            symbols = stateset.get_next_symbols_no_ws()
            return symbols
        return []

    def reset(self):
        """Clear all parse state and start over with an empty tree."""
        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore ``last_status`` and ``error_node`` saved for ``version``.

        A missing entry is logged as a warning and leaves the corresponding
        attribute unchanged.
        """
        try:
            self.last_status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        try:
            self.error_node = self.errornode_by_version[version]
        except KeyError:
            logging.warning("Could not find errornode for version %s", version)

    def save_status(self, version):
        """Remember ``last_status`` and ``error_node`` under ``version``."""
        self.status_by_version[version] = self.last_status
        self.errornode_by_version[version] = self.error_node
class LRParser(object):
    """Plain (non-incremental) LR recogniser driven by a generated syntax table.

    The parse stack built by ``check`` alternates symbols and state ids
    (``[FinishSymbol, 0, symbol, state, ...]``), which is why a reduction
    pops two stack entries per right-hand-side symbol.
    """

    def __init__(self, grammar, lr_type=LR0):
        """Build the state graph and syntax table for ``grammar``.

        :param grammar: grammar source handed to ``Parser``
        :param lr_type: table flavour; ``LALR`` additionally converts the graph
        """
        parser = Parser(grammar)
        parser.parse()

        self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
        self.graph.build()

        if lr_type == LALR:
            self.graph.convert_lalr()

        self.syntaxtable = SyntaxTable(lr_type)
        self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []

    def check(self, _input):
        """Return True if ``_input`` is accepted by the grammar, else False.

        :param _input: string of terminals separated by single spaces
        :return: True on Accept; False when the table has no entry for the
            current state/token or the input runs out without an Accept
        """
        self.reset()

        # XXX need an additional lexer to do this right
        tokens = [Terminal(word) for word in _input.split(" ")]
        tokens.append(FinishSymbol())

        self.stack.append(FinishSymbol())
        self.stack.append(0)

        pos = 0
        while pos < len(tokens):
            token = tokens[pos]
            state_id = self.stack[-1]
            element = self.syntaxtable.lookup(state_id, token)
            if element is None:
                return False
            if isinstance(element, Shift):
                self.stack.append(token)
                self.stack.append(element.action)
                pos += 1
            elif isinstance(element, Reduce):
                # NOTE: add_to_ast is not invoked here, so ast_stack stays
                # empty and get_ast() cannot be used after check().
                # Each right-hand-side symbol occupies two entries (symbol, state).
                for _ in range(2 * element.amount()):
                    self.stack.pop()
                state_id = self.stack[-1]
                self.stack.append(element.action.left)
                goto = self.syntaxtable.lookup(state_id, element.action.left)
                assert isinstance(goto, Goto)
                self.stack.append(goto.action)
            elif isinstance(element, Accept):
                return True
        # Input consumed without reaching Accept: report failure explicitly
        # instead of falling off the end and returning None.
        return False

    def add_to_ast(self, element):
        """Build the AST node for a reduction and push it onto ``ast_stack``.

        :param element: table entry whose ``action`` is the reduced production
        """
        children = []
        # action = Production
        # Nonterminal children are popped right-to-left from ast_stack, so the
        # collected list is in reverse order and gets reversed afterwards.
        for symbol in element.action.right:
            if isinstance(symbol, Nonterminal):
                children.append(self.ast_stack.pop())
            if isinstance(symbol, Terminal):
                children.append(Node(symbol, []))
        children.reverse()
        self.ast_stack.append(Node(element.action.left, children))

    def get_ast(self):
        """Wrap the bottom entry of ``ast_stack`` in an ``AST``."""
        return AST(self.ast_stack[0])

    def reset(self):
        """Clear the parse stack and the AST stack."""
        self.stack = []
        self.ast_stack = []
class IncParser(object): def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None): if grammar: logging.debug("Parsing Grammar") parser = Parser(grammar, whitespaces) parser.parse() filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"]) try: logging.debug("Try to unpickle former stategraph") f = open(filename, "r") start = time.time() self.graph = pickle.load(f) end = time.time() logging.debug("unpickling done in %s", end-start) except IOError: logging.debug("could not unpickle old graph") logging.debug("Creating Stategraph") self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type) logging.debug("Building Stategraph") self.graph.build() logging.debug("Pickling") pickle.dump(self.graph, open(filename, "w")) if lr_type == LALR: self.graph.convert_lalr() logging.debug("Creating Syntaxtable") self.syntaxtable = SyntaxTable(lr_type) self.syntaxtable.build(self.graph) self.stack = [] self.ast_stack = [] self.all_changes = [] self.undo = [] self.last_shift_state = 0 self.validating = False self.last_status = False self.error_node = None self.whitespaces = whitespaces self.anycount = set() self.status_by_version = {} self.errornode_by_version = {} self.comment_tokens = [] self.indent_stack = None self.indentation_based = False self.previous_version = None logging.debug("Incemental parser done") def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences): self.graph = None self.syntaxtable = None if pickle_id: filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"]) try: f = open(filename, "r") self.syntaxtable = pickle.load(f) except IOError: pass if self.syntaxtable is None: self.graph = StateGraph(startsymbol, rules, lr_type) self.graph.build() self.syntaxtable = SyntaxTable(lr_type) self.syntaxtable.build(self.graph, precedences) if pickle_id: pickle.dump(self.syntaxtable, open(filename, "w")) 
self.whitespaces = whitespaces if not rules: print("Warning: incparser has not access to comment tokens") elif rules.has_key(Nonterminal("comment")): rule = rules[Nonterminal("comment")] for a in rule.alternatives: self.comment_tokens.append(a[0].name) def init_ast(self, magic_parent=None): bos = BOS(Terminal(""), 0, []) eos = EOS(FinishSymbol(), 0, []) bos.magic_parent = magic_parent eos.magic_parent = magic_parent bos.next_term = eos eos.prev_term = bos root = Node(Nonterminal("Root"), 0, [bos, eos]) self.previous_version = AST(root) root.save(0) bos.save(0) eos.save(0) def reparse(self): self.inc_parse([], True) def inc_parse(self, line_indents=[], reparse=False): logging.debug("============ NEW INCREMENTAL PARSE ================= ") self.validating = False self.error_node = None self.stack = [] self.undo = [] self.current_state = 0 self.stack.append(Node(FinishSymbol(), 0, [])) self.stack[0].indent = [0] bos = self.previous_version.parent.children[0] self.loopcount = 0 self.anycount = set() self.any_newlines = [] self.last_indent = [0] USE_OPT = True eos = self.previous_version.parent.children[-1] d = eos.prev_term while isinstance(d.symbol, IndentationTerminal): d = d.prev_term self.last_token_before_eos = d if isinstance(d, BOS): # if file is empty, delete left over indentation tokens n = d.next_term while isinstance(n.symbol, IndentationTerminal): n.parent.remove_child(n) n = n.next_term # fix indentation after bos. 
Should result in an error for whitespace # at the beginning if bos.next_term.lookup == "<ws>": bos.insert_after(TextNode(IndentationTerminal("INDENT"))) elif isinstance(bos.next_term.symbol, IndentationTerminal): bos.next_term.parent.remove_child(bos.next_term) la = self.pop_lookahead(bos) while(True): logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent) self.loopcount += 1 if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon(): if la.changed:#self.has_changed(la): assert False # with prelexing you should never end up here! else: lookup_symbol = self.get_lookup(la) result = self.parse_terminal(la, lookup_symbol) if result == "Accept": self.last_status = True return True elif result == "Error": self.last_status = False return False elif result != None: la = result else: # Nonterminal if la.changed or reparse: #la.changed = False # as all nonterminals that have changed are being rebuild, there is no need to change this flag (this also solves problems with comments) self.undo.append((la, 'changed', True)) la = self.left_breakdown(la) else: if USE_OPT: goto = self.syntaxtable.lookup(self.current_state, la.symbol) if goto: # can we shift this Nonterminal in the current state? 
logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto) follow_id = goto.action self.stack.append(la) if la.indent: self.last_indent = list(la.indent) la.state = follow_id #XXX this fixed goto error (i should think about storing the states on the stack instead of inside the elements) self.current_state = follow_id logging.debug("USE_OPT: set state to %s", self.current_state) la = self.pop_lookahead(la) self.validating = True continue else: #XXX can be made faster by providing more information in syntax tables first_term = la.find_first_terminal() lookup_symbol = self.get_lookup(first_term) element = self.syntaxtable.lookup(self.current_state, lookup_symbol) if isinstance(element, Reduce): self.reduce(element) else: la = self.left_breakdown(la) else: # PARSER WITHOUT OPTIMISATION if la.lookup != "": lookup_symbol = Terminal(la.lookup) else: lookup_symbol = la.symbol element = self.syntaxtable.lookup(self.current_state, lookup_symbol) if self.shiftable(la): logging.debug("\x1b[37mis shiftable\x1b[0m") self.stack.append(la) self.current_state = la.state self.right_breakdown() la = self.pop_lookahead(la) else: la = self.left_breakdown(la) logging.debug("============ INCREMENTAL PARSE END ================= ") def get_previous_ws(self, node): """Returns the whitespace of the previous logical line""" node = node.prev_term while True: if isinstance(node, BOS): return 0 if node.lookup != "<return>": node = node.prev_term continue if not self.is_logical_line(node): node = node.prev_term continue if node.next_term.lookup == "<ws>": return len(node.next_term.symbol.name) else: return 0 def indents_differ(self, this, other): if len(this) != len(other): return True for i in range(len(this)): if this[i].symbol != other[i].symbol: return True return False def repair_indents(self, node, there, needed): """Updates the indentation tokens of a line, given a list of needed tokens and tokens already there""" it = iter(there) last = node # update indentation 
tokens with new values or insert new ones for e in needed: try: ne = it.next() if e.symbol == ne.symbol: last = ne continue else: ne.symbol.name = e.symbol.name ne.mark_changed() continue except StopIteration: pass last.insert_after(e) last = e # delete all leftovers while True: try: x = it.next() x.parent.remove_child(x) except StopIteration: break def parse_anysymbol(self): symbol = AnySymbol() result = self.syntaxtable.lookup(self.current_state, symbol) if not result: symbol = AnySymbol("@ncr") result = self.syntaxtable.lookup(self.current_state, symbol) return result, symbol def parse_terminal(self, la, lookup_symbol): # try parsing ANYSYMBOL if not isinstance(la.symbol, FinishSymbol): if self.process_any(la): return self.pop_lookahead(la) element = self.syntaxtable.lookup(self.current_state, lookup_symbol) logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s", lookup_symbol, self.current_state, element) if isinstance(element, Accept): #XXX change parse so that stack is [bos, startsymbol, eos] bos = self.previous_version.parent.children[0] eos = self.previous_version.parent.children[-1] self.previous_version.parent.set_children([bos, self.stack[1], eos]) logging.debug("loopcount: %s", self.loopcount) logging.debug ("\x1b[32mAccept\x1b[0m") return "Accept" elif isinstance(element, Shift): self.validating = False self.shift(la, element) return self.pop_lookahead(la) elif isinstance(element, Reduce): logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, element) self.reduce(element) return self.parse_terminal(la, lookup_symbol) elif element is None: if self.validating: logging.debug("Was validating: Right breakdown and return to normal") logging.debug("Before breakdown: %s", self.stack[-1]) self.right_breakdown() logging.debug("After breakdown: %s", self.stack[-1]) self.validating = False else: return self.do_undo(la) def is_logical_line(self, node): """Checks if a line is logical, i.e. 
doesn't only consist of whitespaces or comments""" if node.symbol.name == "\r" and node.prev_term.symbol.name == "\\": return False node = node.next_term while True: if isinstance(node, EOS): return False # this doesn't work as we only know if something is part of a # comment AFTER we parsed it. But by this time it's too late to add # indentation tokens: # if node.parent.symbol.name in ["multiline_string", "single_string", "comment"] and not node.parent.changed: # return False # instead we need to manually check if one of the known comment tokens appears # in the line if node.lookup in self.comment_tokens: #XXX return false or continue? return False if node.lookup == "<return>": # reached next line return False if node.lookup == "<ws>": node = node.next_term continue if isinstance(node.symbol, IndentationTerminal): node = node.next_term continue # if we are here, we reached a normal node return True def parse_whitespace(self, la): """Calculates and repairs indentation levels and tokens after parsing a <return> token. 
Special case: The last token before EOS triggers the generation of the closing dedentations 1) Check if a line is logical or not a) Logical: Update indent levels, compare needed indetation tokens with current ones and update if needed b) Not logical: Remove all indentation tokens and set indent level to None 2) Update succeeding lines that depend(ed) on this line """ if la.lookup == "<return>" or isinstance(la, BOS) or la is self.last_token_before_eos: if not self.is_logical_line(la) and not la is self.last_token_before_eos: # delete indentation tokens and indent level n = la.next_term while isinstance(n.symbol, IndentationTerminal): n.parent.remove_child(n) n = n.next_term la.indent = None newindent = list(self.get_last_indent(la)) ws = self.get_previous_ws(la) else: there = [] n = la.next_term while isinstance(n.symbol, IndentationTerminal): there.append(n) n = n.next_term if n.lookup == "<ws>": ws = len(n.symbol.name) else: ws = 0 last_indent = list(self.get_last_indent(la)) needed, newindent = self.get_indentation_tokens_and_indent(last_indent, ws) indent_stack_eq = newindent == la.indent if la is not self.last_token_before_eos: la.indent = list(newindent) self.last_indent = list(la.indent) if self.indents_differ(there, needed): self.repair_indents(la, there, needed) elif indent_stack_eq: return self.update_succeeding_lines(la, ws, newindent) def update_succeeding_lines(self, la, ws, newindent): # update succeeding lines # XXX this causes a chain reaction iterating over some lines # multiple times. 
we might only have to do this for the <return> # that has actually changed during the parse next_r = la.next_term while True: if isinstance(next_r, EOS): # if changes reach end of file, repair indentations now or # it will be too late eos_there = [] d = next_r.prev_term while isinstance(d.symbol, IndentationTerminal): eos_there.insert(0, d) d = d.prev_term eos_needed, _ = self.get_indentation_tokens_and_indent(list(self.get_last_indent(d)), 0) if self.indents_differ(eos_there, eos_needed): self.last_token_before_eos.mark_changed() # don't repair here, only mark and repair just before last token is parsed break if next_r.lookup != "<return>": next_r = next_r.next_term continue # XXX need to skip unlogical lines (what if don't know if unlogical yet) # if tokens need to be updated, mark as changed, so the parser will go down this tree to update next_ws = self.get_whitespace(next_r) if next_ws is None: next_r = next_r.next_term continue needed, newindent = self.get_indentation_tokens_and_indent(newindent, next_ws) if not self.indents_match(next_r, needed) or next_r.indent != newindent: next_r.mark_changed() if next_ws < ws: # if newline has smaller whitespace -> mark and break break next_r = next_r.next_term def get_indentation_tokens_and_indent(self, indent, ws): needed = [] newindent = [] if ws > indent[-1]: needed.append(Node(IndentationTerminal("NEWLINE"))) needed.append(Node(IndentationTerminal("INDENT"))) newindent = indent + [ws] elif ws < indent[-1]: needed.append(Node(IndentationTerminal("NEWLINE"))) while ws < indent[-1]: indent.pop() needed.append(Node(IndentationTerminal("DEDENT"))) newindent = list(indent) if ws != indent[-1]: # XXX in future, just ERROR here needed.append(Node(IndentationTerminal("UNBALANCED"))) else: needed.append(Node(IndentationTerminal("NEWLINE"))) newindent = list(indent) return needed, newindent def indents_match(self, node, needed): there = [] n = node.next_term while isinstance(n.symbol, IndentationTerminal): there.append(n) n = 
n.next_term if len(there) != len(needed): return False for i in range(len(there)): if there[i].symbol != needed[i].symbol: return False return True def get_whitespace(self, node): if not self.is_logical_line(node): return None node = node.next_term while isinstance(node.symbol, IndentationTerminal): node = node.next_term if node.lookup == "<ws>": return len(node.symbol.name) return 0 def get_last_indent(self, la): return self.last_indent # XXX not the most performant solution as it iterates over all elements # on the stack until one has it's indent level set, which will be # either a return terminal or a Nonterminal with a return somewhere in # its subtrees # -> replace with global variable for n in reversed(self.stack): if n.indent and n is not la: return n.indent def set_total_indent(self, node): l = [] if node.children: for c in node.children: if c.indent: l = c.indent if l: node.indent = l def get_lookup(self, la): if la.lookup != "": lookup_symbol = Terminal(la.lookup) else: lookup_symbol = la.symbol if isinstance(lookup_symbol, IndentationTerminal): #XXX hack: change parsing table to accept IndentationTerminals lookup_symbol = Terminal(lookup_symbol.name) return lookup_symbol def do_undo(self, la): while len(self.undo) > 0: node, attribute, value = self.undo.pop(-1) setattr(node, attribute, value) self.error_node = la logging.debug ("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term, la.next_term) logging.debug("loopcount: %s", self.loopcount) return "Error" def reduce(self, element): # Reduces elements from the stack to a Nonterminal subtree. 
special: # COMMENT subtrees that are found on the stack during reduction are # added "silently" to the subtree (they don't count to the amount of # symbols of the reduction) children = [] i = 0 while i < element.amount(): c = self.stack.pop() # apply folding information from grammar to tree nodes fold = element.action.right[element.amount()-i-1].folding c.symbol.folding = fold children.insert(0, c) if c not in self.anycount: # if this node is part of any, don't count it towards reduce elements i += 1 logging.debug(" Element on stack: %s(%s)", self.stack[-1].symbol, self.stack[-1].state) self.current_state = self.stack[-1].state #XXX don't store on nodes, but on stack logging.debug(" Reduce: set state to %s (%s)", self.current_state, self.stack[-1].symbol) goto = self.syntaxtable.lookup(self.current_state, element.action.left) if goto is None: raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state)) assert goto != None # save childrens parents state for c in children: self.undo.append((c, 'parent', c.parent)) self.undo.append((c, 'left', c.left)) self.undo.append((c, 'right', c.right)) self.undo.append((c, 'log', c.log.copy())) new_node = Node(element.action.left.copy(), goto.action, children) self.set_total_indent(new_node) logging.debug(" Add %s to stack and goto state %s", new_node.symbol, new_node.state) self.stack.append(new_node) self.current_state = new_node.state # = goto.action logging.debug("Reduce: set state to %s (%s)", self.current_state, new_node.symbol) if getattr(element.action.annotation, "interpret", None): # eco grammar annotations self.interpret_annotation(new_node, element.action) else: # johnstone annotations self.add_alternate_version(new_node, element.action) def interpret_annotation(self, node, production): annotation = production.annotation if annotation: astnode = annotation.interpret(node) node.alternate = astnode def add_alternate_version(self, node, production): # add alternate (folded) versions 
for nodes to the tree alternate = TextNode(node.symbol.__class__(node.symbol.name), node.state, []) alternate.children = [] teared = [] for i in range(len(node.children)): if production.inserts.has_key(i): # insert teared nodes at right position value = production.inserts[i] for t in teared: if t.symbol.name == value.name: alternate.children.append(t) c = node.children[i] if c.symbol.folding == "^^^": c.symbol.folding = None teared.append(c) continue elif c.symbol.folding == "^^": while c.alternate is not None: c = c.alternate alternate.symbol = c.symbol for child in c.children: alternate.children.append(child) elif c.symbol.folding == "^": while c.alternate is not None: c = c.alternate for child in c.children: alternate.children.append(child) else: alternate.children.append(c) node.alternate = alternate def left_breakdown(self, la): if len(la.children) > 0: return la.children[0] else: return self.pop_lookahead(la) def right_breakdown(self): node = self.stack.pop() # optimistically shifted Nonterminal # after the breakdown, we need to properly shift the left over terminal # using the (correct) current state from before the optimistic shift of # it's parent tree self.current_state = self.stack[-1].state logging.debug("right breakdown(%s): set state to %s", node.symbol.name, self.current_state) while(isinstance(node.symbol, Nonterminal)): for c in node.children: if not self.process_any(c): # in breakdown we also have to take care of ANYSYMBOLs self.shift(c, rb=True) c = c.right node = self.stack.pop() # after undoing an optimistic shift (through pop) we need to revert # back to the state before the shift (which can be found on the top # of the stack after the "pop" if isinstance(node.symbol, FinishSymbol): # if we reached the end of the stack, reset to state 0 and push # FinishSymbol pack onto the stack self.current_state = 0 self.stack.append(node) return else: logging.debug("right breakdown else: set state to %s", self.stack[-1].state) self.current_state = 
self.stack[-1].state if not self.process_any(node): self.shift(node, rb=True) # pushes previously popped terminal back on stack def shift(self, la, element=None, rb=False): if not element: lookup_symbol = self.get_lookup(la) element = self.syntaxtable.lookup(self.current_state, lookup_symbol) logging.debug("\x1b[32m" + "%sShift(%s)" + "\x1b[0m" + ": %s -> %s", "rb" if rb else "", self.current_state, la, element) la.state = element.action self.stack.append(la) self.current_state = la.state if not la.lookup == "<ws>": # last_shift_state is used to predict next symbol # whitespace destroys correct behaviour self.last_shift_state = element.action if self.indentation_based and not rb: return self.parse_whitespace(la) def process_any(self, la): result, symbol = self.parse_anysymbol() if result: # ANYSYMBOL with finishing symbol r_finish = self.syntaxtable.lookup(result.action, self.get_lookup(la)) if isinstance(r_finish, Shift): self.end_any(la, result) return False # ANY without finishing symbol elif symbol.name == "@ncr" and (la.lookup == "<return>" or la.symbol == IndentationTerminal("NEWLINE") or isinstance(la, EOS)): self.end_any(la, result, symbol.name) return False else: self.push_any(la) return True def push_any(self, la): logging.debug("AnySymbol: push %s" % (la)) la.state = self.current_state # this node is now part of this comment state (needed to unvalidating) self.stack.append(la) self.anycount.add(la) if la.lookup == "<return>" and self.indentation_based: self.any_newlines.append(la) def end_any(self, la, result, mode="@"): logging.debug("AnySymbol: end %s (%s)" % (la, mode)) self.current_state = result.action # switch to state after ANY and continue parsing normally logging.debug("AnySymbol: set state to %s", self.current_state) # update succeeding if self.indentation_based: for n in self.any_newlines: self.update_succeeding_lines(n, self.last_indent[-1], list(self.last_indent)) self.any_newlines = [] def pop_lookahead(self, la): org = la 
while(la.right_sibling() is None): la = la.parent logging.debug("pop_lookahead(%s): %s", org.symbol, la.right_sibling().symbol) return la.right_sibling() def shiftable(self, la): if self.syntaxtable.lookup(self.current_state, la.symbol): return True return False def has_changed(self, node): return node in self.all_changes def prepare_input(self, _input): l = [] # XXX need an additional lexer to do this right if _input != "": for i in _input.split(" "): l.append(Terminal(i)) l.append(FinishSymbol()) return l def get_ast(self): bos = Node(Terminal("bos"), 0, []) eos = Node(FinishSymbol(), 0, []) root = Node(Nonterminal("Root"), 0, [bos, self.ast_stack[0], eos]) return AST(root) def get_next_possible_symbols(self, state_id): l = set() for (state, symbol) in self.syntaxtable.table.keys(): if state == state_id: l.add(symbol) return l def get_next_symbols_list(self, state = -1): if state == -1: state = self.last_shift_state lookahead = self.get_next_possible_symbols(state) s = [] for symbol in lookahead: s.append(symbol.name) return s def get_next_symbols_string(self, state = -1): l = self.get_next_symbols_list(state) return ", ".join(l) def get_expected_symbols(self, state_id): #XXX if state of a symbol is nullable, return next symbol as well #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there if state_id != -1: stateset = self.graph.state_sets[state_id] symbols = stateset.get_next_symbols_no_ws() return symbols return [] def reset(self): self.stack = [] self.ast_stack = [] self.all_changes = [] self.undo = [] self.last_shift_state = 0 self.validating = False self.last_status = False self.error_node = None self.previous_version = None self.init_ast() def load_status(self, version): try: self.last_status = self.status_by_version[version] except KeyError: logging.warning("Could not find status for version %s", version) try: self.error_node = self.errornode_by_version[version] except KeyError: logging.warning("Could not find 
errornode for version %s", version) def save_status(self, version): self.status_by_version[version] = self.last_status self.errornode_by_version[version] = self.error_node
class IncParser(object):
    """Incremental LR parser operating on a previously saved parse tree.

    The parser walks the tree stored in ``self.previous_version``, reuses
    unchanged subtrees where the syntax table allows it and re-parses the
    rest.  All tree modifications made during a parse are recorded in
    ``self.undo`` so that they can be reverted when the parse fails.
    """

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create a parser, optionally building its tables from `grammar`.

        When `grammar` is given, the state graph is loaded from a pickle
        cache (keyed by the hash of the grammar and the whitespace flag) or
        built and cached if no cache file can be read.  Without a grammar
        only the parser state is initialised; the tables can be provided
        later through `from_dict`.

        `startsymbol` is accepted but not used by this constructor.
        """
        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"])
            try:
                logging.debug("Try to unpickle former stategraph")
                # NOTE: pickle.load executes whatever the file describes, so
                # the cache directory has to be trusted.
                # The context manager closes the handle deterministically
                # instead of leaving that to the garbage collector.
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end-start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                # closing the file explicitly guarantees the cache is
                # flushed to disk before anyone tries to read it again
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornode_by_version = {}

        self.previous_version = None
        logging.debug("Incemental parser done")

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Set up the syntax table from already parsed grammar rules.

        If `pickle_id` is truthy, a cached syntax table is looked up first
        and, when it had to be rebuilt, written back to the cache.  When the
        table is loaded from the cache ``self.graph`` stays ``None``.
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"])
            try:
                # NOTE: pickle.load executes whatever the file describes, so
                # the cache directory has to be trusted.
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # best effort: an unreadable cache simply means we rebuild
                pass
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                # close (and thereby flush) the cache file deterministically
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces

    def init_ast(self, magic_parent=None):
        """Create the initial tree ``Root -> [BOS, EOS]`` and save it as version 0."""
        bos = BOS(Terminal(""), 0, [])
        eos = EOS(FinishSymbol(), 0, [])
        bos.magic_parent = magic_parent
        eos.magic_parent = magic_parent
        # BOS and EOS are the only terminals, so they are direct neighbours
        bos.next_term = eos
        eos.prev_term = bos
        root = Node(Nonterminal("Root"), 0, [bos, eos])
        self.previous_version = AST(root)
        root.save(0)
        bos.save(0)
        eos.save(0)

    def reparse(self):
        """Run `inc_parse` with the reparse flag set (return value is discarded)."""
        self.inc_parse([], True)

    def inc_parse(self, line_indents=[], reparse=False):
        """Parse the tree in ``self.previous_version`` incrementally.

        `line_indents` is not used by this implementation.  If `reparse` is
        true every nonterminal is broken down as if it had changed.

        Returns True when the input was accepted and False on a parse error;
        ``self.last_status`` is set to the same value.
        """
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        self.stack.append(Node(FinishSymbol(), 0, []))
        bos = self.previous_version.parent.children[0]
        la = self.pop_lookahead(bos)
        self.loopcount = 0
        self.comment_mode = False

        USE_OPT = True

        while(True):
            self.loopcount += 1
            if self.comment_mode:
                if la.lookup == "cmt_end":
                    # in comment mode we just add all subtrees as they are to a
                    # subtree COMMENT subtrees that have changes are broken
                    # apart, e.g. to be able to find an inserted */ the CMT
                    # subtree is then added to the parsers stack without
                    # changing its state when the parser later reduces stack
                    # elements to a new subtree, CMT subtrees are added as
                    # children
                    next_la = self.pop_lookahead(la)
                    self.comment_mode = False
                    comment_stack.append(la)
                    CMT = Node(Nonterminal("~COMMENT~"))
                    for c in comment_stack:
                        self.undo.append((c, 'parent', c.parent))
                        self.undo.append((c, 'left', c.left))
                        self.undo.append((c, 'right', c.right))
                    CMT.set_children(comment_stack)
                    CMT.state = self.current_state
                    self.stack.append(CMT)
                    la = next_la
                    continue
                if isinstance(la, EOS):
                    # comment was never closed: revert all changes and fail
                    self.comment_mode = False
                    self.do_undo(la)
                    self.last_status = False
                    return False
                la = self.add_to_stack(la, comment_stack)
                continue
            if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:#self.has_changed(la):
                    assert False # with prelexing you should never end up here!
                else:
                    if la.lookup == "cmt_start":
                        # when we find a cmt_start token, we enter comment mode
                        self.comment_mode = True
                        comment_stack = []
                        comment_stack.append(la)
                        # since unchanged subtrees are left untouched, we
                        # wouldn't find a cmt_end if it is part of another
                        # comment, e.g. /* foo /* bar */ to be able to merge
                        # two comment together, we need to find the next
                        # cmt_end and mark its subtree as changed
                        end = la
                        # XXX configure these through the grammar, e.g. Java
                        # needs /*@*/, Python """@""" (@ means, match anything)
                        while True:
                            end = end.next_term
                            if isinstance(end, EOS):
                                break
                            # "> 0": only matches a "*/" that is not at the
                            # very beginning of the token name
                            if end.symbol.name.find("*/") > 0:
                                # split token
                                # NOTE(review): self.lexer is not assigned
                                # anywhere in this class - presumably set by
                                # the owner of the parser; confirm
                                self.lexer.split_endcomment(end)
                                break
                            if end.lookup == "cmt_end":
                                end.mark_changed()
                                break
                        la = self.pop_lookahead(la)
                        continue

                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        self.last_status = True
                        return True
                    elif result == "Error":
                        self.last_status = False
                        return False
                    elif result != None:
                        la = result
                    # result is None after a right breakdown: retry the same
                    # lookahead in the restored state

            else: # Nonterminal
                if la.changed or reparse:
                    la.changed = False
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        if goto: # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.state = follow_id #XXX this fixed goto error (i should think about storing the states on the stack instead of inside the elements)
                            self.current_state = follow_id
                            la = self.pop_lookahead(la)
                            # the shift was optimistic and is validated by
                            # the next terminal (see parse_terminal)
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal()
                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        # not reached: the loop above is only left through a return
        logging.debug("============ INCREMENTAL PARSE END ================= ")

    def add_to_stack(self, la, stack):
        """Collect the content of a comment on `stack`.

        Comment helper that adds elements to the comment stack and, if `la`
        is a subtree with changes, breaks it apart and continues with its
        children.  Returns the node at which collecting stopped: either a
        ``cmt_end`` terminal or EOS (neither is added to `stack`).
        """
        while True:
            if isinstance(la.symbol, Terminal) and la.lookup == "cmt_end":
                return la
            if isinstance(la, EOS):
                return la
            if la.changed:
                # changed nodes are not added themselves; descend into them
                # or, if they have no children, skip them
                if la.children:
                    la = la.children[0]
                else:
                    la = self.pop_lookahead(la)
                continue
            else:
                stack.append(la)
                la = self.pop_lookahead(la)
                continue

    def parse_terminal(self, la, lookup_symbol):
        """Perform the syntax table action for terminal `la`.

        Returns "Accept" when parsing finished, the next lookahead after a
        shift, the result of `do_undo` ("Error") when no action exists, or
        None when an optimistic shift was broken down and `la` has to be
        processed again.  Reductions are applied until one of the other
        actions is reached.
        """
        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("parse_terminal: %s in %s -> %s", lookup_symbol, self.current_state, element)
        if isinstance(element, Accept):
            #XXX change parse so that stack is [bos, startsymbol, eos]
            bos = self.previous_version.parent.children[0]
            eos = self.previous_version.parent.children[-1]
            self.previous_version.parent.set_children([bos, self.stack[1], eos])
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug ("Accept")
            return "Accept"
        elif isinstance(element, Shift):
            logging.debug("Shift: %s -> %s", la, element.action)
            # removing this makes "Valid tokens" correct, should not be needed
            # for incremental parser
            #self.undo.append((la, "state", la.state))
            la.state = element.action
            self.stack.append(la)
            self.current_state = element.action
            if not la.lookup == "<ws>":
                # last_shift_state is used to predict next symbol
                # whitespace destroys correct behaviour
                self.last_shift_state = element.action
            return self.pop_lookahead(la)

        elif isinstance(element, Reduce):
            logging.debug("Reduce: %s -> %s", la, element)
            self.reduce(element)
            return self.parse_terminal(la, lookup_symbol)
        elif element is None:
            if self.validating:
                # the last nonterminal was shifted optimistically and turned
                # out to be wrong: break it down and let the caller retry
                self.right_breakdown()
                self.validating = False
            else:
                return self.do_undo(la)

    def get_lookup(self, la):
        """Return the symbol used to look up `la` in the syntax table.

        This is ``Terminal(la.lookup)`` if the node has a non-empty lookup
        name and the node's own symbol otherwise.
        """
        if la.lookup != "":
            lookup_symbol = Terminal(la.lookup)
        else:
            lookup_symbol = la.symbol
        if isinstance(lookup_symbol, IndentationTerminal):
            #XXX hack: change parsing table to accept IndentationTerminals
            lookup_symbol = Terminal(lookup_symbol.name)
        return lookup_symbol

    def do_undo(self, la):
        """Revert all recorded tree changes and mark `la` as the error node.

        The undo log is replayed from newest to oldest entry.  Always
        returns the string "Error".
        """
        while len(self.undo) > 0:
            node, attribute, value = self.undo.pop(-1)
            setattr(node, attribute, value)
        self.error_node = la
        logging.debug ("Error: %s %s %s", la, la.prev_term, la.next_term)
        logging.debug("loopcount: %s", self.loopcount)
        return "Error"

    def reduce(self, element):
        """Reduce elements from the stack to a Nonterminal subtree.

        special: COMMENT subtrees that are found on the stack during
        reduction are added "silently" to the subtree (they don't count to
        the amount of symbols of the reduction).

        Raises an Exception if the syntax table has no goto entry for the
        reduced symbol.
        """
        children = []
        i = 0
        while i < element.amount():
            c = self.stack.pop()
            # apply folding information from grammar to tree nodes
            fold = element.action.right[element.amount()-i-1].folding
            c.symbol.folding = fold
            children.insert(0, c)
            if c.symbol.name != "~COMMENT~":
                i += 1
        # a comment directly in front of the reduced symbols is taken along
        if self.stack[-1].symbol.name == "~COMMENT~":
            c = self.stack.pop()
            children.insert(0, c)
        logging.debug(" Element on stack: %s(%s)", self.stack[-1].symbol, self.stack[-1].state)
        self.current_state = self.stack[-1].state #XXX don't store on nodes, but on stack

        goto = self.syntaxtable.lookup(self.current_state, element.action.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state))

        # save childrens parents state
        for c in children:
            self.undo.append((c, 'parent', c.parent))
            self.undo.append((c, 'left', c.left))
            self.undo.append((c, 'right', c.right))

        new_node = Node(element.action.left.copy(), goto.action, children)
        logging.debug(" Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        self.current_state = new_node.state # = goto.action
        if getattr(element.action.annotation, "interpret", None):
            # eco grammar annotations
            self.interpret_annotation(new_node, element.action)
        else:
            # johnstone annotations
            self.add_alternate_version(new_node, element.action)

    def interpret_annotation(self, node, production):
        """Store the result of the production's annotation as ``node.alternate``."""
        annotation = production.annotation
        if annotation:
            astnode = annotation.interpret(node)
            node.alternate = astnode

    def add_alternate_version(self, node, production):
        """Add an alternate (folded) version of `node` to the tree.

        The children of the alternate node are derived from the folding
        markers on the children's symbols: "^^^" tears a child out so it can
        be inserted at a position given by ``production.inserts``, "^^"
        replaces the alternate's symbol by the child's and adopts the
        child's children, "^" only adopts the child's children.
        """
        alternate = TextNode(node.symbol.__class__(node.symbol.name), node.state, [])
        alternate.children = []
        teared = []
        for i, c in enumerate(node.children):
            if i in production.inserts:
                # insert teared nodes at right position
                value = production.inserts[i]
                for t in teared:
                    if t.symbol.name == value.name:
                        alternate.children.append(t)
            if c.symbol.folding == "^^^":
                c.symbol.folding = None
                teared.append(c)
                continue
            elif c.symbol.folding == "^^":
                # follow the chain of alternates to its end
                while c.alternate is not None:
                    c = c.alternate
                alternate.symbol = c.symbol
                for child in c.children:
                    alternate.children.append(child)
            elif c.symbol.folding == "^":
                while c.alternate is not None:
                    c = c.alternate
                for child in c.children:
                    alternate.children.append(child)
            else:
                alternate.children.append(c)
        node.alternate = alternate

    def left_breakdown(self, la):
        """Return the first child of `la`, or the next lookahead if it has none."""
        if len(la.children) > 0:
            return la.children[0]
        else:
            return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo an optimistic shift by re-shifting the subtree's contents.

        Pops the node on top of the stack and, as long as the popped node is
        a nonterminal, shifts its children individually and pops again.
        """
        node = self.stack.pop()
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown: set state to %s", self.current_state)
        while(isinstance(node.symbol, Nonterminal)):
            for c in node.children:
                self.shift(c)
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop"
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol pack onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                self.current_state = self.stack[-1].state
        # pushes the previously popped terminal back on the stack
        self.shift(node)

    def shift(self, la):
        """Shift `la` onto the stack during a right breakdown.

        After the breakdown, we need to properly shift the left over
        terminal using the (correct) current state from before the
        optimistic shift of its parent tree.
        """
        lookup_symbol = self.get_lookup(la)
        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("RBShift: la: %s state: %s element: %s", la, la.state, element)
        la.state = element.action
        self.stack.append(la)
        logging.debug("RBShift: set state to %s", la.state)
        self.current_state = la.state

    def pop_lookahead(self, la):
        """Return the right sibling of `la` or of its closest ancestor that has one."""
        while(la.right_sibling() is None):
            la = la.parent
        return la.right_sibling()

    def shiftable(self, la):
        """Return True if the syntax table has a truthy entry for `la`'s symbol in the current state."""
        if self.syntaxtable.lookup(self.current_state, la.symbol):
            return True
        return False

    def has_changed(self, node):
        """Return whether `node` is contained in ``self.all_changes``."""
        return node in self.all_changes

    def prepare_input(self, _input):
        """Split `_input` at single spaces into Terminals, followed by a FinishSymbol."""
        l = []
        # XXX need an additional lexer to do this right
        if _input != "":
            for i in _input.split(" "):
                l.append(Terminal(i))
        l.append(FinishSymbol())
        return l

    def get_ast(self):
        """Wrap the first element of ``self.ast_stack`` in a fresh ``Root`` tree."""
        bos = Node(Terminal("bos"), 0, [])
        eos = Node(FinishSymbol(), 0, [])
        root = Node(Nonterminal("Root"), 0, [bos, self.ast_stack[0], eos])
        return AST(root)

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a syntax table entry in state `state_id`."""
        l = set()
        for (state, symbol) in self.syntaxtable.table.keys():
            if state == state_id:
                l.add(symbol)
        return l

    def get_next_symbols_list(self, state = -1):
        """Return the names of the symbols possible in `state`.

        With the default of -1 the state of the last non-whitespace shift is
        used.
        """
        if state == -1:
            state = self.last_shift_state
        lookahead = self.get_next_possible_symbols(state)

        s = []
        for symbol in lookahead:
            s.append(symbol.name)
        return s

    def get_next_symbols_string(self, state = -1):
        """Return the result of `get_next_symbols_list` joined by ", "."""
        l = self.get_next_symbols_list(state)
        return ", ".join(l)

    def get_expected_symbols(self, state_id):
        """Return the next symbols (without whitespace) of state `state_id`.

        Returns an empty list for a `state_id` of -1.  Requires
        ``self.graph`` to hold a state graph.
        """
        #XXX if state of a symbol is nullable, return next symbol as well
        #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id != -1:
            stateset = self.graph.state_sets[state_id]
            symbols = stateset.get_next_symbols_no_ws()
            return symbols
        return []

    def reset(self):
        """Reset the parser state and start over with a fresh initial tree."""
        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore status and error node saved for `version`.

        A value that was not saved for `version` is left unchanged and a
        warning is logged.
        """
        try:
            self.last_status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        try:
            self.error_node = self.errornode_by_version[version]
        except KeyError:
            logging.warning("Could not find errornode for version %s", version)

    def save_status(self, version):
        """Remember the current status and error node under `version`."""
        self.status_by_version[version] = self.last_status
        self.errornode_by_version[version] = self.error_node