Ejemplo n.º 1
0
class IncParser(object):

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create an incremental parser, optionally building its tables from a grammar.

        When `grammar` is given it is parsed with `Parser`, and the state graph
        is either loaded from a pickle cache file (named after
        ``hash(grammar) ^ hash(whitespaces)``) or built from scratch and written
        to that file. The syntax table is then built from the graph. When
        `grammar` is falsy no graph or syntax table is created here.

        Arguments:
            grammar: grammar source handed to `Parser`; falsy skips table
                construction.
            lr_type: table type passed to `StateGraph` and `SyntaxTable`; for
                `LALR` the graph is converted after loading/building.
            whitespaces: passed to `Parser` and stored on the instance.
            startsymbol: accepted but not used by this method.

        Raises:
            IOError: if the cache file cannot be written after building the
                graph (a failed read is handled by rebuilding).
        """

        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"])
            try:
                logging.debug("Try to unpickle former stategraph")
                # NOTE(review): pickle.load executes whatever the cache file
                # contains; the pickle directory must not be writable by
                # untrusted parties.
                # `with` guarantees the handle is closed even if loading fails.
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end-start)
            except IOError:
                # No readable cache file: build the graph and cache it.
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        # Parser state; most of it is (re)initialised again by inc_parse.
        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornodes_by_version = {}
        self.indentation_based = False

        # `previous_version` holds the AST wrapper (set by init_ast);
        # `prev_version` is the version number used for history lookups.
        self.previous_version = None
        self.prev_version = 0

        # Set to a (temp_eos, node, expected_state) tuple while this parser is
        # used for out-of-context analysis; None for a normal parse.
        self.ooc = None


    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Initialise graph and syntax table from already parsed grammar rules.

        If `pickle_id` is truthy, a cached syntax table is loaded from the
        pickle file named after ``pickle_id ^ hash(whitespaces)``. If there is
        no readable cache (or no `pickle_id`), the state graph and syntax table
        are built from `rules` and, when `pickle_id` is truthy, the table is
        written to the cache file.

        Arguments:
            rules: grammar rules passed to `StateGraph`.
            startsymbol: start symbol passed to `StateGraph`.
            lr_type: table type passed to `StateGraph` and `SyntaxTable`.
            whitespaces: stored on the instance and mixed into the cache name.
            pickle_id: integer cache key; falsy disables caching.
            precedences: passed to `SyntaxTable.build`.

        Raises:
            IOError: if the cache file cannot be written after building.
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"])
            try:
                # NOTE(review): pickle.load trusts the cache file's contents.
                # `with` closes the handle even when unpickling fails.
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # No readable cache: fall through and build the table below.
                pass
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                # `filename` is defined here because pickle_id was truthy above.
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces

    def init_ast(self, magic_parent=None):
        """Create the initial tree: a "Root" node containing only BOS and EOS.

        Both sentinels get `magic_parent` attached and are linked to each other
        as neighbouring terminals. The tree is stored in `previous_version`
        and all three nodes are saved as version 0.
        """
        first = BOS(Terminal(""), 0, [])
        last = EOS(FinishSymbol(), 0, [])
        for sentinel in (first, last):
            sentinel.magic_parent = magic_parent
        # BOS is directly followed by EOS in the empty tree.
        first.next_term, last.prev_term = last, first
        tree_root = Node(Nonterminal("Root"), 0, [first, last])
        self.previous_version = AST(tree_root)
        for created in (tree_root, first, last):
            created.save(0)

    def reparse(self):
        """Run `inc_parse` with `needs_reparse` enabled; the result is discarded."""
        self.inc_parse(line_indents=[], needs_reparse=True)

    def inc_parse(self, line_indents=None, needs_reparse=False, state=0, stack=None):
        """Parse the tree in `previous_version` and report whether it is error free.

        The parser walks the tree starting at the lookahead following BOS.
        Terminals are handed to `parse_terminal`; nonterminals are either
        broken down into their children, shifted as a whole (optimistic
        shift), or used to trigger a reduction.

        If `self.ooc` is set (a ``(temp_eos, node, expected_state)`` tuple) the
        parse is an out-of-context analysis: it stops once the lookahead is the
        temporary EOS and succeeds only if the expected state is reached with
        exactly two elements on the stack.

        Arguments:
            line_indents: not used by this method.
            needs_reparse: if true, every nonterminal lookahead is broken down
                instead of being reused.
            state: parser state to start in.
            stack: initial parse stack; when empty or None, ``[eos]`` is used.

        Returns:
            True if the parse was accepted without recorded error nodes (or the
            out-of-context parse reached its expected state), False otherwise.
            The same value is stored in `self.last_status`.
        """
        logging.debug("============ NEW %s PARSE ================= ", "OOC" if self.ooc else "INCREMENTAL")
        logging.debug("= starting in state %s ", state)
        self.validating = False
        self.reused_nodes = set()
        self.current_state = state
        self.previous_version.parent.isolated = None
        bos = self.previous_version.parent.children[0]
        eos = self.previous_version.parent.children[-1]
        if not stack:
            self.stack = [eos]
        else:
            self.stack = stack
        eos.state = 0
        self.loopcount = 0
        self.needs_reparse = needs_reparse
        self.error_nodes = []
        self.error_pres = []
        if self.ooc:
            # During out-of-context analysis recovery works on the analysed node.
            rmroot = self.ooc[1]
        else:
            rmroot = self.previous_version.parent
        self.rm = RecoveryManager(self.prev_version, rmroot, self.stack, self.syntaxtable)

        # Switch for the optimistic-shift path; the branch taken when this is
        # False is kept as the unoptimised reference implementation.
        USE_OPT = True

        la = self.pop_lookahead(bos)
        while True:
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
            self.loopcount += 1

            # Abort condition for out-of-context analysis. If we reached the state of the
            # node that is being analysed and the lookahead matches the node's
            # lookahead from the previous parse, we are done
            if self.ooc:
                logging.debug("ooc %s %s", self.ooc, id(self.ooc))
                logging.debug("la %s", la)
                logging.debug("cs %s", self.current_state)
                if la is self.ooc[0]:
                    if isinstance(la.symbol, Nonterminal):
                        # if OOC is Nonterminal, use first terminal to apply
                        # reductions
                        first_term = la.find_first_terminal(self.prev_version)
                        lookup = self.get_lookup(first_term)
                    else:
                        lookup = self.get_lookup(la)
                    while True:
                        # OOC is complete if we reached the expected state and
                        # there are no more reductions left to do
                        if self.current_state == self.ooc[2] and len(self.stack) == 2:
                            logging.debug("======= OOC parse successfull =========")
                            self.last_status = True
                            return True
                        # Otherwise apply more reductions to reach the wanted
                        # state or an error occurs
                        element = self.syntaxtable.lookup(self.current_state, lookup)
                        if not isinstance(element, Reduce):
                            logging.debug("No more reductions")
                            break
                        else:
                            self.reduce(element)
                    logging.debug("======= OOC parse failed =========")
                    self.last_status = False
                    return False

            if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
                lookup_symbol = self.get_lookup(la)
                result = self.parse_terminal(la, lookup_symbol)
                if result == "Accept":
                    logging.debug("============ INCREMENTAL PARSE END (ACCEPT) ================= ")
                    # With error recovery we can end up in the accepting
                    # state despite errors occuring during the parse.
                    if len(self.error_nodes) == 0:
                        self.last_status = True
                        return True
                    self.last_status = False
                    return False
                elif result == "Error":
                    logging.debug("============ INCREMENTAL PARSE END (ERROR) ================= ")
                    self.last_status = False
                    return False
                elif result is not None:
                    # parse_terminal returned the next lookahead; None means
                    # "retry with the same lookahead".
                    la = result

            else: # Nonterminal
                if la.has_changes() or needs_reparse or la.has_errors() or self.iso_context_changed(la):
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        # Only opt-shift if the nonterminal has children to
                        # avoid a bug in the retainability algorithm. See
                        # test/test_eco.py::Test_RetainSubtree::test_bug1
                        if goto and la.children: # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.deleted = False
                            la.state = follow_id #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                            la.exists = True
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s", self.current_state)
                            if la.isolated:
                                # When skipping previously isolated subtrees,
                                # traverse their children to find the error
                                # nodes and report them back to the editor.
                                self.find_nested_error(la)
                            la = self.pop_lookahead(la)
                            # The shift was optimistic; parse_terminal undoes
                            # it via right_breakdown if the next terminal
                            # has no action.
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal(self.prev_version)

                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                logging.debug("OPT Reduce: %s", element)
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        # (not executed while USE_OPT is True)
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)

    def parse_terminal(self, la, lookup_symbol):
        """Look up the lookahead in the syntax table and apply the resulting action.

        Returns:
            - the next lookahead node after a shift, a skipped deleted node or
              a successful error recovery;
            - `la` itself after a reduction or an "<eos>" shift, so the caller
              processes it again;
            - the string "Accept" when the parse is accepted;
            - the string "Error" when no subtree could be isolated and the
              whole tree was refined instead;
            - None when an optimistic shift was rolled back or the table entry
              is of any other kind.
        """
        if la.deleted:
            # Nodes are no longer removed from the tree. Deleted nodes are
            # skipped instead so they don't end up in the next parse tree,
            # which allows reverting the deletion on undo.
            return self.pop_lookahead(la)
        # XXX if temporary EOS symbol, check lookup
        #        if accept: return accept
        #        if nothing: try normal EOS instead (e.g. to reduce things)

        table = self.syntaxtable
        action = None
        if isinstance(la, EOS):
            # Needed so single line comments at the end of the file can be
            # finished.
            action = table.lookup(self.current_state, Terminal("<eos>"))
            if isinstance(action, Shift):
                self.current_state = action.action
                return la
        if action is None:
            action = table.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s", lookup_symbol, self.current_state, action)

        if isinstance(action, Accept):
            #XXX change parse so that stack is [bos, startsymbol, eos]
            tree_root = self.previous_version.parent
            bos = tree_root.children[0]
            eos = tree_root.children[-1]

            bos.changed = False
            eos.changed = False
            tree_root.set_children([bos, self.stack[1], eos])
            tree_root.changed = True
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug ("\x1b[32mAccept\x1b[0m")
            return "Accept"

        if isinstance(action, Shift):
            self.validating = False
            self.shift(la, action)
            la.local_error = False
            la.nested_errors = False
            return self.pop_lookahead(la)

        if isinstance(action, Reduce):
            logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, action)
            self.reduce(action)
            return la

        if action is not None:
            # Any other kind of table entry is ignored.
            return None

        if self.validating:
            # The last nonterminal was shifted optimistically and turned out
            # not to fit: break it down and continue with normal parsing.
            logging.debug("Was validating: Right breakdown and return to normal")
            logging.debug("Before breakdown: %s", self.stack[-1])
            self.right_breakdown()
            logging.debug("After breakdown: %s", self.stack[-1])
            self.validating = False
            return None

        # Real syntax error: record it and try to isolate a subtree.
        self.error_nodes.append(la)
        recovery = self.rm
        if recovery.recover(la):
            # recovered, continue parsing
            self.refine(recovery.iso_node, recovery.iso_offset, recovery.error_offset)
            self.current_state = recovery.new_state
            isolated = recovery.iso_node
            isolated.isolated = la
            isolated.deleted = False
            self.stack.append(isolated)
            logging.debug("Recovered. Continue after %s", isolated)
            return self.pop_lookahead(isolated)

        logging.debug("Couldn't find a subtree to recover. Recovering the whole tree.")
        logging.debug("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term, la.next_term)
        logging.debug("loopcount: %s", self.loopcount)

        # Fall back to isolating the root of the tree.
        error_offset = recovery.offset(la, recovery.previous_version)
        whole_tree = self.previous_version.parent
        self.refine(whole_tree, 0, error_offset)
        whole_tree.isolated = la
        return "Error"

    def get_lookup(self, la):
        """Return the symbol used to look up `la` in the syntax table.

        A non-empty `la.lookup` is wrapped in a `Terminal`; otherwise the
        node's own symbol is used. An `IndentationTerminal` is converted to a
        plain `Terminal` of the same name.
        """
        symbol = la.symbol if la.lookup == "" else Terminal(la.lookup)
        if isinstance(symbol, IndentationTerminal):
            #XXX hack: change parsing table to accept IndentationTerminals
            return Terminal(symbol.name)
        return symbol

    def isolate(self, node):
        """Recursively revert a changed subtree to `prev_version`, flagging errors.

        Nodes without changes are left alone (and not descended into). For a
        reverted node, nested changes become `nested_errors` and a local
        change becomes `local_error`.
        """
        if not node.has_changes():
            return
        node.load(self.prev_version)
        if node.nested_changes:
            node.nested_errors = True
        if node.changed:
            node.local_error = True
        for child in node.children:
            self.isolate(child)

    def discard_changes(self, node):
        """Revert a single changed node to `prev_version` and flag it as erroneous.

        Unlike `isolate` this does not recurse. For a locally changed node the
        presentation difference is recorded via `compute_presention`.
        """
        if not node.has_changes():
            return
        node.load(self.prev_version)
        if node.nested_changes:
            node.nested_errors = True
        if not node.changed:
            return
        node.local_error = True
        self.compute_presention(node)

    def compute_presention(self, node):
        """Record `node` in `error_pres` if its name differs from the reference version.

        Only nodes whose symbol is exactly a `Terminal` (not a subclass) are
        considered. If the name at `reference_version` cannot be read
        (AttributeError) it is treated as None.
        """
        if type(node.symbol) is Terminal:
            try:
                former_name = node.get_attr("symbol.name", self.reference_version)
            except AttributeError:
                former_name = None
            if former_name != node.symbol.name:
                self.error_pres.append((node, former_name))

    def refine(self, node, offset, error_offset):
        """Refine an isolated subtree around the error position.

        A first pass collects retainable subtrees from the previous version;
        the node is then reverted to `prev_version` and a second pass retains,
        discards or re-analyses its children depending on where they lie
        relative to `error_offset`.
        """
        logging.debug("    Refine %s Offset: %s Error Offset: %s", node, offset, error_offset)
        retainable = set()
        self.pass1(node, offset, error_offset, retainable)
        node.load(self.prev_version)
        # Re-setting the children resets the sibling pointers.
        node.set_children(node.children)
        node.local_error = False
        node.nested_errors = False
        self.pass2(node, offset, error_offset, retainable)

    def pass1 (self, node, offset, error_offset, retain_set):
        """Collect retainable subtrees among the previous-version children of `node`.

        Children ending at or before `error_offset` are searched for
        retainable subtrees; a child reaching past it is descended into.
        `offset` is the text offset at which `node` starts.
        """
        if offset > error_offset:
            # Everything from here on lies after the error node.
            return
        position = offset
        for kid in node.get_attr("children", self.prev_version):
            if position + kid.textlength() > error_offset:
                self.pass1(kid, position, error_offset, retain_set)
            else:
                self.find_retainable_subtrees(kid, retain_set)
            position += kid.textlength()

    def pass2(self, node, offset, error_offset, retain_set):
        """Process the children of a reverted node relative to the error offset.

        - children starting after `error_offset` get an out-of-context analysis;
        - children ending at or before it are retained or discarded;
        - the child spanning the error has its changes discarded and is
          processed recursively.

        Processing stops at the temporary EOS of an out-of-context parse.
        """
        for child in node.children:
            if self.ooc and child is self.ooc[0]:
                logging.debug("    Don't refine TempEOS nodes")
                return
            if offset > error_offset:
                # XXX check if following terminal requires analysis
                self.out_of_context_analysis(child)
            else:
                if offset + child.textlength() <= error_offset:
                    self.retain_or_discard(child, node, retain_set)
                else:
                    assert offset <= error_offset
                    assert offset + child.textlength() > error_offset
                    self.discard_changes(child)
                    self.pass2(child, offset, error_offset, retain_set)
            # Read the length only now: the calls above may have reverted
            # the child.
            offset += child.textlength()

    def find_retainable_subtrees(self, node, retain_set):
        """Add the topmost retainable subtrees below (and including) `node` to `retain_set`.

        The search does not descend into a subtree that is itself retainable;
        otherwise it continues with the node's previous-version children.
        """
        if not self.is_retainable_subtree(node):
            for kid in node.get_attr("children", self.prev_version):
                self.find_retainable_subtrees(kid, retain_set)
        else:
            retain_set.add(node)

    def is_retainable_subtree(self, node):
        """Return True if `node` may be kept as is when its parent is reverted.

        New or non-existing nodes are never retainable. Unchanged nodes always
        are. A changed node is retainable if its text length and position are
        the same as in `prev_version` (Wagner's `same_pos`).
        """
        if node.new or not node.does_exist():
            return False

        if not node.has_changes():
            # Discarding an unchanged node would not do anything anyway.
            return True

        old = self.prev_version
        unchanged_length = node.textlength(old) == node.textlength()
        if not unchanged_length:
            return False
        return bool(node.get_attr("position", old) == node.position)


    def retain_or_discard(self, node, parent, retain_set):
        """Keep `node` if it was marked retainable, otherwise revert it and recurse.

        A retained node is removed from `retain_set`, re-attached to `parent`
        and has its sibling pointers refreshed. A node that is not retained
        has its changes discarded, after which its children are processed the
        same way.
        """
        if node not in retain_set:
            self.discard_changes(node)
            for child in node.children:
                self.retain_or_discard(child, node, retain_set)
            # Reset the links between the children.
            node.set_children(node.children)
            return

        retain_set.remove(node)
        relation = "SAME" if parent is node.parent else "DIFF"
        logging.debug("    Retaining %s (%s). Set parent to %s (%s) (%s)", node, id(node), parent, id(parent), relation)
        # The node might have been assigned to a different parent in the
        # current version that was removed during refinement. Make sure it
        # points to the right parent. See test_eco.py:Test_RetainSubtree
        node.parent = parent
        # Siblings might have been changed by the parser before the node's
        # parent was reset.
        node.update_siblings()
        if node.has_changes():
            parent.mark_changed()

    def out_of_context_analysis(self, node):
        """Try to reparse the subtree `node` on its own, outside the main parse.

        The node is wrapped in a temporary tree ``[temp_bos, node, temp_eos]``
        and parsed by a temporary `IncParser` that shares this parser's syntax
        table. The sub-parse starts in the state of the node's left sibling
        and succeeds if it ends in the node's own state with the temporary EOS
        as lookahead (see the `ooc` handling in `inc_parse`).

        Outcomes:
          - the node has no children, or the terminal following it was
            deleted: the node is isolated (reverted to `prev_version`);
          - the node has no changes: nothing is reparsed, nested errors are
            reported;
          - the sub-parse fails or yields a different symbol: the node is
            isolated;
          - the sub-parse yields a different node with the same symbol: that
            node replaces `node` in its parent;
          - the sub-parse yields `node` itself: its tree links are restored.

        Errors found by the temporary parser are appended to this parser's
        `error_nodes` and `error_pres`.
        """
        logging.debug("    Attempting out of context analysis on %s (%s)", node, id(node))

        if not node.children:
            logging.debug("    Failed: Node has no children")
            self.isolate(node)
            return

        if not node.has_changes():
            if node.has_errors():
                self.find_nested_error(node)
            logging.debug("    Failed: Node has no changes")
            return

        # check if subtree is followed by terminal requiring analysis
        # (includes deleted terminals)
        follow = self.next_terminal(node)
        if follow.deleted: # or follow.changed:
            # XXX This should also include `follow.changed`, but since currently nodes
            # are marked as changed even if just their siblings or next_terms
            # are updated, this would fail for most out-of-context analyses
            logging.debug("   Failed: Surrounding context has changed")
            self.isolate(node)
            return

        # The temporary parser is created without a grammar, so it builds no
        # tables of its own and reuses ours.
        temp_parser = IncParser()
        temp_parser.syntaxtable = self.syntaxtable
        temp_parser.prev_version = self.prev_version
        temp_parser.reference_version = self.reference_version

        # Remember the node's current identity and tree links so they can be
        # restored (or handed to a replacement node) afterwards.
        oldname = node.symbol.name
        oldleft = node.left
        oldright = node.right
        oldparent = node.parent

        # Same links as recorded for prev_version; these log entries are
        # overwritten below and must be put back on every exit path.
        saved_left = node.get_attr("left", self.prev_version)
        saved_right = node.get_attr("right", self.prev_version)
        saved_parent = node.get_attr("parent", self.prev_version)

        temp_bos = BOS(Terminal(""), 0, [])
        temp_eos = self.pop_lookahead(node)
        while isinstance(temp_eos.symbol, Terminal) and temp_eos.deleted:
            # We can't use a deleted node as a temporary EOS since the deleted
            # node can pass the temp EOS reduction check but is then immediately
            # skipped by parse_terminal. This causes the parser to continue
            # parsing past the temp_eos resulting in faulty sub parse trees.
            temp_eos = self.pop_lookahead(temp_eos)

        # temp_eos is a real node of the main tree that is borrowed as the end
        # marker; its links are saved here and restored after the sub-parse.
        eos_parent = temp_eos.parent
        eos_left = temp_eos.left
        eos_right = temp_eos.right
        # During out-of-context analysis we need to calculate offsets of
        # isolation nodes. Without this change we would calculate the offset
        # within the original parse tree and not the offset within the temporary
        # parse tree
        node.log[("left", self.prev_version)] = temp_bos
        node.log[("right", self.prev_version)] = temp_eos

        logging.debug("    TempEOS: %s", temp_eos)
        temp_root = Node(Nonterminal("TempRoot"), 0, [temp_bos, node, temp_eos])
        node.log[("parent", self.prev_version)] = temp_root
        temp_root.save(self.prev_version)
        temp_bos.next_term = node
        temp_bos.state = oldleft.state
        temp_bos.save(node.version)
        temp_parser.previous_version = AST(temp_root)
        # (lookahead that ends the sub-parse, node being analysed, state the
        # parser must be in when it gets there)
        temp_parser.ooc = (temp_eos, node, node.state)
        temp_parser.root = temp_root
        # Bottom-of-stack element carrying the start state of the sub-parse.
        dummy_stack_eos = EOS(Terminal(""), oldleft.state, [])
        try:
            temp_parser.inc_parse(state=oldleft.state, stack=[dummy_stack_eos])
        except IndexError:
            # An IndexError during the sub-parse is treated as a failed
            # analysis. NOTE(review): presumably raised when a reduction
            # needs more elements than the short temporary stack holds --
            # confirm.
            temp_parser.last_status = False

        # Give the borrowed end marker back to the main tree.
        temp_eos.parent = eos_parent
        temp_eos.left = eos_left
        temp_eos.right = eos_right

        # pass on errors to the outer parser
        self.error_nodes.extend(temp_parser.error_nodes)
        self.error_pres.extend(temp_parser.error_pres)
        if temp_parser.last_status == False:
              # isolate
              logging.debug("OOC analysis of %s failed. Error on %s.", node, temp_parser.error_nodes)
              node.log[("left", self.prev_version)] = saved_left
              node.log[("right", self.prev_version)] = saved_right
              node.log[("parent", self.prev_version)] = saved_parent
              self.isolate(node) # revert changes done during OOC
              if temp_parser.previous_version.parent.isolated:
                  # if during OOC parsing error recovery isolated the entire
                  # tree (due to not finding an appropriate isolation node) we
                  # need to move the isolation reference over to the actual node
                  # being reparsed as the root is thrown away after this
                  node.isolated = temp_parser.previous_version.parent.isolated
              return

        # Top of the temporary stack is the result of the sub-parse.
        newnode = temp_parser.stack[-1]

        if newnode.symbol.name != oldname:
            logging.debug("OOC analysis resulted in different symbol: %s", newnode.symbol.name)
            # node is not the same: revert all changes!
            node.log[("left", self.prev_version)] = saved_left
            node.log[("right", self.prev_version)] = saved_right
            node.log[("parent", self.prev_version)] = saved_parent
            self.isolate(node)
            return

        if newnode is not node:
            node.log[("left", self.prev_version)] = saved_left
            node.log[("right", self.prev_version)] = saved_right
            node.log[("parent", self.prev_version)] = saved_parent
            logging.debug("OOC analysis resulted in different node but same symbol: %s", newnode.symbol.name)
            assert len(temp_parser.stack) == 2 # should only contain [EOS, node]
            # Splice the new node into the place of the old one and mark the
            # neighbours as changed.
            i = oldparent.children.index(node)
            oldparent.children[i] = newnode
            newnode.parent = oldparent
            newnode.left = oldleft
            if oldleft:
                oldleft.right = newnode
                oldleft.mark_changed()
            newnode.right = oldright
            if oldright:
                oldright.left = newnode
                oldright.mark_changed()
            newnode.mark_changed() # why did I remove this?
            return

        # The sub-parse reproduced the very same node: only the links that
        # were redirected to the temporary tree need to be restored.
        logging.debug("Subtree resulted in the same parse as before %s %s", newnode, node)
        assert len(temp_parser.stack) == 2 # should only contain [EOS, node]
        node.parent = oldparent
        node.left = oldleft
        node.right = oldright
        node.log[("left", self.prev_version)] = saved_left
        node.log[("right", self.prev_version)] = saved_right
        node.log[("parent", self.prev_version)] = saved_parent

    def reduce(self, element):
        """Reduce elements on the stack to a non-terminal.

        Pops ``element.amount()`` nodes from the stack, looks up the goto for
        the production's left-hand side in the state of the new stack top, and
        pushes a parent node for the popped children. An old parent is reused
        when `ambig_reuse_check` finds one and no full reparse was requested;
        otherwise a new `Node` is created.

        Arguments:
            element: the `Reduce` table entry; `element.action` is the
                production being reduced.

        Raises:
            IndexError: if the stack runs empty while popping or has no
                element left below the popped children.
            Exception: if the syntax table has no goto for the production in
                the uncovered state.
        """

        # Popping yields the children right-to-left; reverse once at the end
        # instead of inserting every node at the front of the list.
        children = [self.stack.pop() for _ in range(element.amount())]
        children.reverse()

        logging.debug("   Element on stack: %s(%s)", self.stack[-1].symbol, self.stack[-1].state)
        self.current_state = self.stack[-1].state #XXX don't store on nodes, but on stack
        logging.debug("   Reduce: set state to %s (%s)", self.current_state, self.stack[-1].symbol)

        goto = self.syntaxtable.lookup(self.current_state, element.action.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state))

        # save childrens parents state
        has_errors = False
        for c in children:
            if c.has_errors() or c.isolated:
                has_errors = True
            if not c.new:
                # just marking changed is not enough. If we encounter an error
                # during reduction the path from the root down to this node is
                # incomplete and thus can't be reverted/isolate properly
                c.mark_changed()

        reuse_parent = self.ambig_reuse_check(element.action.left, children)
        if not self.needs_reparse and reuse_parent:
            logging.debug("   Reusing parent: %s (%s)", reuse_parent, id(reuse_parent))
            new_node = reuse_parent
            new_node.changed = False
            new_node.deleted = False
            new_node.isolated = None
            new_node.local_error = False
            new_node.set_children(children)
            new_node.state = goto.action # XXX need to save state using hisotry service
            new_node.mark_changed()
        else:
            new_node = Node(element.action.left.copy(), goto.action, children)
            logging.debug("   No reuse parent. Make new %s (%s)", new_node, id(new_node))
        new_node.nested_errors = has_errors
        new_node.calc_textlength()
        # The new node starts right after the element below it on the stack.
        new_node.position = self.stack[-1].position + self.stack[-1].textlen
        logging.debug("   Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        new_node.exists = True
        self.current_state = new_node.state # = goto.action
        logging.debug("Reduce: set state to %s (%s)", self.current_state, new_node.symbol)
        if getattr(element.action.annotation, "interpret", None):
            # eco grammar annotations
            self.interpret_annotation(new_node, element.action)

    def ambig_reuse_check(self, prod, children):
        """Return a previous-version parent of one of `children` that can be reused for `prod`.

        The first child that currently has a parent and is not new, and whose
        parent in `prev_version` has symbol `prod` and has not been reused
        already, determines the result. Returns None if there is no such
        child.
        """
        if not children:
            return None
        for child in children:
            if not child.parent or child.new:
                # Only nodes that already existed can have an old parent.
                continue
            former_parent = child.get_attr('parent', self.prev_version)
            if not (former_parent.symbol == prod):
                continue
            if former_parent in self.reused_nodes:
                continue
            if len(former_parent.get_attr("children", self.prev_version)) > 1:
                # With a single child the reuse is unambiguous, so it is not
                # recorded (which allows reusing the node again after error
                # recovery).
                self.reused_nodes.add(former_parent)
            return former_parent
        return None

    def top_down_reuse(self):
        """Run the top-down reuse traversal starting at the root of the tree."""
        self.top_down_traversal(self.previous_version.parent)

    def top_down_traversal(self, node):
        """Walk the tree looking for changed, pre-existing nodes to run reuse on.

        A changed node that is not new is handed to
        `reuse_isomorphic_structure`. Otherwise the children are visited if
        the node has nested changes or is new.
        """
        if node.changed and not node.new:
            self.reuse_isomorphic_structure(node)
            return
        if not (node.nested_changes or node.new):
            return
        for child in node.children:
            self.top_down_traversal(child)

    def reuse_isomorphic_structure(self, node):
        """Replace new children of `node` by their previous-version counterparts where possible.

        Children are compared position by position with the children stored
        for `prev_version`. A new child whose old counterpart no longer
        exists and has the same symbol name is replaced by that counterpart,
        and the process continues below it. Other children with nested
        changes (or new ones) are traversed normally.
        """
        # Index loop on purpose: replace_child assigns into node.children.
        for index in range(len(node.children)):
            current = node.children[index]
            try:
                former = node.get_attr("children", self.prev_version)[index]
            except IndexError:
                # No counterpart at this position in the previous version.
                self.top_down_traversal(current)
                continue
            same_slot = (current.new and not former.exists and
                         current.symbol.name == former.get_attr("symbol.name", self.prev_version))
            if same_slot:
                self.replace_child(node, index, current, former)
                self.reuse_isomorphic_structure(former)
            elif current.nested_changes or current.new:
                self.top_down_traversal(current)

    def replace_child(self, parent, i, current, previous):
        """Put the old node `previous` in place of the new node `current`.

        `previous` takes over `current`'s children, symbol name, state,
        position, error flag, alternate and sibling links and replaces it at
        index `i` of `parent.children`. Terminals are never replaced.

        Arguments:
            parent: node whose child list is updated.
            i: index of `current` in `parent.children`.
            current: node created by the current parse.
            previous: node from the previous version that is reused.
        """
        if isinstance(current.symbol, Terminal):
            # Newly inserted terminals have already been saved to the history
            # (previous_version) before we reach this. Reusing terminals
            # here would thus give no memory benefit as the old terminal can't
            # be garbage collected
            return
        parent.children[i] = previous
        previous.parent = parent # in case previous was moved before being deleted
        previous.children = list(current.children)
        for c in current.children:
            c.parent = previous
        previous.symbol.name = current.symbol.name
        previous.changed = False
        previous.deleted = False
        # `isolated` holds the error node or None (as in reduce/inc_parse).
        previous.isolated = None
        previous.local_error = False
        previous.state = current.state
        previous.mark_changed()
        previous.calc_textlength()
        previous.position = current.position
        previous.exists = True
        previous.nested_errors = current.nested_errors
        previous.right = current.right
        previous.left = current.left
        previous.alternate = current.alternate
        # Make the neighbours point at the reused node.
        if previous.right:
            previous.right.left = previous
        if previous.left:
            previous.left.right = previous
        # No terminal-specific relinking (lookup, prev_term/next_term) is
        # needed here: terminals return early above.

    def interpret_annotation(self, node, production):
        """Evaluate the production's annotation for `node` and store the result.

        The result is written to `node.alternate` unless the existing
        alternate is considered reusable (see `is_reusable_astnode`). A
        production without annotation is ignored.
        """
        annotation = production.annotation
        if not annotation:
            return
        astnode = annotation.interpret(node)
        if self.is_reusable_astnode(node.alternate, astnode):
            return
        node.alternate = astnode

    def is_reusable_astnode(self, old, new):
        """Return True if `old` can stand in for the newly built `new`.

        Both must be exactly of type AstNode, carry the same name, and every
        child stored under a key of `old` must be the identical object stored
        under that key in `new`.
        """
        from grammar_parser.bootstrap import AstNode
        both_astnodes = type(old) is AstNode and type(new) is AstNode
        if not both_astnodes:
            return False
        if old.name != new.name:
            return False
        return all(old.children.get(key) is new.children.get(key)
                   for key in old.children)

    def left_breakdown(self, la):
        """Mark `la` as not existing and step into it.

        Returns the first child of `la`, or the next lookahead if `la` has no
        children.
        """
        la.exists = False
        kids = la.children
        if len(kids) > 0:
            return kids[0]
        return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo an optimistic shift by breaking the top of the stack down.

        The node on top of the stack is popped and, as long as it is a
        Nonterminal, replaced by its children (shifted one by one with
        rb=True). The loop ends when a non-Nonterminal node is popped, which
        is then shifted again, when an isolated subtree is found, or when the
        FinishSymbol at the bottom of the stack is reached.
        """
        node = self.stack.pop() # optimistically shifted Nonterminal
        # after the breakdown, we need to properly shift the left over terminal
        # using the (correct) current state from before the optimistic shift of
        # its parent tree
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown(%s): set state to %s", node.symbol.name, self.current_state)
        while(isinstance(node.symbol, Nonterminal)):
            # Right_breakdown reverts wrong optimistic shifts including
            # subsequent reductions. These reductions may contain nodes that
            # have been reused. Reverting the reduction also means we need to
            # undo the reusing of that node to free it up for future reusing.
            node.exists = False
            self.reused_nodes.discard(node)
            # This bit of code is necessary to avoid a bug that occurs with the
            # default Wagner implementation if we isolate a subtree and
            # optimistically shift an empty Nonterminal, and then run into an
            # error. The verifying parts of the incremental parser then try to
            # undo wrong optimistic shifts by breaking them down to their most
            # right terminal. Since the optimistic shift happened on an empty
            # Nonterminal, the algorithm tries to break down the isolated
            # subtree to the left of it. Since this subtree contains an error in
            # form of an unshiftable terminal, the algorithm fails and throws an
            # exception. The following code fixes this by ignoring already
            # isolated subtrees.
            if node.isolated:
                # put the isolated subtree back untouched and continue from its state
                self.stack.append(node)
                self.current_state = node.state
                return
            for c in node.children:
                self.shift(c, rb=True)
            # the last shifted child is now on top; continue breaking it down
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop")
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol back onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                logging.debug("right breakdown else: set state to %s", self.stack[-1].state)
                self.current_state = self.stack[-1].state
        self.shift(node, rb=True) # pushes previously popped terminal back on stack

    def shift(self, la, element=None, rb=False):
        """Push `la` onto the stack and move to the state given by `element`.

        :param la: node to shift
        :param element: syntax table entry to follow; looked up from the
            current state and the lookup symbol of `la` when not given
        :param rb: only used to prefix the log output with "rb"
        """
        if not element:
            symbol = self.get_lookup(la)
            element = self.syntaxtable.lookup(self.current_state, symbol)
        prefix = "rb" if rb else ""
        logging.debug("\x1b[32m%sShift(%s)\x1b[0m: %s -> %s", prefix, self.current_state, la, element)
        target_state = element.action
        la.state = target_state
        la.exists = True
        top = self.stack[-1]
        la.position = top.position + top.textlen
        self.stack.append(la)
        self.current_state = la.state

        if la.lookup == "<ws>":
            # last_shift_state is used to predict next symbol
            # whitespace destroys correct behaviour
            return
        self.last_shift_state = element.action


    def pop_lookahead(self, la):
        """Return the next node to the right of `la` in the previous version.

        Climbs through the parents (as stored for `self.prev_version`) until a
        node with a right sibling is found, then returns that sibling.
        """
        current = la
        while True:
            if self.right_sibling(current) is not None:
                break
            current = current.get_attr("parent", self.prev_version)
        return self.right_sibling(current)

    def right_sibling(self, node):
        """Return the right sibling of `node` as of `self.prev_version`."""
        version = self.prev_version
        return node.right_sibling(version)

    def shiftable(self, la):
        """Return True if the syntax table has a truthy entry for the symbol
        of `la` in the current state, False otherwise."""
        entry = self.syntaxtable.lookup(self.current_state, la.symbol)
        return bool(entry)

    def has_changed(self, node):
        """Return whether `node` is contained in `self.all_changes`."""
        recorded_changes = self.all_changes
        return node in recorded_changes

    def prepare_input(self, _input):
        """Turn a space-separated string into a list of Terminals.

        The list always ends with a FinishSymbol; an empty string yields only
        that FinishSymbol.
        """
        # XXX need an additional lexer to do this right
        words = _input.split(" ") if _input != "" else []
        symbols = [Terminal(word) for word in words]
        symbols.append(FinishSymbol())
        return symbols

    def get_ast(self):
        """Wrap the first entry of `self.ast_stack` between fresh "bos" and
        FinishSymbol nodes under a new "Root" node and return it as an AST."""
        begin = Node(Terminal("bos"), 0, [])
        end = Node(FinishSymbol(), 0, [])
        children = [begin, self.ast_stack[0], end]
        return AST(Node(Nonterminal("Root"), 0, children))

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a syntax table entry for
        state `state_id`."""
        table_keys = self.syntaxtable.table.keys()
        return set(symbol for (state, symbol) in table_keys
                   if state == state_id)

    def get_next_symbols_list(self, state = -1):
        """Return the names of all symbols possible in `state`.

        With the default of -1 the state of the last non-whitespace shift
        (`self.last_shift_state`) is used.
        """
        if state == -1:
            state = self.last_shift_state
        return [symbol.name for symbol in self.get_next_possible_symbols(state)]

    def get_next_symbols_string(self, state = -1):
        """Return the names from `get_next_symbols_list(state)` joined by
        ", "."""
        names = self.get_next_symbols_list(state)
        separator = ", "
        return separator.join(names)

    def get_expected_symbols(self, state_id):
        """Return the next non-whitespace symbols of state `state_id`, or an
        empty list when `state_id` is -1."""
        #XXX if state of a symbol is nullable, return next symbol as well
        #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id == -1:
            return []
        return self.graph.state_sets[state_id].get_next_symbols_no_ws()

    def reset(self):
        """Clear the parser's working state and create a fresh initial AST."""
        for list_attr in ("stack", "ast_stack", "all_changes"):
            setattr(self, list_attr, [])
        self.last_shift_state = 0
        self.validating = self.last_status = False
        self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore the parse status and error nodes saved for `version`.

        A missing entry only produces a warning; the corresponding attribute
        keeps its current value.
        """
        try:
            saved_status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        else:
            self.last_status = saved_status
        try:
            saved_errors = self.errornodes_by_version[version]
        except KeyError:
            logging.warning("Could not find errornodes for version %s", version)
        else:
            # work on a copy so the stored list is not modified later
            self.error_nodes = list(saved_errors)

    def save_status(self, version):
        """Remember the current parse status and a copy of the current error
        nodes under `version`."""
        errors_snapshot = list(self.error_nodes)
        self.status_by_version[version] = self.last_status
        self.errornodes_by_version[version] = errors_snapshot

    def find_nested_error(self, node):
        """Find errors within isolated subtrees."""
        self.compute_presention(node)
        if node.isolated:
            self.error_nodes.append(node.isolated)
        else:
            # subtrees without nested errors need no further inspection
            if not node.nested_errors:
                return
        for child in node.children:
            self.find_nested_error(child)

    def iso_context_changed(self, node):
        """Return whether the lookahead following an isolated `node` has
        changes. Nodes that are not isolated always yield False."""
        # Currently catches more cases than necessary. Could be made more
        # accurate by finding the next terminal reachable from node (including
        # deleted ones)
        if node.isolated:
            following = self.pop_lookahead(node)
            return following.has_changes()
        return False

    def next_terminal(self, node):
        """Return the first node after `node` whose symbol is not exactly a
        Nonterminal, descending into first children and skipping empty
        nonterminals."""
        candidate = self.pop_lookahead(node)
        while True:
            if type(candidate.symbol) is not Nonterminal:
                return candidate
            if len(candidate.children) > 0:
                candidate = candidate.children[0]
            else:
                candidate = self.pop_lookahead(candidate)
Ejemplo n.º 2
0
class LRParser(object):
    """Plain (non-incremental) LR parser that checks whether input is accepted.

    The state graph and the syntax table are built from `grammar` when the
    parser is constructed.
    """

    def __init__(self, grammar, lr_type=LR0):
        """Parse `grammar` and build state graph and syntax table for it.

        :param grammar: grammar description handed to `Parser`
        :param lr_type: table type; the graph is converted when it is LALR
        """
        parser = Parser(grammar)
        parser.parse()

        self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
        self.graph.build()

        if lr_type == LALR:
            self.graph.convert_lalr()

        self.syntaxtable = SyntaxTable(lr_type)
        self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []

    def check(self, _input):
        """Return True if the space-separated token string `_input` is
        accepted by the syntax table, False otherwise.

        :param _input: tokens separated by single spaces
        :return: always a bool; False when the table has no entry for the
            current state and token, when the entry is of an unexpected kind,
            or when the input is exhausted without reaching Accept
        """
        self.reset()

        # XXX need an additional lexer to do this right
        tokens = [Terminal(word) for word in _input.split(" ")]
        tokens.append(FinishSymbol())

        # the stack alternates symbols and state ids; a state id is always on top
        self.stack.append(FinishSymbol())
        self.stack.append(0)

        i = 0
        while i < len(tokens):
            c = tokens[i]
            state_id = self.stack[-1]
            element = self.syntaxtable.lookup(state_id, c)
            if element is None:
                return False
            if isinstance(element, Shift):
                self.stack.append(c)
                self.stack.append(element.action)
                i += 1
            elif isinstance(element, Reduce):
                #self.add_to_ast(element)
                # every reduced symbol occupies two stack slots (symbol + state)
                for x in range(2*element.amount()):
                    self.stack.pop()
                state_id = self.stack[-1]
                self.stack.append(element.action.left)
                element = self.syntaxtable.lookup(state_id, element.action.left)
                assert isinstance(element, Goto)
                self.stack.append(element.action)
            elif isinstance(element, Accept):
                return True
            else:
                # an entry that neither consumes input nor changes the stack
                # would make this loop spin forever
                return False
        # input exhausted without reaching Accept
        return False

    def add_to_ast(self, element):
        """Build the node for a reduction and push it onto `self.ast_stack`.

        Subtrees of nonterminals are taken from `self.ast_stack`, terminals
        get a fresh leaf node.

        :param element: reduction whose `action` is the applied production
        """
        l = []
        # action = Production
        # Walk the right-hand side from right to left: the rightmost
        # nonterminal's subtree is on top of ast_stack. Popping while walking
        # left to right only works for productions whose terminal/nonterminal
        # pattern is symmetric.
        for e in reversed(element.action.right):
            if isinstance(e, Nonterminal):
                l.append(self.ast_stack.pop())
            if isinstance(e, Terminal):
                l.append(Node(e, []))
        l.reverse()
        n = Node(element.action.left, l)
        self.ast_stack.append(n)

    def get_ast(self):
        """Return an AST whose root is the first entry of `self.ast_stack`."""
        return AST(self.ast_stack[0])

    def reset(self):
        """Empty the parse stack and the AST stack."""
        self.stack = []
        self.ast_stack = []
Ejemplo n.º 3
0
class IncParser(object):
    """
    The incremental parser
    """
    def __init__(self,
                 grammar=None,
                 lr_type=LR0,
                 whitespaces=False,
                 startsymbol=None):
        """Set up the parser state and, if `grammar` is given, its tables.

        The state graph for a grammar is cached as a pickle file below
        "../pickle/" relative to this module; the file name is derived from
        the hashes of `grammar` and `whitespaces`.

        :param grammar: grammar description handed to `Parser`; when falsy no
            graph or syntax table is built here
        :param lr_type: table type; the graph is converted when it is LALR
        :param whitespaces: passed to `Parser` and stored on the instance
        :param startsymbol: not used by this constructor
        """

        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([
                os.path.dirname(__file__), "/../pickle/",
                str(hash(grammar) ^ hash(whitespaces)), ".pcl"
            ])
            try:
                logging.debug("Try to unpickle former stategraph")
                # NOTE(review): unpickling runs code embedded in the file, so
                # the pickle directory must only be writable by trusted users.
                # The context manager closes the handle even if loading fails.
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end - start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules,
                                        lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornode_by_version = {}
        self.indentation_based = False

        self.pm = PluginManager()
        self.pm.loadplugins(self)
        self.pm.do_incparse_init()

        self.previous_version = None
        logging.debug("Incremental parser done")

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id,
                  precedences):
        """Build (or load from cache) the syntax table for the given rules.

        When `pickle_id` is truthy the syntax table is cached as a pickle file
        below "../pickle/" relative to this module. Uses `self.pm`, so the
        constructor must have run before.

        :param rules: grammar rules for the state graph
        :param startsymbol: start symbol for the state graph
        :param lr_type: table type passed to StateGraph and SyntaxTable
        :param whitespaces: stored on the instance; part of the cache file name
        :param pickle_id: cache key; caching is skipped when falsy
        :param precedences: passed to `SyntaxTable.build`
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([
                os.path.dirname(__file__), "/../pickle/",
                str(pickle_id ^ hash(whitespaces)), ".pcl"
            ])
            try:
                # NOTE(review): unpickling runs code embedded in the file, so
                # the pickle directory must only be writable by trusted users.
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # no usable cache file: fall through and build the table
                pass
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces
        self.pm.do_incparse_from_dict(rules)

    def init_ast(self, magic_parent=None):
        """Create the initial tree (Root with only BOS and EOS), store it as
        `self.previous_version` and save all three nodes as version 0.

        :param magic_parent: stored on both the BOS and the EOS node
        """
        first = BOS(Terminal(""), 0, [])
        last = EOS(FinishSymbol(), 0, [])
        for boundary in (first, last):
            boundary.magic_parent = magic_parent
        # BOS and EOS are direct neighbours in the terminal chain
        first.next_term = last
        last.prev_term = first
        tree_root = Node(Nonterminal("Root"), 0, [first, last])
        self.previous_version = AST(tree_root)
        for fresh_node in (tree_root, first, last):
            fresh_node.save(0)

    def reparse(self):
        """Run `inc_parse` with no line indents and the reparse flag set.

        The result of `inc_parse` is not returned.
        """
        no_indents = []
        self.inc_parse(no_indents, True)

    def inc_parse(self, line_indents=[], reparse=False):
        """Run one incremental parse over the previous version of the tree.

        Starting with the node after BOS, terminals are handed to
        `parse_terminal`; unchanged nonterminals are shifted as a whole when
        the syntax table allows it, otherwise they are reduced over or broken
        down into their children.

        :param line_indents: not used in this method
            NOTE(review): mutable default argument; harmless only as long as
            it is never modified.
        :param reparse: if True every nonterminal is broken down, changed or
            not
        :return: True when the input was accepted, False on a parse error
        """
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        self.validating = False
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        # the bottom of the stack is a FinishSymbol node in state 0
        self.stack.append(Node(FinishSymbol(), 0, []))
        bos = self.previous_version.parent.children[0]
        self.loopcount = 0

        # switches between optimistic shifting of whole subtrees (True) and
        # the unoptimised variant further down
        USE_OPT = True

        self.pm.do_incparse_inc_parse_top()

        la = self.pop_lookahead(bos)
        while (True):
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la,
                          la.changed, id(la), la.indent)
            self.loopcount += 1
            if isinstance(la.symbol, Terminal) or isinstance(
                    la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:
                    assert False  # with prelexing you should never end up here!
                else:
                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        self.last_status = True
                        return True
                    elif result == "Error":
                        self.last_status = False
                        return False
                    elif result != None:
                        # parse_terminal handed back the next lookahead; a
                        # None result keeps the current one for another round
                        la = result

            else:  # Nonterminal
                if la.changed or reparse:
                    # changed subtrees are taken apart and rebuilt; remember
                    # the flag so do_undo can restore it on error
                    #la.changed = False # as all nonterminals that have changed are being rebuild, there is no need to change this flag (this also solves problems with comments)
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        #Follow parsing/syntax table
                        goto = self.syntaxtable.lookup(self.current_state,
                                                       la.symbol)
                        if goto:  # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s",
                                          la.symbol, self.current_state, goto)
                            self.pm.do_incparse_optshift(la)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.state = follow_id  #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s",
                                          self.current_state)
                            la = self.pop_lookahead(la)
                            # the shift was optimistic and is verified by the
                            # next terminal (see parse_terminal)
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal()

                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(
                                self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                        # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        # NOTE(review): `element` is not used in this branch
                        element = self.syntaxtable.lookup(
                            self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        # NOTE(review): not reached as written; the loop above is only left
        # through the return statements.
        logging.debug("============ INCREMENTAL PARSE END ================= ")

    def parse_terminal(self, la, lookup_symbol):
        """
        Process one terminal in the current parser state.

        Looks up the syntax table entry for the terminal and accepts, shifts
        or reduces accordingly. After a reduction the method calls itself
        again with the same terminal.

        :param la: lookahead node
        :param lookup_symbol: symbol used for the syntax table lookup
        :return: "Accept" if the input was accepted; "Error" (from do_undo) if
                 the table has no entry and no optimistic shift is being
                 validated; the next lookahead node after a shift; `la` itself
                 if it is an EOS node and "<eos>" could be shifted; None after
                 a right breakdown, so the caller retries the same lookahead
        """
        element = None
        if isinstance(la, EOS):
            # try the dedicated "<eos>" terminal first
            element = self.syntaxtable.lookup(self.current_state,
                                              Terminal("<eos>"))
            if isinstance(element, Shift):
                self.current_state = element.action
                return la
        if element is None:
            element = self.syntaxtable.lookup(self.current_state,
                                              lookup_symbol)
        logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s",
                      lookup_symbol, self.current_state, element)
        if isinstance(element, Accept):
            #XXX change parse so that stack is [bos, startsymbol, eos]
            bos = self.previous_version.parent.children[0]
            eos = self.previous_version.parent.children[-1]
            # stack[0] is the FinishSymbol node, stack[1] the parse result
            self.previous_version.parent.set_children(
                [bos, self.stack[1], eos])
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug("\x1b[32mAccept\x1b[0m")
            return "Accept"
        elif isinstance(element, Shift):
            self.validating = False
            self.shift(la, element)
            return self.pop_lookahead(la)

        elif isinstance(element, Reduce):
            logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, element)
            self.reduce(element)
            return self.parse_terminal(la, lookup_symbol)
        elif element is None:
            if self.validating:
                # an optimistic nonterminal shift turned out to be wrong:
                # break it down and fall through, returning None
                logging.debug(
                    "Was validating: Right breakdown and return to normal")
                logging.debug("Before breakdown: %s", self.stack[-1])
                self.right_breakdown()
                logging.debug("After breakdown: %s", self.stack[-1])
                self.validating = False
            else:
                return self.do_undo(la)

    def get_lookup(self, la):
        """
        Return the symbol to use for syntax table lookups of a node.

        If the node has a non-empty lookup, that lookup wrapped in a Terminal
        is used, otherwise the node's own symbol.

        Note: an IndentationTerminal is replaced by a plain Terminal of the
        same name.

        :param la: node to find the lookup symbol of
        :return: the symbol to look up in the syntax table
        """
        symbol = Terminal(la.lookup) if la.lookup != "" else la.symbol
        if isinstance(symbol, IndentationTerminal):
            #XXX hack: change parsing table to accept IndentationTerminals
            return Terminal(symbol.name)
        return symbol

    def do_undo(self, la):
        """
        Roll back recorded attribute changes and report a parse error.

        Entries of self.undo are tuples (node, attribute, value); they are
        applied newest first via setattr until the list is empty. `la` is
        stored as self.error_node.

        :param la: lookahead node at which the error occurred
        :return: the string "Error"
        """
        while self.undo:
            target, attr_name, old_value = self.undo.pop()
            setattr(target, attr_name, old_value)
        self.error_node = la
        logging.debug("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term,
                      la.next_term)
        logging.debug("loopcount: %s", self.loopcount)
        return "Error"

    def reduce(self, element):
        """
        Execute the given reduction on the current stack.

        Pops `element.amount()` nodes from the stack, combines them into a
        new Nonterminal node for the production's left-hand side and pushes
        that node in the goto state found in the syntax table.

        NOTE(review): an earlier version of this docstring said that COMMENT
        subtrees are added "silently" during reduction; nothing in this body
        does that -- confirm whether a plugin handles it.

        :type element: Reduce
        :param element: reduction to apply
        :raises Exception: if the syntax table has no goto entry for the
            left-hand side in the state below the popped nodes
        """

        #Fill a children array with nodes that are on the stack
        children = []
        i = 0
        while i < element.amount():
            c = self.stack.pop()
            # apply folding information from grammar to tree nodes
            # (nodes are popped right to left, hence the reversed index)
            fold = element.action.right[element.amount() - i - 1].folding
            c.symbol.folding = fold
            children.insert(0, c)
            i += 1

        logging.debug("   Element on stack: %s(%s)", self.stack[-1].symbol,
                      self.stack[-1].state)
        self.current_state = self.stack[
            -1].state  #XXX don't store on nodes, but on stack
        logging.debug("   Reduce: set state to %s (%s)", self.current_state,
                      self.stack[-1].symbol)

        goto = self.syntaxtable.lookup(self.current_state, element.action.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" %
                            (element, self.current_state))
        assert goto != None

        # save childrens parents state
        # (recorded so that do_undo can restore the links after a parse error)
        for c in children:
            self.undo.append((c, 'parent', c.parent))
            self.undo.append((c, 'left', c.left))
            self.undo.append((c, 'right', c.right))
            self.undo.append((c, 'log', c.log.copy()))
            c.mark_version(
            )  # XXX with node reuse we only have to do this if the parent changes

        new_node = Node(element.action.left.copy(), goto.action, children)
        self.pm.do_incparse_reduce(new_node)
        logging.debug("   Add %s to stack and goto state %s", new_node.symbol,
                      new_node.state)
        self.stack.append(new_node)
        self.current_state = new_node.state  # = goto.action
        logging.debug("Reduce: set state to %s (%s)", self.current_state,
                      new_node.symbol)
        # annotations that offer an "interpret" attribute are evaluated,
        # everything else goes through the folding-based alternate
        if getattr(element.action.annotation, "interpret", None):
            # eco grammar annotations\
            self.interpret_annotation(new_node, element.action)
        else:
            # johnstone annotations
            self.add_alternate_version(new_node, element.action)

    def interpret_annotation(self, node, production):
        """Interpret the production's annotation for `node` and store the
        result as `node.alternate`. Productions without an annotation leave
        the node untouched."""
        rule_annotation = production.annotation
        if not rule_annotation:
            return
        node.alternate = rule_annotation.interpret(node)

    def add_alternate_version(self, node, production):
        """Attach a folded alternate version of `node` as `node.alternate`.

        The alternate is a TextNode whose children are derived from the
        children of `node` according to their folding marker:

        * "^^^": the child is set aside and only inserted where
          `production.inserts` names it
        * "^^":  the child's symbol replaces the alternate's symbol and its
          children are adopted
        * "^":   the child's children are adopted
        * otherwise the child itself is kept

        :param node: freshly reduced node
        :param production: production that was reduced; its `inserts` maps
            child positions to the symbol to insert there
        """
        # add alternate (folded) versions for nodes to the tree
        alternate = TextNode(node.symbol.__class__(node.symbol.name),
                             node.state, [])
        alternate.children = []
        teared = []
        for i, c in enumerate(node.children):
            # `in` instead of dict.has_key(), which no longer exists in Python 3
            if i in production.inserts:
                # insert teared nodes at right position
                value = production.inserts[i]
                for t in teared:
                    if t.symbol.name == value.name:
                        alternate.children.append(t)
            if c.symbol.folding == "^^^":
                c.symbol.folding = None
                teared.append(c)
                continue
            elif c.symbol.folding == "^^":
                # follow the chain to the innermost alternate version
                while c.alternate is not None:
                    c = c.alternate
                alternate.symbol = c.symbol
                for child in c.children:
                    alternate.children.append(child)
            elif c.symbol.folding == "^":
                while c.alternate is not None:
                    c = c.alternate
                for child in c.children:
                    alternate.children.append(child)
            else:
                alternate.children.append(c)
        node.alternate = alternate

    def left_breakdown(self, la):
        """Step into `la`: return its first child, or the next lookahead if
        it has no children."""
        kids = la.children
        if len(kids) > 0:
            return kids[0]
        return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo an optimistic shift by breaking the top of the stack down.

        The node on top of the stack is popped and, as long as it is a
        Nonterminal, replaced by its children (shifted one by one with
        rb=True). The loop ends when a non-Nonterminal node is popped, which
        is then shifted again, or when the FinishSymbol at the bottom of the
        stack is reached.
        """
        node = self.stack.pop()  # optimistically shifted Nonterminal
        # after the breakdown, we need to properly shift the left over terminal
        # using the (correct) current state from before the optimistic shift of
        # its parent tree
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown(%s): set state to %s", node.symbol.name,
                      self.current_state)
        while (isinstance(node.symbol, Nonterminal)):
            # the for loop already advances through the children; no manual
            # stepping to the right neighbour is needed
            for c in node.children:
                self.shift(c, rb=True)
            # the last shifted child is now on top; continue breaking it down
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop")
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol back onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                logging.debug("right breakdown else: set state to %s",
                              self.stack[-1].state)
                self.current_state = self.stack[-1].state
        self.shift(node,
                   rb=True)  # pushes previously popped terminal back on stack

    def shift(self, la, element=None, rb=False):
        """Push `la` onto the stack and move to the state given by `element`.

        :param la: node to shift
        :param element: syntax table entry to follow; looked up from the
            current state and the lookup symbol of `la` when not given
        :param rb: passed on to the plugin hook and used to prefix the log
            output with "rb"
        """
        if not element:
            symbol = self.get_lookup(la)
            element = self.syntaxtable.lookup(self.current_state, symbol)
        prefix = "rb" if rb else ""
        logging.debug("\x1b[32m%sShift(%s)\x1b[0m: %s -> %s",
                      prefix, self.current_state, la, element)
        la.state = element.action
        self.stack.append(la)
        self.current_state = la.state

        is_whitespace = la.lookup == "<ws>"
        if not is_whitespace:
            # last_shift_state is used to predict next symbol
            # whitespace destroys correct behaviour
            self.last_shift_state = element.action

        self.pm.do_incparse_shift(la, rb)

    def pop_lookahead(self, la):
        """
        Get the next node to the right of `la`.

        Climbs through the parents until a node with a right sibling is found
        and returns that sibling.

        :rtype: Node
        :param la: node to start from
        :return: the right sibling of `la` or of its closest ancestor that
                 has one
        """
        start = la
        current = la
        while True:
            if current.right_sibling() is not None:
                break
            current = current.parent
        logging.debug("pop_lookahead(%s): %s", start.symbol,
                      current.right_sibling().symbol)
        return current.right_sibling()

    def shiftable(self, la):
        """Return True if the syntax table has a truthy entry for the symbol
        of `la` in the current state, False otherwise."""
        entry = self.syntaxtable.lookup(self.current_state, la.symbol)
        return bool(entry)

    def has_changed(self, node):
        """Return whether `node` is contained in `self.all_changes`."""
        recorded_changes = self.all_changes
        return node in recorded_changes

    def prepare_input(self, _input):
        """Turn a space-separated string into a list of Terminals.

        The list always ends with a FinishSymbol; an empty string yields only
        that FinishSymbol.
        """
        # XXX need an additional lexer to do this right
        words = _input.split(" ") if _input != "" else []
        symbols = [Terminal(word) for word in words]
        symbols.append(FinishSymbol())
        return symbols

    def get_ast(self):
        """Wrap the first entry of `self.ast_stack` between fresh "bos" and
        FinishSymbol nodes under a new "Root" node and return it as an AST."""
        begin = Node(Terminal("bos"), 0, [])
        end = Node(FinishSymbol(), 0, [])
        children = [begin, self.ast_stack[0], end]
        return AST(Node(Nonterminal("Root"), 0, children))

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a syntax table entry for
        state `state_id`."""
        table_keys = self.syntaxtable.table.keys()
        return set(symbol for (state, symbol) in table_keys
                   if state == state_id)

    def get_next_symbols_list(self, state=-1):
        """Return the names of all symbols possible in `state`.

        With the default of -1 the state of the last non-whitespace shift
        (`self.last_shift_state`) is used.
        """
        if state == -1:
            state = self.last_shift_state
        return [symbol.name for symbol in self.get_next_possible_symbols(state)]

    def get_next_symbols_string(self, state=-1):
        """Return the names from `get_next_symbols_list(state)` joined by
        ", "."""
        names = self.get_next_symbols_list(state)
        separator = ", "
        return separator.join(names)

    def get_expected_symbols(self, state_id):
        """Return the next non-whitespace symbols of state `state_id`, or an
        empty list when `state_id` is -1."""
        #XXX if state of a symbol is nullable, return next symbol as well
        #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id == -1:
            return []
        return self.graph.state_sets[state_id].get_next_symbols_no_ws()

    def reset(self):
        """Clear the parser's working state and create a fresh initial AST."""
        for list_attr in ("stack", "ast_stack", "all_changes", "undo"):
            setattr(self, list_attr, [])
        self.last_shift_state = 0
        self.validating = self.last_status = False
        self.error_node = self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore the parse status and error node saved for `version`.

        A missing entry only produces a warning; the corresponding attribute
        keeps its current value.
        """
        try:
            saved_status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        else:
            self.last_status = saved_status
        try:
            saved_error = self.errornode_by_version[version]
        except KeyError:
            logging.warning("Could not find errornode for version %s", version)
        else:
            self.error_node = saved_error

    def save_status(self, version):
        """Remember the current parse status and error node under
        `version`."""
        status, error = self.last_status, self.error_node
        self.status_by_version[version] = status
        self.errornode_by_version[version] = error
Ejemplo n.º 4
0
class LRParser(object):
    """Plain (non-incremental) LR recogniser driven by a generated syntax table."""

    def __init__(self, grammar, lr_type=LR0):
        """Parse `grammar`, build its state graph and derive the syntax table.

        For `lr_type == LALR` the state graph is converted with
        `convert_lalr()` before the table is built.
        """
        parser = Parser(grammar)
        parser.parse()

        self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
        self.graph.build()

        if lr_type == LALR:
            self.graph.convert_lalr()

        self.syntaxtable = SyntaxTable(lr_type)
        self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []

    def check(self, _input):
        """Return True if the space-separated token string `_input` is accepted.

        The parse stack alternates symbols and state ids. False is returned
        when the table has no entry for the current state and token, and also
        when the input is consumed without reaching an Accept action
        (previously the method fell off the loop and returned None).
        """
        self.reset()

        # XXX need an additional lexer to do this right
        tokens = [Terminal(word) for word in _input.split(" ")]
        tokens.append(FinishSymbol())

        self.stack.append(FinishSymbol())
        self.stack.append(0)

        i = 0
        while i < len(tokens):
            c = tokens[i]
            state_id = self.stack[-1]
            element = self.syntaxtable.lookup(state_id, c)
            if element is None:
                return False
            if isinstance(element, Shift):
                self.stack.append(c)
                self.stack.append(element.action)
                i += 1
            if isinstance(element, Reduce):
                # every symbol on the stack is paired with a state id, so two
                # entries are popped per right-hand-side symbol
                for x in range(2 * element.amount()):
                    self.stack.pop()
                state_id = self.stack[-1]
                self.stack.append(element.action.left)
                element = self.syntaxtable.lookup(state_id,
                                                  element.action.left)
                assert isinstance(element, Goto)
                self.stack.append(element.action)

            if isinstance(element, Accept):
                return True

        # input exhausted without an Accept action
        return False

    def add_to_ast(self, element):
        """Build a node for the production in `element.action` and push it on `ast_stack`.

        Nonterminals of the right-hand side take their subtree from
        `ast_stack`; terminals get a fresh childless node.
        """
        l = []
        # action = Production
        for e in element.action.right:
            if isinstance(e, Nonterminal):
                l.append(self.ast_stack.pop())
            if isinstance(e, Terminal):
                l.append(Node(e, []))
        l.reverse()
        n = Node(element.action.left, l)
        self.ast_stack.append(n)

    def get_ast(self):
        """Wrap the bottom element of `ast_stack` in an AST object."""
        return AST(self.ast_stack[0])

    def reset(self):
        """Empty the parse stack and the AST stack."""
        self.stack = []
        self.ast_stack = []
Ejemplo n.º 5
0
class IncParser(object):

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create an incremental parser, optionally building its tables from `grammar`.

        When `grammar` is given, its state graph is loaded from a pickle cache
        file (named after the hash of the grammar and the `whitespaces` flag)
        or, if that file cannot be opened, built from scratch and written to
        the cache. The syntax table is then built from the graph. Without a
        grammar neither `self.graph` nor `self.syntaxtable` is set here (see
        `from_dict`). `startsymbol` is not used in this method.
        """
        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"])
            try:
                logging.debug("Try to unpickle former stategraph")
                # NOTE(review): pickle.load executes whatever the cache file
                # contains -- confirm the pickle directory is trusted.
                # The context manager closes the handle even if loading fails.
                with open(filename, "r") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end-start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                # close the cache file explicitly so the data is flushed to disk
                with open(filename, "w") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        # parse state
        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.whitespaces = whitespaces
        self.anycount = set()
        # per-version bookkeeping used by save_status / load_status
        self.status_by_version = {}
        self.errornode_by_version = {}

        self.comment_tokens = []

        self.indent_stack = None
        self.indentation_based = False

        self.previous_version = None
        logging.debug("Incemental parser done")

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Set up the syntax table from an already parsed grammar.

        If `pickle_id` is truthy, a cached syntax table is loaded from the
        pickle directory when the file can be opened. Otherwise (or when no
        id is given) the state graph and table are built from `rules` and
        `startsymbol`, and the table is written to the cache if an id exists.
        Finally the first symbol name of every alternative of the "comment"
        rule, if there is one, is appended to `self.comment_tokens`.
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"])
            try:
                # NOTE(review): pickle.load executes whatever the cache file
                # contains -- confirm the pickle directory is trusted.
                with open(filename, "r") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # no usable cache file: fall through and build the table below
                pass
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                # close the cache file explicitly so the data is flushed to disk
                with open(filename, "w") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces
        if not rules:
            print("Warning: incparser has not access to comment tokens")
        elif Nonterminal("comment") in rules:
            # `in` replaces dict.has_key, which no longer exists in Python 3
            rule = rules[Nonterminal("comment")]
            for a in rule.alternatives:
                self.comment_tokens.append(a[0].name)

    def init_ast(self, magic_parent=None):
        """Install an empty tree (Root with only BOS and EOS) as `previous_version`.

        Both sentinels get `magic_parent` assigned and are linked to each
        other as neighbouring terminals. Root, BOS and EOS are then saved
        with version 0.
        """
        first = BOS(Terminal(""), 0, [])
        last = EOS(FinishSymbol(), 0, [])
        for sentinel in (first, last):
            sentinel.magic_parent = magic_parent
        first.next_term = last
        last.prev_term = first

        tree_root = Node(Nonterminal("Root"), 0, [first, last])
        self.previous_version = AST(tree_root)
        for node in (tree_root, first, last):
            node.save(0)

    def reparse(self):
        """Run `inc_parse` with its `reparse` flag set and no line indents."""
        force = True
        self.inc_parse([], force)

    def inc_parse(self, line_indents=[], reparse=False):
        """Run one incremental parse over the tree in `self.previous_version`.

        Starting at the node following BOS, each lookahead is either handed to
        `parse_terminal` (terminals, the finish symbol, epsilon) or treated as
        a subtree: changed subtrees are broken down into their children,
        unchanged ones are shifted as a whole if the syntax table allows it.

        `line_indents` is not used in this method. With `reparse` set, every
        nonterminal is broken down regardless of its `changed` flag.

        Returns True when `parse_terminal` reports "Accept" and False when it
        reports "Error"; `self.last_status` is set to the same value.
        """
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        # reset the per-parse state
        self.validating = False
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        # the bottom of the stack is a finish-symbol node in state 0
        self.stack.append(Node(FinishSymbol(), 0, []))
        self.stack[0].indent = [0]
        bos = self.previous_version.parent.children[0]
        self.loopcount = 0
        self.anycount = set()
        self.any_newlines = []
        self.last_indent = [0]

        # constant switch: the non-optimised branch further down is never taken
        USE_OPT = True

        # find the last token before EOS that is not an indentation token
        eos = self.previous_version.parent.children[-1]
        d = eos.prev_term
        while isinstance(d.symbol, IndentationTerminal):
            d = d.prev_term
        self.last_token_before_eos = d
        if isinstance(d, BOS):
            # if file is empty, delete left over indentation tokens
            n = d.next_term
            while isinstance(n.symbol, IndentationTerminal):
                n.parent.remove_child(n)
                n = n.next_term

        # fix indentation after bos. Should result in an error for whitespace
        # at the beginning
        if bos.next_term.lookup == "<ws>":
            bos.insert_after(TextNode(IndentationTerminal("INDENT")))
        elif isinstance(bos.next_term.symbol, IndentationTerminal):
            bos.next_term.parent.remove_child(bos.next_term)

        la = self.pop_lookahead(bos)
        # main loop: only left through the return statements below
        while(True):
            logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
            self.loopcount += 1
            if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:#self.has_changed(la):
                    assert False # with prelexing you should never end up here!
                else:
                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        self.last_status = True
                        return True
                    elif result == "Error":
                        self.last_status = False
                        return False
                    elif result != None:
                        # parse_terminal handed back the next lookahead
                        la = result
                    # a None result keeps the same lookahead for the next round

            else: # Nonterminal
                if la.changed or reparse:
                    #la.changed = False # as all nonterminals that have changed are being rebuild, there is no need to change this flag (this also solves problems with comments)
                    # remember the flag so do_undo can restore it after an error
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        if goto: # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            if la.indent:
                                self.last_indent = list(la.indent)
                            la.state = follow_id #XXX this fixed goto error (i should think about storing the states on the stack instead of inside the elements)
                            self.current_state = follow_id
                            logging.debug("USE_OPT: set state to %s", self.current_state)
                            la = self.pop_lookahead(la)
                            # the subtree was shifted unseen; parse_terminal
                            # breaks it down again if the next terminal fails
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal()

                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                    # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            logging.debug("\x1b[37mis shiftable\x1b[0m")
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        # not reached: the loop above has no break
        logging.debug("============ INCREMENTAL PARSE END ================= ")

    def get_previous_ws(self, node):
        """Returns the whitespace of the previous logical line"""
        cur = node.prev_term
        # walk backwards until BOS, looking for a <return> that starts a
        # logical line
        while not isinstance(cur, BOS):
            if cur.lookup == "<return>" and self.is_logical_line(cur):
                follower = cur.next_term
                if follower.lookup == "<ws>":
                    return len(follower.symbol.name)
                return 0
            cur = cur.prev_term
        return 0

    def indents_differ(self, this, other):
        """Return True if the two token lists differ in length or in any symbol."""
        if len(this) != len(other):
            return True
        return any(mine.symbol != theirs.symbol
                   for mine, theirs in zip(this, other))

    def repair_indents(self, node, there, needed):
        """Updates the indentation tokens of a line, given a list of needed
        tokens and tokens already there.

        Existing tokens are reused position by position: a token whose symbol
        already matches is kept, a differing one gets the needed symbol name
        and is marked as changed. Needed tokens beyond the existing ones are
        inserted after the last token handled so far (initially `node`), and
        existing tokens that are not needed are removed from their parent.
        """
        existing = iter(there)
        last = node
        # update indentation tokens with new values or insert new ones
        for wanted in needed:
            try:
                # builtin next() works on Python 2.6+ and 3 (it.next() is 2-only)
                current = next(existing)
            except StopIteration:
                # no existing token left to reuse: insert the needed one
                last.insert_after(wanted)
                last = wanted
                continue
            if not (wanted.symbol == current.symbol):
                current.symbol.name = wanted.symbol.name
                current.mark_changed()
            # advance the insertion point past the reused token as well,
            # otherwise later inserts would end up in front of it
            last = current
        # delete all leftovers
        for leftover in existing:
            leftover.parent.remove_child(leftover)

    def parse_anysymbol(self):
        """Look up an AnySymbol action for the current state.

        The plain AnySymbol is tried first; if the table has no (truthy) entry
        for it, the "@ncr" variant is looked up instead. Returns the pair
        (lookup result, symbol used for the lookup).
        """
        plain = AnySymbol()
        action = self.syntaxtable.lookup(self.current_state, plain)
        if action:
            return action, plain
        no_cr = AnySymbol("@ncr")
        return self.syntaxtable.lookup(self.current_state, no_cr), no_cr

    def parse_terminal(self, la, lookup_symbol):
        """Process the terminal `la` using `lookup_symbol` for the table lookup.

        Returns "Accept" or "Error", the next lookahead node after a shift (or
        after the token was consumed by an ANYSYMBOL rule), or None when a
        speculative shift was broken down and `la` has to be tried again.
        """
        # try parsing ANYSYMBOL
        if not isinstance(la.symbol, FinishSymbol) and self.process_any(la):
            return self.pop_lookahead(la)

        action = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("\x1b[34mparse_terminal\x1b[0m: %s in %s -> %s", lookup_symbol, self.current_state, action)

        if isinstance(action, Accept):
            #XXX change parse so that stack is [bos, startsymbol, eos]
            root = self.previous_version.parent
            bos = root.children[0]
            eos = root.children[-1]
            self.previous_version.parent.set_children([bos, self.stack[1], eos])
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug ("\x1b[32mAccept\x1b[0m")
            return "Accept"

        if isinstance(action, Shift):
            self.validating = False
            self.shift(la, action)
            return self.pop_lookahead(la)

        if isinstance(action, Reduce):
            logging.debug("\x1b[33mReduce\x1b[0m: %s -> %s", la, action)
            self.reduce(action)
            # retry the same terminal in the state reached by the reduction
            return self.parse_terminal(la, lookup_symbol)

        if action is not None:
            return None

        # no table entry for this terminal
        if not self.validating:
            return self.do_undo(la)
        logging.debug("Was validating: Right breakdown and return to normal")
        logging.debug("Before breakdown: %s", self.stack[-1])
        self.right_breakdown()
        logging.debug("After breakdown: %s", self.stack[-1])
        self.validating = False
        return None

    def is_logical_line(self, node):
        """Checks if a line is logical, i.e. doesn't only consist of whitespaces or comments"""
        # a "\r" directly preceded by a backslash never starts a logical line
        if node.symbol.name == "\r" and node.prev_term.symbol.name == "\\":
            return False

        cur = node.next_term
        while True:
            if isinstance(cur, EOS):
                return False
            lookup = cur.lookup
            # Checking the parent subtree for comments doesn't work, as we only
            # know if something is part of a comment AFTER we parsed it, and by
            # then it is too late to add indentation tokens. Instead we check
            # manually whether one of the known comment tokens appears in the
            # line.
            #XXX return false or continue?
            if lookup in self.comment_tokens or lookup == "<return>":
                # comment token found, or reached next line
                return False
            if lookup != "<ws>" and not isinstance(cur.symbol, IndentationTerminal):
                # if we are here, we reached a normal node
                return True
            # whitespace and indentation tokens are skipped
            cur = cur.next_term

    def parse_whitespace(self, la):
        """Calculates and repairs indentation levels and tokens after parsing a <return> token.

        Special case: The last token before EOS triggers the generation of the closing dedentations

        1) Check if a line is logical or not
           a) Logical: Update indent levels, compare needed indentation tokens
              with current ones and update if needed
           b) Not logical: Remove all indentation tokens and set indent level to None
        2) Update succeeding lines that depend(ed) on this line
        """
        if not (la.lookup == "<return>" or isinstance(la, BOS) or la is self.last_token_before_eos):
            return

        if not self.is_logical_line(la) and la is not self.last_token_before_eos:
            # delete indentation tokens and indent level
            tok = la.next_term
            while isinstance(tok.symbol, IndentationTerminal):
                tok.parent.remove_child(tok)
                tok = tok.next_term
            la.indent = None
            newindent = list(self.get_last_indent(la))
            ws = self.get_previous_ws(la)
        else:
            # collect the indentation tokens that currently follow `la`
            there = []
            tok = la.next_term
            while isinstance(tok.symbol, IndentationTerminal):
                there.append(tok)
                tok = tok.next_term

            ws = len(tok.symbol.name) if tok.lookup == "<ws>" else 0

            needed, newindent = self.get_indentation_tokens_and_indent(
                list(self.get_last_indent(la)), ws)
            # compare against the old indent before it is overwritten below
            stack_unchanged = newindent == la.indent
            if la is not self.last_token_before_eos:
                la.indent = list(newindent)
                self.last_indent = list(la.indent)

            if self.indents_differ(there, needed):
                self.repair_indents(la, there, needed)
            elif stack_unchanged:
                # tokens and indent stack are both up to date: nothing to
                # propagate to the following lines
                return
        self.update_succeeding_lines(la, ws, newindent)

    def update_succeeding_lines(self, la, ws, newindent):
        """Mark the <return> tokens after `la` whose indentation is out of date.

        Walking forward from `la`, every logical line gets its needed
        indentation tokens recomputed from the running indent stack; lines
        whose tokens or stored indent no longer match are marked as changed.
        The walk stops after the first line with less whitespace than `ws`,
        or at EOS, where the closing tokens are checked as well.
        """
        # XXX this causes a chain reaction iterating over some lines
        # multiple times. we might only have to do this for the <return>
        # that has actually changed during the parse
        cursor = la.next_term
        while not isinstance(cursor, EOS):
            if cursor.lookup == "<return>":
                # XXX need to skip unlogical lines (what if don't know if unlogical yet)

                # if tokens need to be updated, mark as changed, so the parser will go down this tree to update
                next_ws = self.get_whitespace(cursor)
                if next_ws is not None:
                    needed, newindent = self.get_indentation_tokens_and_indent(newindent, next_ws)
                    if not self.indents_match(cursor, needed) or cursor.indent != newindent:
                        cursor.mark_changed()
                    if next_ws < ws:
                        # if newline has smaller whitespace -> mark and stop
                        return
            cursor = cursor.next_term

        # if changes reach end of file, repair indentations now or
        # it will be too late
        eos_there = []
        tok = cursor.prev_term
        while isinstance(tok.symbol, IndentationTerminal):
            eos_there.insert(0, tok)
            tok = tok.prev_term
        eos_needed, _ = self.get_indentation_tokens_and_indent(list(self.get_last_indent(tok)), 0)
        if self.indents_differ(eos_there, eos_needed):
            # don't repair here, only mark and repair just before last token is parsed
            self.last_token_before_eos.mark_changed()

    def get_indentation_tokens_and_indent(self, indent, ws):
        """Return (needed indentation token nodes, new indent stack) for a line.

        `indent` is the indent stack in effect before the line and `ws` the
        line's whitespace width. Every line needs a NEWLINE token; deeper
        lines add INDENT, shallower ones add one DEDENT per level left and
        UNBALANCED if `ws` matches none of the remaining levels.

        Note: when dedenting, levels are popped from `indent` in place.
        """
        top = indent[-1]
        tokens = [Node(IndentationTerminal("NEWLINE"))]
        if ws > top:
            tokens.append(Node(IndentationTerminal("INDENT")))
            return tokens, indent + [ws]
        if ws < top:
            while ws < indent[-1]:
                indent.pop()
                tokens.append(Node(IndentationTerminal("DEDENT")))
            if ws != indent[-1]:
                # XXX in future, just ERROR here
                tokens.append(Node(IndentationTerminal("UNBALANCED")))
        return tokens, list(indent)

    def indents_match(self, node, needed):
        """Return True if the indentation tokens following `node` equal `needed`.

        The tokens are compared by symbol, position by position.
        """
        present = []
        tok = node.next_term
        while isinstance(tok.symbol, IndentationTerminal):
            present.append(tok)
            tok = tok.next_term

        if len(present) != len(needed):
            return False
        return not any(have.symbol != want.symbol
                       for have, want in zip(present, needed))

    def get_whitespace(self, node):
        """Return the whitespace width of the line started by `node`.

        Returns None if the line is not logical, otherwise the length of the
        "<ws>" token found after any indentation tokens, or 0 if there is none.
        """
        if not self.is_logical_line(node):
            return None

        tok = node.next_term
        while isinstance(tok.symbol, IndentationTerminal):
            tok = tok.next_term

        return len(tok.symbol.name) if tok.lookup == "<ws>" else 0

    def get_last_indent(self, la):
        """Return the most recently recorded indent stack (`self.last_indent`).

        `la` is accepted for interface compatibility but not used. An earlier
        implementation searched the parse stack for the last element carrying
        an indent level; it had become unreachable behind this return and was
        removed.
        """
        return self.last_indent

    def set_total_indent(self, node):
        """Copy the indent of the last child that has one onto `node`.

        If `node` has no children, or none of them has a truthy indent,
        `node.indent` is left unchanged.
        """
        last_seen = []
        for child in node.children or ():
            if child.indent:
                last_seen = child.indent
        if last_seen:
            node.indent = last_seen

    def get_lookup(self, la):
        """Return the symbol used to look `la` up in the syntax table.

        A non-empty `la.lookup` is wrapped in a Terminal, otherwise the node's
        own symbol is used. Indentation terminals are replaced by a plain
        Terminal of the same name.
        """
        symbol = Terminal(la.lookup) if la.lookup != "" else la.symbol
        if isinstance(symbol, IndentationTerminal):
            #XXX hack: change parsing table to accept IndentationTerminals
            return Terminal(symbol.name)
        return symbol

    def do_undo(self, la):
        """Roll back all recorded attribute changes and report a parse error.

        The entries of `self.undo` are applied newest first, `la` is stored as
        the error node and the string "Error" is returned.
        """
        while self.undo:
            target, attr_name, old_value = self.undo.pop()
            setattr(target, attr_name, old_value)
        self.error_node = la
        logging.debug ("\x1b[31mError\x1b[0m: %s %s %s", la, la.prev_term, la.next_term)
        logging.debug("loopcount: %s", self.loopcount)
        return "Error"

    def reduce(self, element):
        """Reduce elements from the stack to a Nonterminal subtree.

        Special: nodes that were pushed as part of an ANYSYMBOL match (tracked
        in `self.anycount`) are added "silently" to the subtree -- they don't
        count towards the amount of symbols of the reduction.

        Raises an Exception if the syntax table has no goto entry for the
        reduced nonterminal.
        """
        production = element.action
        children = []
        counted = 0
        while counted < element.amount():
            node = self.stack.pop()
            # apply folding information from grammar to tree nodes
            node.symbol.folding = production.right[element.amount()-counted-1].folding
            children.append(node)
            if node not in self.anycount:
                # if this node is part of any, don't count it towards reduce elements
                counted += 1
        # nodes were popped last-to-first; restore left-to-right order
        children.reverse()

        below = self.stack[-1]
        logging.debug("   Element on stack: %s(%s)", below.symbol, below.state)
        self.current_state = below.state #XXX don't store on nodes, but on stack
        logging.debug("   Reduce: set state to %s (%s)", self.current_state, below.symbol)

        goto = self.syntaxtable.lookup(self.current_state, production.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state))
        assert goto != None

        # save childrens parents state
        for child in children:
            self.undo.extend([
                (child, 'parent', child.parent),
                (child, 'left', child.left),
                (child, 'right', child.right),
                (child, 'log', child.log.copy()),
            ])

        new_node = Node(production.left.copy(), goto.action, children)
        self.set_total_indent(new_node)
        logging.debug("   Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        self.current_state = new_node.state # = goto.action
        logging.debug("Reduce: set state to %s (%s)", self.current_state, new_node.symbol)

        uses_eco_annotation = getattr(element.action.annotation, "interpret", None)
        if uses_eco_annotation:
            # eco grammar annotations
            self.interpret_annotation(new_node, element.action)
        else:
            # johnstone annotations
            self.add_alternate_version(new_node, element.action)

    def interpret_annotation(self, node, production):
        """Store the result of the production's annotation as `node.alternate`.

        Nothing happens if the production has no (truthy) annotation.
        """
        annotation = production.annotation
        if not annotation:
            return
        node.alternate = annotation.interpret(node)

    def add_alternate_version(self, node, production):
        """Attach an alternate (folded) version of `node` as `node.alternate`.

        The alternate is a TextNode with a fresh symbol of the same class and
        name as `node.symbol`. Its children are derived from `node.children`
        according to each child's folding marker:

        - "^^^": the child is set aside ("teared") and its marker cleared; it
          is re-inserted at the positions listed in `production.inserts`
        - "^^":  the child's innermost alternate replaces the alternate's
          symbol and contributes its children
        - "^":   the child's innermost alternate contributes its children
        - otherwise the child itself is added
        """
        # add alternate (folded) versions for nodes to the tree
        alternate = TextNode(node.symbol.__class__(node.symbol.name), node.state, [])
        alternate.children = []
        teared = []
        for i, c in enumerate(node.children):
            # `in` replaces dict.has_key, which no longer exists in Python 3
            if i in production.inserts:
                # insert teared nodes at right position
                value = production.inserts[i]
                for t in teared:
                    if t.symbol.name == value.name:
                        alternate.children.append(t)
            folding = c.symbol.folding
            if folding == "^^^":
                c.symbol.folding = None
                teared.append(c)
                continue
            elif folding == "^^":
                # follow the chain down to the innermost alternate
                while c.alternate is not None:
                    c = c.alternate
                alternate.symbol = c.symbol
                for child in c.children:
                    alternate.children.append(child)
            elif folding == "^":
                while c.alternate is not None:
                    c = c.alternate
                for child in c.children:
                    alternate.children.append(child)
            else:
                alternate.children.append(c)
        node.alternate = alternate

    def left_breakdown(self, la):
        """Return the first child of `la`, or the next lookahead if it has no children."""
        if la.children:
            return la.children[0]
        return self.pop_lookahead(la)

    def right_breakdown(self):
        """Undo a speculative shift of a whole subtree.

        The topmost stack element is popped and, as long as the popped node is
        a nonterminal, its children are pushed in its place (each either
        consumed by `process_any` or shifted with `rb=True`) and the last of
        them is popped again. The terminal that remains at the end is pushed
        back the same way. If the finish symbol at the bottom of the stack is
        reached, the state is reset to 0 and the method returns.
        """
        node = self.stack.pop() # optimistically shifted Nonterminal
        # after the breakdown, we need to properly shift the left over terminal
        # using the (correct) current state from before the optimistic shift of
        # it's parent tree
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown(%s): set state to %s", node.symbol.name, self.current_state)
        while(isinstance(node.symbol, Nonterminal)):
            for c in node.children:
                if not self.process_any(c): # in breakdown we also have to take care of ANYSYMBOLs
                    self.shift(c, rb=True)
                # NOTE(review): this rebinding has no effect on the iteration,
                # the loop variable is overwritten by the next child
                c = c.right
            # take the last pushed element off again to break it down further
            node = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop"
            if isinstance(node.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol pack onto the stack
                self.current_state = 0
                self.stack.append(node)
                return
            else:
                logging.debug("right breakdown else: set state to %s", self.stack[-1].state)
                self.current_state = self.stack[-1].state
        if not self.process_any(node):
            self.shift(node, rb=True) # pushes previously popped terminal back on stack

    def shift(self, la, element=None, rb=False):
        """Push `la` onto the stack and move to the state given by `element`.

        Without an `element` the action is looked up in the syntax table for
        the current state. `rb` marks shifts done during a right breakdown;
        those skip the indentation handling. For indentation based languages
        (and `rb` unset) the result of `parse_whitespace` is returned,
        otherwise None.
        """
        if not element:
            element = self.syntaxtable.lookup(self.current_state, self.get_lookup(la))
        prefix = "rb" if rb else ""
        logging.debug("\x1b[32m%sShift(%s)\x1b[0m: %s -> %s", prefix, self.current_state, la, element)
        la.state = element.action
        self.stack.append(la)
        self.current_state = la.state

        if la.lookup != "<ws>":
            # last_shift_state is used to predict next symbol
            # whitespace destroys correct behaviour
            self.last_shift_state = element.action

        if self.indentation_based and not rb:
            return self.parse_whitespace(la)

    def process_any(self, la):
        """Let a pending ANYSYMBOL rule consume `la` if one applies.

        Returns True if `la` was pushed as part of the ANYSYMBOL match, False
        if `la` ends the match (it must then be parsed normally), and None if
        the current state has no ANYSYMBOL action at all.
        """
        result, symbol = self.parse_anysymbol()
        if not result:
            return None

        # ANYSYMBOL with finishing symbol
        follow = self.syntaxtable.lookup(result.action, self.get_lookup(la))
        if isinstance(follow, Shift):
            self.end_any(la, result)
            return False

        # ANY without finishing symbol
        if symbol.name == "@ncr" and (la.lookup == "<return>" or la.symbol == IndentationTerminal("NEWLINE") or isinstance(la, EOS)):
            self.end_any(la, result, symbol.name)
            return False

        self.push_any(la)
        return True

    def push_any(self, la):
        """Push `la` onto the stack as part of the current ANYSYMBOL match.

        The node is recorded in `self.anycount`; for indentation based
        languages "<return>" nodes are additionally collected in
        `self.any_newlines`.
        """
        logging.debug("AnySymbol: push %s" % (la))
        # this node is now part of this comment state (needed to unvalidating)
        la.state = self.current_state
        self.stack.append(la)
        self.anycount.add(la)
        is_newline = la.lookup == "<return>"
        if is_newline and self.indentation_based:
            self.any_newlines.append(la)

    def end_any(self, la, result, mode="@"):
        """Leave the ANYSYMBOL match and continue in the state `result.action`.

        For indentation based languages the lines following each newline
        collected during the match are re-checked. The list of collected
        newlines is cleared afterwards. `mode` is only used for logging.
        """
        logging.debug("AnySymbol: end %s (%s)" % (la, mode))
        # switch to state after ANY and continue parsing normally
        self.current_state = result.action
        logging.debug("AnySymbol: set state to %s", self.current_state)

        # update succeeding
        if self.indentation_based:
            for newline in self.any_newlines:
                self.update_succeeding_lines(newline, self.last_indent[-1], list(self.last_indent))
        self.any_newlines = []

    def pop_lookahead(self, la):
        """Return the node that follows `la` in the tree.

        This is the right sibling of `la`, or of its closest ancestor that
        has one.
        """
        node = la
        sibling = node.right_sibling()
        while sibling is None:
            node = node.parent
            sibling = node.right_sibling()
        logging.debug("pop_lookahead(%s): %s", la.symbol, sibling.symbol)
        return sibling

    def shiftable(self, la):
        """Return True if the syntax table has an entry for `la.symbol` in the current state."""
        entry = self.syntaxtable.lookup(self.current_state, la.symbol)
        return bool(entry)

    def has_changed(self, node):
        """Return True if `node` is listed in `self.all_changes`."""
        recorded = self.all_changes
        return node in recorded

    def prepare_input(self, _input):
        """Turn a space-separated string into a list of Terminals ending with a FinishSymbol.

        An empty string yields a list containing only the FinishSymbol.
        """
        # XXX need an additional lexer to do this right
        words = _input.split(" ") if _input != "" else []
        symbols = [Terminal(word) for word in words]
        symbols.append(FinishSymbol())
        return symbols

    def get_ast(self):
        """Return an AST whose Root holds a "bos" node, `ast_stack[0]` and a finish node."""
        children = [
            Node(Terminal("bos"), 0, []),
            self.ast_stack[0],
            Node(FinishSymbol(), 0, []),
        ]
        return AST(Node(Nonterminal("Root"), 0, children))

    def get_next_possible_symbols(self, state_id):
        """Return the set of symbols that have a syntax table entry in `state_id`.

        The syntax table is keyed by (state, symbol) pairs; every symbol whose
        key carries the requested state is collected.
        """
        return set(
            symbol
            for (state, symbol) in self.syntaxtable.table.keys()
            if state == state_id
        )

    def get_next_symbols_list(self, state = -1):
        """Return the names of all symbols the syntax table knows for `state`.

        A `state` of -1 (the default) selects `self.last_shift_state`.
        """
        chosen_state = self.last_shift_state if state == -1 else state
        return [symbol.name
                for symbol in self.get_next_possible_symbols(chosen_state)]

    def get_next_symbols_string(self, state = -1):
        """Return the symbol names for `state` joined into one ", "-separated string."""
        return ", ".join(self.get_next_symbols_list(state))

    def get_expected_symbols(self, state_id):
        """Return the non-whitespace symbols expected next in state `state_id`.

        The answer is taken from the state set stored in the state graph. A
        `state_id` of -1 yields an empty list.
        """
        #XXX if state of a symbol is nullable, return next symbol as well
        #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id == -1:
            return []
        return self.graph.state_sets[state_id].get_next_symbols_no_ws()


    def reset(self):
        """Clear all parse state and install a fresh tree via `init_ast`."""
        # stacks and change tracking
        self.stack, self.ast_stack = [], []
        self.all_changes, self.undo = [], []
        # outcome of the last parse
        self.last_shift_state = 0
        self.validating = self.last_status = False
        self.error_node = self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Restore the parse status and error node saved for `version`.

        Each value is looked up independently; a missing entry only logs a
        warning and leaves the corresponding attribute untouched.
        """
        try:
            status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        else:
            self.last_status = status

        try:
            node = self.errornode_by_version[version]
        except KeyError:
            logging.warning("Could not find errornode for version %s", version)
        else:
            self.error_node = node

    def save_status(self, version):
        """Remember the current parse status and error node under `version`."""
        status, error_node = self.last_status, self.error_node
        self.status_by_version[version] = status
        self.errornode_by_version[version] = error_node
Ejemplo n.º 6
0
class IncParser(object):

    def __init__(self, grammar=None, lr_type=LR0, whitespaces=False, startsymbol=None):
        """Create an incremental parser, optionally building its tables from a grammar.

        grammar     -- grammar source handed to Parser; when falsy, neither a
                       state graph nor a syntax table is built here
        lr_type     -- table type passed to StateGraph/SyntaxTable; for LALR
                       the graph is additionally converted via convert_lalr()
        whitespaces -- passed on to Parser and stored on the instance
        startsymbol -- accepted but not used by this constructor

        The state graph is cached as a pickle file in ``../pickle/`` relative
        to this module, named after the hash of grammar and whitespaces.  If
        the cache file cannot be opened (IOError) the graph is rebuilt and
        written back.
        """

        if grammar:
            logging.debug("Parsing Grammar")
            parser = Parser(grammar, whitespaces)
            parser.parse()

            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(hash(grammar) ^ hash(whitespaces)), ".pcl"])
            try:
                logging.debug("Try to unpickle former stategraph")
                # NOTE(security): pickle.load can execute arbitrary code; the
                # cache directory must only contain files written by this
                # program.
                # Binary mode plus `with` ensures the handle is closed and
                # the data is read back unmodified on every platform.
                with open(filename, "rb") as f:
                    start = time.time()
                    self.graph = pickle.load(f)
                    end = time.time()
                logging.debug("unpickling done in %s", end-start)
            except IOError:
                logging.debug("could not unpickle old graph")
                logging.debug("Creating Stategraph")
                self.graph = StateGraph(parser.start_symbol, parser.rules, lr_type)
                logging.debug("Building Stategraph")
                self.graph.build()
                logging.debug("Pickling")
                # closing the file explicitly guarantees the cache is flushed
                # to disk even on interpreters without reference counting
                with open(filename, "wb") as f:
                    pickle.dump(self.graph, f)

            if lr_type == LALR:
                self.graph.convert_lalr()

            logging.debug("Creating Syntaxtable")
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph)

        self.stack = []
        self.ast_stack = []
        self.all_changes = []
        self.undo = []
        self.last_shift_state = 0
        self.validating = False
        self.last_status = False
        self.error_node = None
        self.whitespaces = whitespaces
        self.status_by_version = {}
        self.errornode_by_version = {}

        self.previous_version = None
        logging.debug("Incemental parser done")

    def from_dict(self, rules, startsymbol, lr_type, whitespaces, pickle_id, precedences):
        """Set up the syntax table from already parsed grammar rules.

        rules, startsymbol, lr_type -- passed to StateGraph / SyntaxTable
        whitespaces -- stored on the instance; also part of the cache file name
        pickle_id   -- when truthy, identifies a cached syntax table in
                       ``../pickle/`` relative to this module
        precedences -- passed to SyntaxTable.build

        With a cache hit only the syntax table is loaded and ``self.graph``
        stays None.  Otherwise graph and table are built, and the table is
        written to the cache when `pickle_id` is truthy.
        """
        self.graph = None
        self.syntaxtable = None
        if pickle_id:
            filename = "".join([os.path.dirname(__file__), "/../pickle/", str(pickle_id ^ hash(whitespaces)), ".pcl"])
            try:
                # NOTE(security): pickle.load can execute arbitrary code; the
                # cache directory must only contain files written by this
                # program.
                with open(filename, "rb") as f:
                    self.syntaxtable = pickle.load(f)
            except IOError:
                # a missing cache file is expected on first use: fall through
                # and build the table from scratch
                logging.debug("could not unpickle syntaxtable from %s", filename)
        if self.syntaxtable is None:
            self.graph = StateGraph(startsymbol, rules, lr_type)
            self.graph.build()
            self.syntaxtable = SyntaxTable(lr_type)
            self.syntaxtable.build(self.graph, precedences)
            if pickle_id:
                # `with` closes (and thereby flushes) the cache file
                with open(filename, "wb") as f:
                    pickle.dump(self.syntaxtable, f)

        self.whitespaces = whitespaces

    def init_ast(self, magic_parent=None):
        """Install an empty tree (Root -> [BOS, EOS]) as the previous version.

        Both sentinel terminals get `magic_parent` assigned and are linked to
        each other via next_term / prev_term.  Root, BOS and EOS are then
        saved as version 0.
        """
        begin = BOS(Terminal(""), 0, [])
        end = EOS(FinishSymbol(), 0, [])
        for sentinel in (begin, end):
            sentinel.magic_parent = magic_parent
        begin.next_term = end
        end.prev_term = begin

        root = Node(Nonterminal("Root"), 0, [begin, end])
        self.previous_version = AST(root)
        for node in (root, begin, end):
            node.save(0)

    def reparse(self):
        """Run inc_parse with no line indents and the reparse flag set.

        The result of inc_parse is not returned.
        """
        no_indents = []
        force_reparse = True
        self.inc_parse(no_indents, force_reparse)

    def inc_parse(self, line_indents=[], reparse=False):
        """Incrementally parse the tree stored in self.previous_version.

        Starting behind the first child of the previous version's root, the
        lookahead walks the old tree.  Terminals are handed to
        parse_terminal; unchanged nonterminal subtrees are shifted as a whole
        when the syntax table allows it, otherwise they are broken down.
        Comment tokens (lookup "cmt_start" / "cmt_end") switch into and out
        of a comment mode that bundles everything in between into a
        ~COMMENT~ node.

        line_indents -- not used by this method
        reparse      -- when True, every nonterminal lookahead is broken down
                        as if it had been marked as changed

        Returns True if the input was accepted, False on a parse error
        (including reaching EOS while still inside a comment).  The same
        value is stored in self.last_status.
        """
        logging.debug("============ NEW INCREMENTAL PARSE ================= ")
        # reset per-run state
        self.error_node = None
        self.stack = []
        self.undo = []
        self.current_state = 0
        # bottom-of-stack marker (right_breakdown checks for FinishSymbol)
        self.stack.append(Node(FinishSymbol(), 0, []))
        bos = self.previous_version.parent.children[0]
        la = self.pop_lookahead(bos)
        self.loopcount = 0
        self.comment_mode = False

        # local switch between the optimistic nonterminal shift below and the
        # "PARSER WITHOUT OPTIMISATION" branch
        USE_OPT = True

        while(True):
            self.loopcount += 1
            if self.comment_mode:
                if la.lookup == "cmt_end":
                    # in comment mode we just add all subtrees as they are to a
                    # subtree COMMENT subtrees that have changes are broken
                    # apart, e.g. to be able to find an inserted */ the CMT
                    # subtree is then added to the parsers stack without
                    # changing its state when the parser later reduces stack
                    # elements to a new subtree, CMT subtrees are added as
                    # children
                    next_la = self.pop_lookahead(la)
                    self.comment_mode = False
                    comment_stack.append(la)
                    CMT = Node(Nonterminal("~COMMENT~"))
                    # remember the old tree links so do_undo can restore them
                    for c in comment_stack:
                        self.undo.append((c, 'parent', c.parent))
                        self.undo.append((c, 'left', c.left))
                        self.undo.append((c, 'right', c.right))
                    CMT.set_children(comment_stack)
                    CMT.state = self.current_state
                    self.stack.append(CMT)
                    la = next_la
                    continue
                if isinstance(la, EOS):
                    # comment was never closed: report an error
                    self.comment_mode = False
                    self.do_undo(la)
                    self.last_status = False
                    return False
                la = self.add_to_stack(la, comment_stack)
                continue
            if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
                if la.changed:#self.has_changed(la):
                    assert False # with prelexing you should never end up here!
                else:
                    if la.lookup == "cmt_start":
                        # when we find a cmt_start token, we enter comment mode
                        self.comment_mode = True
                        comment_stack = []
                        comment_stack.append(la)
                        # since unchanged subtrees are left untouched, we
                        # wouldn't find a cmt_end if it is part of another
                        # comment, e.g. /* foo /* bar */ to be able to merge
                        # two comment together, we need to find the next
                        # cmt_end and mark its subtree as changed
                        end = la
                        # XXX configure these through the grammar, e.g. Java
                        # needs /*@*/, Python """@""" (@ means, match anything)
                        while True:
                            end = end.next_term
                            if isinstance(end, EOS):
                                break
                            if end.symbol.name.find("*/") > 0:
                                # split token
                                self.lexer.split_endcomment(end)
                                break
                            if end.lookup == "cmt_end":
                                end.mark_changed()
                                break
                        la = self.pop_lookahead(la)
                        continue

                    lookup_symbol = self.get_lookup(la)
                    result = self.parse_terminal(la, lookup_symbol)
                    if result == "Accept":
                        self.last_status = True
                        return True
                    elif result == "Error":
                        self.last_status = False
                        return False
                    elif result != None:
                        # a shift happened: result is the next lookahead
                        la = result
                    # result is None: keep the same lookahead and try again

            else: # Nonterminal
                if la.changed or reparse:
                    la.changed = False
                    self.undo.append((la, 'changed', True))
                    la = self.left_breakdown(la)
                else:
                    if USE_OPT:
                        goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                        if goto: # can we shift this Nonterminal in the current state?
                            logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                            follow_id = goto.action
                            self.stack.append(la)
                            la.state = follow_id #XXX this fixed goto error (i should think about storing the states on the stack instead of inside the elements)
                            self.current_state = follow_id
                            la = self.pop_lookahead(la)
                            # parse_terminal performs a right_breakdown if the
                            # next lookup fails while this flag is set
                            self.validating = True
                            continue
                        else:
                            #XXX can be made faster by providing more information in syntax tables
                            first_term = la.find_first_terminal()

                            lookup_symbol = self.get_lookup(first_term)
                            element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                            if isinstance(element, Reduce):
                                self.reduce(element)
                            else:
                                la = self.left_breakdown(la)
                    else:
                    # PARSER WITHOUT OPTIMISATION
                        if la.lookup != "":
                            lookup_symbol = Terminal(la.lookup)
                        else:
                            lookup_symbol = la.symbol
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                        if self.shiftable(la):
                            self.stack.append(la)
                            self.current_state = la.state
                            self.right_breakdown()
                            la = self.pop_lookahead(la)
                        else:
                            la = self.left_breakdown(la)
        # not reached: the loop above is only left via return
        logging.debug("============ INCREMENTAL PARSE END ================= ")

    def add_to_stack(self, la, stack):
        """Collect comment content onto `stack`, starting at node `la`.

        Unchanged nodes are appended to `stack` as they are.  Changed nodes
        are not appended: a changed subtree is descended into via its first
        child, a changed node without children is skipped.

        Returns the node at which collecting stopped: either a Terminal with
        lookup "cmt_end" or the EOS node.
        """
        node = la
        while True:
            is_comment_end = isinstance(node.symbol, Terminal) and node.lookup == "cmt_end"
            if is_comment_end or isinstance(node, EOS):
                return node
            if not node.changed:
                stack.append(node)
                node = self.pop_lookahead(node)
            elif node.children:
                # break the changed subtree apart
                node = node.children[0]
            else:
                node = self.pop_lookahead(node)

    def parse_terminal(self, la, lookup_symbol):
        """Apply the syntax table action for `lookup_symbol` in the current state.

        Returns
          "Accept"  -- the table entry is an Accept; the parsed subtree
                       (self.stack[1]) is placed between BOS and EOS of the
                       previous version's root
          next node -- after a Shift, the lookahead following `la`
          "Error"   -- no table entry and not validating (see do_undo)
          None      -- no table entry while validating (a right breakdown was
                       performed), or an entry of any other type
        A Reduce is executed and the lookup is then repeated for the same
        lookahead.
        """
        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
        logging.debug("parse_terminal: %s in %s -> %s", lookup_symbol, self.current_state, element)

        if isinstance(element, Accept):
            #XXX change parse so that stack is [bos, startsymbol, eos]
            root = self.previous_version.parent
            first, last = root.children[0], root.children[-1]
            root.set_children([first, self.stack[1], last])
            logging.debug("loopcount: %s", self.loopcount)
            logging.debug ("Accept")
            return "Accept"

        if isinstance(element, Shift):
            target_state = element.action
            logging.debug("Shift: %s -> %s", la, target_state)
            # removing this makes "Valid tokens" correct, should not be needed
            # for incremental parser
            #self.undo.append((la, "state", la.state))
            la.state = target_state
            self.stack.append(la)
            self.current_state = target_state
            if la.lookup != "<ws>":
                # last_shift_state is used to predict next symbol
                # whitespace destroys correct behaviour
                self.last_shift_state = target_state
            return self.pop_lookahead(la)

        if isinstance(element, Reduce):
            logging.debug("Reduce: %s -> %s", la, element)
            self.reduce(element)
            return self.parse_terminal(la, lookup_symbol)

        if element is not None:
            return None
        if not self.validating:
            return self.do_undo(la)
        # an optimistic shift turned out wrong: break it down and retry
        self.right_breakdown()
        self.validating = False
        return None

    def get_lookup(self, la):
        """Return the symbol used to look `la` up in the syntax table.

        A node with a non-empty ``lookup`` is represented by a Terminal of
        that name, otherwise by its own symbol.  An IndentationTerminal is
        replaced by a plain Terminal with the same name.
        """
        symbol = Terminal(la.lookup) if la.lookup != "" else la.symbol
        if not isinstance(symbol, IndentationTerminal):
            return symbol
        #XXX hack: change parsing table to accept IndentationTerminals
        return Terminal(symbol.name)

    def do_undo(self, la):
        """Roll back all recorded attribute changes and flag `la` as error node.

        The undo log holds (node, attribute, old value) triples; they are
        re-applied newest first until the log is empty.  Always returns the
        string "Error".
        """
        while self.undo:
            target, name, old_value = self.undo.pop()
            setattr(target, name, old_value)
        self.error_node = la
        logging.debug ("Error: %s %s %s", la, la.prev_term, la.next_term)
        logging.debug("loopcount: %s", self.loopcount)
        return "Error"

    def reduce(self, element):
        """Reduce the top of the stack to a new Nonterminal subtree.

        element -- the Reduce table entry; element.action is the production
                   and element.amount() the number of symbols to pop

        ~COMMENT~ subtrees found on the stack during the reduction are added
        to the new subtree "silently": they become children but do not count
        towards the amount of popped symbols.  One ~COMMENT~ node directly
        below the popped symbols is taken along as well.

        Raises Exception if the syntax table has no goto entry for the
        production's left-hand side in the uncovered state.
        """
        production = element.action
        subtrees = []
        counted = 0
        while counted < element.amount():
            top = self.stack.pop()
            # apply folding information from grammar to tree nodes
            top.symbol.folding = production.right[element.amount()-counted-1].folding
            subtrees.insert(0, top)
            if top.symbol.name != "~COMMENT~":
                counted += 1
        if self.stack[-1].symbol.name == "~COMMENT~":
            subtrees.insert(0, self.stack.pop())

        uncovered = self.stack[-1]
        logging.debug("   Element on stack: %s(%s)", uncovered.symbol, uncovered.state)
        self.current_state = uncovered.state #XXX don't store on nodes, but on stack

        goto = self.syntaxtable.lookup(self.current_state, production.left)
        if goto is None:
            raise Exception("Reduction error on %s in state %s: goto is None" % (element, self.current_state))
        assert goto != None

        # save childrens parents state
        for child in subtrees:
            for attribute in ('parent', 'left', 'right'):
                self.undo.append((child, attribute, getattr(child, attribute)))

        new_node = Node(production.left.copy(), goto.action, subtrees)
        logging.debug("   Add %s to stack and goto state %s", new_node.symbol, new_node.state)
        self.stack.append(new_node)
        self.current_state = new_node.state # = goto.action

        if getattr(production.annotation, "interpret", None):
            # eco grammar annotations
            self.interpret_annotation(new_node, production)
        else:
            # johnstone annotations
            self.add_alternate_version(new_node, production)

    def interpret_annotation(self, node, production):
        """Store the interpreted annotation of `production` as node.alternate.

        Nothing happens when the production carries no (truthy) annotation.
        """
        annotation = production.annotation
        if not annotation:
            return
        node.alternate = annotation.interpret(node)

    def add_alternate_version(self, node, production):
        """Build the folded ("alternate") version of `node` and attach it.

        node       -- freshly reduced subtree; the result is stored in
                      node.alternate
        production -- the production that was reduced; production.inserts
                      maps child positions to symbols whose teared nodes are
                      to be inserted there

        Each child is handled according to its symbol's folding marker:
          "^^^" -- the child is left out ("teared") and remembered for a
                   later insert; its marker is reset to None
          "^^"  -- the children of the child's innermost alternate are
                   spliced in and the alternate takes over that symbol
          "^"   -- the children of the child's innermost alternate are
                   spliced in
          other -- the child is kept as it is
        """
        # add alternate (folded) versions for nodes to the tree
        alternate = TextNode(node.symbol.__class__(node.symbol.name), node.state, [])
        alternate.children = []
        teared = []
        for i, c in enumerate(node.children):
            # `in` replaces dict.has_key, which no longer exists in Python 3
            if i in production.inserts:
                # insert teared nodes at right position
                value = production.inserts[i]
                for t in teared:
                    if t.symbol.name == value.name:
                        alternate.children.append(t)
            if c.symbol.folding == "^^^":
                c.symbol.folding = None
                teared.append(c)
                continue
            elif c.symbol.folding == "^^":
                while c.alternate is not None:
                    c = c.alternate
                alternate.symbol = c.symbol
                for child in c.children:
                    alternate.children.append(child)
            elif c.symbol.folding == "^":
                while c.alternate is not None:
                    c = c.alternate
                for child in c.children:
                    alternate.children.append(child)
            else:
                alternate.children.append(c)
        node.alternate = alternate

    def left_breakdown(self, la):
        """Return the first child of `la`, or the next lookahead if it has none."""
        has_children = len(la.children) > 0
        return la.children[0] if has_children else self.pop_lookahead(la)

    def right_breakdown(self):
        """Break the subtree on top of the stack down into shifted pieces.

        The top node is popped and, as long as the popped node is a
        Nonterminal, its children are shifted individually and the new top is
        popped again.  The final non-Nonterminal node is shifted back, unless
        it is the FinishSymbol marking the bottom of the stack.
        """
        top = self.stack.pop()
        self.current_state = self.stack[-1].state
        logging.debug("right breakdown: set state to %s", self.current_state)
        while isinstance(top.symbol, Nonterminal):
            for child in top.children:
                self.shift(child)
            top = self.stack.pop()
            # after undoing an optimistic shift (through pop) we need to revert
            # back to the state before the shift (which can be found on the top
            # of the stack after the "pop"
            if isinstance(top.symbol, FinishSymbol):
                # if we reached the end of the stack, reset to state 0 and push
                # FinishSymbol back onto the stack
                self.current_state = 0
                self.stack.append(top)
                return
            self.current_state = self.stack[-1].state
        self.shift(top)

    def shift(self, la):
        """Shift `la` onto the stack using the table entry for the current state.

        Used after a breakdown: the left over node is shifted properly with
        the (correct) current state from before the optimistic shift of its
        parent tree.  The node's state and self.current_state are both set to
        the action of the looked-up table entry.
        """
        symbol = self.get_lookup(la)
        entry = self.syntaxtable.lookup(self.current_state, symbol)
        # logged before la.state is overwritten, i.e. with the old state
        logging.debug("RBShift: la: %s state: %s element: %s", la, la.state, entry)
        la.state = entry.action
        self.stack.append(la)
        logging.debug("RBShift: set state to %s", la.state)
        self.current_state = la.state

    def pop_lookahead(self, la):
        """Return the next subtree to the right of `la`.

        Climbs from `la` towards the root until a node with a right sibling
        is found and returns that sibling.
        """
        node = la
        while node.right_sibling() is None:
            node = node.parent
        return node.right_sibling()

    def shiftable(self, la):
        """Return True if the syntax table has a truthy entry for la.symbol
        in the current state, False otherwise."""
        entry = self.syntaxtable.lookup(self.current_state, la.symbol)
        return bool(entry)

    def has_changed(self, node):
        """Return whether `node` is contained in self.all_changes."""
        recorded_changes = self.all_changes
        return node in recorded_changes

    def prepare_input(self, _input):
        """Turn a string into a list of Terminals followed by a FinishSymbol.

        The string is split at single spaces and every piece becomes one
        Terminal; an empty string yields only the FinishSymbol.
        """
        # XXX need an additional lexer to do this right
        if _input != "":
            symbols = [Terminal(piece) for piece in _input.split(" ")]
        else:
            symbols = []
        symbols.append(FinishSymbol())
        return symbols

    def get_ast(self):
        """Return an AST whose Root wraps self.ast_stack[0] between new
        "bos" and FinishSymbol nodes."""
        begin = Node(Terminal("bos"), 0, [])
        end = Node(FinishSymbol(), 0, [])
        return AST(Node(Nonterminal("Root"), 0, [begin, self.ast_stack[0], end]))

    def get_next_possible_symbols(self, state_id):
        """Collect every symbol the syntax table knows for state `state_id`.

        Table keys are ``(state, symbol)`` pairs; the result is the set of
        symbols whose state part equals `state_id`.
        """
        table_keys = self.syntaxtable.table.keys()
        return set(sym for (st, sym) in table_keys if st == state_id)

    def get_next_symbols_list(self, state = -1):
        """List the names of the symbols possible in `state`.

        The default of -1 stands for the state of the last shift
        (``self.last_shift_state``).
        """
        if state == -1:
            state = self.last_shift_state
        candidates = self.get_next_possible_symbols(state)
        return [candidate.name for candidate in candidates]

    def get_next_symbols_string(self, state = -1):
        """Return the result of get_next_symbols_list(state) as one
        comma-separated string."""
        return ", ".join(self.get_next_symbols_list(state))

    def get_expected_symbols(self, state_id):
        """Return the symbols expected next in state `state_id`.

        The symbols come from get_next_symbols_no_ws() of the state set with
        that id.  An empty list is returned for the sentinel id -1 and when
        no state graph is available: from_dict() leaves ``self.graph`` as
        None if the syntax table was loaded from the cache, and __init__
        without a grammar does not set it at all.
        """
        #XXX if state of a symbol is nullable, return next symbol as well
        #XXX if at end of state, find state we came from (reduce, stack) and get next symbols from there
        if state_id == -1:
            return []
        graph = getattr(self, "graph", None)
        if graph is None:
            # without a graph there are no state sets to ask
            return []
        return graph.state_sets[state_id].get_next_symbols_no_ws()


    def reset(self):
        """Return the parser to its initial state and create a new empty AST."""
        self.stack, self.ast_stack, self.all_changes, self.undo = [], [], [], []
        self.last_shift_state = 0
        self.validating, self.last_status = False, False
        self.error_node = None
        # cleared here, then set up again by init_ast()
        self.previous_version = None
        self.init_ast()

    def load_status(self, version):
        """Load the parse status and error node stored for `version`.

        The two lookups are independent of each other.  If one of them has
        no entry for `version`, a warning is logged and the current value of
        that attribute is kept.
        """
        try:
            status = self.status_by_version[version]
        except KeyError:
            logging.warning("Could not find status for version %s", version)
        else:
            self.last_status = status
        try:
            errornode = self.errornode_by_version[version]
        except KeyError:
            logging.warning("Could not find errornode for version %s", version)
        else:
            self.error_node = errornode

    def save_status(self, version):
        """Store the current parse status and error node under `version`."""
        current_status = self.last_status
        self.status_by_version[version] = current_status
        current_error = self.error_node
        self.errornode_by_version[version] = current_error