Example #1
 def test_relex3(self):
     ast = AST()
     ast.init()
     bos = ast.parent.children[0]
     new1 = TextNode(Terminal("1+2"))
     new2 = TextNode(Terminal("345"))
     new3 = TextNode(Terminal("6+"))
     new4 = TextNode(Terminal("789"))  # this should never be touched
     new4.lookup = "INT"
     new5 = TextNode(Terminal("+"))  # this should never be touched
     new5.lookup = "plus"
     bos.insert_after(new1)
     new1.insert_after(new2)
     new2.insert_after(new3)
     new3.insert_after(new4)
     new4.insert_after(new5)
     self.relex(new1)
     assert ast.parent.symbol == Nonterminal("Root")
     assert isinstance(ast.parent.children[0], BOS)
     assert isinstance(ast.parent.children[-1], EOS)
     node = bos.next_term
     assert node.symbol == Terminal("1")
     node = node.next_term
     assert node.symbol == Terminal("+")
     node = node.next_term
     assert node.symbol == Terminal("23456")
     node = node.next_term
     assert node.symbol == Terminal("+")
     # check that 789 hasn't been relexed
     assert node.next_term is new4
     assert node.next_term.symbol is new4.symbol
Example #2
 def relex_import(self, startnode, version = 0):
     """Optimised relex for freshly imported files."""
     success = self.lex(startnode.symbol.name)
     bos = startnode.prev_term # bos
     parent = bos.parent
     eos = parent.children.pop()
     last_node = bos
     for match in success:
         if match is success[0]:
             # reuse old node for first node to mimic the behaviour of a
             # normal relex
             node = startnode
             node.symbol.name = match[0]
         else:
             node = TextNode(Terminal(match[0]))
         node.lookup = match[1]
         parent.children.append(node)
         last_node.next_term = node
         last_node.right = node
         node.left = last_node
         node.prev_term = last_node
         node.parent = parent
         last_node = node
         node.mark_changed()
     parent.children.append(eos)
     last_node.right = eos # link to eos
     last_node.next_term = eos
     eos.left = last_node
     eos.prev_term = last_node
     bos.mark_changed()
     eos.mark_changed()
     parent.mark_changed()
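
As a rough, self-contained illustration of the flat chain of terminals that relex_import builds between bos and eos, the sketch below links stand-in nodes via next_term/prev_term the same way the loop above does. The Node class, link_tokens helper, and the token list are hypothetical stand-ins, not the project's TextNode/Terminal API.

    # Hypothetical stand-in node; the real code uses TextNode/Terminal.
    class Node(object):
        def __init__(self, name, lookup=None):
            self.name = name          # token text (the symbol name in the real tree)
            self.lookup = lookup      # token type
            self.next_term = None
            self.prev_term = None

    def link_tokens(tokens):
        """Build a bos -> tok -> ... -> eos chain the way relex_import does."""
        bos, eos = Node("<bos>"), Node("<eos>")
        last = bos
        for text, lookup in tokens:
            node = Node(text, lookup)
            last.next_term = node    # mirrors last_node.next_term/right = node
            node.prev_term = last    # mirrors node.prev_term/left = last_node
            last = node
        last.next_term = eos         # mirrors the final link to eos
        eos.prev_term = last
        return bos, eos

    bos, eos = link_tokens([("1", "INT"), ("+", "plus"), ("2", "INT")])
    node = bos.next_term
    while node is not eos:
        print("%s %s" % (node.name, node.lookup))
        node = node.next_term
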
Example #3
 def test_relex_stop(self):
     ast = AST()
     ast.init()
     bos = ast.parent.children[0]
     new = TextNode(Terminal("1+2"))
     old1 = TextNode(Terminal("*"))
     old2 = TextNode(Terminal("3"))
     old2.lookup = "INT"
     bos.insert_after(new)
     new.insert_after(old1)
     old1.insert_after(old2)
     self.relex(new)
     assert ast.parent.symbol == Nonterminal("Root")
     assert isinstance(ast.parent.children[0], BOS)
     assert isinstance(ast.parent.children[-1], EOS)
     node = bos.next_term
     assert node.symbol == Terminal("1")
     node = node.next_term
     assert node.symbol == Terminal("+")
     node = node.next_term
     assert node.symbol == Terminal("2")
     node = node.next_term
     assert node.symbol == Terminal("*")
     node = node.next_term
     assert node.symbol == Terminal("3")
     node = node.next_term
     assert isinstance(node, EOS)
Example #4
    def merge_back(self, read_nodes, generated_tokens):

        any_changes = False
        # insert new nodes into tree
        it = iter(read_nodes)
        for t in generated_tokens:
            try:
                node = it.next()
            except StopIteration:
                node = TextNode(Terminal(""))
                last_node.insert_after(node)
                any_changes = True
            last_node = node
            node.symbol.name = t.source
            if node.lookup != t.name or t.source.find("*/") > 0:
                any_changes = True
                node.mark_changed()
            else:
                node.mark_version()
            node.lookup = t.name
            node.lookahead = t.lookahead
        # delete left over nodes
        while True:
            try:
                node = it.next()
                node.parent.remove_child(node)
                any_changes = True
            except StopIteration:
                break
        return any_changes
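
Stripped of the tree bookkeeping, merge_back is a lockstep walk over the old nodes and the newly generated tokens: existing nodes are reused and overwritten, extra nodes are appended when more tokens than nodes were produced, and leftover nodes are deleted. A minimal sketch of that reconciliation on plain lists (merge_back_sketch is a hypothetical helper, not the project's API):

    def merge_back_sketch(old_tokens, new_tokens):
        """Reconcile old_tokens with new_tokens, reusing slots where possible.

        Returns (result, changed); mirrors the reuse/append/delete structure of
        merge_back above, with plain strings standing in for tree nodes.
        """
        changed = False
        result = list(old_tokens)
        for i, tok in enumerate(new_tokens):
            if i < len(result):
                if result[i] != tok:
                    changed = True       # corresponds to node.mark_changed()
                result[i] = tok          # corresponds to node.symbol.name = t.source
            else:
                result.append(tok)       # corresponds to last_node.insert_after(node)
                changed = True
        if len(result) > len(new_tokens):
            del result[len(new_tokens):] # corresponds to the "delete left over nodes" loop
            changed = True
        return result, changed

    print(merge_back_sketch(["ab", "c"], ["abc"]))   # (['abc'], True)
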
Example #5
 def relex_import(self, startnode, version = 0):
     """
     Replace a node with the tokens of its name
     :param startnode: node to expand
     :param version: version assigned to each created node
     :return:
     """
     success = self.lex(startnode.symbol.name)
     bos = startnode.prev_term # bos
     startnode.parent.remove_child(startnode)
     parent = bos.parent
     eos = parent.children.pop()
     last_node = bos
     for match in success:
         node = TextNode(Terminal(match[0]))
         node.version = version
         node.lookup = match[1]
         parent.children.append(node)
         last_node.next_term = node
         last_node.right = node
         node.left = last_node
         node.prev_term = last_node
         node.parent = parent
         last_node = node
     parent.children.append(eos)
     last_node.right = eos # link to eos
     last_node.next_term = eos
     eos.left = last_node
     eos.prev_term = last_node
Example #6
    def merge_back(self, read_nodes, generated_tokens):

        any_changes = False
        # insert new nodes into tree
        it = iter(read_nodes)
        for t in generated_tokens:
            try:
                node = it.next()
            except StopIteration:
                node = TextNode(Terminal(""))
                last_node.insert_after(node)
                any_changes = True
            last_node = node
            node.symbol.name = t.source
            node.indent = None
            if node.lookup != t.name:
                node.mark_changed()
                any_changes = True
            else:
                node.mark_version()
            # we need to invalidate the newline if we changed whitespace or
            # logical nodes that come after it
            if node.lookup == "<ws>" or node.lookup != t.name:
                prev = node.prev_term
                while isinstance(prev.symbol, IndentationTerminal):
                    prev = prev.prev_term
                if prev.lookup == "<return>":
                    prev.mark_changed()
                    any_changes = True
                elif isinstance(prev, BOS):
                    # if there is no return, re-indentation won't be triggered
                    # in the incremental parser so we have to mark the next
                    # terminal. possibly only use case: bos <ws> pass DEDENT eos
                    node.next_term.mark_changed()
            # XXX this should become necessary with incparse optimisations turned on
            if node.lookup == "\\" and node.next_term.lookup == "<return>":
                node.next_term.mark_changed()
                any_changes = True
            node.lookup = t.name
            node.lookahead = t.lookahead
        # delete left over nodes
        while True:
            try:
                node = it.next()
                node.parent.remove_child(node)
                any_changes = True
            except StopIteration:
                break
        return any_changes
Example #7
 def manual_relex(self, bos, pattern):
     """To avoid a bootstrapping loop (inclexer depends on Lexer and thus
     RegexParser), we need to lex the regex grammar manually"""
     import re
     pos = 0
     while pos < len(pattern):
         for name, regex in self.lrules:
             r = re.match(regex, pattern[pos:])
             if r:
                 n = TextNode(Terminal(r.group(0)))
                 n.lookup = name
                 bos.insert_after(n)
                 bos = n
                 pos += len(r.group(0))
                 break
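
manual_relex scans the pattern left to right and, at each position, takes the first rule in self.lrules whose regex matches, so rule order decides ties. A self-contained sketch of that first-match loop follows (the rule set and function name are hypothetical; unlike the method above, the sketch also raises when no rule matches):

    import re

    def manual_lex(lrules, pattern):
        """Lex pattern with an ordered list of (name, regex) rules.

        At each position the first matching rule wins, as in manual_relex above;
        returns a list of (text, name) pairs.
        """
        tokens = []
        pos = 0
        while pos < len(pattern):
            for name, regex in lrules:
                r = re.match(regex, pattern[pos:])
                if r and r.group(0):
                    tokens.append((r.group(0), name))
                    pos += len(r.group(0))
                    break
            else:
                raise ValueError("no rule matches at position %d" % pos)
        return tokens

    # Hypothetical rules in the spirit of a regex grammar; order matters.
    rules = [("star", r"\*"), ("lbracket", r"\["), ("char", r"[a-z]")]
    print(manual_lex(rules, "a*[b"))
    # [('a', 'char'), ('*', 'star'), ('[', 'lbracket'), ('b', 'char')]
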
Example #8
 def relex_import(self, startnode):
     success = self.lex(startnode.symbol.name)
     bos = startnode.prev_term # bos
     startnode.parent.remove_child(startnode)
     parent = bos.parent
     eos = parent.children.pop()
     last_node = bos
     for match in success:
         node = TextNode(Terminal(match[0]))
         node.lookup = match[1]
         parent.children.append(node)
         last_node.next_term = node
         last_node.right = node
         node.left = last_node
         node.prev_term = last_node
         node.parent = parent
         last_node = node
     parent.children.append(eos)
     last_node.right = eos # link to eos
     last_node.next_term = eos
     eos.left = last_node
     eos.prev_term = last_node
Example #9
 def relex_import(self, startnode, version=0):
     success = self.lex(startnode.symbol.name)
     bos = startnode.prev_term # bos
     startnode.parent.remove_child(startnode)
     parent = bos.parent
     eos = parent.children.pop()
     last_node = bos
     for match in success:
         node = TextNode(Terminal(match[0]))
         node.version = version
         node.lookup = match[1]
         parent.children.append(node)
         last_node.next_term = node
         last_node.right = node
         node.left = last_node
         node.prev_term = last_node
         node.parent = parent
         last_node = node
     parent.children.append(eos)
     last_node.right = eos # link to eos
     last_node.next_term = eos
     eos.left = last_node
     eos.prev_term = last_node
Example #10
    def merge_back(self, tokens, read):
        if len(tokens) == 1 and tokens[0][0] == "\x81":
            return False

        lastread = read[0].prev_term

        it_gen = self.iter_gen(tokens)
        it_read = self.iter_read(read)

        gen = it_gen.next()
        read = it_read.next()

        totalr = 0
        totalg = 0

        reused = set()
        current_mt = None
        changed = False

        while True:
            while read is not None and isinstance(read.symbol, IndentationTerminal):
                read.remove()
                read = it_read.next()
            if gen is None and read is None:
                break

            if read and read.deleted:
                read = it_read.next()
                continue

            if gen is None:
                lengen = 0
            elif gen[0] == "new mt":
                if read and read.ismultichild() and not read.parent in reused:
                    current_mt = read.parent # reuse
                else:
                    current_mt = MultiTextNode() # create new
                    lastread.insert_after(current_mt) # insert multinode under the same parent as the nodes it replaces
                    changed = True
                if current_mt.lookup != gen[1]:
                    changed = True
                current_mt.lookup = gen[1]
                current_mt.lookahead = gen[2]
                self.relexed.add(current_mt)
                gen = it_gen.next()
                continue
            elif gen[0] == "finish mt":
                reused.add(current_mt)
                lastread = current_mt
                gen = it_gen.next()
                current_mt.update_children()
                current_mt = None
                continue
            else:
                lengen = len(gen[0])

            if totalr >= totalg + lengen:
                changed = True
                # One node has been split into multiple nodes. Insert all
                # remaining nodes until the lengths add up again.
                new = TextNode(Terminal(gen[0]))
                self.relexed.add(new)
                new.lookup = gen[1]
                if new.lookup == "<E>":
                    # If this token comes from the leftovers of a LexingError,
                    # mark it appropriately
                    new.changed = True  # XXX with error recovery, mark as error
                new.lookahead = gen[2]
                if current_mt and not lastread.ismultichild():
                    current_mt.insert_at_beginning(new)
                else:
                    lastread.insert_after(new)
                lastread = new
                totalg += lengen
                gen = it_gen.next()
            elif totalr + getlength(read) <= totalg:
                changed = True
                # Multiple nodes have been combined into fewer nodes. Delete old
                # nodes until the lengths add up again.
                read.remove()
                self.remove_check(read)
                totalr += getlength(read)
                read = it_read.next()
            else:
                # Overwrite old nodes with updated values. Move nodes in or out
                # of multinodes if needed.
                totalr += getlength(read)
                totalg += lengen
                if read.lookup != gen[1]:
                    read.mark_changed()
                    self.relexed.add(read)
                    changed = True
                else:
                    read.mark_changed()
                if not isinstance(read.symbol, MagicTerminal):
                    read.symbol.name = gen[0].replace("\x81", "")
                    read.lookup = gen[1]
                    read.lookahead = gen[2]
                    self.relexed.add(read)
                else:
                    read.lookup = gen[1]
                if not current_mt:
                    if read.ismultichild():
                        # Read node was previously part of a multinode but has
                        # been updated to a normal node. Remove it from the
                        # multinode.
                        read.remove(True)
                        read.deleted = False
                        self.remove_check(read)
                        lastread.insert_after(read)
                        changed = True
                else:
                    if not read.ismultichild() or current_mt is not read.parent:
                        # Read node has been moved from a normal node into a
                        # multinode or from one multinode into another
                        # multinode. Remove from old locations and insert into
                        # new location.
                        read.remove(True)
                        read.deleted = False
                        self.remove_check(read)
                        if current_mt.isempty():
                            current_mt.set_children([read])
                        else:
                            lastread.insert_after(read)
                        changed = True
                lastread = read
                read = it_read.next()
                gen = it_gen.next()

        return changed
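
The totalr/totalg counters above track how many characters of the old (read) nodes and of the freshly generated tokens have been consumed so far. Comparing them is what decides whether a generated token must be inserted as a new node, an old node must be deleted, or an old node can simply be overwritten in place. Below is a self-contained sketch of that length-based reconciliation on plain strings; it is hypothetical, omits the multinode and MagicTerminal handling, and assumes (as relexing guarantees) that both sides spell the same overall text.

    def reconcile(old, new):
        """Yield ("insert", tok), ("delete", tok) or ("overwrite", old_tok, new_tok)
        actions, mirroring the totalr/totalg bookkeeping in merge_back above."""
        assert sum(map(len, old)) == sum(map(len, new)), "relexing preserves the text"
        totalr = totalg = 0
        i = j = 0
        while i < len(old) or j < len(new):
            lengen = len(new[j]) if j < len(new) else 0
            if j < len(new) and totalr >= totalg + lengen:
                yield ("insert", new[j])             # one old node split into several new ones
                totalg += lengen
                j += 1
            elif i < len(old) and totalr + len(old[i]) <= totalg:
                yield ("delete", old[i])             # several old nodes merged into fewer new ones
                totalr += len(old[i])
                i += 1
            else:
                yield ("overwrite", old[i], new[j])  # lengths in step: reuse the old node
                totalr += len(old[i])
                totalg += len(new[j])
                i += 1
                j += 1

    # "1+2" was one node and becomes three; "3" and "4" collapse into "34".
    print(list(reconcile(["1+2", "3", "4"], ["1", "+", "2", "34"])))
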
Example #11
    def relex(self, node):
        # find farthest node that has lookahead into node
        # start munching tokens and spit out nodes
        #     if generated node already exists => stop
        #     (only if we passed edited node)

        # find node to start relexing
        startnode = node
        nodes = self.find_preceeding_nodes(node)
        if nodes:
            node = nodes[0]
        if node is startnode:
            past_startnode = True
        else:
            past_startnode = False

        if isinstance(node, EOS):
            # nothing to do here
            return False

        # relex
        read_nodes = []
        generated_tokens = []
        pos = 0
        read = 0
        current_node = node
        next_token = self.lexer.get_token_iter(StringWrapper(node))
        while True:
            token = next_token()
            if token.source == "":
                read_nodes.append(current_node)
                break
            read += len(token.source)
            # special case when inserting a newline into a string, the lexer
            # creates a single token. We need to make sure that that newline
            # gets lexed into its own token
            if len(token.source) > 1 and token.source.find("\r") >= 0:
                l = token.source.split("\r")
                for e in l:
                    t = self.lexer.tokenize(e)
                    generated_tokens.extend(t)
                    if e is not l[-1]:
                        newline = self.lexer.tokenize("\r")
                        generated_tokens.extend(newline)
            else:
                generated_tokens.append(token)
            while read > pos + len(current_node.symbol.name):
                pos += len(current_node.symbol.name)
                read_nodes.append(current_node)
                current_node = current_node.next_term
                if current_node is startnode:
                    past_startnode = True
            if past_startnode and read == pos + len(current_node.symbol.name):
                read_nodes.append(current_node)
                break

        any_changes = False
        # insert new nodes into tree
        it = iter(read_nodes)
        for t in generated_tokens:
            try:
                node = it.next()
            except StopIteration:
                node = TextNode(Terminal(""))
                last_node.insert_after(node)
                any_changes = True
            last_node = node
            node.symbol.name = t.source
            if node.lookup != t.name:
                any_changes = True
            node.lookup = t.name
            node.lookahead = t.lookahead
        # delete left over nodes
        while True:
            try:
                node = it.next()
                node.parent.remove_child(node)
                any_changes = True
            except StopIteration:
                break
        return any_changes
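
One detail in the loop above deserves a note: when a newline is typed into the middle of an existing token, the lexer can return a single token whose text spans the newline, so the token text is re-split on "\r" and every piece, plus the newline itself, is tokenized separately. A simplified, self-contained sketch of that splitting step (toy_tokenize is a hypothetical stand-in for self.lexer.tokenize; the sketch iterates by index instead of the identity test used above):

    def split_on_newlines(source, tokenize):
        # Split the token text at carriage returns and tokenize each piece,
        # emitting every newline as its own token.
        tokens = []
        if len(source) > 1 and "\r" in source:
            pieces = source.split("\r")
            for i, piece in enumerate(pieces):
                tokens.extend(tokenize(piece))
                if i < len(pieces) - 1:      # a "\r" followed every piece but the last
                    tokens.extend(tokenize("\r"))
        else:
            tokens.extend(tokenize(source))
        return tokens

    # Hypothetical stand-in tokenizer: every non-empty piece is one token.
    toy_tokenize = lambda s: [(s, "PIECE")] if s else []
    print(split_on_newlines("abc\rdef", toy_tokenize))
    # [('abc', 'PIECE'), ('\r', 'PIECE'), ('def', 'PIECE')]
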
Example #12
    def merge_back(self, read_nodes, generated_tokens):
        """
        Replace the symbols in the nodes with the newly generated tokens.


        We loop over read_nodes and generated_tokens at the same pace and replace each read_node.symbol.name
        with the corresponding generated_token.source. We also update the node's lookup (the type of the token).
        If the lookup has changed, the node is marked changed (a node whose lookup did not change looks
        unchanged to the parser).

        If the lists are of unequal length:
          - If there are more generated_tokens than read_nodes, extra nodes are inserted
          - Leftover read_nodes are removed

        :param read_nodes: Nodes that have been read by the relexer
        :param generated_tokens: Tokens that have been found during relexing
        :return:
        """
        any_changes = False
        # insert new nodes into tree
        it = iter(read_nodes)
        for t in generated_tokens:
            try:
                node = it.next()
            except StopIteration:
                node = TextNode(Terminal(""))
                last_node.insert_after(node)
                any_changes = True
            last_node = node
            node.symbol.name = t.source
            node.indent = None
            if node.lookup != t.name:
                node.mark_changed()
                any_changes = True
            else:
                node.mark_version()
            # we need to invalidate the newline if we changed whitespace or
            # logical nodes that come after it
            if node.lookup == "<ws>" or node.lookup != t.name:
                prev = node.prev_term
                while isinstance(prev.symbol, IndentationTerminal):
                    prev = prev.prev_term
                if prev.lookup == "<return>":
                    prev.mark_changed()
                    any_changes = True
                elif isinstance(prev, BOS):
                    # if there is no return, re-indentation won't be triggered
                    # in the incremental parser so we have to mark the next
                    # terminal. possibly only use case: bos <ws> pass DEDENT eos
                    node.next_term.mark_changed()
            # XXX this should become necessary with incparse optimisations turned on
            if node.lookup == "\\" and node.next_term.lookup == "<return>":
                node.next_term.mark_changed()
                any_changes = True
            node.lookup = t.name
            node.lookahead = t.lookahead
        # delete left over nodes
        while True:
            try:
                node = it.next()
                node.parent.remove_child(node)
                any_changes = True
            except StopIteration:
                break
        return any_changes