def merge_back(self, read_nodes, generated_tokens):
    """Copy freshly lexed tokens back onto the nodes they were read from.

    Nodes and tokens are walked in lockstep: each node receives the
    matching token's source text, lookup (token type) and lookahead.
    When there are more tokens than nodes, fresh empty nodes are
    inserted after the last one visited; surplus nodes are unlinked
    from the tree.

    :param read_nodes: nodes consumed by the relexer
    :param generated_tokens: tokens produced during relexing
    :return: True if anything parser-relevant changed
    """
    changed = False
    reader = iter(read_nodes)
    for token in generated_tokens:
        try:
            node = reader.next()
        except StopIteration:
            # Ran out of read nodes: grow the tree with an empty node.
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            changed = True
        last_node = node
        node.symbol.name = token.source
        # A changed token type -- or a "*/" occurring past the first
        # character of the source -- invalidates the node for the parser.
        if node.lookup != token.name or token.source.find("*/") > 0:
            changed = True
            node.mark_changed()
        else:
            node.mark_version()
        node.lookup = token.name
        node.lookahead = token.lookahead
    # Fewer tokens than nodes: unlink the leftovers from the tree.
    for leftover in reader:
        leftover.parent.remove_child(leftover)
        changed = True
    return changed
def test_lookback(self):
    """Lookback values must be recomputed across a chain of relexed nodes."""
    lexer = IncrementalLexer(""" "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    # Build a chain of terminals hanging off the BOS node.
    nodes = [TextNode(Terminal(s)) for s in ["abcd", "ef", "ghij", "k", "lm", "nopqr", "stu"]]
    anchor = bos
    for n in nodes:
        anchor.insert_after(n)
        anchor = n
    # All but the last node count as freshly relexed.
    for n in nodes[:-1]:
        lexer.relexed.add(n)
    for n, la in zip(nodes, [7, 7, 1, 0, 0, 3, 0]):
        n.lookahead = la
    lexer.update_lookback(nodes[0], nodes[0])
    for n, expected in zip(nodes, [0, 1, 2, 3, 3, 0, 1]):
        assert n.lookback == expected
def merge_back(self, read_nodes, generated_tokens): any_changes = False # insert new nodes into tree it = iter(read_nodes) for t in generated_tokens: try: node = it.next() except StopIteration: node = TextNode(Terminal("")) last_node.insert_after(node) any_changes = True last_node = node node.symbol.name = t.source node.indent = None if node.lookup != t.name: node.mark_changed() any_changes = True else: node.mark_version() # we need to invalidate the newline if we changed whitespace or # logical nodes that come after it if node.lookup == "<ws>" or node.lookup != t.name: prev = node.prev_term while isinstance(prev.symbol, IndentationTerminal): prev = prev.prev_term if prev.lookup == "<return>": prev.mark_changed() any_changes = True elif isinstance(prev, BOS): # if there is no return, re-indentation won't be triggered # in the incremental parser so we have to mark the next # terminal. possibly only use case: bos <ws> pass DEDENT eos node.next_term.mark_changed() # XXX this should become neccessary with incparse optimisations turned on if node.lookup == "\\" and node.next_term.lookup == "<return>": node.next_term.mark_changed() any_changes = True node.lookup = t.name node.lookahead = t.lookahead # delete left over nodes while True: try: node = it.next() node.parent.remove_child(node) any_changes = True except StopIteration: break return any_changes
def test_lookback_wagner(self):
    """A long-lookahead first token must propagate lookback to later tokens."""
    lexer = IncrementalLexer(""" "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    nodes = [TextNode(Terminal(s)) for s in ["\n", " ", "/*aaaaaaaaaaaaaaaaaaaaa*/", " ", "if", "("]]
    # Every node counts as freshly relexed.
    for n in nodes:
        lexer.relexed.add(n)
    # Chain the terminals off the BOS node.
    anchor = bos
    for n in nodes:
        anchor.insert_after(n)
        anchor = n
    for n, la in zip(nodes, [27, 1, 0, 1, 1, 0]):
        n.lookahead = la
    lexer.update_lookback(nodes[0], nodes[0])
    for n, expected in zip(nodes[1:], [1, 2, 3, 1, 1]):
        assert n.lookback == expected
def merge_back(self, tokens, read):
    """Merge relexed tokens back into the tree, handling multi-text nodes.

    `tokens` holds generated entries -- (source, lookup, lookahead)
    triples interleaved with ("new mt", lookup, lookahead) and
    ("finish mt", ...) markers that bracket the children of a
    MultiTextNode.  `read` holds the nodes the lexer consumed.  Both
    sequences are walked in lockstep, balancing their character lengths
    (totalr vs. totalg): nodes are split, merged, overwritten and moved
    in or out of multinodes as needed.

    :param tokens: generated token entries and multinode markers
    :param read: nodes consumed by the relexer
    :return: True if anything parser-relevant changed
    """
    if len(tokens) == 1 and tokens[0][0] == "\x81":
        # NOTE(review): \x81 is stripped from names below around
        # MagicTerminal handling -- presumably it marks a language box,
        # in which case there is nothing to merge; confirm.
        return False
    lastread = read[0].prev_term
    it_gen = self.iter_gen(tokens)
    it_read = self.iter_read(read)
    gen = it_gen.next()
    read = it_read.next()
    totalr = 0  # characters consumed from read nodes
    totalg = 0  # characters produced by generated tokens
    reused = set()  # multinodes already reused for this merge
    current_mt = None  # multinode currently being (re)filled
    changed = False
    while True:
        # Synthetic indentation terminals are never merged; drop them.
        while read is not None and isinstance(read.symbol, IndentationTerminal):
            read.remove()
            read = it_read.next()
        if gen is None and read is None:
            break
        if read and read.deleted:
            read = it_read.next()
            continue
        if gen is None:
            lengen = 0
        elif gen[0] == "new mt":
            if read and read.ismultichild() and not read.parent in reused:
                current_mt = read.parent # reuse
            else:
                current_mt = MultiTextNode() # create new
                lastread.insert_after(current_mt) # insert multiline under same parent as the nodes it replaces
                changed = True
            if current_mt.lookup != gen[1]:
                changed = True
            current_mt.lookup = gen[1]
            current_mt.lookahead = gen[2]
            self.relexed.add(current_mt)
            gen = it_gen.next()
            continue
        elif gen[0] == "finish mt":
            reused.add(current_mt)
            lastread = current_mt
            gen = it_gen.next()
            current_mt.update_children()
            current_mt = None
            continue
        else:
            lengen = len(gen[0])
        if totalr >= totalg + lengen:
            changed = True
            # One node has been split into multiple nodes. Insert all
            # remaining nodes until the lengths add up again.
            new = TextNode(Terminal(gen[0]))
            self.relexed.add(new)
            new.lookup = gen[1]
            if new.lookup == "<E>":
                # If this token comes from the leftovers of a LexingError,
                # mark it appropriately
                new.changed = True # XXX with error recovery, mark as error
            new.lookahead = gen[2]
            if current_mt and not lastread.ismultichild():
                current_mt.insert_at_beginning(new)
            else:
                lastread.insert_after(new)
            lastread = new
            totalg += lengen
            gen = it_gen.next()
        elif totalr + getlength(read) <= totalg:
            changed = True
            # Multiple nodes have been combined into less nodes. Delete old
            # nodes until the lengths add up again.
            read.remove()
            self.remove_check(read)
            totalr += getlength(read)
            read = it_read.next()
        else:
            # Overwrite old nodes with updated values. Move nodes in or out
            # of multinodes if needed.
            totalr += getlength(read)
            totalg += lengen
            if read.lookup != gen[1]:
                read.mark_changed()
                self.relexed.add(read)
                changed = True
            else:
                read.mark_changed()
            if not isinstance(read.symbol, MagicTerminal):
                read.symbol.name = gen[0].replace("\x81", "")
                read.lookup = gen[1]
                read.lookahead = gen[2]
                self.relexed.add(read)
            else:
                # Language boxes keep their symbol; only the lookup updates.
                read.lookup = gen[1]
            if not current_mt:
                if read.ismultichild():
                    # Read node was previously part of a multinode but has
                    # been updated to a normal node. Remove it from the
                    # multinode.
                    read.remove(True)
                    read.deleted = False
                    self.remove_check(read)
                    lastread.insert_after(read)
                    changed = True
            else:
                if not read.ismultichild() or current_mt is not read.parent:
                    # Read node has been moved from a normal node into a
                    # multinode or from one multinode into another
                    # multinode. Remove from old locations and insert into
                    # new location.
                    read.remove(True)
                    read.deleted = False
                    self.remove_check(read)
                    if current_mt.isempty():
                        current_mt.set_children([read])
                    else:
                        lastread.insert_after(read)
                    changed = True
            lastread = read
            read = it_read.next()
            gen = it_gen.next()
    return changed
def relex(self, node):
    """Relex the tree around a changed node and merge the results back.

    Starts from the farthest preceding node whose lookahead reaches into
    `node`, reads tokens until the token stream realigns with existing
    node boundaries past the edited node, then overwrites, inserts or
    deletes tree nodes so they match the newly generated tokens.

    :param node: the edited node to relex around
    :return: True if anything changed
    """
    # find farthest node that has lookahead into node
    # start munching tokens and spit out nodes
    # if generated node already exists => stop
    # (only if we passed edited node)
    # find node to start relexing
    startnode = node
    nodes = self.find_preceeding_nodes(node)
    if nodes:
        node = nodes[0]
    if node is startnode:
        past_startnode = True
    else:
        past_startnode = False
    if isinstance(node, EOS):
        # nothing to do here
        return False
    # relex
    read_nodes = []  # nodes consumed by the lexer
    generated_tokens = []  # tokens produced by the lexer
    pos = 0  # characters consumed up to the start of current_node
    read = 0  # characters consumed in total
    current_node = node
    next_token = self.lexer.get_token_iter(StringWrapper(node))
    while True:
        token = next_token()
        if token.source == "":
            # Empty token signals the end of the input stream.
            read_nodes.append(current_node)
            break
        read += len(token.source)
        # special case when inserting a newline into a string, the lexer
        # creates a single token. We need to make sure that that newline
        # gets lexed into its own token
        if len(token.source) > 1 and token.source.find("\r") >= 0:
            l = token.source.split("\r")
            for e in l:
                t = self.lexer.tokenize(e)
                generated_tokens.extend(t)
                if e is not l[-1]:
                    newline = self.lexer.tokenize("\r")
                    generated_tokens.extend(newline)
        else:
            generated_tokens.append(token)
        # Advance over all nodes fully covered by what we have read.
        while read > pos + len(current_node.symbol.name):
            pos += len(current_node.symbol.name)
            read_nodes.append(current_node)
            current_node = current_node.next_term
            if current_node is startnode:
                past_startnode = True
        # Token stream realigned with a node boundary after the edit
        # site: relexing can stop here.
        if past_startnode and read == pos + len(current_node.symbol.name):
            read_nodes.append(current_node)
            break
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    for t in generated_tokens:
        try:
            node = it.next()  # Python 2 iterator protocol
        except StopIteration:
            # More tokens than nodes: append a fresh empty node.
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        if node.lookup != t.name:
            # NOTE(review): unlike the merge_back variants in this file,
            # the node is not mark_changed() here -- confirm intentional.
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    while True:
        try:
            node = it.next()
            node.parent.remove_child(node)
            any_changes = True
        except StopIteration:
            break
    return any_changes
def merge_back(self, read_nodes, generated_tokens):
    """
    Replace the symbols in the nodes with the newly generated tokens.

    We loop over read_nodes and generated_tokens at the same pace and
    replace the read_node.symbol.name with the corresponding
    generated_token.source. We also update the node's lookup (the type
    of token). If it is changed, the node is marked changed (a node
    whose lookup did not change is identical to the parser).

    If the arrays are of unequal length:
    - If there are more generated_tokens than read_nodes, extra nodes
      are inserted
    - Excess nodes are removed

    :param read_nodes: Nodes that have been read by the relexer
    :param generated_tokens: Tokens that have been found during relexing
    :return: True if anything parser-relevant changed
    """
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    for t in generated_tokens:
        try:
            node = it.next()  # Python 2 iterator protocol
        except StopIteration:
            # More tokens than nodes: append a fresh empty node after the
            # last node processed.
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        node.indent = None  # drop any cached indentation value
        if node.lookup != t.name:
            node.mark_changed()
            any_changes = True
        else:
            node.mark_version()
        # we need to invalidate the newline if we changed whitespace or
        # logical nodes that come after it
        if node.lookup == "<ws>" or node.lookup != t.name:
            # Skip synthetic indentation terminals to find the real
            # preceding terminal.
            prev = node.prev_term
            while isinstance(prev.symbol, IndentationTerminal):
                prev = prev.prev_term
            if prev.lookup == "<return>":
                prev.mark_changed()
                any_changes = True
            elif isinstance(prev, BOS):
                # if there is no return, re-indentation won't be triggered
                # in the incremental parser so we have to mark the next
                # terminal. possibly only use case: bos <ws> pass DEDENT eos
                node.next_term.mark_changed()
        # XXX this should become neccessary with incparse optimisations turned on
        if node.lookup == "\\" and node.next_term.lookup == "<return>":
            node.next_term.mark_changed()
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    while True:
        try:
            node = it.next()
            node.parent.remove_child(node)
            any_changes = True
        except StopIteration:
            break
    return any_changes