def test_relex3(self):
    """Relexing "1+2" "345" "6+" must re-tokenise into 1 + 23456 + while
    leaving the already-lexed "789" and "+" nodes completely untouched."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    n1 = TextNode(Terminal("1+2"))
    n2 = TextNode(Terminal("345"))
    n3 = TextNode(Terminal("6+"))
    n4 = TextNode(Terminal("789"))  # this should never be touched
    n4.lookup = "INT"
    n5 = TextNode(Terminal("+"))  # this should never be touched
    n5.lookup = "plus"
    prev = bos
    for fresh in (n1, n2, n3, n4, n5):
        prev.insert_after(fresh)
        prev = fresh
    self.relex(n1)
    assert ast.parent.symbol == Nonterminal("Root")
    assert isinstance(ast.parent.children[0], BOS)
    assert isinstance(ast.parent.children[-1], EOS)
    term = bos
    for expected in ("1", "+", "23456", "+"):
        term = term.next_term
        assert term.symbol == Terminal(expected)
    # check that 789 hasn't been relexed
    assert term.next_term is n4
    assert term.next_term.symbol is n4.symbol
def relex_import(self, startnode, version = 0):
    """Optimised relex for freshly imported files.

    Lexes the whole text stored in `startnode` in one pass and splices the
    resulting token nodes between BOS and EOS.

    :param startnode: node whose symbol name holds the imported text
    :param version: unused in this variant -- presumably kept for interface
                    parity with the other relex_import implementations; confirm
    """
    success = self.lex(startnode.symbol.name)
    bos = startnode.prev_term # bos
    parent = bos.parent
    # temporarily detach EOS; it is re-appended after the new tokens
    eos = parent.children.pop()
    last_node = bos
    for match in success:
        if match is success[0]:
            # reuse old node for first node to mimic the behaviour of a
            # normal relex
            node = startnode
            node.symbol.name = match[0]
            # NOTE(review): startnode is appended below although this method
            # never removed it from parent.children -- presumably the caller
            # detached it beforehand; verify against call sites.
        else:
            node = TextNode(Terminal(match[0]))
        node.lookup = match[1]
        parent.children.append(node)
        # wire the node into both the sibling chain and the terminal chain
        last_node.next_term = node
        last_node.right = node
        node.left = last_node
        node.prev_term = last_node
        node.parent = parent
        last_node = node
        node.mark_changed()
    parent.children.append(eos)
    last_node.right = eos # link to eos
    last_node.next_term = eos
    eos.left = last_node
    eos.prev_term = last_node
    bos.mark_changed()
    eos.mark_changed()
    parent.mark_changed()
def test_relex_stop(self):
    """Relexing "1+2" must stop once it reaches the already-lexed "3"/INT
    node, leaving "*" and "3" in place."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    fresh = TextNode(Terminal("1+2"))
    star = TextNode(Terminal("*"))
    digit = TextNode(Terminal("3"))
    digit.lookup = "INT"
    bos.insert_after(fresh)
    fresh.insert_after(star)
    star.insert_after(digit)
    self.relex(fresh)
    assert ast.parent.symbol == Nonterminal("Root")
    assert isinstance(ast.parent.children[0], BOS)
    assert isinstance(ast.parent.children[-1], EOS)
    term = bos
    for expected in ("1", "+", "2", "*", "3"):
        term = term.next_term
        assert term.symbol == Terminal(expected)
    assert isinstance(term.next_term, EOS)
def merge_back(self, read_nodes, generated_tokens): any_changes = False # insert new nodes into tree it = iter(read_nodes) for t in generated_tokens: try: node = it.next() except StopIteration: node = TextNode(Terminal("")) last_node.insert_after(node) any_changes = True last_node = node node.symbol.name = t.source if node.lookup != t.name or t.source.find("*/") > 0: any_changes = True node.mark_changed() else: node.mark_version() node.lookup = t.name node.lookahead = t.lookahead # delete left over nodes while True: try: node = it.next() node.parent.remove_child(node) any_changes = True except StopIteration: break return any_changes
def relex_import(self, startnode, version = 0):
    """
    Replace a node with the tokens of its name

    :param startnode: node to expand
    :param version: version assigned to each created node
    :return: None
    """
    success = self.lex(startnode.symbol.name)
    bos = startnode.prev_term # bos
    # the expanded node is replaced entirely by its tokens
    startnode.parent.remove_child(startnode)
    parent = bos.parent
    # temporarily detach EOS; it is re-appended after the new tokens
    eos = parent.children.pop()
    last_node = bos
    for match in success:
        # match is (source, lookup) -- presumably produced by self.lex; confirm
        node = TextNode(Terminal(match[0]))
        node.version = version
        node.lookup = match[1]
        parent.children.append(node)
        # wire the node into both the sibling chain and the terminal chain
        last_node.next_term = node
        last_node.right = node
        node.left = last_node
        node.prev_term = last_node
        node.parent = parent
        last_node = node
    parent.children.append(eos)
    last_node.right = eos # link to eos
    last_node.next_term = eos
    eos.left = last_node
    eos.prev_term = last_node
def merge_back(self, read_nodes, generated_tokens): any_changes = False # insert new nodes into tree it = iter(read_nodes) for t in generated_tokens: try: node = it.next() except StopIteration: node = TextNode(Terminal("")) last_node.insert_after(node) any_changes = True last_node = node node.symbol.name = t.source node.indent = None if node.lookup != t.name: node.mark_changed() any_changes = True else: node.mark_version() # we need to invalidate the newline if we changed whitespace or # logical nodes that come after it if node.lookup == "<ws>" or node.lookup != t.name: prev = node.prev_term while isinstance(prev.symbol, IndentationTerminal): prev = prev.prev_term if prev.lookup == "<return>": prev.mark_changed() any_changes = True elif isinstance(prev, BOS): # if there is no return, re-indentation won't be triggered # in the incremental parser so we have to mark the next # terminal. possibly only use case: bos <ws> pass DEDENT eos node.next_term.mark_changed() # XXX this should become neccessary with incparse optimisations turned on if node.lookup == "\\" and node.next_term.lookup == "<return>": node.next_term.mark_changed() any_changes = True node.lookup = t.name node.lookahead = t.lookahead # delete left over nodes while True: try: node = it.next() node.parent.remove_child(node) any_changes = True except StopIteration: break return any_changes
def manual_relex(self, bos, pattern): """To avoid a bootstrapping loop (inclexer depends on Lexer and thus RegexParser), we need to lex the regex grammar manually""" import re pos = 0 while pos < len(pattern): for name, regex in self.lrules: r = re.match(regex, pattern[pos:]) if r: n = TextNode(Terminal(r.group(0))) n.lookup = name bos.insert_after(n) bos = n pos += len(r.group(0)) break
def relex_import(self, startnode):
    """Tokenise the text stored in `startnode` and splice the resulting
    token nodes between BOS and EOS, dropping the original node."""
    matches = self.lex(startnode.symbol.name)
    bos = startnode.prev_term  # bos
    startnode.parent.remove_child(startnode)
    parent = bos.parent
    eos = parent.children.pop()
    tail = bos
    for match in matches:
        tnode = TextNode(Terminal(match[0]))
        tnode.lookup = match[1]
        parent.children.append(tnode)
        tnode.parent = parent
        # hook the fresh node in behind the current tail
        tnode.left = tnode.prev_term = tail
        tail.right = tail.next_term = tnode
        tail = tnode
    # re-attach eos behind the last token
    parent.children.append(eos)
    tail.right = tail.next_term = eos
    eos.left = eos.prev_term = tail
def relex_import(self, startnode, version=0):
    """Tokenise startnode's text and replace it with one node per token,
    each node stamped with `version`."""
    matches = self.lex(startnode.symbol.name)
    bos = startnode.prev_term  # bos sits directly before the imported node
    startnode.parent.remove_child(startnode)
    parent = bos.parent
    eos = parent.children.pop()
    tail = bos
    for match in matches:
        fresh = TextNode(Terminal(match[0]))
        fresh.version = version
        fresh.lookup = match[1]
        parent.children.append(fresh)
        fresh.parent = parent
        # double-link the new node behind the current tail
        fresh.left = fresh.prev_term = tail
        tail.right = tail.next_term = fresh
        tail = fresh
    # close the chain with eos again
    parent.children.append(eos)
    tail.right = tail.next_term = eos
    eos.left = eos.prev_term = tail
def merge_back(self, tokens, read):
    """Merge newly generated `tokens` back into the `read` nodes.

    Walks both sequences in lockstep, tracking consumed lengths (totalr /
    totalg) to decide whether old nodes were split, combined, or merely
    updated in place.  "new mt" / "finish mt" sentinel tokens delimit
    multi-text (multiline) nodes.

    :param tokens: generated token stream, including multinode sentinels
    :param read: nodes that were consumed while lexing
    :return: True if the tree changed in a way the parser must see
    """
    if len(tokens) == 1 and tokens[0][0] == "\x81":
        # \x81 presumably marks a language box placeholder -- nothing to
        # merge in that case; TODO confirm
        return False
    lastread = read[0].prev_term
    it_gen = self.iter_gen(tokens)
    it_read = self.iter_read(read)
    gen = it_gen.next()
    read = it_read.next()
    totalr = 0  # total length of read (old) nodes consumed so far
    totalg = 0  # total length of generated (new) tokens consumed so far
    reused = set()  # multinodes already reused during this merge
    current_mt = None  # multinode currently being (re)filled
    changed = False
    while True:
        # indentation nodes are regenerated elsewhere; drop them here
        while read is not None and isinstance(read.symbol, IndentationTerminal):
            read.remove()
            read = it_read.next()
        if gen is None and read is None:
            break
        if read and read.deleted:
            read = it_read.next()
            continue
        if gen is None:
            lengen = 0
        elif gen[0] == "new mt":
            if read and read.ismultichild() and not read.parent in reused:
                current_mt = read.parent # reuse
            else:
                current_mt = MultiTextNode() # create new
                lastread.insert_after(current_mt) # insert multiline under same parent as the nodes it replaces
                changed = True
            if current_mt.lookup != gen[1]:
                changed = True
            current_mt.lookup = gen[1]
            current_mt.lookahead = gen[2]
            self.relexed.add(current_mt)
            gen = it_gen.next()
            continue
        elif gen[0] == "finish mt":
            reused.add(current_mt)
            lastread = current_mt
            gen = it_gen.next()
            current_mt.update_children()
            current_mt = None
            continue
        else:
            lengen = len(gen[0])
        if totalr >= totalg + lengen:
            changed = True
            # One node has been split into multiple nodes. Insert all
            # remaining nodes until the lengths add up again.
            new = TextNode(Terminal(gen[0]))
            self.relexed.add(new)
            new.lookup = gen[1]
            if new.lookup == "<E>":
                # If this token comes from the leftovers of a LexingError,
                # mark it appropriately
                new.changed = True # XXX with error recovery, mark as error
            new.lookahead = gen[2]
            if current_mt and not lastread.ismultichild():
                current_mt.insert_at_beginning(new)
            else:
                lastread.insert_after(new)
            lastread = new
            totalg += lengen
            gen = it_gen.next()
        elif totalr + getlength(read) <= totalg:
            changed = True
            # Multiple nodes have been combined into less nodes. Delete old
            # nodes until the lengths add up again.
            read.remove()
            self.remove_check(read)
            totalr += getlength(read)
            read = it_read.next()
        else:
            # Overwrite old nodes with updated values. Move nodes in or out
            # of multinodes if needed.
            totalr += getlength(read)
            totalg += lengen
            if read.lookup != gen[1]:
                read.mark_changed()
                self.relexed.add(read)
                changed = True
            else:
                read.mark_changed()
            if not isinstance(read.symbol, MagicTerminal):
                read.symbol.name = gen[0].replace("\x81", "")
                read.lookup = gen[1]
                read.lookahead = gen[2]
                self.relexed.add(read)
            else:
                read.lookup = gen[1]
            if not current_mt:
                if read.ismultichild():
                    # Read node was previously part of a multinode but has
                    # been updated to a normal node. Remove it from the
                    # multinode.
                    read.remove(True)
                    read.deleted = False
                    self.remove_check(read)
                    lastread.insert_after(read)
                    changed = True
            else:
                if not read.ismultichild() or current_mt is not read.parent:
                    # Read node has been moved from a normal node into a
                    # multinode or from one multinode into another
                    # multinode. Remove from old locations and insert into
                    # new location.
                    read.remove(True)
                    read.deleted = False
                    self.remove_check(read)
                    if current_mt.isempty():
                        current_mt.set_children([read])
                    else:
                        lastread.insert_after(read)
                    changed = True
            lastread = read
            read = it_read.next()
            gen = it_gen.next()
    return changed
def relex(self, node):
    """Incrementally relex the token stream around `node`.

    Starts at the nearest preceding node whose lookahead reaches into
    `node`, lexes forward until the output re-synchronises with an existing
    (unedited) token boundary, then merges the generated tokens back into
    the read nodes.

    :param node: the edited node to relex around
    :return: True if any node changed
    """
    # find farthest node that has lookahead into node
    # start munching tokens and spit out nodes
    # if generated node already exists => stop
    # (only if we passed edited node)

    # find node to start relexing
    startnode = node
    nodes = self.find_preceeding_nodes(node)
    if nodes:
        node = nodes[0]
    if node is startnode:
        past_startnode = True
    else:
        past_startnode = False
    if isinstance(node, EOS):
        # nothing to do here
        return False
    # relex
    read_nodes = []
    generated_tokens = []
    pos = 0   # length of fully consumed old nodes
    read = 0  # length of text produced by the lexer so far
    current_node = node
    next_token = self.lexer.get_token_iter(StringWrapper(node))
    while True:
        token = next_token()
        if token.source == "":
            # lexer exhausted: keep the node we stopped on
            read_nodes.append(current_node)
            break
        read += len(token.source)
        # special case when inserting a newline into a string, the lexer
        # creates a single token. We need to make sure that that newline
        # gets lexed into its own token
        if len(token.source) > 1 and token.source.find("\r") >= 0:
            l = token.source.split("\r")
            for e in l:
                t = self.lexer.tokenize(e)
                generated_tokens.extend(t)
                if e is not l[-1]:
                    newline = self.lexer.tokenize("\r")
                    generated_tokens.extend(newline)
        else:
            generated_tokens.append(token)
        # consume old nodes fully covered by the lexed text
        while read > pos + len(current_node.symbol.name):
            pos += len(current_node.symbol.name)
            read_nodes.append(current_node)
            current_node = current_node.next_term
            if current_node is startnode:
                past_startnode = True
        # stop once output realigns with an old token boundary after the
        # edited node has been passed
        if past_startnode and read == pos + len(current_node.symbol.name):
            read_nodes.append(current_node)
            break
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    for t in generated_tokens:
        try:
            node = it.next()
        except StopIteration:
            # more tokens than nodes: grow the tree
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        if node.lookup != t.name:
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    while True:
        try:
            node = it.next()
            node.parent.remove_child(node)
            any_changes = True
        except StopIteration:
            break
    return any_changes
def merge_back(self, read_nodes, generated_tokens):
    """
    Replace the symbols in the nodes with the newly generated tokens.

    We loop over read_nodes and generated tokens at the same pace and
    replace the read_node.symbol.name with the corresponding
    generated_token.source. We also update the node's lookup (the type of
    token). If it is changed, the node is marked changed (a node whose
    lookup did not change is identical to the parser).

    If the arrays are of unequal length:
    - If the length of generated_tokens is insufficient, we add extra nodes
    - Excess nodes are removed

    :param read_nodes: Nodes that have been read by the relexer
    :param generated_tokens: Tokens that have been found during relexing
    :return: True if any node changed
    """
    any_changes = False
    # insert new nodes into tree
    it = iter(read_nodes)
    for t in generated_tokens:
        try:
            node = it.next()
        except StopIteration:
            # More tokens than nodes: create and insert a fresh node.
            # last_node is unbound if read_nodes is empty -- TODO confirm
            # callers guarantee otherwise.
            node = TextNode(Terminal(""))
            last_node.insert_after(node)
            any_changes = True
        last_node = node
        node.symbol.name = t.source
        node.indent = None  # force re-computation of indentation
        if node.lookup != t.name:
            node.mark_changed()
            any_changes = True
        else:
            node.mark_version()
        # we need to invalidate the newline if we changed whitespace or
        # logical nodes that come after it
        if node.lookup == "<ws>" or node.lookup != t.name:
            prev = node.prev_term
            while isinstance(prev.symbol, IndentationTerminal):
                prev = prev.prev_term
            if prev.lookup == "<return>":
                prev.mark_changed()
                any_changes = True
            elif isinstance(prev, BOS):
                # if there is no return, re-indentation won't be triggered
                # in the incremental parser so we have to mark the next
                # terminal. possibly only use case: bos <ws> pass DEDENT eos
                node.next_term.mark_changed()
        # XXX this should become necessary with incparse optimisations turned on
        if node.lookup == "\\" and node.next_term.lookup == "<return>":
            node.next_term.mark_changed()
            any_changes = True
        node.lookup = t.name
        node.lookahead = t.lookahead
    # delete left over nodes
    while True:
        try:
            node = it.next()
            node.parent.remove_child(node)
            any_changes = True
        except StopIteration:
            break
    return any_changes