def align_children(self, left, right):
    """Reorder the matched children of ``left`` so they follow ``right``.

    Only children of ``left`` whose match is a direct child of ``right``
    (and vice versa) take part in the alignment.  The longest common
    subsequence of those two child lists is marked as "in order"; every
    remaining left child is moved to the position that ``self.find_pos``
    reports for its matched right node.

    Yields one ``actions.MoveNode`` per misaligned child.  As a side
    effect the left tree is modified in place and both nodes of every
    aligned pair are added to ``self._inorder``.
    """
    # Iterate over the elements directly instead of calling the
    # deprecated ``getchildren()``.  The comprehensions build real lists,
    # so the moves performed below cannot disturb the iteration.
    lchildren = [
        c for c in left
        if (id(c) in self._l2rmap and
            self._l2rmap[id(c)].getparent() is right)
    ]
    rchildren = [
        c for c in right
        if (id(c) in self._r2lmap and
            self._r2lmap[id(c)].getparent() is left)
    ]
    if not lchildren or not rchildren:
        # Nothing to align
        return

    # Two children are "equal" for the LCS when they are matched to
    # each other.
    lcs = utils.longest_common_subsequence(
        lchildren, rchildren,
        lambda x, y: self._l2rmap[id(x)] is y)

    for x, y in lcs:
        # Mark these as in order
        self._inorder.add(lchildren[x])
        self._inorder.add(rchildren[y])

    # Go over those children that are not in order:
    for lchild in lchildren:
        if lchild in self._inorder:
            # Already aligned
            continue

        rchild = self._l2rmap[id(lchild)]
        right_pos = self.find_pos(rchild)
        rtarget = rchild.getparent()
        ltarget = self._r2lmap[id(rtarget)]
        # The action is yielded before the tree is changed, so the
        # paths in it describe the tree as it is before the move.
        yield actions.MoveNode(
            utils.getpath(lchild),
            utils.getpath(ltarget),
            right_pos)
        # Do the actual move:
        left.remove(lchild)
        ltarget.insert(right_pos, lchild)
        # Mark the nodes as in order
        self._inorder.add(lchild)
        self._inorder.add(rchild)
def update_node_attr(self, left, right):
    """Yield the attribute actions that turn ``left``'s attributes into
    ``right``'s, applying each change to ``left`` after it is yielded.

    Actions are produced in this order: ``UpdateAttrib`` for changed
    values, ``RenameAttrib`` where a removed attribute's value reappears
    under a new name, ``InsertAttrib`` for the remaining new attributes,
    and ``DeleteAttrib`` for the remaining removed ones.  Attribute
    names are processed in sorted order within each phase so the edit
    script is deterministic.
    """
    path = utils.getpath(left)
    old_attrs = left.attrib
    new_attrs = right.attrib

    old_names = set(old_attrs.keys())
    new_names = set(new_attrs.keys())
    added = new_names - old_names
    dropped = old_names - new_names
    shared = old_names & new_names

    # Update: attributes present on both sides whose values differ.
    for name in sorted(shared):
        wanted = new_attrs[name]
        if old_attrs[name] != wanted:
            yield actions.UpdateAttrib(path, name, wanted)
            old_attrs[name] = wanted

    # Align: nothing to do, attribute order is irrelevant.

    # Move: a removed attribute whose value shows up under a new name is
    # reported as a single rename instead of a delete plus an insert.
    name_by_value = {}
    for name, value in new_attrs.items():
        if name in added:
            name_by_value[value] = name

    for old_name in sorted(dropped):
        value = old_attrs[old_name]
        if value not in name_by_value:
            continue
        new_name = name_by_value[value]
        yield actions.RenameAttrib(path, old_name, new_name)
        # The new name is used up: it must not be inserted later and
        # must not be picked for another rename.
        added.remove(new_name)
        del name_by_value[value]
        # Apply the rename to the left node.
        old_attrs[new_name] = value
        del old_attrs[old_name]

    # Insert: whatever new attributes were not consumed by a rename.
    for name in sorted(added):
        wanted = new_attrs[name]
        yield actions.InsertAttrib(path, name, wanted)
        old_attrs[name] = wanted

    # Delete: removed attributes that are still present, i.e. that were
    # not already renamed away above.
    for name in sorted(dropped):
        if name in old_attrs:
            yield actions.DeleteAttrib(path, name)
            del old_attrs[name]
def _handle_MoveNode(self, action, orig_tree): if action.position == 0: yield u"move-first", action.node, action.target return node = orig_tree.xpath(action.node)[0] target = orig_tree.xpath(action.target)[0] # Get the position of the previous sibling position = action.position - 1 if node.getparent() is target: # Moving to a new lower position in the same target, # adjust previous sibling position: if target.index(node) <= position: position += 1 sibling = target[position] yield u"move-after", action.node, utils.getpath(sibling)
def diff(self, left=None, right=None):
    """Yield the edit actions derived from the node matching.

    ``left`` and ``right`` are only forwarded to ``self.match()`` when no
    matches exist yet; the traversal itself walks ``self.right`` and
    ``self.left``.
    NOTE(review): presumably ``match()`` sets ``self.left``/``self.right``
    from these arguments -- confirm.

    This is a generator.  Each change is applied to the left tree right
    after its action is yielded, so consuming the generator modifies the
    left tree in place.  Nodes of the right tree are visited in the
    order given by ``utils.breadth_first_traverse``; unmatched left
    nodes are deleted in a final pass.
    """
    # Make sure the matching is done first, diff() needs the l2r/r2l maps.
    if not self._matches:
        self.match(left, right)

    # The paper talks about the five phases, and then does four of them
    # in one phase, in a different order than described. This
    # implementation in turn differs in order yet again.
    ltree = self.left.getroottree()

    for rnode in utils.breadth_first_traverse(self.right):
        # (a) Find the left node matched to rnode's parent; that is where
        # rnode's counterpart has to live. This is None when the parent
        # has no entry in the map.
        rparent = rnode.getparent()
        ltarget = self._r2lmap.get(id(rparent))

        # (b) Insert: rnode has no match, so create a counterpart for it
        if id(rnode) not in self._r2lmap:
            # (i)
            pos = self.find_pos(rnode)
            # (ii)
            if rnode.tag is etree.Comment:
                yield actions.InsertComment(
                    utils.getpath(ltarget, ltree), pos, rnode.text)
                lnode = etree.Comment(rnode.text)
            else:
                yield actions.InsertNode(
                    utils.getpath(ltarget, ltree), rnode.tag, pos)
                lnode = ltarget.makeelement(rnode.tag)

            # (iii) Register the new node as rnode's match and put it
            # into the left tree.
            self.append_match(lnode, rnode, 1.0)
            ltarget.insert(pos, lnode)
            self._inorder.add(lnode)
            self._inorder.add(rnode)
            # And then we update attributes. This is different from the
            # paper, because the paper assumes nodes only have labels and
            # values. Nodes also have texts, we do them later.
            for action in self.update_node_attr(lnode, rnode):
                yield action

        # (c) rnode already has a match in the left tree
        else:
            # Normally there is a check that rnode isn't a root,
            # but that's perhaps only because comparing valueless
            # roots is pointless, but in an elementtree we have no such
            # thing as a valueless root anyway.
            # (i)
            lnode = self._r2lmap[id(rnode)]

            # (iii) Move: the match sits under a different parent than
            # the one matched to rnode's parent.
            lparent = lnode.getparent()
            if ltarget is not lparent:
                pos = self.find_pos(rnode)
                yield actions.MoveNode(
                    utils.getpath(lnode, ltree),
                    utils.getpath(ltarget, ltree),
                    pos)
                # Move the node from current parent to target
                lparent.remove(lnode)
                ltarget.insert(pos, lnode)
                self._inorder.add(lnode)
                self._inorder.add(rnode)

            # Rename
            for action in self.update_node_tag(lnode, rnode):
                yield action

            # (ii) Update
            # XXX If they are exactly equal, we can skip this,
            # maybe store match results in a cache?
            for action in self.update_node_attr(lnode, rnode):
                yield action

        # (d) Align
        for action in self.align_children(lnode, rnode):
            yield action

        # And lastly, we update all node texts. We do this after
        # aligning children, because when you generate an XML diff
        # from this, that XML diff update generates more children,
        # confusing later inserts or deletes.
        lnode = self._r2lmap[id(rnode)]
        for action in self.update_node_text(lnode, rnode):
            yield action

    # Delete: every left node that has no match is removed from the
    # left tree.
    for lnode in utils.reverse_post_order_traverse(self.left):
        if id(lnode) not in self._l2rmap:
            # No match
            yield actions.DeleteNode(utils.getpath(lnode, ltree))
            lnode.getparent().remove(lnode)
def update_node_tag(self, left, right):
    """Yield a ``RenameNode`` action when the two tags differ.

    After yielding, ``left.tag`` is set to ``right.tag``.  Nothing is
    yielded and nothing is changed when the tags are already equal.
    """
    if left.tag == right.tag:
        return
    # The path is taken before the rename is applied.
    path = utils.getpath(left)
    yield actions.RenameNode(path, right.tag)
    left.tag = right.tag
def _handle_InsertNode(self, action, orig_tree): if action.position == 0: yield u"insert-first", action.target, '\n<%s/>' % action.tag return sibling = orig_tree.xpath(action.target)[0][action.position - 1] yield u"insert-after", utils.getpath(sibling), '\n<%s/>' % action.tag