def diff(self, left=None, right=None):
    """Yield the edit actions found while walking the right tree.

    When no matches have been recorded yet, ``self.match(left, right)`` is
    run first, because the l2r/r2l maps are required here. The left tree
    is modified in place (nodes are inserted, moved and removed) as the
    corresponding actions are yielded, so every path in an action refers
    to the left tree as it looked at the moment of the yield.
    """
    # Matching has to be done before diffing; diff() reads the l2r/r2l maps.
    if not self._matches:
        self.match(left, right)

    # The paper talks about five phases, and then does four of them in one
    # phase, in a different order than described. This implementation in
    # turn differs in order yet again.
    left_tree = self.left.getroottree()

    for right_node in utils.breadth_first_traverse(self.right):
        # (a) Look up the left-side node mapped to the right node's parent.
        right_parent = right_node.getparent()
        target = self._r2lmap.get(id(right_parent))

        if id(right_node) in self._r2lmap:
            # (c) The right node already has a partner in the left tree.
            # Normally there is a check that the right node isn't a root,
            # but that's perhaps only because comparing valueless roots is
            # pointless; in an elementtree there is no such thing as a
            # valueless root anyway.
            # (i)
            left_node = self._r2lmap[id(right_node)]

            # (iii) Move
            current_parent = left_node.getparent()
            if target is not current_parent:
                index = self.find_pos(right_node)
                # Both paths are computed before the tree is changed.
                node_path = utils.getpath(left_node, left_tree)
                target_path = utils.getpath(target, left_tree)
                yield actions.MoveNode(node_path, target_path, index)
                # Move the node from its current parent to the target.
                current_parent.remove(left_node)
                target.insert(index, left_node)
                self._inorder.add(left_node)
                self._inorder.add(right_node)

            # Rename
            for step in self.update_node_tag(left_node, right_node):
                yield step

            # (ii) Update
            # XXX If they are exactly equal, we can skip this,
            # maybe store match results in a cache?
            for step in self.update_node_attr(left_node, right_node):
                yield step
        else:
            # (b) Insert: the right node has no partner yet.
            # (i)
            index = self.find_pos(right_node)
            # (ii)
            if right_node.tag is etree.Comment:
                target_path = utils.getpath(target, left_tree)
                yield actions.InsertComment(
                    target_path, index, right_node.text)
                left_node = etree.Comment(right_node.text)
            else:
                target_path = utils.getpath(target, left_tree)
                yield actions.InsertNode(target_path, right_node.tag, index)
                left_node = target.makeelement(right_node.tag)

            # (iii) Register the new pair and put the node into the tree.
            self.append_match(left_node, right_node, 1.0)
            target.insert(index, left_node)
            self._inorder.add(left_node)
            self._inorder.add(right_node)
            # And then we update attributes. This is different from the
            # paper, because the paper assumes nodes only have labels and
            # values. Nodes also have texts; those are handled later.
            for step in self.update_node_attr(left_node, right_node):
                yield step

        # (d) Align
        for step in self.align_children(left_node, right_node):
            yield step

        # And lastly, we update all node texts. We do this after aligning
        # children, because when you generate an XML diff from this, that
        # XML diff update generates more children, confusing later inserts
        # or deletes.
        left_node = self._r2lmap[id(right_node)]
        for step in self.update_node_text(left_node, right_node):
            yield step

    for left_node in utils.reverse_post_order_traverse(self.left):
        if id(left_node) in self._l2rmap:
            continue
        # No match: report the deletion, then drop the node from the tree.
        yield actions.DeleteNode(utils.getpath(left_node, left_tree))
        left_node.getparent().remove(left_node)
def test_del_node(self):
    # Hand a DeleteNode action for /document/node, together with the
    # expected text, to the shared _format_test helper.
    delete_action = actions.DeleteNode("/document/node")
    self._format_test(delete_action, "[remove, /document/node]")
def _handle_delete(self, node):
    """Return a ``DeleteNode`` action built from *node*."""
    delete_action = actions.DeleteNode(node)
    return delete_action
def test_del_node(self):
    # Source document, the delete action aimed at its <node> element, and
    # the expected output framed by the START/END constants; all three are
    # passed to the shared _format_test helper.
    source_xml = '<document><node attr="val">Text</node></document>'
    delete_action = actions.DeleteNode("/document/node")
    wanted = START + ' attr="val" diff:delete="">Text' + END
    self._format_test(source_xml, delete_action, wanted)
def test_del_node(self):
    # Pair a DeleteNode action for /document/node with the text it is
    # expected to produce and let the shared _format_test helper compare.
    wanted = '[remove, /document/node]'
    delete_action = actions.DeleteNode('/document/node')
    self._format_test(delete_action, wanted)