def diff_location(self, old_location, new_location):
    """
    Diff the children found at old_location / new_location.

    Children are paired up by self.match_children. Per the original notes,
    matching happens in three passes: full tree equality first, then a
    text-similarity heuristic, and finally plain tag type. Pairs that are
    only similar (not identical) are returned as recursion indices and get
    diffed one level deeper. Everything left unmatched is emitted into the
    edit script through self.delete / self.insert.
    """
    old_parent = get_location(self.old_dom, old_location)
    old_children = list(old_parent.childNodes)
    new_parent = get_location(self.new_dom, new_location)
    new_children = list(new_parent.childNodes)
    if not (old_children or new_children):
        # Two leaves: nothing to compare.
        return

    matching_blocks, pending = self.match_children(
        old_children,
        new_children,
    )

    # Apply changes for this level.
    for tag, i1, i2, j1, j2 in adjusted_ops(get_opcodes(matching_blocks)):
        if tag == 'delete':
            assert j1 == j2
            # Work on a snapshot of the doomed range, last element first,
            # so the positions of the nodes still to be removed stay valid.
            doomed = old_children[i1:i2]
            for offset in range(len(doomed) - 1, -1, -1):
                position = i1 + offset
                self.delete(old_location + [position], doomed[offset])
                old_children.pop(position)
        elif tag == 'insert':
            assert i1 == i2
            # Left to right; old_children mirrors each insertion so it
            # keeps tracking the tree being edited.
            additions = new_children[j1:j2]
            for position, child in enumerate(additions, i1):
                self.insert(new_location + [position], child)
                old_children.insert(position, child)
        # Runs for every opcode, whichever branch (if any) was taken.
        # NOTE(review): adjust_indices presumably re-bases the pending
        # (old, new) index pairs after this edit -- confirm in its source.
        pending = list(adjust_indices(pending, i1, i2, j1, j2))

    # Recurse to deeper level.
    for old_index, new_index in pending:
        self.diff_location(
            old_location + [old_index],
            new_location + [new_index],
        )
def test_get_location():
    """
    get_location must raise ValueError for the location [10] in a small
    parsed document.
    """
    document = parse_minidom('<ins><li>A</li><li><em>B</em></li></ins>')
    try:
        get_location(document, [10])
    except ValueError:
        # Expected outcome: the lookup was rejected.
        return
    # Reaching this point means the lookup succeeded, which is the failure.
    raise AssertionError('ValueError not raised')
def run_edit_script(self):
    """
    Apply each entry of self.edit_script to self.dom, then return self.dom.

    Every entry is an (action, location, properties) triple. A 'delete'
    entry removes the node found at location; an 'insert' entry adds a node
    described by properties under the parent at location[:-1], at child
    index location[-1]. Entries with any other action are skipped.
    """
    for action, location, properties in self.edit_script:
        if action == 'insert':
            # The last path component is the slot inside the parent.
            target_parent = get_location(self.dom, location[:-1])
            slot = location[-1]
            self.action_insert(target_parent, slot, **properties)
        elif action == 'delete':
            doomed = get_location(self.dom, location)
            self.action_delete(doomed)
    return self.dom
def diff_location(self, old_location, new_location):
    """
    Compare the child lists at the two locations and record the edits.

    The matching itself is delegated to self.match_children, which (per the
    original notes) pairs children in three steps: identical subtrees, then
    children with similar text content, then children sharing a tag type.
    The similar-but-not-identical pairs still need work, so this method
    recurses into them. Unmatched children become 'delete' / 'insert'
    entries via self.delete and self.insert.
    """
    old_children = list(
        get_location(self.old_dom, old_location).childNodes
    )
    new_children = list(
        get_location(self.new_dom, new_location).childNodes
    )
    if not old_children and not new_children:
        return

    matching_blocks, to_recurse = self.match_children(
        old_children, new_children)
    opcodes = adjusted_ops(get_opcodes(matching_blocks))

    # Apply changes for this level.
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'delete':
            assert j1 == j2
            # Delete right to left: count the position down from the end of
            # a snapshot of the range being removed.
            removed = old_children[i1:i2]
            position = i1 + len(removed)
            for child in reversed(removed):
                position -= 1
                self.delete(old_location + [position], child)
                old_children.pop(position)
        elif tag == 'insert':
            assert i1 == i2
            # Insert left to right, keeping old_children in step.
            position = i1
            for child in new_children[j1:j2]:
                self.insert(new_location + [position], child)
                old_children.insert(position, child)
                position += 1
        # Executed once per opcode, regardless of its tag.
        # NOTE(review): presumably shifts the recursion index pairs to
        # reflect the edit above -- verify against adjust_indices.
        to_recurse = list(adjust_indices(to_recurse, i1, i2, j1, j2))

    # Recurse to deeper level.
    for old_index, new_index in to_recurse:
        deeper_old = old_location + [old_index]
        deeper_new = new_location + [new_index]
        self.diff_location(deeper_old, deeper_new)
def test(original, distributed):
    """
    Check that distribute() turns `original` into `distributed`.

    Both arguments are parsed with parse_minidom. distribute() is applied to
    the node at location [0] of the first document, after which the
    serialized forms of the two documents are compared.
    """
    actual_dom = parse_minidom(original)
    expected_dom = parse_minidom(distributed)
    target = get_location(actual_dom, [0])
    distribute(target)
    actual_html = minidom_tostring(actual_dom)
    expected_html = minidom_tostring(expected_dom)
    assert_html_equal(actual_html, expected_html)
def delete(self, location, node):
    """
    Remove `node` (found at `location`) and log the removal.

    Descendants go first: children are deleted before their parent and from
    right to left. Each removed node contributes one
    ('delete', location, node_properties(node)) entry to self.edit_script.
    """
    # Snapshot the children, since the recursive calls detach them.
    children = list(node.childNodes)
    for child_index in range(len(children) - 1, -1, -1):
        self.delete(location + [child_index], children[child_index])

    # Write the deletion to the edit script.
    entry = ('delete', location, node_properties(node))
    self.edit_script.append(entry)

    # Sanity checks: the node must sit where `location` says it does,
    # inside the old document, before it is actually removed.
    expected_parent = get_location(self.old_dom, location[:-1])
    assert node.parentNode == expected_parent
    assert node.ownerDocument == self.old_dom
    remove_node(node)
def insert(self, location, node):
    """
    Insert a copy of `node` into the old DOM at `location` and log it.

    An ('insert', location, node_properties(node)) entry is appended to
    self.edit_script, a shallow clone of the node is placed under the parent
    at location[:-1], and the node's children are then inserted the same
    way: parent before children, left to right.
    """
    # Write the insertion to the edit script.
    entry = ('insert', location, node_properties(node))
    self.edit_script.append(entry)

    # Only the node itself is cloned here; its children are handled by the
    # recursive calls below.
    shallow_clone = node.cloneNode(deep=False)
    target_parent = get_location(self.old_dom, location[:-1])
    following = get_child(target_parent, location[-1])
    insert_or_append(target_parent, shallow_clone, following)

    # Top down, left to right.
    child_index = 0
    for child in node.childNodes:
        self.insert(location + [child_index], child)
        child_index += 1