コード例 #1
0
ファイル: html.py プロジェクト: PolicyStat/htmltreediff
def fix_lists(dom):
    """
    Push <ins>/<del> markup that wraps list items down inside those items.

    <ins> and <del> may not sit directly inside <ul> or <ol>, so the change
    markers are moved into the nearest <li>; that way the list numbering is
    not interrupted.  The document is modified in place.
    """
    _internalize_changes_markup(dom, {"li"})

    # Bucket every <del>/<ins> that is the direct parent of some <li>.
    wrappers = {"del": set(), "ins": set()}
    for item in dom.getElementsByTagName("li"):
        holder = item.parentNode
        bucket = wrappers.get(holder.tagName)
        if bucket is not None:
            bucket.add(holder)

    # ins > li becomes li > ins.
    for inserted in wrappers["ins"]:
        distribute(inserted)

    # del > li becomes li.del-li > del.
    for deleted in wrappers["del"]:
        # Snapshot the children before the wrapper is taken apart.
        former_children = list(deleted.childNodes)
        unwrap(deleted)
        for child in former_children:
            if child.nodeName != "li":
                continue
            child.setAttribute("class", "del-li")
            wrap_inner(child, "del")
コード例 #2
0
ファイル: html.py プロジェクト: PolicyStat/htmltreediff
def _internalize_changes_markup(dom, child_tag_names):
    """
    Merge adjacent ``<del><X/></del><ins><X/></ins>`` pairs under one element.

    A pair qualifies when the <del> is immediately followed by an <ins>, each
    has exactly one child, and both children are elements whose tag names are
    in ``child_tag_names``.  For such a pair the two child elements are passed
    to ``unwrap`` and the <del>/<ins> pair is passed to ``wrap_nodes`` using
    the inserted child's tag name; the inserted child's attributes are then
    copied onto the node ``wrap_nodes`` returns.  The document is modified in
    place.
    """

    def _is_wanted_element(candidate):
        # Same two checks, in the same order, for both sides of the pair.
        return is_element(candidate) and candidate.tagName in child_tag_names

    # Delete tags are always ordered first, so each <del> is paired with
    # whatever follows it.
    for removed_wrapper in list(dom.getElementsByTagName("del")):
        added_wrapper = removed_wrapper.nextSibling
        if (
            len(removed_wrapper.childNodes) != 1
            or added_wrapper is None
            or len(added_wrapper.childNodes) != 1
            or added_wrapper.tagName != "ins"
        ):
            continue

        old_child = removed_wrapper.firstChild
        if not _is_wanted_element(old_child):
            continue
        new_child = added_wrapper.firstChild
        if not _is_wanted_element(new_child):
            continue

        # Capture the attributes before the inserted child is unwrapped.
        carried = dict(new_child.attributes.items())
        unwrap(old_child)
        unwrap(new_child)
        merged = wrap_nodes([removed_wrapper, added_wrapper], new_child.tagName)
        for attr_name, attr_value in carried.items():
            merged.setAttribute(attr_name, attr_value)
コード例 #3
0
def distribute(node):
    """
    Replace an element by copies of itself wrapped inside each child element.

    The element children of ``node`` are collected, ``node`` is unwrapped,
    and the contents of every collected child are then wrapped in a new
    element carrying ``node``'s tag name.  Non-element children are left
    without a wrapper.
    """
    element_children = [child for child in node.childNodes if is_element(child)]
    unwrap(node)
    wrapper_tag = node.tagName
    for child in element_children:
        wrap_inner(child, wrapper_tag)
コード例 #4
0
def remove_nesting(dom, tag_name):
    """
    Unwrap every ``tag_name`` element nested inside another ``tag_name``.

    For each matching element the ancestor chain is scanned; the scan stops
    at the document element, and the first ancestor carrying the same tag
    name causes the element to be unwrapped.
    """
    for candidate in dom.getElementsByTagName(tag_name):
        for outer in ancestors(candidate):
            # The chain may contain the element itself; that entry is skipped.
            if outer is not candidate:
                if outer is dom.documentElement:
                    break
                if outer.tagName == tag_name:
                    unwrap(candidate)
                    break
コード例 #5
0
ファイル: html.py プロジェクト: exKAZUu/htmltreediff
def fix_lists(dom):
    """
    Push <ins>/<del> markup that wraps list items down inside those items.

    <ins> and <del> may not sit directly inside <ul> or <ol>, so the change
    markers are moved into the nearest <li>; that way the list numbering is
    not interrupted.  The document is modified in place.
    """
    # Collect every <del>/<ins> that is the direct parent of some <li>.
    deletions, insertions = set(), set()
    for item in dom.getElementsByTagName('li'):
        container = item.parentNode
        kind = container.tagName
        if kind == 'ins':
            insertions.add(container)
        elif kind == 'del':
            deletions.add(container)

    # ins > li becomes li > ins.
    for inserted in insertions:
        distribute(inserted)

    # del > li becomes li.del-li > del.
    for deleted in deletions:
        # Snapshot the children before the wrapper is taken apart.
        former_children = list(deleted.childNodes)
        unwrap(deleted)
        for child in former_children:
            if child.nodeName != 'li':
                continue
            child.setAttribute('class', 'del-li')
            wrap_inner(child, 'del')
コード例 #6
0
def _strip_changes_old(node):
    """
    Strip change markup below ``node``: <ins> elements go to ``remove_node``
    and <del> elements go to ``unwrap``.
    """
    inserted = node.getElementsByTagName('ins')
    for element in inserted:
        remove_node(element)
    # The <del> lookup happens only after the <ins> pass has finished.
    deleted = node.getElementsByTagName('del')
    for element in deleted:
        unwrap(element)