Exemple #1
0
def htmldiff(old_html, new_html):
    """Return a diff of two HTML strings.

    Both inputs are tokenized with ``include_hrefs=False``; the two token
    streams are handed to ``htmldiff_tokens``, the resulting pieces are
    joined and stripped, and that string is passed through
    ``fixup_ins_del_tags``.

    If tokenizing either input raises ``KeyError`` or ``ParserError``,
    ``new_html`` is returned unchanged instead of a diff.
    """
    try:
        # The old document is tokenized first, then the new one.
        before, after = (
            tokenize(markup, include_hrefs=False)
            for markup in (old_html, new_html)
        )
    except (KeyError, ParserError):
        # Best-effort fallback: no diff, just hand back the new document.
        return new_html

    merged = ''.join(htmldiff_tokens(before, after))
    return fixup_ins_del_tags(merged.strip())
Exemple #2
0
def htmldiff(old_html, new_html):
    """Diff ``old_html`` against ``new_html`` and return the marked-up result.

    Steps: tokenize each input (``include_hrefs=False``), run
    ``htmldiff_tokens`` over the two token lists, concatenate and strip the
    output, then apply ``fixup_ins_del_tags`` to it.

    A ``KeyError`` or ``ParserError`` raised while tokenizing is swallowed
    and ``new_html`` is returned as-is.
    """
    try:
        tokens_before = tokenize(old_html, include_hrefs=False)
        tokens_after = tokenize(new_html, include_hrefs=False)
    except (KeyError, ParserError):
        # Tokenizing failed: fall back to the new document without a diff.
        return new_html
    else:
        # Kept outside the ``try`` body so errors from the diff itself
        # are not caught by the handler above.
        chunks = htmldiff_tokens(tokens_before, tokens_after)

    return fixup_ins_del_tags(''.join(chunks).strip())
Exemple #3
0
def htmldiff(old_html, new_html):
    """Variant of ``lxml.html.diff.htmldiff`` with two deliberate changes.

    * Tokenization uses ``include_hrefs=False``: the " Link: href " output
      is hard to fix up and is believed not to be needed here.
    * ``fixup_ins_del_tags`` is not applied, because it re-parses
      everything and is not needed here.

    Returns the joined, stripped output of ``lxml_diff.htmldiff_tokens``.
    """
    # Old document first, then new, both tokenized with the same options.
    tokenized = [
        lxml_diff.tokenize(markup, include_hrefs=False)
        for markup in (old_html, new_html)
    ]
    pieces = lxml_diff.htmldiff_tokens(*tokenized)
    return ''.join(pieces).strip()