def compare_versions():
    """Return an HTML diff of the two document versions named in the form.

    Reads ``v_o_id``/``v_o`` and ``v_t_id``/``v_t`` from ``request.form``,
    loads the HTML of each version through
    ``ScriptData.get_html_for_version`` and returns the output of ``diff``.

    When both documents are shorter than 10000 characters they are diffed
    directly.  Otherwise identical leading chunks (each ending at a ``</p>``)
    and identical trailing chunks (each starting at the last ``<p CLASS=``)
    are peeled off first, so that only the differing middle part is handed
    to ``diff``.  The peeled text is re-attached, unchanged and in document
    order, around the diff result.
    """
    resource_id_1 = request.form['v_o_id']
    resource_id_2 = request.form['v_t_id']
    version_1 = request.form['v_o']
    version_2 = request.form['v_t']

    html_1 = ScriptData.get_html_for_version(resource_id_1, version_1)
    html_2 = ScriptData.get_html_for_version(resource_id_2, version_2)

    if len(html_1) < 10000 and len(html_2) < 10000:
        return diff(html_1, html_2)

    # Peel identical leading paragraphs, one "</p>"-terminated chunk at a time.
    leading_chunks = []
    while '</p>' in html_1:
        cut = html_1.index('</p>') + 4  # 4 == len('</p>')
        if html_1[:cut] != html_2[:cut]:
            break
        leading_chunks.append(html_1[:cut])
        html_1 = html_1[cut:]
        html_2 = html_2[cut:]

    # Peel identical trailing paragraphs, last one first.
    trailing_chunks = []
    while '</p>' in html_1:
        tail_start = html_1.rfind('<p CLASS=')
        if tail_start == -1:
            # No paragraph opening left to anchor a trailing chunk on.
            break
        tail = html_1[tail_start:]
        if not html_2.endswith(tail):
            break
        trailing_chunks.append(tail)
        html_1 = html_1[:tail_start]
        html_2 = html_2[:len(html_2) - len(tail)]

    # The chunks were collected back-to-front; restore document order so the
    # shared suffix is not emitted with its paragraphs reversed.
    trailing_chunks.reverse()

    return (
        ''.join(leading_chunks)
        + diff(html_1, html_2)
        + ''.join(trailing_chunks)
    )
def main(argv=None):
    """Print the diff of two UTF-8 encoded HTML files.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; ``argv[1]`` and ``argv[2]`` are the paths of the two
        files to compare.  Falls back to ``sys.argv`` when empty or omitted.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover

    with codecs.open(argv[1], 'r', 'utf-8') as file_a:
        html_a = file_a.read()
    with codecs.open(argv[2], 'r', 'utf-8') as file_b:
        html_b = file_b.read()

    # Parenthesised single-argument form: valid on Python 3 and prints the
    # same thing as the old ``print`` statement on Python 2.
    print(diff(html_a, html_b, cutoff=0.0, pretty=False))
def main(argv=None):
    """Print the pretty-printed diff of two HTML files.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; ``argv[1]`` and ``argv[2]`` are the paths of the two
        files to compare.  Falls back to ``sys.argv`` when empty or omitted.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover

    with open(argv[1]) as file_a:
        html_a = file_a.read()
    with open(argv[2]) as file_b:
        html_b = file_b.read()

    # Parenthesised single-argument form: valid on Python 3 and prints the
    # same thing as the old ``print`` statement on Python 2.
    print(diff(html_a, html_b, cutoff=0.0, pretty=True))
def diff_html(ref_path, gen_path):
    """Compare a generated HTML file against a reference HTML file.

    The two files are diffed with ``diff``; the result is parsed as HTML and
    searched for ``<ins>`` / ``<del>`` elements.  If any are found, the diff
    is written next to the generated file as ``<gen_path>.diff.html`` (with a
    link to ``../diff_highlight.css``) and a hint is written to stderr.

    Parameters
    ----------
    ref_path : str
        Path of the reference HTML file.
    gen_path : str
        Path of the generated HTML file.

    Returns
    -------
    bool
        ``True`` when the diff contains no ``<ins>``/``<del>`` elements,
        ``False`` otherwise.
    """
    import tempfile

    with open(ref_path) as ref_file:
        ref_html = ref_file.read()
    with open(gen_path) as gen_file:
        gen_html = gen_file.read()

    result = diff(ref_html, gen_html, cutoff=0.0, pretty=True)

    # Parse via a private scratch file instead of a fixed 'result.html' in the
    # current directory: that name could clobber (and then delete) a user
    # file, collide between concurrent runs, and was left behind on errors.
    scratch_fd, scratch_path = tempfile.mkstemp(suffix='.html')
    try:
        with os.fdopen(scratch_fd, 'w') as scratch:
            scratch.write(result)
        tree = etree.parse(scratch_path, parser=etree.HTMLParser())
    finally:
        os.unlink(scratch_path)

    root = tree.getroot()
    inserted = root.findall(".//ins")
    deleted = root.findall(".//del")
    if not (inserted or deleted):
        return True

    diff_filename = '%s.diff.html' % gen_path
    sys.stderr.write("There's a difference, you can open %s"
                     " with your browser to look at it\n" % diff_filename)
    with open(diff_filename, 'w') as diff_file:
        # Note the space before ``href``: without it the two attributes of
        # the <link> tag run together.
        diff_file.write('<head><link rel="stylesheet" type="text/css" '
                        'href="../diff_highlight.css"></head>')
        diff_file.write(result)
    return False
def main(argv=None):
    """Write the pretty-printed diff of two HTML files to standard output.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; the files at ``argv[1]`` and ``argv[2]`` are
        compared.  ``sys.argv`` is used when ``argv`` is empty or omitted.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover

    # Read the first document completely before touching the second path.
    with open(argv[1]) as left_handle:
        left_html = left_handle.read()

    with open(argv[2]) as right_handle:
        right_html = right_handle.read()

    rendered = diff(left_html, right_html, cutoff=0.0, pretty=True)
    print(rendered)
def main(argv=None):
    """Diff two UTF-16 HTML files and save the result as ``output.html``.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; the files at ``argv[1]`` and ``argv[2]`` are
        compared.  ``sys.argv`` is used when ``argv`` is empty or omitted.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover

    with codecs.open(argv[1], 'r', 'utf-16') as left_handle:
        left_html = left_handle.read()

    with codecs.open(argv[2], 'r', 'utf-16') as right_handle:
        right_html = right_handle.read()

    # The output file is opened before the diff is computed, as before.
    with codecs.open('output.html', 'w', 'utf-16') as sink:
        rendered = diff(left_html, right_html, cutoff=0.0, pretty=True)
        sink.write(rendered)
def main(argv=None):
    """Diff two HTML files into a randomly named file and print its path.

    The diff is written to ``tmp/doc_diff/output_<token>.html`` (relative to
    the current working directory), where ``<token>`` comes from
    ``secrets.token_urlsafe(6)``.  The chosen path is printed to stdout.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector; the files at ``argv[1]`` and ``argv[2]`` are
        compared.  ``sys.argv`` is used when ``argv`` is empty or omitted.
    """
    if not argv:
        argv = sys.argv  # pragma: no cover

    with open(argv[1]) as file_a:
        html_a = file_a.read()
    with open(argv[2]) as file_b:
        html_b = file_b.read()

    output_filename = f"tmp/doc_diff/output_{secrets.token_urlsafe(6)}.html"

    # Compute the diff before opening the output so a failure in ``diff``
    # does not leave an empty file behind, and let ``with`` close the handle
    # even if the write raises.
    diff_markup = diff(html_a, html_b, cutoff=0.0, pretty=True)
    with open(output_filename, "w") as output_file:
        output_file.write(diff_markup)

    print(output_filename)
def html_tree_diff(a_text, b_text):
    """Diff two HTML documents with ``htmltreediff`` and attach highlight CSS.

    Insertions and deletions are wrapped in ``diffins`` / ``diffdel`` tags;
    the stylesheet built here gives those tags (and their descendants) a
    background colour.  The stylesheet is attached with ``insert_style``.

    Parameters
    ----------
    a_text : string
        Source HTML of one document to compare
    b_text : string
        Source HTML of the other document to compare

    Returns
    -------
    The value returned by ``insert_style`` for the diff markup and the CSS.
    """
    # Adjacent literals concatenate to exactly the original stylesheet text.
    css = (
        " diffins {text-decoration : none; background-color: #d4fcbc;}"
        " diffdel {text-decoration : none; background-color: #fbb6c2;}"
        " diffins * {text-decoration : none; background-color: #d4fcbc;}"
        " diffdel * {text-decoration : none; background-color: #fbb6c2;} "
    )
    marked_up = htmltreediff.diff(
        a_text,
        b_text,
        ins_tag='diffins',
        del_tag='diffdel',
        pretty=True,
    )
    return insert_style(marked_up, css)
def html_tree_diff(a_text, b_text):
    """Diff two HTML documents with ``htmltreediff`` and attach highlight CSS.

    Insertions and deletions are wrapped in ``diffins`` / ``diffdel`` tags,
    which are coloured with the ``differ_insertion`` and ``differ_deletion``
    entries of ``get_color_palette()``.

    Parameters
    ----------
    a_text : string
        Source HTML of one document to compare
    b_text : string
        Source HTML of the other document to compare

    Returns
    -------
    dict
        A dict whose ``'diff'`` key holds the styled diff markup.
    """
    palette = get_color_palette()
    ins_color = palette['differ_insertion']
    del_color = palette['differ_deletion']

    # Adjacent literals concatenate to exactly the original stylesheet text.
    css = (
        f" diffins {{text-decoration : none; background-color: {ins_color};}}"
        f" diffdel {{text-decoration : none; background-color: {del_color};}}"
        f" diffins * {{text-decoration : none; background-color: {ins_color};}}"
        f" diffdel * {{text-decoration : none; background-color: {del_color};}} "
    )
    marked_up = htmltreediff.diff(
        a_text,
        b_text,
        ins_tag='diffins',
        del_tag='diffdel',
        pretty=True,
    )
    # TODO Count number of changes.
    styled = insert_style(marked_up, css)
    return {'diff': styled}
def diff(a_text, b_text):
    """
    Diff the structure of two HTML documents using ``htmltreediff``.

    This wraps the ``htmltreediff`` package with the standard arguments and
    output format used by all diffs in ``web-monitoring-diff``.

    ``htmltreediff`` parses HTML documents into an XML DOM and attempts to
    diff the document *structures*, rather than look at streams of tags &
    text (like ``htmldiffer``) or the readable text content of the HTML (like
    ``web_monitoring_diff.html_render_diff``).

    Because of this, it can give extremely accurate and detailed information
    for documents that are very similar, but its output gets complicated or
    opaque as the two documents diverge in structure. It can also be very
    slow.

    In practice, we've found that many real-world web pages vary their
    structure enough (over periods as short as a few months) to reduce the
    value of this diff. It's best used for narrowly-defined scenarios like:

    - Comparing versions of a page that are very similar, often at very close
      points in time.
    - Comparing XML structures you can expect to be very similar, like XML
      API responses, RSS documents, etc.
    - Comparing two documents that were generated from the same template with
      differing underlying data. (Assuming the template is fairly rigid, and
      does not leave too much document structure up to the underlying data.)

    ``htmltreediff`` is no longer under active development; we maintain a
    fork with minimal fixes and Python 3 support. It is not available on
    PyPI, so you must install via git::

        $ pip install git+https://github.com/danielballan/htmltreediff@customize

    You can also install all experimental differs with::

        $ pip install -r requirements-experimental.txt

    Parameters
    ----------
    a_text : string
        Source HTML of one document to compare
    b_text : string
        Source HTML of the other document to compare

    Returns
    -------
    dict
        A dict whose ``'diff'`` key holds the diff markup with the highlight
        stylesheet attached by ``insert_style``.
    """
    palette = get_color_palette()
    ins_color = palette['differ_insertion']
    del_color = palette['differ_deletion']

    # Adjacent literals concatenate to exactly the original stylesheet text.
    css = (
        f" diffins {{text-decoration : none; background-color: {ins_color};}}"
        f" diffdel {{text-decoration : none; background-color: {del_color};}}"
        f" diffins * {{text-decoration : none; background-color: {ins_color};}}"
        f" diffdel * {{text-decoration : none; background-color: {del_color};}} "
    )
    marked_up = htmltreediff.diff(
        a_text,
        b_text,
        ins_tag='diffins',
        del_tag='diffdel',
        pretty=True,
    )
    # TODO Count number of changes.
    styled = insert_style(marked_up, css)
    return {'diff': styled}
from htmltreediff import diff
from kitchen.text.converters import to_unicode

# Print the pretty-printed diff of test1.html against test2.html.
# Each file is read inside its own ``with`` block so its handle is closed
# even if opening the second file or computing the diff fails.
with open('test1.html', 'r') as f1:
    v1 = f1.read()
with open('test2.html', 'r') as f2:
    v2 = f2.read()

# Parenthesised single-argument form: valid on Python 3 and prints the same
# thing as the old ``print`` statement on Python 2.
print(diff(to_unicode(v1), to_unicode(v2), pretty=True))
from htmltreediff import diff

# Print the pretty-printed diff of two inline HTML fragments.
# NOTE(review): the second fragment opens with '<h1' and has no closing '>'
# before its text -- this looks like a typo for '<h1>...two...</h1>', but it
# may be deliberate malformed input; left unchanged, please confirm.
# Parenthesised single-argument form: valid on Python 3 and prints the same
# thing as the old ``print`` statement on Python 2.
print(diff('<h1>...one...</h1>', '<h1...two...</h1>', pretty=True))