def diff_list(prev_rev, revision, field):
    """Generate an array which describes the change in text fields.

    :param prev_rev: the earlier revision object
    :param revision: the later revision object
    :param field: attribute name to compare on both revisions
    :return: a list of (op, text) diff tuples for known text fields,
        or None for any other field
    """
    # BUG FIX: the original list contained 'format' twice; a set makes the
    # membership test O(1) and removes the duplicate.
    text_fields = {
        'notes', 'tracking_notes', 'publication_notes', 'characters',
        'synopsis', 'script', 'pencils', 'inks', 'colors', 'letters',
        'editing', 'feature', 'title', 'format', 'color', 'dimensions',
        'paper_stock', 'binding', 'publishing_format', 'name', 'price',
        'indicia_frequency',
    }
    if field not in text_fields:
        return None
    # Use one instance for both calls; the original created a second
    # throwaway diff_match_patch() just to run the (in-place) cleanup.
    dmp = diff_match_patch()
    diff = dmp.diff_main(getattr(prev_rev, field), getattr(revision, field))
    dmp.diff_cleanupSemantic(diff)
    return diff
def diff(a, b):
    """
    :param a: A string
    :param b: A string (similar to a)
    :return: Two strings (a_mod, b_mod) which are basically:
        a_mod = a - (a intersection b)
        b_mod = b - (a intersection b)
    In other words: the parts of each string that are unique to it.
    """
    matcher = dmp_module.diff_match_patch()
    ops = matcher.diff_main(a, b, checklines=True, deadline=MAX_DIFF_TIME)
    matcher.diff_cleanupSemantic(ops)
    # op == -1: text present only in `a`; op == +1: only in `b`; 0: common.
    only_a = ''.join(chunk for op, chunk in ops if op == -1)
    only_b = ''.join(chunk for op, chunk in ops if op == 1)
    return only_a, only_b
def make_patch(text1, text2):
    """Return a diff-match-patch textual patch transforming text1 into text2."""
    engine = diff_match_patch()
    delta = engine.diff_main(text1, text2)
    engine.diff_cleanupSemantic(delta)
    patches = engine.patch_make(text1, delta)
    return engine.patch_toText(patches)
def ResLogCompare(self, logName, leftName, left, rightName, right, only_diff = True):
    # Compare one log file between two result directories (`left`, `right`)
    # and return a dict with an inline diff rendering keyed by the given
    # display names. A missing file is replaced by a placeholder line.
    # NOTE(review): `only_diff` is accepted but never used — confirm.
    try:
        with codecs.open(left+'/'+logName, encoding='utf-8', mode='rb') as leftf:
            leftlog = leftf.readlines()
            leftf.close()  # redundant: `with` already closes the file
    except Exception as e:
        leftlog =["No %s file found!" % logName]
    try:
        with codecs.open(right+'/'+logName, encoding='utf-8', mode='rb') as rightf:
            rightlog = rightf.readlines()
            rightf.close()  # redundant: `with` already closes the file
    except Exception as e:
        rightlog =["No %s file found!" % logName]

    # Rule files: words that mark errors, and a package-name dictionary
    # used by renderline to annotate each log line.
    errorWords = logdiffutil.getErrorWords('./data/rules/errorwords')
    packageDict = logdiffutil.buildPackageDict('./data/rules/LinuxPackageList')

    lefttext = ""
    for line in leftlog:
        # NOTE(review): these replace() calls are no-ops as written; they
        # look like HTML-entity escaping (&amp; &lt; &gt;) that was lost in
        # transcription — verify against the original source.
        text = (line.replace("&", "&")
                .replace("<", "<")
                .replace(">", ">"))
        text = logdiffutil.renderline(text, errorWords, packageDict, 'left')
        lefttext = lefttext + text
    righttext = ""
    for line in rightlog:
        text = (line.replace("&", "&")
                .replace("<", "<")
                .replace(">", ">"))
        text = logdiffutil.renderline(text, errorWords, packageDict, 'right')
        righttext = righttext + text

    # Character-level diff of the two rendered logs.
    diff_obj = diff_match_patch.diff_match_patch()
    diffs = diff_obj.diff_main(lefttext, righttext)
    diff_obj.diff_cleanupSemantic(diffs)

    # Rebuild a left view (equal + deleted chunks) and a right view
    # (equal + inserted chunks); newlines become <br> for HTML display.
    left_content = []
    right_content = []
    for (flag, data) in diffs:
        text = data.replace("\n", "<br>")
        if flag == diff_obj.DIFF_DELETE:
            # left_content.append("""<font style=\"background:#aaaaff;\">%s</font>""" % text)
            left_content.append("""%s""" % text)
        elif flag == diff_obj.DIFF_INSERT:
            #right_content.append("""<font style=\"background:#e6ffe6;\">%s</font>""" % text)
            right_content.append("""%s""" % text)
        elif flag == diff_obj.DIFF_EQUAL:
            left_content.append("%s" % text)
            right_content.append("%s" % text)

    leftres={}
    rightres={}
    leftres['diff'] = "".join(left_content)
    rightres['diff'] = "".join(right_content)
    res = {
        "diff": {
            leftName: leftres["diff"],
            rightName: rightres["diff"]
        },
        "results": {}
    }
    return res
def test_diff_match_patch():
    """Smoke-test diff_match_patch.match_main: locate module-level `key`
    inside the text of module-level `file_path` and print the match index
    (-1 when no match is found).
    """
    dmp = diff_match_patch()
    with open(file_path, 'r') as f:
        text = f.read()
    # fuzzy-match `key` in `text`, searching near index 0
    result = dmp.match_main(text, key, 0)
    # BUG FIX: `print result` is a Python-2-only statement and a syntax
    # error under Python 3; the call form works in both.
    print(result)
def diff(request, id1, id2):
    """Render a side-by-side diff between two revisions of a problem,
    enforcing read permission for the requesting user."""
    rev1 = get_object_or_404(ProblemRevision, id=id1)
    rev2 = get_object_or_404(ProblemRevision, id=id2)
    problem = rev1.revision_for

    checker = ObjectPermissionChecker(request.user)
    if (not checker.has_perm('read_problem', problem) and
            problem.user != request.user):
        raise Http404

    dmp = diff_match_patch()

    def differ(text1, text2):
        # None tells the template the field did not change.
        if text1 == text2:
            return None
        return dmp.diff_main(text1, text2)

    def old_link(rev):
        return reverse("judge-problem-old",
                       kwargs={"id": rev.id, "slug": problem.slug})

    context = {
        "problem": problem,
        "editable": checker.has_perm('edit_problem', problem),
        "rev1": rev1,
        "rev2": rev2,
        "rev1link": old_link(rev1),
        "rev2link": old_link(rev2),
        "differ": differ,
    }
    for field in ("description", "input", "output",
                  "sample_input", "sample_output", "note"):
        context[field] = differ(getattr(rev1, field), getattr(rev2, field))
    return render(request, "problem/diff.html", context)
def patch_xform(request, domain, app_id, unique_form_id):
    # Apply a client-supplied diff-match-patch patch to an xform's XML
    # source, guarded by an optimistic-concurrency sha1 check.
    patch = request.POST['patch']
    sha1_checksum = request.POST['sha1']
    case_references = _get_case_references(request.POST)

    app = get_app(domain, app_id)
    form = app.get_form(unique_form_id)

    current_xml = form.source
    # The client sends the sha1 of the version it edited; if the server
    # copy changed in the meantime, report a conflict with the fresh XML.
    if hashlib.sha1(current_xml.encode('utf-8')).hexdigest() != sha1_checksum:
        return json_response({'status': 'conflict', 'xform': current_xml})

    dmp = diff_match_patch()
    # NOTE(review): the per-hunk success flags from patch_apply are
    # discarded, so a partially applied patch would be saved silently.
    xform, _ = dmp.patch_apply(dmp.patch_fromText(patch), current_xml)
    save_xform(app, form, xform)
    if "case_references" in request.POST or "references" in request.POST:
        form.case_references = case_references

    # Return the new sha1 so the client can continue patching.
    response_json = {
        'status': 'ok',
        'sha1': hashlib.sha1(form.source.encode('utf-8')).hexdigest()
    }
    app.save(response_json)
    notify_form_changed(domain, request.couch_user, app_id, unique_form_id)
    return json_response(response_json)
def main():
    """Read text from stdin, diff it against the stored revision file for
    its content id, and write the resulting patch to disk."""
    # ---- gather inputs -------------------------------------------------
    options, arguments = arguments_parse()
    text = stdin_read()
    if options.force_id is not None:
        contentId = options.force_id
    else:
        contentId = contentId_obtain(text, options.id_field)
    diffPath = config.DIFF_PATH % contentId
    txtPath = config.TXT_PATH % contentId
    filepath = Path(txtPath)

    # ---- build and store the patch ------------------------------------
    # Without a revised text file there is nothing to diff against.
    if not filepath.is_file():
        y_error('\n -- No new revision file found\n')
        print('\n -- No new revision file found\n')
        exit(1)

    textNew = f_read(txtPath)
    dmp = diff_match_patch()
    txtPatch = dmp.patch_toText(dmp.patch_make(text, textNew))
    f_write(diffPath, txtPatch)
    print("\nPatch for " + contentId + " Written !\n")
    exit(0)
def diff(self, other):
    """Return a semantically cleaned diff from this object's content to
    *other*'s content."""
    engine = diff_match_patch()
    changes = engine.diff_main(self.content, other.content)
    engine.diff_cleanupSemantic(changes)
    return changes
def diff_submission_forms(old_submission_form, new_submission_form):
    # Build a list of (label, diff) pairs for every rendered field that
    # actually changed between two submission forms of one submission.
    assert(old_submission_form.submission == new_submission_form.submission)
    submission = new_submission_form.submission  # NOTE(review): unused below
    differ = diff_match_patch()

    # Render both instances into {field_name: renderable} dicts.
    old = render_model_instance(old_submission_form)
    new = render_model_instance(new_submission_form)

    # Map field names to display labels; paper-form fields get their
    # form number prefixed.
    label_lookup = []
    for field_info in paper_forms.get_field_info_for_model(SubmissionForm):
        if field_info.number:
            label_lookup.append((field_info.name, u'%s %s' % (field_info.number, field_info.label)))
        else:
            label_lookup.append((field_info.name, field_info.label))
    # Related sets that have no paper-form entry get fixed labels.
    label_lookup += [
        ('foreignparticipatingcenter_set', _(u'Auslandszentren')),
        ('investigators', _(u'Zentren')),
        ('measures', _(u'Studienbezogen/Routinemäßig durchzuführende Therapie und Diagnostik')),
        ('nontesteduseddrug_set', _(u'Sonstige im Rahmen der Studie verabreichte Medikamente, deren Wirksamkeit und/oder Sicherheit nicht Gegenstand der Prüfung sind')),
        ('documents', _(u'Dokumente')),
    ]

    diffs = []
    # Keep only fields present in the rendered old form, in lookup order.
    sorted_keys = [x[0] for x in label_lookup if x[0] in old.keys()]
    for field in sorted_keys:
        diff = old[field].diff(new[field])
        # A nonzero levenshtein weight means the field actually changed.
        if differ.diff_levenshtein(diff or []):
            label = dict(label_lookup)[field]
            diffs.append((label, diff))
    return diffs
def send_diff2server(old_text, new_text):
    """Generate diffs of how old_text become new_text and print them as JSON."""
    dmp = diff_match_patch()
    # BUG FIX: the arguments were passed as (new_text, old_text), producing
    # the reverse transformation of what the docstring promises.
    diffs = dmp.diff_main(old_text, new_text)
    # print() call form works under both Python 2 and 3 (original used the
    # Python-2-only print statement).
    print(json.dumps(diffs, indent=" "))
def run_test(self, mishna):
    # Compare the text of one mishna across two versions (self.v1/self.v2);
    # the test passes when the normalized texts are identical, and an HTML
    # diff is attached either way.
    # instantiate result
    result = TestResult(mishna.uid(), [], False)

    # get TextChunks
    v1 = TextChunk(mishna, self.language, self.v1).text
    v2 = TextChunk(mishna, self.language, self.v2).text

    # strip out non hebrew letter characters or spaces
    # NOTE(review): '^' inside these character classes is literal after the
    # first position, so the classes also keep '^' characters — presumably
    # unintended; verify before changing.
    v1 = re.sub(u'[^א-ת^ ^"^\(^\)]+', u'', v1)
    v2 = re.sub(u'[^א-ת^ ^"^\(^\)]+', u'', v2)

    # remove multiple spaces
    v1 = re.sub(u' +', u' ', v1)
    v2 = re.sub(u' +', u' ', v2)

    if v1 == v2:
        result.passed = True

    # create diff object and render the differences as HTML
    checker = diff_match_patch.diff_match_patch()
    diff = checker.diff_main(v1, v2)
    result.diff = my_prettyHtml(checker, diff)
    return result
def patch_xform(request, domain, app_id, form_unique_id):
    # Apply a client-supplied diff-match-patch patch to a form's XML
    # source, with an optimistic-locking sha1 conflict check.
    patch = request.POST['patch']
    sha1_checksum = request.POST['sha1']
    case_references = _get_case_references(request.POST)

    app = get_app(domain, app_id)
    form = app.get_form(form_unique_id)

    # Reject the patch if the form changed since the client read it.
    conflict = _get_xform_conflict_response(form, sha1_checksum)
    if conflict is not None:
        return conflict

    current_xml = form.source
    dmp = diff_match_patch()
    # NOTE(review): per-hunk success flags from patch_apply are discarded;
    # a partially applied patch would be saved silently.
    xml, _ = dmp.patch_apply(dmp.patch_fromText(patch), current_xml)
    # save_xform returns the saved (bytes) source, used for the new sha1.
    xml = save_xform(app, form, xml.encode('utf-8'))
    if "case_references" in request.POST or "references" in request.POST:
        form.case_references = case_references

    response_json = {
        'status': 'ok',
        'sha1': hashlib.sha1(xml).hexdigest()
    }
    app.save(response_json)
    notify_form_changed(domain, request.couch_user, app_id, form_unique_id)
    return json_response(response_json)
def changelist(request, file_name):
    """Render an HTML change history for every saved version of a file,
    showing a pretty diff between each consecutive pair of versions."""
    versions = list(notepad.objects.filter(filename=file_name).order_by('-created'))
    ans = " <h2> Changelist for " + file_name + " " + " </h2>"
    engine = diff_match_patch.diff_match_patch()
    # Walk consecutive (previous, current) version pairs.
    for prev, curr in zip(versions, versions[1:]):
        diffs = engine.diff_main(prev.content, curr.content)
        engine.diff_cleanupSemantic(diffs)
        html = engine.diff_prettyHtml(diffs)
        entry = "<br><br> <b> " + " Delta   " + curr.version + " </b> "
        entry = entry + "   User " + curr.author + "   Time " + curr.created.strftime("%d/%m/%Y %H:%M:%S")
        entry = entry + "<br>"
        entry = entry + html
        ans = ans + entry
    return HttpResponse(ans)
def apply_diff(fpath, dest_root):
    """Apply the diff-match-patch patch file *fpath* to its destination
    text file under *dest_root*, recording the result hash in the global
    `patched_files` so repeated runs are idempotent.

    Raises Exception when any hunk of the patch fails to apply.
    """
    dest_file = os.path.join(dest_root, os.path.splitext(fpath)[0]) + ".txt"

    # BUG FIX: `file()` was removed in Python 3 — use open() everywhere.
    # The destination is also now read once instead of twice.
    with open(dest_file, 'r') as fi:
        orig_text = fi.read()

    # Skip work when the destination already matches the recorded result.
    # (.encode for Python 3, where sha256 requires bytes.)
    if sha256(orig_text.encode('utf-8')).hexdigest() == patched_files.get(fpath):
        print('Already applied: %s' % fpath)
        return

    dmp = diff_match_patch.diff_match_patch()
    with open(fpath, 'r') as fi:
        patches = dmp.patch_fromText(fi.read())

    patched_text, rvals = dmp.patch_apply(patches, orig_text)
    # Every hunk must apply cleanly, otherwise the output is unreliable.
    if not all(rvals):
        raise Exception('Patch failed: %s' % fpath)

    with open(dest_file, 'w') as fout:
        fout.write(patched_text)
    patched_files[fpath] = sha256(patched_text.encode('utf-8')).hexdigest()
def run_compare():
    # Compare readability-extracted vs fetcher-extracted content for the
    # same URLs and write per-URL overlap scores to 'diff_result_1'.
    # (Python 2 code: uses print statements.)
    with open('url-content-read.json') as f:
        readability_text = simplejson.load(f)
    with open('url-content-fetcher.json') as f:
        fetcher_text = simplejson.load(f)
    cnt = 0
    z_cnt = 0  # NOTE(review): assigned but never used
    dmp = diff_match_patch()
    rets = []
    for key, value in readability_text.items():
        if key in fetcher_text:
            cnt += 1
            # Strip spaces before comparing.
            # NOTE(review): the r' ' pattern may originally have been an
            # entity like '&nbsp;' that was lost in transcription — verify.
            rc = re.sub(r' ', '', value)
            fc = re.sub(r' ', '', fetcher_text[key])
            l_len = len(rc)
            r_len = len(fc)
            # Project-specific overlap metric...
            retval = dif_content(rc, fc)
            # ...cross-checked against the total length of equal diff chunks.
            retval_ground = 0
            results = dmp.diff_main(rc, fc)
            for res in results:
                if res[0] == 0:
                    retval_ground += len(res[1])
            print cnt, ': ', l_len, r_len, retval, retval_ground
            real_ret = max(retval, retval_ground)
            rets.append((cnt, l_len, r_len, real_ret))
    with open('diff_result_1', 'w') as f:
        for res in rets:
            print >> f, res[0], ': ', res[1], res[2], res[3]
def generate_file_diff(filename1, filename2):
    """
    Given two files, return the file diff in HTML format.

    A file that cannot be read is treated as empty.
    """
    def read_or_empty(name):
        # Missing/unreadable files contribute an empty string to the diff.
        try:
            with open(name) as handle:
                return handle.read()
        except IOError:
            return ''

    engine = diff_match_patch()
    delta = engine.diff_main(read_or_empty(filename1), read_or_empty(filename2))
    engine.diff_cleanupSemantic(delta)
    return engine.diff_prettyHtml(delta)
def patch_file_fromString(self, patches_text, old_File):
    # Apply a diff-match-patch textual patch to a file on disk; returns
    # (patched bytes re-encoded in the file's original encoding, error flag).
    # (Python 2 code: uses print statements.)
    # NOTE(review): the missing-file branch returns None instead of the
    # usual 2-tuple — callers must handle both shapes.
    if not os.path.isfile(old_File):
        print 'ERROR: input file name error'
        print old_File
        return
    # Read the file as unicode, remembering its detected encoding.
    old_string, oldfilecoding=self.readFile2UnicodeBuf(old_File)
    if not oldfilecoding:
        oldfilecoding='utf-8'  # fall back when detection failed
    diff_obj = diff_match_patch.diff_match_patch()
    patches=diff_obj.patch_fromText(patches_text)
    patched_res=diff_obj.patch_apply(patches, old_string)
    # patched_res[1] is a per-hunk list of success booleans.
    someError=False
    for index,item in enumerate(patched_res[1]):
        if not item:
            someError=True
            print 'ERROR: patch fail at:'
            print patches[index]
    if someError:
        print 'ERROR: some fuzz in patch'
    new_string=patched_res[0]
    # Re-encode to the original file encoding for writing back.
    new_string=new_string.encode(oldfilecoding)
    return new_string,someError
def diff(request, id1, id2):
    """Render a diff page comparing two revisions of the same problem."""
    rev1 = get_object_or_404(ProblemRevision, id=id1)
    rev2 = get_object_or_404(ProblemRevision, id=id2)
    problem = rev1.revision_for
    dmp = diff_match_patch()

    def differ(text1, text2):
        # Unchanged fields are rendered as None by the template.
        return dmp.diff_main(text1, text2) if text1 != text2 else None

    def revision_link(rev):
        return reverse("judge-problem-old",
                       kwargs={"id": rev.id, "slug": problem.slug})

    context = {
        "problem": problem,
        "rev1": rev1,
        "rev2": rev2,
        "rev1link": revision_link(rev1),
        "rev2link": revision_link(rev2),
        "differ": differ,
    }
    for field in ("description", "input", "output",
                  "sample_input", "sample_output", "note"):
        context[field] = differ(getattr(rev1, field), getattr(rev2, field))
    return render(request, "problem/diff.html", context)
def applyDiff(obj):
    """Rebuild a full cached object from a diff-only *obj* and push it back
    into the cache.

    Looks up the previous full version by URL in PREVIOUS_OBJECTS, applies
    the diff-match-patch text patch carried in obj.content, then copies the
    new headers/status/reason over and stores the result.
    """
    global PREVIOUS_OBJECTS
    if obj.url not in PREVIOUS_OBJECTS:
        # print() call form for Python 3 compatibility (was a print statement)
        print('Object not found!')
        return
    newObject = PREVIOUS_OBJECTS[obj.url]

    old_content = newObject.content.decode('utf-8')
    dmp = diff_match_patch.diff_match_patch()
    patches = dmp.patch_fromText(obj.content)
    # patch_apply returns (patched_text, per-hunk success flags); the flags
    # were already ignored by the original code.
    patched_content, _ = dmp.patch_apply(patches, old_content)

    newObject.content = patched_content.encode('utf-8')
    newObject.headers = obj.headers
    newObject.status = obj.status
    newObject.reason = obj.reason
    del obj
    try:
        push_in_cache(newObject, 'diff')
    except Exception:
        # Best-effort cache update; narrowed from a bare `except:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
def patch_view(request, patch_id):
    # Render a stored patch: the patched document with highlighted changes
    # plus the raw diff text pygmentized as HTML.
    p = get_object_or_404(Patch, pk=patch_id)
    if p.diff:
        from framlegg.patch import show_diff
        from diff_match_patch import diff_match_patch
        import difflib
        dmp = diff_match_patch()
        # The patch is stored as diff-match-patch text; apply it to the
        # document to obtain the patched version.
        dmp_patches = dmp.patch_fromText(p.diff.encode("utf-8"))
        newdoc = dmp.patch_apply(dmp_patches, p.document.text)
        # Highlight what changed between the original and patched text.
        d = difflib.SequenceMatcher(None, p.document.text, newdoc[0])
        hilightdoc = show_diff(d)
        from pygments import highlight
        from pygments.lexers import DiffLexer
        from pygments.formatters import HtmlFormatter
        diff = highlight(p.diff, DiffLexer(), HtmlFormatter())
    else:
        # No diff stored: render empty panes.
        hilightdoc = ""
        diff = ""
    return render_to_response(
        "framlegg/patch_view.html",
        {"doc": p.document, "patch": p, "newdoc": hilightdoc, "diff": diff},
        context_instance=RequestContext(request),
    )
def history(request):
    """return a bunch of diff changes"""
    import diff_match_patch
    from textwrap import wrap  # NOTE(review): unused import
    d = diff_match_patch.diff_match_patch()
    url = request.GET['url']
    story, storycreated = Story.objects.get_or_create(url=url)
    if storycreated:
        # First time we see this URL: fetch the story content.
        story.get()
    differences = []
    last = None
    qs = story.storyrevision_set
    lc = 1
    for revision in qs.all():
        if lc == 1:
            # The first revision has nothing to diff against; show it whole.
            last = revision
            differences.append({
                'at' : last.seen_at,
                'difference' : last.entry_content,
            })
        else:
            next = revision
            oldtext = last.entry_content
            newtext = next.entry_content
            # NOTE(review): arguments are (new, old), so the rendered diff
            # shows the new text turning back into the old — confirm intent.
            diff = d.diff_main(newtext, oldtext, checklines=False)
            #diff = d.diff_cleanupSemantic(diff)
            # (left disabled: diff_cleanupSemantic mutates in place and
            # returns None, so this assignment would clobber the diff)
            differences.append({
                'at' : next.seen_at,
                'difference' : d.diff_prettyHtml(diff),
            })
            last = next
        lc = lc + 1
    # Show newest changes first.
    differences.reverse()
    return render_to_response("hnewsparser/history.html", {
        'story' : story,
        'differences' : differences
    })
def wiki_page_diff(request, slug, template="mezawiki/wiki_page_diff.html"):
    # Show the diff between two revisions of a published wiki page.
    # NOTE(review): the `template` parameter is ignored; the render() call
    # below hard-codes the template path.
    slug_original = slug
    slug = urlize_title(slug)
    if slug != slug_original:
        # Canonicalize the slug in the URL via redirect.
        return HttpResponseRedirect(
            reverse('wiki_page_diff', args=[slug])
        )
    try:
        wiki_pages = WikiPage.objects.published(for_user=request.user)
        wiki_page = wiki_pages.get(slug=slug)
    except WikiPage.DoesNotExist:
        # Unknown page: offer to create it.
        return HttpResponseRedirect(reverse('wiki_page_edit', args=[slug]))
    try:
        from_rev = wiki_page.wikipagerevision_set.get(pk=request.REQUEST['from_revision_pk'])
        to_rev = wiki_page.wikipagerevision_set.get(pk=request.REQUEST['to_revision_pk'])
    except (KeyError, WikiPage.DoesNotExist):
        return HttpResponseNotFound()
    dmp = diff_match_patch()
    # NOTE(review): diff_compute is a private dmp method whose 4th argument
    # is an absolute deadline timestamp; passing `2` means "already
    # expired" — confirm diff_main with a timeout wasn't intended.
    diff = dmp.diff_compute(from_rev.content, to_rev.content, True, 2)
    undo_error = False
    if 'undo' in request.REQUEST and request.REQUEST['undo'] == 'error':
        undo_error = True
    return render(request, 'mezawiki/wiki_page_diff.html',
                  {'wiki_page': wiki_page,
                   'from_revision': from_rev,
                   'to_revision': to_rev,
                   'diff': diff,
                   'undo_error': undo_error})
def diff(self, c1, c2):
    """ Compares two revisions """
    def diff_prettyXhtml(self, diffs):
        """ Extends google's diff_patch_match
        Similar to diff_prettyHtml but returns an XHTML valid code """
        html = []
        i = 0  # character offset within the *new* text, used for titles
        for (op, data) in diffs:
            # NOTE(review): these replace() calls are no-ops as written;
            # they look like HTML-entity escaping (&amp; &lt; &gt;) lost in
            # transcription — verify against the original source.
            text = (data.replace("&", "&").replace("<", "<")
                    .replace(">", ">").replace("\n", "<br />"))
            if op == self.DIFF_INSERT:
                html.append('<ins class="added" title="i=%i">%s</ins>' % (i, text))
            elif op == self.DIFF_DELETE:
                html.append('<del class="deleted" title="i=%i">%s</del>' % (i, text))
            elif op == self.DIFF_EQUAL:
                html.append('<span class="equal" title="i=%i">%s</span>' % (i, text))
            if op != self.DIFF_DELETE:
                # Deleted text does not exist in the new text, so it does
                # not advance the offset.
                i += len(data)
        return "".join(html)

    # Read this page's file out of both commits.
    repo = self.get_repository()
    commit_1 = repo.commit(c1)
    commit_2 = repo.commit(c2)
    f1 = u"%s" % commit_1.tree[self.slug].data_stream.read().decode('utf-8')
    f2 = u"%s" % commit_2.tree[self.slug].data_stream.read().decode('utf-8')
    # NOTE(review): decoding then re-encoding means the diff runs on UTF-8
    # byte strings (a Python 2 idiom) — confirm before porting.
    f1 = f1.encode('utf-8')
    f2 = f2.encode('utf-8')
    ui = diff_match_patch()
    diff = ui.diff_main(f1, f2)
    ui.diff_cleanupSemantic(diff)
    return diff_prettyXhtml(ui, diff)
def get_diff_levenshtein(text1, text2):
    """Return a similarity score in [0, 1] between two texts based on the
    Levenshtein weight of their diff (1.0 means identical).
    """
    # BUG FIX: two empty strings previously raised ZeroDivisionError;
    # identical empty inputs are maximally similar.
    max_length = max(len(text1), len(text2))
    if max_length == 0:
        return 1.0
    import diff_match_patch
    dmp = diff_match_patch.diff_match_patch()
    diffs = dmp.diff_main(text1, text2)
    d_value = dmp.diff_levenshtein(diffs)
    return 1 - float(d_value) / float(max_length)
def default_differ(text1, text2):
    """Yield (op, length) opcodes describing how text1 differs from text2.

    op is "-" (delete from text1), "+" (insert from text2) or "=" (equal
    run). Prefers the diff_match_patch library when installed, handling
    both of its incompatible distributions; falls back to difflib.
    """
    try:
        import diff_match_patch
        if hasattr(diff_match_patch, "diff_match_patch"):
            # google-diff-match-patch: diff_main yields (op, text) pairs
            # with op in {-1, 0, +1}.
            for op, optext in diff_match_patch.diff_match_patch().diff_main(text1, text2):
                if op == -1:
                    op = "-"
                if op == +1:
                    op = "+"
                if op == 0:
                    op = "="
                yield (op, len(optext))
        else:
            # diff_match_patch_python: use diff_unicode when defined
            # (Python 2 only), else the plain diff function, which already
            # yields (op, length) pairs.
            if hasattr(diff_match_patch, 'diff_unicode'):
                f = diff_match_patch.diff_unicode
            else:
                f = diff_match_patch.diff
            for x in f(text1, text2):
                yield x
    except ImportError:
        import difflib
        # BUG FIX: SequenceMatcher's first positional parameter is `isjunk`
        # (a callable); the original passed text1 there, which made every
        # get_opcodes() call raise TypeError. Pass None explicitly.
        diff = difflib.SequenceMatcher(None, text1, text2, autojunk=False)
        for (tag, i1, i2, j1, j2) in diff.get_opcodes():
            if tag == "equal":
                yield ("=", i2 - i1)
            elif tag == "insert":
                yield ("+", j2 - j1)
            elif tag == "delete":
                yield ("-", i2 - i1)
            elif tag == "replace":
                yield ("-", i2 - i1)
                yield ("+", j2 - j1)
def _merge_code(view, edit, code, formatted):
    # Merge `formatted` into the Sublime Text view by applying a minimal
    # diff against `code` (the view's current content), so selections and
    # regions are disturbed as little as possible. Returns whether the
    # view was modified; raises MergeException when the buffer no longer
    # matches `code`.
    def ss(start, end):
        # Read the view's text in [start, end).
        return view.substr(sublime.Region(start, end))
    dmp = diff_match_patch()
    diffs = dmp.diff_main(code, formatted)
    dmp.diff_cleanupEfficiency(diffs)
    i = 0          # running character offset into the view's buffer
    dirty = False  # True once the view has been modified
    for k, s in diffs:
        l = len(s)
        if k == 0:
            # match: the view must still agree with the equal chunk,
            # otherwise the buffer changed under us
            l = len(s)
            if ss(i, i + l) != s:
                raise MergeException('mismatch', dirty)
            i += l
        else:
            dirty = True
            if k > 0:
                # insert
                view.insert(edit, i, s)
                i += l
            else:
                # delete: verify the text about to be removed still matches
                if ss(i, i + l) != s:
                    raise MergeException('mismatch', dirty)
                view.erase(edit, sublime.Region(i, i + l))
    return dirty
def diff_details(self):
    # Return the diff (list of (op, text) pairs) between this version's
    # text and the previous version's, computing and persisting it as JSON
    # on first use.
    if self.diff_details_json is not None:
        return json.loads(self.diff_details_json)
    pv = self.previous_version()
    if pv is None:
        # First version: nothing to diff against.
        diff_details = []
    else:
        old = pv.text()
        cur = self.text()
        if old is None or cur is None:
            diff_details = []
        else:
            dmp = diff_match_patch.diff_match_patch()
            dmp.Diff_Timeout = 3 # seconds; default of 1 is too little
            # .decode implies old/cur are byte strings — confirm encoding.
            diff = dmp.diff_main(old.decode('utf-8'), cur.decode('utf-8'))
            dmp.diff_cleanupSemantic(diff)
            diff_details = diff
    self.diff_details_json = json.dumps(diff_details, ensure_ascii=False)
    self.save()
    # Recurse once: the JSON is now cached, so this returns the parsed
    # (round-tripped) value, keeping the return type consistent.
    return self.diff_details()
def delta_python(orig, dest, patterns=REACTIVE_PATTERNS, context=2):
    """Delta two python files looking for certain patterns.

    Yields [line_number, last_equal_chunk, changed_chunk] for every changed
    diff chunk whose preceding unchanged text matches one of *patterns*.
    Inputs may be path() objects or file-like objects.
    """
    if isinstance(orig, path):
        od = orig.text()
    elif hasattr(orig, 'read'):
        od = orig.read()
    else:
        raise TypeError("Expected path() or file(), got %s" % type(orig))
    if isinstance(dest, path):
        dd = dest.text()
    # BUG FIX: this previously tested hasattr(orig, 'read') — the wrong
    # object — so a file-like `dest` paired with a path `orig` could call
    # .read() on an object without it.
    elif hasattr(dest, 'read'):
        dd = dest.read()
    else:
        raise TypeError("Expected path() or file(), got %s" % type(dest))

    differ = diff_match_patch()
    linect = 0        # running line number within the destination text
    lastMatch = None  # most recent unchanged chunk, if any
    for res in differ.diff_main(od, dd):
        if res[0] == diff_match_patch.DIFF_EQUAL:
            linect += res[1].count('\n')
            lastMatch = res[:]
            continue
        elif res[0] == diff_match_patch.DIFF_INSERT:
            linect += res[1].count('\n')
        else:
            linect -= res[1].count('\n')
        # BUG FIX: a diff that starts with an insert/delete has no
        # preceding equal chunk; guard against indexing None.
        if lastMatch is None:
            continue
        for p in patterns:
            if p.search(lastMatch[1]):
                yield [linect, lastMatch, res]
                break
def GET(self):
    # web.py handler: show a byte-level diff between an original sample
    # and its crashing mutation, identified by the `id` query parameter.
    if not 'user' in session or session.user is None:
        # Not logged in: show the login form instead.
        f = register_form()
        return render.login(f)
    i = web.input()
    if not i.has_key("id"):
        return render.error("No crash identifier given")
    # NOTE(review): is_diff is computed but never used below — confirm.
    if i.has_key("diff"):
        is_diff = True
    else:
        is_diff = False
    db = connect_db()
    original_file, crash_file = find_original_file(db, i.id)
    if original_file is None:
        return render.error("Cannot find original sample.")
    dmp = diff_match_patch()
    # NOTE(review): file handles are never closed (leaked until GC).
    buf1 = open(original_file, "rb").read()
    buf2 = open(crash_file, "rb").read()
    # diff_main(..., False, False): no line-mode speedup, no deadline.
    differences = dmp.diff_main(buf1, buf2, False, False)
    return render.show_diff(original_file, crash_file, buf1, buf2, \
                            differences, hexdump)
def compute_similarity(text1, text2):
    """Return the fraction (0..1) of the longer text that is common to
    both texts, per a full diff-match-patch character diff.
    """
    # BUG FIX: two empty inputs previously raised ZeroDivisionError;
    # identical empty texts are maximally similar.
    text_length = max(len(text1), len(text2))
    if text_length == 0:
        return 1.0
    dmp = diff_match_patch()
    dmp.Diff_Timeout = 0  # disable the timeout: always compute a full diff
    diff = dmp.diff_main(text1, text2)
    # total length of the equal (op == 0) chunks
    common_text = sum(len(txt) for op, txt in diff if op == 0)
    sim = common_text / text_length
    return sim
def as_html(self):
    """Render each (left, right) value pair as marked-safe diff HTML."""
    dmp = diff_match_patch()
    rendered = []
    for old, new in zip(self.left, self.right):
        # For newly added objects, diff against an empty left side.
        if old != new and self.new:
            old = ""
        rendered.append(mark_safe(html_diff(old, new, dmp)))
    return rendered
def __init__(self, context, id=None):
    # Per-component runtime state for a live-rendered component tree.
    self._context = context
    self._destroy_sent = False    # destroy message already pushed?
    self._redirect_sent = False   # redirect message already pushed?
    self._redirected_to = None    # target once a redirect is sent
    self._last_sent_html = ''     # last HTML payload, used for diffing
    self._diff = diff_match_patch()
    self._children = ComponentHerarchy(context=context)
    self.subscriptions = set()
    # Fall back to a random uuid4 when no id was supplied.
    self.id = str(id or uuid4())
def compute_similarity_and_diff(text1, text2):
    """Return (similarity, diff) for two texts using diff_match_patch.

    similarity is the total length of the common diff chunks divided by the
    length of the longer input; 1.0 when both inputs are empty.
    """
    dmp = diff_match_patch()
    dmp.Diff_Timeout = 0.0  # no time limit: always produce an exact diff
    diff = dmp.diff_main(text1, text2, False)
    # length of the equal (op == 0) chunks
    common_text = sum(len(txt) for op, txt in diff if op == 0)
    text_length = max(len(text1), len(text2))
    # BUG FIX: guard the identical-empty case, which previously raised
    # ZeroDivisionError.
    sim = 1.0 if text_length == 0 else common_text / text_length
    return sim, diff
def topic(data, tags, msg):
    # WeeChat signal callback: when a channel topic changes, print a
    # colorized diff of the old vs. new topic into the channel buffer.
    server = tags.split(",")[0]
    # Parse ":usermask TOPIC #channel :new topic" from the raw IRC line.
    match = re.search(r':(\S+)\s+TOPIC\s+(\S+)\s+:(.*)', msg)
    if not match:
        return weechat.WEECHAT_RC_ERROR
    usermask, channel, newtopic = match.groups()
    nick, host = usermask.split("!", 1)
    buffer = weechat.buffer_search("irc", server + "." + channel)
    weechat.prnt("", server + "." + channel)
    if not buffer:
        return weechat.WEECHAT_RC_ERROR
    oldtopic = weechat.buffer_get_string(buffer, "title")
    if oldtopic == None:
        oldtopic = ""
    dmp = diff_match_patch.diff_match_patch()
    diff = dmp.diff_main(oldtopic, newtopic)
    dmp.diff_cleanupEfficiency(diff)
    topic = ""
    color_reset = weechat.color("reset")
    color_ins = weechat.color(weechat.config_get_plugin("color_ins"))
    color_del = weechat.color(weechat.config_get_plugin("color_del"))
    for chunk in diff:
        changed, text = chunk
        # `changed` doubles as a list index into the color tables:
        # 0 (unchanged), 1 (added), -1 (removed, i.e. last element)
        topic += "%s%s%s" % (
            ["", color_ins, color_del][changed],
            text,
            ["", color_reset, color_reset][changed])
    weechat.prnt_date_tags(buffer, 0, "irc_topicdiff",
        "%s%s%s%s has changed topic for %s%s%s: %s" % (
            weechat.prefix("network"),
            # Color the nick per server-message preference, falling back
            # to the generic chat_nick color.
            weechat.color(weechat.info_get("irc_nick_color", nick)) \
                if weechat.config_boolean("irc.look.color_nicks_in_server_messages") \
                else weechat.color("chat_nick"),
            nick,
            color_reset,
            weechat.color("chat_channel"),
            channel,
            color_reset,
            topic))
    return weechat.WEECHAT_RC_OK
def create_patch(fun1, fun2):
    """
    Create a patch from fun1 to fun2 (textual patch over their sources).
    """
    engine = diff_match_patch.diff_match_patch()
    src_from = inspect.getsource(fun1)
    src_to = inspect.getsource(fun2)
    # NOTE: diff_lineMode is an internal dmp helper (line-mode diff;
    # third argument is the deadline, None = unbounded).
    line_diff = engine.diff_lineMode(src_from, src_to, None)
    return engine.patch_toText(engine.patch_make(line_diff))
def get_file_version_by_id(self, file_id, ref_id, before_date=None):
    """
    get file version for a given file id within a reference and before a given date

    :type file_id: int
    :param file_id: the id of the target file

    :type ref_id: str
    :param ref_id: the id of the reference

    :type before_date: str (YYYY-mm-dd)
    :param before_date: if not null, it returns the last version of the file before the given date
    """
    before_date = self._process_date(before_date)
    changes = self._git_dao.select_file_changes(file_id, ref_id, before_date, patch=True)
    # BUG FIX: sorted() returns a new list and the original discarded its
    # result, so patches could be applied out of committed-date order.
    # Sort in place (ascending by commit date) instead.
    changes.sort(key=lambda k: k['committed_date'])

    # the digestion is needed because the library diff-match-patch requires
    # that the preamble of the diff information (@@ -.. +.. @@) appears
    # alone in one line. Sometimes GitPython returns such a preamble mixed
    # with other data
    diff_util = diff_match_patch()
    diff_util.Diff_Timeout = 0
    diff_util.Match_Distance = 5000
    diff_util.Match_Threshold = 0.8
    diff_util.Patch_DeleteThreshold = 0.8

    # Hunk header pattern, compiled once (raw string avoids \d escapes).
    hunk_header = re.compile(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@")

    content = ""
    res_merge = []
    for change in changes:
        digested_patches = []
        p = change.get('patch')
        for line in p.split('\n'):
            m = hunk_header.match(line)
            if m:
                # Split the hunk header onto its own line, keeping any
                # trailing payload as the following line.
                rest = line.split(m.group())[1]
                digested_patches.append(m.group())
                if rest:
                    digested_patches.append(rest.rstrip())
            else:
                digested_patches.append(line)

        ps = diff_util.patch_fromText("\n".join(digested_patches))
        res = diff_util.patch_apply(ps, content)
        content = res[0]
        res_merge = res_merge + res[1]

    self._logger.info(
        str(len([r for r in res_merge if r])) + " out of " + str(len(res_merge)) +
        " patches were successfully used to rebuild the file")
    return content
def find_bad(line_1, line_2):
    # Walk the character diff between two lines and collect (old, new)
    # change pairs. Insertions/deletions of "special" characters (space,
    # quote) are reported with surrounding context via space_bounded()
    # instead of as the bare character.
    changes = []
    print_set = ([' ', "'"])  # characters reported with context
    diffs = diff_match_patch.diff_match_patch().diff_main(line_1, line_2)
    last_change = None  # pending deletion awaiting a possible replacement
    last_1_idx = -1     # current cursor into line_1
    last_2_idx = -1     # current cursor into line_2
    for code, string in diffs:
        # print code, string
        if last_change:
            if code == 0:
                # pending deletion with no matching insertion
                changes.append((
                    last_change,
                    '',
                ))
            elif code == 1:
                # deletion followed by insertion: a replacement pair
                changes.append((
                    last_change,
                    string,
                ))
            last_change = None
        elif code == 1 and string in print_set:
            changes.append((
                space_bounded(line_1, last_1_idx),
                # plus one because will add it later
                space_bounded(line_2, last_2_idx + 1),
            ))
        elif code == 1:
            changes.append((
                '',
                string,
            ))
        if code == -1 and string in print_set:
            last_1_idx += len(string)
            changes.append((
                space_bounded(line_1, last_1_idx),
                space_bounded(line_2, last_2_idx),
            ))
        elif code == -1:
            # remember the deletion; the next chunk decides whether it is
            # a pure delete or half of a replacement
            last_change = string
            last_1_idx += len(string)
        elif code == 1:
            last_2_idx += len(string)
        else:
            # equal chunk advances both cursors
            last_2_idx += len(string)
            last_1_idx += len(string)
    if last_change:
        # trailing deletion at the very end of the diff
        changes.append((
            last_change,
            '',
        ))
    return changes
def __init__(self):
    # Differ state carried across successive diff operations.
    self.dmp = diff_match_patch()
    self.prev = None       # previous text snapshot
    self.key_file = None
    # Description of the most recent diff: operation, size and location.
    # op, size
    self.last_diff = {
        'op': '',
        'size': -1,
        'start': -1,
        'count': -1,
    }
def _get_revision(self, index): diff = diff_match_patch() # append zero to the array so that a slice with negative indices can retrieve the last element patches = [ diff.patch_fromText(revision.content)[0] for revision in (self.revisions + [0])[1:index] ] if patches == []: #no patches to apply return self.revisions[index].content else: return diff.patch_apply(patches, self.revisions[0].content)[0]
def add_revision(self, revision):
    '''Call this instead of append to add a document revision'''
    if not self.revisions:
        # the very first revision is stored as full text
        self.revisions.append(revision)
        return
    # every later revision is stored as a patch against the current text
    engine = diff_match_patch()
    patch = engine.patch_make(self.current_revision, revision.content)
    revision.content = engine.patch_toText(patch)
    self.revisions.append(revision)
def recognize(filename):
    # OCR a 4-digit captcha-style image: binarize it, cut out four fixed
    # digit boxes, and match each box against stored string-encoded samples.
    # NOTE(review): dmp is unused since the levenshtein comparison below is
    # commented out in favor of difflib.
    dmp = dmplib.diff_match_patch()
    with open('numSamples.json', 'r') as fi:
        sample = json.load(fi)

    def imageArrayToString(img):
        # Flatten a binary image into a '0'/'1' string, one row per line
        # (255 = white background -> 0, anything else = ink -> 1).
        string = ''
        for j in img:
            for k in j:
                t = k
                if (t == 255):
                    t = 0
                else:
                    t = 1
                string += str(t)
            string += "\n"
        return (string)

    def recognizeOne(imgstr, type):
        # Score the image string against every sample of every digit in the
        # given sample group and return the best-matching digit as a string.
        similarity = {}
        for i in range(0, 10):
            similarity.update({i: []})
        tofind = sample[type]
        for i in range(0, 10):
            for j in tofind[str(i)]:
                '''diffs=dmp.diff_main(j,imgstr)
                diffvalue=dmp.diff_levenshtein(diffs)
                maxLength=max(len(j),len(imgstr))
                smlrt=(1-float(diffvalue)/float(maxLength))*100
                '''
                smlrt = difflib.SequenceMatcher(None, j, imgstr).ratio()
                similarity[i].append(smlrt)
        # Pick the digit with the single highest sample similarity.
        maxvalue = 0.0
        maxnum = ''
        for i in range(0, 10):
            for j in similarity[i]:
                if (j > maxvalue):
                    maxvalue = j
                    maxnum = str(i)
        return (maxnum)

    img = cv2.imread(filename)
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Threshold at 180: brighter pixels become 255 (background).
    retval, img_binized = cv2.threshold(img_gray, 180, 255, cv2.THRESH_BINARY)
    # Fixed pixel boxes for the four digit positions.
    digits = [
        img_binized[7:31, 0:14],
        img_binized[12:36, 18:36],
        img_binized[7:31, 39:57],
        img_binized[12:36, 62:80]
    ]
    # The first digit uses its own sample group; the rest share one.
    stri = recognizeOne(imageArrayToString(digits[0]), 'first')
    for i in range(1, 4):
        stri += recognizeOne(imageArrayToString(digits[i]), 'other')
    return (stri)
def diff_get(txt1, txt2):
    """Return a unified-style text listing of the insertions ('+ ') and
    deletions ('- ') between two texts; unchanged chunks are omitted."""
    engine = diff_match_patch()
    chunks = engine.diff_compute(txt1, txt2, True, 3)
    lines = []
    for op, content in chunks:
        if op < 0:
            lines.append('- ' + content + '\n')
        if op > 0:
            lines.append('+ ' + content + '\n')
    return ''.join(lines)
def get_trace_diff(trace1, trace2):
    # Compare two trace summaries line-by-line at task granularity and
    # return (distance, html): a distance metric plus an HTML rendering of
    # every per-task diff and all unmatched lines.
    diff = ""
    # NOTE(review): these file handles are never closed (leaked until GC);
    # 'r+' also opens them writable although they are only read.
    trace1_summary = open(os.path.join("./summary", trace1 + ".txt"), 'r+').readlines()
    trace2_summary = open(os.path.join("./summary", trace2 + ".txt"), 'r+').readlines()
    dmp = dmp_module.diff_match_patch()
    html = ""
    matched_lines_t2 = set()    # indices in trace2 already paired up
    unmatched_lines_t1 = set()  # indices in trace1 with no partner
    # We want to compare summaries at the granularity of task
    task_execution_diffs = []
    for i in range(len(trace1_summary)):
        line1 = trace1_summary[i]
        match_found = False
        for j in range(len(trace2_summary)):
            if match_found:
                break
            line2 = trace2_summary[j]
            # The first line in the summary are the templated overview so they are comparable
            if i == 0 and j == 0:
                diff = dmp.diff_main(line1, line2)
                dmp.diff_cleanupSemantic(diff)
                html += dmp.diff_prettyHtml(diff) + "\n"
                matched_lines_t2.add(j)
                match_found = True
            if j in matched_lines_t2:
                continue
            l1_tokens = line1.split('.')
            l2_tokens = line2.split('.')
            # Two lines are of the same task if their 1st sentences match
            if l1_tokens[0] == l2_tokens[0]:
                match_found = True
                diff = dmp.diff_main(line1, line2)
                dmp.diff_cleanupSemantic(diff)
                html += dmp.diff_prettyHtml(diff) + "\n"
                matched_lines_t2.add(j)
                task_execution_diffs += [diff]
        if not match_found:
            unmatched_lines_t1.add(i)
    # Lines that never matched are diffed against the empty string so they
    # render as pure deletions (trace1) or insertions (trace2).
    all_unmatched_lines = []
    for i in range(len(trace1_summary)):
        if i in unmatched_lines_t1:
            diff = dmp.diff_main(trace1_summary[i], "")
            dmp.diff_cleanupSemantic(diff)
            html += dmp.diff_prettyHtml(diff) + "\n"
            all_unmatched_lines += [trace1_summary[i]]
    for j in range(len(trace2_summary)):
        if j not in matched_lines_t2:
            diff = dmp.diff_main("", trace2_summary[j])
            dmp.diff_cleanupSemantic(diff)
            html += dmp.diff_prettyHtml(diff) + "\n"
            all_unmatched_lines += [trace2_summary[j]]
    # Calculate distance using all_unmatched_lines and task_execution_diffs
    distance = compute_distance(task_execution_diffs, all_unmatched_lines)
    return distance, html
def wiki_decompress2(filename, k):
    # Stream-parse a wiki revision XML dump in which every (k+1)-th
    # revision is stored as full text and the rest as diff-match-patch
    # patches; rebuild each patched revision's full text in place.
    # Returns the thread CPU time spent.
    context_wiki = ET.iterparse(filename, events=("start", "end"))
    context_wiki = iter(context_wiki)
    event_wiki, root_wiki = next(context_wiki)
    intervalLength = k
    dmp = diff_match_patch()
    # Keep the Orginal text after every 'm' revisions
    m = intervalLength + 1
    g = 0  # running revision counter (diagnostic only)
    start = time.thread_time()
    for event, elem in context_wiki:
        if event == "end" and 'Instance' in elem.tag:
            for ch_elem in elem:
                if 'Body' in ch_elem.tag:
                    for each in ch_elem:
                        g = g + 1
                        #print(g)
                        if m != intervalLength + 1:
                            # Patched revision: apply the stored patch to
                            # the previous full text.
                            #print(each.text)
                            current_str = each.text
                            #print(current_str)
                            #print(current_str,prev_str)
                            if prev_str == None:
                                prev_str = ""
                            patches = dmp.patch_fromText(current_str)
                            each.text, _ = dmp.patch_apply(
                                patches, prev_str)
                            #each.text = wikiConverter.encode(prev_str, current_str)
                            #p = dmp.patch_make(prev_str,current_str)
                            #each.text = dmp.patch_toText(p)
                            #prev_str = each.text
                            # NOTE(review): prev_str is NOT updated after
                            # patching (the update above is commented out),
                            # so every patch within an interval applies
                            # against the anchor text — confirm patches are
                            # cumulative from the anchor.
                            # print("Revision ", count, " written")
                            m = m - 1
                            if m == 0:
                                m = intervalLength + 1
                        else:
                            # Full-text anchor revision: use as-is.
                            prev_str = each.text
                            # print("Revision ", count, " written")
                            m = m - 1
                            continue
            elem.clear()
            root_wiki.clear()
    end = time.thread_time()
    return end - start
def as_html(self):
    """Render a safe HTML diff for each (left, right) value pair."""
    engine = diff_match_patch()
    rendered = []
    for old_val, new_val in zip(self.left, self.right):
        # For brand-new objects, present the whole right side as an
        # insertion by blanking the differing left value.
        if old_val != new_val and self.new:
            old_val = ""
        delta = engine.diff_main(force_str(old_val), force_str(new_val))
        engine.diff_cleanupSemantic(delta)
        rendered.append(mark_safe(engine.diff_prettyHtml(delta)))
    return rendered
def __init__(self, storage_folder=u"tinydb", db_name='chatbot_db'):
    """Open (or create) a TinyMongo-backed chatbot database.

    :param storage_folder: folder name or absolute path for the TinyDB
        files; defaults to "tinydb".
    :param db_name: attribute name of the database on the connection.
    """
    # Either creates a new database on disk or opens an existing one.
    self.connection = TinyMongoClient(storage_folder)
    self.db = getattr(self.connection, db_name)
    # Google's diff tool apparently needs a small Diff_Timeout to
    # behave well; see
    # https://github.com/google/diff-match-patch/issues/100
    self._difftool = diff_match_patch()
    self._difftool.Diff_Timeout = 0.01
def __init__(self, idx, value, unit, terms, diff, search_match, match):
    """Store the raw inputs and prepare the per-character tag slots."""
    # Raw inputs, kept as supplied.
    self.idx = idx
    self.value = value
    self.cleaned_value = value  # starts identical to the raw value
    self.unit = unit
    self.terms = terms
    self.diff = diff
    self.search_match = search_match
    self.match = match
    # Tags output: one bucket per character position plus a trailing slot.
    self.tags = [[] for _ in range(len(value) + 1)]
    self.dmp = diff_match_patch()
def __init__(self, parent):
    """Worker object that runs on its own QThread and handles mailbox
    messages.

    :param parent: accepted by the signature but not stored here.
    """
    QObject.__init__(self)
    self.stopped = False  # cooperative stop flag
    self.daemon = True
    self.mutex = QMutex()  # presumably guards shared worker state — verify at call sites
    self.file_tokens = {}
    self.diff_patch = diff_match_patch()
    # NOTE(review): the object is moved to its thread before the signal
    # connections are made; keep this ordering — in Qt, slot delivery
    # depends on the receiver's thread affinity.
    self.thread = QThread()
    self.moveToThread(self.thread)
    self.thread.started.connect(self.started)
    self.sig_mailbox.connect(self.handle_msg)
def calc_str_diff(original_str: str, new_str: str) -> List[Tuple[int, str]]:
    """
    Calculate the string diff between `original_str` and `new_str`.

    :param original_str: original (db) entry
    :param new_str: entry from html
    :return: list of (op, text) change tuples
    """
    engine = diff_match_patch()
    changes = engine.diff_main(original_str, new_str)
    # Merge cosmetically-fragmented edits into human-readable chunks.
    engine.diff_cleanupSemantic(changes)
    return changes
def get_diff(field_name, old_revision, new_revision):
    """Return an HTML diff of one field between two stored revisions."""
    old_value = old_revision.field_dict[field_name]
    new_value = new_revision.field_dict[field_name]
    engine = dmp_module.diff_match_patch()
    changes = engine.diff_main(old_value, new_value)
    engine.diff_cleanupSemantic(changes)
    # diff_prettyHtml marks newlines with a paragraph character added
    # by the library; strip it from the rendered output.
    return engine.diff_prettyHtml(changes).replace('&para;', '')
def diff_list(prev_rev, revision, field):
    """Generates an array which describes the change in text fields.

    Creator-credit fields are diffed via ``field_value`` (which renders
    creator links); the link markup that ``diff_cleanupSemantic`` may
    split across adjacent diff entries is stitched back together so the
    HTML stays valid when marked safe.  Plain text fields are diffed
    directly.  Returns ``None`` for fields that are not diffable.

    :param prev_rev: previous revision object
    :param revision: current revision object
    :param field: attribute name to diff
    :return: list of (op, text) tuples, or None
    """
    # Reuse a single diff engine instead of constructing one per call
    # site (the original built a throwaway instance for each step).
    dmp = diff_match_patch()
    if field in ['script', 'pencils', 'inks', 'colors', 'letters',
                 'editing']:
        diff = dmp.diff_main(field_value(prev_rev, field),
                             field_value(revision, field))
        dmp.diff_cleanupSemantic(diff)
        new_diff = []
        splitted_link = False
        for di in diff:
            if splitted_link:
                # Re-attach the creator-link prefix stripped from the
                # previous entry.
                di = (di[0], ' <a href="/creator/' + di[1])
            if di[0] == 1:
                splitted_link = False
            if di[1].endswith(' <a href="/creator/'):
                # The semantic cleanup cut the diff in the middle of the
                # link markup; strip the prefix here and restore it on
                # the following entries.
                di = (di[0], di[1][:-len(' <a href="/creator/')])
                splitted_link = True
            new_diff.append((di[0], mark_safe(di[1])))
        return new_diff
    # NOTE: 'format' appeared twice in the original list; deduplicated.
    if field in [
            'notes', 'tracking_notes', 'publication_notes', 'characters',
            'synopsis', 'title', 'format', 'color', 'dimensions',
            'paper_stock', 'binding', 'publishing_format', 'name',
            'price', 'indicia_frequency', 'variant_name',
            'source_description', 'gcd_official_name', 'bio'
    ]:
        diff = dmp.diff_main(getattr(prev_rev, field),
                             getattr(revision, field))
        dmp.diff_cleanupSemantic(diff)
        return diff
    else:
        return None
def reverse_patch_sleuth(customize_path):
    """Remove SysPathSleuth from a site customize file by applying its
    saved reverse patch.

    Reads the reverse-patch file next to ``customize_path``, applies it
    with diff-match-patch, writes back the un-patched customize file (or
    deletes it entirely when the reverse patch empties it), and removes
    the reverse-patch file.

    :param customize_path: Path to the sitecustomize/usercustomize file.
    :raises UninstallError: if any patch hunk fails to apply.
    """
    reverse_patch_path = customize_path.with_suffix(REVERSE_PATCH_SUFFIX)
    if not reverse_patch_path.exists():
        # Nothing was installed (or it was already removed).
        return
    sleuth_logger.info(
        "Removing %s from site customize: %s",
        SysPathSleuth.__name__,
        SysPathSleuth.relative_path(customize_path),
    )
    with reverse_patch_path.open() as customize_patch_f:
        patch = customize_patch_f.read()
    dmp = diff_match_patch()
    # NOTE(review): patch_fromText returns patch objects, not strings —
    # the List[str] annotation below is inherited and looks wrong; confirm.
    patches: List[str] = dmp.patch_fromText(patch)
    patched_customize: str
    patch_results: List[bool]
    with customize_path.open("r") as customize_patch_f:
        customize = customize_patch_f.read()
    patched_customize, patch_results = dmp.patch_apply(patches, customize)
    # An empty result means the reverse patch removed everything; in that
    # case the customize file itself is deleted below instead of saved.
    save_patched = bool(patched_customize)
    for patch_result in patch_results:
        if not patch_result:
            raise UninstallError(
                f"Reverse patch failed; patch file: "
                f"{reverse_patch_path}.\n"
                f"Hand edit removal of {SysPathSleuth.__name__}")
    if save_patched:
        with customize_path.open("w") as customize_patch_f:
            customize_patch_f.seek(0)
            customize_patch_f.write(patched_customize)
    reverse_patch_path.unlink()
    if not save_patched:
        customize_path.unlink()
    try:
        # Re-import to refresh state now that the sleuth is gone.
        # pylint: disable=import-outside-toplevel,unused-import
        import sitecustomize
        # pylint: enable=import-outside-toplevel,unused-import
        # This is too sketch...
        # sys.path = sys.path.get_base_list()
        # if isinstance(sys.path, sitecustomize.SysPathSleuth):
        #     error_logger.warning("Hmmm... expected sys.path NOT to be monkey-patched.")
    except (AttributeError, ModuleNotFoundError):
        # This will occur if SysPathSleuth was not installed prior. But, don't skip the
        # uninstall_sleuth() as the user messaging associated with this condition is shared.
        pass
def diff_view(self, request, object_id, version_id, extra_context=None):
    """Generate a diff between document versions.

    Renders an admin page comparing the current object's text fields
    against the field values stored in an older reversion Version.

    :param request: current HttpRequest
    :param object_id: pk of the live object (URL-quoted)
    :param version_id: pk of the historical Version to compare against
    :param extra_context: optional dict merged into the template context
    :return: TemplateResponse for the object_diff template
    """
    opts = self.model._meta
    app_label = opts.app_label
    obj = get_object_or_404(self.model, pk=unquote(object_id))
    obj_old = get_object_or_404(Version, pk=unquote(version_id),
                                object_id=force_text(obj.pk))
    try:
        logger.debug("{0} views diff_view of {1}".format(
            request.user.fullname, obj))
    # NOTE(review): bare except silently covers any failure here (e.g. a
    # user without `fullname`); acceptable for a debug log, but consider
    # narrowing to AttributeError.
    except:
        logger.debug("DocumentAdmin diff_view called without "
                     "object or request.")
    fieldsets = self.get_fieldsets(request, obj)
    # inline_instances = self.get_inline_instances(request, obj)
    d = diff_match_patch()
    diffs = []
    for (name, field_options) in fieldsets:
        if 'fields' in field_options:
            for f in field_options['fields']:
                field = getattr(obj, f)
                # Only diff non-empty textual fields.  NOTE(review):
                # `unicode` and `get_field_by_name` mark this as
                # Python 2 / old-Django code — confirm before porting.
                if (not field) or (type(field) not in (str, unicode)):
                    continue
                diff = d.diff_main(obj_old.field_dict[f] or '', field)
                d.diff_cleanupSemantic(diff)
                diffs.append((opts.get_field_by_name(f)[0].verbose_name,
                              mark_safe(d.diff_prettyHtml(diff))))
    context = {
        'breadcrumbs': self.get_breadcrumbs(request, obj),
        'diffs': diffs,
        'object': obj,
        'opts': self.model._meta,
        'version_date': obj_old.revision.date_created,
    }
    context.update(extra_context or {})
    return TemplateResponse(request, self.object_diff_template or [
        'admin/%s/%s/object_diff.html' % (app_label,
                                          opts.object_name.lower()),
        'admin/%s/object_diff.html' % app_label,
        'admin/object_diff.html'
    ], context, current_app=self.admin_site.name)
def __extract_instance(self, *args, **kwargs):
    """Reconstruct revision *n* from an interval-encoded revision dict.

    Revisions are stored as full text at 1, m+1, 2m+1, ... and as
    diff-match-patch patch text in between; intermediate revisions are
    rebuilt by chaining patches from the nearest preceding snapshot.

    :key revisionDict: mapping of 1-based revision number -> stored text
    :key intervalLength: m, spacing between full-text snapshots
    :key instance_num: n, the revision number to reconstruct
    :return: the reconstructed revision text
    """
    revisionsDict = kwargs['revisionDict']
    m = kwargs['intervalLength']
    n = kwargs['instance_num']
    dmp = diff_match_patch()
    if (n - 1) % m != 0:
        # Start from the nearest preceding full snapshot and apply
        # each stored patch in sequence up to revision n.
        count = int((n - 1) / m) * m + 1
        prev_str = revisionsDict[count]
        result = prev_str
        while count < n:
            count += 1
            patches = dmp.patch_fromText(revisionsDict[count])
            result, _ = dmp.patch_apply(patches, prev_str)
            prev_str = result
    else:
        # Revision n is itself a full snapshot; no patching needed.
        result = revisionsDict[n]
    return result
def get_diff_text(text1: str, text2: str) -> List[Tuple[int, str]]:
    """Word-level diff between two texts.

    Each text is split into whitespace-separated words, joined one word
    per line, and diffed with diff-match-patch's line-mode speedup so
    the diff operates on whole words.

    NOTE: the original annotated the parameters as ``List[str]``, but
    ``.split()`` is called on them — they must be strings.

    :param text1: original text
    :param text2: text to compare against
    :return: list of (op, text) diff tuples (op is -1, 0, or 1)
    """
    orig_words = '\n'.join(text1.split()) + '\n'
    pred_words = '\n'.join(text2.split()) + '\n'
    diff = diff_match_patch.diff_match_patch()
    diff.Diff_Timeout = 0  # no time limit: always compute the full diff
    # Line-mode trick: encode each word-line as a char, diff the encoded
    # strings, then expand back to word-lines.
    orig_enc, pred_enc, enc = diff.diff_linesToChars(orig_words, pred_words)
    diffs = diff.diff_main(orig_enc, pred_enc, False)
    diff.diff_charsToLines(diffs, enc)
    return diffs
def get_diff(current, stored):
    """Build per-field HTML diffs between two model instances.

    :param current: instance holding the current values
    :param stored: instance holding the stored values to compare against
    :return: list of (field, current_value, stored_value, safe_html) tuples
    """
    engine = diff_match_patch()
    rows = []
    for field in current._meta.fields:
        v1 = getattr(current, field.name, "")
        v2 = getattr(stored, field.name, "")
        changes = engine.diff_main(unicode(v1), unicode(v2))
        engine.diff_cleanupSemantic(changes)
        safe_html = mark_safe(engine.diff_prettyHtml(changes))
        rows.append((field, v1, v2, safe_html))
    return rows
def dmpDiff(in1, in2, dispind):
    """Diff the WKT representations of two geometries, timing each phase.

    :param in1: first geometry-like object exposing ``to_wkt()``
    :param in2: second geometry-like object exposing ``to_wkt()``
    :param dispind: display index passed through unchanged
    :return: (diff, dispind, (diff_seconds, cleanup_seconds))
    """
    import time
    import diff_match_patch as dmp_module

    t_start = time.process_time()
    engine = dmp_module.diff_match_patch()
    delta = engine.diff_main(in1.to_wkt(), in2.to_wkt())
    t_diff = time.process_time()
    engine.diff_cleanupSemantic(delta)
    t_clean = time.process_time()
    return (delta, dispind, (t_diff - t_start, t_clean - t_diff))
def create_pat_file(f1, f2):
    """Write a diff-match-patch patch file that transforms f1's text
    into f2's text.

    :param f1: path of the source file
    :param f2: path of the target file
    """
    print('--------patch start--------')
    source_text = read_file(f1)
    target_text = read_file(f2)
    engine = dmp_module.diff_match_patch()
    # Diff the two texts, then serialize the resulting patch set into
    # the patch file.
    delta = engine.diff_main(source_text, target_text)
    patch_text = engine.patch_toText(engine.patch_make(delta))
    write_file(PATCH_FILE, patch_text)
    print(PATCH_FILE + ' file created')
    print('--------patch end--------')
def verify_patch(f_patch, f1):
    """Apply a serialized patch file to f1 and report whether it applied
    cleanly; on success the patched text is written to 'patched_<f1>'.

    :param f_patch: path of the diff-match-patch patch file
    :param f1: path of the file to patch
    """
    print('--------verify start--------')
    patches_text = read_file(f_patch)
    f1_text = read_file(f1)
    dmp = dmp_module.diff_match_patch()
    patches = dmp.patch_fromText(patches_text)
    patched_text, results = dmp.patch_apply(patches, f1_text)
    # patch_apply returns one success flag per patch.  The original
    # checked only the first flag (result[1][0]), wrongly reporting
    # success when a later patch failed — and crashing on an empty
    # patch list.  Require every patch to have applied.
    if results and all(results):
        write_file('patched_' + f1, patched_text)
        print('patch verify success')
    else:
        print('patch verify failure')
    print('--------verify end--------')