def findrenames(repo, added=None, removed=None, threshold=0.5):
    '''find renamed files -- yields (before, after, score) tuples

    Each file in ``added`` is compared with each file in ``removed``.  The
    removed file with the highest score that is at least ``threshold`` is
    reported as the rename source; on equal scores the later candidate wins.

    If either ``added`` or ``removed`` is None, *both* are replaced with
    elements 1 and 2 of ``repo.status()``.

    The score is ``2.0 * equal / (len(added data) + len(removed data))``
    where ``equal`` is the number of bytes in the added file's lines that
    fall inside blocks reported by ``bdiff.blocks()``.
    '''
    if added is None or removed is None:
        added, removed = repo.status()[1:3]
    ctx = repo.changectx()
    for a in added:
        aa = repo.wread(a)
        # The split of the added file does not depend on the removed
        # candidate, so do it once per added file instead of once per pair.
        alines = mdiff.splitnewlines(aa)
        bestname, bestscore = None, threshold
        for r in removed:
            rr = ctx.filectx(r).data()
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            matches = bdiff.blocks(aa, rr)
            for x1, x2, y1, y2 in matches:
                for line in alines[x1:x2]:
                    equal += len(line)

            lengths = len(aa) + len(rr)
            # two empty files have nothing to compare; skip to avoid
            # dividing by zero
            if lengths:
                myscore = equal * 2.0 / lengths
                if myscore >= bestscore:
                    bestname, bestscore = r, myscore
        if bestname:
            yield bestname, a, bestscore
def findrenames(repo, added=None, removed=None, threshold=0.5):
    '''find renamed files -- yields (before, after, score) tuples

    For every added file the removed files are scanned for the best match.
    A candidate is accepted when its score is at least ``threshold`` and not
    lower than the best score seen so far, so later candidates win ties.

    When ``added`` or ``removed`` is None, both lists are taken from
    ``repo.status()[1:3]``, overriding whichever one was supplied.

    The score is twice the byte count of the added file's lines that lie in
    blocks returned by ``bdiff.blocks()``, divided by the combined length of
    the two files.
    '''
    if added is None or removed is None:
        added, removed = repo.status()[1:3]
    ctx = repo.changectx()
    for a in added:
        aa = repo.wread(a)
        # Loop-invariant with respect to the removed candidates: split the
        # added file into lines a single time.
        alines = mdiff.splitnewlines(aa)
        bestname, bestscore = None, threshold
        for r in removed:
            rr = ctx.filectx(r).data()
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            for x1, x2, y1, y2 in bdiff.blocks(aa, rr):
                for line in alines[x1:x2]:
                    equal += len(line)

            lengths = len(aa) + len(rr)
            # guard against dividing by zero when both files are empty
            if lengths:
                myscore = equal * 2.0 / lengths
                if myscore >= bestscore:
                    bestname, bestscore = r, myscore
        if bestname:
            yield bestname, a, bestscore
def score(text):
    """Return a similarity score between ``text`` and the original data.

    ``data()`` (defined outside this block) supplies ``(orig, lines)``;
    ``lines`` is presumably ``orig`` split into lines -- confirm against
    the definition of ``data``.

    The score is ``2.0 * equal / (len(text) + len(orig))`` where ``equal``
    is the number of bytes in the ``lines`` entries covered by the blocks
    that ``bdiff.blocks(text, orig)`` reports.  When both ``text`` and
    ``orig`` are empty the score is 0.0.
    """
    orig, lines = data()
    lengths = len(text) + len(orig)
    if not lengths:
        # Both sides are empty: the ratio below would divide by zero.
        # Report "no similarity" instead of raising.
        return 0.0
    # bdiff.blocks() returns blocks of matching lines
    # count the number of bytes in each
    equal = 0
    matches = bdiff.blocks(text, orig)
    for x1, x2, y1, y2 in matches:
        for line in lines[y1:y2]:
            equal += len(line)

    return equal * 2.0 / lengths
def score(text):
    """Return a similarity score for ``text`` against ``fctx``.

    Shortcuts, checked in this order:

    - empty ``text`` scores 0.0
    - a false result from ``fctx.cmp(text)`` scores 1.0 (presumably this
      means the contents are identical -- confirm against ``cmp``)
    - a ``threshold`` of exactly 1.0 scores 0.0 without running the diff

    Otherwise the score is twice the byte count of the lines of ``text``
    that lie in blocks reported by ``bdiff.blocks()``, divided by the
    combined length of ``text`` and ``fctx.data()``.
    """
    if len(text) == 0:
        return 0.0
    differs = fctx.cmp(text)
    if not differs:
        return 1.0
    if threshold == 1.0:
        return 0.0

    original = fctx.data()
    textlines = mdiff.splitnewlines(text)
    # bdiff.blocks() returns blocks of matching lines;
    # add up the number of bytes in each of them
    matched = 0
    for x1, x2, y1, y2 in bdiff.blocks(text, original):
        matched += sum(len(entry) for entry in textlines[x1:x2])

    total = len(text) + len(original)
    return matched * 2.0 / total
def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False):
    """Yield (block, kind) pairs for the ranges of text1 and text2.

    block is a four-item [a1, a2, b1, b2] style entry and kind is one of:

    - '=' for a block returned by bdiff.blocks()
    - '!' for the gap between two consecutive bdiff blocks
    - '~' for a gap whose two sides compare equal after wsclean(); this is
      only checked when opts.ignoreblanklines is set

    opts defaults to defaultopts.  When opts.ignorews or
    opts.ignorewsamount is set, both texts go through wsclean() before
    being diffed.

    lines1 and lines2 may carry text1 and text2 already split into lines;
    when they are None and needed, splitnewlines() computes them once.

    refine is accepted but is not read anywhere in this implementation.
    """
    if opts is None:
        opts = defaultopts
    if opts.ignorews or opts.ignorewsamount:
        text1 = wsclean(opts, text1, False)
        text2 = wsclean(opts, text2, False)

    # The first match is special: there is no previous match, so the gap
    # before it is measured from the start of both texts.
    previous = [0, 0, 0, 0]
    for match in bdiff.blocks(text1, text2):
        gap = [previous[1], match[0], previous[3], match[2]]
        previous = match

        # bdiff sometimes gives huge matches past eof; an empty gap on both
        # sides covers that case and the special first match, so nothing
        # is reported for it
        if not (gap[0] == gap[1] and gap[2] == gap[3]):
            kind = '!'
            if opts.ignoreblanklines:
                if lines1 is None:
                    lines1 = splitnewlines(text1)
                if lines2 is None:
                    lines2 = splitnewlines(text2)
                old = wsclean(opts, "".join(lines1[gap[0]:gap[1]]))
                new = wsclean(opts, "".join(lines2[gap[2]:gap[3]]))
                if old == new:
                    kind = '~'
            yield gap, kind
        yield match, '='
def pair(parent, child):
    """Copy entries from parent[0] into child[0] for every matching block.

    parent and child are indexable pairs: item 1 is what is handed to
    bdiff.blocks(), item 0 is a sequence that supports slice assignment
    (presumably one entry per line of item 1 -- confirm against callers).

    child[0] is modified in place and child itself is returned.
    """
    parententries, parenttext = parent[0], parent[1]
    childentries, childtext = child[0], child[1]
    for a1, a2, b1, b2 in bdiff.blocks(parenttext, childtext):
        # the range b1:b2 of the child matches a1:a2 of the parent, so it
        # takes over the parent's entries
        childentries[b1:b2] = parententries[a1:a2]
    return child
def get_matching_blocks(a, b):
    """Return the blocks from bdiff.blocks(a, b) as a list of triples.

    Each bdiff entry (a1, a2, b1, b2) becomes (a1, b1, a2 - a1): the start
    of the block in ``a``, its start in ``b``, and its length measured on
    the ``a`` side.
    """
    return [(a1, b1, a2 - a1) for a1, a2, b1, b2 in bdiff.blocks(a, b)]
def bunidiff(t1, t2, l1, l2, header1, header2, opts=defaultopts):
    """Yield the lines of a unified diff, header included.

    t1, t2 -- the texts handed to bdiff.blocks() (after wsclean() when
              opts.ignorews or opts.ignorewsamount is set)
    l1, l2 -- the line lists the hunks are built from; presumably t1 and t2
              split into lines -- confirm against callers
    header1, header2 -- names interpolated into the "---" and "+++" lines
    opts -- diff options; this function reads context, showfunc, ignorews,
            ignorewsamount and ignoreblanklines

    The two header lines are emitted once, right before the first hunk, so
    nothing at all is yielded when no differing range is found.
    """
    def contextend(l, length):
        # last context line after a change, clamped to the end of the file
        return min(l + opts.context, length)

    def contextstart(l):
        # first context line before a change, clamped to the start
        return max(l - opts.context, 0)

    def yieldhunk(hunk, header):
        if header:
            for x in header:
                yield x
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the start of the context
            # to find a line starting with a word character
            # (alphanumeric or underscore).
            for x in xrange(astart - 1, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context after the last change of the hunk
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    header = ["--- %s\t\n" % header1, "+++ %s\t\n" % header2]

    if opts.showfunc:
        # raw string: '\w' in a plain literal is an invalid escape sequence
        funcre = re.compile(r'\w')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk, header):
                    yield x
                # we only want to yield the header if the files differ, and
                # we only want to yield it once.
                header = None
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # delta is the very list stored in the hunk, so extend it in place
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk, header):
            yield x
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    """Yield the hunk lines of a unified diff (no file header).

    t1, t2 -- the texts handed to bdiff.blocks() (after wsclean() when
              opts.ignorews or opts.ignorewsamount is set)
    l1, l2 -- the line lists the hunks are built from; presumably t1 and t2
              split into lines -- confirm against callers
    opts -- diff options; this function reads context, showfunc, ignorews,
            ignorewsamount and ignoreblanklines

    Each hunk is an "@@ ... @@" line followed by lines prefixed with ' ',
    '-' or '+'.  Changes whose context would overlap are merged into one
    hunk.
    """
    def contextend(l, length):
        # last context line after a change, clamped to the end of the file
        return min(l + opts.context, length)

    def contextstart(l):
        # first context line before a change, clamped to the start
        return max(l - opts.context, 0)

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            # walk backwards from the start of the context
            # to find a line starting with a word character
            # (alphanumeric or underscore).
            for x in xrange(astart - 1, -1, -1):
                t = l1[x].rstrip()
                if funcre.match(t):
                    func = ' ' + t[:40]
                    break

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context after the last change of the hunk
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    if opts.showfunc:
        # raw string: '\w' in a plain literal is an invalid escape sequence
        funcre = re.compile(r'\w')

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # delta is the very list stored in the hunk, so extend it in place
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x
def _unidiff(t1, t2, l1, l2, opts=defaultopts):
    """Yield the hunk lines of a unified diff (no file header).

    t1, t2 -- the texts handed to bdiff.blocks() (after wsclean() when
              opts.ignorews or opts.ignorewsamount is set)
    l1, l2 -- the line lists the hunks are built from; presumably t1 and t2
              split into lines -- confirm against callers
    opts -- diff options; this function reads context, showfunc, ignorews,
            ignorewsamount and ignoreblanklines

    Each hunk is an "@@ ... @@" line followed by lines prefixed with ' ',
    '-' or '+'.  Changes whose context would overlap are merged into one
    hunk.
    """
    def contextend(l, length):
        # last context line after a change, clamped to the end of the file
        return min(l + opts.context, length)

    def contextstart(l):
        # first context line before a change, clamped to the start
        return max(l - opts.context, 0)

    # [position where the previous search started, last function text found]
    lastfunc = [0, '']

    def yieldhunk(hunk):
        (astart, a2, bstart, b2, delta) = hunk
        aend = contextend(a2, len(l1))
        alen = aend - astart
        blen = b2 - bstart + aend - a2

        func = ""
        if opts.showfunc:
            lastpos, func = lastfunc
            # walk backwards from the start of the context up to the start of
            # the previous hunk context until we find a line starting with an
            # alphanumeric char.
            for i in xrange(astart - 1, lastpos - 1, -1):
                # Slice instead of indexing: an empty entry yields '' (not
                # an IndexError) and a bytes line yields bytes (not an int),
                # both of which support isalnum().
                if l1[i][0:1].isalnum():
                    func = ' ' + l1[i].rstrip()[:40]
                    lastfunc[1] = func
                    break
            # by recording this hunk's starting point as the next place to
            # start looking for function lines, we avoid reading any line in
            # the file more than once.
            lastfunc[0] = astart

        yield "@@ -%d,%d +%d,%d @@%s\n" % (astart + 1, alen,
                                           bstart + 1, blen, func)
        for x in delta:
            yield x
        # trailing context after the last change of the hunk
        for x in xrange(a2, aend):
            yield ' ' + l1[x]

    # bdiff.blocks gives us the matching sequences in the files.  The loop
    # below finds the spaces between those matching sequences and translates
    # them into diff output.
    #
    if opts.ignorews or opts.ignorewsamount:
        t1 = wsclean(opts, t1, False)
        t2 = wsclean(opts, t2, False)

    diff = bdiff.blocks(t1, t2)
    hunk = None
    for i, s1 in enumerate(diff):
        # The first match is special.
        # we've either found a match starting at line 0 or a match later
        # in the file.  If it starts later, old and new below will both be
        # empty and we'll continue to the next match.
        if i > 0:
            s = diff[i - 1]
        else:
            s = [0, 0, 0, 0]
        delta = []
        a1 = s[1]
        a2 = s1[0]
        b1 = s[3]
        b2 = s1[2]

        old = l1[a1:a2]
        new = l2[b1:b2]

        # bdiff sometimes gives huge matches past eof, this check eats them,
        # and deals with the special first match case described above
        if not old and not new:
            continue

        if opts.ignoreblanklines:
            if wsclean(opts, "".join(old)) == wsclean(opts, "".join(new)):
                continue

        astart = contextstart(a1)
        bstart = contextstart(b1)
        prev = None
        if hunk:
            # join with the previous hunk if it falls inside the context
            if astart < hunk[1] + opts.context + 1:
                prev = hunk
                astart = hunk[1]
                bstart = hunk[3]
            else:
                for x in yieldhunk(hunk):
                    yield x
        if prev:
            # we've joined the previous hunk, record the new ending points.
            hunk[1] = a2
            hunk[3] = b2
            delta = hunk[4]
        else:
            # create a new hunk
            hunk = [astart, a2, bstart, b2, delta]

        # delta is the very list stored in the hunk, so extend it in place
        delta.extend([' ' + x for x in l1[astart:a1]])
        delta.extend(['-' + x for x in old])
        delta.extend(['+' + x for x in new])

    if hunk:
        for x in yieldhunk(hunk):
            yield x