def diffwith(self, targetfctx, showchanges=False):
    """calculate fixups needed by examining the differences between
    self.fctxs[-1] and targetfctx, chunk by chunk.

    targetfctx is the target state we move towards. we may or may not be
    able to get there because not all modified chunks can be amended into
    a non-public fctx unambiguously.

    call this only once, before apply().

    update self.fixups, self.chunkstats, and self.targetlines.
    """
    # current stacked content vs. the desired target content
    acontent = self.contents[-1]
    alines = self.contentlines[-1]
    bcontent = targetfctx.data()
    blines = mdiff.splitnewlines(bcontent)
    self.targetlines = blines

    # annotate every current line with (linelog rev, linenum)
    self.linelog.annotate(self.linelog.maxrev)
    annotated = self.linelog.annotateresult  # [(linelog rev, linenum)]
    assert len(annotated) == len(alines)
    # a dummy trailing entry makes insertion at EOF look like any other
    # insertion
    if annotated:
        lastrev, lastnum = annotated[-1]
        annotated.append((lastrev, lastnum + 1))

    # walk the diff hunks and record which ones can be absorbed
    for chunk in self._alldiffchunks(acontent, bcontent, alines, blines):
        newfixups = self._analysediffchunk(chunk, annotated)
        self.chunkstats[0] += bool(newfixups)  # 1 or 0
        self.chunkstats[1] += 1
        self.fixups += newfixups
        if showchanges:
            self._showchanges(alines, blines, chunk, newfixups)
def overlaydiffcontext(ctx, chunks):
    """(ctx, [crecord.uihunk]) -> memctx

    return a memctx with some [1] patches (chunks) applied to ctx.
    [1]: modifications are handled. renames, mode changes, etc. are ignored.
    """
    # sadly the applying-patch logic is hardly reusable, and messy:
    # 1. the core logic "_applydiff" is too heavy - it writes .rej files, it
    #    needs a file stream of a patch and will re-parse it, while we have
    #    structured hunk objects at hand.
    # 2. a lot of different implementations about "chunk" (patch.hunk,
    #    patch.recordhunk, crecord.uihunk)
    # as we only care about applying changes to modified files, no mode
    # change, no binary diff, and no renames, it's probably okay to
    # re-invent the logic using much simpler code here.
    memworkingcopy = {}  # {path: content}
    patchmap = defaultdict(list)  # {path: [(a1, a2, [bline])]}
    for path, info in map(_parsechunk, chunks):
        if not path or not info:
            continue
        patchmap[path].append(info)
    # dict.items() instead of the Python 2-only iteritems(), which raises
    # AttributeError on Python 3
    for path, patches in patchmap.items():
        if path not in ctx or not patches:
            continue
        # apply hunks bottom-up so earlier hunk offsets stay valid
        patches.sort(reverse=True)
        lines = mdiff.splitnewlines(ctx[path].data())
        for a1, a2, blines in patches:
            lines[a1:a2] = blines
        # file data is bytes; joining with a str separator raises
        # TypeError on Python 3 (b'' == '' on Python 2, so this is safe
        # on both)
        memworkingcopy[path] = b''.join(lines)
    return overlaycontext(memworkingcopy, ctx)
def diffwith(self, targetfctx, fm=None):
    """calculate fixups needed by examining the differences between
    self.fctxs[-1] and targetfctx, chunk by chunk.

    targetfctx is the target state we move towards. we may or may not be
    able to get there because not all modified chunks can be amended into
    a non-public fctx unambiguously.

    call this only once, before apply().

    update self.fixups, self.chunkstats, and self.targetlines.
    """
    # current stacked content vs. the desired target content
    acontent = self.contents[-1]
    alines = self.contentlines[-1]
    bcontent = targetfctx.data()
    blines = mdiff.splitnewlines(bcontent)
    self.targetlines = blines

    # annotate every current line with (linelog rev, linenum)
    self.linelog.annotate(self.linelog.maxrev)
    annotated = self.linelog.annotateresult  # [(linelog rev, linenum)]
    assert len(annotated) == len(alines)
    # a dummy trailing entry makes insertion at EOF look like any other
    # insertion
    if annotated:
        lastrev, lastnum = annotated[-1]
        annotated.append((lastrev, lastnum + 1))

    # walk the diff hunks and record which ones can be absorbed
    for chunk in self._alldiffchunks(acontent, bcontent, alines, blines):
        newfixups = self._analysediffchunk(chunk, annotated)
        self.chunkstats[0] += bool(newfixups)  # 1 or 0
        self.chunkstats[1] += 1
        self.fixups += newfixups
        if fm is not None:
            self._showchanges(fm, alines, blines, chunk, newfixups)
def overlaydiffcontext(ctx, chunks):
    """(ctx, [crecord.uihunk]) -> memctx

    return a memctx with some [1] patches (chunks) applied to ctx.
    [1]: modifications are handled. renames, mode changes, etc. are ignored.
    """
    # sadly the applying-patch logic is hardly reusable, and messy:
    # 1. the core logic "_applydiff" is too heavy - it writes .rej files, it
    #    needs a file stream of a patch and will re-parse it, while we have
    #    structured hunk objects at hand.
    # 2. a lot of different implementations about "chunk" (patch.hunk,
    #    patch.recordhunk, crecord.uihunk)
    # as we only care about applying changes to modified files, no mode
    # change, no binary diff, and no renames, it's probably okay to
    # re-invent the logic using much simpler code here.
    memworkingcopy = {}  # {path: content}
    patchmap = defaultdict(lambda: [])  # {path: [(a1, a2, [bline])]}
    # group the parsed hunks by file path, skipping unparseable ones
    for path, info in map(_parsechunk, chunks):
        if path and info:
            patchmap[path].append(info)
    for path, patches in pycompat.iteritems(patchmap):
        if path not in ctx or not patches:
            continue
        # apply hunks bottom-up so earlier hunk offsets stay valid
        patches.sort(reverse=True)
        lines = mdiff.splitnewlines(ctx[path].data())
        for a1, a2, blines in patches:
            lines[a1:a2] = blines
        memworkingcopy[path] = b''.join(lines)
    return overlaycontext(memworkingcopy, ctx)
def test_splitnewlines(self):
    # (input, expected split) pairs: trailing newline present/absent,
    # consecutive newlines, empty input, and input with no newline
    cases = [
        (b'a\nb\nc\n', [b'a\n', b'b\n', b'c\n']),
        (b'a\nb\nc', [b'a\n', b'b\n', b'c']),
        (b'a\nb\nc\n\n', [b'a\n', b'b\n', b'c\n', b'\n']),
        (b'', []),
        (b'abcabc', [b'abcabc']),
    ]
    for inp, want in cases:
        self.assertEqual(mdiff.splitnewlines(inp), want)
def _checkoutlinelogwithedits(self):
    """() -> [str]. prompt all lines for edit"""
    alllines = self.linelog.getalllines()
    # header
    editortext = (_(b'HG: editing %s\nHG: "y" means the line to the right '
                    b'exists in the changeset to the top\nHG:\n')
                  % self.fctxs[-1].path())
    # [(idx, fctx)]. hide the dummy emptyfilecontext
    visiblefctxs = [(i, f)
                    for i, f in enumerate(self.fctxs)
                    if not isinstance(f, emptyfilecontext)]
    # draw one "rail" per visible changeset so each flag column in the
    # editor text can be matched to a changeset
    for i, (j, f) in enumerate(visiblefctxs):
        editortext += _(b'HG: %s/%s %s %s\n') % (
            b'|' * i,
            b'-' * (len(visiblefctxs) - i + 1),
            node.short(f.node()),
            f.description().split(b'\n', 1)[0],
        )
    editortext += _(b'HG: %s\n') % (b'|' * len(visiblefctxs))
    # figure out the lifetime of a line, this is relatively inefficient,
    # but probably fine
    lineset = defaultdict(lambda: set())  # {(llrev, linenum): {llrev}}
    for i, f in visiblefctxs:
        self.linelog.annotate((i + 1) * 2)
        for l in self.linelog.annotateresult:
            lineset[l].add(i)
    # append lines
    # NOTE(review): the parser below assumes leftpadpos (4) leading pad
    # characters before the flag columns, but this format string shows a
    # single leading space -- verify the literal was not mangled
    # (upstream absorb uses four spaces here)
    for l in alllines:
        editortext += b' %s : %s' % (
            b''.join([(b'y' if i in lineset[l] else b' ')
                      for i, _f in visiblefctxs]),
            self._getline(l),
        )
    # run editor
    editedtext = self.ui.edit(editortext, b'', action=b'absorb')
    if not editedtext:
        raise error.Abort(_(b'empty editor text'))
    # parse edited result: rebuild each changeset's content from the
    # lines whose flag column for that changeset reads 'y'
    contents = [b'' for i in self.fctxs]
    leftpadpos = 4
    colonpos = leftpadpos + len(visiblefctxs) + 1
    for l in mdiff.splitnewlines(editedtext):
        if l.startswith(b'HG:'):
            continue
        if l[colonpos - 1:colonpos + 2] != b' : ':
            raise error.Abort(_(b'malformed line: %s') % l)
        linecontent = l[colonpos + 2:]
        # pycompat.bytestr makes iteration yield 1-char strings on both
        # Python 2 and Python 3 (plain bytes iteration yields ints on 3)
        for i, ch in enumerate(pycompat.bytestr(l[leftpadpos:colonpos - 1])):
            if ch == b'y':
                contents[visiblefctxs[i][0]] += linecontent
    # chunkstats is hard to calculate if anything changes, therefore
    # set them to just a simple value (1, 1).
    if editedtext != editortext:
        self.chunkstats = [1, 1]
    return contents
def _checkoutlinelogwithedits(self):
    """() -> [str]. prompt all lines for edit"""
    alllines = self.linelog.getalllines()
    # header.  built entirely as bytes: fctx.path(), f.description() and
    # _getline() return bytes, and mixing str with bytes raises
    # TypeError on Python 3
    editortext = (_(b'HG: editing %s\nHG: "y" means the line to the right '
                    b'exists in the changeset to the top\nHG:\n')
                  % self.fctxs[-1].path())
    # [(idx, fctx)]. hide the dummy emptyfilecontext
    visiblefctxs = [(i, f)
                    for i, f in enumerate(self.fctxs)
                    if not isinstance(f, emptyfilecontext)]
    for i, (j, f) in enumerate(visiblefctxs):
        editortext += (_(b'HG: %s/%s %s %s\n')
                       % (b'|' * i,
                          b'-' * (len(visiblefctxs) - i + 1),
                          node.short(f.node()),
                          f.description().split(b'\n', 1)[0]))
    editortext += _(b'HG: %s\n') % (b'|' * len(visiblefctxs))
    # figure out the lifetime of a line, this is relatively inefficient,
    # but probably fine
    lineset = defaultdict(lambda: set())  # {(llrev, linenum): {llrev}}
    for i, f in visiblefctxs:
        self.linelog.annotate((i + 1) * 2)
        for l in self.linelog.annotateresult:
            lineset[l].add(i)
    # append lines.  the leading pad must be leftpadpos (4) characters
    # wide so the parser below can locate the flag columns again
    for l in alllines:
        editortext += (b'    %s : %s'
                       % (b''.join([(b'y' if i in lineset[l] else b' ')
                                    for i, _f in visiblefctxs]),
                          self._getline(l)))
    # run editor
    editedtext = self.ui.edit(editortext, b'')
    if not editedtext:
        raise error.Abort(_(b'empty editor text'))
    # parse edited result: rebuild each changeset's content from the
    # lines whose flag column for that changeset reads 'y'
    contents = [b'' for i in self.fctxs]
    leftpadpos = 4
    colonpos = leftpadpos + len(visiblefctxs) + 1
    for l in mdiff.splitnewlines(editedtext):
        if l.startswith(b'HG:'):
            continue
        if l[colonpos - 1:colonpos + 2] != b' : ':
            raise error.Abort(_(b'malformed line: %s') % l)
        linecontent = l[colonpos + 2:]
        # iterate with one-byte slices: indexing bytes yields ints on
        # Python 3, so the original str comparison ch == 'y' could never
        # match there
        flags = l[leftpadpos:colonpos - 1]
        for i in range(len(flags)):
            if flags[i:i + 1] == b'y':
                contents[visiblefctxs[i][0]] += linecontent
    # chunkstats is hard to calculate if anything changes, therefore
    # set them to just a simple value (1, 1).
    if editedtext != editortext:
        self.chunkstats = [1, 1]
    return contents
def _parsechunk(hunk):
    """(crecord.uihunk or patch.recordhunk) -> (path, (a1, a2, [bline]))"""
    if type(hunk) not in (crecord.uihunk, patch.recordhunk):
        return None, None
    path = hunk.header.filename()
    a1 = hunk.fromline + len(hunk.before) - 1
    # remove before and after context
    hunk.before = hunk.after = []
    buf = util.stringio()
    hunk.write(buf)
    patchlines = mdiff.splitnewlines(buf.getvalue())
    # hunk.prettystr() will update hunk.removed
    a2 = a1 + hunk.removed
    # keep context/added lines, drop removed ones.  use a bytes-prefix
    # test: patchlines are bytes, so on Python 3 l[0] is an int and the
    # original comparison l[0] != '-' was always True, letting removed
    # lines leak into blines
    blines = [l[1:] for l in patchlines[1:] if not l.startswith(b'-')]
    return path, (a1, a2, blines)
def _parsechunk(hunk):
    """(crecord.uihunk or patch.recordhunk) -> (path, (a1, a2, [bline]))"""
    if type(hunk) not in (crecord.uihunk, patch.recordhunk):
        return None, None
    path = hunk.header.filename()
    a1 = hunk.fromline + len(hunk.before) - 1
    # strip the surrounding context so the written hunk holds only the
    # changed lines
    hunk.before = hunk.after = []
    buf = util.stringio()
    hunk.write(buf)
    patchlines = mdiff.splitnewlines(buf.getvalue())
    # hunk.prettystr() will update hunk.removed
    a2 = a1 + hunk.removed
    # skip the "@@" header line, keep context/added lines without their
    # one-char prefix, drop removed ("-") lines
    blines = []
    for line in patchlines[1:]:
        if not line.startswith(b'-'):
            blines.append(line[1:])
    return path, (a1, a2, blines)
def modregions(wctx, fname):
    """Return the regions of *fname* in *wctx* that differ from its parent(s)."""
    fctx = wctx.filectx(fname)
    pctx = fctx.parents()
    file_data = fctx.data()
    lines = mdiff.splitnewlines(file_data)
    if len(pctx) not in (1, 2):
        # no parent revision: treat the whole file as modified
        regions = Regions()
        regions.add(0, len(lines))
        return regions
    regions = modified_regions(pctx[0].data(), file_data)
    if len(pctx) == 2:
        # merge: keep only the lines that are new relative to both parents
        regions &= modified_regions(pctx[1].data(), file_data)
    return regions
def _refineannotateresult(self, result, f, showpath, showlines):
    """add the missing path or line contents, they can be expensive.
    f could be either node or fctx.
    """
    if showpath:
        result = self._addpathtoresult(result)
    if not showlines:
        return result
    # resolve f to a filectx so we can read the file data
    if isinstance(f, bytes):  # f: node or fctx
        llrev = self.revmap.hsh2rev(f)
        fctx = self._resolvefctx(f, self.revmap.rev2path(llrev))
    else:
        fctx = f
    lines = mdiff.splitnewlines(fctx.data())
    if len(lines) != len(result):
        # linelog is probably corrupted
        raise faerror.CorruptedFileError()
    return (result, lines)
def modregions(wctx, fname):
    """Return the regions of *fname* in *wctx* that differ from its parent(s)."""
    fctx = wctx.filectx(fname)
    pctx = fctx.parents()
    file_data = fctx.data()
    lines = mdiff.splitnewlines(file_data)
    if len(pctx) not in (1, 2):
        # no parent revision: treat the whole file as modified
        regions = Regions()
        regions.append(0, len(lines))
        return regions
    regions = modified_regions(pctx[0].data(), file_data)
    if len(pctx) == 2:
        # merge: keep only the lines that are new relative to both parents
        regions &= modified_regions(pctx[1].data(), file_data)
    return regions
def _resolvelines(self, annotateresult, revmap, linelog):
    """(annotateresult) -> [line]. designed for annotatealllines.

    this is probably the most inefficient code in the whole fastannotate
    directory. but we have made a decision that the linelog does not
    store line contents. so getting them requires random accesses to
    the revlog data, since they can be many, it can be very slow.
    """
    # [llrev]
    revs = [revmap.hsh2rev(l[0]) for l in annotateresult]
    result = [None] * len(annotateresult)
    # {(rev, linenum): [lineindex]}
    key2idxs = collections.defaultdict(list)
    for i in pycompat.xrange(len(result)):
        key2idxs[(revs[i], annotateresult[i][1])].append(i)
    while key2idxs:
        # find an unresolved line and its linelog rev to annotate
        hsh = None
        try:
            # pycompat.iteritems instead of the Python 2-only
            # dict.iteritems(), which raises AttributeError on Python 3
            # (the file already relies on pycompat via pycompat.xrange)
            for (rev, _linenum), idxs in pycompat.iteritems(key2idxs):
                if revmap.rev2flag(rev) & revmapmod.sidebranchflag:
                    continue
                hsh = annotateresult[idxs[0]][0]
                break
        except StopIteration:  # no more unresolved lines
            return result
        if hsh is None:
            # the remaining key2idxs are not in main branch, resolving them
            # using the hard way...
            revlines = {}
            for (rev, linenum), idxs in pycompat.iteritems(key2idxs):
                if rev not in revlines:
                    hsh = annotateresult[idxs[0]][0]
                    if self.ui.debugflag:
                        self.ui.debug('fastannotate: reading %s line #%d '
                                      'to resolve lines %r\n'
                                      % (node.short(hsh), linenum, idxs))
                    fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
                    lines = mdiff.splitnewlines(fctx.data())
                    revlines[rev] = lines
                for idx in idxs:
                    result[idx] = revlines[rev][linenum]
            assert all(x is not None for x in result)
            return result

        # run the annotate and the lines should match to the file content
        self.ui.debug('fastannotate: annotate %s to resolve lines\n'
                      % node.short(hsh))
        linelog.annotate(rev)
        fctx = self._resolvefctx(hsh, revmap.rev2path(rev))
        annotated = linelog.annotateresult
        lines = mdiff.splitnewlines(fctx.data())
        if len(lines) != len(annotated):
            raise faerror.CorruptedFileError('unexpected annotated lines')
        # resolve lines from the annotate result
        for i, line in enumerate(lines):
            k = annotated[i]
            if k in key2idxs:
                for idx in key2idxs[k]:
                    result[idx] = line
                del key2idxs[k]
    return result
def do_check_whitespace(ui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    from mercurial import mdiff

    # NOTE(review): skip() returns True when name IS in the user-supplied
    # list, i.e. the arguments act as an exclude list -- verify this is
    # the intended semantics
    if files:
        files = frozenset(files)

    def skip(name):
        return files and name in files

    def prompt(name, fixonly=None):
        # returns True when the user chose to abort the whole run
        if args.get('auto', False):
            result = 'f'
        else:
            while True:
                result = ui.prompt("(a)bort, (i)gnore, or (f)ix?",
                                   default='a')
                if result in 'aif':
                    break
        if result == 'a':
            return True
        elif result == 'f':
            fixwhite(repo.wjoin(name), args['tabsize'], fixonly)
        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # newly added files: every line must pass the whitespace check
    for fname in added:
        if skip(fname):
            continue
        ok = True
        for line, num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if ui.verbose:
                # strip the trailing newline, matching the report for
                # modified files below (was line[-1], which printed only
                # the line's last character)
                ui.write(">>%s<<\n" % line[:-1])
            ok = False
        if not ok:
            if prompt(fname):
                return True

    # narrowed from a bare except: the fallback exists for mercurial
    # versions where repo.workingctx() is not available
    try:
        wctx = repo.workingctx()
    except AttributeError:
        from mercurial import context
        wctx = context.workingctx(repo)

    # modified files: only complain about lines this change touched
    for fname in modified:
        if skip(fname):
            continue
        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()
        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)

        if len(pctx) in (1, 2):
            mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
            if len(pctx) == 2:
                m2 = modified_lines(pctx[1].data(), file_data, len(lines))
                # only the lines that are new in both
                mod_lines = mod_lines & m2
        else:
            # no parent: treat every line as modified
            mod_lines = xrange(0, len(lines))

        fixonly = set()
        for i, line in enumerate(lines):
            if i not in mod_lines:
                continue
            if checkwhite_line(line):
                continue
            ui.write("invalid whitespace: %s:%d\n" % (fname, i + 1))
            if ui.verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True
def do_check_whitespace(ui, repo, *files, **args):
    """check files for proper m5 style guidelines"""
    from mercurial import mdiff

    # NOTE(review): skip() returns True when name IS in the user-supplied
    # list, i.e. the arguments act as an exclude list -- verify this is
    # the intended semantics
    if files:
        files = frozenset(files)

    def skip(name):
        return files and name in files

    def prompt(name, fixonly=None):
        # returns True when the user chose to abort the whole run
        if args.get('auto', False):
            result = 'f'
        else:
            while True:
                result = ui.prompt("(a)bort, (i)gnore, or (f)ix?",
                                   default='a')
                if result in 'aif':
                    break
        if result == 'a':
            return True
        elif result == 'f':
            fixwhite(repo.wjoin(name), args['tabsize'], fixonly)
        return False

    modified, added, removed, deleted, unknown, ignore, clean = repo.status()

    # newly added files: every line must pass the whitespace check
    for fname in added:
        if skip(fname):
            continue
        ok = True
        for line, num in checkwhite(repo.wjoin(fname)):
            ui.write("invalid whitespace in %s:%d\n" % (fname, num))
            if ui.verbose:
                # strip the trailing newline, matching the report for
                # modified files below (was line[-1], which printed only
                # the line's last character)
                ui.write(">>%s<<\n" % line[:-1])
            ok = False
        if not ok:
            if prompt(fname):
                return True

    # narrowed from a bare except: the fallback exists for mercurial
    # versions where repo.workingctx() is not available
    try:
        wctx = repo.workingctx()
    except AttributeError:
        from mercurial import context
        wctx = context.workingctx(repo)

    # modified files: only complain about lines this change touched
    for fname in modified:
        if skip(fname):
            continue
        if not whitespace_file(fname):
            continue

        fctx = wctx.filectx(fname)
        pctx = fctx.parents()
        file_data = fctx.data()
        lines = mdiff.splitnewlines(file_data)

        if len(pctx) in (1, 2):
            mod_lines = modified_lines(pctx[0].data(), file_data, len(lines))
            if len(pctx) == 2:
                m2 = modified_lines(pctx[1].data(), file_data, len(lines))
                # only the lines that are new in both
                mod_lines = mod_lines & m2
        else:
            # no parent: treat every line as modified
            mod_lines = xrange(0, len(lines))

        fixonly = set()
        for i, line in enumerate(lines):
            if i not in mod_lines:
                continue
            if checkwhite_line(line):
                continue
            ui.write("invalid whitespace: %s:%d\n" % (fname, i + 1))
            if ui.verbose:
                ui.write(">>%s<<\n" % line[:-1])
            fixonly.add(i)

        if fixonly:
            if prompt(fname, fixonly):
                return True