def _interestingfiles(repo, matcher, maxfiles):
    """Find what files were added or removed in this commit.

    Returns an ``(added, removed)`` pair of lists. Files that already have
    copy information recorded (i.e. are already marked as moved) are left
    out of the added list.

    ``maxfiles`` is accepted for interface compatibility; this function does
    not read it.
    """
    status = repo.status(match=matcher)
    candidates, removed = status[1], status[2]

    # Copy information already known between repo["."] and repo[None].
    known = copies._forwardcopies(repo["."], repo[None], matcher)

    # Keep only the added files for which no copy info exists yet.
    added = []
    for path in candidates:
        if path not in known:
            added.append(path)
    return added, removed
def _domergecopies(orig, repo, cdst, csrc, base):
    """Fast copytracing using filename heuristics

    Handle one case where we assume there are no merge commits in
    "source branch". Source branch is commits from base up to csrc not
    including base.
    If these assumptions don't hold then we fallback to the upstream
    mergecopies

       p
       |
       p  <- cdst - rebase or merge destination, can be draft
       .
       .
       .   d  <- csrc - commit to be rebased or merged or grafted.
       |   |
       p   d  <- base
       | /
       p  <- common ancestor

    To find copies we are looking for files with similar filenames.
    See description of the heuristics below.

    Upstream copytracing function returns five dicts:
    "copy", "movewithdir", "diverge", "renamedelete" and "dirmove". See below
    for a more detailed description (mostly copied from upstream).
    This extension returns "copy" dict only, everything else is empty.

    "copy" is a mapping from destination name -> source name,
    where source is in csrc and destination is in cdst or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames. At the time of writing this extension it was used
    only to print warning.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    At the time of writing this extension it was used only to print warning.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    Args:
        orig: the wrapped mergecopies implementation; called as
            ``orig(repo, cdst, csrc, base)`` whenever the heuristics do not
            apply.
        repo: the repository (its ``ui`` and ``dirstate`` are consulted).
        cdst: destination context.
        csrc: source context.
        base: base context of the source branch.

    Returns:
        A 5-item sequence ``(copy, movewithdir, diverge, renamedelete,
        dirmove)``. On the heuristics path only ``copy`` is populated.
    """
    if repo.ui.config("experimental", "copytrace") == "on":
        # user explicitly enabled copytracing - use it
        return orig(repo, cdst, csrc, base)

    if not _fastcopytraceenabled(repo.ui):
        return orig(repo, cdst, csrc, base)

    # If base, source and destination are all draft branches, let's use full
    # copytrace for increased capabilities since it will work fast enough
    if _isfullcopytraceable(repo.ui, cdst, base):
        configoverrides = {("experimental", "copytrace"): "on"}
        with repo.ui.configoverride(configoverrides, "mergecopies"):
            result = orig(repo, cdst, csrc, base)
            if repo.ui.configbool("copytrace", "enableamendcopytrace"):
                # Look for additional amend-copies
                amend_copies = _getamendcopies(repo, cdst, base.p1())
                # update the "copy" dict w/ amend_copies
                result[0].update(amend_copies)
                # orig may hand back an immutable tuple (this function does
                # so itself), so build a new result rather than assigning to
                # result[0], which would raise TypeError on a tuple.
                filtered = _filtercopies(result[0], cdst, csrc, base)
                result = (filtered,) + tuple(result[1:])
            return result

    # avoid silly behavior for parent -> working dir
    if csrc.node() is None and cdst.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    # Contexts without a revision number are replaced by their first parent.
    if cdst.rev() is None:
        cdst = cdst.p1()
    if csrc.rev() is None:
        csrc = csrc.p1()

    copies = {}

    # Walk the source branch from csrc back to base, collecting every file it
    # touched. Bail out to orig on merges or when the branch is too long.
    ctx = csrc
    changedfiles = set()
    sourcecommitnum = 0
    sourcecommitlimit = repo.ui.configint("copytrace", "sourcecommitlimit")
    mdst = cdst.manifest()
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            return orig(repo, cdst, csrc, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()
        sourcecommitnum += 1
        if sourcecommitnum > sourcecommitlimit:
            return orig(repo, cdst, csrc, base)

    # Copies recorded on the source branch whose source still exists in the
    # destination manifest.
    cp = copiesmod._forwardcopies(base, csrc)
    for dst, src in pycompat.iteritems(cp):
        if src in mdst:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in mdst and f in base and f in csrc
    ]

    if missingfiles:
        # Use the following file name heuristic to find moves: moves are
        # usually either directory moves or renames of the files in the
        # same directory. That means that we can look for the files in dstc
        # with either the same basename or the same dirname.
        basenametofilename = defaultdict(list)
        dirnametofilename = defaultdict(list)
        for f in mdst.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        maxmovecandidatestocheck = repo.ui.configint(
            "copytrace", "maxmovescandidatestocheck"
        )
        # in case of a rebase/graft, base may not be a common ancestor
        anc = cdst.ancestor(csrc)
        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in csrc, that's why
            # csrc.filectx(f) won't fail
            f2 = csrc.filectx(f)
            for candidate in movecandidates[:maxmovecandidatestocheck]:
                f1 = cdst.filectx(candidate)
                if copiesmod._related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f
            if len(movecandidates) > maxmovecandidatestocheck:
                # Candidates beyond the limit were not examined; record that.
                msg = "too many moves candidates: %d" % len(movecandidates)
                repo.ui.log(
                    "copytrace", msg=msg, reponame=_getreponame(repo, repo.ui)
                )

    if repo.ui.configbool("copytrace", "enableamendcopytrace"):
        # Look for additional amend-copies.
        amend_copies = _getamendcopies(repo, cdst, base.p1())
        if amend_copies:
            repo.ui.debug("Loaded amend copytrace for %s" % cdst)
            # Copies found above take precedence over amend-copies.
            for dst, src in pycompat.iteritems(amend_copies):
                if dst not in copies:
                    copies[dst] = src

    return _filtercopies(copies, cdst, csrc, base), {}, {}, {}, {}