def shortlog_area_prefix(shortlog):
    '''Return the raw area prefix of a shortlog, or None.

    The prefix is returned as it appears in the shortlog (with
    surrounding whitespace stripped). To canonicalize it to one of a
    known set of areas, use shortlog_area() instead.

    None is returned when the shortlog is empty or carries no prefix.
    '''
    remaining = shortlog
    while remaining:
        # 'Revert "foo"' takes its area from foo: unwrap and try again.
        if shortlog_is_revert(remaining):
            remaining = shortlog_reverts_what(remaining)
            continue

        # The candidate area is whatever precedes the first ':'.
        # Without any ':' there is no area at all.
        candidate, colon, tail = remaining.partition(':')
        if not colon:
            return None
        candidate = candidate.strip()

        # Generic wrappers ('subsys: foo: ...', etc.) defer to the
        # prefix of whatever follows them.
        if candidate not in ('subsys', 'include', 'api'):
            return candidate
        remaining = tail.strip()

    # Ran out of text to inspect without finding an area.
    return None
def _downstream_outstanding_commits(self):
    '''Compute a list of commit objects for outstanding downstream patches.

    Starts from every commit reachable from the downstream revision
    but not from the upstream one (oldest first), then drops:

    - merge commits, unless self._include_mergeups is set
    - commits that a later downstream commit reverts, together with
      the revert itself

    Returns a list of the objects produced by
    self._repo.revparse_single(), in 'git rev-list --reverse' order.
    The list is empty if there are no downstream-only commits.
    '''
    # Convert downstream and upstream revisions to SHAs.
    dsha = self._dp.sha(self._dr)
    usha = self._up.sha(self._ur)

    # First, get a list of all downstream OOT patches. Note:
    # pygit2 doesn't seem to have any ready-made rev-list
    # equivalent, so call out to Project.git() to get the commit
    # SHAs, then wrap them with pygit2 objects.
    cp = self._dp.git('rev-list --reverse {} ^{}'.format(dsha, usha),
                      capture_stdout=True)
    if not cp.stdout.strip():
        return []
    commit_shas = cp.stdout.decode('utf-8').strip().splitlines()
    all_downstream_oot = [self._repo.revparse_single(c)
                          for c in commit_shas]

    # Now filter out reverted patches and mergeups from the
    # complete list of OOT patches. Keyed by SHA so a later revert
    # can find and remove the commit it undoes.
    downstream_out = OrderedDict()
    for c in all_downstream_oot:
        sha, sl = str(c.oid), commit_shortlog(c)
        is_revert = shortlog_is_revert(sl)  # this is just a heuristic

        if len(c.parents) > 1:
            if not self._include_mergeups:
                # Skip all the mergeup commits.
                log.dbg('** skipped mergeup {} ("{}")'.format(sha, sl),
                        level=log.VERBOSE_VERY)
                continue
            else:
                is_revert = False  # a merge is never a revert

        if is_revert:
            # If a shortlog marks a revert, delete the original commit
            # from downstream_out, if it can be found.
            try:
                rsha = commit_reverts_what(c)
            except ValueError:
                # Badly formatted revert message.
                # Treat as outstanding, but complain.
                log.wrn(
                    'revert {} doesn\'t say "reverts commit <SHA>":\n{}'.
                    format(str(sha), textwrap.indent(c.message, '\t')))
                rsha = None

            if rsha in downstream_out:
                log.dbg('** commit {} ("{}") was reverted in {}'.
                        format(rsha,
                               commit_shortlog(downstream_out[rsha]),
                               sha),
                        level=log.VERBOSE_VERY)
                del downstream_out[rsha]
                continue
            elif rsha is not None:
                # Make sure the reverted commit is in downstream history.
                # (It might not be in all_downstream_oot if e.g.
                # downstream reverts an upstream patch as a hotfix, and we
                # shouldn't warn about that.)
                #
                # 'merge-base --is-ancestor' reports its answer through
                # the exit status, so a non-zero status is an expected
                # outcome here. check=False keeps Project.git() from
                # raising on it, so the warning below can be emitted.
                is_ancestor = self._dp.git(
                    'merge-base --is-ancestor {} {}'.format(rsha, dsha),
                    capture_stdout=True, check=False).returncode == 0
                if not is_ancestor:
                    log.wrn(('commit {} ("{}") reverts {}, '
                             "which isn't in downstream history").
                            format(sha, sl, rsha))

        # Emit a warning if we have a non-revert patch with an
        # incorrect sauce tag. (Again, downstream might carry reverts
        # of upstream patches as hotfixes, which we shouldn't
        # warn about.)
        if (not shortlog_has_sauce(sl, self._downstream_sauce) and
                not is_revert):
            log.wrn(f'{self._dp.name}: bad or missing sauce tag: {sha} ("{sl}")')

        downstream_out[sha] = c
        log.dbg('** added oot patch: {} ("{}")'.format(sha, sl),
                level=log.VERBOSE_VERY)

    return list(downstream_out.values())
def analyze(self):
    '''Analyze repository history.

    If this returns without raising an exception, the return
    value is a ZephyrRepoAnalysis.

    Raises InvalidRepositoryError if self.repo_path can't be opened
    as a Git repository, UnknownCommitsError if any new upstream
    commit has no area, and NotImplementedError if two outstanding
    downstream commits share the same shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as err:
        # pygit2 raises KeyError when the current path is not a Git
        # repository.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from err

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): indexing raises IndexError if there are no new
    # upstream commits -- presumably callers guarantee at least one;
    # confirm.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Commits without any area end up under the None key.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {
        area: len(patches)
        for area, patches in upstream_area_patches.items()
    }

    #
    # Analyze downstream portion of the tree.
    #
    downstream_only = self._all_downstream_only_commits()
    downstream_outstanding = OrderedDict()
    for c in downstream_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in downstream_outstanding:
                logging.warning(
                    "%s was reverted, but isn't in downstream history",
                    what)
                continue
            del downstream_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # downstream_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # We could try to support this by looking into the entire
            # revert message to find the "This reverts commit SHA"
            # text and computing reverts based on oid rather than
            # shortlog. That'd be more robust, but let's not worry
            # about it for now.
            if sl in downstream_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)

            # Emit a warning if we have a non-revert patch with an
            # incorrect sauce tag. (Downstream might carry reverts
            # of upstream patches as hotfixes, which we shouldn't
            # warn about.)
            if not shortlog_has_sauce(sl, self.downstream_sauce):
                logging.warning('out of tree patch has bad sauce: %s %s',
                                c.oid, sl)

            downstream_outstanding[sl] = c

    # Compute likely merged patches: upstream commits authored from
    # the downstream domain whose shortlog is within the edit
    # distance threshold of an outstanding downstream shortlog.
    upstream_downstream = [
        c for c in upstream_new
        if c.author.email.endswith(self.downstream_domain)
    ]
    likely_merged = OrderedDict()
    for downstream_sl in downstream_outstanding:
        # The sauce-free shortlog is the same for every upstream
        # candidate, so compute it once per downstream patch.
        bare_sl = shortlog_no_sauce(downstream_sl, self.downstream_sauce)
        matches = [
            c for c in upstream_downstream
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[downstream_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              downstream_outstanding,
                              likely_merged)
def analyze(self):
    '''Analyze repository history.

    If this returns without raising an exception, the return
    value is a ZephyrRepoAnalysis.

    Raises InvalidRepositoryError if self.repo_path can't be opened
    as a Git repository, UnknownCommitsError if any new upstream
    commit has no area, and NotImplementedError if two outstanding
    FIO commits share the same shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as err:
        # pygit2 raises KeyError when the current path is not a Git
        # repository.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from err

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): indexing raises IndexError if there are no new
    # upstream commits -- presumably callers guarantee at least one;
    # confirm.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Commits without any area end up under the None key.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {
        area: len(patches)
        for area, patches in upstream_area_patches.items()
    }

    #
    # Analyze FIO portion of the tree.
    #
    fio_only = self._all_fio_only_commits()
    fio_outstanding = OrderedDict()
    for c in fio_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in fio_outstanding:
                print('WARNING: {} was reverted,'.format(what),
                      "but isn't present in FIO history",
                      file=sys.stderr)
                continue
            del fio_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # fio_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # We could try to support this by looking into the entire
            # revert message to find the "This reverts commit SHA"
            # text and computing reverts based on oid rather than
            # shortlog. That'd be more robust, but let's not worry
            # about it for now.
            if sl in fio_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)
            fio_outstanding[sl] = c

    # Compute likely merged patches: upstream commits identified by
    # commit_is_fio() whose shortlog is within the edit distance
    # threshold of an outstanding FIO shortlog.
    upstream_fio = [c for c in upstream_new if commit_is_fio(c)]
    likely_merged = OrderedDict()
    for fio_sl in fio_outstanding:
        # The sauce-free shortlog is the same for every upstream
        # candidate, so compute it once per FIO patch.
        bare_sl = shortlog_no_sauce(fio_sl)
        matches = [
            c for c in upstream_fio
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[fio_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              fio_outstanding,
                              likely_merged)