def shortlog_area_prefix(shortlog):
    '''Return the raw area prefix of a shortlog, or None.

    The prefix is returned exactly as it is written in the shortlog
    (minus surrounding whitespace); use shortlog_area() to map it
    onto one of the known, canonical areas.

    Reverts are unwrapped first, so 'Revert "foo: bar"' yields the
    prefix of 'foo: bar'. The generic prefixes 'subsys', 'include'
    and 'api' are skipped, so 'subsys: foo: bar' yields 'foo'.

    None is returned when the shortlog is empty or has no ':' in it.
    '''
    # Prefixes which only wrap the "real" area that follows them.
    wrapper_prefixes = ('subsys', 'include', 'api')

    remaining = shortlog
    while remaining:
        # A revert takes the area of whatever it reverts.
        if shortlog_is_revert(remaining):
            remaining = shortlog_reverts_what(remaining)
            continue

        # The candidate area is everything before the first ':'.
        # Without a ':' there is no area at all.
        head, colon, tail = remaining.partition(':')
        if not colon:
            return None

        candidate = head.strip()
        if candidate not in wrapper_prefixes:
            return candidate

        # Wrapper prefix: look for the area in the rest of the line.
        remaining = tail.strip()

    # Nothing left to inspect.
    return None
def analyze(self):
    '''Analyze repository history.

    Opens the Git repository at self.repo_path, groups the new
    upstream-only commits by area, works out which downstream-only
    patches are still outstanding (i.e. were not reverted later in
    the downstream history), and looks for upstream commits whose
    shortlogs are close enough to an outstanding patch that the
    patch was likely merged upstream.

    If this returns without raising an exception, the return value
    is a ZephyrRepoAnalysis.

    Raises:
        InvalidRepositoryError: self.repo_path could not be opened
            as a Git repository.
        UnknownCommitsError: at least one new upstream commit ended
            up with an area of None.
        NotImplementedError: two outstanding downstream commits have
            the same shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as exc:
        # pygit2 raises KeyError when the current path is not a Git
        # repository. Keep the original error attached as the cause.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from exc

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): this indexing raises IndexError when there are no
    # new upstream commits -- confirm whether callers rely on that.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Use .get() so that probing for the None key doesn't create it
    # in the defaultdict.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {
        area: len(patches)
        for area, patches in upstream_area_patches.items()
    }

    #
    # Analyze downstream portion of the tree.
    #
    downstream_only = self._all_downstream_only_commits()
    downstream_outstanding = OrderedDict()
    for c in downstream_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in downstream_outstanding:
                logging.warning(
                    "%s was reverted, but isn't in downstream history",
                    what)
                continue
            del downstream_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # downstream_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # Keying by shortlog means duplicates can't be handled.
            # We could try to support this by looking into the entire
            # revert message to find the "This reverts commit SHA"
            # text and computing reverts based on oid rather than
            # shortlog. That'd be more robust, but let's not worry
            # about it for now.
            if sl in downstream_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)

            # Emit a warning if we have a non-revert patch with an
            # incorrect sauce tag. (Downstream might carry reverts
            # of upstream patches as hotfixes, which we shouldn't
            # warn about.)
            if not shortlog_has_sauce(sl, self.downstream_sauce):
                logging.warning('out of tree patch has bad sauce: %s %s',
                                c.oid, sl)
            downstream_outstanding[sl] = c

    #
    # Compute likely merged patches.
    #
    # Only upstream commits authored from the downstream domain are
    # considered as candidates.
    upstream_downstream = [
        c for c in upstream_new
        if c.author.email.endswith(self.downstream_domain)
    ]
    likely_merged = OrderedDict()
    for downstream_sl in downstream_outstanding:
        # The sauce-free shortlog is the same for every candidate, so
        # compute it once per outstanding patch.
        bare_sl = shortlog_no_sauce(downstream_sl, self.downstream_sauce)
        matches = [
            c for c in upstream_downstream
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[downstream_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              downstream_outstanding,
                              likely_merged)
def analyze(self):
    '''Analyze repository history.

    Opens the Git repository at self.repo_path, groups the new
    upstream-only commits by area, works out which FIO-only patches
    are still outstanding (i.e. were not reverted later in the FIO
    history), and looks for upstream commits whose shortlogs are
    close enough to an outstanding patch that the patch was likely
    merged upstream.

    If this returns without raising an exception, the return value
    is a ZephyrRepoAnalysis.

    Raises:
        InvalidRepositoryError: self.repo_path could not be opened
            as a Git repository.
        UnknownCommitsError: at least one new upstream commit ended
            up with an area of None.
        NotImplementedError: two outstanding FIO commits have the
            same shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as exc:
        # pygit2 raises KeyError when the current path is not a Git
        # repository. Keep the original error attached as the cause.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from exc

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): this indexing raises IndexError when there are no
    # new upstream commits -- confirm whether callers rely on that.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Use .get() so that probing for the None key doesn't create it
    # in the defaultdict.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {
        area: len(patches)
        for area, patches in upstream_area_patches.items()
    }

    #
    # Analyze FIO portion of the tree.
    #
    fio_only = self._all_fio_only_commits()
    fio_outstanding = OrderedDict()
    for c in fio_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in fio_outstanding:
                print('WARNING: {} was reverted,'.format(what),
                      "but isn't present in FIO history",
                      file=sys.stderr)
                continue
            del fio_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # fio_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # Keying by shortlog means duplicates can't be handled.
            # We could try to support this by looking into the entire
            # revert message to find the "This reverts commit SHA"
            # text and computing reverts based on oid rather than
            # shortlog. That'd be more robust, but let's not worry
            # about it for now.
            if sl in fio_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)
            fio_outstanding[sl] = c

    #
    # Compute likely merged patches.
    #
    # Only upstream commits accepted by commit_is_fio() are considered
    # as candidates.
    upstream_fio = [c for c in upstream_new if commit_is_fio(c)]
    likely_merged = OrderedDict()
    for fio_sl in fio_outstanding:
        # The sauce-free shortlog is the same for every candidate, so
        # compute it once per outstanding patch.
        bare_sl = shortlog_no_sauce(fio_sl)
        matches = [
            c for c in upstream_fio
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[fio_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              fio_outstanding,
                              likely_merged)