def _likely_merged_commits(self):
    # Compute patches which are downstream and probably were
    # merged upstream, using the following heuristics:
    #
    # 1. downstream patches with small shortlog edit distances
    #    from upstream patches
    #
    # 2. downstream patches with shortlogs that are prefixes of
    #    upstream patches
    #
    # Heuristic #1 catches patches with typos in the shortlogs
    # that reviewers asked to be fixed, etc. E.g. upstream
    # shortlog
    #
    #    Bluetoth: do foo
    #
    # matches downstream shortlog
    #
    #    [nrf fromlist] Bluetooth: do foo
    #
    # Heuristic #2 catches situations where we had to shorten our
    # downstream shortlog to fit the "[nrf xyz]" sauce tag at the
    # beginning and still fit within CI's shortlog length
    # restrictions. E.g. upstream shortlog
    #
    #    subsys: do a thing that is very useful for everyone
    #
    # matches downstream shortlog
    #
    #    [nrf fromlist] subsys: do a thing that is very
    #
    # The return value is an OrderedDict mapping each commit in
    # self.downstream_outstanding that has at least one match to the
    # list of matching commits from self.upstream_new. Only shortlogs
    # are compared; authorship is not checked here.
    likely_merged = OrderedDict()
    threshold = self._edit_dist_threshold

    for dc in self.downstream_outstanding:
        sl = commit_shortlog(dc)
        # The sauce-free shortlog does not depend on the upstream
        # commit, so compute it once per downstream patch.
        bare_sl = shortlog_no_sauce(sl, self._downstream_sauce)

        # Heuristic #2 has to be tried against the sauce-free text:
        # upstream shortlogs do not carry the downstream tag. The
        # tagged form is kept too so every previous match still
        # matches. Empty prefixes are dropped, since ''.startswith
        # semantics would otherwise match every upstream commit.
        prefixes = tuple(p for p in (bare_sl, sl) if p)

        matches = []
        for uc in self.upstream_new:
            upstream_sl = commit_shortlog(uc)
            if (editdistance.eval(bare_sl, upstream_sl) < threshold  # 1
                    or upstream_sl.startswith(prefixes)):            # 2
                matches.append(uc)

        if matches:
            likely_merged[dc] = matches

    return likely_merged
def print_notes(start_manifest, end_manifest):
    # Print a Markdown summary of what changed in each tracked
    # project between two pinned manifests.
    #
    # zmp_project_data() gives, per manifest, the data for each
    # project we track, keyed by name; its 'revision' and 'abspath'
    # attributes are used here.
    start_data = zmp_project_data(start_manifest)
    end_data = zmp_project_data(end_manifest)

    notes_metadata = {}
    for project in ZMP_PROJECTS:
        first_rev = start_data[project].revision
        last_rev = end_data[project].revision
        # Read history from the end manifest's checkout: it should
        # have the entire history, while the start one might be gone.
        repo_path = end_data[project].abspath
        commits = repo_commits(repo_path, first_rev, last_rev)

        count = len(commits)
        if count >= 2:
            first, last = commits[0], commits[-1]
            summary = '''\
{} patches total:

- start commit: {} ("{}").
- end commit: {} ("{}").'''.format(count,
                                   commit_shortsha(first),
                                   commit_shortlog(first),
                                   commit_shortsha(last),
                                   commit_shortlog(last))
        elif count == 1:
            only = commits[0]
            summary = 'One new commit: {} ("{}").'.format(
                commit_shortsha(only), commit_shortlog(only))
        else:
            summary = 'No changes.'

        notes_metadata[project] = {
            'path': repo_path,  # assume it stays the same
            'start_revision': first_rev,
            'end_revision': last_rev,
            'commits': commits,
            'changes': summary,
        }

    # The template has one slot per project, filled in ZMP_PROJECTS
    # order.
    template = '''\
## West

{}

## Zephyr

{}

## MCUboot

{}

## dm-lwm2m

{}
'''
    per_project = [notes_metadata[project]['changes']
                   for project in ZMP_PROJECTS]
    print(template.format(*per_project))
def upstream_commit_line(self, commit, merge_day=False):
    '''Format a one-line list entry describing an upstream commit.

    The line holds the commit's short SHA and shortlog. When
    merge_day is true, the value of self.commit_merge_day(commit) is
    appended after ", merged ".
    '''
    if not merge_day:
        return '- {} {}'.format(commit_shortsha(commit),
                                commit_shortlog(commit))

    merged = self.commit_merge_day(commit)
    template = '- {} {}, merged {}'
    return template.format(commit_shortsha(commit),
                           commit_shortlog(commit),
                           merged)
def upstream_commit_line(self, commit, merge_day=False):
    '''Format a Markdown list entry describing an upstream commit.

    The short SHA is rendered as a link to the commit's page in the
    zephyrproject-rtos/zephyr GitHub repository, followed by the
    shortlog. When merge_day is true, the value of
    self.commit_merge_day(commit) is appended after ", merged ".
    '''
    url_base = 'https://github.com/zephyrproject-rtos/zephyr/commit/'
    link = url_base + str(commit.oid)

    if not merge_day:
        return '- [{}]({}) {}'.format(commit_shortsha(commit),
                                      link,
                                      commit_shortlog(commit))

    merged = self.commit_merge_day(commit)
    template = '- [{}]({}) {}, merged {}'
    return template.format(commit_shortsha(commit),
                           link,
                           commit_shortlog(commit),
                           merged)
def dump_unknown_commit_help(unknown_commits):
    '''Print help to stderr about commits whose area is unknown.

    The message lists each commit (short SHA and shortlog), shows the
    current command line extended with one --set-area=SHA:AREA
    option per commit, lists the valid AREAS, and names this file as
    the place to update AREA_TO_SHORTLOG_RES.
    '''
    template = textwrap.dedent("""\
    Error: can't build mergeup log message.
    The following commits have unknown areas:

    {}

    You can manually specify areas like so:

    {}

    Where each AREA is taken from the list:

    \t{}

    You can also update AREA_TO_SHORTLOG_RES in {}
    to permanently associate an area with this type of shortlog.
    """)

    commit_lines = []
    for c in unknown_commits:
        commit_lines.append('- {} {}'.format(commit_shortsha(c),
                                             commit_shortlog(c)))

    # Suggested invocation: the original command line, shell-quoted,
    # followed by one placeholder option per unknown commit.
    command_words = [shlex.quote(word) for word in sys.argv]
    command_words.extend('--set-area={}:AREA'.format(commit_shortsha(c))
                         for c in unknown_commits)

    text = template.format('\n'.join(commit_lines),
                           ' '.join(command_words),
                           '\n\t'.join(AREAS),
                           __file__)
    print(text, file=sys.stderr)
def print_to_revert_text():
    '''Print the likely-merged downstream patches as plain text.

    Reads likely_merged (downstream shortlog -> list of upstream
    commits) and outstanding (downstream shortlog -> commit) from the
    enclosing scope. Prints "<none>" when likely_merged is empty.
    '''
    print('To revert:')
    if not likely_merged:
        print('<none>')
        return

    for sl, commits in likely_merged.items():
        print('{} {}'.format(outstanding[sl].oid, sl))

        several = len(commits) > 1
        if several:
            print('\tlikely merged upstream as one of:')
            shown = commits
        else:
            print('\tlikely merged upstream as:')
            shown = [commits[0]]

        for c in shown:
            print('\t{} {}'.format(c.oid, commit_shortlog(c)))
def get_pull_info_in_range(args):
    # Collect the pull requests associated with a commit range.
    #
    # This can take a long time to run, so one line per commit is
    # printed to stderr as we go, as a sign of life so users know
    # what's happening.
    #
    # Exits via sys.exit() if any commit is not associated with
    # exactly one pull request.
    #
    # The return value is a list of pr_info tuples, one per pull
    # request.

    # Commits come from the local repository (pygit2.Commit objects);
    # pull requests come from the GitHub API (github.PullRequest
    # objects).
    commit_list = get_pygit2_commits(args)
    gh_repo = get_gh_repo(args)

    want_areas = args.zephyr_areas
    pr_num_to_pr = {}
    pr_num_to_commits = defaultdict(list)
    if want_areas:
        sha_to_area = {}

    for commit in commit_list:
        sha = str(commit.oid)

        area_str = ''
        if want_areas:
            area = zephyr_commit_area(commit)
            sha_to_area[sha] = area
            area_str = f'{area:13}:'

        gh_prs = list(gh_repo.get_commit(sha).get_pulls())
        if len(gh_prs) != 1:
            sys.exit(f'{sha} has {len(gh_prs)} prs (expected 1): {gh_prs}')
        (gh_pr,) = gh_prs

        number = gh_pr.number
        pr_num_to_commits[number].append(commit)
        pr_num_to_pr[number] = gh_pr

        # Cut off the shortlog to a fixed length for readability of
        # the output.
        shortlog = commit_shortlog(commit)[:15]
        print(f'{sha}:{area_str}{shortlog:15}:{gh_pr.html_url}:{gh_pr.title}',
              file=sys.stderr)

    print(file=sys.stderr)

    # Bundle up the return list.
    ret = []
    for pr_num, commits in pr_num_to_commits.items():
        zephyr_area = None
        if want_areas:
            # Assign an area to the PR by taking the area of each
            # commit, and picking the one that happens the most.
            # Hopefully this is good enough.
            area_counts = defaultdict(int)
            for commit in commits:
                area_counts[sha_to_area[str(commit.oid)]] += 1
            zephyr_area = max(area_counts, key=area_counts.get)

        ret.append(pr_info(pr_num_to_pr[pr_num], commits, zephyr_area))

    return ret
def _check_known_area(self, commit):
    '''Look up an explicitly configured area for a commit.

    The first entry of self.sha_to_area whose key is a prefix of the
    commit's full SHA wins. Failing that, if self.area_by_shortlog is
    set, it is called with the area prefix of the commit's shortlog
    and its result is returned. Otherwise the result is None.
    '''
    sha = str(commit.oid)

    hits = [area for prefix, area in self.sha_to_area.items()
            if sha.startswith(prefix)]
    if hits:
        return hits[0]

    if not self.area_by_shortlog:
        return None

    shortlog = commit_shortlog(commit)
    return self.area_by_shortlog(shortlog_area_prefix(shortlog))
def main(start_manifest, end_manifest, zmp, yaml_indent):
    '''Print mergeup highlights and commit lists between two manifests.

    zmp is the directory containing the zephyr, mcuboot and
    zephyr-fota-samples checkouts; the current working directory is
    used when it is None. Zephyr and MCUboot get highlights from
    repo_mergeup_highlights(); dm-lwm2m and dm-hawkbit-mqtt get one
    commented line per commit. Everything is printed to stdout.
    '''
    root = abspath(abspath(os.getcwd()) if zmp is None else zmp)

    samples = join(root, 'zephyr-fota-samples')
    zephyr = join(root, 'zephyr')
    mcuboot = join(root, 'mcuboot')
    lwm2m = join(samples, 'dm-lwm2m')
    hawkbit = join(samples, 'dm-hawkbit-mqtt')

    start = project_revisions(start_manifest)
    end = project_revisions(end_manifest)

    zephyr_highlights = repo_mergeup_highlights(
        zephyr, start['zephyr'], end['zephyr'], yaml_indent)
    mcuboot_highlights = repo_mergeup_highlights(
        mcuboot, start['mcuboot'], end['mcuboot'], yaml_indent)
    lwm2m_commits = repo_commits(
        lwm2m, start['dm-lwm2m'], end['dm-lwm2m'])
    hawkbit_commits = repo_commits(
        hawkbit, start['dm-hawkbit-mqtt'], end['dm-hawkbit-mqtt'])

    rule = '=' * 70

    def heading(title):
        # Section separator followed by the section title.
        print('#', rule)
        print(title)

    def commit_lines(commits):
        # One commented list item per commit.
        for c in commits:
            print('# - {} {}'.format(commit_shortsha(c), commit_shortlog(c)))

    heading('# Zephyr highlights:')
    print(zephyr_highlights)

    heading('# MCUboot highlights:')
    print(mcuboot_highlights)

    heading('# dm-lwm2m commits:')
    commit_lines(lwm2m_commits)

    heading('# dm-hawkbit-mqtt commits:')
    commit_lines(hawkbit_commits)
def to_revert_json_obj():
    '''Build a JSON-serializable list of likely-merged patches.

    Reads likely_merged (downstream shortlog -> list of upstream
    commits) and outstanding (downstream shortlog -> commit) from the
    enclosing scope. Each entry pairs the downstream patch with its
    upstream matches, as SHA/shortlog dicts.
    '''
    def upstream_entry(c):
        return {'sha': str(c.oid), 'shortlog': commit_shortlog(c)}

    # List ordering isn't that important here, but let's be
    # consistent: follow likely_merged's own order.
    return [
        {
            'downstream': {
                'sha': str(outstanding[sl].oid),
                'shortlog': sl,
            },
            'upstream-matches': [upstream_entry(c) for c in commits],
        }
        for sl, commits in likely_merged.items()
    ]
def postamble(self, analysis, context):
    '''Return the trailing lines of the message, as a list of strings.

    The first section lists analysis.downstream_outstanding_patches
    (shortlog -> commit). If analysis.downstream_merged_patches
    (shortlog -> list of upstream commits) is non-empty, a second
    section follows in which every line is prefixed with '#'.
    The context argument is not used here.
    '''
    outstanding = analysis.downstream_outstanding_patches
    likely_merged = analysis.downstream_merged_patches

    lines = [
        'Outstanding Downstream patches',
        '==============================',
        '',
    ]
    lines.extend('- {} {}'.format(commit_shortsha(c), sl)
                 for sl, c in outstanding.items())
    lines.append('')

    if not likely_merged:
        return lines

    def commented(text):
        # An empty comment line is a bare '#', with no trailing space.
        return '# {}'.format(text) if text else '#'

    warning = [
        'Likely merged downstream patches:',
        'IMPORTANT: You probably need to revert these and re-run!',
        ' Make sure to check the above as well; these are',
        " guesses that aren't always right.",
        '',
    ]
    lines.extend(commented(text) for text in warning)

    for sl, commits in likely_merged.items():
        lines.append(commented('- "{}", likely merged as one of:'.format(sl)))
        for c in commits:
            lines.append(commented('\t- {} {}'.format(commit_shortsha(c),
                                                      commit_shortlog(c))))
        lines.append(commented(''))

    return lines
def _likely_merged_commits(self):
    # Compute patches which are downstream and probably were
    # merged upstream, using a shortlog edit distance heuristic.
    #
    # A downstream patch matches an upstream patch when the edit
    # distance between the downstream shortlog (with the downstream
    # sauce tag removed) and the upstream shortlog is below
    # self._edit_dist_threshold.
    #
    # This is an OrderedDict from commit objects for downstream
    # patches to the list of upstream commit objects with similar
    # shortlogs. Downstream patches without any match are left out.
    # Only shortlogs are compared; authorship is not checked here.
    likely_merged = OrderedDict()
    threshold = self._edit_dist_threshold

    for dc in self.downstream_outstanding:
        # The sauce-free shortlog does not depend on the upstream
        # commit, so compute it once per downstream patch rather
        # than once per comparison.
        bare_sl = shortlog_no_sauce(commit_shortlog(dc),
                                    self._downstream_sauce)

        matches = [
            uc for uc in self.upstream_new
            if editdistance.eval(bare_sl, commit_shortlog(uc)) < threshold
        ]
        if matches:
            likely_merged[dc] = matches

    return likely_merged
def print_loot(self, name, project, z_project, args, json_data):
    # Print a list of out of tree outstanding patches in the given
    # project.
    #
    # name: project name
    # project: the west.manifest.Project instance in the NCS manifest
    # z_project: the Project instance in the upstream manifest
    # args: parsed arguments from argparse; the attributes read here
    #       are files, sha_only and json
    # json_data: dict which gets a json_data[name] entry describing
    #            the project when args.json is set
    #
    # Returns None. Warns and returns early if either revision is not
    # available locally; calls log.die() if the repository cannot be
    # analyzed.
    name_path = _name_and_path(project)

    # Get the upstream revision of the project. The zephyr project
    # has to be treated as a special case.
    if name == 'zephyr':
        z_rev = self.zephyr_rev
    else:
        z_rev = z_project.revision

    n_rev = 'refs/heads/manifest-rev'

    # Both revisions must resolve to objects that exist in the local
    # repository ('git cat-file -e' fails otherwise).
    try:
        nsha = project.sha(n_rev)
        project.git('cat-file -e ' + nsha)
    except subprocess.CalledProcessError:
        log.wrn(f"{name_path}: can't get loot; please run "
                f'"west update" (no "{n_rev}" ref)')
        return

    try:
        zsha = z_project.sha(z_rev)
        z_project.git('cat-file -e ' + zsha)
    except subprocess.CalledProcessError:
        # Report the revision that was actually looked up: for zephyr
        # that is self.zephyr_rev, not z_project.revision.
        log.wrn(f"{name_path}: can't get loot; please fetch upstream URL "
                f'{z_project.url} (need revision {z_rev})')
        return

    try:
        analyzer = nwh.RepoAnalyzer(project, z_project, n_rev, z_rev)
    except nwh.InvalidRepositoryError as ire:
        log.die(f"{name_path}: {str(ire)}")

    try:
        loot = analyzer.downstream_outstanding
    except nwh.UnknownCommitsError as uce:
        log.die(f'{name_path}: unknown commits: {str(uce)}')

    if not loot and log.VERBOSE <= log.VERBOSE_NONE:
        # Don't print output if there's no loot unless verbose
        # mode is on.
        return

    log.banner(name_path)
    log.inf(f' NCS commit: {nsha}\n'
            f'upstream commit: {zsha}')
    log.inf('OOT patches: ' +
            (f'{len(loot)} total' if loot else 'none') +
            (', output limited by --file' if args.files else ''))

    json_sha_list = []
    json_shortlog_list = []
    for c in loot:
        if args.files and not commit_affects_files(c, args.files):
            log.dbg(f"skipping {c.oid}; it doesn't affect file filter",
                    level=log.VERBOSE_VERY)
            continue

        sha = str(c.oid)
        shortlog = commit_shortlog(c)
        if args.sha_only:
            log.inf(sha)
        else:
            log.inf(f'- {sha} {shortlog}')

        if args.json:
            json_sha_list.append(sha)
            json_shortlog_list.append(shortlog)

    if args.json:
        json_data[name] = {
            'path': project.path,
            'ncs-commit': nsha,
            'upstream-commit': zsha,
            'shas': json_sha_list,
            'shortlogs': json_shortlog_list,
        }
def ed(upstream_commit):
    '''Edit distance from the downstream shortlog to an upstream one.

    The downstream shortlog sl (from the enclosing scope) is compared
    with its sauce tag removed.
    '''
    ours = shortlog_no_sauce(sl, self._downstream_sauce)
    theirs = commit_shortlog(upstream_commit)
    return editdistance.eval(ours, theirs)
def analyze(self):
    '''Analyze repository history.

    If this returns without raising an exception, the return
    value is a ZephyrRepoAnalysis.

    Raises InvalidRepositoryError if self.repo_path cannot be opened
    as a Git repository, UnknownCommitsError if any new upstream
    commit has no area, and NotImplementedError if two outstanding
    FIO commits share a shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as err:
        # pygit2 raises KeyError when the current path is not a Git
        # repository.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from err

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): this indexing raises IndexError when there are no
    # new upstream commits; confirm callers never hit that case.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Commits whose area could not be determined are grouped under
    # the None key.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {area: len(patches)
                            for area, patches
                            in upstream_area_patches.items()}

    #
    # Analyze FIO portion of the tree.
    #
    fio_only = self._all_fio_only_commits()
    fio_outstanding = OrderedDict()
    for c in fio_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in fio_outstanding:
                print('WARNING: {} was reverted,'.format(what),
                      "but isn't present in FIO history",
                      file=sys.stderr)
                continue
            del fio_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # fio_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # Keying by shortlog means duplicated shortlogs can't be
            # handled. We could try to support this by looking into
            # the entire revert message to find the "This reverts
            # commit SHA" text and computing reverts based on oid
            # rather than shortlog. That'd be more robust, but let's
            # not worry about it for now.
            if sl in fio_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)
            fio_outstanding[sl] = c

    # Compute likely merged patches: outstanding FIO patches whose
    # sauce-free shortlog is within the edit distance threshold of an
    # upstream commit for which commit_is_fio() is true.
    upstream_fio = [c for c in upstream_new if commit_is_fio(c)]
    likely_merged = OrderedDict()
    for fio_sl in fio_outstanding:
        # Does not depend on the upstream commit; compute it once.
        bare_sl = shortlog_no_sauce(fio_sl)
        matches = [
            c for c in upstream_fio
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[fio_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              fio_outstanding,
                              likely_merged)
def _downstream_outstanding_commits(self):
    # Compute a list of commit objects for outstanding
    # downstream patches.
    #
    # Returns a list of commit objects in 'git rev-list --reverse'
    # order, with reverted patches and (unless
    # self._include_mergeups is set) merge commits filtered out.

    # Convert downstream and upstream revisions to SHAs.
    dsha = self._dp.sha(self._dr)
    usha = self._up.sha(self._ur)

    # First, get a list of all downstream OOT patches. Note:
    # pygit2 doesn't seem to have any ready-made rev-list
    # equivalent, so call out to Project.git() to get the commit
    # SHAs, then wrap them with pygit2 objects.
    cp = self._dp.git('rev-list --reverse {} ^{}'.format(dsha, usha),
                      capture_stdout=True)
    if not cp.stdout.strip():
        return []
    commit_shas = cp.stdout.decode('utf-8').strip().splitlines()
    all_downstream_oot = [self._repo.revparse_single(c)
                          for c in commit_shas]

    # Now filter out reverted patches and mergeups from the
    # complete list of OOT patches. Keyed by full SHA.
    downstream_out = OrderedDict()
    for c in all_downstream_oot:
        sha, sl = str(c.oid), commit_shortlog(c)
        is_revert = shortlog_is_revert(sl)  # this is just a heuristic

        if len(c.parents) > 1:
            if not self._include_mergeups:
                # Skip all the mergeup commits.
                log.dbg('** skipped mergeup {} ("{}")'.format(sha, sl),
                        level=log.VERBOSE_VERY)
                continue
            is_revert = False  # a merge is never a revert

        if is_revert:
            # If a shortlog marks a revert, delete the original commit
            # from downstream_out, if it can be found.
            try:
                rsha = commit_reverts_what(c)
            except ValueError:
                # Badly formatted revert message.
                # Treat as outstanding, but complain.
                log.wrn(
                    'revert {} doesn\'t say "reverts commit <SHA>":\n{}'.
                    format(str(sha), textwrap.indent(c.message, '\t')))
                rsha = None

            if rsha in downstream_out:
                log.dbg('** commit {} ("{}") was reverted in {}'.
                        format(rsha,
                               commit_shortlog(downstream_out[rsha]),
                               sha),
                        level=log.VERBOSE_VERY)
                del downstream_out[rsha]
                continue
            elif rsha is not None:
                # Make sure the reverted commit is in downstream history.
                # (It might not be in all_downstream_oot if e.g.
                # downstream reverts an upstream patch as a hotfix, and we
                # shouldn't warn about that.)
                #
                # 'git merge-base --is-ancestor' reports "not an
                # ancestor" through a non-zero exit status, so the
                # command must not be run with check=True (the
                # default), or the warning below is never reached.
                is_ancestor = self._dp.git(
                    'merge-base --is-ancestor {} {}'.format(rsha, dsha),
                    capture_stdout=True, check=False).returncode == 0
                if not is_ancestor:
                    log.wrn(('commit {} ("{}") reverts {}, '
                             "which isn't in downstream history").
                            format(sha, sl, rsha))

        # Emit a warning if we have a non-revert patch with an
        # incorrect sauce tag. (Again, downstream might carry reverts
        # of upstream patches as hotfixes, which we shouldn't
        # warn about.)
        if (not shortlog_has_sauce(sl, self._downstream_sauce) and
                not is_revert):
            log.wrn(f'{self._dp.name}: bad or missing sauce tag: {sha} ("{sl}")')

        downstream_out[sha] = c
        log.dbg('** added oot patch: {} ("{}")'.format(sha, sl),
                level=log.VERBOSE_VERY)

    return list(downstream_out.values())
def ed(upstream_commit):
    '''Edit distance from the FIO shortlog to an upstream shortlog.

    The FIO shortlog fio_sl (from the enclosing scope) is compared
    with its sauce tag removed.
    '''
    ours = shortlog_no_sauce(fio_sl)
    theirs = commit_shortlog(upstream_commit)
    return editdistance.eval(ours, theirs)
def analyze(self):
    '''Analyze repository history.

    If this returns without raising an exception, the return
    value is a ZephyrRepoAnalysis.

    Raises InvalidRepositoryError if self.repo_path cannot be opened
    as a Git repository, UnknownCommitsError if any new upstream
    commit has no area, and NotImplementedError if two outstanding
    downstream commits share a shortlog.
    '''
    try:
        self.repo = pygit2.Repository(self.repo_path)
    except KeyError as err:
        # pygit2 raises KeyError when the current path is not a Git
        # repository.
        msg = "Can't initialize Git repository at {}"
        raise InvalidRepositoryError(msg.format(self.repo_path)) from err

    #
    # Group all upstream commits by area, and collect patch counts.
    #
    upstream_new = self._new_upstream_only_commits()
    # NOTE(review): this indexing raises IndexError when there are no
    # new upstream commits; confirm callers never hit that case.
    upstream_commit_range = (upstream_new[0], upstream_new[-1])
    upstream_area_patches = defaultdict(list)
    for c in upstream_new:
        area = self._check_known_area(c) or commit_area(c)
        upstream_area_patches[area].append(c)

    # Commits whose area could not be determined are grouped under
    # the None key.
    unknown_area = upstream_area_patches.get(None)
    if unknown_area:
        raise UnknownCommitsError(*unknown_area)

    upstream_area_counts = {area: len(patches)
                            for area, patches
                            in upstream_area_patches.items()}

    #
    # Analyze downstream portion of the tree.
    #
    downstream_only = self._all_downstream_only_commits()
    downstream_outstanding = OrderedDict()
    for c in downstream_only:
        if len(c.parents) > 1:
            # Skip all the mergeup commits.
            continue

        sl = commit_shortlog(c)

        if shortlog_is_revert(sl):
            # If a shortlog marks a revert, delete the original commit
            # from outstanding.
            what = shortlog_reverts_what(sl)
            if what not in downstream_outstanding:
                logging.warning(
                    "%s was reverted, but isn't in downstream history",
                    what)
                continue
            del downstream_outstanding[what]
        else:
            # Non-revert commits just get appended onto
            # downstream_outstanding, keyed by shortlog to make finding
            # them later in case they're reverted easier.
            #
            # Keying by shortlog means duplicated shortlogs can't be
            # handled. We could try to support this by looking into
            # the entire revert message to find the "This reverts
            # commit SHA" text and computing reverts based on oid
            # rather than shortlog. That'd be more robust, but let's
            # not worry about it for now.
            if sl in downstream_outstanding:
                msg = 'duplicated commit shortlogs ({})'.format(sl)
                raise NotImplementedError(msg)

            # Emit a warning if we have a non-revert patch with an
            # incorrect sauce tag. (Downstream might carry reverts
            # of upstream patches as hotfixes, which we shouldn't
            # warn about.)
            if not shortlog_has_sauce(sl, self.downstream_sauce):
                logging.warning('out of tree patch has bad sauce: %s %s',
                                c.oid, sl)

            downstream_outstanding[sl] = c

    # Compute likely merged patches. Only upstream commits whose
    # author email ends with self.downstream_domain are candidates.
    upstream_downstream = [
        c for c in upstream_new
        if c.author.email.endswith(self.downstream_domain)
    ]
    likely_merged = OrderedDict()
    for downstream_sl in downstream_outstanding:
        # Does not depend on the upstream commit; compute it once.
        bare_sl = shortlog_no_sauce(downstream_sl, self.downstream_sauce)
        matches = [
            c for c in upstream_downstream
            if (editdistance.eval(bare_sl, commit_shortlog(c)) <
                self.edit_dist_threshold)
        ]
        if matches:
            likely_merged[downstream_sl] = matches

    return ZephyrRepoAnalysis(upstream_area_counts,
                              upstream_area_patches,
                              upstream_commit_range,
                              downstream_outstanding,
                              likely_merged)
def commit_area(commit):
    '''Return the area of a Zephyr commit, derived from its shortlog.'''
    shortlog = commit_shortlog(commit)
    return shortlog_area(shortlog)