def analyze_pr(merge, pr):
    """Analyze a merged PR: extract its title, scan its commits for
    release notes, and record an aggregate item for the report.

    merge: the merge commit that brought the PR into the main branch.
    pr: the PR reference string, e.g. "#12345".

    Side effects: registers the PR in ``allprs``, may append to
    ``missing_release_notes``, and calls ``collect_item`` with the
    aggregated data. Exits the process if no merge base can be found.
    """
    allprs.add(pr)

    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merge messages embed the PR title as "NNNNN: <title> r=... a=...".
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    if m is None:
        # GitHub merge: the title is on the third line of the message.
        note = merge.message.split('\n', 3)[2]
    else:
        # Bors merge
        note = m.group('message')
    note = note.strip()

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        # sys.exit (not the bare exit() REPL helper) for a script abort.
        sys.exit(-1)
    merge_base = merge_base_result[0]

    seen_commits = set()
    missing_items = []
    authors = set()
    ncommits = 0
    for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
        spin()
        if commit in seen_commits:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        seen_commits.add(commit)
        if not commit.message.startswith("Merge"):
            missing_item, prauthors = process_release_notes(pr, note, commit)
            authors.update(prauthors)
            ncommits += 1
            if missing_item is not None:
                missing_items.append(missing_item)

    if ncommits == len(missing_items):
        # None of the commits found had a release note. List them.
        missing_release_notes.extend(missing_items)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, note, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def analyze_pr(merge, pr):
    """Analyze one merged PR.

    Extracts the PR title (from either a Bors or GitHub merge message),
    walks the commits that belong to the PR, collects release-note status
    and authorship via ``process_release_notes``, and finally hands the
    aggregate off to ``collect_item``.

    merge: the merge commit on the main branch.
    pr: PR reference string such as "#12345".
    """
    allprs.add(pr)

    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merges look like "NNNNN: <title> r=... a=..." in the message.
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    if m is not None:
        # Bors merge
        note = m.group('message')
    else:
        # GitHub merge: the third message line carries the PR title.
        note = merge.message.split('\n', 3)[2]
    note = note.strip()

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        # Abort via sys.exit rather than the interactive exit() helper.
        sys.exit(-1)
    merge_base = merge_base_result[0]

    seen_commits = set()
    missing_items = []
    authors = set()
    ncommits = 0
    for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
        spin()
        if commit in seen_commits:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        seen_commits.add(commit)
        if not commit.message.startswith("Merge"):
            missing_item, prauthors = process_release_notes(pr, note, commit)
            authors.update(prauthors)
            ncommits += 1
            if missing_item is not None:
                missing_items.append(missing_item)

    if ncommits == len(missing_items):
        # None of the commits found had a release note. List them.
        missing_release_notes.extend(missing_items)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, note, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def test__list_from_string(self):
    """Parse the 'diff_numstat' fixture and verify both the aggregate
    totals and the per-file insertion/deletion counts."""
    numstat_output = fixture('diff_numstat')
    parsed = Stats._list_from_string(self.rorepo, numstat_output)

    # Aggregate totals across both files in the fixture.
    for key, expected in (('files', 2), ('lines', 52),
                          ('insertions', 29), ('deletions', 23)):
        assert_equal(expected, parsed.total[key])

    # Per-file breakdown.
    assert_equal(29, parsed.files["a.txt"]['insertions'])
    assert_equal(18, parsed.files["a.txt"]['deletions'])
    assert_equal(0, parsed.files["b.txt"]['insertions'])
    assert_equal(5, parsed.files["b.txt"]['deletions'])
def stats(self):
    """Create a git stat from changes between this commit and its first
    parent or from all changes done if this is the very first commit.

    :return: git.Stats"""
    if not self.parents:
        # Root commit: diff-tree --root emits the commit sha as its first
        # line; drop it and re-emit the "<ins>\t<del>\t<file>" rows so the
        # text matches the plain `git diff --numstat` shape below.
        text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True)
        rows = []
        for line in text.splitlines()[1:]:
            # maxsplit=2 keeps a filename containing tabs in one field.
            (insertions, deletions, filename) = line.split("\t", 2)
            rows.append("%s\t%s\t%s\n" % (insertions, deletions, filename))
        # Single join instead of repeated += (quadratic concatenation).
        text = "".join(rows)
    else:
        text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha,
                                  '--', numstat=True)
    return Stats._list_from_string(self.repo, text)
def analyze_pr(merge, pr, parent_idx):
    """Analyze one merged PR: resolve its tip, walk its commits for
    release notes, and record the aggregate item.

    merge: the merge commit on the main branch.
    pr: PR reference string such as "#12345".
    parent_idx: index of this PR's tip among ``merge.parents``, used as a
        fallback when /refs/pull has no tip for the PR.
    """
    allprs.add(pr)

    # Bors merges embed the PR title as "NNNNN: <title> r=... a=...".
    pattern = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    match = pattern.search(merge.message)
    if match is not None:
        # Bors merge
        title = match.group('message')
    else:
        # GitHub merge: the third line of the message carries the title.
        title = merge.message.split('\n', 3)[2]
    title = title.strip()

    try:
        tip = name_to_object(repo, pull_ref_prefix + "/" + pr[1:])
    except exc.BadName:
        # Oddly, we have at least one PR (#47761) which does not have a tip
        # at /refs/pull/47761, although it's valid and merged.
        # As of 2020-06-08 it's the only PR missing a branch tip there.
        print("\nuh-oh! can't find PR head in repo", pr, file=sys.stderr)
        # We deal with it here assuming that the order of the parents
        # of the merge commit is the same as reported by the
        # "Merge ..." string in the merge commit's message.
        # This happens to be true of the missing PR above as well
        # as for several other merge commits with more than two parents.
        tip = merge.parents[parent_idx]
        print(
            "check at https://github.com/cockroachdb/cockroach/pull/%s that the last commit is %s"
            % (pr[1:], tip.hexsha), file=sys.stderr)
        # TODO(knz): If this method is reliable, this means we don't
        # need the pull tips at /refs/pull *at all* which could
        # streamline the whole experience.
        # This should be investigated further.

    bases = repo.merge_base(merge.parents[0], tip)
    if len(bases) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        sys.exit(-1)
    merge_base = bases[0]

    visited = set()
    items_without_notes = []
    authors = set()
    ncommits = 0
    for commit in repo.iter_commits("%s..%s" % (merge_base.hexsha, tip.hexsha)):
        spin()
        if commit in visited:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        visited.add(commit)
        if commit.message.startswith("Merge"):
            continue
        missing_item, prauthors = process_release_notes(pr, title, commit)
        authors.update(prauthors)
        ncommits += 1
        if missing_item is not None:
            items_without_notes.append(missing_item)

    if ncommits == len(items_without_notes):
        # None of the commits found had a release note. List them.
        missing_release_notes.extend(items_without_notes)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, title, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def analyze_pr(merge, pr):
    """Analyze a merged PR: extract its release notes, gather its authors
    and diff stats, and record the result in ``per_group_history``.

    merge: the merge commit that brought the PR in.
    pr: the PR reference string, e.g. "#12345".

    Side effects: registers the PR in ``allprs``, updates
    ``individual_authors`` and ``per_group_history``, and calls
    ``extract_release_notes`` per commit. Exits if no merge base exists.
    """
    # Local import so the rest of the module is unaffected; deque gives
    # O(1) popleft where list.pop(0) is O(n) per pop.
    from collections import deque

    allprs.add(pr)

    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merge messages embed the PR title as "NNNNN: <title> r=... a=...".
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    if m is None:
        # GitHub merge: keep everything after the two-line subject.
        note = '\n'.join(merge.message.split('\n')[2:])
    else:
        # Bors merge
        note = m.group('message')

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        # Report to stderr (consistent with the other diagnostics) and abort.
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        sys.exit(-1)
    merge_base = merge_base_result[0]

    # Breadth-first walk of the PR's side of history starting at its tip.
    commits_to_analyze = deque([tip])
    authors = set()
    ncommits = 0
    while commits_to_analyze:
        spin()
        commit = commits_to_analyze.popleft()
        if not commit.message.startswith("Merge"):
            extract_release_notes(pr, note, commit)
            ncommits += 1
            author = author_aliases.get(commit.author.name, commit.author.name)
            if author != 'GitHub':
                authors.add(author)
            committer = author_aliases.get(commit.committer.name,
                                           commit.committer.name)
            if committer != 'GitHub':
                authors.add(committer)
        # Exclude any parents reachable from the other side of the
        # PR merge commit.
        for parent in commit.parents:
            if not repo.is_ancestor(parent, merge.parents[0]):
                commits_to_analyze.append(parent)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)

    individual_authors.update(authors)
    if len(authors) == 0:
        authors.add("Unknown Author")

    item = {
        'title': note,
        'pr': pr,
        'sha': merge.hexsha[:shamin],
        'ncommits': ncommits,
        'authors': ", ".join(sorted(authors)),
        'insertions': stats.total['insertions'],
        'deletions': stats.total['deletions'],
        'files': stats.total['files'],
        'lines': stats.total['lines'],
    }
    history = per_group_history.get(item['authors'], [])
    history.append(item)
    per_group_history[item['authors']] = history