def analyze_pr(merge, pr):
    """Analyze one PR merge commit and collect its release-note data.

    :param merge: the merge commit that brought the PR into the branch.
    :param pr: the PR reference string, e.g. "#12345"; the leading
        character is stripped to build the pull ref name.
    """
    allprs.add(pr)

    # GitHub keeps the tip of each PR branch under refs/pull/<num>.
    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merges embed "<num>: <title> r=... a=..." in the commit message;
    # GitHub merges carry the PR title on the third line of the message.
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    note = ''
    if m is None:
        # GitHub merge
        note = merge.message.split('\n', 3)[2]
    else:
        # Bors merge
        note = m.group('message')
    note = note.strip()

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        # sys.exit rather than the site-provided exit() builtin, which is
        # intended for interactive sessions and may not be available.
        sys.exit(-1)
    merge_base = merge_base_result[0]

    seen_commits = set()
    missing_items = []
    authors = set()
    ncommits = 0
    for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
        spin()
        if commit in seen_commits:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        seen_commits.add(commit)
        if not commit.message.startswith("Merge"):
            missing_item, prauthors = process_release_notes(pr, note, commit)
            authors.update(prauthors)
            ncommits += 1
            if missing_item is not None:
                missing_items.append(missing_item)

    if ncommits == len(missing_items):
        # None of the commits found had a release note. List them.
        for item in missing_items:
            missing_release_notes.append(item)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, note, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def analyze_pr(merge, pr):
    """Analyze one PR merge commit and collect its release-note data.

    :param merge: the merge commit that brought the PR into the branch.
    :param pr: the PR reference string, e.g. "#12345"; the leading
        character is stripped to build the pull ref name.
    """
    allprs.add(pr)

    # GitHub keeps the tip of each PR branch under refs/pull/<num>.
    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merges embed "<num>: <title> r=... a=..." in the commit message;
    # GitHub merges carry the PR title on the third line of the message.
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    note = ''
    if m is None:
        # GitHub merge
        note = merge.message.split('\n', 3)[2]
    else:
        # Bors merge
        note = m.group('message')
    note = note.strip()

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        # sys.exit rather than the site-provided exit() builtin, which is
        # intended for interactive sessions and may not be available.
        sys.exit(-1)
    merge_base = merge_base_result[0]

    seen_commits = set()
    missing_items = []
    authors = set()
    ncommits = 0
    for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
        spin()
        if commit in seen_commits:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        seen_commits.add(commit)
        if not commit.message.startswith("Merge"):
            missing_item, prauthors = process_release_notes(pr, note, commit)
            authors.update(prauthors)
            ncommits += 1
            if missing_item is not None:
                missing_items.append(missing_item)

    if ncommits == len(missing_items):
        # None of the commits found had a release note. List them.
        for item in missing_items:
            missing_release_notes.append(item)

    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, note, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def experiment_data(self, commit=None, must_contain_results=False):
    """Collect experiment data recorded as JSON in annotated tag messages.

    :param commit: the commit that all the experiments should have happened
                   or None to include all
    :type commit: str
    :param must_contain_results: include only tags that contain results
    :type must_contain_results: bool
    :return: all the experiment data, keyed by tag name
    :rtype: dict
    """
    # Resolve the target commit once: it is loop-invariant, so there is no
    # need to call name_to_object() for every candidate tag.
    target_hexsha = None
    if commit is not None:
        target_hexsha = name_to_object(self.__repository, commit).hexsha
    results = {}
    for tag in self.__repository.tags:
        if not tag.name.startswith(self.__tag_prefix):
            continue
        # Lightweight tags have no tag object (tag.tag is None) and thus
        # carry no message; they cannot hold an experiment payload, so skip
        # them instead of crashing with an AttributeError.
        if tag.tag is None:
            continue
        data = json.loads(tag.tag.message)
        if must_contain_results and "results" not in data:
            continue
        if target_hexsha is not None and tag.tag.object.hexsha != target_hexsha:
            continue
        results[tag.name] = data
    return results
def experiment_data(self, commit=None, must_contain_results=False):
    """Return the experiment data stored in this repository's tags.

    :param commit: the commit that all the experiments should have happened
                   or None to include all
    :type commit: str
    :param must_contain_results: include only tags that contain results
    :type must_contain_results: bool
    :return: all the experiment data
    :rtype: dict
    """
    collected = {}
    for candidate in self.__repository.tags:
        # Only tags carrying our experiment prefix are of interest.
        if not candidate.name.startswith(self.__tag_prefix):
            continue
        payload = json.loads(candidate.tag.message)
        # Honor the results filter before the (more expensive) commit check.
        has_wanted_results = not (must_contain_results and "results" not in payload)
        if has_wanted_results and (
                commit is None
                or candidate.tag.object.hexsha
                == name_to_object(self.__repository, commit).hexsha):
            collected[candidate.name] = payload
    return collected
def analyze_pr(merge, pr, parent_idx):
    """Analyze one PR merge commit: extract its title, walk its commits to
    collect release notes and authors, and record the aggregate item.

    :param merge: the merge commit that brought the PR into the branch.
    :param pr: the PR reference string, e.g. "#12345".
    :param parent_idx: index into merge.parents used as a fallback tip when
        the refs/pull/<num> ref is missing from the repository.
    """
    allprs.add(pr)

    # Bors merges embed "<num>: <title> r=... a=..." in the commit message;
    # GitHub merges carry the PR title on the third line of the message.
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    title = ''
    if m is None:
        # GitHub merge
        title = merge.message.split('\n', 3)[2]
    else:
        # Bors merge
        title = m.group('message')
    title = title.strip()

    try:
        # The PR branch tip is normally kept under refs/pull/<num>.
        refname = pull_ref_prefix + "/" + pr[1:]
        tip = name_to_object(repo, refname)
    except exc.BadName:
        # Oddly, we have at least one PR (#47761) which does not have a tip
        # at /refs/pull/47761, although it's valid and merged.
        # As of 2020-06-08 it's the only PR missing a branch tip there.
        print("\nuh-oh! can't find PR head in repo", pr, file=sys.stderr)
        # We deal with it here assuming that the order of the parents
        # of the merge commit is the same as reported by the
        # "Merge ..." string in the merge commit's message.
        # This happens to be true of the missing PR above as well
        # as for several other merge commits with more than two parents.
        tip = merge.parents[parent_idx]
        print(
            "check at https://github.com/cockroachdb/cockroach/pull/%s that the last commit is %s"
            % (pr[1:], tip.hexsha),
            file=sys.stderr)
        # TODO(knz): If this method is reliable, this means we don't
        # need the pull tips at /refs/pull *at all* which could
        # streamline the whole experience.
        # This should be investigated further.

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        sys.exit(-1)
    merge_base = merge_base_result[0]

    seen_commits = set()
    missing_items = []
    authors = set()
    ncommits = 0
    # Walk every commit that is in the PR branch but not in the merge base.
    for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
        spin()
        if commit in seen_commits:
            # We may be seeing the same commit twice if a feature branch has
            # been forked in sub-branches. Just skip over what we've seen
            # already.
            continue
        seen_commits.add(commit)
        # Sub-merge commits inside the PR branch are not counted as work.
        if not commit.message.startswith("Merge"):
            missing_item, prauthors = process_release_notes(pr, title, commit)
            authors.update(prauthors)
            ncommits += 1
            if missing_item is not None:
                missing_items.append(missing_item)

    if ncommits == len(missing_items):
        # None of the commits found had a release note. List them.
        for item in missing_items:
            missing_release_notes.append(item)

    # Aggregate diff statistics over the whole PR range.
    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)
    collect_item(pr, title, merge.hexsha[:shamin], ncommits, authors,
                 stats.total, merge.committed_date)
def analyze_pr(merge, pr):
    """Analyze one PR merge commit: extract its note/title, walk the PR-side
    commits to collect release notes and authors, and record the result in
    per_group_history.

    :param merge: the merge commit that brought the PR into the branch.
    :param pr: the PR reference string, e.g. "#12345".
    """
    allprs.add(pr)

    # GitHub keeps the tip of each PR branch under refs/pull/<num>.
    refname = pull_ref_prefix + "/" + pr[1:]
    tip = name_to_object(repo, refname)

    # Bors merges embed "<num>: <title> r=... a=..." in the commit message;
    # GitHub merges carry the note starting at the third message line.
    noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
    m = noteexpr.search(merge.message)
    note = ''
    if m is None:
        # GitHub merge
        note = '\n'.join(merge.message.split('\n')[2:])
    else:
        # Bors merge
        note = m.group('message')

    merge_base_result = repo.merge_base(merge.parents[0], tip)
    if len(merge_base_result) == 0:
        # Report on stderr, like the other fatal paths, before exiting.
        print("uh-oh! can't find merge base! pr", pr, file=sys.stderr)
        # sys.exit rather than the site-provided exit() builtin, which is
        # intended for interactive sessions and may not be available.
        sys.exit(-1)
    merge_base = merge_base_result[0]

    # Breadth-first walk of the PR-side history starting at the branch tip.
    commits_to_analyze = [tip]
    authors = set()
    ncommits = 0
    while commits_to_analyze:
        spin()
        commit = commits_to_analyze.pop(0)
        # Sub-merge commits inside the PR branch are not counted as work.
        if not commit.message.startswith("Merge"):
            extract_release_notes(pr, note, commit)
            ncommits += 1
            author = author_aliases.get(commit.author.name, commit.author.name)
            if author != 'GitHub':
                authors.add(author)
            committer = author_aliases.get(commit.committer.name,
                                           commit.committer.name)
            if committer != 'GitHub':
                authors.add(committer)
        # Exclude any parents reachable from the other side of the
        # PR merge commit.
        for parent in commit.parents:
            if not repo.is_ancestor(parent, merge.parents[0]):
                commits_to_analyze.append(parent)

    # Aggregate diff statistics over the whole PR range.
    text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
    stats = Stats._list_from_string(repo, text)

    individual_authors.update(authors)
    if not authors:
        authors.add("Unknown Author")

    item = {
        'title': note,
        'pr': pr,
        'sha': merge.hexsha[:shamin],
        'ncommits': ncommits,
        'authors': ", ".join(sorted(authors)),
        'insertions': stats.total['insertions'],
        'deletions': stats.total['deletions'],
        'files': stats.total['files'],
        'lines': stats.total['lines'],
    }
    # Group history items by their (joined) author set.
    history = per_group_history.get(item['authors'], [])
    history.append(item)
    per_group_history[item['authors']] = history