def fetch_pr_data(args, cache):
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)

    if args.from_commit is None or args.to_commit is None:
        print(
            "--from-commit and --to-commit must be specified if --skip-query is not used"
        )
        exit(1)

    i = 0
    page_size = 80
    cursor = f"{args.from_commit} {i}"

    while True:
        r = github.graphql(
            query=PRS_QUERY,
            variables={
                "owner": user,
                "name": repo,
                "after": cursor,
                "pageSize": page_size,
            },
        )
        data = r["data"]["repository"]["defaultBranchRef"]["target"]["history"]
        if not data["pageInfo"]["hasNextPage"]:
            break
        cursor = data["pageInfo"]["endCursor"]
        results = data["nodes"]

        to_add = []
        stop = False
        for r in results:
            if r["oid"] == args.to_commit:
                print(f"Found {r['oid']}, stopping")
                stop = True
                break
            else:
                to_add.append(r)

        oids = [r["oid"] for r in to_add]
        print(oids)
        append_and_save(to_add, cache)
        if stop:
            break
        print(i)
        i += page_size
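
# A minimal usage sketch for fetch_pr_data, assuming parsed arguments carrying
# `from_commit` and `to_commit` attributes and a `cache` object accepted by the
# `append_and_save` helper it calls; the commit oids and empty cache below are
# placeholders, not values from the real script.
def _example_fetch_pr_data() -> None:
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--from-commit", required=True)
    parser.add_argument("--to-commit", required=True)
    example_args = parser.parse_args(["--from-commit", "<oid>", "--to-commit", "<oid>"])
    fetch_pr_data(example_args, cache={})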
def fetch_issue(github: GitHubRepo, issue_number: int):
    query = """query($owner: String!, $name: String!, $number: Int!){
        repository(owner: $owner, name: $name) {
            issue(number: $number) {
                body
                comments(first:100) {
                    nodes {
                        body
                    }
                }
            }
        }
    }"""
    r = github.graphql(
        query,
        variables={
            "owner": github.user,
            "name": github.repo,
            "number": issue_number,
        },
    )
    return r
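
# A minimal usage sketch for fetch_issue, assuming GITHUB_TOKEN is set in the
# environment and `os` is imported as in the other scripts here; the issue
# number is a placeholder. It shows how the GraphQL payload unpacks into the
# issue body plus the bodies of its first 100 comments.
def _example_fetch_issue() -> None:
    github = GitHubRepo(user="apache", repo="tvm", token=os.environ["GITHUB_TOKEN"])
    r = fetch_issue(github, issue_number=1)
    issue = r["data"]["repository"]["issue"]
    comment_bodies = [comment["body"] for comment in issue["comments"]["nodes"]]
    print(issue["body"], comment_bodies)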
# deploying it for everyone to avoid spamming in the case of bugs or
# ongoing development.
if args.allowlist:
    author_allowlist = args.allowlist.split(",")
else:
    github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
    allowlist_issue = github.get("issues/9983")
    author_allowlist = set(find_reviewers(allowlist_issue["body"]))

if args.pr_json:
    r = json.loads(args.pr_json)
else:
    q = prs_query(user, repo)
    r = github.graphql(q)

now = datetime.datetime.utcnow()
if args.now:
    now = datetime.datetime.strptime(args.now, GIT_DATE_FORMAT)

# Loop until all PRs have been checked
while True:
    prs = r["data"]["repository"]["pullRequests"]["nodes"]

    # Don't look at draft PRs at all
    prs_to_check = []
    for pr in prs:
        if pr["isDraft"]:
            print(f"Skipping #{pr['number']} since it's a draft")
        elif pr["number"] <= cutoff_pr_number:
class PR:
    def __init__(
        self,
        number: int,
        owner: str,
        repo: str,
        dry_run: bool = False,
        raw_data: Dict[str, Any] = None,
    ):
        self.owner = owner
        self.number = number
        self.repo_name = repo
        self.dry_run = dry_run

        if dry_run and raw_data:
            # In test mode there is no need to fetch anything
            self.raw = raw_data
            self.github = None
        else:
            self.github = GitHubRepo(user=owner, repo=repo, token=os.environ["GITHUB_TOKEN"])
            if os.getenv("DEBUG", "0") == "1":
                # For local runs fill in the requested data but cache it for
                # later use
                cached_path = Path("pr.json")
                if not cached_path.exists():
                    self.raw = self.fetch_data()
                    with open(cached_path, "w") as f:
                        json.dump(self.raw, f, indent=2)
                else:
                    with open(cached_path) as f:
                        self.raw = json.load(f)
            else:
                # Usual path, fetch the PR's data based on the number from
                # GitHub
                self.raw = self.fetch_data()

        def checker(obj, parent_key):
            """
            Verify that any paged results don't have extra data (if so the bot
            may still work since most relevant comments will be more recent)
            """
            if parent_key == "pageInfo":
                if obj.get("hasPreviousPage", False):
                    warnings.warn(f"Found {obj} with a previous page, bot may be missing data")
                if obj.get("hasNextPage", False):
                    warnings.warn(f"Found {obj} with a next page, bot may be missing data")

        walk(self.raw, checker)

        logging.info(f"Verified data, running with PR {to_json_str(self.raw)}")

    def __repr__(self):
        return json.dumps(self.raw, indent=2)

    def plus_one(self, comment: Dict[str, Any]):
        """
        React with a thumbs up to a comment
        """
        url = f"issues/comments/{comment['id']}/reactions"
        data = {"content": "+1"}
        if self.dry_run:
            logging.info(f"Dry run, would have +1'ed to {url} with {data}")
        else:
            self.github.post(url, data=data)

    def head_commit(self):
        return self.raw["commits"]["nodes"][0]["commit"]

    def co_authors(self) -> List[str]:
        authors = []
        for commit in self.raw["authorCommits"]["nodes"]:
            # Co-authors always come after the main author according to the
            # GitHub docs, so ignore the first item
            for author in commit["commit"]["authors"]["nodes"][1:]:
                name = author["name"]
                email = author["email"]
                authors.append(f"{name} <{email}>")

        return list(set(authors))

    def head_oid(self):
        return self.head_commit()["oid"]

    def ci_jobs(self) -> List[CIJob]:
        """
        Get a list of all CI jobs (GitHub Actions and other) in a unified format
        """
        jobs = []
        for item in self.head_commit()["statusCheckRollup"]["contexts"]["nodes"]:
            if "checkSuite" in item:
                # GitHub Actions job, parse separately
                status = item["conclusion"]
                if status is None:
                    # If the 'conclusion' isn't filled out the job hasn't
                    # finished yet
                    status = "PENDING"
                jobs.append(
                    {
                        "name": item["checkSuite"]["workflowRun"]["workflow"]["name"]
                        + " / "
                        + item["name"],
                        "url": item["url"],
                        "status": status.upper(),
                    }
                )
            else:
                # GitHub Status (e.g. from Jenkins)
                jobs.append(
                    {
                        "name": item["context"],
                        "url": item["targetUrl"],
                        "status": item["state"].upper(),
                    }
                )

        logging.info(f"Found CI jobs for {self.head_commit()['oid']} {to_json_str(jobs)}")
        return jobs

    def reviews(self) -> List[Review]:
        return self.raw["reviews"]["nodes"]

    def head_commit_reviews(self) -> List[Review]:
        """
        Find reviews associated with the head commit
        """
        commits_to_review_status: Dict[str, List[Review]] = {}

        for review in self.reviews():
            if not review["authorCanPushToRepository"]:
                # ignore reviews from non-committers
                continue

            oid = review["commit"]["oid"]
            if oid in commits_to_review_status:
                commits_to_review_status[oid].append(review)
            else:
                commits_to_review_status[oid] = [review]

        # Only use the data for the head commit of the PR
        head_reviews = commits_to_review_status.get(self.head_oid(), [])
        return head_reviews

    def fetch_data(self):
        """
        Fetch the data for this PR from GitHub
        """
        return self.github.graphql(
            query=PR_QUERY,
            variables={
                "owner": self.owner,
                "name": self.repo_name,
                "number": self.number,
            },
        )["data"]["repository"]["pullRequest"]

    def search_collaborator(self, user: str) -> List[Dict[str, Any]]:
        """
        Query GitHub for collaborators matching 'user'
        """
        return self.github.graphql(
            query=COLLABORATORS_QUERY,
            variables={
                "owner": self.owner,
                "name": self.repo_name,
                "user": user,
            },
        )["data"]["repository"]["collaborators"]["nodes"]

    def comment(self, text: str) -> None:
        """
        Leave the comment 'text' on this PR
        """
        logging.info(f"Commenting:\n{text}")
        # TODO: Update latest comment in-place if there has been no activity
        data = {"body": text}
        url = f"issues/{self.number}/comments"
        if self.dry_run:
            logging.info(
                f"Dry run, would have commented on url={url} commenting with data={to_json_str(data)}"
            )
            return

        self.github.post(url, data=data)

    def state(self) -> str:
        """
        PR state (OPEN, CLOSED, MERGED, etc)
        """
        return self.raw["state"]

    def processed_body(self) -> str:
        body = self.raw["body"].strip().replace("\r", "")
        # Remove any @-mentions of people
        body = re.sub(r"(\s)@", r"\g<1>", body)
        # Remove the auto-inserted text since it's not useful to have in the commit log
        body = re.sub(THANKS_MESSAGE, "\n\n", body)
        return body.strip()

    def body_with_co_authors(self) -> str:
        """
        Add 'Co-authored-by' strings to the PR body based on the prior commits in the PR
        """
        body = self.processed_body()
        author_lines = self.co_authors()
        logging.info(f"Found co-authors: author_lines={author_lines}")
        full_author_lines = [f"Co-authored-by: {author_line}" for author_line in author_lines]

        authors_to_add = []
        for author_line in author_lines:
            if author_line not in body:
                authors_to_add.append(f"Co-authored-by: {author_line}")

        if len(authors_to_add) > 0:
            # If the line isn't already in the PR body (it could have been
            # added manually), put it in
            full_author_text = "\n".join(authors_to_add)
            body = f"{body}\n\n{full_author_text}"

        return body

    def merge(self) -> None:
        """
        Request a merge of this PR via the GitHub API
        """
        url = f"pulls/{self.number}/merge"

        title = self.raw["title"] + f" (#{self.number})"
        body = self.body_with_co_authors()
        logging.info(f"Full commit:\n{title}\n\n{body}")

        data = {
            "commit_title": title,
            "commit_message": body,
            # The SHA is necessary in case there was an update right when this
            # script ran, GitHub will sort out who won
            "sha": self.head_oid(),
            "merge_method": "squash",
        }
        if self.dry_run:
            logging.info(f"Dry run, would have merged with url={url} and data={to_json_str(data)}")
            return

        self.github.put(url, data=data)

    def author(self) -> str:
        return self.raw["author"]["login"]
    def find_failed_ci_jobs(self) -> List[CIJob]:
        # NEUTRAL is GitHub Actions' way of saying cancelled
        return [
            job
            for job in self.ci_jobs()
            if job["status"] not in {"SUCCESS", "SUCCESSFUL", "SKIPPED"}
        ]

    def find_missing_expected_jobs(self) -> List[str]:
        # Map of job name: has seen in completed jobs
        seen_expected_jobs = {name: False for name in EXPECTED_JOBS}
        logging.info(f"Expected to see jobs: {seen_expected_jobs}")

        missing_expected_jobs = []
        for job in self.ci_jobs():
            seen_expected_jobs[job["name"]] = True

        for name, seen in seen_expected_jobs.items():
            if not seen:
                missing_expected_jobs.append(name)

        return missing_expected_jobs

    def merge_if_passed_checks(self) -> None:
        failed_ci_jobs = self.find_failed_ci_jobs()
        all_ci_passed = len(failed_ci_jobs) == 0
        has_one_approval = False

        if not all_ci_passed:
            failed_jobs_msg = "\n".join(
                [f" * [{job['name']} (`{job['status']}`)]({job['url']})" for job in failed_ci_jobs]
            )
            self.comment(
                f"Cannot merge, these CI jobs are not successful on {self.head_oid()}:\n{failed_jobs_msg}"
            )
            return

        missing_expected_jobs = self.find_missing_expected_jobs()
        if len(missing_expected_jobs) > 0:
            missing_jobs_msg = "\n".join([f" * `{name}`" for name in missing_expected_jobs])
            self.comment(f"Cannot merge, missing expected jobs:\n{missing_jobs_msg}")
            return

        head_commit_reviews = self.head_commit_reviews()
        for review in head_commit_reviews:
            if review["state"] == "CHANGES_REQUESTED":
                self.comment(
                    f"Cannot merge, found [this review]({review['url']}) on {self.head_oid()} with changes requested"
                )
                return

            if review["state"] == "APPROVED":
                has_one_approval = True
                logging.info(f"Found approving review: {to_json_str(review)}")

        if has_one_approval and all_ci_passed:
            self.merge()
        elif not has_one_approval:
            self.comment(
                f"Cannot merge, did not find any approving reviews from users with write access on {self.head_oid()}"
            )
            return
        elif not all_ci_passed:
            self.comment(f"Cannot merge, CI did not pass on {self.head_oid()}")
            return

    def rerun_jenkins_ci(self) -> None:
        url = JENKINS_URL + f"job/tvm/job/PR-{self.number}/buildWithParameters"
        logging.info(f"Rerunning ci with URL={url}")
        if self.dry_run:
            logging.info("Dry run, not sending POST")
        else:
            post(url, auth=("tvm-bot", TVM_BOT_JENKINS_TOKEN))
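
# A minimal sketch of driving the merge logic above in dry-run mode, assuming a
# pre-fetched GraphQL payload (e.g. a cached pr.json) is passed in as raw_data;
# the PR number and owner/repo below are placeholders.
def _example_dry_run_merge(raw_pr_data: Dict[str, Any]) -> None:
    pr = PR(
        number=12345,
        owner="apache",
        repo="tvm",
        dry_run=True,          # log the intended POST/PUT calls instead of sending them
        raw_data=raw_pr_data,
    )
    # Comments with the reason if CI jobs failed, expected jobs are missing, or
    # approvals are absent; otherwise requests a squash merge (logged only here).
    pr.merge_if_passed_checks()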
parser.add_argument(
    "--testonly-json", help="(testing) data to use instead of fetching from GitHub"
)
args = parser.parse_args()

remote = git(["config", "--get", f"remote.{args.remote}.url"])
user, repo = parse_remote(remote)
# TODO: Remove this before landing
user, repo = ("apache", "tvm")

if args.testonly_json:
    r = json.loads(args.testonly_json)
else:
    github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
    q = commits_query(user, repo)
    r = github.graphql(q)

commits = r["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]

# Limit GraphQL pagination
MAX_COMMITS_TO_CHECK = 50

i = 0
while i < MAX_COMMITS_TO_CHECK:
    # Check each commit
    for commit in commits:
        if commit_passed_ci(commit):
            print(f"Found last good commit: {commit['oid']}: {commit['messageHeadline']}")
            if not args.dry_run:
                update_branch(
                    user=user,