예제 #1
0
def fetch_pr_data(args, cache):
    """
    Page through the default branch's commit history and save each page.

    Starting from the cursor ``"<from_commit> 0"``, pages of ``page_size``
    history nodes are requested via ``PRS_QUERY``. Nodes are collected until
    one whose ``oid`` equals ``args.to_commit`` is seen (that node itself is
    not collected) or until the history has no further page. Each page's
    collected nodes are handed to ``append_and_save`` together with ``cache``.

    Parameters
    ----------
    args
        Parsed command line arguments; ``from_commit`` and ``to_commit`` are
        read here and both must be set.
    cache
        Passed through untouched to ``append_and_save``.

    Exits the process with status 1 if either commit argument is missing.
    """
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)

    if args.from_commit is None or args.to_commit is None:
        print(
            "--from-commit and --to-commit must be specified if --skip-query is not used"
        )
        exit(1)

    # 'i' is only used to seed the first cursor and to print progress; every
    # later cursor comes from the previous response's 'endCursor'
    i = 0
    page_size = 80
    cursor = f"{args.from_commit} {i}"

    while True:
        response = github.graphql(
            query=PRS_QUERY,
            variables={
                "owner": user,
                "name": repo,
                "after": cursor,
                "pageSize": page_size,
            },
        )
        history = response["data"]["repository"]["defaultBranchRef"]["target"]["history"]
        page_info = history["pageInfo"]

        # Process this page's nodes BEFORE looking at 'hasNextPage', otherwise
        # the final page would be dropped without being searched or saved
        to_add = []
        stop = False
        for commit in history["nodes"]:
            if commit["oid"] == args.to_commit:
                print(f"Found {commit['oid']}, stopping")
                stop = True
                break
            to_add.append(commit)

        oids = [commit["oid"] for commit in to_add]
        print(oids)
        append_and_save(to_add, cache)

        if stop or not page_info["hasNextPage"]:
            break

        cursor = page_info["endCursor"]
        print(i)
        i += page_size
예제 #2
0
def fetch_issue(github: GitHubRepo, issue_number: int):
    """
    Query GitHub's GraphQL API for a single issue.

    The query asks for the issue's body and the bodies of its comments
    (``first:100``). The owner and repository name are read from
    ``github.user`` and ``github.repo``. Whatever ``github.graphql`` returns
    is handed back to the caller unchanged.
    """
    issue_query = """query($owner: String!, $name: String!, $number: Int!){
    repository(owner: $owner, name: $name) {
        issue(number: $number) {
        body
        comments(first:100) {
            nodes {
            body
            }
        }
        }
    }
    }"""
    return github.graphql(
        issue_query,
        variables=dict(owner=github.user, name=github.repo, number=issue_number),
    )
예제 #3
0
    # deploying it for everyone to avoid spamming in the case of bugs or
    # ongoing development.
    if args.allowlist:
        author_allowlist = args.allowlist.split(",")
    else:
        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"],
                            user=user,
                            repo=repo)
        allowlist_issue = github.get("issues/9983")
        author_allowlist = set(find_reviewers(allowlist_issue["body"]))

    if args.pr_json:
        r = json.loads(args.pr_json)
    else:
        q = prs_query(user, repo)
        r = github.graphql(q)

    now = datetime.datetime.utcnow()
    if args.now:
        now = datetime.datetime.strptime(args.now, GIT_DATE_FORMAT)

    # Loop until all PRs have been checked
    while True:
        prs = r["data"]["repository"]["pullRequests"]["nodes"]

        # Don't look at draft PRs at all
        prs_to_check = []
        for pr in prs:
            if pr["isDraft"]:
                print(f"Skipping #{pr['number']} since it's a draft")
            elif pr["number"] <= cutoff_pr_number:
예제 #4
0
class PR:
    """
    Wrapper around the GraphQL data of a single GitHub pull request.

    The raw query result is kept in ``self.raw`` and the methods below read
    from it (reviews, CI jobs, co-authors, ...) or act on the PR through
    ``self.github`` (comment, react, merge). When ``dry_run`` is set the
    mutating methods only log what they would have done.
    """

    def __init__(
        self,
        number: int,
        owner: str,
        repo: str,
        dry_run: bool = False,
        raw_data: Dict[str, Any] = None,
    ):
        """
        Load the PR's data.

        number: PR number
        owner: repository owner, passed to GitHubRepo as 'user'
        repo: repository name
        dry_run: log instead of sending mutating requests
        raw_data: pre-fetched PR data; only used when 'dry_run' is also set,
            in which case no GitHub client is created. May be None.

        Otherwise the GITHUB_TOKEN environment variable must be set. If the
        DEBUG environment variable is "1" the fetched data is cached in (and
        later re-read from) 'pr.json' in the current directory.
        """
        self.owner = owner
        self.number = number
        self.repo_name = repo
        self.dry_run = dry_run

        if dry_run and raw_data:
            # In test mode there is no need to fetch anything
            self.raw = raw_data
            self.github = None
        else:
            self.github = GitHubRepo(user=owner, repo=repo, token=os.environ["GITHUB_TOKEN"])
            if os.getenv("DEBUG", "0") == "1":
                # For local runs fill in the requested data but cache it for
                # later use
                cached_path = Path("pr.json")
                if not cached_path.exists():
                    self.raw = self.fetch_data()
                    with open(cached_path, "w") as f:
                        json.dump(self.raw, f, indent=2)
                else:
                    with open(cached_path) as f:
                        self.raw = json.load(f)
            else:
                # Usual path, fetch the PR's data based on the number from
                # GitHub
                self.raw = self.fetch_data()

        def checker(obj, parent_key):
            """
            Verify that any paged results don't have extra data (if so the bot
            may still work since most relevant comments will be more recent)
            """
            if parent_key == "pageInfo":
                if obj.get("hasPreviousPage", False):
                    warnings.warn(f"Found {obj} with a previous page, bot may be missing data")
                if obj.get("hasNextPage", False):
                    warnings.warn(f"Found {obj} with a next page, bot may be missing data")

        walk(self.raw, checker)

        logging.info(f"Verified data, running with PR {to_json_str(self.raw)}")

    def __repr__(self):
        """
        The raw PR data as indented JSON
        """
        return json.dumps(self.raw, indent=2)

    def plus_one(self, comment: Dict[str, Any]):
        """
        React with a thumbs up to a comment
        """
        url = f"issues/comments/{comment['id']}/reactions"
        data = {"content": "+1"}
        if self.dry_run:
            logging.info(f"Dry run, would have +1'ed to {url} with {data}")
        else:
            self.github.post(url, data=data)

    def head_commit(self):
        """
        The 'commit' entry of the first node under 'commits' in the raw data
        (presumably the query only requests the PR's latest commit; verify
        against PR_QUERY)
        """
        return self.raw["commits"]["nodes"][0]["commit"]

    def co_authors(self) -> List[str]:
        """
        Collect 'Name <email>' strings for every co-author on the PR's commits

        Duplicates are removed and the order of first appearance is kept so
        that the result (and the commit message built from it) is
        deterministic.
        """
        authors = []
        for commit in self.raw["authorCommits"]["nodes"]:
            # Co-authors always come after the main author according to the
            # GitHub docs, so ignore the first item
            for author in commit["commit"]["authors"]["nodes"][1:]:
                name = author["name"]
                email = author["email"]
                authors.append(f"{name} <{email}>")

        # dict keys are unique and keep insertion order, unlike a set
        return list(dict.fromkeys(authors))

    def head_oid(self):
        """
        The 'oid' of the head commit
        """
        return self.head_commit()["oid"]

    def ci_jobs(self) -> List[CIJob]:
        """
        Get a list of all CI jobs (GitHub Actions and other) in a unified format
        """
        jobs = []
        for item in self.head_commit()["statusCheckRollup"]["contexts"]["nodes"]:
            if "checkSuite" in item:
                # GitHub Actions job, parse separately
                status = item["conclusion"]
                if status is None:
                    # If the 'conclusion' isn't filled out the job hasn't
                    # finished yet
                    status = "PENDING"
                jobs.append(
                    {
                        "name": item["checkSuite"]["workflowRun"]["workflow"]["name"]
                        + " / "
                        + item["name"],
                        "url": item["url"],
                        "status": status.upper(),
                    }
                )
            else:
                # GitHub Status (e.g. from Jenkins)
                jobs.append(
                    {
                        "name": item["context"],
                        "url": item["targetUrl"],
                        "status": item["state"].upper(),
                    }
                )

        logging.info(f"Found CI jobs for {self.head_commit()['oid']} {to_json_str(jobs)}")
        return jobs

    def reviews(self) -> List[Review]:
        """
        All review nodes present in the raw data
        """
        return self.raw["reviews"]["nodes"]

    def head_commit_reviews(self) -> List[Review]:
        """
        Find reviews associated with the head commit

        Reviews whose author cannot push to the repository are ignored.
        """
        # Only use the data for the head commit of the PR
        head_oid = self.head_oid()
        return [
            review
            for review in self.reviews()
            # ignore reviews from non-committers
            if review["authorCanPushToRepository"] and review["commit"]["oid"] == head_oid
        ]

    def fetch_data(self):
        """
        Fetch the data for this PR from GitHub
        """
        return self.github.graphql(
            query=PR_QUERY,
            variables={
                "owner": self.owner,
                "name": self.repo_name,
                "number": self.number,
            },
        )["data"]["repository"]["pullRequest"]

    def search_collaborator(self, user: str) -> List[Dict[str, Any]]:
        """
        Query GitHub for collaborators matching 'user'
        """
        return self.github.graphql(
            query=COLLABORATORS_QUERY,
            variables={
                "owner": self.owner,
                "name": self.repo_name,
                "user": user,
            },
        )["data"]["repository"]["collaborators"]["nodes"]

    def comment(self, text: str) -> None:
        """
        Leave the comment 'text' on this PR
        """
        logging.info(f"Commenting:\n{text}")
        # TODO: Update latest comment in-place if there has been no activity
        data = {"body": text}
        url = f"issues/{self.number}/comments"
        if self.dry_run:
            logging.info(
                f"Dry run, would have commented on url={url} commenting with data={to_json_str(data)}"
            )
            return

        self.github.post(url, data=data)

    def state(self) -> str:
        """
        PR state (OPEN, CLOSED, MERGED, etc)
        """
        return self.raw["state"]

    def processed_body(self) -> str:
        """
        The PR body cleaned up for use as a commit message

        Carriage returns are dropped, the '@' of an @-mention that follows
        whitespace is removed and anything matching THANKS_MESSAGE is
        replaced with a blank line.
        """
        body = self.raw["body"].strip().replace("\r", "")
        # Remove any @-mentions of people (raw string so that '\g' is not
        # treated as an invalid string escape)
        body = re.sub(r"(\s)@", r"\g<1>", body)

        # Remove the auto-inserted text since it's not useful to have in the commit log
        body = re.sub(THANKS_MESSAGE, "\n\n", body)
        return body.strip()

    def body_with_co_authors(self) -> str:
        """
        Add 'Co-authored-by' strings to the PR body based on the prior commits
        in the PR
        """
        body = self.processed_body()
        author_lines = self.co_authors()
        logging.info(f"Found co-authors: author_lines={author_lines}")

        # If the line isn't already in the PR body (it could have been
        # added manually), put it in
        authors_to_add = [
            f"Co-authored-by: {author_line}"
            for author_line in author_lines
            if author_line not in body
        ]

        if authors_to_add:
            full_author_text = "\n".join(authors_to_add)
            body = f"{body}\n\n{full_author_text}"

        return body

    def merge(self) -> None:
        """
        Request a merge of this PR via the GitHub API
        """
        url = f"pulls/{self.number}/merge"

        title = self.raw["title"] + f" (#{self.number})"
        body = self.body_with_co_authors()
        logging.info(f"Full commit:\n{title}\n\n{body}")

        data = {
            "commit_title": title,
            "commit_message": body,
            # The SHA is necessary in case there was an update right when this
            # script ran, GitHub will sort out who won
            "sha": self.head_oid(),
            "merge_method": "squash",
        }
        if self.dry_run:
            logging.info(f"Dry run, would have merged with url={url} and data={to_json_str(data)}")
            return

        self.github.put(url, data=data)

    def author(self) -> str:
        """
        Login of the PR's author
        """
        return self.raw["author"]["login"]

    def find_failed_ci_jobs(self) -> List[CIJob]:
        """
        CI jobs whose status is anything other than SUCCESS, SUCCESSFUL or
        SKIPPED (so pending jobs are included)
        """
        # NEUTRAL is GitHub Action's way of saying cancelled
        return [
            job
            for job in self.ci_jobs()
            if job["status"] not in {"SUCCESS", "SUCCESSFUL", "SKIPPED"}
        ]

    def find_missing_expected_jobs(self) -> List[str]:
        """
        Names from EXPECTED_JOBS that do not show up among this PR's CI jobs
        """
        # Map of job name: has seen in completed jobs
        seen_expected_jobs = {name: False for name in EXPECTED_JOBS}
        logging.info(f"Expected to see jobs: {seen_expected_jobs}")

        observed_names = {job["name"] for job in self.ci_jobs()}
        return [name for name in seen_expected_jobs if name not in observed_names]

    def merge_if_passed_checks(self) -> None:
        """
        Merge the PR if CI is green, all expected jobs ran and a committer
        approved the head commit

        If any condition fails a comment explaining why is left on the PR
        instead and nothing is merged. A review on the head commit that
        requests changes blocks the merge as soon as it is encountered.
        """
        failed_ci_jobs = self.find_failed_ci_jobs()
        if failed_ci_jobs:
            failed_jobs_msg = "\n".join(
                [f" * [{job['name']} (`{job['status']}`)]({job['url']})" for job in failed_ci_jobs]
            )
            self.comment(
                f"Cannot merge, these CI jobs are not successful on {self.head_oid()}:\n{failed_jobs_msg}"
            )
            return

        missing_expected_jobs = self.find_missing_expected_jobs()
        if missing_expected_jobs:
            missing_jobs_msg = "\n".join([f" * `{name}`" for name in missing_expected_jobs])
            self.comment(f"Cannot merge, missing expected jobs:\n{missing_jobs_msg}")
            return

        has_one_approval = False
        for review in self.head_commit_reviews():
            if review["state"] == "CHANGES_REQUESTED":
                self.comment(
                    f"Cannot merge, found [this review]({review['url']}) on {self.head_oid()} with changes requested"
                )
                return

            if review["state"] == "APPROVED":
                has_one_approval = True
                logging.info(f"Found approving review: {to_json_str(review)}")

        # CI is known to have passed at this point (failures returned above),
        # so the only remaining question is whether there is an approval
        if has_one_approval:
            self.merge()
        else:
            self.comment(
                f"Cannot merge, did not find any approving reviews from users with write access on {self.head_oid()}"
            )

    def rerun_jenkins_ci(self) -> None:
        """
        Send a POST to this PR's 'buildWithParameters' URL on Jenkins (only
        logged in dry run mode)
        """
        url = JENKINS_URL + f"job/tvm/job/PR-{self.number}/buildWithParameters"
        logging.info(f"Rerunning ci with URL={url}")
        if self.dry_run:
            logging.info("Dry run, not sending POST")
        else:
            post(url, auth=("tvm-bot", TVM_BOT_JENKINS_TOKEN))
예제 #5
0
    parser.add_argument(
        "--testonly-json", help="(testing) data to use instead of fetching from GitHub"
    )
    args = parser.parse_args()

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)
    # TODO: Remove this before landing
    user, repo = ("apache", "tvm")

    if args.testonly_json:
        r = json.loads(args.testonly_json)
    else:
        github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)
        q = commits_query(user, repo)
        r = github.graphql(q)

    commits = r["data"]["repository"]["defaultBranchRef"]["target"]["history"]["nodes"]

    # Limit GraphQL pagination
    MAX_COMMITS_TO_CHECK = 50
    i = 0

    while i < MAX_COMMITS_TO_CHECK:
        # Check each commit
        for commit in commits:
            if commit_passed_ci(commit):
                print(f"Found last good commit: {commit['oid']}: {commit['messageHeadline']}")
                if not args.dry_run:
                    update_branch(
                        user=user,