Ejemplo n.º 1
0
    def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
        """
        Process a diff that has never been pushed to GitHub before.
        """

        if '[ghstack-poisoned]' in commit.summary:
            raise RuntimeError('''\
This commit is poisoned: it is from a head or base branch--ghstack
cannot validly submit it.  The most common situation for this to
happen is if you checked out the head branch of a pull request that was
previously submitted with ghstack (e.g., by using hub checkout).
Making modifications on the head branch is not supported; instead,
you should fetch the original commits in question by running:

    ghstack checkout $PR_URL

Since we cannot proceed, ghstack will abort now.
''')

        title, pr_body = self._default_title_and_body(commit, None)

        # Determine the next available GhNumber.  We do this by
        # iterating through known branches and keeping track
        # of the max.  The next available GhNumber is the next number.
        # This is technically subject to a race, but we assume
        # end user is not running this script concurrently on
        # multiple machines (you bad bad)
        refs = self.sh.git(
            "for-each-ref",
            # Use OUR username here, since there's none attached to the
            # diff
            "refs/remotes/{}/gh/{}".format(self.remote_name, self.username),
            "--format=%(refname)").split()
        max_ref_num = max(int(ref.split('/')[-2]) for ref in refs) \
            if refs else 0
        ghnum = GhNumber(str(max_ref_num + 1))

        # Create the incremental pull request diff
        tree = commit.patch.apply(self.sh, self.base_tree)

        # Actually, if there's no change in the tree, stop processing
        if tree == self.base_tree:
            self.ignored_diffs.append((commit, None))
            logging.warn("Skipping {} {}, as the commit has no changes"
                         .format(commit.oid, title))
            self.stack_meta.append(None)
            return

        assert ghnum not in self.seen_ghnums
        self.seen_ghnums.add(ghnum)

        new_pull = GitCommitHash(
            self.sh.git("commit-tree", tree,
                        "-p", self.base_commit,
                        input=commit.summary + "\n\n[ghstack-poisoned]"))

        # Push the branches, so that we can create a PR for them
        new_branches = (
            push_spec(new_pull, branch_head(self.username, ghnum)),
            push_spec(self.base_commit, branch_base(self.username, ghnum))
        )
        self.sh.git(
            "push",
            self.remote_name,
            *new_branches,
        )
        self.github.push_hook(new_branches)

        # Time to open the PR
        # NB: GraphQL API does not support opening PRs
        r = self.github.post(
            "repos/{owner}/{repo}/pulls"
            .format(owner=self.repo_owner, repo=self.repo_name),
            title=title,
            head=branch_head(self.username, ghnum),
            base=branch_base(self.username, ghnum),
            body=pr_body,
            maintainer_can_modify=True,
            draft=self.draft,
        )
        number = r['number']

        logging.info("Opened PR #{}".format(number))

        # Update the commit message of the local diff with metadata
        # so we can correlate these later
        pull_request_resolved = ghstack.diff.PullRequestResolved(
            owner=self.repo_owner, repo=self.repo_name, number=number)
        commit_msg = ("{commit_msg}\n\n"
                      "ghstack-source-id: {sourceid}\n"
                      "Pull Request resolved: "
                      "https://{github_url}/{owner}/{repo}/pull/{number}"
                      .format(commit_msg=commit.summary.rstrip(),
                              owner=self.repo_owner,
                              repo=self.repo_name,
                              number=number,
                              sourceid=commit.source_id,
                              github_url=self.github_url))

        # TODO: Try harder to preserve the old author/commit
        # information (is it really necessary? Check what
        # --amend does...)
        new_orig = GitCommitHash(self.sh.git(
            "commit-tree",
            tree,
            "-p", self.base_orig,
            input=commit_msg))

        self.stack_meta.append(DiffMeta(
            title=title,
            number=number,
            body=pr_body,
            ghnum=ghnum,
            username=self.username,
            push_branches=((new_orig, 'orig'), ),
            head_branch=new_pull,
            what='Created',
            closed=False,
            pr_url=pull_request_resolved.url(self.github_url),
        ))

        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree
Ejemplo n.º 2
0
    def process_commit(self, commit: ghstack.git.CommitHeader) -> None:
        """
        Process a single commit of the local stack, creating or updating
        its pull request.

        If the commit message carries no metadata
        (``commit.match_metadata()`` returns None), a new GhNumber is
        allocated, head/base branches are pushed to ``origin``, a pull
        request is opened, and the orig commit is rebuilt with a
        ``gh-metadata`` line appended to its message.

        Otherwise the existing pull request is looked up via GraphQL.  If
        the remote orig branch no longer matches this commit, new
        base/head commits are built (and the orig commit is restacked if
        its parent changed) and recorded in the ``DiffMeta`` as branches
        to push.

        In both cases a ``DiffMeta`` is appended to ``self.stack_meta``
        and ``self.base_commit`` / ``self.base_orig`` / ``self.base_tree``
        are advanced to this commit.

        Raises:
            RuntimeError: if the commit does not have exactly one parent,
                if the metadata names a username other than
                ``self.username``, or if the same GhNumber occurs twice
                in the stack.
        """
        title, pr_body = self._default_title_and_body(commit, None)
        commit_id = commit.commit_id()
        tree = commit.tree()
        parents = commit.parents()
        # By default the orig commit is the commit itself; it is replaced
        # below whenever the commit has to be rewritten.
        new_orig = commit_id
        author = commit.author()

        logging.info("# Processing {} {}".format(commit_id[:9], title))
        logging.info("Authored by {}".format(author))
        logging.info("Base is {}".format(self.base_commit))

        if len(parents) != 1:
            raise RuntimeError(
                "The commit {} has {} parents, which makes my head explode.  "
                "`git rebase -i` your diffs into a stack, then try again."
                .format(commit_id, len(parents)))
        parent = parents[0]

        # TODO: check if we authored the commit.  We ought not touch PRs we didn't
        # create.

        commit_msg = commit.commit_msg()

        # check if the commit message says what pull request it's associated
        # with
        #   If NONE:
        #       - If possible, allocate ourselves a GhNumber and
        #         then fix the branch afterwards.
        #       - Otherwise, generate a unique branch name, and attach it to
        #         the commit message

        m_metadata = commit.match_metadata()
        if m_metadata is None:
            # Determine the next available GhNumber.  We do this by
            # iterating through known branches and keeping track
            # of the max.  The next available GhNumber is the next number.
            # This is technically subject to a race, but we assume
            # end user is not running this script concurrently on
            # multiple machines (you bad bad)
            refs = self.sh.git(
                "for-each-ref",
                "refs/remotes/origin/gh/{}".format(self.username),
                "--format=%(refname)").split()
            # The number is the second-to-last path component of each ref
            # name, hence the [-2].
            max_ref_num = max(int(ref.split('/')[-2]) for ref in refs) \
                if refs else 0
            ghnum = GhNumber(str(max_ref_num + 1))
            assert ghnum not in self.seen_ghnums
            self.seen_ghnums.add(ghnum)

            # Create the incremental pull request diff
            new_pull = GitCommitHash(
                self.sh.git("commit-tree", tree,
                            "-p", self.base_commit,
                            input=commit_msg))

            # Push the branches, so that we can create a PR for them
            new_branches = (
                push_spec(new_pull, branch_head(self.username, ghnum)),
                push_spec(self.base_commit, branch_base(self.username, ghnum))
            )
            self.sh.git(
                "push",
                "origin",
                *new_branches,
            )
            self.github.push_hook(new_branches)

            # Time to open the PR
            # NB: GraphQL API does not support opening PRs
            r = self.github.post(
                "repos/{owner}/{repo}/pulls"
                .format(owner=self.repo_owner, repo=self.repo_name),
                title=title,
                head=branch_head(self.username, ghnum),
                base=branch_base(self.username, ghnum),
                body=pr_body,
                maintainer_can_modify=True,
            )
            number = r['number']

            logging.info("Opened PR #{}".format(number))

            # Update the commit message of the local diff with metadata
            # so we can correlate these later
            commit_msg = ("{commit_msg}\n\n"
                          "gh-metadata: "
                          "{owner} {repo} {number} {branch_head}"
                          .format(commit_msg=commit_msg.rstrip(),
                                  owner=self.repo_owner,
                                  repo=self.repo_name,
                                  number=number,
                                  branch_head=branch_head(self.username,
                                                          ghnum)))

            # TODO: Try harder to preserve the old author/commit
            # information (is it really necessary? Check what
            # --amend does...)
            new_orig = GitCommitHash(self.sh.git(
                "commit-tree",
                tree,
                "-p", self.base_orig,
                input=commit_msg))

            self.stack_meta.append(DiffMeta(
                title=title,
                number=number,
                body=pr_body,
                ghnum=ghnum,
                push_branches=((new_orig, 'orig'), ),
                what='Created',
                closed=False,
            ))

        else:
            if m_metadata.group("username") != self.username:
                # This is someone else's diff
                raise RuntimeError(
                    "cannot handle stack from diffs of other people yet")

            ghnum = GhNumber(m_metadata.group("ghnum"))
            number = int(m_metadata.group("number"))

            if ghnum in self.seen_ghnums:
                raise RuntimeError(
                    "Something very strange has happened: a commit for "
                    "the pull request #{} occurs twice in your local "
                    "commit stack.  This is usually because of a botched "
                    "rebase.  Please take a look at your git log and seek "
                    "help from your local Git expert.".format(number))
            self.seen_ghnums.add(ghnum)

            # TODO: There is no reason to do a node query here; we can
            # just look up the repo the old fashioned way
            r = self.github.graphql("""
              query ($repo_id: ID!, $number: Int!) {
                node(id: $repo_id) {
                  ... on Repository {
                    pullRequest(number: $number) {
                      id
                      body
                      title
                      closed
                    }
                  }
                }
              }
            """, repo_id=self.repo_id, number=number)["data"]["node"]["pullRequest"]
            pr_body = r["body"]
            # NB: Technically, we don't need to pull this information at
            # all, but it's more convenient to unconditionally edit
            # title in the code below
            # NB: This overrides setting of title previously, from the
            # commit message.
            title = r["title"]
            closed = r["closed"]

            # When update_fields is set, title/body are regenerated from
            # the commit, with the current PR body passed along.
            if self.update_fields:
                title, pr_body = self._default_title_and_body(commit, pr_body)

            # Check if updating is needed
            clean_commit_id = GitCommitHash(self.sh.git(
                "rev-parse",
                GitCommitHash("origin/" + branch_orig(self.username, ghnum))
            ))
            push_branches: Tuple[Tuple[GitCommitHash, BranchKind], ...]
            if clean_commit_id == commit_id:
                logging.info("Nothing to do")
                # NB: NOT commit_id, that's the orig commit!
                new_pull = GitCommitHash(self.sh.git(
                    "rev-parse", "origin/" + branch_head(self.username, ghnum)))
                push_branches = ()
            else:
                logging.info("Pushing to #{}".format(number))

                # We've got an update to do!  But what exactly should we
                # do?
                #
                # Here are a number of situations which may have
                # occurred.
                #
                #   1. None of the parent commits changed, and this is
                #      the first change we need to push an update to.
                #
                #   2. A parent commit changed, so we need to restack
                #      this commit too.  (You can't easily distinguish
                #      between rebase versus rebase+amend)
                #
                #   3. The parent is now master (any prior parent
                #      commits were absorbed into master.)
                #
                #   4. The parent is totally disconnected, the history
                #      is bogus but at least the merge-base on master
                #      is the same or later.  (This can occur if you
                #      cherry-picked a commit out of an old stack and
                #      want to make it independent.)
                #
                # In cases 1-3, we can maintain a clean merge history
                # if we do a little extra book-keeping, which is what
                # we do now.
                #
                # TODO: What we have here actually works pretty hard to
                # maintain a consistent merge history between all PRs;
                # so, e.g., you could merge with master and things
                # wouldn't break.  But we don't necessarily have to do
                # this; all we need is the delta between base and head
                # to make sense.  The benefit to doing this is you could
                # more easily update single revs only, without doing
                # the rest of the stack.  The downside is that you
                # get less accurate merge structure for your changes
                # (because each "diff" is completely disconnected.)
                #

                # First, check if the parent commit hasn't changed.
                # We do this by checking if our base_commit is the same
                # as the gh/ezyang/X/base commit.
                #
                # In this case, we don't need to include the base as a
                # parent at all; just construct our new diff as a plain,
                # non-merge commit.
                base_args: Tuple[str, ...]
                orig_base_hash = self.sh.git(
                    "rev-parse", "origin/" + branch_base(self.username, ghnum))
                if orig_base_hash == self.base_commit:

                    new_base = self.base_commit
                    base_args = ()

                else:
                    # Second, check if our local base (self.base_commit)
                    # added some new commits, but is still rooted on the
                    # old base.
                    #
                    # If so, all we need to do is include the local base
                    # as a parent when we do the merge.
                    #
                    # NOTE(review): exitcode=True presumably makes
                    # sh.git return whether the command succeeded
                    # instead of its output -- confirm against the
                    # shell wrapper.
                    is_ancestor = self.sh.git(
                        "merge-base",
                        "--is-ancestor",
                        "origin/" + branch_base(self.username, ghnum),
                        self.base_commit, exitcode=True)

                    if is_ancestor:
                        new_base = self.base_commit

                    else:
                        # If we've gotten here, it means that the new
                        # base and the old base are completely
                        # unrelated.  We'll make a fake commit that
                        # "resets" the tree back to something that makes
                        # sense and merge with that.  This doesn't fix
                        # the fact that we still incorrectly report
                        # the old base as an ancestor of our commit, but
                        # it's better than nothing.
                        new_base = GitCommitHash(self.sh.git(
                            "commit-tree", self.base_tree,
                            "-p", "origin/" + branch_base(self.username, ghnum),
                            "-p", self.base_commit,
                            input='Update base for {} on "{}"\n\n{}'
                                  .format(self.msg, title, commit_msg)))

                    # Either way the base moved, so the new head commit
                    # gets it as an additional parent.
                    base_args = ("-p", new_base)

                # Blast our current tree as the newest commit, merging
                # against the previous pull entry, and the newest base.

                tree = commit.tree()
                new_pull = GitCommitHash(self.sh.git(
                    "commit-tree", tree,
                    "-p", "origin/" + branch_head(self.username, ghnum),
                    *base_args,
                    input='{} on "{}"\n\n{}'.format(self.msg, title, commit_msg)))

                # We are in the process of doing an interactive rebase
                # on the orig branch; so if we've edited something in
                # the history, continue restacking the commits.

                if parent != self.base_orig:
                    logging.info("Restacking commit on {}".format(self.base_orig))
                    new_orig = GitCommitHash(self.sh.git(
                        "commit-tree", tree,
                        "-p", self.base_orig, input=commit_msg))

                push_branches = (
                    (new_base, "base"),
                    (new_pull, "head"),
                    (new_orig, "orig"),
                )

            # A closed PR is reported as skipped even when there are
            # branches queued in push_branches.
            if closed:
                what = 'Skipped closed'
            elif push_branches:
                what = 'Updated'
            else:
                what = 'Skipped'

            self.stack_meta.append(DiffMeta(
                title=title,
                number=number,
                # NB: Ignore the commit message, and just reuse the old commit
                # message.  This is consistent with 'jf submit' default
                # behavior.  The idea is that people may have edited the
                # PR description on GitHub and you don't want to clobber
                # it.
                body=pr_body,
                ghnum=ghnum,
                push_branches=push_branches,
                what=what,
                closed=closed,
            ))

        # The current pull request head commit, is the new base commit
        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree
        logging.debug("base_commit = {}".format(self.base_commit))
        logging.debug("base_orig = {}".format(self.base_orig))
        logging.debug("base_tree = {}".format(self.base_tree))
Ejemplo n.º 3
0
    def elaborate_diff(self, commit: ghstack.diff.Diff, *,
                       is_ghexport: bool = False) -> DiffWithGitHubMetadata:
        """
        Query GitHub API for the current title, body and closed? status
        of the pull request corresponding to a ghstack.diff.Diff.
        """

        assert commit.pull_request_resolved is not None
        assert commit.pull_request_resolved.owner == self.repo_owner
        assert commit.pull_request_resolved.repo == self.repo_name

        number = commit.pull_request_resolved.number
        # TODO: There is no reason to do a node query here; we can
        # just look up the repo the old fashioned way
        r = self.github.graphql("""
          query ($repo_id: ID!, $number: Int!) {
            node(id: $repo_id) {
              ... on Repository {
                pullRequest(number: $number) {
                  body
                  title
                  closed
                  headRefName
                }
              }
            }
          }
        """, repo_id=self.repo_id, number=number)["data"]["node"]["pullRequest"]

        # Sorry, this is a big hack to support the ghexport case
        m = re.match(r'(refs/heads/)?export-D([0-9]+)$', r['headRefName'])
        if m is not None and is_ghexport:
            raise RuntimeError('''\
This commit appears to already be associated with a pull request,
but the pull request was previously submitted with an old version of
ghexport.  You can continue exporting using the old style using:

    ghexport --legacy

For future diffs, we recommend using the non-legacy version of ghexport
as it supports bidirectional syncing.  However, there is no way to
convert a pre-existing PR in the old style to the new format which
supports bidirectional syncing.  If you would like to blow away the old
PR and start anew, edit the Summary in the Phabricator diff to delete
the line 'Pull Request resolved' and then run ghexport again.
''')

        # TODO: Hmm, I'm not sure why this matches
        m = re.match(r'gh/([^/]+)/([0-9]+)/head$', r['headRefName'])
        if m is None:
            if is_ghexport:
                raise RuntimeError('''\
This commit appears to already be associated with a pull request,
but the pull request doesn't look like it was submitted by ghexport
Maybe you exported it using the "Export to Open Source" button on
the Phabricator diff page?  If so, please continue to use that button
to export your diff.

If you think this is in error, edit the Summary in the Phabricator diff
to delete the line 'Pull Request resolved' and then run ghexport again.
''')
            else:
                raise RuntimeError('''\
This commit appears to already be associated with a pull request,
but the pull request doesn't look like it was submitted by ghstack.
If you think this is in error, run:

    ghstack unlink {}

to disassociate the commit with the pull request, and then try again.
(This will create a new pull request!)
'''.format(commit.oid))
        username = m.group(1)
        gh_number = GhNumber(m.group(2))

        # NB: Technically, we don't need to pull this information at
        # all, but it's more convenient to unconditionally edit
        # title/body when we update the pull request info
        title = r['title']
        pr_body = r['body']
        if self.update_fields:
            title, pr_body = self._default_title_and_body(commit, pr_body)

        # TODO: remote summary should be done earlier so we can use
        # it to test if updates are necessary
        remote_summary = ghstack.git.split_header(
            self.sh.git(
                "rev-list", "--max-count=1", "--header",
                self.remote_name + "/" + branch_orig(username, gh_number)
            )
        )[0]
        m_remote_source_id = RE_GHSTACK_SOURCE_ID.search(remote_summary.commit_msg())
        remote_source_id = m_remote_source_id.group(1) if m_remote_source_id else None

        return DiffWithGitHubMetadata(
            diff=commit,
            title=title,
            body=pr_body,
            closed=r['closed'],
            number=number,
            username=username,
            ghnum=gh_number,
            remote_source_id=remote_source_id,
            pull_request_resolved=commit.pull_request_resolved,
        )
Ejemplo n.º 4
0
    def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
        """
        Process a diff that has never been pushed to GitHub before.

        Allocates the next GhNumber for ``self.username``, applies the
        diff's patch on top of ``self.base_tree``, pushes the resulting
        head/base branches to ``origin``, opens a pull request for them
        and appends a ``DiffMeta`` describing it to ``self.stack_meta``.

        If applying the patch leaves the tree unchanged, the diff is
        recorded in ``self.ignored_diffs`` and nothing is pushed.

        On success ``self.base_commit``, ``self.base_orig`` and
        ``self.base_tree`` are advanced so the next diff stacks on top
        of this one.
        """

        title, pr_body = self._default_title_and_body(commit, None)

        # Determine the next available GhNumber.  We do this by
        # iterating through known branches and keeping track
        # of the max.  The next available GhNumber is the next number.
        # This is technically subject to a race, but we assume
        # end user is not running this script concurrently on
        # multiple machines (you bad bad)
        refs = self.sh.git("for-each-ref",
                           "refs/remotes/origin/gh/{}".format(self.username),
                           "--format=%(refname)").split()
        # The number is the second-to-last path component of each ref
        # name, hence the [-2].
        max_ref_num = max(int(ref.split('/')[-2]) for ref in refs) \
            if refs else 0
        ghnum = GhNumber(str(max_ref_num + 1))

        # Create the incremental pull request diff
        tree = commit.patch.apply(self.sh, self.base_tree)

        # Actually, if there's no change in the tree, stop processing
        if tree == self.base_tree:
            self.ignored_diffs.append((commit, None))
            # logging.warn is a deprecated alias; logging.warning is the
            # supported spelling and emits the same record.
            logging.warning(
                "Skipping {} {}, as the commit has no changes".format(
                    commit.oid, title))
            return

        assert ghnum not in self.seen_ghnums
        self.seen_ghnums.add(ghnum)

        new_pull = GitCommitHash(
            self.sh.git("commit-tree",
                        tree,
                        "-p",
                        self.base_commit,
                        input=commit.summary))

        # Push the branches, so that we can create a PR for them
        new_branches = (push_spec(new_pull, branch_head(self.username, ghnum)),
                        push_spec(self.base_commit,
                                  branch_base(self.username, ghnum)))
        self.sh.git(
            "push",
            "origin",
            *new_branches,
        )
        self.github.push_hook(new_branches)

        # Time to open the PR
        # NB: GraphQL API does not support opening PRs
        r = self.github.post(
            "repos/{owner}/{repo}/pulls".format(owner=self.repo_owner,
                                                repo=self.repo_name),
            title=title,
            head=branch_head(self.username, ghnum),
            base=branch_base(self.username, ghnum),
            body=pr_body,
            maintainer_can_modify=True,
        )
        number = r['number']

        logging.info("Opened PR #{}".format(number))

        # Update the commit message of the local diff with metadata
        # so we can correlate these later
        commit_msg = ("{commit_msg}\n\n"
                      "ghstack-source-id: {sourceid}\n"
                      "Pull Request resolved: "
                      "https://github.com/{owner}/{repo}/pull/{number}".format(
                          commit_msg=commit.summary.rstrip(),
                          owner=self.repo_owner,
                          repo=self.repo_name,
                          number=number,
                          sourceid=commit.source_id))

        # TODO: Try harder to preserve the old author/commit
        # information (is it really necessary? Check what
        # --amend does...)
        new_orig = GitCommitHash(
            self.sh.git("commit-tree",
                        tree,
                        "-p",
                        self.base_orig,
                        input=commit_msg))

        self.stack_meta.append(
            DiffMeta(
                title=title,
                number=number,
                body=pr_body,
                ghnum=ghnum,
                push_branches=((new_orig, 'orig'), ),
                head_branch=new_pull,
                what='Created',
                closed=False,
            ))

        # The commits we just created become the base for the next diff
        # in the stack.
        self.base_commit = new_pull
        self.base_orig = new_orig
        self.base_tree = tree