def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
    """
    Process a diff that has never been pushed to GitHub before.

    Picks the next free GhNumber for ``self.username``, pushes new head
    and base branches to ``self.remote_name``, opens a pull request via
    ``self.github.post`` and appends a ``DiffMeta`` describing it to
    ``self.stack_meta``.  On success, ``self.base_commit``,
    ``self.base_orig`` and ``self.base_tree`` are advanced to the
    commits/tree created here.

    If applying the diff leaves the tree unchanged, the diff is recorded
    in ``self.ignored_diffs``, ``None`` is appended to
    ``self.stack_meta`` and nothing is pushed.

    Args:
        commit: The local diff to submit.

    Raises:
        RuntimeError: If the commit summary contains the
            ``[ghstack-poisoned]`` marker.
    """
    if '[ghstack-poisoned]' in commit.summary:
        # Line breaks are spelled out explicitly so the message does not
        # depend on how this source file happens to be laid out.
        raise RuntimeError(
            "This commit is poisoned: it is from a head or base "
            "branch--ghstack\n"
            "cannot validly submit it. The most common situation for "
            "this to\n"
            "happen is if you checked out the head branch of a pull "
            "request that was\n"
            "previously submitted with ghstack (e.g., by using hub "
            "checkout).\n"
            "Making modifications on the head branch is not supported; "
            "instead,\n"
            "you should fetch the original commits in question by "
            "running:\n"
            "\n"
            "    ghstack checkout $PR_URL\n"
            "\n"
            "Since we cannot proceed, ghstack will abort now.\n")

    title, pr_body = self._default_title_and_body(commit, None)

    # Determine the next available GhNumber.  We do this by
    # iterating through known branches and keeping track
    # of the max.  The next available GhNumber is the next number.
    # This is technically subject to a race, but we assume
    # end user is not running this script concurrently on
    # multiple machines (you bad bad)
    refs = self.sh.git(
        "for-each-ref",
        # Use OUR username here, since there's none attached to the
        # diff
        "refs/remotes/{}/gh/{}".format(self.remote_name, self.username),
        "--format=%(refname)").split()
    # The number is read from the second-to-last path component of each
    # ref name; with no refs at all we start counting from 0.
    max_ref_num = max((int(ref.split('/')[-2]) for ref in refs), default=0)
    ghnum = GhNumber(str(max_ref_num + 1))

    # Create the incremental pull request diff
    tree = commit.patch.apply(self.sh, self.base_tree)

    # Actually, if there's no change in the tree, stop processing
    if tree == self.base_tree:
        self.ignored_diffs.append((commit, None))
        logging.warning("Skipping {} {}, as the commit has no changes"
                        .format(commit.oid, title))
        self.stack_meta.append(None)
        return

    assert ghnum not in self.seen_ghnums
    self.seen_ghnums.add(ghnum)

    # The head commit carries the poison marker that the check at the
    # top of this method looks for.
    new_pull = GitCommitHash(
        self.sh.git("commit-tree", tree,
                    "-p", self.base_commit,
                    input=commit.summary + "\n\n[ghstack-poisoned]"))

    # Push the branches, so that we can create a PR for them
    new_branches = (
        push_spec(new_pull, branch_head(self.username, ghnum)),
        push_spec(self.base_commit, branch_base(self.username, ghnum)),
    )
    self.sh.git(
        "push",
        self.remote_name,
        *new_branches,
    )
    self.github.push_hook(new_branches)

    # Time to open the PR
    # NB: GraphQL API does not support opening PRs
    r = self.github.post(
        "repos/{owner}/{repo}/pulls"
        .format(owner=self.repo_owner, repo=self.repo_name),
        title=title,
        head=branch_head(self.username, ghnum),
        base=branch_base(self.username, ghnum),
        body=pr_body,
        maintainer_can_modify=True,
        draft=self.draft,
    )
    number = r['number']

    logging.info("Opened PR #{}".format(number))

    # Update the commit message of the local diff with metadata
    # so we can correlate these later
    pull_request_resolved = ghstack.diff.PullRequestResolved(
        owner=self.repo_owner, repo=self.repo_name, number=number)
    commit_msg = ("{commit_msg}\n\n"
                  "ghstack-source-id: {sourceid}\n"
                  "Pull Request resolved: "
                  "https://{github_url}/{owner}/{repo}/pull/{number}"
                  .format(commit_msg=commit.summary.rstrip(),
                          owner=self.repo_owner,
                          repo=self.repo_name,
                          number=number,
                          sourceid=commit.source_id,
                          github_url=self.github_url))

    # TODO: Try harder to preserve the old author/commit
    # information (is it really necessary? Check what
    # --amend does...)
    new_orig = GitCommitHash(self.sh.git(
        "commit-tree",
        tree,
        "-p", self.base_orig,
        input=commit_msg))

    self.stack_meta.append(DiffMeta(
        title=title,
        number=number,
        body=pr_body,
        ghnum=ghnum,
        username=self.username,
        push_branches=((new_orig, 'orig'), ),
        head_branch=new_pull,
        what='Created',
        closed=False,
        pr_url=pull_request_resolved.url(self.github_url),
    ))

    # Later diffs in the stack build on what we just created.
    self.base_commit = new_pull
    self.base_orig = new_orig
    self.base_tree = tree
def process_old_commit(self, elab_commit: DiffWithGitHubMetadata) -> None:
    """
    Process a diff that has an existing upload to GitHub.

    Compares the local ``ghstack-source-id`` with the remote one, then
    builds new base/head/orig commits with ``git commit-tree`` and
    appends a ``DiffMeta`` listing them in ``push_branches`` to
    ``self.stack_meta``.  This method does not itself run ``git push``.
    On success, ``self.base_commit``, ``self.base_orig`` and
    ``self.base_tree`` are advanced to the commits/tree created here.

    If applying the diff leaves the tree unchanged, the diff is recorded
    in ``self.ignored_diffs`` and the method returns early.

    Args:
        elab_commit: The local diff together with the metadata of its
            existing pull request.

    Raises:
        RuntimeError: If this diff's ghnum was already seen in the
            current stack, or if the local and remote source IDs differ
            and ``self.force`` is not set.
    """
    commit = elab_commit.diff
    username = elab_commit.username
    ghnum = elab_commit.ghnum
    number = elab_commit.number

    if ghnum in self.seen_ghnums:
        raise RuntimeError(
            "Something very strange has happened: a commit for "
            "the pull request #{} occurs twice in your local "
            "commit stack. This is usually because of a botched "
            "rebase. Please take a look at your git log and seek "
            "help from your local Git expert.".format(number))
    self.seen_ghnums.add(ghnum)

    logging.info("Pushing to #{}".format(number))

    # Compute the local and remote source IDs
    summary = commit.summary
    m_local_source_id = RE_GHSTACK_SOURCE_ID.search(summary)
    if m_local_source_id is None:
        # For BC, just slap on a source ID.  After BC is no longer
        # needed, we can just error in this case; however, this
        # situation is extremely likely to happen for preexisting
        # stacks.
        logging.warning(
            "Local commit has no ghstack-source-id; assuming that it is "
            "up-to-date with remote.")
        summary = "{}\nghstack-source-id: {}".format(summary, commit.source_id)
    else:
        local_source_id = m_local_source_id.group(1)
        if elab_commit.remote_source_id is None:
            # This should also be an error condition, but I suppose
            # it can happen in the wild if a user had an aborted
            # ghstack run, where they updated their head pointer to
            # a copy with source IDs, but then we failed to push to
            # orig.  We should just go ahead and push in that case.
            logging.warning(
                "Remote commit has no ghstack-source-id; assuming that we are "
                "up-to-date with remote.")
        elif local_source_id != elab_commit.remote_source_id and not self.force:
            logging.debug(f"elab_commit.remote_source_id = {elab_commit.remote_source_id}")
            raise RuntimeError(
                "Cowardly refusing to push an update to GitHub, since it "
                "looks another source has updated GitHub since you last "
                "pushed. If you want to push anyway, rerun this command "
                "with --force. Otherwise, diff your changes against "
                "{} and reapply them on top of an up-to-date commit from "
                "GitHub.".format(local_source_id))
        # Replace the recorded source ID with the current one.
        summary = RE_GHSTACK_SOURCE_ID.sub(
            'ghstack-source-id: {}\n'.format(commit.source_id),
            summary)

    # We've got an update to do!  But what exactly should we
    # do?
    #
    # Here are a number of situations which may have
    # occurred.
    #
    #   1. None of the parent commits changed, and this is
    #      the first change we need to push an update to.
    #
    #   2. A parent commit changed, so we need to restack
    #      this commit too.  (You can't easily distinguish
    #      between rebase versus rebase+amend)
    #
    #   3. The parent is now master (any prior parent
    #      commits were absorbed into master.)
    #
    #   4. The parent is totally disconnected, the history
    #      is bogus but at least the merge-base on master
    #      is the same or later.  (This can occur if you
    #      cherry-picked a commit out of an old stack and
    #      want to make it independent.)
    #
    # In cases 1-3, we can maintain a clean merge history
    # if we do a little extra book-keeping, which is what
    # we do now.
    #
    # TODO: What we have here actually works pretty hard to
    # maintain a consistent merge history between all PRs;
    # so, e.g., you could merge with master and things
    # wouldn't break.  But we don't necessarily have to do
    # this; all we need is the delta between base and head
    # to make sense.  The benefit to doing this is you could
    # more easily update single revs only, without doing
    # the rest of the stack.  The downside is that you
    # get less accurate merge structure for your changes
    # (because each "diff" is completely disconnected.)
    #
    # First, check if the parent commit hasn't changed.
    # We do this by checking if our base_commit is the same
    # as the gh/ezyang/X/base commit.
    #
    # In this case, we don't need to include the base as a
    # parent at all; just construct our new diff as a plain,
    # non-merge commit.
    base_args: Tuple[str, ...]
    # Remote-tracking name of this PR's base branch; used several times below.
    remote_base_ref = self.remote_name + "/" + branch_base(username, ghnum)
    orig_base_hash = self.sh.git("rev-parse", remote_base_ref)

    # I vacillated between whether or not we should use the PR
    # body or the literal commit message here.  Right now we use
    # the PR body, because after initial commit the original
    # commit message is not supposed to "matter" anymore.  orig
    # still uses the original commit message, however, because
    # it's supposed to be the "original".
    non_orig_commit_msg = RE_STACK.sub('', elab_commit.body)

    if orig_base_hash == self.base_commit:
        new_base = self.base_commit
        base_args = ()
    else:
        # Second, check if our local base (self.base_commit)
        # added some new commits, but is still rooted on the
        # old base.
        #
        # If so, all we need to do is include the local base
        # as a parent when we do the merge.
        is_ancestor = self.sh.git(
            "merge-base", "--is-ancestor",
            remote_base_ref,
            self.base_commit, exitcode=True)

        if is_ancestor:
            new_base = self.base_commit
        else:
            # If we've gotten here, it means that the new
            # base and the old base are completely
            # unrelated.  We'll make a fake commit that
            # "resets" the tree back to something that makes
            # sense and merge with that.  This doesn't fix
            # the fact that we still incorrectly report
            # the old base as an ancestor of our commit, but
            # it's better than nothing.
            new_base = GitCommitHash(self.sh.git(
                "commit-tree", self.base_tree,
                "-p", remote_base_ref,
                "-p", self.base_commit,
                input='Update base for {} on "{}"\n\n{}\n\n[ghstack-poisoned]'
                      .format(self.msg, elab_commit.title,
                              non_orig_commit_msg)))

        base_args = ("-p", new_base)

    # Blast our current tree as the newest commit, merging
    # against the previous pull entry, and the newest base.
    tree = commit.patch.apply(self.sh, self.base_tree)

    # Nothing to do, just ignore the diff
    if tree == self.base_tree:
        self.ignored_diffs.append((commit, number))
        logging.warning("Skipping PR #{} {}, as the commit now has no changes"
                        .format(number, elab_commit.title))
        # NOTE(review): a process_new_commit definition in this file
        # appends None to self.stack_meta when it skips a diff; this
        # path appends nothing -- confirm which behavior is intended.
        return

    new_pull = GitCommitHash(self.sh.git(
        "commit-tree", tree,
        "-p", self.remote_name + "/" + branch_head(username, ghnum),
        *base_args,
        input='{} on "{}"\n\n{}\n\n[ghstack-poisoned]'.format(
            self.msg, elab_commit.title, non_orig_commit_msg)))

    # Perform what is effectively an interactive rebase
    # on the orig branch.
    #
    # Hypothetically, there could be a case when this isn't
    # necessary, but it's INCREDIBLY unlikely (because we'd
    # have to look EXACTLY like the original orig, and since
    # we're in the branch that says "hey we changed
    # something" that's probably not what happened).
    logging.info("Restacking commit on {}".format(self.base_orig))
    new_orig = GitCommitHash(self.sh.git(
        "commit-tree", tree,
        "-p", self.base_orig, input=summary))

    push_branches = (
        (new_base, "base"),
        (new_pull, "head"),
        (new_orig, "orig"),
    )

    what = 'Skipped closed' if elab_commit.closed else 'Updated'

    self.stack_meta.append(DiffMeta(
        title=elab_commit.title,
        number=number,
        # NB: Ignore the commit message, and just reuse the old commit
        # message.  This is consistent with 'jf submit' default
        # behavior.  The idea is that people may have edited the
        # PR description on GitHub and you don't want to clobber
        # it.
        body=elab_commit.body,
        ghnum=ghnum,
        username=username,
        push_branches=push_branches,
        head_branch=new_pull,
        what=what,
        closed=elab_commit.closed,
        pr_url=elab_commit.pull_request_resolved.url(self.github_url),
    ))

    # Later diffs in the stack build on what we just created.
    self.base_commit = new_pull
    self.base_orig = new_orig
    self.base_tree = tree
def process_new_commit(self, commit: ghstack.diff.Diff) -> None:
    """
    Process a diff that has never been pushed to GitHub before.

    Picks the next free GhNumber for ``self.username``, pushes new head
    and base branches to the ``origin`` remote, opens a pull request via
    ``self.github.post`` and appends a ``DiffMeta`` describing it to
    ``self.stack_meta``.  On success, ``self.base_commit``,
    ``self.base_orig`` and ``self.base_tree`` are advanced to the
    commits/tree created here.

    If applying the diff leaves the tree unchanged, the diff is recorded
    in ``self.ignored_diffs`` and the method returns without pushing.

    Args:
        commit: The local diff to submit.
    """
    # NOTE(review): another definition named process_new_commit appears
    # earlier in this file; if both live in the same class, this later
    # one replaces it -- confirm which one is meant to be kept.
    title, pr_body = self._default_title_and_body(commit, None)

    # Determine the next available GhNumber.  We do this by
    # iterating through known branches and keeping track
    # of the max.  The next available GhNumber is the next number.
    # This is technically subject to a race, but we assume
    # end user is not running this script concurrently on
    # multiple machines (you bad bad)
    refs = self.sh.git(
        "for-each-ref",
        "refs/remotes/origin/gh/{}".format(self.username),
        "--format=%(refname)").split()
    # The number is read from the second-to-last path component of each
    # ref name; with no refs at all we start counting from 0.
    max_ref_num = max((int(ref.split('/')[-2]) for ref in refs), default=0)
    ghnum = GhNumber(str(max_ref_num + 1))

    # Create the incremental pull request diff
    tree = commit.patch.apply(self.sh, self.base_tree)

    # Actually, if there's no change in the tree, stop processing
    if tree == self.base_tree:
        self.ignored_diffs.append((commit, None))
        logging.warning("Skipping {} {}, as the commit has no changes".format(
            commit.oid, title))
        return

    assert ghnum not in self.seen_ghnums
    self.seen_ghnums.add(ghnum)

    new_pull = GitCommitHash(
        self.sh.git("commit-tree",
                    tree,
                    "-p",
                    self.base_commit,
                    input=commit.summary))

    # Push the branches, so that we can create a PR for them
    new_branches = (
        push_spec(new_pull, branch_head(self.username, ghnum)),
        push_spec(self.base_commit, branch_base(self.username, ghnum)),
    )
    self.sh.git(
        "push",
        "origin",
        *new_branches,
    )
    self.github.push_hook(new_branches)

    # Time to open the PR
    # NB: GraphQL API does not support opening PRs
    r = self.github.post(
        "repos/{owner}/{repo}/pulls".format(owner=self.repo_owner,
                                            repo=self.repo_name),
        title=title,
        head=branch_head(self.username, ghnum),
        base=branch_base(self.username, ghnum),
        body=pr_body,
        maintainer_can_modify=True,
    )
    number = r['number']

    logging.info("Opened PR #{}".format(number))

    # Update the commit message of the local diff with metadata
    # so we can correlate these later
    commit_msg = ("{commit_msg}\n\n"
                  "ghstack-source-id: {sourceid}\n"
                  "Pull Request resolved: "
                  "https://github.com/{owner}/{repo}/pull/{number}".format(
                      commit_msg=commit.summary.rstrip(),
                      owner=self.repo_owner,
                      repo=self.repo_name,
                      number=number,
                      sourceid=commit.source_id))

    # TODO: Try harder to preserve the old author/commit
    # information (is it really necessary? Check what
    # --amend does...)
    new_orig = GitCommitHash(
        self.sh.git("commit-tree",
                    tree,
                    "-p",
                    self.base_orig,
                    input=commit_msg))

    self.stack_meta.append(
        DiffMeta(
            title=title,
            number=number,
            body=pr_body,
            ghnum=ghnum,
            push_branches=((new_orig, 'orig'), ),
            head_branch=new_pull,
            what='Created',
            closed=False,
        ))

    # Later diffs in the stack build on what we just created.
    self.base_commit = new_pull
    self.base_orig = new_orig
    self.base_tree = tree