def test_parse_commit_id(self):
        self.assertIsNone(parse_commit_id('foo'))
        self.assertIsNone(parse_commit_id('foo\n\nMozReview-Commit-ID\nbar'))

        self.assertEqual(parse_commit_id('MozReview-Commit-ID: foo123'),
                         'foo123')
        self.assertEqual(
            parse_commit_id('Bug 1 - foo\n\nMozReview-Commit-ID: abc456'),
            'abc456')
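
The assertions above imply that parse_commit_id scans a commit message for a "MozReview-Commit-ID:" line and returns the token following it, or None when no such line is present. A minimal sketch consistent with those assertions (the real helper may differ; the regex below is an assumption):

import re

# Hedged sketch: matches a "MozReview-Commit-ID: <token>" line anywhere in
# the message; the actual parser may be stricter about placement or format.
COMMIT_ID_RE = re.compile(r'^MozReview-Commit-ID: (\S+)', re.MULTILINE)


def parse_commit_id(msg):
    """Return the MozReview-Commit-ID embedded in a commit message, or None."""
    match = COMMIT_ID_RE.search(msg)
    if not match:
        return None
    return match.group(1)
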
def wrappedpushdiscovery(orig, pushop):
    """Wraps exchange._pushdiscovery to add extra review metadata.

    We determine which nodes to review before push discovery runs. This
    ensures that errors are detected and reported quickly, without waiting
    on server communication.
    """

    pushop.reviewnodes = None

    caps = getreviewcaps(pushop.remote)
    if 'pushreview' not in caps:
        return orig(pushop)

    ui = pushop.ui
    repo = pushop.repo

    if repo.noreviewboardpush:
        return orig(pushop)

    # If no arguments are specified to push, Mercurial will try to push all
    # non-remote changesets by default. This can result in unexpected behavior,
    # especially for people doing multi-headed development.
    #
    # Since we reject pushes with multiple heads anyway, default to pushing
    # the working copy.
    if not pushop.revs:
        pushop.revs = [repo['.'].node()]

    tipnode = None
    basenode = None

    # Our prepushoutgoing hook validates that all pushed changesets are
    # part of the same DAG head. If revisions were specified by the user,
    # the last is the tip commit to review and the first (if more than 1)
    # is the base commit to review.
    #
    # Note: the revisions are in the order they were specified by the user.
    # This may not be DAG order. So we have to explicitly order them here.
    revs = sorted(repo[r].rev() for r in pushop.revs)
    tipnode = repo[revs[-1]].node()
    if len(revs) > 1:
        basenode = repo[revs[0]].node()

    if repo.pushsingle:
        basenode = tipnode

    # Given a base and tip node, find all changesets to review.
    #
    # A solution that works most of the time is to find all non-public
    # ancestors of that node. This is our default.
    #
    # If basenode is specified, we stop the traversal when we encounter it.
    #
    # Note that we will still refuse to review a public changeset even with
    # basenode. This decision is somewhat arbitrary and can be revisited later
    # if there is an actual need to review public changesets.
    nodes = [tipnode]
    # Special case where basenode is the tip node.
    if basenode and tipnode == basenode:
        pass
    else:
        for node in repo[tipnode].ancestors():
            ctx = repo[node]

            if ctx.phase() == phases.public:
                break
            if basenode and ctx.node() == basenode:
                nodes.insert(0, ctx.node())
                break

            nodes.insert(0, ctx.node())

    # Filter out public nodes.
    publicnodes = []
    for node in nodes:
        ctx = repo[node]
        if ctx.phase() == phases.public:
            publicnodes.append(node)
            ui.status(_('(ignoring public changeset %s in review request)\n') %
                        ctx.hex()[0:12])

    nodes = [n for n in nodes if n not in publicnodes]
    if not nodes:
        raise util.Abort(
            _('no non-public changesets left to review'),
            hint=_('add or change the -r argument to include draft changesets'))

    # Reject completely empty changesets before they reach review.
    for node in nodes:
        ctx = repo[node]
        if not ctx.files():
            raise util.Abort(
                _('cannot review empty changeset %s') % ctx.hex()[:12],
                hint=_('add files to or remove changeset'))

    # Ensure all reviewed changesets have commit IDs.
    replacenodes = []
    for node in nodes:
        ctx = repo[node]
        if not parse_commit_id(encoding.fromlocal(ctx.description())):
            replacenodes.append(node)

    def makememctx(repo, ctx, revmap, copyfilectxfn):
        parents = newparents(repo, ctx, revmap)
        # Need to make a copy otherwise modification is made on original,
        # which is just plain wrong.
        msg = encoding.fromlocal(ctx.description())
        new_msg, changed = addcommitid(msg, repo=repo)

        memctx = context.memctx(repo, parents,
                                encoding.tolocal(new_msg), ctx.files(),
                                copyfilectxfn, user=ctx.user(),
                                date=ctx.date(), extra=dict(ctx.extra()))

        return memctx

    if replacenodes:
        ui.status(_('(adding commit id to %d changesets)\n') %
                  (len(replacenodes)))
        nodemap = replacechangesets(repo, replacenodes, makememctx,
                                    backuptopic='addcommitid')

        # Since we're in the middle of an operation, update references
        # to rewritten nodes.
        nodes = [nodemap.get(node, node) for node in nodes]
        pushop.revs = [nodemap.get(node, node) for node in pushop.revs]

    pushop.reviewnodes = nodes

    # Since we may rewrite changesets to contain review metadata after
    # push, abort immediately if the working directory state is not
    # compatible with rewriting. This prevents us from successfully
    # pushing and failing to update commit metadata after the push. i.e.
    # it prevents potential loss of metadata.
    #
    # There may be some scenarios where we don't rewrite after push.
    # But coding that here would be complicated. And future server changes
    # may change things like review request mapping, which may invalidate
    # client assumptions. So always assume a rewrite is needed.
    impactedrevs = list(repo.revs('%ln::', nodes))
    if repo['.'].rev() in impactedrevs:
        cmdutil.checkunfinished(repo)
        cmdutil.bailifchanged(repo)

    return orig(pushop)
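
The docstring notes that this function wraps exchange._pushdiscovery. A sketch of how a Mercurial extension typically installs such a wrapper (the extsetup shown here is an assumption; the real extension wires this up alongside its other hooks):

from mercurial import exchange, extensions


def extsetup(ui):
    # Run our review-node discovery before Mercurial's own push discovery;
    # the wrapper receives the original function as its first argument.
    extensions.wrapfunction(exchange, '_pushdiscovery', wrappedpushdiscovery)
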
Example #4
    def _process_submission(self, request, local_site, user, privileged_user,
                            repo, identifier, commits):
        logger.info('processing batch submission %s to %s with %d commits' %
                    (identifier, repo.name, len(commits['individual'])))

        try:
            squashed_rr = ReviewRequest.objects.get(commit_id=identifier,
                                                    repository=repo)
            if not squashed_rr.is_mutable_by(user):
                logger.warn('%s not mutable by %s' % (squashed_rr.id, user))
                raise SubmissionException(self.get_no_access_error(request))

            if squashed_rr.status != ReviewRequest.PENDING_REVIEW:
                logger.warn('%s is not a pending review request; cannot edit' %
                            squashed_rr.id)
                raise SubmissionException(
                    REVIEW_REQUEST_UPDATE_NOT_ALLOWED.with_message(
                        SUBMITTED_OR_DISCARDED_ERROR))

            logger.info('using squashed review request %d' % squashed_rr.id)

        except ReviewRequest.DoesNotExist:
            squashed_rr = ReviewRequest.objects.create(user=user,
                                                       repository=repo,
                                                       commit_id=identifier,
                                                       local_site=local_site)

            squashed_commit_data = fetch_commit_data(squashed_rr)
            squashed_commit_data.extra_data.update({
                MOZREVIEW_KEY: True,
                IDENTIFIER_KEY: identifier,
                FIRST_PUBLIC_ANCESTOR_KEY: (
                    commits['squashed']['first_public_ancestor']),
                HAS_COMMIT_MSG_FILEDIFF_KEY: True,
                SQUASHED_KEY: True,
                DISCARD_ON_PUBLISH_KEY: '[]',
                UNPUBLISHED_KEY: '[]',
            })
            squashed_commit_data.draft_extra_data.update({
                IDENTIFIER_KEY: identifier,
            })
            squashed_commit_data.save(
                update_fields=['extra_data', 'draft_extra_data'])

            logger.info('created squashed review request #%d' % squashed_rr.id)

        # The diffs on diffsets can't be updated, only replaced. So always
        # construct the diffset.

        try:
            # TODO consider moving diffset creation outside of the transaction
            # since it can be quite time consuming.
            # Calling create_from_data() instead of create_from_upload() skips
            # diff size validation. We allow unlimited diff sizes, so no biggie.
            logger.info('%s: generating squashed diffset for %d' %
                        (identifier, squashed_rr.id))
            diffset = DiffSet.objects.create_from_data(
                repository=repo,
                diff_file_name='diff',
                # The original value is a unicode instance. Python 2 can't
                # base64-decode a unicode instance directly, so encode it
                # to a byte string first.
                diff_file_contents=commits['squashed']['diff_b64'].encode(
                    'ascii').decode('base64'),
                parent_diff_file_name=None,
                parent_diff_file_contents=None,
                diffset_history=None,
                basedir='',
                request=request,
                base_commit_id=commits['squashed'].get('base_commit_id'),
                save=True,
            )

            update_diffset_history(squashed_rr, diffset)
            diffset.save()

            # We pass `force_insert=True` to save to make sure Django generates
            # an SQL INSERT rather than an UPDATE if the DiffSetVerification
            # already exists. It should never already exist so we want the
            # exception `force_insert=True` will cause if that's the case.
            DiffSetVerification(diffset=diffset).save(
                authorized_user=privileged_user, force_insert=True)
        except Exception:
            logger.exception('error processing squashed diff')
            raise DiffProcessingException()

        update_review_request_draft_diffset(squashed_rr, diffset)
        logger.info('%s: updated squashed diffset for %d' %
                    (identifier, squashed_rr.id))

        # TODO: We need to take into account the commits data from the squashed
        # review request's draft. This data represents the mapping from commit
        # to rid in the event that we would have published. We're overwriting
        # this data. This will only come into play if we start trusting the
        # server instead of the client when matching review request ids.
        # Bug 1047516

        squashed_commit_data = fetch_commit_data(squashed_rr)
        previous_commits = get_previous_commits(squashed_rr,
                                                squashed_commit_data)
        remaining_nodes = get_remaining_nodes(previous_commits)
        discard_on_publish_rids = get_discard_on_publish_rids(
            squashed_rr, squashed_commit_data)
        unpublished_rids = get_unpublished_rids(squashed_rr,
                                                squashed_commit_data)
        unclaimed_rids = get_unclaimed_rids(previous_commits,
                                            discard_on_publish_rids,
                                            unpublished_rids)

        logger.info('%s: %d previous commits; %d discard on publish; '
                    '%d unpublished' %
                    (identifier, len(previous_commits),
                     len(discard_on_publish_rids), len(unpublished_rids)))

        # Previously pushed nodes which have been processed and had their review
        # request updated or did not require updating.
        processed_nodes = set()

        node_to_rid = {}

        # A mapping from review request id to the corresponding ReviewRequest.
        review_requests = {}

        # A mapping of review request id to dicts of additional metadata.
        review_data = {}

        squashed_reviewers = set()
        reviewer_cache = ReviewerCache(request)

        warnings = []

        # Do a pass and find all commits that map cleanly to old review requests.
        for commit in commits['individual']:
            node = commit['id']

            if node not in remaining_nodes:
                continue

            # If the commit appears in an old review request, by definition of
            # commits deriving from content, the commit has not changed and there
            # is nothing to update. Update our accounting and move on.
            rid = remaining_nodes[node]
            logger.info('%s: commit %s unchanged; using existing request %d' %
                        (identifier, node, rid))

            del remaining_nodes[node]
            unclaimed_rids.remove(rid)
            processed_nodes.add(node)
            node_to_rid[node] = rid

            rr = ReviewRequest.objects.get(pk=rid)
            review_requests[rid] = rr
            review_data[rid] = get_review_request_data(rr)

            try:
                discard_on_publish_rids.remove(rid)
            except ValueError:
                pass

        logger.info(
            '%s: %d/%d commits mapped exactly' %
            (identifier, len(processed_nodes), len(commits['individual'])))

        # A commit message FileDiff should be created only if this is a
        # completely new ReviewRequest, or if the ReviewRequest we're updating
        # already had a commit message FileDiff.
        create_comm_msg_filediff = squashed_commit_data.extra_data.get(
            HAS_COMMIT_MSG_FILEDIFF_KEY, False)

        # Find commits that map to a previous version.
        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            # The client may have sent obsolescence data saying which commit this
            # commit has derived from. Use that data (if available) to try to find
            # a mapping to an old review request.
            for precursor in commit['precursors']:
                rid = remaining_nodes.get(precursor)
                if not rid:
                    continue

                logger.info('%s: found precursor to commit %s; '
                            'using existing review request %d' %
                            (identifier, node, rid))

                del remaining_nodes[precursor]
                unclaimed_rids.remove(rid)

                rr = ReviewRequest.objects.get(pk=rid)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit,
                                                     create_comm_msg_filediff)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(node)
                node_to_rid[node] = rid
                review_requests[rid] = rr
                review_data[rid] = get_review_request_data(rr)

                try:
                    discard_on_publish_rids.remove(rid)
                except ValueError:
                    pass

                break

        logger.info(
            '%s: %d/%d mapped exactly or to precursors' %
            (identifier, len(processed_nodes), len(commits['individual'])))

        # Clients should add "MozReview-Commit-ID" unique identifiers to
        # commit messages. Search for them and match up accordingly.

        unclaimed_rrs = [
            ReviewRequest.objects.get(pk=rid) for rid in unclaimed_rids
        ]

        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            commit_id = parse_commit_id(commit['message'])
            if not commit_id:
                logger.warn('%s: commit %s does not have commit id' %
                            (identifier, node))
                continue

            for rr in unclaimed_rrs:
                rr_commit_id = parse_commit_id(rr.description)
                if commit_id != rr_commit_id:
                    continue

                # The commit ID from this commit was found in an existing
                # review request. Map them together.
                logger.info(
                    '%s: commit ID %s for %s found in review request %d' %
                    (identifier, commit_id, node, rr.id))

                try:
                    del remaining_nodes[node]
                except KeyError:
                    pass

                unclaimed_rids.remove(rr.id)
                unclaimed_rrs.remove(rr)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit,
                                                     create_comm_msg_filediff)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(node)
                node_to_rid[node] = rr.id
                review_requests[rr.id] = rr
                review_data[rr.id] = get_review_request_data(rr)
                try:
                    discard_on_publish_rids.remove(rr.id)
                except ValueError:
                    pass

                break

        logger.info(
            '%s: %d/%d mapped after commit ID matching' %
            (identifier, len(processed_nodes), len(commits['individual'])))

        logger.info('%s: %d unclaimed review requests' %
                    (identifier, len(unclaimed_rids)))

        # Now do a pass over the commits that didn't map cleanly.
        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            # We haven't seen this commit before *and* our mapping above didn't
            # do anything useful with it.

            # This is where things could get complicated. We could involve
            # heuristic based matching (comparing commit messages, changed
            # files, etc). We may do that in the future.

            # For now, match the commit up against the next one in the index.
            # The unclaimed rids list contains review requests which were created
            # when previously updating this review identifier, but not published.
            # If we have more commits than were previously published we'll start
            # reusing these private review requests before creating new ones.
            #
            # We don't reuse existing review requests when obsolescence data is
            # available because the lack of a clean commit mapping (from above)
            # means that the commit is logically new and shouldn't be
            # associated with a review request that belonged to a different
            # logical commit.
            if unclaimed_rids and not commits.get('obsolescence', False):
                assumed_old_rid = unclaimed_rids.pop(0)

                logger.info('%s: mapping %s to unclaimed request %d' %
                            (identifier, node, assumed_old_rid))

                rr = ReviewRequest.objects.get(pk=assumed_old_rid)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit,
                                                     create_comm_msg_filediff)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(commit['id'])
                node_to_rid[node] = assumed_old_rid
                review_requests[assumed_old_rid] = rr
                review_data[assumed_old_rid] = get_review_request_data(rr)

                try:
                    discard_on_publish_rids.remove(assumed_old_rid)
                except ValueError:
                    pass

                continue

            # There are no more unclaimed review request IDs. This means we have
            # more commits than before. Create new review requests as appropriate.
            rr = ReviewRequest.objects.create(user=user,
                                              repository=repo,
                                              commit_id=None,
                                              local_site=local_site)

            commit_data = fetch_commit_data(rr)
            commit_data.extra_data.update({
                MOZREVIEW_KEY: True,
                IDENTIFIER_KEY: identifier,
                SQUASHED_KEY: False,
            })
            commit_data.draft_extra_data.update({
                AUTHOR_KEY: commit['author'],
                IDENTIFIER_KEY: identifier,
            })
            commit_data.save(update_fields=['extra_data', 'draft_extra_data'])

            logger.info('%s: created review request %d for commit %s' %
                        (identifier, rr.id, node))
            draft, warns = update_review_request(local_site, request,
                                                 privileged_user,
                                                 reviewer_cache, rr, commit,
                                                 create_comm_msg_filediff)
            squashed_reviewers.update(u for u in draft.target_people.all())
            warnings.extend(warns)
            processed_nodes.add(commit['id'])
            node_to_rid[node] = rr.id
            review_requests[rr.id] = rr
            review_data[rr.id] = get_review_request_data(rr)
            unpublished_rids.append(rr.id)

        # At this point every incoming commit has been accounted for.
        # If there are any remaining review requests, they must belong to
        # deleted commits. (Or, we made a mistake and updated the wrong review
        # request)
        logger.info('%s: %d unclaimed review requests left over' %
                    (identifier, len(unclaimed_rids)))
        for rid in unclaimed_rids:
            rr = ReviewRequest.objects.get(pk=rid)

            if rr.public and rid not in discard_on_publish_rids:
                # This review request has already been published so we'll need to
                # discard it when we publish the squashed review request.
                discard_on_publish_rids.append(rid)
            elif not rr.public and rid not in unpublished_rids:
                # We've never published this review request so it may be reused in
                # the future for *any* commit. Keep track of it.
                unpublished_rids.append(rid)
            else:
                # This means we've already marked the review request properly
                # in a previous push, so do nothing.
                pass

        commit_list = []
        for commit in commits['individual']:
            node = commit['id']
            commit_list.append([node, node_to_rid[node]])

        # We need to refresh the squashed rr and draft because post save hooks
        # in ReviewBoard result in magical changes to some of its fields.
        squashed_rr = ReviewRequest.objects.get(pk=squashed_rr.id)
        squashed_draft = squashed_rr.draft.get()
        squashed_commit_data = fetch_commit_data(squashed_rr)

        squashed_draft.summary = identifier

        # Reviewboard does not allow review requests with empty descriptions to
        # be published, so we insert some filler here.
        squashed_draft.description = 'This is the parent review request'
        squashed_draft.bugs_closed = ','.join(
            sorted(set(commit['bug'] for commit in commits['individual'])))

        squashed_draft.depends_on.clear()
        for rrid in sorted(node_to_rid.values()):
            rr = ReviewRequest.objects.for_id(rrid, local_site)
            squashed_draft.depends_on.add(rr)

        squashed_draft.target_people.clear()
        for user in sorted(squashed_reviewers):
            squashed_draft.target_people.add(user)

        squashed_commit_data.draft_extra_data[COMMITS_KEY] = json.dumps(
            commit_list)

        if 'base_commit_id' in commits['squashed']:
            squashed_commit_data.draft_extra_data[BASE_COMMIT_KEY] = (
                commits['squashed']['base_commit_id'])

        squashed_commit_data.extra_data.update({
            DISCARD_ON_PUBLISH_KEY: json.dumps(discard_on_publish_rids),
            FIRST_PUBLIC_ANCESTOR_KEY: (
                commits['squashed']['first_public_ancestor']),
            UNPUBLISHED_KEY: json.dumps(unpublished_rids),
        })

        squashed_draft.save()
        squashed_rr.save(update_fields=['extra_data'])
        squashed_commit_data.save(
            update_fields=['extra_data', 'draft_extra_data'])

        review_requests[squashed_rr.id] = squashed_rr
        review_data[squashed_rr.id] = get_review_request_data(squashed_rr)

        return squashed_rr, node_to_rid, review_data, warnings
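
For orientation, this is the rough shape of the commits payload the method reads, inferred from the lookups above; the keys mirror the code, while the concrete values are purely illustrative:

# Illustrative payload only; field names come from the accesses in
# _process_submission, the values are made up.
commits = {
    'squashed': {
        'diff_b64': 'ZGlmZiAtLWdpdCAuLi4=',    # base64-encoded squashed diff
        'base_commit_id': '0' * 40,            # optional
        'first_public_ancestor': '1' * 40,
    },
    'individual': [
        {
            'id': 'a' * 40,
            'message': 'Bug 1 - foo\n\nMozReview-Commit-ID: APOgLo1',
            'bug': '1',
            'author': 'Jane Doe <jane@example.com>',
            'precursors': [],                  # obsolescence predecessors
        },
    ],
    'obsolescence': True,  # whether obsolescence data accompanied the push
}
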
    def _process_submission(self, request, local_site, user, privileged_user,
                            repo, identifier, commits):
        logger.info('processing batch submission %s to %s with %d commits' % (
                    identifier, repo.name, len(commits['individual'])))

        try:
            squashed_rr = ReviewRequest.objects.get(commit_id=identifier,
                                                    repository=repo)
            if not squashed_rr.is_mutable_by(user):
                logger.warn('%s not mutable by %s' % (squashed_rr.id, user))
                raise SubmissionException(self.get_no_access_error(request))

            if squashed_rr.status != ReviewRequest.PENDING_REVIEW:
                logger.warn('%s is not a pending review request; cannot edit' %
                            squashed_rr.id)
                raise SubmissionException((INVALID_FORM_DATA, {
                    'fields': {
                        'identifier': ['Parent review request is '
                                       'submitted or discarded']}}))

            logger.info('using squashed review request %d' % squashed_rr.id)

        except ReviewRequest.DoesNotExist:
            squashed_rr = ReviewRequest.objects.create(
                    user=user, repository=repo, commit_id=identifier,
                    local_site=local_site)

            squashed_commit_data = fetch_commit_data(squashed_rr)
            squashed_commit_data.extra_data.update({
                MOZREVIEW_KEY: True,
                IDENTIFIER_KEY: identifier,
                FIRST_PUBLIC_ANCESTOR_KEY: (
                    commits['squashed']['first_public_ancestor']),
                SQUASHED_KEY: True,
                DISCARD_ON_PUBLISH_KEY: '[]',
                UNPUBLISHED_KEY: '[]',
            })
            squashed_commit_data.draft_extra_data.update({
                IDENTIFIER_KEY: identifier,
            })
            squashed_commit_data.save(
                update_fields=['extra_data', 'draft_extra_data'])

            logger.info('created squashed review request #%d' % squashed_rr.id)

        # The diffs on diffsets can't be updated, only replaced. So always
        # construct the diffset.

        try:
            # TODO consider moving diffset creation outside of the transaction
            # since it can be quite time consuming.
            # Calling create_from_data() instead of create_from_upload() skips
            # diff size validation. We allow unlimited diff sizes, so no biggie.
            logger.info('%s: generating squashed diffset for %d' % (
                        identifier, squashed_rr.id))
            diffset = DiffSet.objects.create_from_data(
                repository=repo,
                diff_file_name='diff',
                # The original value is a unicode instance. Python 2 can't
                # base64-decode a unicode instance directly, so encode it
                # to a byte string first.
                diff_file_contents=commits['squashed']['diff_b64'].encode(
                    'ascii').decode('base64'),
                parent_diff_file_name=None,
                parent_diff_file_contents=None,
                diffset_history=None,
                basedir='',
                request=request,
                base_commit_id=commits['squashed'].get('base_commit_id'),
                save=True,
                )

            update_diffset_history(squashed_rr, diffset)
            diffset.save()

            # We pass `force_insert=True` to save to make sure Django generates
            # an SQL INSERT rather than an UPDATE if the DiffSetVerification
            # already exists. It should never already exist so we want the
            # exception `force_insert=True` will cause if that's the case.
            DiffSetVerification(diffset=diffset).save(
                authorized_user=privileged_user, force_insert=True)
        except Exception:
            logger.exception('error processing squashed diff')
            raise DiffProcessingException()

        update_review_request_draft_diffset(squashed_rr, diffset)
        logger.info('%s: updated squashed diffset for %d' % (
                    identifier, squashed_rr.id))

        # TODO: We need to take into account the commits data from the squashed
        # review request's draft. This data represents the mapping from commit
        # to rid in the event that we would have published. We're overwriting
        # this data. This will only come into play if we start trusting the
        # server instead of the client when matching review request ids.
        # Bug 1047516

        squashed_commit_data = fetch_commit_data(squashed_rr)
        previous_commits = get_previous_commits(squashed_rr,
                                                squashed_commit_data)
        remaining_nodes = get_remaining_nodes(previous_commits)
        discard_on_publish_rids = get_discard_on_publish_rids(
            squashed_rr, squashed_commit_data)
        unpublished_rids = get_unpublished_rids(
            squashed_rr, squashed_commit_data)
        unclaimed_rids = get_unclaimed_rids(previous_commits,
                                            discard_on_publish_rids,
                                            unpublished_rids)

        logger.info('%s: %d previous commits; %d discard on publish; '
                    '%d unpublished' % (identifier, len(previous_commits),
                                        len(discard_on_publish_rids),
                                        len(unpublished_rids)))

        # Previously pushed nodes which have been processed and had their review
        # request updated or did not require updating.
        processed_nodes = set()

        node_to_rid = {}

        # A mapping from review request id to the corresponding ReviewRequest.
        review_requests = {}

        # A mapping of review request id to dicts of additional metadata.
        review_data = {}

        squashed_reviewers = set()
        reviewer_cache = ReviewerCache(request)

        warnings = []

        # Do a pass and find all commits that map cleanly to old review requests.
        for commit in commits['individual']:
            node = commit['id']

            if node not in remaining_nodes:
                continue

            # If the commit appears in an old review request, by definition of
            # commits deriving from content, the commit has not changed and there
            # is nothing to update. Update our accounting and move on.
            rid = remaining_nodes[node]
            logger.info('%s: commit %s unchanged; using existing request %d' % (
                        identifier, node, rid))

            del remaining_nodes[node]
            unclaimed_rids.remove(rid)
            processed_nodes.add(node)
            node_to_rid[node] = rid

            rr = ReviewRequest.objects.get(pk=rid)
            review_requests[rid] = rr
            review_data[rid] = get_review_request_data(rr)

            try:
                discard_on_publish_rids.remove(rid)
            except ValueError:
                pass

        logger.info('%s: %d/%d commits mapped exactly' % (
                    identifier, len(processed_nodes),
                    len(commits['individual'])))

        # Find commits that map to a previous version.
        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            # The client may have sent obsolescence data saying which commit this
            # commit has derived from. Use that data (if available) to try to find
            # a mapping to an old review request.
            for precursor in commit['precursors']:
                rid = remaining_nodes.get(precursor)
                if not rid:
                    continue

                logger.info('%s: found precursor to commit %s; '
                            'using existing review request %d' % (
                            identifier, node, rid))

                del remaining_nodes[precursor]
                unclaimed_rids.remove(rid)

                rr = ReviewRequest.objects.get(pk=rid)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(node)
                node_to_rid[node] = rid
                review_requests[rid] = rr
                review_data[rid] = get_review_request_data(rr)

                try:
                    discard_on_publish_rids.remove(rid)
                except ValueError:
                    pass

                break

        logger.info('%s: %d/%d mapped exactly or to precursors' % (
                    identifier, len(processed_nodes),
                    len(commits['individual'])))

        # Clients should add "MozReview-Commit-ID" unique identifiers to
        # commit messages. Search for them and match up accordingly.

        unclaimed_rrs = [ReviewRequest.objects.get(pk=rid)
                         for rid in unclaimed_rids]

        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            commit_id = parse_commit_id(commit['message'])
            if not commit_id:
                logger.warn('%s: commit %s does not have commit id' % (
                            identifier, node))
                continue

            for rr in unclaimed_rrs:
                rr_commit_id = parse_commit_id(rr.description)
                if commit_id != rr_commit_id:
                    continue

                # The commit ID from this commit was found in an existing
                # review request. Map them together.
                logger.info('%s: commit ID %s for %s found in review request %d' % (
                            identifier, commit_id, node, rr.id))

                try:
                    del remaining_nodes[node]
                except KeyError:
                    pass

                unclaimed_rids.remove(rr.id)
                unclaimed_rrs.remove(rr)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(node)
                node_to_rid[node] = rr.id
                review_requests[rr.id] = rr
                review_data[rr.id] = get_review_request_data(rr)
                try:
                    discard_on_publish_rids.remove(rr.id)
                except ValueError:
                    pass

                break

        logger.info('%s: %d/%d mapped after commit ID matching' % (
                    identifier, len(processed_nodes),
                    len(commits['individual'])))

        logger.info('%s: %d unclaimed review requests' % (
                    identifier, len(unclaimed_rids)))

        # Now do a pass over the commits that didn't map cleanly.
        for commit in commits['individual']:
            node = commit['id']
            if node in processed_nodes:
                continue

            # We haven't seen this commit before *and* our mapping above didn't
            # do anything useful with it.

            # This is where things could get complicated. We could involve
            # heuristic based matching (comparing commit messages, changed
            # files, etc). We may do that in the future.

            # For now, match the commit up against the next one in the index.
            # The unclaimed rids list contains review requests which were created
            # when previously updating this review identifier, but not published.
            # If we have more commits than were previously published we'll start
            # reusing these private review requests before creating new ones.
            #
            # We don't reuse existing review requests when obsolescence data is
            # available because the lack of a clean commit mapping (from above)
            # means that the commit is logically new and shouldn't be
            # associated with a review request that belonged to a different
            # logical commit.
            if unclaimed_rids and not commits.get('obsolescence', False):
                assumed_old_rid = unclaimed_rids.pop(0)

                logger.info('%s: mapping %s to unclaimed request %d' % (
                            identifier, node, assumed_old_rid))

                rr = ReviewRequest.objects.get(pk=assumed_old_rid)
                draft, warns = update_review_request(local_site, request,
                                                     privileged_user,
                                                     reviewer_cache, rr,
                                                     commit)
                squashed_reviewers.update(u for u in draft.target_people.all())
                warnings.extend(warns)
                processed_nodes.add(commit['id'])
                node_to_rid[node] = assumed_old_rid
                review_requests[assumed_old_rid] = rr
                review_data[assumed_old_rid] = get_review_request_data(rr)

                try:
                    discard_on_publish_rids.remove(assumed_old_rid)
                except ValueError:
                    pass

                continue

            # There are no more unclaimed review request IDs. This means we have
            # more commits than before. Create new review requests as appropriate.
            rr = ReviewRequest.objects.create(user=user,
                                              repository=repo,
                                              commit_id=None,
                                              local_site=local_site)

            commit_data = fetch_commit_data(rr)
            commit_data.extra_data.update({
                MOZREVIEW_KEY: True,
                IDENTIFIER_KEY: identifier,
                SQUASHED_KEY: False,
            })
            commit_data.draft_extra_data.update({
                AUTHOR_KEY: commit['author'],
                IDENTIFIER_KEY: identifier,
            })
            commit_data.save(
                update_fields=['extra_data', 'draft_extra_data'])

            logger.info('%s: created review request %d for commit %s' % (
                        identifier, rr.id, node))
            draft, warns = update_review_request(local_site, request,
                                                 privileged_user,
                                                 reviewer_cache, rr, commit)
            squashed_reviewers.update(u for u in draft.target_people.all())
            warnings.extend(warns)
            processed_nodes.add(commit['id'])
            node_to_rid[node] = rr.id
            review_requests[rr.id] = rr
            review_data[rr.id] = get_review_request_data(rr)
            unpublished_rids.append(rr.id)

        # At this point every incoming commit has been accounted for.
        # If there are any remaining review requests, they must belong to
        # deleted commits. (Or, we made a mistake and updated the wrong review
        # request)
        logger.info('%s: %d unclaimed review requests left over' % (
                    identifier, len(unclaimed_rids)))
        for rid in unclaimed_rids:
            rr = ReviewRequest.objects.get(pk=rid)

            if rr.public and rid not in discard_on_publish_rids:
                # This review request has already been published so we'll need to
                # discard it when we publish the squashed review request.
                discard_on_publish_rids.append(rid)
            elif not rr.public and rid not in unpublished_rids:
                # We've never published this review request so it may be reused in
                # the future for *any* commit. Keep track of it.
                unpublished_rids.append(rid)
            else:
                # This means we've already marked the review request properly
                # in a previous push, so do nothing.
                pass

        commit_list = []
        for commit in commits['individual']:
            node = commit['id']
            commit_list.append([node, node_to_rid[node]])

        # We need to refresh the squashed rr and draft because post save hooks
        # in ReviewBoard result in magical changes to some of its fields.
        squashed_rr = ReviewRequest.objects.get(pk=squashed_rr.id)
        squashed_draft = squashed_rr.draft.get()
        squashed_commit_data = fetch_commit_data(squashed_rr)

        squashed_draft.summary = identifier

        # Reviewboard does not allow review requests with empty descriptions to
        # be published, so we insert some filler here.
        squashed_draft.description = 'This is the parent review request'
        squashed_draft.bugs_closed = ','.join(
            sorted(set(commit['bug'] for commit in commits['individual'])))

        squashed_draft.depends_on.clear()
        for rrid in sorted(node_to_rid.values()):
            rr = ReviewRequest.objects.for_id(rrid, local_site)
            squashed_draft.depends_on.add(rr)

        squashed_draft.target_people.clear()
        for user in sorted(squashed_reviewers):
            squashed_draft.target_people.add(user)

        squashed_commit_data.draft_extra_data[COMMITS_KEY] = json.dumps(
            commit_list)

        if 'base_commit_id' in commits['squashed']:
            squashed_commit_data.draft_extra_data[BASE_COMMIT_KEY] = (
                commits['squashed']['base_commit_id'])

        squashed_commit_data.extra_data.update({
            DISCARD_ON_PUBLISH_KEY: json.dumps(discard_on_publish_rids),
            FIRST_PUBLIC_ANCESTOR_KEY: (
                commits['squashed']['first_public_ancestor']),
            UNPUBLISHED_KEY: json.dumps(unpublished_rids),
        })

        squashed_draft.save()
        squashed_rr.save(update_fields=['extra_data'])
        squashed_commit_data.save(
            update_fields=['extra_data', 'draft_extra_data'])

        review_requests[squashed_rr.id] = squashed_rr
        review_data[squashed_rr.id] = get_review_request_data(squashed_rr)

        return squashed_rr, node_to_rid, review_data, warnings
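
A hedged sketch of how a caller might consume the return value; the surrounding resource is not shown in this snippet, so everything outside the unpacking line is an assumption:

# Hypothetical caller; only the tuple shape comes from the method above.
squashed_rr, node_to_rid, review_data, warnings = self._process_submission(
    request, local_site, user, privileged_user, repo, identifier, commits)

response = {
    'squashed_rr': squashed_rr.id,
    'nodes': [{'node': node, 'review_request_id': rid}
              for node, rid in node_to_rid.items()],
    # review_data maps review request id to its additional metadata dict.
    'review_data': review_data,
    'warnings': warnings,
}
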
Example #6
def wrappedpushdiscovery(orig, pushop):
    """Wraps exchange._pushdiscovery to add extra review metadata.

    We determine which nodes to review before push discovery runs. This
    ensures that errors are detected and reported quickly, without waiting
    on server communication.
    """

    pushop.reviewnodes = None

    caps = getreviewcaps(pushop.remote)
    if 'pushreview' not in caps:
        return orig(pushop)

    ui = pushop.ui
    repo = pushop.repo

    if repo.noreviewboardpush:
        return orig(pushop)

    # If no arguments are specified to push, Mercurial will try to push all
    # non-remote changesets by default. This can result in unexpected behavior,
    # especially for people doing multi-headed development.
    #
    # Since we reject pushes with multiple heads anyway, default to pushing
    # the working copy.
    if not pushop.revs:
        pushop.revs = [repo['.'].node()]

    tipnode = None
    basenode = None

    # Our prepushoutgoing hook validates that all pushed changesets are
    # part of the same DAG head. If revisions were specified by the user,
    # the last is the tip commit to review and the first (if more than 1)
    # is the base commit to review.
    #
    # Note: the revisions are in the order they were specified by the user.
    # This may not be DAG order. So we have to explicitly order them here.
    revs = sorted(repo[r].rev() for r in pushop.revs)
    tipnode = repo[revs[-1]].node()
    if len(revs) > 1:
        basenode = repo[revs[0]].node()

    if repo.pushsingle:
        basenode = tipnode

    # Given a base and tip node, find all changesets to review.
    #
    # A solution that works most of the time is to find all non-public
    # ancestors of that node. This is our default.
    #
    # If basenode is specified, we stop the traversal when we encounter it.
    #
    # Note that we will still refuse to review a public changeset even with
    # basenode. This decision is somewhat arbitrary and can be revisited later
    # if there is an actual need to review public changesets.
    nodes = [tipnode]
    # Special case where basenode is the tip node.
    if basenode and tipnode == basenode:
        pass
    else:
        for node in repo[tipnode].ancestors():
            ctx = repo[node]

            if ctx.phase() == phases.public:
                break
            if basenode and ctx.node() == basenode:
                nodes.insert(0, ctx.node())
                break

            nodes.insert(0, ctx.node())

    # Filter out public nodes.
    publicnodes = []
    for node in nodes:
        ctx = repo[node]
        if ctx.phase() == phases.public:
            publicnodes.append(node)
            ui.status(
                _('(ignoring public changeset %s in review request)\n') %
                ctx.hex()[0:12])

    nodes = [n for n in nodes if n not in publicnodes]
    if not nodes:
        raise util.Abort(
            _('no non-public changesets left to review'),
            hint=_(
                'add or change the -r argument to include draft changesets'))

    # Reject completely empty changesets before they reach review.
    for node in nodes:
        ctx = repo[node]
        if not ctx.files():
            raise util.Abort(_('cannot review empty changeset %s') %
                             ctx.hex()[:12],
                             hint=_('add files to or remove changeset'))

    run_android_checkstyle(repo, nodes)

    # Ensure all reviewed changesets have commit IDs.
    replacenodes = []
    for node in nodes:
        ctx = repo[node]
        if not parse_commit_id(encoding.fromlocal(ctx.description())):
            replacenodes.append(node)

    def makememctx(repo, ctx, revmap, copyfilectxfn):
        parents = newparents(repo, ctx, revmap)
        # Need to make a copy otherwise modification is made on original,
        # which is just plain wrong.
        msg = encoding.fromlocal(ctx.description())
        new_msg, changed = addcommitid(msg, repo=repo)

        memctx = context.memctx(repo,
                                parents,
                                encoding.tolocal(new_msg),
                                ctx.files(),
                                copyfilectxfn,
                                user=ctx.user(),
                                date=ctx.date(),
                                extra=dict(ctx.extra()))

        return memctx

    if replacenodes:
        ui.status(
            _('(adding commit id to %d changesets)\n') % (len(replacenodes)))
        nodemap = replacechangesets(repo,
                                    replacenodes,
                                    makememctx,
                                    backuptopic='addcommitid')

        # Since we're in the middle of an operation, update references
        # to rewritten nodes.
        nodes = [nodemap.get(node, node) for node in nodes]
        pushop.revs = [nodemap.get(node, node) for node in pushop.revs]

    pushop.reviewnodes = nodes

    # Since we may rewrite changesets to contain review metadata after
    # push, abort immediately if the working directory state is not
    # compatible with rewriting. This prevents us from successfully
    # pushing and failing to update commit metadata after the push. i.e.
    # it prevents potential loss of metadata.
    #
    # There may be some scenarios where we don't rewrite after push.
    # But coding that here would be complicated. And future server changes
    # may change things like review request mapping, which may invalidate
    # client assumptions. So always assume a rewrite is needed.
    impactedrevs = list(repo.revs('%ln::', nodes))
    if repo['.'].rev() in impactedrevs:
        cmdutil.checkunfinished(repo)
        cmdutil.bailifchanged(repo)

    return orig(pushop)
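
A note on the revset used by the final guard above: '%ln::' substitutes a list of binary nodes and selects those changesets plus all of their descendants, whereas '::%ln' would select their ancestors. A small hedged illustration of the check this enables:

# Hedged illustration; assumes a linear history 0-1-2-3 with `nodes`
# referring to changeset 2:
#
#   repo.revs('%ln::', nodes)  -> {2, 3}     (the nodes and descendants)
#   repo.revs('::%ln', nodes)  -> {0, 1, 2}  (the nodes and ancestors)
#
# So the working directory only has to be clean when its parent could be
# rewritten, i.e. when it descends from (or is) one of the reviewed nodes.
needs_clean_wdir = repo['.'].rev() in repo.revs('%ln::', nodes)
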