Esempio n. 1
0
def _normalize_pr(client: GithubClient, pr, strip_text_content,
                  redact_names_and_urls):
    """Translate a raw GitHub pull-request payload into a NormalizedPullRequest.

    Args:
        client: GithubClient used for the follow-up API fetches
            (user records, PR commits, comments, reviews).
        pr: dict as returned by the GitHub pulls API.
        strip_text_content: when True, title/body are scrubbed via sanitize_text.
        redact_names_and_urls: when True, the PR URL is dropped and branch
            names are redacted via _branch_redactor.
    """
    base_ref = pr['base']['ref']
    head_ref = pr['head']['ref']
    repo_full_name = pr['base']['repo']['full_name']

    if redact_names_and_urls:
        pr_url = None
        base_branch = _branch_redactor.redact_name(base_ref)
        head_branch = _branch_redactor.redact_name(head_ref)
    else:
        pr_url = pr['html_url']
        base_branch = base_ref
        head_branch = head_ref

    # Follow-up API calls, issued in the same order as the original field list.
    author = _normalize_user(client.get_json(pr['user']['url']))
    merged_by = None
    if pr['merged']:
        merged_by = _normalize_user(client.get_json(pr['merged_by']['url']))

    commit_progress = tqdm(
        client.get_pr_commits(repo_full_name, pr['number']),
        f'downloading commits for PR {pr["number"]}',
        leave=False,
        unit='commits',
    )
    commits = [
        _normalize_commit(c, pr['base']['repo'], base_ref,
                          strip_text_content, redact_names_and_urls)
        for c in commit_progress
    ]

    merge_commit = _get_merge_commit(client, pr, strip_text_content,
                                     redact_names_and_urls)

    comments = [
        NormalizedPullRequestComment(
            user=_normalize_user(client.get_json(c['user']['url'])),
            body=c['body'],
            created_at=c['created_at'],
        )
        for c in client.get_pr_comments(repo_full_name, pr['number'])
    ]

    approvals = [
        NormalizedPullRequestReview(
            user=_normalize_user(client.get_json(r['user']['url'])),
            foreign_id=r['id'],
            review_state=r['state'],
        )
        for r in client.get_pr_reviews(repo_full_name, pr['number'])
    ]

    return NormalizedPullRequest(
        id=pr['number'],
        additions=pr['additions'],
        deletions=pr['deletions'],
        changed_files=pr['changed_files'],
        is_closed=pr['state'] == 'closed',
        is_merged=pr['merged'],
        created_at=pr['created_at'],
        updated_at=pr['updated_at'],
        merge_date=pr['merged_at'] if pr['merged_at'] else None,
        closed_date=pr['closed_at'] if pr['closed_at'] else None,
        title=sanitize_text(pr['title'], strip_text_content),
        body=sanitize_text(pr['body'], strip_text_content),
        url=pr_url,
        base_branch=base_branch,
        head_branch=head_branch,
        author=author,
        merged_by=merged_by,
        commits=commits,
        merge_commit=merge_commit,
        comments=comments,
        approvals=approvals,
        base_repo=_normalize_pr_repo(pr['base']['repo'], redact_names_and_urls),
        head_repo=_normalize_pr_repo(pr['head']['repo'], redact_names_and_urls),
    )
Esempio n. 2
0
def _normalize_pr(
    merge_request,
    normalized_commits: List[NormalizedCommit],
    strip_text_content: bool,
    redact_names_and_urls: bool,
    merge_commit,
):
    """Build a NormalizedPullRequest from a GitLab merge request.

    Args:
        merge_request: GitLab merge-request object (python-gitlab style).
        normalized_commits: commits already converted to NormalizedCommit.
        strip_text_content: when True, title/description are scrubbed
            via sanitize_text.
        redact_names_and_urls: when True, the web URL is dropped and the
            branch names are redacted via _branch_redactor.
        merge_commit: pre-normalized merge commit (or None).
    """
    target_branch = merge_request.target_branch
    source_branch = merge_request.source_branch

    # Line counts come from the merge request's diff attribute; the file
    # count from that call is discarded (see note below).
    additions, deletions, _ = _calculate_diff_counts(merge_request.diff)

    # OJ-7701: GitLab merge requests have a PATCH in the diff attribute, not standard diff format. We can't
    # determine the number of files changed from a patch, but we can get the number of lines added and deleted.
    # To get the number of files changed, we can just use the length of the list returned from changes(), which
    # contains file information for each file changed in the merge request.
    changed_files = len(merge_request.changes()['changes'])

    state = merge_request.state

    return NormalizedPullRequest(
        id=merge_request.id,
        additions=additions,
        deletions=deletions,
        changed_files=changed_files,
        created_at=merge_request.created_at,
        updated_at=merge_request.updated_at,
        merge_date=merge_request.merged_at,
        closed_date=merge_request.closed_at,
        is_closed=state == 'closed',
        is_merged=state == 'merged',
        # redacted fields
        url=None if redact_names_and_urls else merge_request.web_url,
        base_branch=(_branch_redactor.redact_name(target_branch)
                     if redact_names_and_urls else target_branch),
        head_branch=(_branch_redactor.redact_name(source_branch)
                     if redact_names_and_urls else source_branch),
        # sanitized fields
        title=sanitize_text(merge_request.title, strip_text_content),
        body=sanitize_text(merge_request.description, strip_text_content),
        # normalized fields
        commits=normalized_commits,
        merge_commit=merge_commit,
        author=_normalize_user(merge_request.author),
        merged_by=_normalize_user(merge_request.merged_by),
        approvals=_get_normalized_approvals(merge_request),
        comments=_get_normalized_pr_comments(merge_request,
                                             strip_text_content),
        base_repo=_normalize_short_form_repo(merge_request.target_project,
                                             redact_names_and_urls),
        head_repo=_normalize_short_form_repo(merge_request.source_project,
                                             redact_names_and_urls),
    )
def _normalize_commit(
    api_commit, normalized_repo, branch_name, strip_text_content: bool, redact_names_and_urls: bool
):
    """Map a Bitbucket Cloud commit payload onto a NormalizedCommit.

    Args:
        api_commit: dict as returned by the Bitbucket Cloud commits API.
        normalized_repo: normalized repo object; its short form is embedded.
        branch_name: branch the commit was fetched from.
        strip_text_content: when True, the commit message is scrubbed
            via sanitize_text.
        redact_names_and_urls: when True, the commit URL is dropped and the
            branch name is redacted via _branch_redactor.
    """
    normalized_author = _normalize_user(api_commit['author'])

    if redact_names_and_urls:
        url = None
        branch = _branch_redactor.redact_name(branch_name)
    else:
        url = api_commit['links']['html']['href']
        branch = branch_name

    return NormalizedCommit(
        hash=api_commit['hash'],
        author=normalized_author,
        url=url,
        commit_date=parser.parse(api_commit['date']),
        author_date=None,  # Not available in BB Cloud API,
        message=sanitize_text(api_commit['message'], strip_text_content),
        is_merge=len(api_commit['parents']) > 1,
        repo=normalized_repo.short(),  # use short form of repo
        branch_name=branch,
    )
Esempio n. 4
0
def _get_normalized_pr_comments(
        merge_request,
        strip_text_content) -> List[NormalizedPullRequestComment]:
    """Normalize every note on a GitLab merge request.

    On retry exhaustion or a GitLab HTTP error the failure is logged and the
    merge request is treated as having no comments ([] is returned) so one
    bad PR does not abort the whole download.
    """
    def _to_comment(note):
        # One GitLab note -> one normalized comment.
        return NormalizedPullRequestComment(
            user=_normalize_user(note.author),
            body=sanitize_text(note.body, strip_text_content),
            created_at=note.created_at,
            system_generated=note.system,
        )

    try:
        return [_to_comment(note) for note in merge_request.note_list]
    except (requests.exceptions.RetryError,
            gitlab.exceptions.GitlabHttpError) as e:
        log_and_print_request_error(
            e,
            f'standardizing PR comments for merge_request {merge_request.id} -- '
            f'handling it as if it has no comments',
        )
        return []
Esempio n. 5
0
def _normalize_commit(commit, repo, branch_name, strip_text_content,
                      redact_names_and_urls):
    """Convert a raw GitHub commit payload into a NormalizedCommit.

    Args:
        commit: dict as returned by the GitHub commits API.
        repo: raw repo dict, normalized via _normalize_pr_repo.
        branch_name: branch the commit belongs to.
        strip_text_content: when True, the commit message is scrubbed
            via sanitize_text.
        redact_names_and_urls: when True, the commit URL is dropped and the
            branch name is redacted via _branch_redactor.
    """
    # Combine the top-level author (may be absent/None for commits whose
    # author has no GitHub account) with the git-level name/email.  Build a
    # NEW dict rather than calling .update() on the payload's own dict: the
    # original mutated commit['author'] in place, clobbering data shared with
    # the caller.
    author = {
        **(commit.get('author') or {}),
        'name': commit['commit']['author']['name'],
        'email': commit['commit']['author']['email'],
    }

    return NormalizedCommit(
        hash=commit['sha'],
        url=commit['html_url'] if not redact_names_and_urls else None,
        message=sanitize_text(commit['commit']['message'], strip_text_content),
        commit_date=commit['commit']['committer']['date'],
        author_date=commit['commit']['author']['date'],
        author=_normalize_user(author),
        is_merge=len(commit['parents']) > 1,
        repo=_normalize_pr_repo(repo, redact_names_and_urls),
        branch_name=branch_name if not redact_names_and_urls else
        _branch_redactor.redact_name(branch_name),
    )
Esempio n. 6
0
def _normalize_commit(api_commit, normalized_repo, branch_name,
                      strip_text_content: bool, redact_names_and_urls: bool):
    """Map a GitLab commit object onto a NormalizedCommit.

    Args:
        api_commit: GitLab commit object (python-gitlab style attributes).
        normalized_repo: normalized repo; its URL builds the commit link and
            its short form is embedded in the result.
        branch_name: branch the commit was fetched from.
        strip_text_content: when True, the commit message is scrubbed
            via sanitize_text.
        redact_names_and_urls: when True, the commit URL is dropped and the
            branch name is redacted via _branch_redactor.
    """
    # GitLab commits carry no user record, only name/email strings; a
    # synthetic "name<email>" id is used.
    commit_author = NormalizedUser(
        id=f'{api_commit.author_name}<{api_commit.author_email}>',
        login=api_commit.author_email,
        name=api_commit.author_name,
        email=api_commit.author_email,
    )

    if redact_names_and_urls:
        url = None
        branch = _branch_redactor.redact_name(branch_name)
    else:
        url = f'{normalized_repo.url}/commit/{api_commit.id}'
        branch = branch_name

    return NormalizedCommit(
        hash=api_commit.id,
        author=commit_author,
        url=url,
        commit_date=api_commit.committed_date,
        author_date=api_commit.authored_date,
        message=sanitize_text(api_commit.message, strip_text_content),
        is_merge=len(api_commit.parent_ids) > 1,
        repo=normalized_repo.short(),  # use short form of repo
        branch_name=branch,
    )
def _normalize_pr(
    client, repo, api_pr, strip_text_content: bool, redact_names_and_urls: bool,
):
    """Convert a raw Bitbucket pull-request payload into a NormalizedPullRequest.

    Args:
        client: Bitbucket API client used for diff/comment/activity/commit fetches.
        repo: repo object (with .id and .project.id) the PR belongs to.
        api_pr: dict as returned by the Bitbucket pullrequests API.
        strip_text_content: when True, title/body are scrubbed via sanitize_text.
        redact_names_and_urls: when True, the PR URL is dropped and branch
            names are redacted via _branch_redactor.

    Fix vs original: strip_text_content / redact_names_and_urls were accepted
    but never applied to title, body, url, base_branch, head_branch — unlike
    every other provider's normalizer in this file.  They are now applied
    consistently.
    """
    base_branch_name = api_pr['destination']['branch']['name']
    head_branch_name = api_pr['source']['branch']['name']

    # Process the PR's diff to get additions, deletions, changed_files
    additions, deletions, changed_files = None, None, None
    try:
        diff_str = client.pr_diff(repo.project.id, repo.id, api_pr['id'])
        additions, deletions, changed_files = _calculate_diff_counts(diff_str)
        if additions is None:
            agent_logging.log_and_print_error_or_warning(
                logger, logging.WARN, msg_args=[api_pr["id"], repo.id], error_code=3031,
            )
    except requests.exceptions.RetryError:
        # Server threw a 500 on the request for the diff and we started retrying;
        # this happens consistently for certain PRs (if the PR has no commits yet). Just proceed with no diff
        pass
    except requests.exceptions.HTTPError as e:
        if e.response.status_code >= 500:
            # Server threw a 500 on the request for the diff; this happens consistently for certain PRs
            # (if the PR has no commits yet). Just proceed with no diff
            pass
        elif e.response.status_code == 401:
            # Server threw a 401 on the request for the diff; not sure why this would be, but it seems rare
            agent_logging.log_and_print_error_or_warning(
                logger, logging.WARN, msg_args=[api_pr["id"], repo.id], error_code=3041,
            )
        else:
            # Some other HTTP error happened; Re-raise
            raise
    except UnicodeDecodeError:
        # Occasional diffs seem to be invalid UTF-8
        agent_logging.log_and_print_error_or_warning(
            logger, logging.WARN, msg_args=[api_pr["id"], repo.id], error_code=3051,
        )

    # Comments
    comments = [
        NormalizedPullRequestComment(
            user=_normalize_user(c['user']),
            body=sanitize_text(c['content']['raw'], strip_text_content),
            created_at=parser.parse(c['created_on']),
        )
        for c in client.pr_comments(repo.project.id, repo.id, api_pr['id'])
    ]

    # Crawl activity for approvals, merge and closed dates
    approvals = []
    merge_date = None
    merged_by = None
    closed_date = None
    try:
        activity = list(client.pr_activity(repo.project.id, repo.id, api_pr['id']))
        approvals = [
            NormalizedPullRequestReview(
                user=_normalize_user(approval['user']),
                foreign_id=i,  # There's no true ID (unlike with GitHub); use a per-PR sequence
                review_state='APPROVED',
            )
            for i, approval in enumerate(
                (a['approval'] for a in activity if 'approval' in a), start=1,
            )
        ]

        # Obtain the merge_date and merged_by by crawling over the activity history
        pr_updates = [a for a in activity if 'update' in a]
        for a in sorted(pr_updates, key=lambda x: x['update']['date'], reverse=True):
            if a['update']['state'] == 'MERGED':
                merge_date = parser.parse(a['update']['date'])
                merged_by = _normalize_user(a['update']['author'])
                break

        # Obtain the closed_date by crawling over the activity history, looking for the
        # first transition to one of the closed states ('MERGED' or 'DECLINED')
        for a in sorted(pr_updates, key=lambda x: x['update']['date'], reverse=False):
            if a['update']['state'] in ('MERGED', 'DECLINED'):
                closed_date = parser.parse(a['update']['date'])
                break
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 401:
            # not authorized to see activity; skip it
            pass
        else:
            raise

    # Commits
    commits = [
        _normalize_commit(
            c,
            repo,
            base_branch_name,
            strip_text_content,
            redact_names_and_urls,
        )
        for c in client.pr_commits(repo.project.id, repo.id, api_pr['id'])
    ]
    merge_commit = None
    if (
        api_pr['state'] == 'MERGED'
        and 'merge_commit' in api_pr
        and api_pr['merge_commit']
        and api_pr['merge_commit'].get('hash')
    ):
        api_merge_commit = client.get_commit(
            repo.project.id, api_pr['source']['repository']['uuid'], api_pr['merge_commit']['hash']
        )
        merge_commit = _normalize_commit(
            api_merge_commit,
            repo,
            base_branch_name,
            strip_text_content,
            redact_names_and_urls,
        )

    # Repo links
    base_repo = _normalize_short_form_repo(
        api_pr['destination']['repository'], redact_names_and_urls
    )
    head_repo = _normalize_short_form_repo(api_pr['source']['repository'], redact_names_and_urls)

    return NormalizedPullRequest(
        id=api_pr['id'],
        # sanitized fields (consistent with the other provider normalizers)
        title=sanitize_text(api_pr['title'], strip_text_content),
        body=sanitize_text(api_pr['description'], strip_text_content),
        # redacted fields (consistent with the other provider normalizers)
        url=api_pr['links']['html']['href'] if not redact_names_and_urls else None,
        base_branch=(base_branch_name if not redact_names_and_urls else
                     _branch_redactor.redact_name(base_branch_name)),
        head_branch=(head_branch_name if not redact_names_and_urls else
                     _branch_redactor.redact_name(head_branch_name)),
        base_repo=base_repo,
        head_repo=head_repo,
        author=_normalize_user(api_pr['author']),
        is_closed=api_pr['state'] != 'OPEN',
        is_merged=api_pr['state'] == 'MERGED',
        created_at=parser.parse(api_pr['created_on']),
        updated_at=parser.parse(api_pr['updated_on']),
        additions=additions,
        deletions=deletions,
        changed_files=changed_files,
        merge_date=merge_date,
        closed_date=closed_date,
        merged_by=merged_by,
        approvals=approvals,
        commits=commits,
        merge_commit=merge_commit,
        comments=comments,
    )