Exemple #1
0
def get_commits_for_included_branches(
    client: GithubClient,
    api_repos,
    included_branches,
    strip_text_content,
    server_git_instance_info,
    redact_names_and_urls,
):
    """Yield normalized commits for the default and explicitly included branches of each repo.

    This is a generator. For every repo in ``api_repos`` it asks ``client`` for
    the commits of each selected branch (starting at the repo's pull-since
    date) and yields the result of ``_normalize_commit`` for each one.

    Args:
        client: GitHub client used to list branches and commits.
        api_repos: Iterable of raw GitHub API repo dicts; the keys read here
            are ``organization.login``, ``id``, ``name``, ``full_name`` and
            ``default_branch``.
        included_branches: Mapping of repo name to extra branch patterns.
        strip_text_content: Passed through to ``_normalize_commit``.
        server_git_instance_info: Passed through to ``pull_since_date_for_repo``.
        redact_names_and_urls: Passed through to ``_normalize_commit``.

    Yields:
        One normalized commit per commit returned by the client.
    """
    for i, repo in enumerate(api_repos, start=1):
        with agent_logging.log_loop_iters(logger, 'repo for branch commits', i, 1):
            pull_since = pull_since_date_for_repo(
                server_git_instance_info,
                repo['organization']['login'],
                repo['id'],
                'commits',
            )

            # Determine branches to pull commits from for this repo. If no branches are
            # explicitly provided in a config, only pull from the repo's default branch.
            # We are working with the github api object rather than a NormalizedRepository
            # here, so we can not use get_branches_for_normalized_repo as we do in
            # bitbucket_cloud_adapter and gitlab_adapter.
            branches_to_process = [repo['default_branch']]
            additional_branch_patterns = included_branches.get(repo['name'])

            if additional_branch_patterns:
                repo_branches = [b['name'] for b in client.get_branches(repo['full_name'])]
                branches_to_process.extend(
                    get_matching_branches(additional_branch_patterns, repo_branches)
                )

            # A pattern may also match the default branch (or several patterns may
            # match the same branch). Drop repeats, keeping first-seen order, so no
            # branch is downloaded and yielded more than once.
            branches_to_process = list(dict.fromkeys(branches_to_process))

            for branch in branches_to_process:
                # Per-branch try: a failure on one branch must not stop the others.
                try:
                    commits = tqdm(
                        client.get_commits(
                            repo['full_name'], branch, since=pull_since, until=None
                        ),
                        desc=f'downloading commits on branch {branch} for {repo["name"]}',
                        unit='commits',
                    )
                    for j, commit in enumerate(commits, start=1):
                        with agent_logging.log_loop_iters(
                            logger, 'branch commit inside repo', j, 100
                        ):
                            yield _normalize_commit(
                                commit,
                                repo,
                                branch,
                                strip_text_content,
                                redact_names_and_urls,
                            )

                except Exception as e:
                    print(f':WARN: Got exception for branch {branch}: {e}. Skipping...')
    def get_commits_for_included_branches(
        self,
        normalized_repos: List[NormalizedRepository],
        included_branches: dict,
        server_git_instance_info,
    ) -> List[NormalizedCommit]:
        """Yield normalized GitLab commits for the selected branches of each repo.

        This is a generator (the ``List`` return annotation is kept as-is for
        callers). Branches are resolved with ``get_branches_for_normalized_repo``
        and commits are fetched with ``self.client.list_project_commits``.

        Args:
            normalized_repos: Repos to pull commits for.
            included_branches: Passed to ``get_branches_for_normalized_repo``.
            server_git_instance_info: Passed to ``pull_since_date_for_repo``.

        Yields:
            The result of ``_normalize_commit`` for every commit returned.
        """
        print('downloading gitlab commits on included branches... ',
              end='',
              flush=True)
        for i, nrm_repo in enumerate(normalized_repos, start=1):
            with agent_logging.log_loop_iters(logger,
                                              'repo for branch commits', i, 1):
                pull_since = pull_since_date_for_repo(server_git_instance_info,
                                                      nrm_repo.project.login,
                                                      nrm_repo.id, 'commits')

                # Resolve branches in their own try block so the handler never has
                # to mention a `branch` variable that has not been bound yet.
                # list() forces any lazy iterable to fail here rather than mid-loop.
                try:
                    branches = list(
                        get_branches_for_normalized_repo(nrm_repo,
                                                         included_branches))
                except Exception as e:
                    print(
                        f':WARN: Got exception listing branches for repo {nrm_repo.name}: {e}. Skipping...'
                    )
                    continue

                for branch in branches:
                    # Per-branch try: one failing branch is skipped, the rest of
                    # the repo's branches are still processed.
                    try:
                        for j, commit in enumerate(
                                tqdm(
                                    self.client.list_project_commits(
                                        nrm_repo.id, pull_since, branch),
                                    desc=
                                    f'downloading commits for branch {branch} in repo {nrm_repo.name} ({nrm_repo.id})',
                                    unit='commits',
                                ),
                                start=1,
                        ):
                            with agent_logging.log_loop_iters(
                                    logger, 'branch commit inside repo', j,
                                    100):
                                yield _normalize_commit(
                                    commit,
                                    nrm_repo,
                                    branch,
                                    self.config.git_strip_text_content,
                                    self.config.git_redact_names_and_urls,
                                )

                    except Exception as e:
                        print(
                            f':WARN: Got exception for branch {branch}: {e}. Skipping...'
                        )
        print('✓')
    def get_commits_for_included_branches(
        self,
        normalized_repos: List[NormalizedRepository],
        included_branches: dict,
        server_git_instance_info,
    ) -> List[NormalizedCommit]:
        """Yield normalized Bitbucket commits for the selected branches of each repo.

        This is a generator (the ``List`` return annotation is kept as-is for
        callers). The pull-since date is not passed to ``self.client.get_commits``,
        so the cutoff is applied here: iteration over a branch stops after the
        first commit whose ``commit_date`` is older than the pull-since date.

        Args:
            normalized_repos: Repos to pull commits for.
            included_branches: Passed to ``get_branches_for_normalized_repo``.
            server_git_instance_info: Passed to ``pull_since_date_for_repo``.

        Yields:
            The result of ``_normalize_commit`` for every commit processed.
        """
        print('downloading bitbucket commits on included branches... ', end='', flush=True)
        for i, repo in enumerate(normalized_repos, start=1):
            with agent_logging.log_loop_iters(logger, 'repo for branch commits', i, 1):
                pull_since = pull_since_date_for_repo(
                    server_git_instance_info, repo.project.login, repo.id, 'commits'
                )

                for branch in get_branches_for_normalized_repo(repo, included_branches):
                    api_commits = tqdm(
                        self.client.get_commits(repo.project.id, repo.id, branch),
                        desc=f'downloading commits for {repo.name} on branch {branch}',
                        unit='commits',
                    )
                    for j, api_commit in enumerate(api_commits, start=1):
                        with agent_logging.log_loop_iters(
                            logger, 'branch commit inside repo', j, 100
                        ):
                            normalized_commit = _normalize_commit(
                                api_commit,
                                repo,
                                branch,
                                self.config.git_strip_text_content,
                                self.config.git_redact_names_and_urls,
                            )
                            yield normalized_commit

                            # yield one commit older than we want to see.
                            # Guard against a missing pull-since date (as the
                            # pull-request code paths do) so the comparison can
                            # not raise a TypeError against None.
                            if pull_since and normalized_commit.commit_date < pull_since:
                                break

        print('✓')
Exemple #4
0
def get_pull_requests(
    client: GithubClient,
    api_repos,
    strip_text_content,
    server_git_instance_info,
    redact_names_and_urls,
):
    """Yield normalized pull requests for every repo in ``api_repos``.

    This is a generator. For each repo, PRs are read from
    ``client.get_pullrequests`` and passed through ``_normalize_pr``. Reading
    stops at the first PR whose ``updated_at`` is older than the repo's
    pull-since date. Any exception while processing a repo is reported with a
    printed warning, and the loop moves on to the next repo.

    Args:
        client: GitHub client used to list pull requests.
        api_repos: Iterable of raw GitHub API repo dicts; the keys read here
            are ``organization.login``, ``id``, ``name`` and ``full_name``.
        strip_text_content: Passed through to ``_normalize_pr``.
        server_git_instance_info: Passed through to ``pull_since_date_for_repo``.
        redact_names_and_urls: Passed through to ``_normalize_pr``.

    Yields:
        The result of ``_normalize_pr`` for each PR that is recent enough.
    """
    for repo_number, repo in enumerate(api_repos, start=1):
        with agent_logging.log_loop_iters(
            logger, 'repo for pull requests', repo_number, 1
        ):
            cutoff = pull_since_date_for_repo(
                server_git_instance_info,
                repo['organization']['login'],
                repo['id'],
                'prs',
            )
            try:
                pr_progress = tqdm(
                    client.get_pullrequests(repo['full_name']),
                    desc=f'downloading PRs for {repo["name"]}',
                    unit='prs',
                )
                for pr_number, api_pr in enumerate(pr_progress, start=1):
                    with agent_logging.log_loop_iters(
                        logger, 'pr inside repo', pr_number, 10
                    ):
                        last_updated = parser.parse(api_pr['updated_at'])

                        # PRs arrive newest first, so the first stale one
                        # means the rest of this repo can be skipped.
                        if cutoff and last_updated < cutoff:
                            break

                        yield _normalize_pr(
                            client, api_pr, strip_text_content, redact_names_and_urls
                        )

            except Exception as e:
                print(
                    f':WARN: Exception getting PRs for repo {repo["name"]}: {e}. Skipping...'
                )
    print()
    def get_pull_requests(
        self, normalized_repos: List[NormalizedRepository], server_git_instance_info,
    ) -> List[NormalizedPullRequest]:
        """Yield normalized Bitbucket pull requests for each repo.

        This is a generator (the ``List`` return annotation is kept as-is for
        callers). PRs missing source or destination repository data are
        reported (error code 3030) and skipped. A failure on one PR (3011) or
        one repo (3021) is logged and processing continues with the next one.

        Args:
            normalized_repos: Repos to pull PRs for.
            server_git_instance_info: Passed to ``pull_since_date_for_repo``.

        Yields:
            The result of ``_normalize_pr`` for each usable PR.
        """
        print('downloading bitbucket prs... ', end='', flush=True)
        repo_progress = tqdm(
            normalized_repos, desc='downloading prs for repos', unit='repos'
        )
        for repo_number, repo in enumerate(repo_progress, start=1):
            with agent_logging.log_loop_iters(
                logger, 'repo for pull requests', repo_number, 1
            ):
                try:
                    cutoff = pull_since_date_for_repo(
                        server_git_instance_info, repo.project.login, repo.id, 'prs'
                    )
                    raw_prs = self.client.get_pullrequests(repo.project.id, repo.id)

                    if not raw_prs:
                        agent_logging.log_and_print(
                            logger, logging.INFO, f'no prs found for repo {repo.id}. Skipping... '
                        )
                        continue

                    pr_progress = tqdm(
                        raw_prs, desc=f'processing prs for {repo.name}', unit='prs'
                    )
                    for raw_pr in pr_progress:
                        try:
                            # Skip PRs with missing data: both ends of the PR
                            # must carry a non-empty 'repository' entry.
                            missing_repo_data = any(
                                side not in raw_pr
                                or 'repository' not in raw_pr[side]
                                or not raw_pr[side]['repository']
                                for side in ('source', 'destination')
                            )
                            if missing_repo_data:
                                agent_logging.log_and_print_error_or_warning(
                                    logger,
                                    logging.WARN,
                                    msg_args=[raw_pr['id']],
                                    error_code=3030,
                                )
                                continue

                            yield _normalize_pr(
                                self.client,
                                repo,
                                raw_pr,
                                self.config.git_strip_text_content,
                                self.config.git_redact_names_and_urls,
                            )

                            # PRs are ordered newest to oldest, so once one is
                            # too old we are done with this repo. The old one
                            # is yielded on purpose (the check comes after the
                            # yield) so the case where the most recent PR is
                            # really old is still handled correctly.
                            if cutoff and parser.parse(raw_pr['updated_on']) < cutoff:
                                break

                        except Exception:
                            # A problem with a single PR must not stop the rest.
                            agent_logging.log_and_print_error_or_warning(
                                logger,
                                logging.ERROR,
                                msg_args=[raw_pr["id"], repo.id],
                                error_code=3011,
                                exc_info=True,
                            )

                except Exception:
                    # A problem pulling PRs for one repo must not stop the others.
                    agent_logging.log_and_print_error_or_warning(
                        logger,
                        logging.ERROR,
                        msg_args=[repo.id],
                        error_code=3021,
                        exc_info=True,
                    )

        print('✓')
    def get_pull_requests(
        self,
        normalized_repos: List[NormalizedRepository],
        server_git_instance_info,
    ) -> List[NormalizedPullRequest]:
        """Yield normalized GitLab merge requests for each repo.

        This is a generator (the ``List`` return annotation is kept as-is for
        callers). Merge requests come from
        ``self.client.list_project_merge_requests``; iteration over a repo
        stops at the first one whose ``updated_at`` is older than the repo's
        pull-since date. Each remaining merge request is expanded once with
        ``self.client.expand_merge_request_data``, its commits are normalized,
        and for merged requests the merge commit is fetched and normalized too.

        A failure on one merge request or one repo is logged and processing
        continues with the next one.

        Args:
            normalized_repos: Repos to pull merge requests for.
            server_git_instance_info: Passed to ``pull_since_date_for_repo``.

        Yields:
            The result of ``_normalize_pr`` for each merge request processed.
        """
        print('downloading gitlab prs... ', end='', flush=True)

        for i, nrm_repo in enumerate(normalized_repos, start=1):
            print(f'downloading prs for repo {nrm_repo.name} ({nrm_repo.id})')

            with agent_logging.log_loop_iters(logger, 'repo for pull requests',
                                              i, 1):
                try:
                    pull_since = pull_since_date_for_repo(
                        server_git_instance_info, nrm_repo.project.login,
                        nrm_repo.id, 'prs')

                    api_prs = self.client.list_project_merge_requests(
                        nrm_repo.id)

                    if not api_prs or not api_prs.total:
                        agent_logging.log_and_print(
                            logger, logging.WARNING,
                            f"No PRs returned for repo {nrm_repo.id}")
                        continue

                    for api_pr in tqdm(
                            api_prs,
                            desc=
                            f'processing prs for {nrm_repo.name} ({nrm_repo.id})',
                            unit='prs',
                            total=api_prs.total,
                    ):
                        try:
                            updated_at = parser.parse(api_pr.updated_at)

                            # PRs are ordered newest to oldest
                            # if this is too old, we're done with this repo
                            if pull_since and updated_at < pull_since:
                                break

                            try:
                                api_pr = self.client.expand_merge_request_data(
                                    api_pr)
                            except MissingSourceProjectException as e:
                                log_and_print_request_error(
                                    e,
                                    f'fetching source project {api_pr.source_project_id} '
                                    f'for merge_request {api_pr.id}. Skipping...',
                                )
                                continue

                            nrm_commits: List[NormalizedCommit] = [
                                _normalize_commit(
                                    commit,
                                    nrm_repo,
                                    api_pr.target_branch,
                                    self.config.git_strip_text_content,
                                    self.config.git_redact_names_and_urls,
                                ) for commit in api_pr.commit_list
                            ]

                            # api_pr is already the expanded merge request, so it
                            # is reused here rather than expanded a second time
                            # (the repeat call was also outside the
                            # MissingSourceProjectException handling above).
                            # NOTE(review): assumes a second expansion adds no
                            # new data - confirm against the client.
                            merge_commit = None
                            if (api_pr.state == 'merged'
                                    and api_pr.merge_commit_sha):
                                merge_commit = _normalize_commit(
                                    self.client.get_project_commit(
                                        api_pr.project_id,
                                        api_pr.merge_commit_sha),
                                    nrm_repo,
                                    api_pr.target_branch,
                                    self.config.git_strip_text_content,
                                    self.config.git_redact_names_and_urls,
                                )

                            yield _normalize_pr(
                                api_pr,
                                nrm_commits,
                                self.config.git_strip_text_content,
                                self.config.git_redact_names_and_urls,
                                merge_commit,
                            )
                        except Exception as e:
                            # if something goes wrong with normalizing one of the prs - don't stop pulling. try
                            # the next one.
                            pr_id = f' {api_pr.id}' if api_pr else ''
                            log_and_print_request_error(
                                e,
                                f'normalizing PR {pr_id} from repo {nrm_repo.name} ({nrm_repo.id}). Skipping...',
                                log_as_exception=True,
                            )

                except Exception as e:
                    # if something happens when pulling PRs for a repo, just keep going.
                    log_and_print_request_error(
                        e,
                        f'getting PRs for repo {nrm_repo.name} ({nrm_repo.id}). Skipping...',
                        log_as_exception=True,
                    )