예제 #1
0
def check_and_schedule():
    """Check every pipeline that has been submitted since the last execution of the scheduler.

    If a pipeline is found to contain modifications to the configuration files, the changes are
    reported to the maintainers for additional review and no further action is taken.

    If a pipeline does not contain any modifications to the configuration files, its authorized
    workflows that have an "unauthorized" CircleCI status are triggered on behalf of the submitter.

    Only workflows specified in the AUTHORIZED_WORKFLOWS variable are executed.

    :raises SystemExit: with status 1 if no pipeline can be fetched for the reference branch.
    """
    # PRs handled so far in this run; the same list is handed to every per-pipeline check.
    checked_prs = []

    # Retrieve the latest pipeline on the reference branch, if any
    reference_pipelines = circleci.fetch_pipelines(
        branch=REFERENCE_BRANCH,
        not_containing_workflows=[SCHEDULER_WORKFLOW],
        limit=1,
        multipage=True,
    )

    # This script requires a reference configuration to exist
    if not reference_pipelines:
        print(
            f"Unable to fetch pipelines for reference branch {REFERENCE_BRANCH}, halting."
        )
        # Equivalent to exit(1), but does not depend on the `site`-provided builtin.
        raise SystemExit(1)

    # Retrieve the reference configuration
    latest_reference_pipeline = reference_pipelines[0]
    reference_config = circleci.get_pipeline_config(
        latest_reference_pipeline["id"])["compiled"]

    # The reference clone is only needed to hash the protected files, so keep it in a context
    # manager: the directory is removed even if cloning, checkout or hashing raises.
    with tempfile.TemporaryDirectory() as reference_repo_path:
        # Initialize the reference git repo
        reference_repo = Repo.clone_from(
            latest_reference_pipeline["vcs"]["target_repository_url"],
            reference_repo_path)
        reference_repo.git.checkout(
            latest_reference_pipeline["vcs"]["revision"])

        # Compute the hash of every protected file
        reference_protected_files = utils.compute_files_hash(
            reference_repo_path, PROTECTED_FILES)

    # Retrieve the latest successful execution of the scheduler pipeline, if any
    scheduler_pipelines = circleci.fetch_pipelines(
        branch=SCHEDULER_BRANCH,
        containing_workflows=[SCHEDULER_WORKFLOW],
        multipage=False,
        successful_only=True,
    )
    latest_scheduler_pipeline = (scheduler_pipelines[0]
                                 if scheduler_pipelines else None)

    # Retrieve the pending pipelines to check, stopping at the last successful scheduler run
    pending_pipelines = circleci.fetch_pipelines(
        multipage=True,
        stopping_pipeline_id=(latest_scheduler_pipeline["id"]
                              if latest_scheduler_pipeline else None),
    )

    # If this variable is not set, the script is running outside CircleCI.
    # In that case, don't filter out pipelines that have been launched soon after the execution
    # of this scheduler run.
    if CURRENT_SCHEDULER_WORKFLOW:
        starting_pipeline_id = circleci.get_workflow(
            CURRENT_SCHEDULER_WORKFLOW)["pipeline_id"]
        # The list is reversed before filtering and the filtered result is reversed back, so the
        # iteration order below is the same as the fetched order.
        pending_pipelines = reversed(
            circleci.filter_pipelines(
                reversed(pending_pipelines),
                stopping_pipeline_id=starting_pipeline_id)[0])

    # Check the pipelines and schedule workflows if appropriate
    for pipeline in pending_pipelines:
        _check_and_schedule_pipeline(checked_prs, pipeline, reference_config,
                                     reference_protected_files)
예제 #2
0
def check_and_schedule():
    """Check every pipeline that has been submitted since the last execution of the scheduler.

    If a pipeline is found to contain modifications to the configuration files, the changes are
    reported to the maintainers for additional review and no further action is taken.

    If a pipeline does not contain any modifications to the configuration files, its associated
    pull request is a forked one, and the pull request has not already been checked by Danger
    during this run of the scheduler, a Danger session is run on its commit.

    Pipelines are checked from the latest to the oldest, so that every PR is checked by Danger
    only once on its latest commit.

    :raises SystemExit: with status 1 if no pipeline can be fetched for the reference branch.
    """
    # Retrieve the latest pipeline on the reference branch, if any. We must exclude pipelines
    # triggered by cron jobs, though, as they contain a different compiled CircleCI configuration
    # (due to limitations of how CircleCI works). To avoid this, the scheduler workflow is never
    # run on commit, and therefore we can identify "non-cron" pipelines by asking that they should
    # not contain the scheduler workflow.
    reference_pipelines = circleci.fetch_pipelines(
        branch=REFERENCE_BRANCH,
        not_containing_workflows=[SCHEDULER_WORKFLOW],
        limit=1,
        multipage=True,
    )

    # This script requires a reference configuration to exist
    if not reference_pipelines:
        print(
            f"Unable to fetch pipelines for reference branch {REFERENCE_BRANCH}, halting."
        )
        # Equivalent to exit(1), but does not depend on the `site`-provided builtin.
        raise SystemExit(1)

    # Retrieve the reference configuration
    latest_reference_pipeline = reference_pipelines[0]
    reference_config = circleci.get_pipeline_config(
        latest_reference_pipeline["id"])["compiled"]

    # The reference clone is only needed to derive the hashes and the submodule SHA, so keep it
    # in a context manager: the directory is removed even if one of these steps raises.
    with tempfile.TemporaryDirectory() as reference_repo_path:
        # Initialize the reference git repo
        reference_repo = Repo.clone_from(
            latest_reference_pipeline["vcs"]["target_repository_url"],
            reference_repo_path)
        reference_repo.git.checkout(
            latest_reference_pipeline["vcs"]["revision"])

        # Compute the hash of every protected file and the scheduler submodule SHA
        reference_protected_files = utils.compute_files_hash(
            reference_repo_path, PROTECTED_FILES)
        reference_scheduler_sha = utils.get_submodule_sha(
            reference_repo, SCHEDULER_SUBMODULE_NAME)

    # Retrieve the latest successful execution of the scheduler pipeline, if any
    scheduler_pipelines = circleci.fetch_pipelines(
        branch=SCHEDULER_BRANCH,
        containing_workflows=[SCHEDULER_WORKFLOW],
        multipage=False,
        successful_only=True,
    )
    latest_scheduler_pipeline = (scheduler_pipelines[0]
                                 if scheduler_pipelines else None)

    # Retrieve the pipelines to check, stopping at the last successful scheduler run
    pipelines_to_check = circleci.fetch_pipelines(
        multipage=True,
        stopping_pipeline_id=(latest_scheduler_pipeline["id"]
                              if latest_scheduler_pipeline else None),
    )

    # If this variable is not set, the script is running outside CircleCI.
    # In that case, don't filter out pipelines that have been launched soon after the execution
    # of this scheduler run, and report placeholder workflow details.
    current_scheduler_workflow = {
        "pipeline_id": "devmode",
        "pipeline_number": "devmode"
    }
    if CURRENT_SCHEDULER_WORKFLOW:
        current_scheduler_workflow = circleci.get_workflow(
            CURRENT_SCHEDULER_WORKFLOW)
        starting_pipeline_id = current_scheduler_workflow["pipeline_id"]
        pipelines_to_check = reversed(
            circleci.filter_pipelines(
                reversed(pipelines_to_check),
                stopping_pipeline_id=starting_pipeline_id).pipelines)

    # Check recently submitted pipelines for integrity, and retrieve the sublist of safe ones
    check_args = [[
        p, reference_config, reference_protected_files, reference_scheduler_sha
    ] for p in pipelines_to_check]
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        # Executor.map() returns a one-shot iterator. The results are needed twice (sorting and
        # the final cleanup), so materialize them once; otherwise the cleanup loop would iterate
        # an exhausted iterator and never remove the cloned repositories.
        results: List[DangerCandidatePipeline] = list(
            executor.map(lambda args: _check_pipeline(*args), check_args))

    try:
        # Sort retrieved pipelines in descending order of submission (newest pipelines first)
        sorted_pipelines = sorted(
            results,
            key=lambda danger_pipeline: danger_pipeline.pipeline_nr,
            reverse=True)

        # Keep track of which PRs have been seen, and create a DangerPRExecution for each PR
        # that has to be checked by Danger
        danger_pr_executions: List[DangerPRExecution] = []
        prs_to_check = set()

        # Only run Danger once per PR, on the latest commit, and if the commit has been verified
        # as safe
        for p in sorted_pipelines:
            # pull_requests is None (or empty) when no PR could be associated to the pipeline
            for pull_request in p.pull_requests or ():
                if pull_request in prs_to_check:
                    continue
                prs_to_check.add(pull_request)
                _notify_safety_check(
                    p.check_details,
                    p.commit,
                    pull_request,
                    safe=p.safe,
                    scheduler_workflow=current_scheduler_workflow,
                )
                if p.should_run_danger:
                    danger_pr_executions.append(
                        DangerPRExecution(
                            commit=p.commit,
                            pull_request=pull_request,
                            repo_dir=p.repo_dir,
                        ))

        # Print a recap of the Danger jobs we're about to run:
        print(
            "The following forked PRs have been deemed safe and will be checked by Danger:",
            end=" ")
        print(", ".join(
            str(pr_execution.pull_request)
            for pr_execution in danger_pr_executions))

        # Run Danger on the identified PRs. Use processes instead of threads since we need to cd
        # into a directory.
        # NOTE(review): the iterator returned by map() is never consumed, so exceptions raised by
        # _run_danger are not surfaced here; this looks like deliberate best-effort, confirm.
        with ProcessPoolExecutor(max_workers=MAX_PROCESSES) as executor:
            executor.map(_run_danger, danger_pr_executions)
    finally:
        # Cleanup the temporary repository directories, even if a step above failed
        for repo_dir in {result.repo_dir for result in results}:
            repo_dir.cleanup()
예제 #3
0
def _check_and_schedule_pipeline(
    checked_prs: List[int],
    pipeline: Dict,
    reference_config: str,
    reference_protected_files: Dict[str, Optional[str]],
):
    """Check the pipeline configuration for integrity and rerun its unauthorized workflows.

    The contributor repository is cloned at the pipeline's revision, the protected files are
    hashed and, together with the compiled CircleCI configuration, compared against the reference
    ones. The outcome is logged and passed to `_notify_checked_pr` for every PR associated with
    the pipeline. Workflows listed in AUTHORIZED_WORKFLOWS whose status is "unauthorized" are
    rerun only if the pipeline is safe and at least one notification call reported that the
    workflows should be triggered.

    If the revision cannot be checked out, the failure is logged and nothing else is done: the
    pipeline is neither verified nor scheduled.

    :param checked_prs: a list of already checked PRs.
    :param pipeline: a pipeline object.
    :param reference_config: the CircleCI configuration of the reference branch.
    :param reference_protected_files: a dictionary mapping the protected files present in the
    original repository of the organization, on the reference branch, to their SHA256 hash.
    """
    response = circleci.get_pipeline_config(pipeline["id"])

    # The clone is only needed to hash the protected files; the context manager removes the
    # directory on every exit path, including exceptions.
    with tempfile.TemporaryDirectory() as repo_path:
        # Initialize the original git repo
        try:
            contributor_repo = Repo.clone_from(
                pipeline["vcs"]["origin_repository_url"], repo_path)
            contributor_repo.git.checkout(pipeline["vcs"]["revision"])
        except CommandError:
            check_details = (
                f"Unable to checkout revision {pipeline['vcs']['revision']} "
                f"on contributor repo for pipeline #{pipeline['number']} ({pipeline['id']})!"
            )
            _log_safety_check(check_details, pipeline, False)
            # Stop here: the directory does not contain the requested revision (it may be empty
            # or hold another branch), so hashing it could wrongly mark the pipeline as safe.
            return

        current_protected_files = utils.compute_files_hash(
            repo_path, reference_protected_files.keys())

    # Check the pipeline's integrity
    is_safe, check_details = _safety_check(current_protected_files,
                                           response["compiled"],
                                           reference_config,
                                           reference_protected_files)
    _log_safety_check(check_details, pipeline, is_safe)

    # Retrieve the pipeline's workflows.
    workflows = circleci.get_pipeline_workflows(pipeline["id"])
    if not workflows:
        return

    should_trigger_workflows = False

    try:
        if pipeline["vcs"]["origin_repository_url"] == pipeline["vcs"][
                "target_repository_url"]:
            # This is a PR on the internal repo.
            # Pick a workflow to extract the pipeline's PRs, retrieve them, and leave a comment.
            for pr in circleci.get_workflow_prs(workflows[0]["id"]):
                # Call the notifier before or-ing its result: a short-circuiting
                # `flag or notify(...)` would skip every PR after the first truthy one.
                notified = _notify_checked_pr(check_details, checked_prs,
                                              pipeline, pr, is_safe)
                should_trigger_workflows = should_trigger_workflows or notified
        else:
            # This is a PR from a forked repo.
            # The workflow is unauthorized, but we can detect the PR number from the branch.
            pr = int(pipeline["vcs"]["branch"].split("pull/")[1])
            should_trigger_workflows = _notify_checked_pr(
                check_details, checked_prs, pipeline, pr, is_safe)
    except Exception as e:
        # If anything goes wrong, don't crash, but log the error
        print(
            f"Unable to retrieve PR for pipeline #{pipeline['number']} ({pipeline['id']})!"
        )
        print(e)
        return

    # If the pipeline did not pass the integrity check, the PR has already been run,
    # or we were unable to retrieve the PR, stop
    if not is_safe or not should_trigger_workflows:
        return

    # If the configurations match and the PR hasn't been run yet, pending jobs can be executed
    for workflow in workflows:
        if workflow["name"] not in AUTHORIZED_WORKFLOWS:
            continue
        if workflow["status"] != "unauthorized":
            continue
        print("Executing previously unauthorized workflow")
        print(f"Pipeline: #{pipeline['number']} ({pipeline['id']})")
        print(f"Workflow: {workflow['id']}")
        circleci.rerun_workflow(workflow["id"])
예제 #4
0
def _check_pipeline(
    pipeline: Dict,
    reference_config: str,
    reference_protected_files: Dict[str, Optional[str]],
    reference_scheduler_sha: str,
) -> DangerCandidatePipeline:
    """Verify a pipeline against the reference branch and describe it as a Danger candidate.

    The contributor repository is cloned into a fresh temporary directory at the pipeline's
    revision; its protected files, scheduler submodule SHA and compiled CircleCI configuration
    are then compared with the reference ones, and the outcome is logged.

    The temporary directory is not removed here: it is handed back to the caller through the
    ``repo_dir`` field of the result.

    :param pipeline: a pipeline object.
    :param reference_config: the CircleCI configuration of the reference branch.
    :param reference_protected_files: a dictionary mapping the protected files present in the
    original repository of the organization, on the reference branch, to their SHA256 hash.
    :param reference_scheduler_sha: the SHA of the scheduler submodule on the reference branch, if
    the submodule exists; an empty string otherwise.
    :return: a DangerCandidatePipeline carrying the check details, the commit, the pipeline
    number, the associated pull requests (None when they could not be determined), the temporary
    directory of the clone, whether the pipeline is safe, and whether Danger should be run on it
    (only for safe pipelines coming from a fork).
    """
    vcs = pipeline["vcs"]
    commit = vcs["revision"]

    # One temporary directory per call; the original author notes this is thread safe as long as
    # the default temp dir (an absolute path) is used.
    repo_dir = tempfile.TemporaryDirectory()
    config_response = circleci.get_pipeline_config(pipeline["id"])

    def rejected(details):
        # Result shared by every failure path: no PRs, not safe, Danger must not run.
        return DangerCandidatePipeline(
            check_details=details,
            commit=commit,
            pipeline_nr=pipeline["number"],
            pull_requests=None,
            repo_dir=repo_dir,
            safe=False,
            should_run_danger=False,
        )

    # Clone the contributor repo and move to the commit under test
    try:
        fork_repo = Repo.clone_from(vcs["origin_repository_url"],
                                    repo_dir.name)
        fork_repo.git.checkout(commit)
    except CommandError:
        check_details = (
            f"Unable to checkout revision {commit} "
            f"on contributor repo for pipeline #{pipeline['number']} ({pipeline['id']})!"
        )
        _log_safety_check(check_details, pipeline, False)
        return rejected(check_details)

    # Gather what the contributor's commit contains for the protected files and the submodule
    candidate_hashes = utils.compute_files_hash(
        repo_dir.name, reference_protected_files.keys())
    candidate_scheduler_sha = utils.get_submodule_sha(
        fork_repo, SCHEDULER_SUBMODULE_NAME)

    # Compare the candidate against the reference and log the verdict
    passed, check_details = _safety_check(
        candidate_hashes,
        candidate_scheduler_sha,
        config_response["compiled"],
        reference_config,
        reference_protected_files,
        reference_scheduler_sha,
    )
    _log_safety_check(check_details, pipeline, passed)

    # Work out which PR(s) the pipeline belongs to. Internal pipelines expose them through their
    # workflows; forked ones encode the PR number in the branch name.
    is_internal = vcs["origin_repository_url"] == vcs["target_repository_url"]
    pull_requests = set()
    try:
        if not is_internal:
            # Forked repo: the PR number follows "pull/" in the branch name.
            pull_requests.add(int(vcs["branch"].split("pull/")[1]))
        else:
            # Internal repo: Danger is not scheduled for these, as it has already been run on
            # commit; only the PR list is needed.
            workflows = circleci.get_pipeline_workflows(pipeline["id"])
            if not workflows:
                return rejected(check_details)
            pull_requests = circleci.get_workflow_prs(workflows[0]["id"])
    except Exception as e:
        # Best effort: report the problem and mark the pipeline as not actionable
        print(
            f"Unable to retrieve PR for pipeline #{pipeline['number']} ({pipeline['id']})!\n{e}"
        )
        return rejected(check_details)

    # Danger is scheduled only for pipelines that passed the check and come from a fork
    return DangerCandidatePipeline(
        check_details=check_details,
        commit=commit,
        pipeline_nr=pipeline["number"],
        pull_requests=pull_requests,
        repo_dir=repo_dir,
        safe=passed,
        should_run_danger=not is_internal and passed,
    )