def create_started_file_s3(bucket, ui_urls):
    """Upload the gubernator ``started.json`` for this job to S3.

    Args:
      bucket: Bucket identifier forwarded to
        ``aws_prow_artifacts.get_s3_dir`` to resolve the artifacts directory.
      ui_urls: Value forwarded to ``aws_prow_artifacts.create_started`` to
        build the file contents (callers in this file pass a dict mapping
        workflow names to UI URLs).
    """
    started_payload = aws_prow_artifacts.create_started(ui_urls)

    artifacts_dir = aws_prow_artifacts.get_s3_dir(bucket)
    destination = os.path.join(artifacts_dir, "started.json")

    aws_util.upload_to_s3(started_payload, destination, "started.json")
def _argo_ui_url(cloud_provider, workflow_name):
    """Return the Argo UI URL of `workflow_name` for the given cloud provider.

    An unset provider or "gcp" maps to the GCP-hosted Argo UI; anything else
    is treated as AWS (run() validates the provider before calling this).
    """
    if not cloud_provider or cloud_provider == "gcp":
        host = "testing-argo.kubeflow.org"
    else:
        host = ("86308603-argo-argo-5ce9-1162466691.us-west-2.elb."
                "amazonaws.com")
    return "http://{0}/workflows/kubeflow-test-infra/{1}?tab=workflow".format(
        host, workflow_name)


def run(args, file_handler):  # pylint: disable=too-many-statements,too-many-branches
    """Submit the configured E2E workflows, wait for them, and report results.

    The function determines the files changed by the prow job, selects the
    workflows from the config file that apply to the job type and changed
    files, submits them (ksonnet, Tekton, or py_func based Argo workflows),
    waits for completion and uploads status/log artifacts to GCS or S3.

    Args:
      args: Parsed command line arguments. Attributes read here:
        repos_dir, release, config_file, cloud_provider, bucket, project,
        zone, cluster and tekton_namespace. It is also passed to
        get_namespace() and generate_env_from_head().
      file_handler: Logging file handler; it is flushed and the file at
        `file_handler.baseFilename` is uploaded as build-log.txt.

    Returns:
      The value returned by prow_artifacts.finalize_prow_job (GCP) or
      aws_prow_artifacts.finalize_prow_job_to_s3 (AWS).

    Raises:
      ValueError: If args.cloud_provider is set to something other than
        "gcp" or "aws". (Previously this case failed late with an
        UnboundLocalError after side effects had already happened.)
      Exception: Any exception raised while waiting for the workflows is
        logged and re-raised after artifacts have been uploaded.
    """
    cloud_provider = args.cloud_provider
    # An unset cloud provider defaults to GCP.
    use_gcp = not cloud_provider or cloud_provider == "gcp"
    if not use_gcp and cloud_provider != "aws":
        raise ValueError(
            "Unsupported cloud provider {0!r}; expected 'gcp' or 'aws'".format(
                cloud_provider))

    # Check https://github.com/kubernetes/test-infra/blob/master/prow/jobs.md
    # for a description of the injected environment variables.
    job_type = os.getenv("JOB_TYPE")
    repo_owner = os.getenv("REPO_OWNER")
    repo_name = os.getenv("REPO_NAME")
    base_branch_name = os.getenv("PULL_BASE_REF")
    pull_base_sha = os.getenv("PULL_BASE_SHA")

    # For presubmit/postsubmit jobs, find the list of files changed by the PR.
    diff_command = []
    if job_type == "presubmit":
        # We need to get a common ancestor for the PR and the base branch
        cloned_repo_dir = os.path.join(args.repos_dir, repo_owner, repo_name)

        util.run([
            "git", "fetch", "origin",
            base_branch_name + ":refs/remotes/origin/" + base_branch_name
        ],
                 cwd=cloned_repo_dir)

        diff_command = ["git", "diff", "--name-only"]
        diff_branch = "remotes/origin/{}".format(base_branch_name)
        try:
            common_ancestor = util.run(
                ["git", "merge-base", "HEAD", diff_branch],
                cwd=cloned_repo_dir)
            diff_command.append(common_ancestor)
        except subprocess.CalledProcessError:
            # Fall back to diffing against the tip of the base branch.
            logging.warning(
                "git merge-base failed; see "
                "https://github.com/kubeflow/kubeflow/issues/3523. Diff "
                "will be computed against the current master and "
                "therefore files not changed in the PR might be "
                "considered when determining which tests to trigger")
            diff_command.append(diff_branch)

    elif job_type == "postsubmit":
        # See: https://git-scm.com/docs/git-diff
        # This syntax compares the commit before pull_base_sha with the commit
        # at pull_base_sha
        diff_command = [
            "git", "diff", "--name-only", pull_base_sha + "^", pull_base_sha
        ]

    changed_files = []
    if job_type in ("presubmit", "postsubmit"):
        changed_files = util.run(diff_command,
                                 cwd=os.path.join(args.repos_dir, repo_owner,
                                                  repo_name)).splitlines()

    for f in changed_files:
        logging.info("File %s is modified.", f)

    if args.release:
        generate_env_from_head(args)
    workflows = []
    config = {}
    if args.config_file:
        config, new_workflows = parse_config_file(args.config_file,
                                                  args.repos_dir)
        workflows.extend(new_workflows)

    # Add any paths to the python path
    extra_py_paths = []
    for p in config.get("python_paths", []):
        # Assume that python_paths are in the format $REPO_OWNER/$REPO_NAME/path,
        # we need to ensure that the repo is checked out if it is different from
        # the current one, and if the repo is not kubeflow/testing (which is already
        # checked out).
        segments = p.split("/")
        if ((segments[0] != repo_owner or segments[1] != repo_name)
                and not p.startswith("kubeflow/testing")):
            logging.info("Need to clone %s/%s", segments[0], segments[1])
            util.clone_repo(
                os.path.join(args.repos_dir, segments[0], segments[1]),
                segments[0], segments[1])

        extra_py_paths.append(os.path.join(args.repos_dir, p))

    kf_test_path = os.path.join(args.repos_dir, "kubeflow/testing/py")
    if kf_test_path not in extra_py_paths:
        logging.info("Adding %s to extra python paths", kf_test_path)
        extra_py_paths.append(kf_test_path)

    logging.info("Extra python paths: %s", ":".join(extra_py_paths))

    if use_gcp:
        # Create an initial version of the file with no urls
        create_started_file(args.bucket, {})

        util.maybe_activate_service_account()

        util.configure_kubectl(args.project, args.zone, args.cluster)
        util.load_kube_config()
    else:
        create_started_file_s3(args.bucket, {})
        aws_util.aws_configure_credential()
        aws_util.load_kube_config()

    tekton_runner = tekton_client.TektonRunner()
    workflow_names = []
    tkn_names = []
    ui_urls = {}

    for w in workflows:  # pylint: disable=too-many-nested-blocks
        # Create the name for the workflow
        # We truncate sha numbers to prevent the workflow name from being too large.
        # Workflow name should not be more than 63 characters because its used
        # as a label on the pods.
        #
        # TODO(jlewi):This should no longer be used with Tekton. For tekton
        # name should be based on generateName; we should use labels to
        # provide additional metadata info like PR number.
        workflow_name = os.getenv("JOB_NAME", "") + "-" + w.name

        # Skip this workflow if it is scoped to a different job type.
        if w.job_types and job_type not in w.job_types:
            logging.info(
                "Skipping workflow %s because job type %s is not one of "
                "%s.", w.name, job_type, w.job_types)
            continue

        # If we are scoping this workflow to specific directories, check if any files
        # modified match the specified regex patterns.
        dir_modified = False
        if w.include_dirs:
            for f in changed_files:
                for d in w.include_dirs:
                    if fnmatch.fnmatch(f, d):
                        dir_modified = True
                        logging.info(
                            "Triggering workflow %s because %s in dir %s is modified.",
                            w.name, f, d)
                        break
                if dir_modified:
                    break

        # Only consider modified files when the job is pre or post submit, and if
        # the include_dirs stanza is defined.
        if job_type != "periodic" and w.include_dirs and not dir_modified:
            logging.info(
                "Skipping workflow %s because no code modified in %s.", w.name,
                w.include_dirs)
            continue

        if job_type == "presubmit":
            # When not running under prow we might not set all environment variables
            if os.getenv("PULL_NUMBER"):
                workflow_name += "-{0}".format(os.getenv("PULL_NUMBER"))
            if os.getenv("PULL_PULL_SHA"):
                workflow_name += "-{0}".format(os.getenv("PULL_PULL_SHA")[0:7])

        elif job_type == "postsubmit":
            if os.getenv("PULL_BASE_SHA"):
                workflow_name += "-{0}".format(os.getenv("PULL_BASE_SHA")[0:7])

        # Append the last 4 digits of the build number
        if os.getenv("BUILD_NUMBER"):
            workflow_name += "-{0}".format(os.getenv("BUILD_NUMBER")[-4:])

        salt = uuid.uuid4().hex[0:4]
        # Add some salt. This is mostly a convenience for the case where you
        # are submitting jobs manually for testing/debugging. Since the prow should
        # vend unique build numbers for each job.
        workflow_name += "-{0}".format(salt)
        if w.tekton_run:
            tkn_names.append(workflow_name)
        else:
            workflow_names.append(workflow_name)

        # check if ks workflow and run
        if w.app_dir:
            ks_cmd = ks_util.get_ksonnet_cmd(w.app_dir)

            # Print ksonnet version
            util.run([ks_cmd, "version"])

            # Create a new environment for this run
            env = workflow_name

            util.run([
                ks_cmd, "env", "add", env, "--namespace=" +
                get_namespace(args), "--api-spec=version:v1.8.0"
            ],
                     cwd=w.app_dir)

            util.run([
                ks_cmd, "param", "set", "--env=" + env, w.component, "name",
                workflow_name
            ],
                     cwd=w.app_dir)

            # Set the prow environment variables (sorted by name, unset ones
            # are skipped).
            prow_var_names = sorted([
                "JOB_NAME", "JOB_TYPE", "BUILD_ID", "BUILD_NUMBER",
                "PULL_BASE_SHA", "PULL_NUMBER", "PULL_PULL_SHA", "REPO_OWNER",
                "REPO_NAME"
            ])
            prow_env = [
                "{0}={1}".format(v, os.getenv(v)) for v in prow_var_names
                if os.getenv(v)
            ]

            util.run([
                ks_cmd, "param", "set", "--env=" + env, w.component,
                "prow_env", ",".join(prow_env)
            ],
                     cwd=w.app_dir)
            util.run([
                ks_cmd, "param", "set", "--env=" + env, w.component,
                "namespace",
                get_namespace(args)
            ],
                     cwd=w.app_dir)
            util.run([
                ks_cmd, "param", "set", "--env=" + env, w.component, "bucket",
                args.bucket
            ],
                     cwd=w.app_dir)
            if not use_gcp:
                util.run([
                    ks_cmd, "param", "set", "--env=" + env, w.component,
                    "cluster_name", "eks-cluster-{}".format(
                        uuid.uuid4().hex[0:8])
                ],
                         cwd=w.app_dir)
            if args.release:
                util.run([
                    ks_cmd, "param", "set", "--env=" + env, w.component,
                    "versionTag",
                    os.getenv("VERSION_TAG")
                ],
                         cwd=w.app_dir)

            # Set any extra params. We do this in alphabetical order to make it easier to verify in
            # the unittest. sorted() works on both Python 2 lists and
            # Python 3 dict views.
            for k in sorted(w.params):
                util.run([
                    ks_cmd, "param", "set", "--env=" + env, w.component, k,
                    "{0}".format(w.params[k])
                ],
                         cwd=w.app_dir)

            # For debugging print out the manifest
            util.run([ks_cmd, "show", env, "-c", w.component], cwd=w.app_dir)
            util.run([ks_cmd, "apply", env, "-c", w.component], cwd=w.app_dir)

            ui_url = _argo_ui_url(cloud_provider, workflow_name)
            ui_urls[workflow_name] = ui_url
            logging.info("URL for workflow: %s", ui_url)
        elif w.tekton_run:
            if os.getenv("PULL_NUMBER"):
                pull_revision = "refs/pull/{pull_num}/head".format(
                    pull_num=os.getenv("PULL_NUMBER"))
            elif os.getenv("PULL_BASE_SHA"):
                pull_revision = os.getenv("PULL_BASE_SHA")
            else:
                pull_revision = "master"
            logging.info("Adding Tekton pipeline %s", w.name)
            try:
                pipeline_runner = tekton_client.PipelineRunner(
                    w.tekton_params, w.kwargs.get(TEST_TARGET_ARG_NAME,
                                                  w.name), w.tekton_run,
                    args.bucket, repo_owner, repo_name, pull_revision)
            except (FileNotFoundError, ValueError) as e:
                logging.error(
                    "Error when starting Tekton workflow:%s\n Exception %s;\n"
                    "stacktrace:\n%s", w.tekton_run, e, traceback.format_exc())
                continue
            if w.tekton_teardown:
                logging.info(
                    "Appending teardown process for Tekton pipeline %s",
                    w.name)
                pipeline_runner.append_teardown(
                    tekton_client.PipelineRunner(
                        w.tekton_teardown_params,
                        w.kwargs.get(TEST_TARGET_ARG_NAME,
                                     w.name), w.tekton_teardown, args.bucket,
                        repo_owner, repo_name, pull_revision))
            tekton_runner.append(pipeline_runner)
        else:
            w.kwargs["name"] = workflow_name
            w.kwargs["namespace"] = get_namespace(args)

            if TEST_TARGET_ARG_NAME not in w.kwargs:
                w.kwargs[TEST_TARGET_ARG_NAME] = w.name
                logging.info(
                    "Workflow %s doesn't set arg %s; defaulting to %s", w.name,
                    TEST_TARGET_ARG_NAME, w.kwargs[TEST_TARGET_ARG_NAME])

            # TODO(https://github.com/kubeflow/testing/issues/467): We shell out
            # to e2e_tool in order to dump the Argo workflow to a file which we
            # then reimport. We do this because importing the py_func module appears
            # to break when we have to dynamically adjust sys.path to insert
            # new paths. Setting PYTHONPATH before launching python however appears
            # to work which is why we shell out to e2e_tool.
            command = [
                "python", "-m", "kubeflow.testing.e2e_tool", "show", w.py_func
            ]
            for k, v in w.kwargs.items():
                # The fire module turns underscores in parameter names into hyphens
                # so we convert underscores in parameter names to hyphens
                command.append("--{0}={1}".format(k.replace("_", "-"), v))

            # Only reserve a file name here; the subprocess writes the file.
            with tempfile.NamedTemporaryFile(delete=False) as hf:
                workflow_file = hf.name

            try:
                command.append("--output=" + workflow_file)
                env = os.environ.copy()
                env["PYTHONPATH"] = ":".join(extra_py_paths)
                util.run(command, env=env)

                with open(workflow_file) as hf:
                    # safe_load: yaml.load() without an explicit Loader is
                    # unsafe and is rejected by PyYAML >= 6.
                    wf_result = yaml.safe_load(hf)
            finally:
                # Best-effort cleanup of the temporary manifest.
                try:
                    os.remove(workflow_file)
                except OSError as e:
                    logging.warning("Could not remove temporary file %s: %s",
                                    workflow_file, e)

            group, version = wf_result['apiVersion'].split('/')
            k8s_co = k8s_client.CustomObjectsApi()
            # NOTE(review): workflow_names already holds the generated name;
            # presumably the manifest's metadata.name always matches it —
            # confirm, otherwise wait_for_workflows polls a different name.
            workflow_name = wf_result["metadata"]["name"]
            py_func_result = k8s_co.create_namespaced_custom_object(
                group=group,
                version=version,
                namespace=wf_result["metadata"]["namespace"],
                plural='workflows',
                body=wf_result)
            logging.info("Created workflow:\n%s",
                         yaml.safe_dump(py_func_result))

            ui_url = _argo_ui_url(cloud_provider, workflow_name)
            ui_urls[workflow_name] = ui_url
            logging.info("URL for workflow: %s", ui_url)

    if use_gcp:
        ui_urls.update(
            tekton_runner.run(
                tekton_client.ClusterInfo(args.project, TEKTON_CLUSTER_ZONE,
                                          TEKTON_CLUSTER_NAME),
                tekton_client.ClusterInfo(args.project, args.zone,
                                          args.cluster)))

        # We delay creating started.json until we know the Argo workflow URLs
        create_started_file(args.bucket, ui_urls)
    else:
        # We delay creating started.json until we know the Argo workflow URLs
        create_started_file_s3(args.bucket, ui_urls)

    workflow_success = False
    workflow_phase = {}
    workflow_status_yamls = {}
    results = []
    tekton_results = []
    try:
        results = argo_client.wait_for_workflows(
            get_namespace(args),
            workflow_names,
            timeout=datetime.timedelta(minutes=180),
            status_callback=argo_client.log_status)
        if use_gcp:
            # NOTE(review): these literals presumably duplicate
            # TEKTON_CLUSTER_ZONE / TEKTON_CLUSTER_NAME — confirm and reuse
            # the constants.
            util.configure_kubectl(args.project, "us-east1-d", "kf-ci-v1")
            util.load_kube_config()
            tekton_results = tekton_runner.join()
        else:
            aws_util.load_kube_config()
        workflow_success = True
    except util.ExceptionWithWorkflowResults as e:
        # We explicitly log any exceptions so that they will be captured in the
        # build-log.txt that is uploaded to Gubernator.
        logging.exception("Exception occurred: %s", e)
        results = e.workflow_results
        raise
    except Exception as e:
        logging.exception("Other exception: %s", e)
        raise
    finally:
        if use_gcp:
            # Switch kubectl back to the cluster given on the command line.
            util.configure_kubectl(args.project, args.zone, args.cluster)
            util.load_kube_config()
            prow_artifacts_dir = prow_artifacts.get_gcs_dir(args.bucket)
        else:
            prow_artifacts_dir = aws_prow_artifacts.get_s3_dir(args.bucket)

        # Upload workflow status to GCS/S3.
        for r in results:
            phase = r.get("status", {}).get("phase")
            name = r.get("metadata", {}).get("name")
            workflow_phase[name] = phase
            workflow_status_yamls[name] = yaml.safe_dump(
                r, default_flow_style=False)
            if phase != "Succeeded":
                workflow_success = False
            logging.info("Workflow %s/%s finished phase: %s",
                         get_namespace(args), name, phase)

            # Upload only this workflow's status; re-uploading every
            # previously seen status on each iteration was quadratic.
            status_file = '{}.yaml'.format(name)
            status_target = os.path.join(prow_artifacts_dir, status_file)
            if use_gcp:
                util.upload_to_gcs(workflow_status_yamls[name], status_target)
            else:
                aws_util.upload_to_s3(workflow_status_yamls[name],
                                      status_target, status_file)

        for r in tekton_results:
            condition = "Failed"
            name = r.get("metadata", {}).get("name")
            if r.get("status", {}).get("conditions", []):
                condition = r["status"]["conditions"][0].get(
                    "reason", "Failed")
            workflow_phase[name] = condition
            workflow_status_yamls[name] = yaml.safe_dump(
                r, default_flow_style=False)
            if condition != "Succeeded":
                workflow_success = False
            logging.info("Workflow %s/%s finished phase: %s",
                         args.tekton_namespace, name, condition)

        # Upload logs to GCS/S3. No logs after this point will appear in the
        # uploaded file.
        file_handler.flush()

        build_log_target = os.path.join(prow_artifacts_dir, "build-log.txt")
        if use_gcp:
            util.upload_file_to_gcs(file_handler.baseFilename,
                                    build_log_target)
            all_tests_success = prow_artifacts.finalize_prow_job(
                args.bucket, workflow_success, workflow_phase, ui_urls)
        else:
            aws_util.upload_file_to_s3(file_handler.baseFilename,
                                       build_log_target)
            all_tests_success = aws_prow_artifacts.finalize_prow_job_to_s3(
                args.bucket, workflow_success, workflow_phase, ui_urls)

    return all_tests_success
# Example #3
# 0
def create_finished_file_s3(bucket, success, workflow_phase, ui_urls):
    """Upload the gubernator ``finished.json`` for this job to S3.

    Args:
      bucket: Bucket identifier forwarded to ``get_s3_dir`` to resolve the
        artifacts directory.
      success: Forwarded to ``create_finished`` as the overall result.
      workflow_phase: Forwarded to ``create_finished`` (per-workflow phases).
      ui_urls: Forwarded to ``create_finished`` (per-workflow UI URLs).
    """
    finished_payload = create_finished(success, workflow_phase, ui_urls)

    artifacts_dir = get_s3_dir(bucket)
    destination = os.path.join(artifacts_dir, "finished.json")

    aws_util.upload_to_s3(finished_payload, destination, "finished.json")