def _create_checkout_task(self, task_template):
  """Build the Argo task that checks out kubeflow/testing and kubeflow/kubeflow.

  The repo to check out is taken from the prow environment variables when
  available; otherwise it falls back to kubeflow/testing@HEAD.
  """
  repo = argo_build_util.get_repo_from_prow_env()
  if not repo:
    logging.info("Prow environment variables for repo not set")
    repo = "kubeflow/testing@HEAD"
  logging.info("Main repository: %s", repo)

  step = argo_build_util.deep_copy(task_template)
  step["name"] = "checkout"
  step["container"]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + ",".join([repo]),
    "--src_dir=" + self.src_root_dir,
  ]
  return step
def run_papermill_job(
    notebook_path, name, namespace, # pylint: disable=too-many-branches,too-many-statements
    repos, image):
  """Generate and run a K8s job that executes a notebook using papermill.

  Args:
    notebook_path: Path to the notebook. This should be in the form
      "{REPO_OWNER}/{REPO}/path/to/notebook.ipynb"
    name: Name for the K8s job
    namespace: The namespace where the job should run.
    repos: Which repos to checkout; if None or empty tries to infer based on
      PROW environment variables
    image: The docker image to run the notebook in.

  Raises:
    ValueError: If notebook_path is absolute or repos can't be determined.
    RuntimeError: If the job does not complete successfully.
  """
  util.maybe_activate_service_account()

  with open("job.yaml") as hf:
    # Fix: yaml.load without an explicit Loader is deprecated and can execute
    # arbitrary Python on crafted input; safe_load parses plain YAML only.
    job = yaml.safe_load(hf)

  if notebook_path.startswith("/"):
    raise ValueError(
      "notebook_path={0} should not start with /".format(notebook_path))

  # We need to checkout the correct version of the code in presubmits and
  # postsubmits, using the prow environment variables. See
  # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
  if not repos:
    repos = argo_build_util.get_repo_from_prow_env()

  if not repos:
    raise ValueError("Could not get repos from prow environment variable "
                     "and --repos isn't explicitly set")

  # Always check out kubeflow/testing as well; its scripts are needed below.
  repos += ",kubeflow/testing@HEAD"

  logging.info("Repos set to %s", repos)
  job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + repos,
    "--src_dir=/src",
    "--depth=all",
  ]
  job["spec"]["template"]["spec"]["containers"][0]["image"] = image

  full_notebook_path = os.path.join("/src", notebook_path)
  job["spec"]["template"]["spec"]["containers"][0]["command"] = [
    "python3", "-m",
    "kubeflow.examples.notebook_tests.execute_notebook",
    "--notebook_path", full_notebook_path]

  # Run from the notebook's directory so relative paths inside it resolve.
  job["spec"]["template"]["spec"]["containers"][0][
    "workingDir"] = os.path.dirname(full_notebook_path)

  # The prow bucket to use for results/artifacts
  prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET

  if os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
    # Running under prow
    prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
    logging.info("Prow artifacts dir: %s", prow_dir)
    prow_dir = os.path.join(prow_dir, "artifacts")

    if os.getenv("TEST_TARGET_NAME"):
      prow_dir = os.path.join(prow_dir,
                              os.getenv("TEST_TARGET_NAME").lstrip("/"))

    prow_bucket, prow_path = util.split_gcs_uri(prow_dir)
  else:
    # Not running under prow; generate a unique scratch path instead.
    prow_path = "notebook-test" + datetime.datetime.now().strftime("%H%M%S")
    prow_path = prow_path + "-" + uuid.uuid4().hex[0:3]
    prow_dir = util.to_gcs_uri(prow_bucket, prow_path)

  prow_path = os.path.join(prow_path, name + ".html")
  output_gcs = util.to_gcs_uri(NB_BUCKET, prow_path)

  job["spec"]["template"]["spec"]["containers"][0]["env"] = [
    {
      "name": "OUTPUT_GCS",
      "value": output_gcs
    },
    {
      "name": "PYTHONPATH",
      "value": "/src/kubeflow/testing/py:/src/kubeflow/examples/py"
    },
  ]

  logging.info("Notebook will be written to %s", output_gcs)
  util.load_kube_config(persist_config=False)

  if name:
    job["metadata"]["name"] = name
  else:
    job["metadata"]["name"] = ("notebook-test-" +
                               datetime.datetime.now().strftime("%H%M%S") +
                               "-" + uuid.uuid4().hex[0:3])
  name = job["metadata"]["name"]

  job["metadata"]["namespace"] = namespace

  # Create an API client object to talk to the K8s master.
  api_client = k8s_client.ApiClient()
  batch_api = k8s_client.BatchV1Api(api_client)

  logging.info("Creating job:\n%s", yaml.dump(job))
  actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
                                               job)
  logging.info("Created job %s.%s:\n%s", namespace, name,
               yaml.safe_dump(actual_job.to_dict()))

  final_job = util.wait_for_job(api_client, namespace, name,
                                timeout=datetime.timedelta(minutes=30))

  logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))

  # Download notebook html to artifacts
  logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)

  storage_client = storage.Client()
  bucket = storage_client.get_bucket(NB_BUCKET)
  blob = bucket.get_blob(prow_path)

  destination_bucket = storage_client.get_bucket(prow_bucket)
  bucket.copy_blob(blob, destination_bucket)

  if not final_job.status.conditions:
    raise RuntimeError("Job {0}.{1}; did not complete".format(
      namespace, name))

  last_condition = final_job.status.conditions[-1]
  if last_condition.type not in ["Complete"]:
    logging.error("Job didn't complete successfully")
    raise RuntimeError("Job {0}.{1} failed".format(namespace, name))
def build(self):
  """Assemble the Argo workflow for the kfctl E2E test and return it."""
  self.workflow = self._build_workflow()
  task_template = self._build_task_template()

  # ==========================================================================
  # Checkout: fetch the main repo (from prow env vars when available) plus
  # any extra repos, then add the checkout task as the DAG root.
  repo = argo_build_util.get_repo_from_prow_env()
  if not repo:
    logging.info("Prow environment variables for repo not set")
    repo = MAIN_REPO + "@HEAD"
  logging.info("Main repository: %s", repo)

  repos = [repo]
  repos.extend(EXTRA_REPOS)

  checkout = argo_build_util.deep_copy(task_template)
  checkout["name"] = "checkout"
  checkout["container"]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + ",".join(repos),
    "--src_dir=" + self.src_root_dir
  ]
  argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, checkout, [])

  # Every subsequent step runs from the kfctl pytest directory.
  task_template["container"]["workingDir"] = os.path.join(
    self.kfctl_pytest_dir)

  # ==========================================================================
  # Build kfctl and deploy Kubeflow.
  build_kfctl = self._build_step(
    "kfctl-build-deploy", self.workflow, E2E_DAG_NAME, task_template,
    [
      "pytest",
      "kfctl_go_test.py",
      # I think -s mean stdout/stderr will print out to aid in debugging.
      # Failures still appear to be captured and stored in the junit file.
      "-s",
      "--config_path=" + self.config_path,
      "--build_and_apply=" + str(self.build_and_apply),
      # Increase the log level so that info level log statements show up.
      # TODO(https://github.com/kubeflow/testing/issues/372): If we
      # set a unique artifacts dir for each workflow with the proper
      # prefix that should work.
      "--log-cli-level=info",
      "--junitxml=" + self.artifacts_dir + "/junit_kfctl-build-test" +
      self.config_name + ".xml",
      # TODO(jlewi) Test suite name needs to be unique based on parameters.
      #
      # "-o", "junit_suite_name=test_kfctl_go_deploy_" + self.config_name,
      "--app_path=" + self.app_dir,
    ],
    [checkout["name"]])

  # ==========================================================================
  # Wait for Kubeflow to be ready.
  kf_is_ready = self._build_step(
    "kubeflow-is-ready", self.workflow, E2E_DAG_NAME, task_template,
    [
      "pytest",
      "kf_is_ready_test.py",
      # I think -s mean stdout/stderr will print out to aid in debugging.
      # Failures still appear to be captured and stored in the junit file.
      "-s",
      # TODO(jlewi): We should update kf_is_ready_test to take the config
      # path and then based on the KfDef spec kf_is_ready_test should
      # figure out what to do.
      "--use_basic_auth={0}".format(self.use_basic_auth),
      # TODO(jlewi): We should be using ISTIO always so can we stop
      # setting this
      "--use_istio=true",
      # Increase the log level so that info level log statements show up.
      "--log-cli-level=info",
      "--junitxml=" + os.path.join(
        self.artifacts_dir,
        "junit_kfctl-is-ready-test-" + self.config_name + ".xml"),
      # Test suite name needs to be unique based on parameters
      "-o", "junit_suite_name=test_kf_is_ready_" + self.config_name,
      "--app_path=" + self.app_dir,
    ],
    [build_kfctl["name"]])

  # ==========================================================================
  # Optionally wait for the endpoint to be ready.
  if self.test_endpoint:
    self._build_step(
      "endpoint-is-ready", self.workflow, E2E_DAG_NAME, task_template,
      [
        "pytest",
        "endpoint_ready_test.py",
        # I think -s mean stdout/stderr will print out to aid in debugging.
        # Failures still appear to be captured and stored in the junit file.
        "-s",
        # Increase the log level so that info level log statements show up.
        "--log-cli-level=info",
        # Test timeout in seconds.
        "--timeout=1800",
        "--junitxml=" + self.artifacts_dir +
        "/junit_endpoint-is-ready-test-" + self.config_name + ".xml",
        # Test suite name needs to be unique based on parameters
        "-o", "junit_suite_name=test_endpoint_is_ready_" + self.config_name,
        "--app_path=" + self.app_dir,
        "--app_name=" + self.app_name,
      ],
      [build_kfctl["name"]])

  # Build the tests DAG and attach it to the E2E DAG after kubeflow-is-ready.
  self._build_tests_dag()
  argo_build_util.add_task_only_to_dag(self.workflow, E2E_DAG_NAME,
                                       TESTS_DAG_NAME,
                                       TESTS_DAG_NAME,
                                       [kf_is_ready["name"]])

  # ==========================================================================
  # create_pr_symlink
  # TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink
  self._build_step(
    "create-pr-symlink", self.workflow, E2E_DAG_NAME, task_template,
    [
      "python",
      "-m",
      "kubeflow.testing.prow_artifacts",
      "--artifacts_dir=" + self.output_dir,
      "create_pr_symlink",
      "--bucket=" + self.bucket,
    ],
    [checkout["name"]])

  self._build_exit_dag()

  # Set the labels on all templates
  self.workflow = argo_build_util.set_task_template_labels(self.workflow)

  return self.workflow
def __init__(self, name=None, namespace=None,
             config_path=("https://raw.githubusercontent.com/kubeflow"
                          "/manifests/master/kfdef/kfctl_gcp_iap.yaml"),
             bucket=None,
             test_endpoint=False,
             use_basic_auth=False,
             build_and_apply=False,
             test_target_name=None,
             kf_app_name=None, delete_kf=True,
             extra_repos="",
             **kwargs):
  """Initialize a builder.

  Args:
    name: Name for the workflow.
    namespace: Namespace for the workflow.
    config_path: Path to the KFDef spec file.
    bucket: The bucket to upload artifacts to. If not set use default determined
      by prow_artifacts.py.
    test_endpoint: Whether to test the endpoint is ready.
    use_basic_auth: Whether to use basic_auth.
    build_and_apply: Whether to run kfctl build followed by apply (rather than
      a single apply); passed through to the kfctl test command.
    test_target_name: (Optional) Name to use as the test target to group
      tests.
    kf_app_name: (Optional) Name to use for the Kubeflow deployment.
      If not set a unique name is assigned. Only set this if you want to
      reuse an existing deployment across runs.
    delete_kf: (Optional) Don't run the step to delete Kubeflow. Set to
      true if you want to leave the deployment up for some reason.
    extra_repos: (Optional) Comma separated list of extra repos to check out,
      each in the form <repo_owner>/<repo_name>@<commit>.
    **kwargs: Additional keyword arguments; currently unused by this
      initializer.
  """
  self.name = name
  self.namespace = namespace
  self.bucket = bucket
  self.config_path = config_path
  self.build_and_apply = build_and_apply

  #****************************************************************************
  # Define directory locations
  #****************************************************************************
  # mount_path is the directory where the volume to store the test data
  # should be mounted.
  self.mount_path = "/mnt/" + "test-data-volume"
  # test_dir is the root directory for all data for a particular test run.
  self.test_dir = self.mount_path + "/" + self.name
  # output_dir is the directory to sync to GCS to contain the output for this
  # job.
  self.output_dir = self.test_dir + "/output"

  # We prefix the artifacts directory with junit because
  # that's what spyglass/prow requires. This ensures multiple
  # instances of a workflow triggered by the same prow job
  # don't end up clobbering each other
  self.artifacts_dir = self.output_dir + "/artifacts/junit_{0}".format(name)

  # source directory where all repos should be checked out
  self.src_root_dir = self.test_dir + "/src"
  # The directory containing the kubeflow/kfctl repo
  self.src_dir = self.src_root_dir + "/kubeflow/kfctl"
  # The directory containing the kubeflow/kubeflow repo
  self.kubeflow_dir = self.src_root_dir + "/kubeflow/kubeflow"

  # Directory in kubeflow/kfctl containing the pytest files.
  self.kfctl_pytest_dir = os.path.join(self.src_dir,
                                       "py/kubeflow/kfctl/testing/pytests")

  # Top level directories for python testing code in kfctl.
  self.kfctl_py = os.path.join(self.src_dir, "py")

  # Build a string of key value pairs that can be passed to various test
  # steps to allow them to do substitution into different values.
  values = {
    "srcrootdir": self.src_root_dir,
  }

  value_pairs = ["{0}={1}".format(k,v) for k,v in values.items()]
  self.values_str = ",".join(value_pairs)

  # The directory within the kubeflow_testing submodule containing
  # py scripts to use.
  self.kubeflow_testing_py = self.src_root_dir + "/kubeflow/testing/py"

  self.tf_operator_root = os.path.join(self.src_root_dir,
                                       "kubeflow/tf-operator")
  self.tf_operator_py = os.path.join(self.tf_operator_root, "py")

  self.go_path = self.test_dir

  # Name for the Kubeflow app.
  # This needs to be unique for each test run because it is
  # used to name GCP resources
  # TODO(jlewi): Might be good to include pull number or build id in the name
  # Not sure if being non-deterministic is a good idea.
  # A better approach might be to hash the workflow name to generate a unique
  # name dependent on the workflow name. We know there will be one workflow
  # per cluster.
  self.uuid = uuid.uuid4().hex[0:4]

  # Config name is the name of the config file. This is used to give junit
  # files unique names.
  self.config_name = os.path.splitext(os.path.basename(config_path))[0]

  # The class name to label junit files.
  # We want to be able to group related tests in test grid.
  # Test grid allows grouping by target which corresponds to the classname
  # attribute in junit files.
  # So we set an environment variable to the desired class name.
  # The pytest modules can then look at this environment variable to
  # explicitly override the classname.
  # The classname should be unique for each run so it should take into
  # account the different parameters
  if test_target_name:
    self.test_target_name = test_target_name
  else:
    self.test_target_name = self.config_name

  # app_name is the name of the Kubeflow deployment.
  # This needs to be unique per run since we name GCP resources with it.
  self.app_name = kf_app_name
  if not self.app_name:
    self.app_name = "kfctl-" + self.uuid

  self.delete_kf = delete_kf

  # GCP service accounts can only be max 30 characters. Service account names
  # are generated by taking the app_name and appending suffixes like "user"
  # and "admin"
  # NOTE(review): the two adjacent string literals below concatenate without
  # a space ("this willlikely"); left as-is to preserve the exact message.
  if len(self.app_name) > 20:
    raise ValueError(("app_name {0} is longer than 20 characters; this will"
                      "likely exceed GCP naming restrictions.").format(
                        self.app_name))

  # Directory for the KF app.
  self.app_dir = os.path.join(self.test_dir, self.app_name)
  self.use_basic_auth = use_basic_auth

  # The name space we create KF artifacts in; e.g. TFJob and notebooks.
  # TODO(jlewi): These should no longer be running the system namespace but
  # should move into the namespace associated with the default profile.
  self.steps_namespace = "kubeflow"
  self.test_endpoint = test_endpoint

  self.kfctl_path = os.path.join(self.src_dir, "bin/kfctl")

  # Fetch the main repo from Prow environment.
  self.main_repo = argo_build_util.get_repo_from_prow_env()

  # extra_repos is a list of comma separated repo names with commits,
  # in the format <repo_owner>/<repo_name>@<commit>,
  # e.g. "kubeflow/tf-operator@12345,kubeflow/manifests@23456".
  # This will be used to override the default repo branches.
  self.extra_repos = []
  if extra_repos:
    self.extra_repos = extra_repos.split(',')

  # Keep track of step names that subclasses might want to list as
  # dependencies
  self._run_tests_step_name = None
  self._test_endpoint_step_name = None
  self._test_endpoint_template_name = None
def build(self):
  """Build the Argo workflow for the kubeflow/testing unittest/lint E2E.

  Returns:
    The workflow dict, with checkout, mkdir, py-test, py-lint and
    create-pr-symlink tasks in the main DAG and copy-artifacts in the
    exit-handler DAG.
  """
  workflow = self._build_workflow()
  task_template = self._build_task_template()

  #**************************************************************************
  # Checkout

  # create the checkout step
  main_repo = argo_build_util.get_repo_from_prow_env()
  if not main_repo:
    logging.info("Prow environment variables for repo not set")
    main_repo = "kubeflow/testing@HEAD"
  logging.info("Main repository: %s", main_repo)
  repos = [main_repo]

  checkout = argo_build_util.deep_copy(task_template)

  checkout["name"] = "checkout"
  checkout["container"]["command"] = ["/usr/local/bin/checkout_repos.sh",
                                     "--repos=" + ",".join(repos),
                                     "--src_dir=" + self.src_root_dir]

  argo_build_util.add_task_to_dag(workflow, E2E_DAG_NAME, checkout, [])

  #**************************************************************************
  # Make dir
  # pytest was failing trying to call makedirs. My suspicion is its
  # because the two steps ended up trying to create the directory at the
  # same time and classing. So we create a separate step to do it.
  mkdir_step = argo_build_util.deep_copy(task_template)

  mkdir_step["name"] = "make-artifacts-dir"
  mkdir_step["container"]["command"] = ["mkdir", "-p", self.artifacts_dir]

  argo_build_util.add_task_to_dag(workflow, E2E_DAG_NAME, mkdir_step,
                                  [checkout["name"]])

  #**************************************************************************
  # Run python unittests
  py_tests = argo_build_util.deep_copy(task_template)

  py_tests["name"] = "py-test"
  py_tests["container"]["command"] = ["python", "-m",
                                     "kubeflow.testing.test_py_checks",
                                     "--artifacts_dir=" + self.artifacts_dir,
                                     # TODO(jlewi): Should we be searching
                                     # the entire py/kubeflo/testing tree?
                                     # Fix: the original concatenated the two
                                     # paths without a separator, yielding
                                     # ".../pykubeflow/tests".
                                     "--src_dir=" + os.path.join(
                                       self.kubeflow_testing_py,
                                       "kubeflow/tests")]

  argo_build_util.add_task_to_dag(workflow, E2E_DAG_NAME, py_tests,
                                  [mkdir_step["name"]])

  #***************************************************************************
  # py lint
  #***************************************************************************
  py_lint = argo_build_util.deep_copy(task_template)

  py_lint["name"] = "py-lint"
  py_lint["container"]["command"] = ["pytest",
                                    "test_py_lint.py",
                                    # I think -s mean stdout/stderr will
                                    # print out to aid in debugging.
                                    # Failures still appear to be captured
                                    # and stored in the junit file.
                                    "-s",
                                    "--src_dir=" + self.kubeflow_testing_py,
                                    "--rcfile=" + os.path.join(
                                      self.testing_src_dir, ".pylintrc"),
                                    # Test timeout in seconds.
                                    "--timeout=500",
                                    "--junitxml=" + self.artifacts_dir +
                                    "/junit_py-lint.xml"]

  py_lint_step = argo_build_util.add_task_to_dag(workflow, E2E_DAG_NAME,
                                                 py_lint,
                                                 [mkdir_step["name"]])

  py_lint_step["container"]["workingDir"] = os.path.join(
    self.testing_src_dir, "py/kubeflow/testing")

  #*****************************************************************************
  # create_pr_symlink
  #****************************************************************************
  # TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink
  symlink = argo_build_util.deep_copy(task_template)

  symlink["name"] = "create-pr-symlink"
  symlink["container"]["command"] = ["python",
                                    "-m",
                                    "kubeflow.testing.prow_artifacts",
                                    "--artifacts_dir=" + self.output_dir,
                                    "create_pr_symlink",
                                    ]

  if self.bucket:
    symlink["container"]["command"].append("--bucket=" + self.bucket)

  argo_build_util.add_task_to_dag(workflow, E2E_DAG_NAME, symlink,
                                  [checkout["name"]])

  #*****************************************************************************
  # Exit handler workflow
  #*****************************************************************************
  copy_artifacts = argo_build_util.deep_copy(task_template)

  copy_artifacts["name"] = "copy-artifacts"
  copy_artifacts["container"]["command"] = ["python",
                                            "-m",
                                            "kubeflow.testing.prow_artifacts",
                                            "--artifacts_dir=" +
                                            self.output_dir,
                                            "copy_artifacts"]

  if self.bucket:
    copy_artifacts["container"]["command"].append("--bucket=" + self.bucket)

  argo_build_util.add_task_to_dag(workflow, EXIT_DAG_NAME, copy_artifacts, [])

  # Set the labels on all templates
  workflow = argo_build_util.set_task_template_labels(workflow)

  return workflow
def test_xgboost_synthetic(
    record_xml_attribute, name, namespace, # pylint: disable=too-many-branches,too-many-statements
    repos, image, notebook_artifacts_dir):
  """Generate a K8s Job that runs the xgboost_synthetic notebook and submit it.

  Args:
    record_xml_attribute: pytest fixture used to set the junit test name.
    name: Name for the K8s job; if empty a unique name is generated.
    namespace: The namespace where the job should run.
    repos: Which repos to checkout; if None or empty tries to infer based on
      PROW environment variables.
    image: The docker image to run the notebook in.
    notebook_artifacts_dir: Local directory to download the rendered
      notebook.html into.

  Raises:
    ValueError: If repos can't be determined.
    RuntimeError: If the job does not complete successfully.
  """
  util.set_pytest_junit(record_xml_attribute, "test_xgboost_synthetic")

  util.maybe_activate_service_account()

  with open("job.yaml") as hf:
    # Fix: yaml.load without an explicit Loader is deprecated and unsafe;
    # safe_load is sufficient for this config file.
    job = yaml.safe_load(hf)

  # We need to checkout the correct version of the code
  # in presubmits and postsubmits. We should check the environment variables
  # for the prow environment variables to get the appropriate values. See
  # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
  if not repos:
    repos = argo_build_util.get_repo_from_prow_env()

  # Fix: without this guard a None result from the prow env would crash on
  # the += below with an opaque TypeError.
  if not repos:
    raise ValueError("Could not get repos from prow environment variable "
                     "and --repos isn't explicitly set")

  repos += ",kubeflow/testing@HEAD"
  logging.info("Repos set to %s", repos)
  job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + repos,
    "--src_dir=/src",
    "--depth=all",
  ]

  nb_bucket = "kubeflow-ci-deployment"
  nb_path = os.path.join("xgboost_synthetic_testing", os.getenv("JOB_TYPE"),
                         os.getenv("HOSTNAME"), "notebook.html")
  output_gcs = util.to_gcs_uri(nb_bucket, nb_path)
  logging.info("Tested notebook will be outputed to: %s", output_gcs)
  job["spec"]["template"]["spec"]["containers"][0]["env"] = [
    {
      "name": "PYTHONPATH",
      "value": "/src/kubeflow/testing/py"
    },
    {
      "name": "OUTPUT_GCS",
      "value": output_gcs
    },
  ]
  job["spec"]["template"]["spec"]["containers"][0]["image"] = image

  util.load_kube_config(persist_config=False)

  if name:
    job["metadata"]["name"] = name
  else:
    job["metadata"]["name"] = ("xgboost-test-" +
                               datetime.datetime.now().strftime("%H%M%S") +
                               "-" + uuid.uuid4().hex[0:3])
  name = job["metadata"]["name"]

  job["metadata"]["namespace"] = namespace

  # Create an API client object to talk to the K8s master.
  api_client = k8s_client.ApiClient()
  batch_api = k8s_client.BatchV1Api(api_client)

  logging.info("Creating job:\n%s", yaml.dump(job))
  actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
                                               job)
  logging.info("Created job %s.%s:\n%s", namespace, name,
               yaml.safe_dump(actual_job.to_dict()))

  final_job = util.wait_for_job(api_client, namespace, name,
                                timeout=datetime.timedelta(minutes=30))

  logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))

  if not final_job.status.conditions:
    raise RuntimeError("Job {0}.{1}; did not complete".format(
      namespace, name))

  last_condition = final_job.status.conditions[-1]

  # Download notebook html to artifacts
  notebook_artifacts_path = os.path.join(notebook_artifacts_dir,
                                         "notebook.html")
  logging.info("Writing notebook artifact to: %s", notebook_artifacts_path)
  os.makedirs(notebook_artifacts_dir, exist_ok=True)
  storage_client = storage.Client()
  bucket = storage_client.get_bucket(nb_bucket)
  blob = bucket.get_blob(nb_path)
  blob.download_to_filename(notebook_artifacts_path)

  if last_condition.type not in ["Complete"]:
    logging.error("Job didn't complete successfully")
    raise RuntimeError("Job {0}.{1} failed".format(namespace, name))
def build(self):
  """Build the Argo workflow that tests against an auto-deployed cluster.

  Returns:
    The workflow dict with checkout, get-credentials, the tests DAG and
    create-pr-symlink wired into the E2E DAG.
  """
  self.workflow = self._build_workflow()
  task_template = self._build_task_template()

  # **************************************************************************
  # Checkout

  # create the checkout step
  main_repo = argo_build_util.get_repo_from_prow_env()
  if not main_repo:
    logging.info("Prow environment variables for repo not set")
    main_repo = MAIN_REPO + "@HEAD"
  logging.info("Main repository: %s", main_repo)
  repos = [main_repo]

  repos.extend(EXTRA_REPOS)

  #***************************************************************************
  # Checkout the code
  checkout = argo_build_util.deep_copy(task_template)

  checkout["name"] = "checkout"
  checkout["container"]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + ",".join(repos),
    "--src_dir=" + self.src_root_dir
  ]

  argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, checkout, [])

  #***************************************************************************
  # Get credentials for the latest auto-deployed cluster
  credentials = argo_build_util.deep_copy(task_template)

  credentials["name"] = "get-credentials"
  credentials["container"]["command"] = [
    "python3",
    "-m",
    "kubeflow.testing."
    "get_kf_testing_cluster",
    "get-credentials",
  ]

  dependencies = [checkout["name"]]
  argo_build_util.add_task_to_dag(self.workflow, E2E_DAG_NAME, credentials,
                                  dependencies)

  #**************************************************************************
  # Run a dag of tests
  self._build_tests_dag()

  # Add a task to run the dag
  dependencies = [credentials["name"]]
  argo_build_util.add_task_only_to_dag(self.workflow, E2E_DAG_NAME,
                                       TESTS_DAG_NAME,
                                       TESTS_DAG_NAME,
                                       dependencies)

  # **************************************************************************
  # create_pr_symlink
  # ***************************************************************************
  # TODO(jlewi): run_e2e_workflow.py should probably create the PR symlink
  step_name = "create-pr-symlink"
  command = ["python",
             "-m",
             "kubeflow.testing.prow_artifacts",
             "--artifacts_dir=" + self.output_dir,
             "create_pr_symlink"]

  if self.bucket:
    # Fix: the bucket must be passed as a --bucket flag; appending the bare
    # value (as before) would be parsed as a positional argument by
    # prow_artifacts. This matches the sibling builders in this file.
    command.append("--bucket=" + self.bucket)

  dependencies = [checkout["name"]]
  self._build_step(step_name, self.workflow, E2E_DAG_NAME, task_template,
                   command, dependencies)

  self._build_exit_dag()

  # Set the labels on all templates
  self.workflow = argo_build_util.set_task_template_labels(self.workflow)

  return self.workflow
def test_xgboost_synthetic(
    record_xml_attribute, name, namespace, # pylint: disable=too-many-branches,too-many-statements
    repos, image):
  """Generate a K8s Job that runs the xgboost_synthetic notebook and submit it.

  Args:
    record_xml_attribute: pytest fixture used to set the junit test name.
    name: Name for the K8s job; if empty a unique name is generated.
    namespace: The namespace where the job should run.
    repos: Which repos to checkout; if None or empty tries to infer based on
      PROW environment variables.
    image: The docker image to run the notebook in.

  Raises:
    ValueError: If repos can't be determined.
    RuntimeError: If the job does not complete successfully.
  """
  util.set_pytest_junit(record_xml_attribute, "test_xgboost_synthetic")

  util.maybe_activate_service_account()

  with open("job.yaml") as hf:
    # Fix: yaml.load without an explicit Loader is deprecated and unsafe;
    # safe_load is sufficient for this config file.
    job = yaml.safe_load(hf)

  # We need to checkout the correct version of the code
  # in presubmits and postsubmits. We should check the environment variables
  # for the prow environment variables to get the appropriate values. See
  # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
  if not repos:
    repos = argo_build_util.get_repo_from_prow_env()

  # Fix: without this guard a None result from the prow env would crash on
  # the string concatenation below with an opaque TypeError.
  if not repos:
    raise ValueError("Could not get repos from prow environment variable "
                     "and --repos isn't explicitly set")

  logging.info("Repos set to %s", repos)
  job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
    "/usr/local/bin/checkout_repos.sh",
    "--repos=" + repos,
    "--src_dir=/src",
    "--depth=all",
  ]
  job["spec"]["template"]["spec"]["containers"][0]["image"] = image
  util.load_kube_config(persist_config=False)

  if name:
    job["metadata"]["name"] = name
  else:
    job["metadata"]["name"] = ("xgboost-test-" +
                               datetime.datetime.now().strftime("%H%M%S") +
                               "-" + uuid.uuid4().hex[0:3])
  name = job["metadata"]["name"]

  job["metadata"]["namespace"] = namespace

  # Create an API client object to talk to the K8s master.
  api_client = k8s_client.ApiClient()
  batch_api = k8s_client.BatchV1Api(api_client)

  logging.info("Creating job:\n%s", yaml.dump(job))
  actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
                                               job)
  logging.info("Created job %s.%s:\n%s", namespace, name,
               yaml.safe_dump(actual_job.to_dict()))

  final_job = util.wait_for_job(api_client, namespace, name,
                                timeout=datetime.timedelta(minutes=30))

  logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))

  if not final_job.status.conditions:
    raise RuntimeError("Job {0}.{1}; did not complete".format(
      namespace, name))

  last_condition = final_job.status.conditions[-1]
  if last_condition.type not in ["Complete"]:
    logging.error("Job didn't complete successfully")
    raise RuntimeError("Job {0}.{1} failed".format(namespace, name))