def test_write_xml(self):
  """create_junit_xml_file serializes a pass and a failure as expected.

  Builds one passing and one failing TestCase, writes them to a temp
  file, and compares the resulting XML against a golden string.
  """
  # delete=False so the file persists after the `with` closes it; we are
  # responsible for removing it ourselves (the original leaked it).
  with tempfile.NamedTemporaryFile(delete=False) as hf:
    pass
  try:
    success = test_util.TestCase()
    success.class_name = "some_test"
    success.name = "first"
    success.time = 10

    failure = test_util.TestCase()
    failure.class_name = "some_test"
    failure.name = "first"
    failure.time = 10
    failure.failure = "failed for some reason."

    test_util.create_junit_xml_file([success, failure], hf.name)
    with open(hf.name) as result_file:
      output = result_file.read()
    print(output)
    expected = (
        """<testsuite failures="1" tests="2" time="20">"""
        """<testcase classname="some_test" name="first" time="10" />"""
        """<testcase classname="some_test" name="first" """
        """time="10"><failure>failed for some reason.</failure>"""
        """</testcase></testsuite>""")
    self.assertEqual(expected, output)
  finally:
    # Clean up: delete=False means nothing else removes the file.
    os.remove(hf.name)
def run_tests(args):
  """Discover and run every *_test.py under args.src_dir, recording results.

  Each test file is executed as its own python process; outcomes are
  collected into junit XML at args.junit_path (optionally on GCS).

  Args:
    args: Parsed command line arguments; uses src_dir, junit_path, project.
  """
  # Print out the pylint version because different versions can produce
  # different results.
  util.run(["pylint", "--version"])

  # kubeflow_testing is imported as a submodule so we should exclude it
  # TODO(jlewi): Perhaps we should get a list of submodules and exclude
  # them automatically?
  dir_excludes = ["kubeflow_testing", "vendor"]
  includes = ["*_test.py"]
  test_cases = []

  env = os.environ.copy()
  # TODO(jlewi): Once we switch to using Argo I think we can stop setting
  # the PYTHONPATH here and just inheriting it from the environment.
  # When we use ARGO each step will run in its own pod and we can set the
  # PYTHONPATH environment variable as needed for that pod.
  env["PYTHONPATH"] = (
      args.src_dir + ":" + os.path.join(args.src_dir, "kubeflow_testing",
                                        "py"))

  num_failed = 0
  for root, dirs, files in os.walk(args.src_dir, topdown=True):
    # excludes can be done with fnmatch.filter and complementary set,
    # but it's more annoying to read.
    dirs[:] = [d for d in dirs if d not in dir_excludes]
    for pat in includes:
      for f in fnmatch.filter(files, pat):
        full_path = os.path.join(root, f)

        test_case = test_util.TestCase()
        test_case.class_name = "pytest"
        test_case.name = full_path[len(args.src_dir):]
        start_time = time.time()
        test_cases.append(test_case)
        try:
          util.run(["python", full_path], cwd=args.src_dir, env=env)
        except subprocess.CalledProcessError:
          test_case.failure = "{0} failed.".format(test_case.name)
          num_failed += 1
        finally:
          test_case.time = time.time() - start_time

  if num_failed:
    logging.error("%s tests failed.", num_failed)
  else:
    # Bug fix: previously logged "No lint issues." — a copy/paste from
    # run_lint. This function runs tests, not lint.
    logging.info("No test failures.")

  if not args.junit_path:
    logging.info("No --junit_path.")
    return

  gcs_client = None
  if args.junit_path.startswith("gs://"):
    gcs_client = storage.Client(project=args.project)
  test_util.create_junit_xml_file(test_cases, args.junit_path, gcs_client)
def setup_kubeflow(args):
  """Setup Kubeflow.

  Deploys the kubeflow-core ksonnet component to the target cluster and
  waits for the TfJob operator deployment to become ready, recording the
  outcome as a junit test case.

  Args:
    args: Command line arguments that control the setup process.
  """
  util.configure_kubectl(args.project, args.zone, args.cluster)
  util.load_kube_config()

  # Client for talking to the K8s master.
  api_client = k8s_client.ApiClient()

  t = test_util.TestCase()
  try:
    start = time.time()

    component = "core"
    params = {
        "tfJobImage": args.image,
        "name": "kubeflow-core",
        "namespace": args.namespace,
    }

    account = util.run_and_output(
        ["gcloud", "config", "get-value", "account", "--quiet"]).strip()
    logging.info("Using GCP account %s", account)

    ks_deploy(args.test_app_dir, component, params, account=account)

    # Verify that the TfJob operator is actually deployed.
    tf_job_deployment_name = "tf-job-operator"
    logging.info("Verifying TfJob deployment %s started.",
                 tf_job_deployment_name)
    # TODO(jlewi): We should verify the image of the operator is the correct
    # one.
    try:
      util.wait_for_deployment(api_client, args.namespace,
                               tf_job_deployment_name)
    finally:
      # Run kubectl describe to get useful information about the deployment.
      # This will help troubleshoot any errors.
      util.run([
          "kubectl", "-n", args.namespace, "describe", "deploy",
          tf_job_deployment_name
      ])
      util.run([
          "kubectl", "-n", args.namespace, "describe", "pods", "-l",
          "name=tf-job-operator"
      ])
    # Reraise the exception so that the step fails because there's no point
    # continuing the test.
  except subprocess.CalledProcessError as e:
    t.failure = "kubeflow-deploy failed;\n" + (e.output or "")
    raise
  except util.TimeoutError as e:
    t.failure = e.message
    raise
  finally:
    # Always emit a junit record, pass or fail.
    t.time = time.time() - start
    t.name = "kubeflow-deploy"
    t.class_name = "GKE"
    gcs_client = storage.Client(project=args.project)
    test_util.create_junit_xml_file([t], args.junit_path, gcs_client)
def setup_cluster(args):
  """Setup a GKE cluster for TensorFlow jobs.

  Creates the cluster (optionally with accelerators), points kubectl at
  it, and ensures a cluster-admin binding exists for the active account,
  recording the outcome as a junit test case.

  Args:
    args: Command line arguments that control the setup process.
  """
  gke = discovery.build("container", "v1")

  project = args.project
  cluster_name = args.cluster
  zone = args.zone
  machine_type = "n1-standard-8"

  cluster_request = {
      "cluster": {
          "name": cluster_name,
          "description": "A GKE cluster for TF.",
          "initialNodeCount": 1,
          "nodeConfig": {
              "machineType": machine_type,
              "oauthScopes": [
                  "https://www.googleapis.com/auth/cloud-platform",
              ],
          },
      }
  }

  if args.accelerators:
    # Accelerator specs are of the form type=count,
    # e.g. nvidia-tesla-k80=1.
    cluster_request["cluster"]["nodeConfig"]["accelerators"] = []
    for accelerator_spec in args.accelerators:
      accelerator_type, accelerator_count = accelerator_spec.split("=", 1)
      cluster_request["cluster"]["nodeConfig"]["accelerators"].append({
          "acceleratorCount": accelerator_count,
          "acceleratorType": accelerator_type,
      })

  util.create_cluster(gke, project, zone, cluster_request)
  util.configure_kubectl(project, zone, cluster_name)
  util.load_kube_config()

  # Create an API client object to talk to the K8s master.
  api_client = k8s_client.ApiClient()

  t = test_util.TestCase()
  try:
    start = time.time()

    # CI tests always failed here due to default-admin exits, check firstly.
    clusterrolebinding = util.run_and_output(
        ["kubectl", "get", "clusterrolebinding"])
    if "default-admin" not in str(clusterrolebinding):
      account = util.run_and_output(
          ["gcloud", "config", "get-value", "account", "--quiet"]).strip()
      logging.info("Using GCP account %s", account)
      # NOTE(review): the original source was redacted here ("******");
      # reconstructed as binding the active gcloud account to
      # cluster-admin, mirroring setup_kubeflow — confirm against
      # upstream history.
      util.run([
          "kubectl", "create", "clusterrolebinding", "default-admin",
          "--clusterrole=cluster-admin", "--user=" + account
      ])
  except subprocess.CalledProcessError as e:
    t.failure = "setup-cluster failed;\n" + (e.output or "")
    raise
  except util.TimeoutError as e:
    t.failure = e.message
    raise
  finally:
    # Always emit a junit record, pass or fail.
    t.time = time.time() - start
    t.name = "setup-cluster"
    t.class_name = "GKE"
    gcs_client = storage.Client(project=args.project)
    test_util.create_junit_xml_file([t], args.junit_path, gcs_client)
def run_lint(args):
  """Run pylint over every .py file under args.src_dir, recording results.

  Args:
    args: Parsed command line arguments; uses src_dir, junit_path, project.
  """
  start_time = time.time()
  # Print out the pylint version because different versions can produce
  # different results.
  util.run(["pylint", "--version"])

  # kubeflow_testing is imported as a submodule so we should exclude it
  # TODO(jlewi): Perhaps we should get a list of submodules and exclude
  # them automatically?
  dir_excludes = [
      "kubeflow_testing",
      "test/test-app",
      "vendor",
  ]
  full_dir_excludes = [
      os.path.join(os.path.abspath(args.src_dir), f) for f in dir_excludes
  ]
  includes = ["*.py"]
  failed_files = []
  rc_file = os.path.join(args.src_dir, ".pylintrc")
  for root, dirs, files in os.walk(os.path.abspath(args.src_dir),
                                   topdown=True):
    # Bug fix: the old bare `root.startswith(e)` also excluded siblings
    # that merely share a name prefix (e.g. ".../vendor2" for exclude
    # ".../vendor"). Require an exact match or a path-separator boundary.
    if any(root == e or root.startswith(e + os.sep)
           for e in full_dir_excludes):
      # Prune traversal — no point walking deeper into an excluded tree
      # (every descendant would be skipped by the same check anyway).
      dirs[:] = []
      continue
    for pat in includes:
      for f in fnmatch.filter(files, pat):
        full_path = os.path.join(root, f)
        try:
          util.run(["pylint", "--rcfile=" + rc_file, full_path],
                   cwd=args.src_dir)
        except subprocess.CalledProcessError:
          failed_files.append(full_path[len(args.src_dir):])

  if failed_files:
    failed_files.sort()
    logging.error("%s files had lint errors:\n%s", len(failed_files),
                  "\n".join(failed_files))
  else:
    logging.info("No lint issues.")

  if not args.junit_path:
    logging.info("No --junit_path.")
    return

  test_case = test_util.TestCase()
  test_case.class_name = "pylint"
  test_case.name = "pylint"
  test_case.time = time.time() - start_time
  if failed_files:
    test_case.failure = "Files with lint issues: {0}".format(
        ", ".join(failed_files))

  gcs_client = None
  if args.junit_path.startswith("gs://"):
    gcs_client = storage.Client(project=args.project)
  test_util.create_junit_xml_file([test_case], args.junit_path, gcs_client)