Example 1
    def test_write_xml(self):
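        # Create an empty temp file just to get a filename; delete=False
        # keeps the file on disk after the context manager closes it.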
        with tempfile.NamedTemporaryFile(delete=False) as hf:
            pass

        success = test_util.TestCase()
        success.class_name = "some_test"
        success.name = "first"
        success.time = 10

        failure = test_util.TestCase()
        failure.class_name = "some_test"
        failure.name = "first"
        failure.time = 10
        failure.failure = "failed for some reason."

        test_util.create_junit_xml_file([success, failure], hf.name)
        with open(hf.name) as hf:
            output = hf.read()
            print(output)
        expected = (
            """<testsuite failures="1" tests="2" time="20">"""
            """<testcase classname="some_test" name="first" time="10" />"""
            """<testcase classname="some_test" name="first" """
            """time="10"><failure>failed for some reason.</failure>"""
            """</testcase></testsuite>""")

        self.assertEqual(expected, output)
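The examples never show the test_util.TestCase class itself. Judging from the attributes the snippets set (class_name, name, time, failure) and the JUnit attributes in the expected XML above, a minimal sketch might look like this; the real definition lives in kubeflow_testing and may differ:

class TestCase(object):
    # Hypothetical sketch inferred from usage in these examples; not
    # the verified kubeflow_testing implementation.
    def __init__(self):
        self.class_name = ""  # serialized as the JUnit "classname" attribute
        self.name = ""        # serialized as the JUnit "name" attribute
        self.time = 0         # test duration in seconds ("time" attribute)
        self.failure = None   # failure message, or None if the test passed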
Example 2
    def testOk(self):
        def ok():
            time.sleep(1)

        t = test_util.TestCase()
        test_util.wrap_test(ok, t)
        self.assertGreater(t.time, 0)
        self.assertEqual(None, t.failure)
Example 3
    def testGeneralError(self):
        def run():
            time.sleep(1)
            raise ValueError("some error")

        t = test_util.TestCase()
        self.assertRaises(ValueError, test_util.wrap_test, run, t)
        self.assertGreater(t.time, 0)
        self.assertEqual("Test failed; some error", t.failure)
Example 4
def run_tests(args):
  # Print out the pylint version because different versions can produce
  # different results.
  util.run(["pylint", "--version"])

  # kubeflow_testing is imported as a submodule so we should exclude it
  # TODO(jlewi): Perhaps we should get a list of submodules and exclude
  # them automatically?
  dir_excludes = ["kubeflow_testing", "vendor"]
  includes = ["*_test.py"]
  test_cases = []

  env = os.environ.copy()
  # TODO(jlewi): Once we switch to using Argo I think we can stop setting
  # the PYTHONPATH here and just inherit it from the environment.
  # When we use Argo each step will run in its own pod and we can set the
  # PYTHONPATH environment variable as needed for that pod.
  env["PYTHONPATH"] = (
    args.src_dir + ":" + os.path.join(args.src_dir, "kubeflow_testing", "py"))

  num_failed = 0
  for root, dirs, files in os.walk(args.src_dir, topdown=True):
    # Excludes could be done with fnmatch.filter and a complementary set,
    # but that is harder to read.
    dirs[:] = [d for d in dirs if d not in dir_excludes]
    for pat in includes:
      for f in fnmatch.filter(files, pat):
        full_path = os.path.join(root, f)

        test_case = test_util.TestCase()
        test_case.class_name = "pytest"
        test_case.name = full_path[len(args.src_dir):]
        start_time = time.time()
        test_cases.append(test_case)
        try:
          util.run(["python", full_path], cwd=args.src_dir, env=env)
        except subprocess.CalledProcessError:
          test_case.failure = "{0} failed.".format(test_case.name)
          num_failed += 1
        finally:
          test_case.time = time.time() - start_time

  if num_failed:
    logging.error("%s tests failed.", num_failed)
  else:
    logging.info("No lint issues.")

  if not args.junit_path:
    logging.info("No --junit_path.")
    return

  gcs_client = None
  if args.junit_path.startswith("gs://"):
    gcs_client = storage.Client(project=args.project)

  test_util.create_junit_xml_file(test_cases, args.junit_path, gcs_client)
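run_tests only reads a handful of attributes off args. A hypothetical argparse wiring that would satisfy it (the flag names are assumptions inferred from the attribute accesses above, not taken from the real CLI):

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("--src_dir", default=os.getcwd(),
                    help="Directory to walk for *_test.py files.")
parser.add_argument("--junit_path", default="",
                    help="Local path or gs:// URI for the JUnit XML output.")
parser.add_argument("--project", default=None,
                    help="GCP project, used when --junit_path is a gs:// URI.")
run_tests(parser.parse_args())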
Example 5
    def test_get_num_failures_success(self):
        success = test_util.TestCase()
        success.class_name = "some_test"
        success.name = "first"
        success.time = 10

        e = test_util.create_xml([success])
        s = StringIO.StringIO()
        e.write(s)
        xml_value = s.getvalue()
        self.assertEqual(0, test_util.get_num_failures(xml_value))
Example 6
    def testSubprocessError(self):
        def run():
            raise subprocess.CalledProcessError(10,
                                                "some command",
                                                output="some output")

        t = test_util.TestCase()
        self.assertRaises(subprocess.CalledProcessError, test_util.wrap_test,
                          run, t)
        self.assertGreater(t.time, 0)
        self.assertEqual("Subprocess failed;\nsome output", t.failure)
Example 7
    def test_get_num_failures(self):
        failure = test_util.TestCase()
        failure.class_name = "some_test"
        failure.name = "first"
        failure.time = 10
        failure.failure = "failed for some reason."

        e = test_util.create_xml([failure])
        s = StringIO.StringIO()
        e.write(s)
        xml_value = s.getvalue()
        self.assertEqual(1, test_util.get_num_failures(xml_value))
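Examples 5 and 7 imply that get_num_failures parses the serialized <testsuite> element and reads its failures attribute. One plausible implementation, offered as an assumption rather than the verified source:

from xml.etree import ElementTree

def get_num_failures(xml_value):
    # Hypothetical sketch: read the "failures" attribute that
    # create_xml writes on the <testsuite> root (see Example 1).
    root = ElementTree.fromstring(xml_value)
    return int(root.attrib["failures"])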
Example 8
def setup_kubeflow(args):
    """Setup Kubeflow.

  Args:
    args: Command line arguments that control the setup process.
  """
    project = args.project
    cluster_name = args.cluster
    zone = args.zone

    util.configure_kubectl(project, zone, cluster_name)

    util.load_kube_config()
    # Create an API client object to talk to the K8s master.
    api_client = k8s_client.ApiClient()

    t = test_util.TestCase()
    try:
        start = time.time()

        params = {
            "tfJobImage": args.image,
            "name": "kubeflow-core",
            "namespace": args.namespace,
        }

        component = "core"

        account = util.run_and_output(
            ["gcloud", "config", "get-value", "account", "--quiet"]).strip()
        logging.info("Using GCP account %s", account)

        ks_deploy(args.test_app_dir, component, params, account=account)

        # Verify that the TfJob operator is actually deployed.
        tf_job_deployment_name = "tf-job-operator"
        logging.info("Verifying TfJob deployment %s started.",
                     tf_job_deployment_name)

        # TODO(jlewi): We should verify the image of the operator is the correct
        # one.
        try:
            util.wait_for_deployment(api_client, args.namespace,
                                     tf_job_deployment_name)
        finally:
            # Run kubectl describe to get useful information about the deployment.
            # This will help troubleshoot any errors.
            util.run([
                "kubectl", "-n", args.namespace, "describe", "deploy",
                tf_job_deployment_name
            ])
            util.run([
                "kubectl", "-n", args.namespace, "describe", "pods", "-l",
                "name=tf-job-operator"
            ])

    # Reraise the exception so that the step fails because there's no point
    # continuing the test.
    except subprocess.CalledProcessError as e:
        t.failure = "kubeflow-deploy failed;\n" + (e.output or "")
        raise
    except util.TimeoutError as e:
        t.failure = e.message
        raise
    finally:
        t.time = time.time() - start
        t.name = "kubeflow-deploy"
        t.class_name = "GKE"
        gcs_client = storage.Client(project=args.project)
        test_util.create_junit_xml_file([t], args.junit_path, gcs_client)
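ks_deploy is defined elsewhere in this module; the call above suggests it sets ksonnet parameters on the component and then applies it. A hedged sketch of the shape such a helper could take, using standard ks subcommands (the "default" environment name and the handling of account are assumptions):

def ks_deploy(app_dir, component, params, account=None):
    # Hypothetical sketch inferred from the call site above, not the
    # module's actual implementation. "default" is an assumed ksonnet
    # environment; account is presumably used for RBAC setup.
    for k, v in params.items():
        util.run(["ks", "param", "set", component, k, v], cwd=app_dir)
    util.run(["ks", "apply", "default", "-c", component], cwd=app_dir)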
Example 9
def setup_cluster(args):
    """Setup a GKE cluster for TensorFlow jobs.

  Args:
    args: Command line arguments that control the setup process.
  """
    gke = discovery.build("container", "v1")

    project = args.project
    cluster_name = args.cluster
    zone = args.zone
    machine_type = "n1-standard-8"

    cluster_request = {
        "cluster": {
            "name": cluster_name,
            "description": "A GKE cluster for TF.",
            "initialNodeCount": 1,
            "nodeConfig": {
                "machineType":
                machine_type,
                "oauthScopes": [
                    "https://www.googleapis.com/auth/cloud-platform",
                ],
            },
        }
    }

    if args.accelerators:
        cluster_request["cluster"]["nodeConfig"]["accelerators"] = []
        for accelerator_spec in args.accelerators:
            accelerator_type, accelerator_count = accelerator_spec.split("=", 1)
            cluster_request["cluster"]["nodeConfig"]["accelerators"].append({
                "acceleratorCount": accelerator_count,
                "acceleratorType": accelerator_type,
            })

    util.create_cluster(gke, project, zone, cluster_request)

    util.configure_kubectl(project, zone, cluster_name)

    util.load_kube_config()
    # Create an API client object to talk to the K8s master.
    api_client = k8s_client.ApiClient()

    t = test_util.TestCase()
    try:
        start = time.time()

        # CI tests often failed here because the default-admin
        # clusterrolebinding already exists, so check for it first.
        clusterrolebinding = util.run_and_output(
            ["kubectl", "get", "clusterrolebinding"])
        if "default-admin" not in str(clusterrolebinding):
            account = util.run_and_output(
                ["gcloud", "config", "get-value", "account",
                 "--quiet"]).strip()
            logging.info("Using GCP account %s", account)
            util.run([
                "kubectl", "create", "clusterrolebinding", "default-admin",
                "--clusterrole=cluster-admin", "--user=" + account
            ])

    except subprocess.CalledProcessError as e:
        t.failure = "setup-cluster failed;\n" + (e.output or "")
        raise
    except util.TimeoutError as e:
        t.failure = e.message
        raise
    finally:
        t.time = time.time() - start
        t.name = "setup-cluster"
        t.class_name = "GKE"
        gcs_client = storage.Client(project=args.project)
        test_util.create_junit_xml_file([t], args.junit_path, gcs_client)
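The accelerators branch above expects each entry in args.accelerators to have the form <type>=<count>. A small standalone demonstration of that parsing (the accelerator names are standard GKE types, used purely as sample input):

# Demonstrates the "<type>=<count>" format that setup_cluster splits;
# sample values only.
for accelerator_spec in ["nvidia-tesla-k80=1", "nvidia-tesla-p100=2"]:
    accelerator_type, accelerator_count = accelerator_spec.split("=", 1)
    print(accelerator_type, accelerator_count)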
Example 10
def run_lint(args):
  start_time = time.time()
  # Print out the pylint version because different versions can produce
  # different results.
  util.run(["pylint", "--version"])

  # kubeflow_testing is imported as a submodule so we should exclude it
  # TODO(jlewi): Perhaps we should get a list of submodules and exclude
  # them automatically?
  dir_excludes = [
    "kubeflow_testing",
    "test/test-app",
    "vendor",
  ]
  full_dir_excludes = [
    os.path.join(os.path.abspath(args.src_dir), f) for f in dir_excludes
  ]
  includes = ["*.py"]
  failed_files = []
  rc_file = os.path.join(args.src_dir, ".pylintrc")
  for root, dirs, files in os.walk(os.path.abspath(args.src_dir), topdown=True):
    # Excludes could be done with fnmatch.filter and a complementary set,
    # but that is harder to read.
    exclude = False
    for e in full_dir_excludes:
      if root.startswith(e):
        exclude = True
        break
    if exclude:
      continue

    for pat in includes:
      for f in fnmatch.filter(files, pat):
        full_path = os.path.join(root, f)
        try:
          util.run(["pylint", "--rcfile=" + rc_file, full_path],
                   cwd=args.src_dir)
        except subprocess.CalledProcessError:
          failed_files.append(full_path[len(args.src_dir):])

  if failed_files:
    failed_files.sort()
    logging.error("%s files had lint errors:\n%s", len(failed_files),
                  "\n".join(failed_files))
  else:
    logging.info("No lint issues.")

  if not args.junit_path:
    logging.info("No --junit_path.")
    return

  test_case = test_util.TestCase()
  test_case.class_name = "pylint"
  test_case.name = "pylint"
  test_case.time = time.time() - start_time
  if failed_files:
    test_case.failure = "Files with lint issues: {0}".format(
      ", ".join(failed_files))

  gcs_client = None
  if args.junit_path.startswith("gs://"):
    gcs_client = storage.Client(project=args.project)

  test_util.create_junit_xml_file([test_case], args.junit_path, gcs_client)
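create_junit_xml_file appears throughout these examples with an optional gcs_client argument. A sketch consistent with Example 1's expected XML and the gs:// handling above, assuming the element tree comes from create_xml (see Examples 5 and 7); the real helper may differ:

import StringIO  # Python 2, matching the examples above

def create_junit_xml_file(test_cases, output_path, gcs_client=None):
    # Hypothetical sketch, not the verified implementation.
    e = create_xml(test_cases)
    s = StringIO.StringIO()
    e.write(s)
    xml_value = s.getvalue()
    if output_path.startswith("gs://") and gcs_client:
        # Split "gs://bucket/path/to/file" into bucket and object name.
        bucket_name, blob_name = output_path[len("gs://"):].split("/", 1)
        bucket = gcs_client.get_bucket(bucket_name)
        bucket.blob(blob_name).upload_from_string(xml_value)
    else:
        with open(output_path, "w") as hf:
            hf.write(xml_value)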