Code example #1
File: util_test.py Project: jose5918/testing
    def testSplitGcsUri(self):
        bucket, path = util.split_gcs_uri("gs://some-bucket/some/path")
        self.assertEquals("some-bucket", bucket)
        self.assertEquals("some/path", path)

        bucket, path = util.split_gcs_uri("gs://some-bucket")
        self.assertEquals("some-bucket", bucket)
        self.assertEquals("", path)
Code example #2
File: prow_artifacts.py Project: subodh101/testing
def check_no_errors(gcs_client, artifacts_dir):
    """Check that all the XML files exist and there were no errors.
  Args:
    gcs_client: The GCS client.
    artifacts_dir: The directory where artifacts should be stored.
  Returns:
    True if there were no errors and false otherwise.
  """
    bucket_name, prefix = util.split_gcs_uri(artifacts_dir)
    bucket = gcs_client.get_bucket(bucket_name)
    no_errors = True

    for b in bucket.list_blobs(prefix=os.path.join(prefix, "junit")):
        # blob.name is the object's path inside the bucket; blob.path is the
        # URL-quoted API path, which is not what to_gcs_uri expects.
        full_path = util.to_gcs_uri(bucket_name, b.name)
        if os.path.splitext(b.name)[-1] != ".xml":
            logging.info("Skipping %s; not an xml file", full_path)
            continue
        logging.info("Checking %s", full_path)
        xml_contents = b.download_as_string()

        if test_util.get_num_failures(xml_contents) > 0:
            logging.info("Test failures in %s", full_path)
            no_errors = False

    return no_errors
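
This example also calls util.to_gcs_uri, the inverse of split_gcs_uri. Its source is likewise not shown on this page; a plausible sketch, assuming it simply joins the bucket name and object path under the gs:// scheme:

def to_gcs_uri(bucket, path):
    """Join a bucket name and object path into a gs:// URI. Sketch only."""
    return "gs://{0}/{1}".format(bucket, path.lstrip("/"))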
Code example #3
File: prow_artifacts.py Project: svalleru/testing
def check_no_errors(gcs_client, artifacts_dir):
    """Check that all the XML files exist and there were no errors.
  Args:
    gcs_client: The GCS client.
    artifacts_dir: The directory where artifacts should be stored.
  Returns:
    True if there were no errors and false otherwise.
  """
    bucket_name, prefix = util.split_gcs_uri(artifacts_dir)
    bucket = gcs_client.get_bucket(bucket_name)
    no_errors = True

    # Get a list of actual junit files.
    actual_junit = _get_actual_junit_files(bucket, prefix)

    for f in actual_junit:
        full_path = os.path.join(artifacts_dir, f)
        logging.info("Checking %s", full_path)
        b = bucket.blob(os.path.join(prefix, f))

        xml_contents = b.download_as_string()

        if test_util.get_num_failures(xml_contents) > 0:
            logging.info("Test failures in %s", full_path)
            no_errors = False

    return no_errors
Code example #4
File: run_e2e_workflow.py Project: jose5918/testing
def upload_file_to_gcs(source, target):
    gcs_client = storage.Client()
    bucket_name, path = util.split_gcs_uri(target)

    bucket = gcs_client.get_bucket(bucket_name)

    logging.info("Uploading file %s to %s.", source, target)
    blob = bucket.blob(path)
    blob.upload_from_filename(source)
Code example #5
File: run_e2e_workflow.py Project: jose5918/testing
def upload_to_gcs(contents, target):
    gcs_client = storage.Client()

    bucket_name, path = util.split_gcs_uri(target)

    bucket = gcs_client.get_bucket(bucket_name)
    logging.info("Writing %s", target)
    blob = bucket.blob(path)
    blob.upload_from_string(contents)
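
A hedged usage sketch for the two upload helpers above; the bucket and file names below are placeholders, not paths from the original project:

# Upload a local file, then a raw string, to hypothetical GCS locations.
upload_file_to_gcs("results/report.html", "gs://my-bucket/reports/report.html")
upload_to_gcs("status: ok", "gs://my-bucket/reports/status.txt")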
Code example #6
    def _load_oauth_file(self, oauth_file, admin_project):
        bucket, blob_path = util.split_gcs_uri(oauth_file)

        client = storage.Client(project=admin_project)
        bucket = client.get_bucket(bucket)

        blob = bucket.get_blob(blob_path)
        contents = blob.download_as_string()

        # safe_load avoids executing arbitrary tags and works with PyYAML >= 6,
        # where yaml.load requires an explicit Loader.
        return yaml.safe_load(contents)
Code example #7
def _upload_notebook_html(content, target):
    gcs_client = storage.Client()
    bucket_name, path = util.split_gcs_uri(target)

    bucket = gcs_client.get_bucket(bucket_name)

    logging.info("Uploading notebook to %s.", target)
    blob = bucket.blob(path)
    # Need to set content type so that if we browse in GCS we end up rendering
    # as html.
    blob.upload_from_string(content, content_type="text/html")
Code example #8
def get_oauth(project, oauth_file):
    """Get the OAuth information"""
    bucket, blob_path = util.split_gcs_uri(oauth_file)

    client = storage.Client(project=project)
    bucket = client.get_bucket(bucket)

    blob = bucket.get_blob(blob_path)
    contents = blob.download_as_string()

    oauth_info = yaml.safe_load(contents)
    return oauth_info
Code example #9
File: test_util.py Project: isabella232/testing-1
def create_junit_xml_file(test_cases, output_path, gcs_client=None):
    """Create a JUnit XML file.

  The junit schema is specified here:
  https://www.ibm.com/support/knowledgecenter/en/SSQ2R2_9.5.0/com.ibm.rsar.analysis.codereview.cobol.doc/topics/cac_useresults_junit.html

  Args:
    test_cases: TestSuite or List of test case objects.
    output_path: Path to write the XML
    gcs_client: GCS client to use if output is GCS.
  """
    t = create_xml(test_cases)
    logging.info("Creating %s", output_path)
    if output_path.startswith("gs://"):
        b = six.StringIO()
        t.write(b)

        bucket_name, path = util.split_gcs_uri(output_path)
        bucket = gcs_client.get_bucket(bucket_name)
        blob = bucket.blob(path)
        blob.upload_from_string(b.getvalue())
    else:
        dir_name = os.path.dirname(output_path)
        if not os.path.exists(dir_name):
            logging.info("Creating directory %s", dir_name)
            try:
                os.makedirs(dir_name)
            except OSError as e:
                if e.errno == errno.EEXIST:
                    # The path already exists. This is probably a race condition
                    # with some other test creating the directory.
                    # We should just be able to continue
                    pass
                else:
                    raise
        t.write(output_path)
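
A hedged usage sketch; make_test_cases is a hypothetical helper standing in for whatever TestSuite or test case objects create_xml accepts, which this page does not show:

from google.cloud import storage

test_cases = make_test_cases()  # hypothetical: build the test case objects

# GCS output needs a client; local output does not.
create_junit_xml_file(test_cases, "gs://my-bucket/artifacts/junit_e2e.xml",
                      gcs_client=storage.Client())
create_junit_xml_file(test_cases, "/tmp/artifacts/junit_e2e.xml")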
Code example #10
def main(): # pylint: disable=too-many-locals,too-many-statements
  logging.basicConfig(level=logging.INFO,
                      format=('%(levelname)s|%(asctime)s'
                              '|%(pathname)s|%(lineno)d| %(message)s'),
                      datefmt='%Y-%m-%dT%H:%M:%S',
                      )
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser()

  parser.add_argument(
    "--project", default="kubeflow-ci-deployment", type=str,
    help=("The project."))

  parser.add_argument(
    "--zone", default="us-east1-d", type=str, help=("The zone to deploy in."))

  parser.add_argument(
    "--oauth_file",
    default=("gs://kubeflow-ci-deployment_kf-data/"
             "kf-iap-oauth.kubeflow-ci-deployment.yaml"),
    type=str, help=("The file containing the OAuth client ID & secret "
                    "for IAP."))

  # TODO(jlewi): Should rename this argument to something like kfctl_src.
  # We should try to do it in a backwards compatible way.
  parser.add_argument(
    "--kubeflow_repo",
    default="/src/kubeflow/kubeflow",
    type=str, help=("Path to the source for kfctl. Should be the directory "
                    "containing the Makefile to build kfctl."))

  parser.add_argument(
    "--kfctl_path",
    default="",
    type=str, help=("Path to kfctl; can be a URL."))

  parser.add_argument(
    "--kfctl_config",
    default=("https://raw.githubusercontent.com/kubeflow/manifests"
             "/master/kfdef/kfctl_gcp_iap.yaml"),
    type=str, help=("Path to the kfctl config to use."))

  parser.add_argument(
    "--apps_dir",
    default=os.getcwd(),
    type=str, help=("Directory to store kubeflow apps."))

  parser.add_argument(
    "--name", type=str, default="kf-vmaster-{uid}",
    help=("Name for the deployment. This can be a python format string "
          "with the variable uid. Uid will automatically be substituted "
          "for a unique value based on the time."))

  parser.add_argument(
    "--email", type=str, default="",
    help=("(Optional.) Email of the person to create the default profile "
          "for. If not specified, uses the gcloud config value."))

  parser.add_argument(
    "--extra_users", type=str, default="",
    help=("Comma separated list of additional users to grant access. "
          "Should be in the form user:[email protected] or "
          "serviceAccount:[email protected]"))

  parser.add_argument(
    "--labels", type=str, default="",
    help=("Comma separated list of extra labels; e.g. "
          "--labels=k1=v1,k2=v2"))

  parser.add_argument("--setup_project", dest="setup_project",
                      action="store_true", help="Setup the project")
  parser.add_argument("--no-setup_project", dest="setup_project",
                      action="store_false", help="Do not setup the project")
  parser.set_defaults(setup_project=True)

  parser.add_argument("--use_self_cert", dest="use_self_cert",
                      action="store_true",
                      help="Use a self signed certificate")
  parser.add_argument("--no-use_self_cert", dest="use_self_cert",
                      action="store_false",
                      help="Do not use a self signed certificate")
  parser.set_defaults(use_self_cert=True)

  args = parser.parse_args()

  util.maybe_activate_service_account()

  # Wait for credentials to deal with workload identity issues
  gcp_util.get_gcp_credentials()

  # Wrap gcloud commands in a retry loop to deal with metadata and workload
  # identity issues.
  @retrying.retry(stop_max_delay=5*60*1000, wait_exponential_max=10000)
  def _gcloud_list():
    # For debugging purposes output the command
    util.run(["gcloud", "config", "list"])
    util.run(["gcloud", "auth", "list"])
  _gcloud_list()

  bucket, blob_path = util.split_gcs_uri(args.oauth_file)

  client = storage.Client(project=args.project)
  bucket = client.get_bucket(bucket)

  blob = bucket.get_blob(blob_path)
  contents = blob.download_as_string()

  oauth_info = yaml.safe_load(contents)

  if args.kubeflow_repo and args.kfctl_path:
    raise ValueError("Exactly one of --kubeflow_repo and --kfctl_path needs "
                     "to be set.")

  if not args.kubeflow_repo and not args.kfctl_path:
    raise ValueError("Exactly one of --kubeflow_repo and --kfctl_path needs "
                     "to be set.")

  git_describe = ""
  if args.kubeflow_repo:
    git_describe = util.run(["git", "describe", "--tags", "--always", "--dirty"],
                             cwd=args.kubeflow_repo).strip("'")

    kfctl_path = build_kfctl_go(args)
  else:
    if args.kfctl_path.startswith("http"):
      temp_dir = tempfile.mkdtemp()

      filename = "kfctl"

      zipped = False
      if args.kfctl_path.endswith(".tar.gz"):
        zipped = True
        filename = filename + ".tar.gz"

      util.run(["curl", "-L", "-o", filename, args.kfctl_path],
               cwd=temp_dir)
      if zipped:
        util.run(["tar", "-xvf", "kfctl.tar.gz"], cwd=temp_dir)

      kfctl_path = os.path.join(temp_dir, "kfctl")
      logging.info("Changing permissions on %s", kfctl_path)
      os.chmod(kfctl_path, 0o777)
    else:
      kfctl_path = args.kfctl_path

  git_describe = util.run([kfctl_path, "version"])

  logging.info("kfctl path set to %s", kfctl_path)

  # We need to keep the name short to avoid hitting limits with certificates.
  uid = datetime.datetime.now().strftime("%m%d") + "-"
  uid = uid + uuid.uuid4().hex[0:3]

  args.name = args.name.format(uid=uid)
  logging.info("Using name %s", args.name)

  app_dir = os.path.join(args.apps_dir, args.name)

  if not os.path.exists(args.apps_dir):
    os.makedirs(args.apps_dir)

  env = {}
  env.update(os.environ)
  env.update(oauth_info)

  # GCP labels may only contain alphanumeric characters, hyphens, and
  # underscores. Replace invalid characters with hyphens.
  labels = {"kfctl-git": git_describe,
            "purpose": "kf-test-cluster",
            "auto-deploy": "true"}

  for k, v in labels.items():
    val = v.lower().replace("\"", "")
    val = re.sub(r"[^a-z0-9\-_]", "-", val)
    labels[k] = val

  if args.labels:
    logging.info("Parsing labels %s", args.labels)
    for pair in args.labels.split(","):
      pieces = pair.split("=")
      if len(pieces) != 2:
        logging.error("Skipping pair %s; not of the form key=value", pair)
        continue
      key = pieces[0].strip()
      value = pieces[1].strip()

      labels[key] = value
  logging.info("labels: %s", labels)
  deploy_with_kfctl_go(kfctl_path, args, app_dir, env, labels=labels)
  add_extra_users(args.project, args.extra_users)
Code example #11
def run_papermill_job(
        notebook_path,
        name,
        namespace,  # pylint: disable=too-many-branches,too-many-statements
        repos,
        image):
    """Generate a K8s job to run a notebook using papermill

  Args:
    notebook_path: Path to the notebook. This should be in the form
      "{REPO_OWNER}/{REPO}/path/to/notebook.ipynb"
    name: Name for the K8s job
    namespace: The namespace where the job should run.
    repos: Which repos to checkout; if None or empty tries
      to infer based on PROW environment variables
    image: The docker image to run the notebook in.
  """

    util.maybe_activate_service_account()

    with open("job.yaml") as hf:
        job = yaml.safe_load(hf)

    if notebook_path.startswith("/"):
        raise ValueError(
            "notebook_path={0} should not start with /".format(notebook_path))

    # We need to check out the correct version of the code in presubmits
    # and postsubmits, so we check the prow environment variables to get
    # the appropriate values. See
    # https://github.com/kubernetes/test-infra/blob/45246b09ed105698aa8fb928b7736d14480def29/prow/jobs.md#job-environment-variables
    if not repos:
        repos = argo_build_util.get_repo_from_prow_env()

    if not repos:
        raise ValueError("Could not get repos from prow environment variable "
                         "and --repos isn't explicitly set")

    repos += ",kubeflow/testing@HEAD"

    logging.info("Repos set to %s", repos)
    job["spec"]["template"]["spec"]["initContainers"][0]["command"] = [
        "/usr/local/bin/checkout_repos.sh",
        "--repos=" + repos,
        "--src_dir=/src",
        "--depth=all",
    ]

    job["spec"]["template"]["spec"]["containers"][0]["image"] = image

    full_notebook_path = os.path.join("/src", notebook_path)
    job["spec"]["template"]["spec"]["containers"][0]["command"] = [
        "python3", "-m", "kubeflow.examples.notebook_tests.execute_notebook",
        "--notebook_path", full_notebook_path
    ]

    job["spec"]["template"]["spec"]["containers"][0][
        "workingDir"] = os.path.dirname(full_notebook_path)

    # The prow bucket to use for results/artifacts
    prow_bucket = prow_artifacts.PROW_RESULTS_BUCKET

    if os.getenv("REPO_OWNER") and os.getenv("REPO_NAME"):
        # Running under prow
        prow_dir = prow_artifacts.get_gcs_dir(prow_bucket)
        logging.info("Prow artifacts dir: %s", prow_dir)
        prow_dir = os.path.join(prow_dir, "artifacts")

        if os.getenv("TEST_TARGET_NAME"):
            prow_dir = os.path.join(prow_dir,
                                    os.getenv("TEST_TARGET_NAME").lstrip("/"))
        prow_bucket, prow_path = util.split_gcs_uri(prow_dir)

    else:
        prow_path = "notebook-test" + datetime.datetime.now().strftime(
            "%H%M%S")
        prow_path = prow_path + "-" + uuid.uuid4().hex[0:3]
        prow_dir = util.to_gcs_uri(prow_bucket, prow_path)

    prow_path = os.path.join(prow_path, name + ".html")
    output_gcs = util.to_gcs_uri(NB_BUCKET, prow_path)

    job["spec"]["template"]["spec"]["containers"][0]["env"] = [
        {
            "name": "OUTPUT_GCS",
            "value": output_gcs
        },
        {
            "name": "PYTHONPATH",
            "value": "/src/kubeflow/testing/py:/src/kubeflow/examples/py"
        },
    ]

    logging.info("Notebook will be written to %s", output_gcs)
    util.load_kube_config(persist_config=False)

    if name:
        job["metadata"]["name"] = name
    else:
        job["metadata"]["name"] = ("notebook-test-" +
                                   datetime.datetime.now().strftime("%H%M%S") +
                                   "-" + uuid.uuid4().hex[0:3])
    name = job["metadata"]["name"]

    job["metadata"]["namespace"] = namespace

    # Create an API client object to talk to the K8s master.
    api_client = k8s_client.ApiClient()
    batch_api = k8s_client.BatchV1Api(api_client)

    logging.info("Creating job:\n%s", yaml.dump(job))
    actual_job = batch_api.create_namespaced_job(job["metadata"]["namespace"],
                                                 job)
    logging.info("Created job %s.%s:\n%s", namespace, name,
                 yaml.safe_dump(actual_job.to_dict()))

    final_job = util.wait_for_job(api_client,
                                  namespace,
                                  name,
                                  timeout=datetime.timedelta(minutes=30))

    logging.info("Final job:\n%s", yaml.safe_dump(final_job.to_dict()))

    # Download notebook html to artifacts
    logging.info("Copying %s to bucket %s", output_gcs, prow_bucket)

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(NB_BUCKET)
    blob = bucket.get_blob(prow_path)

    destination_bucket = storage_client.get_bucket(prow_bucket)
    bucket.copy_blob(blob, destination_bucket)

    if not final_job.status.conditions:
        raise RuntimeError("Job {0}.{1} did not complete".format(
            namespace, name))

    last_condition = final_job.status.conditions[-1]

    if last_condition.type not in ["Complete"]:
        logging.error("Job didn't complete successfully")
        raise RuntimeError("Job {0}.{1} failed".format(namespace, name))
Code example #12
def main():  # pylint: disable=too-many-locals,too-many-statements
    logging.basicConfig(
        level=logging.INFO,
        format=('%(levelname)s|%(asctime)s'
                '|%(pathname)s|%(lineno)d| %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()

    parser.add_argument("--project",
                        default="kubeflow-ci-deployment",
                        type=str,
                        help=("The project."))

    parser.add_argument("--zone",
                        default="us-east1-d",
                        type=str,
                        help=("The zone to deploy in."))

    parser.add_argument(
        "--oauth_file",
        default=("gs://kubeflow-ci-deployment_kf-data/"
                 "kf-iap-oauth.kubeflow-ci-deployment.yaml"),
        type=str,
        help=("The file containing the OAuth client ID & secret"
              "for IAP."))

    parser.add_argument("--kubeflow_repo",
                        default="/home/jlewi/git_kubeflow",
                        type=str,
                        help=("Path to the Kubeflow repo to use"))

    parser.add_argument(
        "--kfctl_config",
        default=("https://raw.githubusercontent.com/kubeflow/kubeflow/master"
                 "/bootstrap/config/kfctl_gcp_iap.yaml"),
        type=str,
        help=("Path to the kfctl config to use"))

    parser.add_argument("--apps_dir",
                        default=os.getcwd(),
                        type=str,
                        help=("Directory to store kubeflow apps."))

    parser.add_argument("--name",
                        type=str,
                        default="",
                        help=("Name for the deployment."))

    parser.add_argument("--snapshot_file",
                        default="",
                        type=str,
                        help=("A json file containing information about the "
                              "snapshot to use."))

    parser.add_argument("--job_name",
                        default="",
                        type=str,
                        help=("Pod name running the job."))

    args = parser.parse_args()

    bucket, blob_path = util.split_gcs_uri(args.oauth_file)

    client = storage.Client(project=args.project)
    bucket = client.get_bucket(bucket)

    blob = bucket.get_blob(blob_path)
    contents = blob.download_as_string()

    oauth_info = yaml.safe_load(contents)

    git_describe = util.run(
        ["git", "describe", "--tags", "--always", "--dirty"],
        cwd=args.kubeflow_repo).strip("'")

    if args.snapshot_file:
        logging.info("Loading info from snapshot file %s", args.snapshot_file)
        with open(args.snapshot_file) as hf:
            snapshot_info = json.load(hf)
            name = snapshot_info["name"]
    else:
        # No snapshot file; default to an empty snapshot so the later
        # snapshot_info["repos"] lookup doesn't raise a NameError.
        snapshot_info = {"repos": []}
        name = args.name

    kfctl_path = build_kfctl_go(args)

    app_dir = os.path.join(args.apps_dir, name)
    # Clean up the previous deployment. We attempt to run "kfctl delete all"
    # but we don't depend on it succeeding, because the app directory might
    # not be up to date.
    if os.path.exists(app_dir):
        try:
            util.run([kfctl_path, "delete", "all", "--delete_storage"],
                     cwd=app_dir)
        except subprocess.CalledProcessError as e:
            logging.error("kfctl delete all failed; %s", e)

    if os.path.exists(app_dir):
        shutil.rmtree(app_dir)

    if not os.path.exists(args.apps_dir):
        os.makedirs(args.apps_dir)

    # Delete the deployment beforehand. Otherwise the update might fail when
    # resource permissions/requirements have changed. It's cleaner to delete
    # and re-create it.
    delete_deployment = os.path.join(args.kubeflow_repo, "scripts", "gke",
                                     "delete_deployment.sh")

    util.run([
        delete_deployment, "--project=" + args.project, "--deployment=" + name,
        "--zone=" + args.zone
    ],
             cwd=args.apps_dir)

    # The delete script doesn't delete the storage deployment by design.
    delete_storage_deployment(args.project, name + "-storage")

    env = {}
    env.update(os.environ)
    env.update(oauth_info)

    labels = {
        "GIT_LABEL": git_describe,
        "PURPOSE": "kf-test-cluster",
    }

    label_args = []
    for k, v in labels.items():
        # Labels may only contain alphanumeric characters, hyphens, and
        # underscores. Replace invalid characters with hyphens.
        val = v.lower().replace("\"", "")
        val = re.sub(r"[^a-z0-9\-_]", "-", val)
        label_args.append("{key}={val}".format(key=k.lower(), val=val))

    endpoint = "{name}.endpoints.{project}.cloud.goog".format(
        name=name, project=args.project)
    # Fire-and-forget attempt to undelete the endpoint service. Deleting an
    # endpoint service is a soft delete: it is only purged after 30 days.
    # If a deployment tries to re-use an endpoint that is soft-deleted, it
    # will fail, so we undelete it so that the endpoint-controller can
    # complete its job.
    try:
        util.run([
            "gcloud", "endpoints", "services", "undelete", endpoint,
            "--verbosity=info", "--project=" + args.project
        ])
    except subprocess.CalledProcessError as e:
        logging.info("endpoint undeletion is failed: %s", e)

    deploy_with_kfctl_go(kfctl_path, args, app_dir, env)

    create_info_file(args, app_dir, git_describe)
    logging.info("Annotating cluster with labels: %s", str(label_args))

    # Set labels on the deployment
    util.run([
        "gcloud", "--project", args.project, "deployment-manager",
        "deployments", "update", name, "--update-labels", ",".join(label_args)
    ],
             cwd=app_dir)

    # Set labels on the cluster too. Labels on the deployment are not shown
    # in Pantheon; it's easier for users if the cluster also has labels.
    util.run([
        "gcloud", "container", "clusters", "update", name, "--project",
        args.project, "--zone", args.zone, "--update-labels",
        ",".join(label_args)
    ],
             cwd=app_dir)

    # To work around Let's Encrypt quota issues, create a self-signed
    # certificate.
    kubeflow_branch = None
    for repo in snapshot_info["repos"]:
        if repo["repo"] == "kubeflow":
            kubeflow_branch = repo["branch"]

    logging.info("kubeflow branch %s", kubeflow_branch)

    if kubeflow_branch == "v0.6-branch":
        logging.info("Creating a self signed certificate")
        util.run(["kubectl", "config", "use-context", name])
        tls_endpoint = "--host={0}.endpoints.{1}.cloud.goog".format(
            name, args.project)

        cert_dir = tempfile.mkdtemp()
        util.run(["kube-rsa", tls_endpoint], cwd=cert_dir)
        util.run([
            "kubectl", "-n", "kubeflow", "create", "secret", "tls",
            "envoy-ingress-tls", "--cert=ca.pem", "--key=ca-key.pem"
        ],
                 cwd=cert_dir)
        shutil.rmtree(cert_dir)
    else:
        # Starting with 0.7 we are moving to managed GKE certificates,
        # so we can't just generate a self-signed certificate.
        # TODO(jlewi): If we still hit lets-encrypt quota issues then
        # we can fix this by generating new hostnames
        logging.info("Not creating a self signed certificate")
Code example #13
def main():  # pylint: disable=too-many-locals,too-many-statements
    logging.basicConfig(
        level=logging.INFO,
        format=('%(levelname)s|%(asctime)s'
                '|%(pathname)s|%(lineno)d| %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--base_name",
        default="kf-v0-4",
        type=str,
        help=(
            "The base name for the deployment typically kf-vX-Y or kf-vmaster."
        ))

    parser.add_argument("--project",
                        default="kubeflow-ci",
                        type=str,
                        help=("The project."))

    parser.add_argument("--zone",
                        default="us-east1-d",
                        type=str,
                        help=("The zone to deploy in."))

    parser.add_argument(
        "--oauth_file",
        default="gs://kubeflow-ci_kf-data/kf-iap-oauth.kubeflow-ci.yaml",
        type=str,
        help=("The file containing the OAuth client ID & secret"
              "for IAP."))

    parser.add_argument("--kubeflow_repo",
                        default="/home/jlewi/git_kubeflow",
                        type=str,
                        help=("Path to the Kubeflow repo to use"))

    parser.add_argument("--apps_dir",
                        default=os.getcwd(),
                        type=str,
                        help=("Directory to store kubeflow apps."))

    parser.add_argument(
        "--deployment_worker_cluster",
        default="kubeflow-testing",
        type=str,
        help=("Name of cluster deployment cronjob workers use."))

    parser.add_argument("--cluster_num",
                        default="",
                        type=int,
                        help=("Number of cluster to deploy to."))

    parser.add_argument("--timestamp",
                        default="",
                        type=str,
                        help=("Timestamp deployment takes snapshot."))

    parser.add_argument("--job_name",
                        default="",
                        type=str,
                        help=("Pod name running the job."))

    args = parser.parse_args()

    bucket, blob_path = util.split_gcs_uri(args.oauth_file)

    client = storage.Client(project=args.project)
    bucket = client.get_bucket(bucket)

    blob = bucket.get_blob(blob_path)
    contents = blob.download_as_string()

    oauth_info = yaml.safe_load(contents)

    git_describe = util.run(
        ["git", "describe", "--tags", "--always", "--dirty"],
        cwd=args.kubeflow_repo).strip("'")

    # TODO(https://github.com/kubeflow/testing/issues/95): We want to cycle
    # between N different names e.g.
    # kf-vX-Y-n00, kf-vX-Y-n01, ... kf-vX-Y-n05
    # The reason to reuse names is because for IAP we need to manually
    # set the redirect URIs. So we want to cycle between a set of known
    # endpoints. We should add logic to automatically recycle deployments.
    # i.e. we should find the oldest one and reuse that.
    num = args.cluster_num
    name = "{0}-n{1:02d}".format(args.base_name, num)
    # Clean up the previous deployment. We can't run "kfctl delete all"
    # since we can't guarantee the app config in the repository is up to date.
    util.run(["rm", "-rf", name], cwd=args.apps_dir)

    # Delete the deployment beforehand. Otherwise the update might fail when
    # resource permissions/requirements have changed. It's cleaner to delete
    # and re-create it.
    delete_deployment = os.path.join(args.kubeflow_repo, "scripts", "gke",
                                     "delete_deployment.sh")
    util.run([
        delete_deployment, "--project=" + args.project, "--deployment=" + name,
        "--zone=" + args.zone
    ],
             cwd=args.apps_dir)

    # Create a dummy kubeconfig on the cronjob worker.
    util.run([
        "gcloud", "container", "clusters", "get-credentials",
        args.deployment_worker_cluster, "--zone", args.zone, "--project",
        args.project
    ],
             cwd=args.apps_dir)

    app_dir = os.path.join(args.apps_dir, name)
    kfctl = os.path.join(args.kubeflow_repo, "scripts", "kfctl.sh")
    ks_app_dir = os.path.join(app_dir, "ks_app")
    util.run([
        kfctl, "init", name, "--project", args.project, "--zone", args.zone,
        "--platform", "gcp", "--skipInitProject", "true"
    ],
             cwd=args.apps_dir)

    labels = {}
    with open(os.path.join(app_dir, "kf_app.yaml"), "w") as hf:
        app = {
            "labels": {
                "GIT_LABEL": git_describe,
                "PURPOSE": "kf-test-cluster",
            },
        }
        if args.timestamp:
            app["labels"]["SNAPSHOT_TIMESTAMP"] = args.timestamp
        if args.job_name:
            app["labels"]["DEPLOYMENT_JOB"] = args.job_name
        labels = app.get("labels", {})
        yaml.dump(app, hf)

    label_args = []
    for k, v in labels.items():
        # Labels may only contain alphanumeric characters, hyphens, and
        # underscores. Replace invalid characters with hyphens.
        val = v.lower().replace("\"", "")
        val = re.sub(r"[^a-z0-9\-_]", "-", val)
        label_args.append("{key}={val}".format(key=k.lower(), val=val))

    util.run([kfctl, "generate", "all"], cwd=app_dir)
    util.run(["ks", "generate", "seldon", "seldon"], cwd=ks_app_dir)

    env = {}
    env.update(os.environ)
    env.update(oauth_info)
    # kfctl apply all might break during cronjob invocation when components
    # it depends on are not ready. Retrying several times should be enough.
    kfctl_apply_with_retry(kfctl, app_dir, env)

    logging.info("Annotating cluster with labels: %s", str(label_args))
    util.run([
        "gcloud", "container", "clusters", "update", name, "--zone", args.zone,
        "--update-labels", ",".join(label_args)
    ],
             cwd=app_dir)
Code example #14
def main():  # pylint: disable=too-many-locals,too-many-statements
    logging.basicConfig(
        level=logging.INFO,
        format=('%(levelname)s|%(asctime)s'
                '|%(pathname)s|%(lineno)d| %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--base_name",
        default="kf-v0-4",
        type=str,
        help=(
            "The base name for the deployment typically kf-vX-Y or kf-vmaster."
        ))

    parser.add_argument("--project",
                        default="kubeflow-ci",
                        type=str,
                        help=("The project."))

    parser.add_argument("--zone",
                        default="us-east1-d",
                        type=str,
                        help=("The zone to deploy in."))

    parser.add_argument(
        "--oauth_file",
        default="gs://kubeflow-ci_kf-data/kf-iap-oauth.kubeflow-ci.yaml",
        type=str,
        help=("The file containing the OAuth client ID & secret"
              "for IAP."))

    parser.add_argument("--kubeflow_repo",
                        default="/home/jlewi/git_kubeflow",
                        type=str,
                        help=("Path to the Kubeflow repo to use"))

    parser.add_argument("--apps_dir",
                        default=os.getcwd(),
                        type=str,
                        help=("Directory to store kubeflow apps."))

    parser.add_argument(
        "--deployment_worker_cluster",
        default="kubeflow-testing",
        type=str,
        help=("Name of cluster deployment cronjob workers use."))

    parser.add_argument("--cluster_num",
                        default="",
                        type=int,
                        help=("Number of cluster to deploy to."))

    args = parser.parse_args()

    bucket, blob_path = util.split_gcs_uri(args.oauth_file)

    client = storage.Client(project=args.project)
    bucket = client.get_bucket(bucket)

    blob = bucket.get_blob(blob_path)
    contents = blob.download_as_string()

    oauth_info = yaml.safe_load(contents)

    git_describe = util.run(
        ["git", "describe", "--tags", "--always", "--dirty"],
        cwd=args.kubeflow_repo).strip("'")

    # TODO(https://github.com/kubeflow/testing/issues/95): We want to cycle
    # between N different names e.g.
    # kf-vX-Y-n00, kf-vX-Y-n01, ... kf-vX-Y-n05
    # The reason to reuse names is because for IAP we need to manually
    # set the redirect URIs. So we want to cycle between a set of known
    # endpoints. We should add logic to automatically recycle deployments.
    # i.e. we should find the oldest one and reuse that.
    num = args.cluster_num
    name = "{0}-n{1:02d}".format(args.base_name, num)
    # Clean up the previous deployment. We can't run "kfctl delete all"
    # since we can't guarantee the app config in the repository is up to date.
    util.run(["rm", "-rf", name], cwd=args.apps_dir)
    # TODO(gabrielwen):
    # https://github.com/kubeflow/testing/issues/295
    # 1. Is deployment deletion still needed?
    # 2. If it is, figure out permission set up for it.
    # 3. Should use
    # https://github.com/kubeflow/kubeflow/blob/master/scripts/gke/delete_deployment.sh

    # Create a dummy kubeconfig on the cronjob worker.
    util.run([
        "gcloud", "container", "clusters", "get-credentials",
        args.deployment_worker_cluster, "--zone", args.zone, "--project",
        args.project
    ],
             cwd=args.apps_dir)

    app_dir = os.path.join(args.apps_dir, name)
    kfctl = os.path.join(args.kubeflow_repo, "scripts", "kfctl.sh")
    util.run([
        kfctl, "init", name, "--project", args.project, "--zone", args.zone,
        "--platform", "gcp", "--skipInitProject", "true"
    ],
             cwd=args.apps_dir)

    with open(os.path.join(app_dir, "kf_app.yaml"), "w") as hf:
        app = {
            "labels": {
                "GIT_LABEL": git_describe,
                "PURPOSE": "kf-test-cluster",
                "CREATOR": getpass.getuser(),
            },
        }
        yaml.dump(app, hf)

    util.run([kfctl, "generate", "all"], cwd=app_dir)

    env = {}
    env.update(os.environ)
    env.update(oauth_info)
    # kfctl apply all might break during cronjob invocation when components
    # it depends on are not ready. Retrying several times should be enough.
    kfctl_apply_with_retry(kfctl, app_dir, env)
Code example #15
File: create_kf_instance.py Project: pdmack/testing
def main():  # pylint: disable=too-many-locals,too-many-statements
    logging.basicConfig(
        level=logging.INFO,
        format=('%(levelname)s|%(asctime)s'
                '|%(pathname)s|%(lineno)d| %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()

    parser.add_argument("--project",
                        default="kubeflow-ci",
                        type=str,
                        help=("The project."))

    parser.add_argument("--zone",
                        default="us-east1-d",
                        type=str,
                        help=("The zone to deploy in."))

    parser.add_argument(
        "--oauth_file",
        default="gs://kubeflow-ci_kf-data/kf-iap-oauth.kubeflow-ci.yaml",
        type=str,
        help=("The file containing the OAuth client ID & secret"
              "for IAP."))

    parser.add_argument("--kubeflow_repo",
                        default="/home/jlewi/git_kubeflow",
                        type=str,
                        help=("Path to the Kubeflow repo to use"))

    parser.add_argument("--apps_dir",
                        default=os.getcwd(),
                        type=str,
                        help=("Directory to store kubeflow apps."))

    parser.add_argument("--name",
                        default="",
                        type=str,
                        help=("Name for the deployment."))

    parser.add_argument("--snapshot_file",
                        default="",
                        type=str,
                        help=("A json file containing information about the "
                              "snapshot to use."))

    parser.add_argument("--timestamp",
                        default="",
                        type=str,
                        help=("Timestamp deployment takes snapshot."))

    parser.add_argument("--job_name",
                        default="",
                        type=str,
                        help=("Pod name running the job."))

    args = parser.parse_args()

    bucket, blob_path = util.split_gcs_uri(args.oauth_file)

    client = storage.Client(project=args.project)
    bucket = client.get_bucket(bucket)

    blob = bucket.get_blob(blob_path)
    contents = blob.download_as_string()

    oauth_info = yaml.safe_load(contents)

    git_describe = util.run(
        ["git", "describe", "--tags", "--always", "--dirty"],
        cwd=args.kubeflow_repo).strip("'")

    timestamp = args.timestamp
    if args.snapshot_file:
        logging.info("Loading info from snapshot file %s", args.snapshot_file)
        with open(args.snapshot_file) as hf:
            snapshot_info = json.load(hf)
            name = snapshot_info["name"]
            timestamp = snapshot_info.get("timestamp", "")
    else:
        name = args.name

    # Clean up the previous deployment. We can't run "kfctl delete all"
    # since we can't guarantee the app config in the repository is up to date.
    util.run(["rm", "-rf", name], cwd=args.apps_dir)

    # Delete the deployment beforehand. Otherwise the update might fail when
    # resource permissions/requirements have changed. It's cleaner to delete
    # and re-create it.
    delete_deployment = os.path.join(args.kubeflow_repo, "scripts", "gke",
                                     "delete_deployment.sh")

    util.run([
        delete_deployment, "--project=" + args.project, "--deployment=" + name,
        "--zone=" + args.zone
    ],
             cwd=args.apps_dir)

    # The delete script doesn't delete the storage deployment by design.
    delete_storage_deployment(args.project, name + "-storage")

    app_dir = os.path.join(args.apps_dir, name)
    kfctl = os.path.join(args.kubeflow_repo, "scripts", "kfctl.sh")
    ks_app_dir = os.path.join(app_dir, "ks_app")
    util.run([
        kfctl, "init", name, "--project", args.project, "--zone", args.zone,
        "--platform", "gcp", "--skipInitProject", "true"
    ],
             cwd=args.apps_dir)

    labels = {}
    with open(os.path.join(app_dir, "kf_app.yaml"), "w") as hf:
        app = {
            "labels": {
                "GIT_LABEL": git_describe,
                "PURPOSE": "kf-test-cluster",
            },
        }
        if timestamp:
            app["labels"]["SNAPSHOT_TIMESTAMP"] = timestamp
        if args.job_name:
            app["labels"]["DEPLOYMENT_JOB"] = args.job_name
        labels = app.get("labels", {})
        yaml.dump(app, hf)

    label_args = []
    for k, v in labels.items():
        # Labels may only contain alphanumeric characters, hyphens, and
        # underscores. Replace invalid characters with hyphens.
        val = v.lower().replace("\"", "")
        val = re.sub(r"[^a-z0-9\-_]", "-", val)
        label_args.append("{key}={val}".format(key=k.lower(), val=val))

    env = {}
    env.update(os.environ)
    env.update(oauth_info)

    # We need to apply platform before generating k8s because ksonnet
    # needs a cluster to exist.
    # kfctl apply all might break during cronjob invocation when components
    # it depends on are not ready. Retrying several times should be enough.
    run_with_retry([kfctl, "generate", "platform"], cwd=app_dir, env=env)
    run_with_retry([kfctl, "apply", "platform"], cwd=app_dir, env=env)
    run_with_retry([kfctl, "generate", "k8s"], cwd=app_dir, env=env)
    run_with_retry([kfctl, "apply", "k8s"], cwd=app_dir, env=env)
    run_with_retry(["ks", "generate", "seldon", "seldon"],
                   cwd=ks_app_dir,
                   env=env)

    logging.info("Annotating cluster with labels: %s", str(label_args))

    # Set labels on the deployment
    util.run([
        "gcloud", "--project", args.project, "deployment-manager",
        "deployments", "update", name, "--update-labels", ",".join(label_args)
    ],
             cwd=app_dir)

    # To work around Let's Encrypt quota issues, create a self-signed
    # certificate.
    util.run([
        "gcloud", "container", "clusters", "get-credentials", name, "--zone",
        args.zone, "--project", args.project
    ])
    tls_endpoint = "--host=%s.endpoints.kubeflow-ci.cloud.goog" % name
    util.run(["kube-rsa", tls_endpoint])
    util.run([
        "kubectl", "-n", "kubeflow", "create", "secret", "tls",
        "envoy-ingress-tls", "--cert=ca.pem", "--key=ca-key.pem"
    ])
Code example #16
def main(): # pylint: disable=too-many-locals,too-many-statements
  logging.basicConfig(level=logging.INFO,
                      format=('%(levelname)s|%(asctime)s'
                              '|%(pathname)s|%(lineno)d| %(message)s'),
                      datefmt='%Y-%m-%dT%H:%M:%S',
                      )
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser()

  parser.add_argument(
    "--base_name", default="kf-v0-4", type=str,
    help=("The base name for the deployment typically kf-vX-Y or kf-vmaster."))

  parser.add_argument(
    "--project", default="kubeflow-ci", type=str, help=("The project."))

  parser.add_argument(
    "--zone", default="us-east1-d", type=str, help=("The zone to deploy in."))

  parser.add_argument(
    "--oauth_file",
    default="gs://kubeflow-ci_kf-data/kf-iap-oauth.kubeflow-ci.yaml",
    type=str, help=("The file containing the OAuth client ID & secret"
                    "for IAP."))

  parser.add_argument(
    "--kubeflow_repo",
    default="/home/jlewi/git_kubeflow",
    type=str, help=("Path to the Kubeflow repo to use"))

  parser.add_argument(
    "--apps_dir",
    default=os.getcwd(),
    type=str, help=("Directory to store kubeflow apps."))

  args = parser.parse_args()

  bucket, blob_path = util.split_gcs_uri(args.oauth_file)

  client = storage.Client(project=args.project)
  bucket = client.get_bucket(bucket)

  blob = bucket.get_blob(blob_path)
  contents = blob.download_as_string()

  oauth_info = yaml.safe_load(contents)

  git_describe = util.run(["git", "describe", "--tags", "--always", "--dirty"],
                          cwd=args.kubeflow_repo).strip("'")


  # TODO(https://github.com/kubeflow/testing/issues/95): We want to cycle
  # between N different names e.g.
  # kf-vX-Y-n00, kf-vX-Y-n01, ... kf-vX-Y-n05
  # The reason to reuse names is because for IAP we need to manually
  # set the redirect URIs. So we want to cycle between a set of known
  # endpoints. We should add logic to automatically recycle deployments.
  # i.e. we should find the oldest one and reuse that.
  num = 0
  name = "{0}-n{1:02d}".format(args.base_name, num)
  app_dir = os.path.join(args.apps_dir, name)
  kfctl = os.path.join(args.kubeflow_repo, "scripts", "kfctl.sh")
  util.run([kfctl, "init", name, "--project", args.project, "--zone", args.zone,
            "--platform", "gcp", "--skipInitProject", "true"], cwd=args.apps_dir
           )

  with open(os.path.join(app_dir, "kf_app.yaml"), "w") as hf:
    app = {
      "labels": {
        "GIT_LABEL": git_describe,
        "PURPOSE": "kf-test-cluster",
        "CREATOR": getpass.getuser(),
      },
    }
    yaml.dump(app, hf)

  util.run([kfctl, "generate", "all"], cwd=app_dir)

  env = {}
  env.update(os.environ)
  env.update(oauth_info)
  util.run([kfctl, "apply", "all"], cwd=app_dir, env=env)