Example #1
0
def install_addon():
  """Set up the add-ons a benchmark cluster needs, one step after another.

  Steps, in execution order: configure INFO-level logging, parse the command
  line, create a Kubernetes API client from the kubeconfig path held in the
  ``KUBECONFIG`` environment variable, call the GPU driver install/wait
  helpers, set up the ksonnet app, install Kubeflow and wait for it, install
  the kubebench NFS components, and finally install the Github secret.

  ``args.namespace`` is used for the ksonnet app and Kubeflow; the NFS
  components and the Github secret are always installed with the "default"
  namespace.

  Raises:
    KeyError: if ``KUBECONFIG`` is unset, or if ``GITHUB_TOKEN`` is unset
      (the latter is only read after all earlier steps have already run).
  """
  log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
  logging.basicConfig(
      level=logging.INFO,
      format=log_format,
      datefmt='%Y-%m-%dT%H:%M:%S',
  )
  logging.getLogger().setLevel(logging.INFO)

  args = parse_args()
  kubeflow_namespace = args.namespace
  workspace = args.base_dir
  api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))

  # GPU device plugin: install, then block until the wait helper returns.
  install_gpu_drivers(api_client)
  wait_for_gpu_driver_install(api_client)

  # ksonnet application that the later install steps operate on.
  ks_app_dir = deploy_utils.setup_ks_app(
      workspace,
      kubeflow_namespace,
      api_client,
      args.kubeflow_registry,
      args.kubebench_registry,
  )

  # Kubeflow itself goes into the namespace given on the command line.
  install_kubeflow(api_client, ks_app_dir, kubeflow_namespace)
  wait_for_kubeflow_install(api_client, kubeflow_namespace)

  # nfs-volume / nfs-server and the Github secret are set up in "default",
  # regardless of the namespace used above.
  shared_namespace = "default"
  install_kubebench_nfs(api_client, ks_app_dir, shared_namespace)

  token = str(os.environ['GITHUB_TOKEN'])
  install_github_secret(api_client, shared_namespace, args.github_secret_name, token)
Example #2
0
def install_addon():
    """Install Kubeflow and the kubebench NFS components, once per base dir.

    After configuring INFO-level logging and parsing the command line, a
    Kubernetes API client is created from the kubeconfig path held in the
    ``KUBECONFIG`` environment variable. If ``<base_dir>/ks-app`` already
    exists the function logs a message and returns without installing
    anything. Otherwise it sets up the ksonnet app, installs Kubeflow into
    ``args.namespace`` and waits for it, then installs the kubebench NFS
    components with the "default" namespace.

    Raises:
        KeyError: if ``KUBECONFIG`` is not set in the environment.
    """
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        datefmt='%Y-%m-%dT%H:%M:%S')
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    kubeflow_namespace = args.namespace
    workspace = args.base_dir
    api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))

    # An existing ks-app directory is treated as "already installed".
    if os.path.exists(os.path.join(workspace, "ks-app")):
        logging.info("kubeflow app dir already exists. skip install")
        return

    # ksonnet application that the install steps below operate on.
    ks_app_dir = deploy_utils.setup_ks_app(
        workspace,
        kubeflow_namespace,
        api_client,
        args.kubeflow_registry,
        args.kubebench_registry,
    )

    # Kubeflow goes into the namespace given on the command line.
    install_kubeflow(api_client, ks_app_dir, kubeflow_namespace)
    wait_for_kubeflow_install(api_client, kubeflow_namespace)

    # nfs-volume and nfs-server are set up in "default" instead.
    install_kubebench_nfs(api_client, ks_app_dir, "default")
def copy_dataset():
    """Submit the dataset-copy job to the cluster and wait for it.

    Configures INFO-level logging, parses the command line, and creates a
    Kubernetes API client from the kubeconfig path held in the ``KUBECONFIG``
    environment variable. A job object is built from ``args.runner_image``,
    ``args.region``, ``args.s3_import_path`` and ``args.pvc``, created in
    ``args.namespace`` through the batch/v1 API, and then the function waits
    on the job named "copy-dataset-worker" before logging completion.

    Raises:
        KeyError: if ``KUBECONFIG`` is not set in the environment.
    """
    log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
    logging.basicConfig(level=logging.INFO,
                        format=log_format,
                        datefmt='%Y-%m-%dT%H:%M:%S')
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    target_namespace = args.namespace
    api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))
    batch_api = k8s_client.BatchV1Api(api_client)

    # The job needs the runner image, region, source path and target PVC.
    copy_job = create_job_object(
        args.runner_image,
        args.region,
        args.s3_import_path,
        args.pvc,
    )
    batch_api.create_namespaced_job(target_namespace, copy_job)

    logging.info("Wait for data copy finish.")
    # NOTE(review): the job name is hard-coded here; presumably it matches
    # the name create_job_object assigns -- confirm against that helper.
    wait_for_job(api_client, target_namespace, "copy-dataset-worker")
    logging.info("Finish copy data from %s to pvc %s", args.s3_import_path,
                 args.pvc)
Example #4
0
def install_addon():
  """Install the GPU drivers on the cluster and wait for the install.

  Configures INFO-level logging, parses the command line, creates a
  Kubernetes API client from the kubeconfig path held in the ``KUBECONFIG``
  environment variable, then calls the GPU driver install helper followed by
  the matching wait helper.

  Raises:
    KeyError: if ``KUBECONFIG`` is not set in the environment.
  """
  log_format = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
  logging.basicConfig(
      level=logging.INFO,
      format=log_format,
      datefmt='%Y-%m-%dT%H:%M:%S',
  )
  logging.getLogger().setLevel(logging.INFO)

  args = parse_args()
  # Read from the parsed arguments but not used by this variant.
  namespace = args.namespace
  base_dir = args.base_dir

  api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))

  # GPU device plugin: install, then block until the wait helper returns.
  install_gpu_drivers(api_client)
  wait_for_gpu_driver_install(api_client)
Example #5
0
def install_addon():
    """Install the Github token secret on the cluster.

    Configures INFO-level logging, parses the command line, creates a
    Kubernetes API client from the kubeconfig path held in the ``KUBECONFIG``
    environment variable, and passes the base64-encoded value of the
    ``GITHUB_TOKEN`` environment variable to ``install_github_secret`` for
    ``args.namespace`` under the name ``args.github_secret_name``.

    Raises:
        KeyError: if ``KUBECONFIG`` or ``GITHUB_TOKEN`` is not set in the
            environment.
    """

    def _b64encode_text(value):
        """Return the base64 encoding of ``value`` as a native ``str``."""
        # base64.b64encode only accepts bytes-like input; on Python 3 the
        # environment value is text and must be encoded first. On Python 2
        # ``str`` already is ``bytes``, so this branch is skipped.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        encoded = base64.b64encode(value)
        # Python 3 returns bytes; convert back to text so the caller gets the
        # same type on both major versions (Python 2 already returns ``str``).
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        return encoded

    logging.basicConfig(
        level=logging.INFO,
        format=('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    namespace = args.namespace
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)

    # Deploy Github Secret. The token is taken from the environment.
    github_token = str(os.environ['GITHUB_TOKEN'])
    install_github_secret(api_client, namespace, args.github_secret_name,
                          _b64encode_text(github_token))
Example #6
0
def install_addon():
    """Install the AWS credentials secret on the cluster.

    Configures INFO-level logging, parses the command line, creates a
    Kubernetes API client from the kubeconfig path held in the ``KUBECONFIG``
    environment variable, and passes the base64-encoded values of the
    ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environment variables
    to ``install_aws_secret`` for ``args.namespace`` under the name
    ``args.aws_secret_name``.

    Raises:
        KeyError: if ``KUBECONFIG``, ``AWS_ACCESS_KEY_ID`` or
            ``AWS_SECRET_ACCESS_KEY`` is not set in the environment.
    """

    def _b64encode_text(value):
        """Return the base64 encoding of ``value`` as a native ``str``."""
        # base64.b64encode only accepts bytes-like input; on Python 3 the
        # environment value is text and must be encoded first. On Python 2
        # ``str`` already is ``bytes``, so this branch is skipped.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        encoded = base64.b64encode(value)
        # Python 3 returns bytes; convert back to text so the caller gets the
        # same type on both major versions (Python 2 already returns ``str``).
        if not isinstance(encoded, str):
            encoded = encoded.decode('ascii')
        return encoded

    logging.basicConfig(
        level=logging.INFO,
        format=('%(asctime)s %(name)-12s %(levelname)-8s %(message)s'),
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    namespace = args.namespace
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)

    # Deploy AWS Secret. Both credentials are taken from the environment.
    access_key_id = str(os.environ['AWS_ACCESS_KEY_ID'])
    access_key = str(os.environ['AWS_SECRET_ACCESS_KEY'])
    install_aws_secret(api_client, namespace, args.aws_secret_name,
                       _b64encode_text(access_key_id),
                       _b64encode_text(access_key))
Example #7
0
def run_benchmark_job():
    """Submit benchmark jobs to remote kubernetes cluster.

    Generates a ``kubebench-job`` ksonnet component named after
    ``args.experiment_name`` inside ``$BENCHMARK_DIR/ks-app``, sets its
    parameters with ``ks param set``, applies it to the ``default`` ksonnet
    environment, waits for the benchmark job in the "default" namespace and
    finally cleans the job up.

    Each ``ks`` command is passed to ``util.run`` as an argument list built
    element by element, so a parameter value containing whitespace stays a
    single argument instead of being split into several.

    Raises:
        KeyError: if ``BENCHMARK_DIR`` or ``KUBECONFIG`` is not set in the
            environment.
    """
    args = parse_args()
    app_dir = os.path.join(str(os.environ['BENCHMARK_DIR']), "ks-app")

    # The returned client is not used below; the call is kept for whatever
    # setup it performs (and so a missing KUBECONFIG still fails early).
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    deploy_utils.create_k8s_client(kubeconfig_path)

    job_name = args.experiment_name

    # The kubebench job always runs in the "default" namespace.
    namespace = "default"

    # (parameter, value) pairs, applied in this order. Collected before any
    # ks command runs so a missing argument fails before the app is touched.
    job_params = [
        # Main training job.
        ("mainJobKsRegistry", args.training_job_registry),
        ("mainJobKsPackage", args.training_job_pkg),
        ("mainJobKsPrototype", args.training_job_prototype),
        ("mainJobConfig", args.training_job_config),
        # AWS credentials secret and region.
        ("awsCredentialsSecret", args.aws_secret),
        ("awsCredentialsSecretAccessKeyId", args.aws_access_key_id),
        ("awsCredentialsSecretAccessKey", args.aws_secret_access_key),
        ("awsRegion", args.aws_region),
        # Github token secret.
        ("githubTokenSecret", args.github_secret_name),
        ("githubTokenSecretKey", "GITHUB_TOKEN"),
        # Controller / post-processing images and reporter settings.
        ("controllerImage", "seedjeffwan/configurator:20190415"),
        ("postJobImage", "seedjeffwan/mpi-post-processor:logs"),
        ("postJobArgs", "null"),
        ("reporterType", "null"),
        # Volume holding the experiment data.
        ("experimentDataPvc", args.data_pvc),
    ]

    # Deploy Kubebench
    util.run(
        ["ks", "generate", "kubebench-job", job_name, "--name=" + job_name],
        cwd=app_dir)

    for param_name, param_value in job_params:
        util.run(["ks", "param", "set", job_name, param_name, param_value],
                 cwd=app_dir)

    util.run(["ks", "apply", "default", "-c", job_name], cwd=app_dir)

    # TODO: expose timeout setting here.
    deploy_utils.wait_for_benchmark_job(job_name, namespace)
    deploy_utils.cleanup_benchmark_job(app_dir, job_name)