def install_addon():
    """Install Benchmark Addons.

    Full install path: GPU device plugin, ksonnet app, Kubeflow, the
    kubebench NFS volume/server, and the Github token secret.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    target_ns = args.namespace
    base_dir = args.base_dir
    api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))

    # Bring up the GPU device plugin first and block until drivers are ready.
    install_gpu_drivers(api_client)
    wait_for_gpu_driver_install(api_client)

    # Initialize the ksonnet application used for all subsequent deploys.
    app_dir = deploy_utils.setup_ks_app(base_dir, target_ns, api_client,
                                        args.kubeflow_registry,
                                        args.kubebench_registry)

    # Deploy Kubeflow into the requested namespace and wait for it.
    install_kubeflow(api_client, app_dir, target_ns)
    wait_for_kubeflow_install(api_client, target_ns)

    # change the namespace to default to set up nfs-volume and nfs-server
    target_ns = "default"
    install_kubebench_nfs(api_client, app_dir, target_ns)

    # Deploy the Github token secret (token supplied via environment).
    github_token = str(os.environ['GITHUB_TOKEN'])
    install_github_secret(api_client, target_ns, args.github_secret_name,
                          github_token)
def install_addon():
    """Install Benchmark Addons.

    Idempotent variant: skips the whole install when the ksonnet app
    directory already exists from a previous run.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    ns = args.namespace
    base_dir = args.base_dir
    api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))

    # A pre-existing ks-app directory means a previous run already set
    # everything up -- bail out early instead of reinstalling.
    if os.path.exists(os.path.join(base_dir, "ks-app")):
        logging.info("kubeflow app dir already exists. skip install")
        return

    # Setup ksonnet application
    app_dir = deploy_utils.setup_ks_app(base_dir, ns, api_client,
                                        args.kubeflow_registry,
                                        args.kubebench_registry)

    # Deploy Kubeflow and wait for the install to finish.
    install_kubeflow(api_client, app_dir, ns)
    wait_for_kubeflow_install(api_client, ns)

    # change the namespace to default to set up nfs-volume and nfs-server
    ns = "default"
    install_kubebench_nfs(api_client, app_dir, ns)
def copy_dataset():
    """Copy the benchmark dataset from S3 into a PVC via a Kubernetes Job."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    ns = args.namespace
    api_client = deploy_utils.create_k8s_client(str(os.environ['KUBECONFIG']))
    batch_api = k8s_client.BatchV1Api(api_client)

    # The copy job needs the runner image, region, S3 source path and PVC.
    job_body = create_job_object(args.runner_image, args.region,
                                 args.s3_import_path, args.pvc)
    batch_api.create_namespaced_job(ns, job_body)

    # Block until the copy job completes.
    logging.info("Wait for data copy finish.")
    wait_for_job(api_client, ns, "copy-dataset-worker")
    logging.info("Finish copy data from %s to pvc %s",
                 args.s3_import_path, args.pvc)
def install_addon():
    """Install Benchmark Addons.

    GPU-only variant: installs the GPU device plugin / drivers on the
    cluster identified by $KUBECONFIG and waits for the install to finish.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    # Still invoke argument parsing for its validation side effects, but the
    # previously-assigned locals (namespace, base_dir) were never used here
    # and have been removed.
    parse_args()
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)

    # Setup GPU Device Plugin and block until the drivers are ready.
    install_gpu_drivers(api_client)
    wait_for_gpu_driver_install(api_client)
def install_addon():
    """Install Benchmark Addons.

    Github-secret variant: creates the Github token secret (token read
    from $GITHUB_TOKEN) in the target namespace.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    namespace = args.namespace
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)

    # Deploy Github Secret. Can be passed from user's parameter
    # base64.b64encode requires a bytes-like object on Python 3; passing the
    # raw str raised TypeError, so encode the token first.
    github_token = str(os.environ['GITHUB_TOKEN'])
    install_github_secret(api_client, namespace, args.github_secret_name,
                          base64.b64encode(github_token.encode('utf-8')))
def install_addon():
    """Install Benchmark Addons.

    AWS-secret variant: creates the AWS credentials secret from
    $AWS_ACCESS_KEY_ID / $AWS_SECRET_ACCESS_KEY in the target namespace.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()
    namespace = args.namespace
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)

    # Deploy AWS Secret. Get from environment.
    # base64.b64encode requires bytes-like input on Python 3; encoding the
    # str values first avoids a TypeError at runtime.
    access_key_id = str(os.environ['AWS_ACCESS_KEY_ID'])
    access_key = str(os.environ['AWS_SECRET_ACCESS_KEY'])
    install_aws_secret(api_client, namespace, args.aws_secret_name,
                       base64.b64encode(access_key_id.encode('utf-8')),
                       base64.b64encode(access_key.encode('utf-8')))
def run_benchmark_job():
    """Submit benchmark jobs to remote kubernetes cluster.

    Generates a kubebench-job ksonnet component, configures it from the
    parsed CLI args, applies it, waits for completion, then cleans up.
    """
    args = parse_args()
    app_dir = os.path.join(str(os.environ['BENCHMARK_DIR']), "ks-app")
    kubeconfig_path = str(os.environ['KUBECONFIG'])
    api_client = deploy_utils.create_k8s_client(kubeconfig_path)
    namespace = args.namespace
    job_name = args.experiment_name

    # Set the namespace of kb job to default
    namespace = "default"

    # Deploy Kubebench: generate the component, then set its parameters.
    util.run(
        ["ks", "generate", "kubebench-job", job_name, "--name=" + job_name],
        cwd=app_dir)

    # (param, value) pairs, applied in the original order. Each command is
    # tokenized with str.split(), so values must not contain whitespace --
    # the same constraint the previous inline commands had.
    job_params = [
        ("mainJobKsRegistry", args.training_job_registry),
        ("mainJobKsPackage", args.training_job_pkg),
        ("mainJobKsPrototype", args.training_job_prototype),
        ("mainJobConfig", args.training_job_config),
        ("awsCredentialsSecret", args.aws_secret),
        ("awsCredentialsSecretAccessKeyId", args.aws_access_key_id),
        ("awsCredentialsSecretAccessKey", args.aws_secret_access_key),
        ("awsRegion", args.aws_region),
        ("githubTokenSecret", args.github_secret_name),
        ("githubTokenSecretKey", "GITHUB_TOKEN"),
        ("controllerImage", "seedjeffwan/configurator:20190415"),
        ("postJobImage", "seedjeffwan/mpi-post-processor:logs"),
        ("postJobArgs", "null"),
        ("reporterType", "null"),
        ("experimentDataPvc", args.data_pvc),
    ]
    for param, value in job_params:
        cmd = "ks param set " + job_name + " " + param + " " + value
        util.run(cmd.split(), cwd=app_dir)

    # Apply the configured component to the cluster.
    apply_command = ["ks", "apply", "default", "-c", job_name]
    util.run(apply_command, cwd=app_dir)

    # TODO: expose timeout setting here.
    deploy_utils.wait_for_benchmark_job(job_name, namespace)
    deploy_utils.cleanup_benchmark_job(app_dir, job_name)