def execute(args: typing.NamedTuple):
    """Stop a single application running under a job, logging the outcome.

    Args:
        args: parsed CLI arguments; reads ``job_id`` and ``app_name``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    if spark_client.stop_job_app(args.job_id, args.app_name):
        log.info("Stopped app {0}".format(args.app_name))
    else:
        # Bug fix: the original never called .format() here, so the literal
        # "{0}" placeholder was printed instead of the application name.
        log.error("App with name {0} does not exist or was already deleted".format(args.app_name))
def execute(args: typing.NamedTuple):
    """Create a user on an existing cluster and log what credentials were set.

    Args:
        args: parsed CLI arguments; reads ``cluster_id``, ``username``,
            ``password`` and ``ssh_key``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    log.info('-------------------------------------------')
    log.info('spark cluster id: {}'.format(args.cluster_id))
    log.info('username: {}'.format(args.username))
    log.info('-------------------------------------------')

    # Prefer a key supplied on the command line; otherwise fall back to the
    # public key configured in secrets.
    key_candidate = args.ssh_key if args.ssh_key else spark_client.secrets_config.ssh_pub_key
    ssh_key, password = utils.get_ssh_key_or_prompt(
        key_candidate, args.username, args.password, spark_client.secrets_config)

    spark_client.create_user(
        cluster_id=args.cluster_id,
        username=args.username,
        password=password,
        ssh_key=ssh_key,
    )

    # Never echo the password itself — mask it at its original length.
    if password:
        log.info('password: %s', '*' * len(password))
    elif ssh_key:
        log.info('ssh public key: %s', ssh_key)

    log.info('-------------------------------------------')
def execute(args: typing.NamedTuple):
    """Build (and optionally run) an SSH command to the cluster master,
    forwarding the various Spark web UI ports to localhost.

    Args:
        args: parsed CLI arguments; reads cluster id, username, port
            overrides, ``host`` and ``connect``.

    Raises:
        aztk.error.AztkError: when the target cluster's pool does not exist.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    # Merge CLI arguments over the ssh.yaml configuration.
    ssh_conf = SshConfig()
    ssh_conf.merge(
        cluster_id=args.cluster_id,
        username=args.username,
        job_ui_port=args.jobui,
        job_history_ui_port=args.jobhistoryui,
        web_ui_port=args.webui,
        jupyter_port=args.jupyter,
        name_node_ui_port=args.namenodeui,
        rstudio_server_port=args.rstudioserver,
        host=args.host,
        connect=args.connect)

    http_prefix = 'http://localhost:'
    log.info("-------------------------------------------")
    log.info("spark cluster id: %s", ssh_conf.cluster_id)
    # One line per forwarded UI endpoint.
    for fmt, port in (
            ("open webui: %s%s", ssh_conf.web_ui_port),
            ("open jobui: %s%s", ssh_conf.job_ui_port),
            ("open jobhistoryui: %s%s", ssh_conf.job_history_ui_port),
            ("open jupyter: %s%s", ssh_conf.jupyter_port),
            ("open namenodeui: %s%s", ssh_conf.name_node_ui_port),
            ("open rstudio server: %s%s", ssh_conf.rstudio_server_port)):
        log.info(fmt, http_prefix, port)
    log.info("ssh username: %s", ssh_conf.username)
    log.info("connect: %s", ssh_conf.connect)
    log.info("-------------------------------------------")

    # get ssh command
    try:
        ssh_cmd = utils.ssh_in_master(
            client=spark_client,
            cluster_id=ssh_conf.cluster_id,
            webui=ssh_conf.web_ui_port,
            jobui=ssh_conf.job_ui_port,
            jobhistoryui=ssh_conf.job_history_ui_port,
            namenodeui=ssh_conf.name_node_ui_port,
            jupyter=ssh_conf.jupyter_port,
            rstudioserver=ssh_conf.rstudio_server_port,
            username=ssh_conf.username,
            host=ssh_conf.host,
            connect=ssh_conf.connect)

        # When not connecting directly, show the command so the user can run it.
        if not ssh_conf.connect:
            log.info("")
            log.info("Use the following command to connect to your spark head node:")
            log.info("\t%s", ssh_cmd)
    except batch_error.BatchErrorException as e:
        if e.error.code == "PoolNotFound":
            raise aztk.error.AztkError(
                "The cluster you are trying to connect to does not exist.")
        else:
            raise
def execute(args: typing.NamedTuple):
    """Print an application's log: stream it live when ``--tail`` is set,
    otherwise fetch and dump the whole log once.

    Args:
        args: parsed CLI arguments; reads ``cluster_id``, ``app_name``
            and ``tail``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    if not args.tail:
        app_logs = spark_client.get_application_log(
            cluster_id=args.cluster_id, application_name=args.app_name)
        print(app_logs.log)
    else:
        utils.stream_logs(
            client=spark_client,
            cluster_id=args.cluster_id,
            application_name=args.app_name)
def execute(args: typing.NamedTuple):
    """Submit a job described by a job configuration file, merged with CLI args.

    Args:
        args: parsed CLI arguments; reads ``job_id`` and ``job_conf``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    job_conf = JobConfig()
    job_conf.merge(args.job_id, args.job_conf)

    # Translate each application dict from the config file into the SDK model.
    aztk_applications = [
        aztk.spark.models.ApplicationConfiguration(
            name=application.get('name'),
            application=application.get('application'),
            application_args=application.get('application_args'),
            main_class=application.get('main_class'),
            jars=[],
            py_files=[],
            files=[],
            driver_java_options=application.get('driver_java_options'),
            driver_library_path=application.get('driver_library_path'),
            driver_class_path=application.get('driver_class_path'),
            driver_memory=application.get('driver_memory'),
            executor_memory=application.get('executor_memory'),
            driver_cores=application.get('driver_cores'),
            executor_cores=application.get('executor_cores'))
        for application in job_conf.applications
    ]

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=aztk_applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        docker_repo=job_conf.docker_repo,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        worker_on_master=job_conf.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.submit_job(job_configuration)
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml merged with CLI overrides.

    Args:
        args: parsed CLI arguments; reads cluster sizing/id options,
            ``username``/``password``, ``docker_repo`` and ``wait``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuartion file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))

    # An explicit command-line wait flag overrides the file's setting.
    if args.wait is not None:
        wait = args.wait

    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        # Resolve the SSH key / password, prompting the user if necessary.
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    print_cluster_conf(cluster_conf, wait)

    spinner = utils.Spinner()
    spinner.start()
    # create spark cluster
    cluster = spark_client.create_cluster(cluster_conf, wait=wait)
    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def execute(args: typing.NamedTuple):
    """Delete a cluster, asking for confirmation unless ``--force`` was given.

    Args:
        args: parsed CLI arguments; reads ``cluster_id`` and ``force``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    cluster_id = args.cluster_id

    if not args.force:
        # Require the user to re-type the id so a stray invocation cannot
        # delete the wrong cluster.
        typed_id = input("Please confirm the id of the cluster you wish to delete: ")
        if typed_id != cluster_id:
            log.error("Confirmation cluster id does not match. Please try again.")
            return

    if spark_client.delete_cluster(cluster_id):
        log.info("Deleting cluster %s", cluster_id)
    else:
        log.error("Cluster with id '%s' doesn't exist or was already deleted.", cluster_id)
def execute(args: typing.NamedTuple):
    """Delete a job, asking for confirmation unless ``--force`` was given.

    Args:
        args: parsed CLI arguments; reads ``job_id`` and ``force``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    job_id = args.job_id

    if not args.force:
        # check if job exists before prompting for confirmation
        spark_client.get_job(job_id)
        # Bug fix: the prompt and error message previously said "cluster",
        # but this command deletes a job.
        confirmation_job_id = input(
            "Please confirm the id of the job you wish to delete: ")
        if confirmation_job_id != job_id:
            log.error("Confirmation job id does not match. Please try again.")
            return

    if spark_client.delete_job(job_id):
        log.info("Deleting Job %s", job_id)
    else:
        log.error("Job with id '%s' doesn't exist or was already deleted.", job_id)
def execute(args: typing.NamedTuple):
    """Run a shell command on the cluster's nodes.

    Args:
        args: parsed CLI arguments; reads ``cluster_id`` and ``command``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    # Cleanup: the original bound the return value to an unused local
    # (`result`); it was dropped either way. NOTE(review): nothing is printed
    # to the user here — presumably cluster_run surfaces output itself;
    # confirm before adding printing.
    spark_client.cluster_run(args.cluster_id, args.command)
def execute(args: typing.NamedTuple):
    """List the applications that were submitted to a job.

    Args:
        args: parsed CLI arguments; reads ``job_id``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    applications = spark_client.list_applications(args.job_id)
    utils.print_applications(applications)
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from the cluster config file merged with CLI args.

    Args:
        args: parsed CLI arguments; reads cluster id/sizing options,
            ``username``/``password``, ``docker_repo`` and ``wait``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    # read cluster.yaml configuartion file, overwrite values with args
    cluster_conf = ClusterConfig()
    cluster_conf.merge(
        uid=args.cluster_id,
        size=args.size,
        size_low_pri=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        wait=args.wait,
        username=args.username,
        password=args.password,
        docker_repo=args.docker_repo)

    # Translate config-file dicts into SDK models; None means "not configured".
    custom_scripts = None
    if cluster_conf.custom_scripts:
        custom_scripts = [
            aztk.spark.models.CustomScript(
                script=entry['script'], run_on=entry['runOn'])
            for entry in cluster_conf.custom_scripts
        ]

    file_shares = None
    if cluster_conf.file_shares:
        file_shares = [
            aztk.spark.models.FileShare(
                storage_account_name=share['storage_account_name'],
                storage_account_key=share['storage_account_key'],
                file_share_path=share['file_share_path'],
                mount_path=share['mount_path'])
            for share in cluster_conf.file_shares
        ]

    user_conf = None
    if cluster_conf.username:
        # Resolve the SSH key / password, prompting the user if necessary.
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            cluster_conf.username,
            cluster_conf.password,
            spark_client.secrets_config)
        user_conf = aztk.spark.models.UserConfiguration(
            username=cluster_conf.username,
            password=password,
            ssh_key=ssh_key)

    print_cluster_conf(cluster_conf)

    spinner = utils.Spinner()
    spinner.start()
    # create spark cluster
    cluster = spark_client.create_cluster(
        aztk.spark.models.ClusterConfiguration(
            cluster_id=cluster_conf.uid,
            vm_count=cluster_conf.size,
            vm_low_pri_count=cluster_conf.size_low_pri,
            vm_size=cluster_conf.vm_size,
            subnet_id=cluster_conf.subnet_id,
            custom_scripts=custom_scripts,
            file_shares=file_shares,
            docker_repo=cluster_conf.docker_repo,
            spark_configuration=load_aztk_spark_config(),
            user_configuration=user_conf),
        wait=cluster_conf.wait)
    spinner.stop()

    if cluster_conf.wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def execute(args: typing.NamedTuple):
    """Stop a running job and confirm on stdout.

    Args:
        args: parsed CLI arguments; reads ``job_id``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    job_id = args.job_id
    spark_client.stop_job(job_id)
    print("Stopped Job {0}".format(job_id))
def execute(_: typing.NamedTuple):
    """Print a table of all clusters in the account (takes no arguments)."""
    client = aztk.spark.Client(config.load_aztk_screts())
    utils.print_clusters(client.list_clusters())
def execute(args: typing.NamedTuple):
    """Print a table of all jobs in the account."""
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    jobs = spark_client.list_jobs()
    utils.print_jobs(jobs)
import subprocess from datetime import datetime from azure.batch.models import BatchErrorException import aztk.spark from aztk.error import AztkError from cli import config dt = datetime.now() time = dt.microsecond base_job_id = "job-{}".format(time) # load secrets # note: this assumes secrets are set up in .aztk/secrets spark_client = aztk.spark.Client(config.load_aztk_screts()) def test_submit_job(): test_id = "submit-" try: app1 = aztk.spark.models.ApplicationConfiguration( name="pipy100", application="examples/src/main/python/pi.py", application_args=[100]) app2 = aztk.spark.models.ApplicationConfiguration( name="pipy101", application="examples/src/main/python/pi.py", application_args=[100]) job_configuration = aztk.spark.models.JobConfiguration( id=test_id + base_job_id,
def execute(args: typing.NamedTuple):
    """Fetch and print the log of one application belonging to a job.

    Args:
        args: parsed CLI arguments; reads ``job_id`` and ``app_name``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    logs = spark_client.get_job_application_log(args.job_id, args.app_name)
    print(logs.log)
def execute(args: typing.NamedTuple):
    """Show the details of a single cluster.

    Args:
        args: parsed CLI arguments; reads ``cluster_id``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    utils.print_cluster(spark_client, spark_client.get_cluster(args.cluster_id))
def execute(args: typing.NamedTuple):
    """Show the details of one application within a job.

    Args:
        args: parsed CLI arguments; reads ``job_id`` and ``app_name``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    application = spark_client.get_application(args.job_id, args.app_name)
    utils.print_application(application)
def execute(args: typing.NamedTuple):
    """Show the details of a single job.

    Args:
        args: parsed CLI arguments; reads ``job_id``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    job = spark_client.get_job(args.job_id)
    utils.print_job(spark_client, job)
def execute(args: typing.NamedTuple):
    """Copy a file to the cluster's nodes.

    Args:
        args: parsed CLI arguments; reads ``cluster_id``, ``source_path``
            and ``dest_path``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    spark_client.cluster_copy(
        cluster_id=args.cluster_id,
        source_path=args.source_path,
        destination_path=args.dest_path)
def execute(args: typing.NamedTuple):
    """Submit a Spark application to a cluster; optionally stream its logs.

    Args:
        args: parsed CLI arguments; reads cluster id, app name/path/args,
            resource options (jars, files, memory, cores) and ``wait``.
    """
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    def split_csv(raw):
        # Comma-separated CLI value with stray spaces removed; [] when absent.
        return [] if raw is None else raw.replace(' ', '').split(',')

    jars = split_csv(args.jars)
    py_files = split_csv(args.py_files)
    files = split_csv(args.files)

    log.info("-------------------------------------------")
    log.info("Spark cluster id: %s", args.cluster_id)
    log.info("Spark app name: %s", args.name)
    log.info("Wait for app completion: %s", args.wait)
    if args.main_class is not None:
        log.info("Entry point class: %s", args.main_class)
    if jars:
        log.info("JARS: %s", jars)
    if py_files:
        log.info("PY_Files: %s", py_files)
    if files:
        log.info("Files: %s", files)
    # Only advertise the optional driver/executor settings that were supplied.
    for message, value in (
            ("Driver java options: %s", args.driver_java_options),
            ("Driver library path: %s", args.driver_library_path),
            ("Driver class path: %s", args.driver_class_path),
            ("Driver memory: %s", args.driver_memory),
            ("Executor memory: %s", args.executor_memory),
            ("Driver cores: %s", args.driver_cores),
            ("Executor cores: %s", args.executor_cores)):
        if value is not None:
            log.info(message, value)
    log.info("Application: %s", args.app)
    log.info("Application arguments: %s", args.app_args)
    log.info("-------------------------------------------")

    # Submit without waiting; log streaming below handles the wait case.
    spark_client.submit(
        cluster_id=args.cluster_id,
        application=aztk.spark.models.Application(
            name=args.name,
            application=args.app,
            application_args=args.app_args,
            main_class=args.main_class,
            jars=jars,
            py_files=py_files,
            files=files,
            driver_java_options=args.driver_java_options,
            driver_library_path=args.driver_library_path,
            driver_class_path=args.driver_class_path,
            driver_memory=args.driver_memory,
            executor_memory=args.executor_memory,
            driver_cores=args.driver_cores,
            executor_cores=args.executor_cores,
            max_retry_count=args.max_retry_count),
        wait=False)

    if args.wait:
        utils.stream_logs(
            client=spark_client,
            cluster_id=args.cluster_id,
            application_name=args.name)