def execute(args: typing.NamedTuple):
    """Submit a Spark job built from the merged job configuration."""
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    job_conf = JobConfig()
    job_conf.merge(args.job_id, args.job_conf)

    # Start from the default spark configuration files under .aztk/, then let
    # the job configuration override any file it explicitly supplies.
    spark_configuration = config.load_aztk_spark_config()
    for attr in ("spark_defaults_conf", "spark_env_sh", "core_site_xml"):
        override = getattr(job_conf, attr)
        if override:
            setattr(spark_configuration, attr, override)

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=job_conf.applications,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        toolkit=job_conf.toolkit,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        plugins=job_conf.plugins,
        worker_on_master=job_conf.worker_on_master,
        scheduling_target=job_conf.scheduling_target,
    )

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.job.submit(job_configuration, args.vm_os_ver)
def execute(args: typing.NamedTuple):
    """Build a JobConfiguration from the merged job config and submit it."""
    client = aztk.spark.Client(config.load_aztk_secrets())

    merged = JobConfig()
    merged.merge(args.job_id, args.job_conf)

    # By default, load the spark configuration files found in .aztk/.
    spark_conf = config.load_aztk_spark_config()
    # Overwrite with values from the job config when they are present.
    if merged.spark_defaults_conf:
        spark_conf.spark_defaults_conf = merged.spark_defaults_conf
    if merged.spark_env_sh:
        spark_conf.spark_env_sh = merged.spark_env_sh
    if merged.core_site_xml:
        spark_conf.core_site_xml = merged.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=merged.id,
        applications=merged.applications,
        custom_scripts=merged.custom_scripts,
        spark_configuration=spark_conf,
        vm_size=merged.vm_size,
        docker_repo=merged.docker_repo,
        max_dedicated_nodes=merged.max_dedicated_nodes,
        max_low_pri_nodes=merged.max_low_pri_nodes,
        subnet_id=merged.subnet_id,
        worker_on_master=merged.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    client.submit_job(job_configuration)
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml layered with CLI arguments."""
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config(args.spark_conf)

    # Read cluster.yaml (from a user-supplied path when given), then merge
    # the CLI arguments on top so they take precedence.
    if args.cluster_path is None:
        file_config, wait = config.read_cluster_config()
    else:
        file_config, wait = config.read_cluster_config(args.cluster_path)
    cluster_conf.merge(file_config)

    cli_user = UserConfiguration(username=args.username, password=args.password)
    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            size=args.size,
            size_low_priority=args.size_low_priority,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=cli_user,
        ))

    toolkit = cluster_conf.toolkit
    if toolkit:
        if args.docker_repo:
            toolkit.docker_repo = args.docker_repo
        if args.docker_run_options:
            toolkit.docker_run_options = args.docker_run_options

    # An explicit --wait/--no-wait flag overrides the file-level setting.
    if args.wait is not None:
        wait = args.wait

    # Resolve SSH credentials only when a username was actually configured.
    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_configuration.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_configuration,
        )
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username, password=password, ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)

    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.cluster.create(
            cluster_configuration=cluster_conf, vm_ver=args.vm_os_ver, wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml merged with CLI arguments."""
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # Read the cluster.yaml configuration file, then overwrite its values
    # with whatever was passed on the command line.
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)

    # Honour the old flag name but steer users toward the new one.
    if args.size_low_pri is not None:
        deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.")
        args.size_low_priority = args.size_low_pri

    cli_overrides = ClusterConfiguration(
        cluster_id=args.cluster_id,
        size=args.size,
        size_low_priority=args.size_low_priority,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(username=args.username, password=args.password))
    cluster_conf.merge(cli_overrides)

    if args.docker_repo and cluster_conf.toolkit:
        cluster_conf.toolkit.docker_repo = args.docker_repo

    # An explicit --wait/--no-wait flag overrides the file-level setting.
    if args.wait is not None:
        wait = args.wait

    # Resolve SSH credentials only when a username was actually configured.
    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    cluster_conf.validate()
    utils.print_cluster_conf(cluster_conf, wait)

    with utils.Spinner():
        # create spark cluster
        cluster = spark_client.create_cluster(cluster_conf, wait=wait)

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def execute(args: typing.NamedTuple):
    """Create a Spark cluster from cluster.yaml merged with CLI arguments.

    Reads the default cluster configuration file, overlays command-line
    values, resolves SSH credentials when a username is configured, and
    then creates the cluster while showing a progress spinner.
    """
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(ClusterConfiguration(
        cluster_id=args.cluster_id,
        vm_count=args.size,
        vm_low_pri_count=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        user_configuration=UserConfiguration(
            username=args.username,
            password=args.password,
        ),
        docker_repo=args.docker_repo))

    # An explicit --wait/--no-wait flag overrides the file-level setting.
    wait = wait if args.wait is None else args.wait

    # Resolve SSH credentials only when a username was actually configured.
    user_configuration = cluster_conf.user_configuration
    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_config.ssh_pub_key,
                                                        user_configuration.username,
                                                        user_configuration.password,
                                                        spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key
        )
    else:
        cluster_conf.user_configuration = None

    utils.print_cluster_conf(cluster_conf, wait)

    spinner = utils.Spinner()
    spinner.start()
    try:
        # create spark cluster
        cluster = spark_client.create_cluster(
            cluster_conf,
            wait=wait
        )
    finally:
        # Fix: always stop the spinner, even when create_cluster raises;
        # previously a failure left the spinner running with no cleanup.
        spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
def _to_application_configuration(application):
    """Map one raw application mapping from the job config onto an aztk ApplicationConfiguration."""
    return aztk.spark.models.ApplicationConfiguration(
        name=application.get('name'),
        application=application.get('application'),
        application_args=application.get('application_args'),
        main_class=application.get('main_class'),
        jars=[],
        py_files=[],
        files=[],
        driver_java_options=application.get('driver_java_options'),
        driver_library_path=application.get('driver_library_path'),
        driver_class_path=application.get('driver_class_path'),
        driver_memory=application.get('driver_memory'),
        executor_memory=application.get('executor_memory'),
        driver_cores=application.get('driver_cores'),
        executor_cores=application.get('executor_cores'))


def execute(args: typing.NamedTuple):
    """Submit a Spark job: merge the job config, convert its applications,
    and hand the resulting JobConfiguration to the Spark client."""
    spark_client = aztk.spark.Client(config.load_aztk_secrets())

    job_conf = JobConfig()
    job_conf.merge(args.job_id, args.job_conf)

    # Idiom fix: build the application list with a comprehension over a
    # named helper instead of a long manual append loop.
    aztk_applications = [_to_application_configuration(app) for app in job_conf.applications]

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=aztk_applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        docker_repo=job_conf.docker_repo,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        worker_on_master=job_conf.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.submit_job(job_configuration)