def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    job_conf = JobConfig()

    job_conf.merge(args.job_id, args.job_conf)

    aztk_applications = []
    for application in job_conf.applications:
        aztk_applications.append(
            aztk.spark.models.ApplicationConfiguration(
                name=application.get('name'),
                application=application.get('application'),
                application_args=application.get('application_args'),
                main_class=application.get('main_class'),
                jars=[],
                py_files=[],
                files=[],
                driver_java_options=application.get('driver_java_options'),
                driver_library_path=application.get('driver_library_path'),
                driver_class_path=application.get('driver_class_path'),
                driver_memory=application.get('driver_memory'),
                executor_memory=application.get('executor_memory'),
                driver_cores=application.get('driver_cores'),
                executor_cores=application.get('executor_cores')))

    # by default, load spark configuration files in .aztk/
    spark_configuration = config.load_aztk_spark_config()
    # overwrite with values in job_conf if they exist
    if job_conf.spark_defaults_conf:
        spark_configuration.spark_defaults_conf = job_conf.spark_defaults_conf
    if job_conf.spark_env_sh:
        spark_configuration.spark_env_sh = job_conf.spark_env_sh
    if job_conf.core_site_xml:
        spark_configuration.core_site_xml = job_conf.core_site_xml

    job_configuration = aztk.spark.models.JobConfiguration(
        id=job_conf.id,
        applications=aztk_applications,
        custom_scripts=job_conf.custom_scripts,
        spark_configuration=spark_configuration,
        vm_size=job_conf.vm_size,
        docker_repo=job_conf.docker_repo,
        max_dedicated_nodes=job_conf.max_dedicated_nodes,
        max_low_pri_nodes=job_conf.max_low_pri_nodes,
        subnet_id=job_conf.subnet_id,
        worker_on_master=job_conf.worker_on_master)

    # TODO: utils.print_job_conf(job_configuration)
    spark_client.submit_job(job_configuration)
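
# Usage sketch (illustrative only, not part of the CLI source): the handler
# above expects an argparse-style namespace carrying `job_id` and `job_conf`
# (presumably the path to a job.yaml file). The values below are hypothetical
# examples of how the handler might be driven directly:
#
#     from argparse import Namespace
#     execute(Namespace(job_id="example-job", job_conf="./job.yaml"))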
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())
    cluster_conf = ClusterConfiguration()
    cluster_conf.spark_configuration = load_aztk_spark_config()

    # read cluster.yaml configuration file, overwrite values with args
    file_config, wait = config.read_cluster_config()
    cluster_conf.merge(file_config)
    cluster_conf.merge(
        ClusterConfiguration(
            cluster_id=args.cluster_id,
            vm_count=args.size,
            vm_low_pri_count=args.size_low_pri,
            vm_size=args.vm_size,
            subnet_id=args.subnet_id,
            user_configuration=UserConfiguration(
                username=args.username,
                password=args.password,
            ),
            docker_repo=args.docker_repo))
    wait = wait if args.wait is None else args.wait

    user_configuration = cluster_conf.user_configuration

    if user_configuration and user_configuration.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            user_configuration.username,
            user_configuration.password,
            spark_client.secrets_config)
        cluster_conf.user_configuration = aztk.spark.models.UserConfiguration(
            username=user_configuration.username,
            password=password,
            ssh_key=ssh_key)
    else:
        cluster_conf.user_configuration = None

    print_cluster_conf(cluster_conf, wait)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(cluster_conf, wait=wait)

    spinner.stop()

    if wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
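
# Usage sketch (illustrative only): the handler above reads `cluster_id`,
# `size`, `size_low_pri`, `vm_size`, `subnet_id`, `username`, `password`,
# `docker_repo` and `wait` from the argparse namespace; arguments left as None
# are presumably ignored by merge(), so the values loaded from cluster.yaml
# win. The namespace below is a hypothetical example, not the real argparse
# wiring:
#
#     from argparse import Namespace
#     execute(Namespace(cluster_id="example-cluster", size=2, size_low_pri=0,
#                       vm_size="standard_f2", subnet_id=None, username="spark",
#                       password=None, docker_repo=None, wait=True))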
def execute(args: typing.NamedTuple):
    spark_client = aztk.spark.Client(config.load_aztk_screts())

    # read cluster.yaml configuration file, overwrite values with args
    cluster_conf = ClusterConfig()
    cluster_conf.merge(
        uid=args.cluster_id,
        size=args.size,
        size_low_pri=args.size_low_pri,
        vm_size=args.vm_size,
        subnet_id=args.subnet_id,
        wait=args.wait,
        username=args.username,
        password=args.password,
        docker_repo=args.docker_repo)

    if cluster_conf.custom_scripts:
        custom_scripts = []
        for custom_script in cluster_conf.custom_scripts:
            custom_scripts.append(
                aztk.spark.models.CustomScript(
                    script=custom_script['script'],
                    run_on=custom_script['runOn']))
    else:
        custom_scripts = None

    if cluster_conf.file_shares:
        file_shares = []
        for file_share in cluster_conf.file_shares:
            file_shares.append(
                aztk.spark.models.FileShare(
                    storage_account_name=file_share['storage_account_name'],
                    storage_account_key=file_share['storage_account_key'],
                    file_share_path=file_share['file_share_path'],
                    mount_path=file_share['mount_path']))
    else:
        file_shares = None

    if cluster_conf.username:
        ssh_key, password = utils.get_ssh_key_or_prompt(
            spark_client.secrets_config.ssh_pub_key,
            cluster_conf.username,
            cluster_conf.password,
            spark_client.secrets_config)
        user_conf = aztk.spark.models.UserConfiguration(
            username=cluster_conf.username,
            password=password,
            ssh_key=ssh_key)
    else:
        user_conf = None

    print_cluster_conf(cluster_conf)
    spinner = utils.Spinner()
    spinner.start()

    # create spark cluster
    cluster = spark_client.create_cluster(
        aztk.spark.models.ClusterConfiguration(
            cluster_id=cluster_conf.uid,
            vm_count=cluster_conf.size,
            vm_low_pri_count=cluster_conf.size_low_pri,
            vm_size=cluster_conf.vm_size,
            subnet_id=cluster_conf.subnet_id,
            custom_scripts=custom_scripts,
            file_shares=file_shares,
            docker_repo=cluster_conf.docker_repo,
            spark_configuration=load_aztk_spark_config(),
            user_configuration=user_conf),
        wait=cluster_conf.wait)

    spinner.stop()

    if cluster_conf.wait:
        log.info("Cluster %s created successfully.", cluster.id)
    else:
        log.info("Cluster %s is being provisioned.", cluster.id)
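
# Shape sketch (illustrative only): the custom_scripts and file_shares entries
# consumed above are plain dicts, presumably parsed from cluster.yaml. The key
# names are taken from the lookups in the code; the values are hypothetical.
#
#     custom_script = {"script": "./custom-scripts/simple.sh", "runOn": "all-nodes"}
#     file_share = {
#         "storage_account_name": "mystorageaccount",
#         "storage_account_key": "<storage-account-key>",
#         "file_share_path": "myshare",
#         "mount_path": "/mnt/myshare",
#     }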