# Shared imports assumed by the helpers below. These functions originate from
# aztk's Spark client modules; private helpers such as __app_cmd, __app_submit_cmd,
# __cluster_install_cmd, __get_secrets_env, __get_docker_credentials,
# _get_aztk_environment and POOL_ADMIN_USER_IDENTITY are defined alongside them
# in those modules.
import os
from typing import List

import yaml
import azure.batch.models as batch_models

from aztk import models as aztk_models
from aztk.spark import models
from aztk.utils import constants, helpers
from aztk.utils.command_builder import CommandBuilder


def generate_task(spark_client, job, application_tasks):
    """Generate the Batch job manager task that runs every application in a job.

    Each application's task definition is serialized to YAML and uploaded as a
    resource file so the node-side scripts can rehydrate it.
    """
    resource_files = []
    for application, task in application_tasks:
        task_definition_resource_file = helpers.upload_text_to_container(
            container_name=job.id,
            application_name=application.name + ".yaml",
            file_path=application.name + ".yaml",
            content=yaml.dump(task),
            blob_client=spark_client.blob_client,
        )
        resource_files.append(task_definition_resource_file)

    task_cmd = __app_cmd()

    # Create the job manager task; kill_job_on_completion=False keeps the Batch
    # job alive after the manager task finishes, and the task may run on a
    # low-priority node.
    task = batch_models.JobManagerTask(
        id=job.id,
        command_line=helpers.wrap_commands_in_shell([task_cmd]),
        resource_files=resource_files,
        kill_job_on_completion=False,
        allow_low_priority_node=True,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)),
    )

    return task
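
# A hedged usage sketch for generate_task above. The job object and the
# (application, task definition) pairs are hypothetical stand-ins for what the
# aztk job-submission path assembles before scheduling the job manager task.
def _example_generate_job_manager_task(spark_client, job):
    # Hypothetical: pair each application with a serializable task definition.
    application_tasks = [(app, {"id": app.name}) for app in job.applications]
    job_manager_task = generate_task(spark_client, job, application_tasks)
    # The returned JobManagerTask is attached to the Batch job at creation time.
    return job_manager_task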
def generate_cluster_start_task(
        core_base_operations,
        zip_resource_file: batch_models.ResourceFile,
        cluster_id: str,
        gpu_enabled: bool,
        docker_repo: str = None,
        docker_run_options: str = None,
        file_shares: List[models.FileShare] = None,
        mixed_mode: bool = False,
        worker_on_master: bool = True,
):
    """
    This will return the start task object for the pool to be created.
    :param cluster_id str: Id of the cluster (used for uploading the resource files)
    :param zip_resource_file: Resource file object pointing to the zip file containing scripts to run on the node
    """
    resource_files = [zip_resource_file]

    spark_web_ui_port = constants.DOCKER_SPARK_WEB_UI_PORT
    spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
    spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT

    spark_container_name = constants.DOCKER_SPARK_CONTAINER_NAME
    spark_submit_logs_file = constants.SPARK_SUBMIT_LOGS_FILE

    # TODO: use certificate
    environment_settings = (
        __get_secrets_env(core_base_operations) + [
            batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME", value=spark_container_name),
            batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file),
            batch_models.EnvironmentSetting(name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)),
        ] + __get_docker_credentials(core_base_operations) +
        _get_aztk_environment(cluster_id, worker_on_master, mixed_mode))

    # Start task command
    command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, docker_run_options, file_shares)

    return batch_models.StartTask(
        command_line=helpers.wrap_commands_in_shell(command),
        resource_files=resource_files,
        environment_settings=environment_settings,
        user_identity=POOL_ADMIN_USER_IDENTITY,
        wait_for_success=True,
        max_task_retry_count=2,
    )
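
# Hedged usage sketch: build the start task for a new pool. The operations object
# and zip resource file are placeholders for what aztk's cluster-create path
# provides after uploading the node scripts.
def _example_cluster_start_task(core_base_operations, zip_resource_file):
    start_task = generate_cluster_start_task(
        core_base_operations,
        zip_resource_file,
        cluster_id="my-cluster",  # hypothetical cluster id
        gpu_enabled=False,
        worker_on_master=True,
    )
    # wait_for_success=True means nodes become usable only after setup succeeds;
    # max_task_retry_count=2 gives the install script two retries.
    return start_task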
def generate_cluster_start_task(spark_client,
                                zip_resource_file: batch_models.ResourceFile,
                                gpu_enabled: bool,
                                docker_repo: str = None,
                                file_shares: List[aztk_models.FileShare] = None):
    """
    This will return the start task object for the pool to be created.
    :param zip_resource_file: Resource file object pointing to the zip file containing scripts to run on the node
    """
    resource_files = [zip_resource_file]

    spark_web_ui_port = constants.DOCKER_SPARK_WEB_UI_PORT
    spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
    spark_jupyter_port = constants.DOCKER_SPARK_JUPYTER_PORT
    spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT
    spark_rstudio_server_port = constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT

    # TODO: use certificate
    environment_settings = [
        batch_models.EnvironmentSetting(
            name="BATCH_ACCOUNT_KEY", value=spark_client.batch_config.account_key),
        batch_models.EnvironmentSetting(
            name="BATCH_ACCOUNT_URL", value=spark_client.batch_config.account_url),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_NAME", value=spark_client.blob_config.account_name),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_KEY", value=spark_client.blob_config.account_key),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_SUFFIX", value=spark_client.blob_config.account_suffix),
        batch_models.EnvironmentSetting(
            name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_JUPYTER_PORT", value=spark_jupyter_port),
        batch_models.EnvironmentSetting(
            name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_RSTUDIO_SERVER_PORT", value=spark_rstudio_server_port),
    ] + __get_docker_credentials(spark_client)

    # Start task command
    command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, file_shares)

    return batch_models.StartTask(
        command_line=helpers.wrap_commands_in_shell(command),
        resource_files=resource_files,
        environment_settings=environment_settings,
        user_identity=POOL_ADMIN_USER_IDENTITY,
        wait_for_success=True)
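
# This older, spark_client-based variant inlines the Batch and Storage credentials
# as plain environment variables instead of going through __get_secrets_env. A
# hedged sketch for checking what actually lands on each node:
def _example_inspect_start_task_env(spark_client, zip_resource_file):
    start_task = generate_cluster_start_task(spark_client, zip_resource_file, gpu_enabled=False)
    # Expect BATCH_ACCOUNT_*, STORAGE_ACCOUNT_*, the SPARK_*_PORT settings, and
    # any Docker registry credentials appended by __get_docker_credentials.
    return [setting.name for setting in start_task.environment_settings]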
def submit_application(spark_client, cluster_id, application, wait: bool = False):
    """
    Submit a spark app
    """
    resource_files = []

    app_resource_file = helpers.upload_file_to_container(
        container_name=application.name,
        file_path=application.application,
        blob_client=spark_client.blob_client,
        use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(
            container_name=application.name,
            file_path=jar,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(
            container_name=application.name,
            file_path=py_file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        py_files_resource_file_paths.append(current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(
            container_name=application.name,
            file_path=file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # Create command to submit task
    cmd = __app_submit_cmd(
        spark_client=spark_client,
        cluster_id=cluster_id,
        name=application.name,
        app=app_resource_file.file_path,
        app_args=application.application_args,
        main_class=application.main_class,
        jars=[jar_resource_file_path.file_path for jar_resource_file_path in jar_resource_file_paths],
        py_files=[py_files_resource.file_path for py_files_resource in py_files_resource_file_paths],
        files=[file_resource_file_path.file_path for file_resource_file_path in files_resource_file_paths],
        driver_java_options=application.driver_java_options,
        driver_library_path=application.driver_library_path,
        driver_class_path=application.driver_class_path,
        driver_memory=application.driver_memory,
        executor_memory=application.executor_memory,
        driver_cores=application.driver_cores,
        executor_cores=application.executor_cores)

    # Get the cluster to find its master node
    cluster = spark_client.get_cluster(cluster_id)

    # Affinitize task to master node
    # master_node_affinity_id = helpers.get_master_node_id(cluster_id, spark_client.batch_client)
    rls = spark_client.get_remote_login_settings(cluster.id, cluster.master_node_id)

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        affinity_info=batch_models.AffinityInformation(affinity_id=cluster.master_node_id),
        command_line=helpers.wrap_commands_in_shell(cmd),
        resource_files=resource_files,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)))

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    spark_client.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=spark_client.batch_client)
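
# Hedged usage sketch for submit_application: build an application description and
# submit it, blocking until the Spark job completes. ApplicationConfiguration and
# its fields follow aztk.spark.models, but treat the import path and values as
# illustrative assumptions rather than a drop-in snippet.
def _example_submit_pi_job(spark_client, cluster_id):
    from aztk.spark import models as spark_models  # assumed import path
    application = spark_models.ApplicationConfiguration(
        name="pi-example",
        application="examples/src/main/python/pi.py",  # hypothetical local file
        application_args=["100"],
    )
    # wait=True polls the Batch task via helpers.wait_for_task_to_complete.
    submit_application(spark_client, cluster_id, application, wait=True)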
def generate_task(spark_client, container_id, application, remote=False):
    """Generate the Batch task that runs a single Spark application.

    When remote is True the application binary is assumed to be reachable from
    the node and is not uploaded to blob storage.
    """
    resource_files = []

    # The application provided is not hosted remotely and therefore must be uploaded
    if not remote:
        app_resource_file = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=application.application,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )

        # Upload application file
        resource_files.append(app_resource_file)

        application.application = "$AZ_BATCH_TASK_WORKING_DIR/" + os.path.basename(application.application)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=jar,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=py_file,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        py_files_resource_file_paths.append(current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=file,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # Upload application definition
    application.jars = [os.path.basename(jar) for jar in application.jars]
    application.py_files = [os.path.basename(py_files) for py_files in application.py_files]
    application.files = [os.path.basename(files) for files in application.files]
    application_definition_file = helpers.upload_text_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path="application.yaml",
        content=yaml.dump(vars(application)),
        blob_client=spark_client.blob_client,
    )
    resource_files.append(application_definition_file)

    # Create command to submit task
    task_cmd = CommandBuilder("sudo docker exec")
    task_cmd.add_argument("-i")
    task_cmd.add_option("-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
    task_cmd.add_option("-e", "STORAGE_LOGS_CONTAINER={0}".format(container_id))
    task_cmd.add_argument("spark /bin/bash >> output.log 2>&1")
    task_cmd.add_argument(
        r'-c "source ~/.bashrc; '
        r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
        r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
        r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"')

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]),
        resource_files=resource_files,
        constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count),
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)),
    )

    return task
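
# Hedged sketch of the remote path in generate_task above: with remote=True the
# application binary is not uploaded and application.application is left as-is,
# so it must already be reachable from the node (e.g. a URL into cloud storage).
def _example_remote_task(spark_client, container_id, application):
    # Only the jars/py_files/files and the application.yaml definition are
    # uploaded; the application itself stays remote.
    return generate_task(spark_client, container_id, application, remote=True)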
def generate_task(spark_client, container_id, application):
    """Generate the Batch task that runs a single Spark application.

    Variant of the helper above that always uploads the application file and
    runs the node-side submit script with the pyenv-managed Python.
    """
    resource_files = []

    app_resource_file = helpers.upload_file_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path=application.application,
        blob_client=spark_client.blob_client,
        use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=jar,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=py_file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        py_files_resource_file_paths.append(current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # Upload application definition
    application.application = os.path.basename(application.application)
    application.jars = [os.path.basename(jar) for jar in application.jars]
    application.py_files = [os.path.basename(py_files) for py_files in application.py_files]
    application.files = [os.path.basename(files) for files in application.files]
    application_definition_file = helpers.upload_text_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path='application.yaml',
        content=yaml.dump(vars(application)),
        blob_client=spark_client.blob_client)
    resource_files.append(application_definition_file)

    # Create command to submit task
    task_cmd = CommandBuilder('sudo docker exec')
    task_cmd.add_argument('-i')
    task_cmd.add_option('-e', 'AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR')
    task_cmd.add_option('-e', 'STORAGE_LOGS_CONTAINER={0}'.format(container_id))
    task_cmd.add_argument('spark /bin/bash >> output.log 2>&1')
    task_cmd.add_argument(r'-c "source ~/.bashrc; '
                          r'cd $AZ_BATCH_TASK_WORKING_DIR; '
                          r'\$(pyenv root)/versions/\$AZTK_PYTHON_VERSION/bin/python '
                          r'\$DOCKER_WORKING_DIR/aztk/node_scripts/submit.py"')

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]),
        resource_files=resource_files,
        constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count),
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)))

    return task
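
# Hedged sketch of wiring a generated task into the cluster's Batch job, mirroring
# what submit_application does above; the job id is assumed to equal the cluster
# (container) id, as elsewhere in these helpers.
def _example_schedule_task(spark_client, container_id, application):
    task = generate_task(spark_client, container_id, application)
    spark_client.batch_client.task.add(job_id=container_id, task=task)
    return task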