import os

# NOTE: the module paths below assume aztk's node_scripts layout
from core import config
from install import create_user, pick_master, plugins, scripts, spark, spark_container
import wait_until_master_selected
from aztk.models.plugins import PluginTarget


def setup_spark_container():
    """
    Code run in the main spark container
    """
    is_master = os.environ.get("AZTK_IS_MASTER") == "true"
    is_worker = os.environ.get("AZTK_IS_WORKER") == "true"
    print("Setting up spark container. Master: ", is_master, ", Worker: ", is_worker)

    print("Copying spark setup config")
    spark.setup_conf()
    print("Done copying spark setup config")

    spark.setup_connection()

    if is_master:
        spark.start_spark_master()

    if is_worker:
        spark.start_spark_worker()

    plugins.setup_plugins(target=PluginTarget.SparkContainer, is_master=is_master, is_worker=is_worker)

    # Sentinel file: signals that container-side setup has finished
    open("/tmp/setup_complete", "a").close()
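

# The sentinel file written above is how code outside the container can tell
# that bootstrapping finished. A minimal sketch of such a wait loop follows;
# wait_for_container_setup is hypothetical (not part of this module) and
# assumes the container's /tmp is visible to the caller via a volume mount.
def wait_for_container_setup(sentinel="/tmp/setup_complete", timeout=600, poll_interval=5):
    """Block until the spark container drops its setup sentinel, or time out."""
    import time

    deadline = time.time() + timeout
    while time.time() < deadline:
        if os.path.exists(sentinel):
            return
        time.sleep(poll_interval)
    raise TimeoutError("spark container did not finish setup within {}s".format(timeout))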


def setup_node():
    """Set up the node: take part in master election, then configure it as master or worker."""
    client = config.batch_client

    create_user.create_user(batch_client=client)
    spark.setup_conf()

    # Dedicated nodes (and every node when the pool is not in mixed mode)
    # compete in master election; low-priority nodes in a mixed-mode pool
    # instead wait for a master to be chosen.
    if os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true" or os.environ["MIXED_MODE"] == "False":
        is_master = pick_master.find_master(client)
    else:
        is_master = False
        wait_until_master_selected.main()

    master_node_id = pick_master.get_master_node_id(config.batch_client.pool.get(config.pool_id))
    master_node = config.batch_client.compute_node.get(config.pool_id, master_node_id)
    os.environ["MASTER_IP"] = master_node.ip_address

    if is_master:
        setup_as_master()
        plugins.setup_plugins(is_master=True, is_worker=True)
        scripts.run_custom_scripts(is_master=True, is_worker=True)
    else:
        setup_as_worker()
        plugins.setup_plugins(is_master=False, is_worker=True)
        scripts.run_custom_scripts(is_master=False, is_worker=True)

    open("/tmp/setup_complete", "a").close()
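

# The election gate above also appears in setup_host below, with differently
# named env vars (MIXED_MODE/"False" here, AZTK_MIXED_MODE/"false" there).
# A hypothetical helper capturing the rule as setup_node applies it; this is
# a sketch for illustration, not part of this module:
def _should_run_master_election():
    """True when this node should compete in master election."""
    is_dedicated = os.environ.get("AZ_BATCH_NODE_IS_DEDICATED") == "true"
    mixed_mode = os.environ.get("MIXED_MODE") != "False"
    return is_dedicated or not mixed_mode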


def setup_host(docker_repo: str, docker_run_options: str):
    """
    Code to be run on the node (NOT in a container)

    :param docker_repo: location of the Docker image to use
    :param docker_run_options: additional command-line options to pass to docker run
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)

    if os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true" or os.environ["AZTK_MIXED_MODE"] == "false":
        is_master = pick_master.find_master(client)
    else:
        is_master = False
        wait_until_master_selected.main()

    # The master also runs a worker when AZTK_WORKER_ON_MASTER is set
    is_worker = not is_master or os.environ.get("AZTK_WORKER_ON_MASTER") == "true"

    master_node_id = pick_master.get_master_node_id(config.batch_client.pool.get(config.pool_id))
    master_node = config.batch_client.compute_node.get(config.pool_id, master_node_id)

    # Export the election result so the spark container can pick it up
    os.environ["AZTK_IS_MASTER"] = "true" if is_master else "false"
    os.environ["AZTK_IS_WORKER"] = "true" if is_worker else "false"
    os.environ["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()

    # setup_node_scheduling(client, cluster_conf, is_master)

    # TODO pass azure file shares
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        docker_run_options=docker_run_options,
        gpu_enabled=os.environ.get("AZTK_GPU_ENABLED") == "true",
        plugins=cluster_conf.plugins,
    )

    plugins.setup_plugins(target=PluginTarget.Host, is_master=is_master, is_worker=is_worker)
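

# read_cluster_config() is called by setup_host but defined elsewhere in the
# package. The only contract setup_host relies on is that the returned object
# exposes a .plugins attribute. A minimal sketch under that assumption; the
# YAML path and the wrapper class are hypothetical, not aztk's actual API.
import yaml


class _ClusterConfigSketch:
    """Bare-bones stand-in exposing the field setup_host reads."""

    def __init__(self, raw: dict):
        self.plugins = raw.get("plugins", [])


def _read_cluster_config_sketch(path="/mnt/aztk/cluster.yaml"):
    """Load the cluster configuration from a YAML file on the node."""
    with open(path) as f:
        return _ClusterConfigSketch(yaml.safe_load(f) or {})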