Esempio n. 1
0
def setup_connection():
    """
    Write the master node's IP address to the spark "master" config file.

    The master node id is resolved from the pool identified by
    ``config.pool_id``; the node is then fetched with ``get_node`` and its
    ``ip_address`` (followed by a newline) is written to the file named
    "master" inside ``spark_conf_folder``, replacing any previous content.
    """
    master_node_id = pick_master.get_master_node_id(
        batch_client.pool.get(config.pool_id))
    master_node = get_node(master_node_id)

    master_config_file = os.path.join(spark_conf_folder, "master")

    # The context manager guarantees the file is closed even when the
    # print/write below raises.
    with open(master_config_file, 'w') as master_file:
        print("Adding master node ip {0} to config file '{1}'".format(
            master_node.ip_address, master_config_file))
        master_file.write("{0}\n".format(master_node.ip_address))
Esempio n. 2
0
def wait_for_master():
    """
    Block until the master node reports a ready state.

    Returns immediately when the current node (``config.node_id``) is the
    master. Otherwise the master node is fetched with ``get_node`` every 10
    seconds until its state is ``idle`` or ``running``.
    """
    print("Waiting for master to be ready.")
    master_node_id = pick_master.get_master_node_id(
        batch_client.pool.get(config.pool_id))

    if master_node_id == config.node_id:
        return

    # States in which the master is considered ready; built once, outside
    # the polling loop.
    ready_states = (
        batchmodels.ComputeNodeState.idle,
        batchmodels.ComputeNodeState.running,
    )

    while True:
        master_node = get_node(master_node_id)

        if master_node.state in ready_states:
            break

        # Format the timestamp into the message; passing it as a second
        # print() argument left the "{0}" placeholder unformatted.
        print("{0} Still waiting on master".format(datetime.datetime.now()))
        time.sleep(10)
Esempio n. 3
0
def setup_host(docker_repo: str):
    """
    Prepare the node itself (this runs on the host, NOT inside a container).

    Creates the user, determines the master/worker role of this node,
    exports AZTK_IS_MASTER, AZTK_IS_WORKER and AZTK_MASTER_IP into the
    process environment, starts the spark container and finally runs the
    host-targeted plugins.

    :param docker_repo: forwarded unchanged to
        ``spark_container.start_spark_container``.
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)

    # find_master is only called for dedicated nodes or when mixed mode is
    # off; any other node just waits until a master has been selected.
    may_pick_master = (os.environ['AZ_BATCH_NODE_IS_DEDICATED'] == "true"
                       or os.environ['AZTK_MIXED_MODE'] == "false")
    if may_pick_master:
        is_master = pick_master.find_master(client)
    else:
        is_master = False
        wait_until_master_selected.main()

    worker_on_master = os.environ.get("AZTK_WORKER_ON_MASTER") == "true"
    is_worker = not is_master or worker_on_master

    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(
        config.pool_id, master_node_id)

    # Publish the role flags and the master address through the environment.
    os.environ["AZTK_IS_MASTER"] = "true" if is_master else "false"
    os.environ["AZTK_IS_WORKER"] = "true" if is_worker else "false"
    os.environ["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()
    gpu_enabled = os.environ.get("AZTK_GPU_ENABLED") == "true"

    #TODO pass azure file shares
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        gpu_enabled=gpu_enabled,
        plugins=cluster_conf.plugins,
    )
    plugins.setup_plugins(
        target=PluginTarget.Host,
        is_master=is_master,
        is_worker=is_worker,
    )
Esempio n. 4
0
def setup_node():
    """
    Configure spark on this node and set it up as master or as worker.

    Runs ``spark.setup_conf``, asks ``pick_master.find_master`` whether this
    node is the master, exports the master node's IP address as MASTER_IP and
    then runs the matching setup routine followed by the custom scripts.
    """
    client = config.batch_client

    spark.setup_conf()

    is_master = pick_master.find_master(client)

    pool = config.batch_client.pool.get(config.pool_id)
    master_node_id = pick_master.get_master_node_id(pool)
    master_node = config.batch_client.compute_node.get(
        config.pool_id, master_node_id)

    os.environ["MASTER_IP"] = master_node.ip_address

    # Worker path first; the master path below also runs the custom scripts
    # with is_worker=True.
    if not is_master:
        setup_as_worker()
        scripts.run_custom_scripts(is_master=False, is_worker=True)
        return

    setup_as_master()
    scripts.run_custom_scripts(is_master=True, is_worker=True)
Esempio n. 5
0
def affinitize_task_to_master(batch_client, cluster_id, task):
    """
    Set the task's affinity information to the master node and return it.

    :param batch_client: client used to fetch the pool and the compute node.
    :param cluster_id: passed as ``pool_id`` when fetching the master node.
    :param task: task object whose ``affinity_info`` attribute is overwritten.
    :return: the same ``task`` object, mutated in place.
    """
    # NOTE(review): the pool is looked up with config.pool_id while the node
    # is looked up with cluster_id - presumably both name the same pool;
    # confirm against callers.
    master_id = get_master_node_id(batch_client.pool.get(config.pool_id))
    node = batch_client.compute_node.get(
        pool_id=cluster_id, node_id=master_id)
    affinity = batch_models.AffinityInformation(affinity_id=node.affinity_id)
    task.affinity_info = affinity
    return task