Example #1
0
def affinitize_task_to_master(batch_client, cluster_id, task):
    """Pin *task* to the cluster's master node using Batch affinity.

    :param batch_client: Azure Batch client used for pool/node lookups
    :param cluster_id: id of the pool/cluster whose master node to target
    :param task: Batch task, modified in place and returned for chaining
    :return: the same task with ``affinity_info`` set to the master node's
        affinity id
    """
    # Bug fix: the pool lookup previously used the global config.pool_id
    # while the node lookup used the cluster_id parameter — use cluster_id
    # consistently for both.
    pool = batch_client.pool.get(cluster_id)
    master_node_id = get_master_node_id(pool)
    master_node = batch_client.compute_node.get(pool_id=cluster_id,
                                                node_id=master_node_id)
    task.affinity_info = batch_models.AffinityInformation(
        affinity_id=master_node.affinity_id)
    return task
Example #2
0
def start_spark_worker():
    """Start the Spark worker daemon and connect it to the cluster master."""
    # Block until the master node is available before trying to connect.
    wait_for_master()

    start_script = os.path.join(spark_home, "sbin", "start-slave.sh")
    pool = batch_client.pool.get(config.pool_id)
    master_node = get_node(pick_master.get_master_node_id(pool))

    master_url = "spark://{0}:7077".format(master_node.ip_address)
    worker_cmd = [
        start_script,
        master_url,
        "--webui-port",
        str(config.spark_worker_ui_port),
    ]
    print("Connecting to master with '{0}'".format(" ".join(worker_cmd)))
    call(worker_cmd)
Example #3
0
def setup_connection():
    """Write the master node's IP address to the Spark "master" config file.

    Looks up the cluster's master node via the Batch service and records its
    IP address so later setup steps can tell the master from the workers.
    """
    master_node_id = pick_master.get_master_node_id(
        batch_client.pool.get(config.pool_id))
    master_node = get_node(master_node_id)

    master_config_file = os.path.join(spark_conf_folder, "master")

    print("Adding master node ip {0} to config file '{1}'".format(
        master_node.ip_address, master_config_file))
    # Use a context manager: the original open()/close() pair leaked the
    # file handle if the write raised.
    with open(master_config_file, "w", encoding="UTF-8") as master_file:
        master_file.write("{0}\n".format(master_node.ip_address))
Example #4
0
def main():
    """Block until a master node has been elected for the pool.

    Polls the Batch service roughly once per second. Any failure — including
    the imports, which may not succeed while the node is still bootstrapping —
    is printed and retried indefinitely.
    """
    master = None

    while master is None:
        try:
            # Imported inside the loop on purpose: these modules may not be
            # importable yet early in node startup, and the ImportError must
            # be retried like any other transient failure.
            from aztk.node_scripts.core import config
            from aztk.node_scripts.install.pick_master import get_master_node_id

            batch_client = config.batch_client
            pool = batch_client.pool.get(config.pool_id)
            master = get_master_node_id(pool)
        except Exception as e:
            print(e)
            time.sleep(1)
        else:
            # Bug fix: previously the sleep also ran after the master was
            # found, adding a pointless 1 s delay on the success path.
            if master is None:
                time.sleep(1)
Example #5
0
def wait_for_master():
    """Block until the cluster's master node is idle or running.

    Returns immediately if this node *is* the master; otherwise polls the
    master node's state every 10 seconds until it is ready.
    """
    print("Waiting for master to be ready.")
    master_node_id = pick_master.get_master_node_id(
        batch_client.pool.get(config.pool_id))

    # This node is the master itself — nothing to wait for.
    if master_node_id == config.node_id:
        return

    while True:
        master_node = get_node(master_node_id)

        if master_node.state in [
                batchmodels.ComputeNodeState.idle,
                batchmodels.ComputeNodeState.running
        ]:
            break
        else:
            # Bug fix: the timestamp was passed as a second positional
            # argument to print() instead of being formatted into the
            # message ('print("{0} ...", now)' never substitutes {0}).
            print("{0} Still waiting on master".format(datetime.datetime.now()))
            time.sleep(10)
Example #6
0
def setup_host(docker_repo: str, docker_run_options: str):
    """
    Code to be run on the node (NOT in a container)
    :param docker_repo: location of the Docker image to use
    :param docker_run_options: additional command-line options to pass to docker run
    """
    client = config.batch_client

    create_user.create_user(batch_client=client)
    # Dedicated nodes — or any node when the cluster is not in mixed mode —
    # participate in master election; low-priority nodes in mixed mode never
    # become master and simply wait for the election to complete.
    if os.environ["AZ_BATCH_NODE_IS_DEDICATED"] == "true" or os.environ["AZTK_MIXED_MODE"] == "false":
        is_master = pick_master.find_master(client)
    else:
        is_master = False
        wait_until_master_selected.main()

    # Every non-master node is a worker; the master itself is a worker only
    # when AZTK_WORKER_ON_MASTER is enabled.
    is_worker = not is_master or os.environ.get("AZTK_WORKER_ON_MASTER") == "true"
    master_node_id = pick_master.get_master_node_id(config.batch_client.pool.get(config.pool_id))
    master_node = config.batch_client.compute_node.get(config.pool_id, master_node_id)

    # Export role and master info via the environment — presumably consumed
    # by the container/plugin setup steps launched below (verify in those
    # scripts).
    if is_master:
        os.environ["AZTK_IS_MASTER"] = "true"
    else:
        os.environ["AZTK_IS_MASTER"] = "false"
    if is_worker:
        os.environ["AZTK_IS_WORKER"] = "true"
    else:
        os.environ["AZTK_IS_WORKER"] = "false"

    os.environ["AZTK_MASTER_IP"] = master_node.ip_address

    cluster_conf = read_cluster_config()

    # TODO pass azure file shares
    spark_container.start_spark_container(
        docker_repo=docker_repo,
        docker_run_options=docker_run_options,
        gpu_enabled=os.environ.get("AZTK_GPU_ENABLED") == "true",
        plugins=cluster_conf.plugins,
    )
    # Run host-side plugins after the container is up, tagged with this
    # node's master/worker role.
    plugins.setup_plugins(target=PluginTarget.Host, is_master=is_master, is_worker=is_worker)