Example #1
File: submit.py Project: gridl/aztk
def affinitize_task_to_master(spark_client, cluster_id, task):
    """Pin the given batch task to the cluster's master node via task affinity."""
    cluster = spark_client.get_cluster(cluster_id)
    if cluster.master_node_id is None:
        raise AztkError("Master has not yet been selected. Please wait until the cluster is finished provisioning.")
    master_node = spark_client.batch_client.compute_node.get(pool_id=cluster_id, node_id=cluster.master_node_id)
    task.affinity_info = batch_models.AffinityInformation(affinity_id=master_node.affinity_id)
    return task
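For context, the following is a minimal usage sketch of the helper above; the task id, the command line, and the assumption that spark_client and the cluster already exist are hypothetical placeholders, and the batch job id mirrors the cluster id as in Example #7 below.

# Hedged usage sketch: assumes a configured spark_client and an existing
# cluster whose batch job shares the cluster id.
import azure.batch.models as batch_models

def run_on_master(spark_client, cluster_id):
    task = batch_models.TaskAddParameter(
        id='sample-task',                        # hypothetical task id
        command_line='/bin/bash -c "hostname"')  # hypothetical command
    task = affinitize_task_to_master(spark_client, cluster_id, task)
    # aztk adds tasks to the batch job named after the cluster (see Example #7)
    spark_client.batch_client.task.add(job_id=cluster_id, task=task)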
Example #2
def affinitize_task_to_master(spark_client, cluster_id, task):
    cluster = spark_client.get_cluster(cluster_id)
    master_node = spark_client.batch_client.compute_node.get(
        pool_id=cluster_id, node_id=cluster.master_node_id)
    task.affinity_info = batch_models.AffinityInformation(
        affinity_id=master_node.affinity_id)
    return task
Example #3
def affinitize_task_to_master(batch_client, cluster_id, task):
    # the aztk cluster id doubles as the batch pool id
    pool = batch_client.pool.get(cluster_id)
    master_node_id = get_master_node_id(pool)
    master_node = batch_client.compute_node.get(pool_id=cluster_id,
                                                node_id=master_node_id)
    task.affinity_info = batch_models.AffinityInformation(
        affinity_id=master_node.affinity_id)
    return task
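Example #3 depends on a get_master_node_id helper that is not shown. The sketch below is one plausible reading, assuming the master node id is recorded as pool metadata; the MASTER_NODE_METADATA_KEY name is a hypothetical placeholder, not taken from the snippet.

# Hedged sketch of the missing helper: looks the master node id up in the
# pool's metadata, returning None if it has not been recorded yet.
MASTER_NODE_METADATA_KEY = 'aztk_master_node'  # hypothetical key name

def get_master_node_id(pool):
    if pool.metadata is None:
        return None
    for metadata in pool.metadata:
        if metadata.name == MASTER_NODE_METADATA_KEY:
            return metadata.value
    return None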
Example #4
def designate_master_docker_swarm_node(batch_client, pool_id, nodes, job_id):
    """Designate a master docker swarm node by selecting a node in the
    pool to be the swarm manager. This is accomplished via IP selection in
    the pool of nodes and running the swarm init command via an
    affinitized task. This is for Docker 1.12+.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str pool_id: The id of the pool.
    :param list nodes: list of `batchserviceclient.models.ComputeNode`
    :param str job_id: The id of the job to create.
    :rtype: tuple
    :return: ((master ipaddress, master node id), swarm token)
    """
    # designate the lowest ip address node as the master
    # sort by numeric octet values so '10.0.0.10' does not precede '10.0.0.4'
    nodes = sorted(
        nodes, key=lambda node: tuple(int(o) for o in node.ip_address.split('.')))
    master_node_ip_address = nodes[0].ip_address
    master_node_id = nodes[0].id
    master_node_affinity_id = nodes[0].affinity_id
    master_node = (master_node_ip_address, master_node_id)
    print('master node is: {}'.format(master_node))

    # create job
    job = batchmodels.JobAddParameter(
        id=job_id, pool_info=batchmodels.PoolInformation(pool_id=pool_id))

    batch_client.job.add(job)

    # add docker swarm manage as an affinitized task to run on the master node
    # NOTE: task affinity is weak. if the node has no available scheduling
    # slots, the task may be executed on a different node. for this example,
    # it is not an issue since this node should be available for scheduling.
    task_commands = [
        'docker swarm init --advertise-addr {}'.format(master_node_ip_address),
        'docker swarm join-token -q worker',
    ]
    print('initializing docker swarm cluster via Azure Batch task...')
    task = batchmodels.TaskAddParameter(
        id='swarm-manager',
        affinity_info=batchmodels.AffinityInformation(
            affinity_id=master_node_affinity_id),
        command_line=common.helpers.wrap_commands_in_shell(
            'linux', task_commands),
        run_elevated=True,
    )
    batch_client.task.add(job_id=job.id, task=task)

    # wait for task to complete
    common.helpers.wait_for_tasks_to_complete(batch_client, job_id,
                                              datetime.timedelta(minutes=5))

    # retrieve the swarm token
    stdout = common.helpers.read_task_file_as_string(
        batch_client, job.id, task.id, common.helpers._STANDARD_OUT_FILE_NAME)
    token = stdout.splitlines()[-1].strip()
    print('swarm token: {}'.format(token))

    return master_node, token
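Examples #4 through #6 call common.helpers.wrap_commands_in_shell from the Azure Batch samples to turn a list of commands into a single task command line. The sketch below only illustrates what such a helper typically does; the real helper's exact shell flags may differ.

# Hedged sketch: joins the commands and wraps them in a single shell
# invocation suitable for a batch task command line.
def wrap_commands_in_shell(ostype, commands):
    if ostype.lower() == 'linux':
        # fail fast if any command in the sequence fails
        return "/bin/bash -c 'set -e; set -o pipefail; {}; wait'".format(
            '; '.join(commands))
    if ostype.lower() == 'windows':
        return 'cmd.exe /c "{}"'.format(' & '.join(commands))
    raise ValueError('unknown ostype: {}'.format(ostype))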
Example #5
def designate_master_docker_swarm_node(batch_client, pool_id, nodes, job_id):
    """Designate a master docker swarm node by selecting a node in the
    pool to be the swarm manager. This is accomplished via IP selection in
    the pool of nodes and running the swarm manage command via an
    affinitized task.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str pool_id: The id of the pool.
    :param list nodes: list of `batchserviceclient.models.ComputeNode`
    :param str job_id: The id of the job to create.
    :rtype: tuple
    :return: (ipaddress, master node id)
    """
    # designate the lowest ip address node as the master
    # sort by numeric octet values so '10.0.0.10' does not precede '10.0.0.4'
    nodes = sorted(
        nodes, key=lambda node: tuple(int(o) for o in node.ip_address.split('.')))
    master_node_ip_address = nodes[0].ip_address
    master_node_id = nodes[0].id
    master_node_affinity_id = nodes[0].affinity_id
    master_node = (master_node_ip_address, master_node_id)
    # create a node list based on the number of nodes and master; this assumes
    # contiguous addresses in the 10.0.0.0/24 subnet
    lastoctet = int(master_node_ip_address.split('.')[-1])
    nodelist = '10.0.0.[{}:{}]'.format(lastoctet, lastoctet + len(nodes) - 1)
    print('master node is: {} nodelist is: {}'.format(master_node, nodelist))

    # create job
    job = batchmodels.JobAddParameter(
        id=job_id,
        pool_info=batchmodels.PoolInformation(pool_id=pool_id))

    batch_client.job.add(job)

    # add docker swarm manage as an affinitized task to run on the master node
    task_commands = [
        ('docker run -d -p 3375:3375 -t swarm manage -H tcp://0.0.0.0:3375 '
         '"nodes://{}:2375"').format(nodelist)
    ]
    print('creating docker swarm cluster via Azure Batch task...')
    task = batchmodels.TaskAddParameter(
        id="swarm-master",
        affinity_info=batchmodels.AffinityInformation(
            # affinitize via the node's affinity id rather than its node id
            affinity_id=master_node_affinity_id),
        command_line=common.helpers.wrap_commands_in_shell(
            'linux', task_commands),
        run_elevated=True,
    )
    batch_client.task.add(job_id=job.id, task=task)

    # wait for task to complete
    common.helpers.wait_for_tasks_to_complete(
        batch_client,
        job_id,
        datetime.timedelta(minutes=5))
    print('docker swarm cluster created.')

    return master_node
Example #6
def add_nodes_to_swarm(batch_client, pool_id, nodes, job_id, master_node,
                       swarm_token):
    """Add compute nodes to swarm

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str pool_id: The id of the pool.
    :param list nodes: list of `batchserviceclient.models.ComputeNode`
    :param str job_id: The id of the job.
    :param tuple master_node: master node info
    :param str swarm_token: swarm token
    """
    task_commands = [
        'docker swarm join --token {} {}:2377'.format(swarm_token,
                                                      master_node[0]),
    ]
    print('joining docker swarm for each compute node via Azure Batch task...')
    i = 0
    for node in nodes:
        # manager node is already part of the swarm, so skip it
        if node.id == master_node[1]:
            continue
        task = batchmodels.TaskAddParameter(
            id='swarm-join-{0:03d}'.format(i),
            affinity_info=batchmodels.AffinityInformation(
                affinity_id=node.affinity_id),
            command_line=common.helpers.wrap_commands_in_shell(
                'linux', task_commands),
            run_elevated=True,
        )
        batch_client.task.add(job_id=job_id, task=task)
        i += 1

    # wait for task to complete
    common.helpers.wait_for_tasks_to_complete(batch_client, job_id,
                                              datetime.timedelta(minutes=5))

    print('docker swarm cluster created.')
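Taken together, Examples #4 and #6 form a two-step flow: initialize swarm mode on the chosen master, then join the remaining nodes. The sketch below wires them together; the pool id, job id, and the assumption that the pool's nodes are already idle are placeholders, not part of the original sample.

# Hedged end-to-end sketch: designate a master, then join the other nodes.
def build_swarm(batch_client, pool_id, job_id):
    nodes = list(batch_client.compute_node.list(pool_id))
    master_node, swarm_token = designate_master_docker_swarm_node(
        batch_client, pool_id, nodes, job_id)
    add_nodes_to_swarm(
        batch_client, pool_id, nodes, job_id, master_node, swarm_token)
    return master_node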
Example #7
def submit_application(spark_client, cluster_id, application, wait: bool = False):
    """
    Submit a Spark application to the cluster as a batch task affinitized to the master node
    """

    resource_files = []

    app_resource_file = helpers.upload_file_to_container(container_name=application.name,
                                                         file_path=application.application,
                                                         blob_client=spark_client.blob_client,
                                                         use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                          file_path=jar,
                                                                          blob_client=spark_client.blob_client,
                                                                          use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                               file_path=py_file,
                                                                               blob_client=spark_client.blob_client,
                                                                               use_full_path=False)
        py_files_resource_file_paths.append(
            current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                    file_path=file,
                                                                    blob_client=spark_client.blob_client,
                                                                    use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # create command to submit task
    cmd = __app_submit_cmd(
        spark_client=spark_client,
        cluster_id=cluster_id,
        name=application.name,
        app=app_resource_file.file_path,
        app_args=application.application_args,
        main_class=application.main_class,
        jars=[jar_resource_file_path.file_path for jar_resource_file_path in jar_resource_file_paths],
        py_files=[py_files_resource.file_path for py_files_resource in py_files_resource_file_paths],
        files=[file_resource_file_path.file_path for file_resource_file_path in files_resource_file_paths],
        driver_java_options=application.driver_java_options,
        driver_library_path=application.driver_library_path,
        driver_class_path=application.driver_class_path,
        driver_memory=application.driver_memory,
        executor_memory=application.executor_memory,
        driver_cores=application.driver_cores,
        executor_cores=application.executor_cores)

    # Get cluster info (needed for the master node id)
    cluster = spark_client.get_cluster(cluster_id)

    # Affinitize task to master node
    # master_node_affinity_id = helpers.get_master_node_id(cluster_id, spark_client.batch_client)
    rls = spark_client.get_remote_login_settings(cluster.id, cluster.master_node_id)

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        affinity_info=batch_models.AffinityInformation(
            affinity_id=cluster.master_node_id),
        command_line=helpers.wrap_commands_in_shell(cmd),
        resource_files=resource_files,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin))
    )

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    spark_client.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=spark_client.batch_client)
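The __app_submit_cmd helper used above is not shown; it assembles a spark-submit invocation from the uploaded resource paths and the application's settings. The sketch below is only an illustration of how a few of those parameters map onto standard spark-submit flags; it is not the real aztk implementation, which also handles environment setup and option escaping.

# Hedged sketch: maps a subset of the parameters onto spark-submit flags.
def build_spark_submit_cmd(app, main_class=None, jars=None, py_files=None,
                           driver_memory=None, executor_memory=None,
                           app_args=None):
    cmd = ['spark-submit']
    if main_class:
        cmd += ['--class', main_class]
    if jars:
        cmd += ['--jars', ','.join(jars)]
    if py_files:
        cmd += ['--py-files', ','.join(py_files)]
    if driver_memory:
        cmd += ['--driver-memory', driver_memory]
    if executor_memory:
        cmd += ['--executor-memory', executor_memory]
    cmd.append(app)
    cmd += list(app_args or [])
    return ' '.join(cmd)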