Exemple #1
0
def add_task(batch_service_client, job_id, task_id, application_cmdline,
             input_files, run_elevated, num_instances, coordination_cmdline,
             common_files):
    """
    Builds a single task and submits it to the specified job.

    Multi-instance settings are attached to the task only when a coordination
    command line is given or more than one instance is requested; otherwise
    the task is submitted without them.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str job_id: The ID of the job to which to add the task.
    :param str task_id: The ID of the task to be added.
    :param str application_cmdline: The application commandline for the task.
    :param list input_files: Resource files attached to the task.
    :param bool run_elevated: flag determining if task should run as elevated
    :param int num_instances: Number of instances for the task
    :param str coordination_cmdline: The coordination commandline placed in
     the multi-instance settings.
    :param list common_files: Common resource files placed in the
     multi-instance settings (unused when no such settings are created).
    """

    print('Adding {} task to job [{}]...'.format(task_id, job_id))

    # Same short-circuit as a plain `a or (b and b > 1)` test: the instance
    # count is only inspected when no coordination command line was given.
    needs_multi_instance = bool(coordination_cmdline) or bool(
        num_instances and num_instances > 1)

    mpi_settings = (
        batchmodels.MultiInstanceSettings(
            number_of_instances=num_instances,
            coordination_command_line=coordination_cmdline,
            common_resource_files=common_files)
        if needs_multi_instance else None)

    new_task = batchmodels.TaskAddParameter(
        id=task_id,
        command_line=application_cmdline,
        run_elevated=run_elevated,
        resource_files=input_files,
        multi_instance_settings=mpi_settings)

    batch_service_client.task.add(job_id, new_task)
Exemple #2
0
 def multi_instance_task_param(n):
     """
     Build the multi-instance settings for a task sized by ``n``.

     The number of instances is ``(n - 1) // 4 + 1``, i.e. ``n`` divided by
     four and rounded up for positive integers. The coordination command runs
     ``create_host_file.py`` from the job-preparation directory and copies the
     project ``src`` folder into the node's shared directory.

     :param int n: Size used to derive the instance count (presumably the
      number of processes to run, four per node; confirm against callers).
     :return: The settings object built by ``models.MultiInstanceSettings``.
     """
     # Integer ceiling of n / 4 without going through floats.
     instance_count = (n - 1) // 4 + 1

     coordination_cmd = (
         "bash -c 'python3 $AZ_BATCH_JOB_PREP_DIR/wd/st7-project-aneo-group-2/init/create_host_file.py;"
         "cp -r $AZ_BATCH_JOB_PREP_DIR/wd/st7-project-aneo-group-2/src $AZ_BATCH_NODE_SHARED_DIR'"
     )

     return models.MultiInstanceSettings(
         coordination_command_line=coordination_cmd,
         number_of_instances=instance_count,
         common_resource_files=None)
Exemple #3
0
def add_task(
        batch_service_client, job_id, task_id, num_instances,
        application_cmdline, input_files, elevation_level,
        output_file_names, output_container_sas,
        coordination_cmdline, common_files):
    """
    Builds a single task and submits it to the specified job.

    The task runs as a pool-scoped auto user with the requested elevation
    level and uploads files matching ``output_file_names`` to the given
    container when the task completes. Multi-instance settings are attached
    only when a coordination command line is given or more than one instance
    is requested.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str job_id: The ID of the job to which to add the task.
    :param str task_id: The ID of the task to be added.
    :param int num_instances: Number of instances for the task
    :param str application_cmdline: The application commandline for the task.
    :param list input_files: Resource files attached to the task.
    :param elevation_level: Elevation level used to run the task; either
     'admin' or 'nonadmin'.
    :type elevation_level: `azure.batch.models.ElevationLevel`
    :param str output_file_names: File pattern selecting the output files to
     upload.
    :param str output_container_sas: Container URL used as the upload
     destination (presumably carrying a SAS token; confirm with callers).
    :param str coordination_cmdline: The coordination commandline placed in
     the multi-instance settings.
    :param list common_files: Common resource files placed in the
     multi-instance settings (unused when no such settings are created).
    """

    print('Adding {} task to job [{}]...'.format(task_id, job_id))

    # The instance count is only inspected when no coordination command line
    # was supplied, exactly like `a or (b and b > 1)`.
    needs_multi_instance = bool(coordination_cmdline) or bool(
        num_instances and num_instances > 1)
    mpi_settings = (
        batchmodels.MultiInstanceSettings(
            number_of_instances=num_instances,
            coordination_command_line=coordination_cmdline,
            common_resource_files=common_files)
        if needs_multi_instance else None)

    # Identity: auto user scoped to the pool, with the caller's elevation.
    auto_user = batchmodels.AutoUserSpecification(
        scope=batchmodels.AutoUserScope.pool,
        elevation_level=elevation_level)

    # Output upload: matching files go to the container on task completion.
    blob_destination = batchmodels.OutputFileBlobContainerDestination(
        container_url=output_container_sas)
    upload_on_completion = batchmodels.OutputFileUploadOptions(
        upload_condition=batchmodels.OutputFileUploadCondition.task_completion)
    uploaded_output = batchmodels.OutputFile(
        file_pattern=output_file_names,
        destination=batchmodels.OutputFileDestination(
            container=blob_destination),
        upload_options=upload_on_completion)

    new_task = batchmodels.TaskAddParameter(
        id=task_id,
        command_line=application_cmdline,
        user_identity=batchmodels.UserIdentity(auto_user=auto_user),
        resource_files=input_files,
        multi_instance_settings=mpi_settings,
        output_files=[uploaded_output])

    batch_service_client.task.add(job_id, new_task)
Exemple #4
0
def task_submit(task_name):
    '''
    Automatic task submission to Azure. Pool, VMs, Jobs should be created in advance.

    Walks the parent directory, maps every ``.py`` / ``.ini`` file found
    outside the current (setup) folder to a blob URL under
    ``config_azure['storage_account_url']``, and submits one multi-instance
    MPI task that receives those files as common resource files.

    :param str task_name: The ID given to the submitted task.
    '''
    # Local import: blob URLs and node-side paths must always use '/', but
    # os.path.join inserts '\\' when this script is run from a Windows
    # client. posixpath.join yields the same result as os.path.join on POSIX.
    import posixpath

    # The working directory does not change while walking, so resolve once.
    setup_dir = os.path.abspath('./')

    common_resource_files = []
    for folder, _, files in os.walk('../'):
        # Skip setup folder
        if os.path.abspath(folder) == setup_dir:
            continue

        for file_name in files:
            if not file_name.endswith(('.py', '.ini')):
                continue

            # NOTE(review): the blob name is the bare file name, so two files
            # with the same name in different folders share one blob URL.
            blob_url = posixpath.join(config_azure['storage_account_url'],
                                      file_name)
            file_path = posixpath.join(os.path.basename(folder), file_name)
            print('Mapping {} to {}'.format(blob_url, file_path))
            common_resource_files.append(
                batchmodel.ResourceFile(blob_url,
                                        file_path,
                                        file_mode='0775'))

    command = '/usr/lib64/openmpi/bin/mpirun -mca btl_tcp_if_include eth0 -oversubscribe -n {0} -host $AZ_BATCH_HOST_LIST -wd $AZ_BATCH_TASK_SHARED_DIR python36 $AZ_BATCH_TASK_SHARED_DIR/bench.py'.format(
        config_azure['task_number_of_procs'])
    # The coordination command only echoes the Batch-provided environment
    # variables.
    coordination_command = '/bin/bash -c "echo $AZ_BATCH_HOST_LIST; echo $AZ_BATCH_TASK_SHARED_DIR; echo $AZ_BATCH_MASTER_NODE;"'
    multi_instance_settings = batchmodel.MultiInstanceSettings(
        coordination_command_line=coordination_command,
        number_of_instances=config_azure['task_number_of_instances'],
        common_resource_files=common_resource_files)
    # Run as a non-admin auto user scoped to the pool.
    user = batchmodel.UserIdentity(auto_user=batchmodel.AutoUserSpecification(
        scope=batchmodel.AutoUserScope.pool,
        elevation_level=batchmodel.ElevationLevel.non_admin))
    task = batchmodel.TaskAddParameter(
        id=task_name,
        command_line=command,
        multi_instance_settings=multi_instance_settings,
        user_identity=user)

    print('Adding bench tasks to job [{0}]...'.format(config_azure['job_id']))
    batch_service.task.add(config_azure['job_id'], task)
        pool_id=NOTEBOOK_CONFIG['batch_pool_name']))
batch_client.job.add(job)

# Trainer task: command line that mounts the share, activates the py36
# environment and starts manage.py with role=trainer.
trainer_cmd_line = r'call C:\\prereq\\mount.bat && C:\\ProgramData\\Anaconda3\\Scripts\\activate.bat py36 && python -u Z:\\scripts_downpour\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3} per_iter_epsilon_reduction={4} min_epsilon={5}'.format(
    job_id, batch_update_frequency, weights_path, train_conv_layers,
    per_iter_epsilon_reduction, min_epsilon)

# The task list starts with the single trainer task; agents are added below.
tasks = [
    batchmodels.TaskAddParameter(
        id='TrainerTask',
        command_line=trainer_cmd_line,
        display_name='Trainer',
        user_identity=batchmodels.UserIdentity(
            user_name=NOTEBOOK_CONFIG['batch_job_user_name']),
        # Single-instance settings with 'cls' as the coordination command
        # (presumably a harmless placeholder; confirm).
        multi_instance_settings=batchmodels.MultiInstanceSettings(
            number_of_instances=1, coordination_command_line='cls')),
]

# Agent tasks: every agent shares one command line; only id and display name
# differ per task.
agent_cmd_line = r'call C:\\prereq\\mount.bat && C:\\ProgramData\\Anaconda3\\Scripts\\activate.bat py36 && python -u Z:\\scripts_downpour\\app\\distributed_agent.py data_dir=Z: role=agent max_epoch_runtime_sec={0} per_iter_epsilon_reduction={1:f} min_epsilon={2:f} batch_size={3} replay_memory_size={4} experiment_name={5} weights_path={6} train_conv_layers={7}'.format(
    max_epoch_runtime_sec, per_iter_epsilon_reduction, min_epsilon, batch_size,
    replay_memory_size, job_id, weights_path, train_conv_layers)

# One agent per pool node except one (presumably the node left for the
# trainer; confirm).
for i in range(NOTEBOOK_CONFIG['batch_pool_size'] - 1):
    agent_task = batchmodels.TaskAddParameter(
        id='AgentTask_{0}'.format(i),
        command_line=agent_cmd_line,
        display_name='Agent_{0}'.format(i),
        user_identity=batchmodels.UserIdentity(
            user_name=NOTEBOOK_CONFIG['batch_job_user_name']),
        multi_instance_settings=batchmodels.MultiInstanceSettings(
            number_of_instances=1, coordination_command_line='cls'))
    tasks.append(agent_task)