Beispiel #1
0
def generate_task(spark_client, job, application_tasks):
    """Build the job manager task that ships every application definition.

    Each ``(application, task)`` pair is serialised with ``yaml.dump`` and
    uploaded through ``helpers.upload_text_to_container`` into the container
    named after ``job.id``.  The values returned by those uploads become the
    resource files of the resulting job manager task.

    :param spark_client: client whose ``blob_client`` is used for the uploads
    :param job: object whose ``id`` names both the container and the task
    :param application_tasks: iterable of ``(application, task)`` pairs
    :return: the assembled ``batch_models.JobManagerTask``
    """

    def _upload_definition(application, definition):
        # The same "<application name>.yaml" value is used both as the
        # application name and as the file path of the upload.
        yaml_name = application.name + ".yaml"
        return helpers.upload_text_to_container(
            container_name=job.id,
            application_name=yaml_name,
            file_path=yaml_name,
            content=yaml.dump(definition),
            blob_client=spark_client.blob_client,
        )

    uploaded_definitions = [
        _upload_definition(application, definition)
        for application, definition in application_tasks
    ]

    # __app_cmd is defined elsewhere in this module; it supplies the single
    # command that gets wrapped in a shell invocation below.
    app_command = __app_cmd()

    # Task-scoped auto user with admin elevation.
    admin_task_user = batch_models.UserIdentity(
        auto_user=batch_models.AutoUserSpecification(
            scope=batch_models.AutoUserScope.task,
            elevation_level=batch_models.ElevationLevel.admin,
        )
    )

    return batch_models.JobManagerTask(
        id=job.id,
        command_line=helpers.wrap_commands_in_shell([app_command]),
        resource_files=uploaded_definitions,
        kill_job_on_completion=False,
        allow_low_priority_node=True,
        user_identity=admin_task_user,
    )
Beispiel #2
0
def create_job_schedule(batch_client, job_schedule_id, vm_size, vm_count,
                        block_blob_client):
    """Submit a recurring job schedule whose jobs run on a per-job auto pool.

    The pool specification uses a cloud service configuration (OS family
    '6') and a start task that downloads ``_PYTHON_DOWNLOAD`` as
    ``python373.exe`` and runs the ``_PYTHON_INSTALL`` command.  The job
    manager task runs ``python <_SIMPLE_TASK_NAME>`` against a script that is
    uploaded to blob storage here and referenced through the URL returned by
    ``upload_blob_and_create_sas``.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str job_schedule_id: The id of the job schedule to create
    :param str vm_size: vm size (sku)
    :param int vm_count: number of dedicated vms to allocate
    :param block_blob_client: The storage block blob client to use.
    :type block_blob_client: `azure.storage.blob.BlockBlobService`
    """
    shell = common.helpers.wrap_commands_in_shell

    # --- pool: start task that runs the Python install command ------------
    install_python = batchmodels.StartTask(
        command_line=shell('windows', ['{}'.format(_PYTHON_INSTALL)]),
        resource_files=[
            batchmodels.ResourceFile(http_url=_PYTHON_DOWNLOAD,
                                     file_path='python373.exe'),
        ],
        wait_for_success=True,
        user_identity=batchmodels.UserIdentity(
            auto_user=batchmodels.AutoUserSpecification(
                elevation_level=_USER_ELEVATION_LEVEL)))

    pool_spec = batchmodels.PoolSpecification(
        vm_size=vm_size,
        target_dedicated_nodes=vm_count,
        cloud_service_configuration=batchmodels.CloudServiceConfiguration(
            os_family='6'),
        start_task=install_python)

    # Auto pool with job lifetime and keep_alive disabled.
    auto_pool = batchmodels.AutoPoolSpecification(
        auto_pool_id_prefix="JobScheduler",
        pool=pool_spec,
        keep_alive=False,
        pool_lifetime_option=batchmodels.PoolLifetimeOption.job)

    # --- job manager task: upload the script and point the task at it -----
    # The timestamp passed to the upload helper is 30 minutes from now.
    script_url = common.helpers.upload_blob_and_create_sas(
        block_blob_client,
        _CONTAINER_NAME,
        _SIMPLE_TASK_NAME,
        _SIMPLE_TASK_PATH,
        datetime.datetime.utcnow() + datetime.timedelta(minutes=30))

    manager_task = batchmodels.JobManagerTask(
        id="JobManagerTask",
        command_line=shell('windows',
                           ['python {}'.format(_SIMPLE_TASK_NAME)]),
        resource_files=[
            batchmodels.ResourceFile(file_path=_SIMPLE_TASK_NAME,
                                     http_url=script_url),
        ])

    # Each job is terminated as soon as all of its tasks have completed, so
    # the schedule is free to create the next job.
    job_spec = batchmodels.JobSpecification(
        pool_info=batchmodels.PoolInformation(
            auto_pool_specification=auto_pool),
        on_all_tasks_complete=batchmodels.OnAllTasksComplete.terminate_job,
        job_manager_task=manager_task)

    # --- schedule: recur every 10 minutes, nothing after 30 minutes -------
    cutoff = datetime.datetime.utcnow() + datetime.timedelta(minutes=30)
    recurrence = batchmodels.Schedule(
        do_not_run_after=cutoff,
        recurrence_interval=datetime.timedelta(minutes=10))

    batch_client.job_schedule.add(
        cloud_job_schedule=batchmodels.JobScheduleAddParameter(
            id=job_schedule_id,
            schedule=recurrence,
            job_specification=job_spec))
Beispiel #3
0
def create_job_schedule(batch_client, job_schedule_id, vm_size, vm_count):
    """Submit a job schedule that recurs every 15 days on a per-job auto pool.

    The pool specification requests Canonical UbuntuServer 18.04-LTS nodes,
    references version "1" of the ``azure_batch`` application package and
    runs a start-up script from that package as a pool-scoped admin auto
    user.  The job manager task runs ``azure_batch_main.py`` from the same
    package with ``python3``.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str job_schedule_id: The id of the job schedule to create
    :param str vm_size: vm size (sku)
    :param int vm_count: number of dedicated vms to allocate
    """
    # --- node image -------------------------------------------------------
    ubuntu_nodes = batchmodels.VirtualMachineConfiguration(
        image_reference=batchmodels.ImageReference(
            publisher="Canonical",
            offer="UbuntuServer",
            sku="18.04-LTS",
            version="latest"),
        node_agent_sku_id="batch.node.ubuntu 18.04")

    # --- start task: run the package's node start-up script ---------------
    # NOTE(review): $AZ_BATCH_APP_PACKAGE_azure_batch_1 presumably points at
    # the directory of the application package referenced below - confirm.
    node_startup = batchmodels.StartTask(
        command_line=(
            '/bin/bash -c "$AZ_BATCH_APP_PACKAGE_azure_batch_1/azure_batch/'
            'job_schedular_node_startup_tasks.sh"'),
        wait_for_success=True,
        user_identity=batchmodels.UserIdentity(
            auto_user=batchmodels.AutoUserSpecification(
                scope=batchmodels.AutoUserScope.pool,
                elevation_level=batchmodels.ElevationLevel.admin)))

    pool_spec = batchmodels.PoolSpecification(
        vm_size=vm_size,
        target_dedicated_nodes=vm_count,
        virtual_machine_configuration=ubuntu_nodes,
        start_task=node_startup,
        application_package_references=[
            batchmodels.ApplicationPackageReference(
                application_id="azure_batch", version="1"),
        ])

    # Auto pool with job lifetime and keep_alive disabled.
    auto_pool = batchmodels.AutoPoolSpecification(
        auto_pool_id_prefix="JobScheduler",
        pool=pool_spec,
        keep_alive=False,
        pool_lifetime_option=batchmodels.PoolLifetimeOption.job)

    # --- job manager task: the command run for every scheduled job --------
    manager_task = batchmodels.JobManagerTask(
        id="JobManagerTask",
        command_line=(
            '/bin/bash -c " python3 $AZ_BATCH_APP_PACKAGE_azure_batch_1/azure_batch/'
            'azure_batch_main.py"'))

    # Each job is terminated as soon as all of its tasks have completed, so
    # the schedule is free to create the next job.
    job_spec = batchmodels.JobSpecification(
        pool_info=batchmodels.PoolInformation(
            auto_pool_specification=auto_pool),
        on_all_tasks_complete=batchmodels.OnAllTasksComplete.terminate_job,
        job_manager_task=manager_task)

    # --- schedule: interval between successive jobs -----------------------
    recurrence = batchmodels.Schedule(
        recurrence_interval=datetime.timedelta(days=15))

    batch_client.job_schedule.add(
        cloud_job_schedule=batchmodels.JobScheduleAddParameter(
            id=job_schedule_id,
            schedule=recurrence,
            job_specification=job_spec))