def create_pool(batch_service_client, pool_id, vm_size, imageName, versions, auto_scale_formula):
    """
    Creates an auto-scaling pool of compute nodes from a custom image.

    :param batch_service_client: A Batch service client.
    :type batch_service_client: `azure.batch.BatchServiceClient`
    :param str pool_id: An ID for the new pool.
    :param str vm_size: VM size (SKU) for the pool's compute nodes.
    :param str imageName: Name of the custom managed image to boot nodes from.
    :param list versions: Application package versions to reference on the pool.
    :param str auto_scale_formula: Autoscale formula to apply to the pool.
    """
    print('Creating pool [{}]...'.format(pool_id))

    new_pool = batch.models.PoolAddParameter(
        id=pool_id,
        virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
            image_reference=batchmodels.ImageReference(
                virtual_machine_image_id="/subscriptions/{}/resourceGroups/{}/providers/Microsoft.Compute/images/{}".format(
                    "ad49354a-6ce2-4dae-a51d-b6907372f608",
                    "BrowseCloud",
                    imageName)
            ),
            node_agent_sku_id="batch.node.windows amd64"),
        vm_size=vm_size,
        start_task=None,
        enable_auto_scale=True,
        auto_scale_formula=auto_scale_formula,
        application_package_references=[
            batchmodels.ApplicationPackageReference(
                application_id="browsecloudtrainer", version=version)
            for version in versions],
        auto_scale_evaluation_interval=timedelta(minutes=5)  # the smallest allowed evaluation interval
    )
    batch_service_client.pool.add(new_pool)
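# For context, a formula along these lines could be passed in as
# auto_scale_formula. It is a minimal sketch built from Batch's documented
# autoscale variables ($PendingTasks, $TargetDedicatedNodes,
# $NodeDeallocationOption); the 5-minute sample window and the 10-node cap
# are illustrative assumptions, not values from the original source.
SAMPLE_AUTO_SCALE_FORMULA = """
pending = max($PendingTasks.GetSample(TimeInterval_Minute * 5));
$TargetDedicatedNodes = min(10, pending);
$NodeDeallocationOption = taskcompletion;
"""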
def test_batch_applications(self, batch_job, **kwargs):
    client = self.create_sharedkey_client(**kwargs)

    # Test List Applications
    apps = list(client.application.list())
    self.assertEqual(len(apps), 1)

    # Test Get Application
    app = client.application.get('application_id')
    self.assertIsInstance(app, models.ApplicationSummary)
    self.assertEqual(app.id, 'application_id')
    self.assertEqual(app.versions, ['v1.0'])

    # Test Create Task with Application Package
    task_id = 'python_task_with_app_package'
    task = models.TaskAddParameter(
        task_id,
        'cmd /c "echo hello world"',
        application_package_references=[
            models.ApplicationPackageReference('application_id', 'v1.0')]
    )
    response = client.task.add(batch_job.id, task)
    self.assertIsNone(response)

    # Test Get Task with Application Package
    task = client.task.get(batch_job.id, task_id)
    self.assertIsInstance(task, models.CloudTask)
    self.assertEqual(
        task.application_package_references[0].application_id,
        'application_id')
for idx, input_file in enumerate(input_files):
    command = [
        'python3 $AZ_BATCH_APP_PACKAGE{}/{} -i {} --storageaccount {} --storagecontainer {} --key {}'
        .format(app_rename_rules(_APP_NAME, _APP_VERSION), _APP_FILE,
                input_file.file_path, _STORAGE_ACCOUNT_NAME,
                _OUTPUT_STORAGE_CONTAINER, _STORAGE_ACCOUNT_KEY)
    ]
    # Append a task with a random identifier (uuid4).
    tasks.append(
        batch.models.TaskAddParameter(
            id='make_lm{}'.format(uuid.uuid4()),
            command_line=wrap_commands_in_shell('linux', command),
            resource_files=[input_file],
            # This part assumes you've uploaded an application package and
            # have specified a default version, so no version is given here.
            application_package_references=[
                batchmodels.ApplicationPackageReference(_APP_NAME)
            ]))

print("There are {} tasks created".format(len(tasks)))
successfully_added_all_tasks = batch_client.task.add_collection(_JOB_ID, tasks)
registered_tasks = batch_client.task.list(_JOB_ID)

print("Starting to wait and pinging the service for job tracking")
wait_for_tasks_to_complete(
    batch_client, _JOB_ID,
    datetime.timedelta(minutes=_EXPECTED_MODEL_RUN_IN_MINUTES))
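# wrap_commands_in_shell and app_rename_rules are referenced above but not
# defined in this snippet. The sketches below are plausible stand-ins, not
# the original definitions.

def wrap_commands_in_shell(ostype, commands):
    """Wrap a list of commands in a single shell invocation for the node OS."""
    if ostype.lower() == 'linux':
        # Fail fast and propagate pipe failures before waiting on children.
        return "/bin/bash -c 'set -e; set -o pipefail; {}; wait'".format(
            ';'.join(commands))
    elif ostype.lower() == 'windows':
        return 'cmd.exe /c "{}"'.format('&'.join(commands))
    raise ValueError('unknown ostype: {}'.format(ostype))

def app_rename_rules(app_name, app_version):
    """Build the suffix of the AZ_BATCH_APP_PACKAGE_* variable used above.

    On Linux nodes the package path is exposed as
    AZ_BATCH_APP_PACKAGE_<id>_<version>, with '.' and '-' mapped to '_'.
    """
    return '_{}_{}'.format(app_name, app_version).replace('.', '_').replace('-', '_')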
def CreateInstances(self, hardwareID, imageID, count):
    ClientUtils.LogText(
        'Creating instances with hardware {}, imageId {}, count {}'.format(
            hardwareID, imageID, count))
    startedInstances = []

    config = self._get_config()
    use_low_priority = config.use_low_priority_vms
    pool_id = get_pool_name(config.deadline_cloud_region, imageID,
                            hardwareID, use_low_priority)
    client = self._get_batch_client()

    ClientUtils.LogText('Looking for existing pool {}'.format(pool_id))

    os_images = self.GetAvailableOSImages()
    os_image = next((x for x in os_images if x.ID == imageID), None)
    if not os_image:
        ClientUtils.LogText('Failed to find image for id {}'.format(imageID))
        return startedInstances

    is_linux_pool = os_image.Platform == Environment2.OS.Linux

    pool = client.get_pool(pool_id)
    if not pool:
        ClientUtils.LogText(
            'Did not find existing pool {}, creating new one'.format(pool_id))

        batch_image_spec = images.image_id_to_image_spec(config, imageID)

        if os_image.Platform == Environment2.OS.Windows:
            starttask_url = self.batch_config.windows_start_task_url
            starttask_script = 'deadline-starttask.ps1'
        else:
            starttask_url = self.batch_config.linux_start_task_url
            starttask_script = 'deadline-starttask.sh'

        app_pkgs = [batchmodels.ApplicationPackageReference('DeadlineClient')]
        starttask_cmd = get_deadline_starttask_cmd(self.batch_config,
                                                   starttask_script, os_image)

        app_licenses = None
        if self.batch_config.app_licenses:
            app_licenses = self.batch_config.app_licenses.split(';')

        dedicated_nodes = 0 if use_low_priority else count
        low_prio_nodes = count if use_low_priority else 0

        client.create_pool(
            pool_id, hardwareID, dedicated_nodes, low_prio_nodes,
            batch_image_spec, starttask_cmd, starttask_url, starttask_script,
            self.batch_config.kv_sp_cert_thumb, app_licenses,
            self.batch_config.disable_remote_access, app_pkgs,
            self.batch_config.subnet_id,
            app_insights_app_key=self.batch_config.app_insights_app_key,
            app_insights_instrumentation_key=self.batch_config.app_insights_instrumentation_key)

        if self.batch_config.app_licenses:
            total_nodes = dedicated_nodes + low_prio_nodes
            client.create_job(pool_id, pool_id, total_nodes, is_linux_pool)
    else:
        current_dedicated = pool.target_dedicated_nodes
        current_low_prio = pool.target_low_priority_nodes
        if use_low_priority:
            current_low_prio += count
        else:
            current_dedicated += count
        try:
            client.resize_pool(pool_id,
                               target_dedicated=current_dedicated,
                               target_low_priority=current_low_prio)
            if self.batch_config.app_licenses:
                total_nodes = current_dedicated + current_low_prio
                client.create_job(pool_id, pool_id, total_nodes,
                                  is_linux_pool)
        except Exception:
            traceback.print_exc()

    return self.GetActiveInstances()
def get_pool(self, pool_id, vm_size="standard_d15_v2", node_count=0):
    '''
    Creates an Azure Batch pool, or returns the id of an existing one.

    :param pool_id: The pool_id of the pool to create
    :type pool_id: string
    :param vm_size: The type of compute nodes in the pool.
        (Defaults to 'standard_d15_v2')
    :type vm_size: string
    :param node_count: The number of compute nodes to initially create in
        the pool. Defaults to 0.
    :type node_count: number
    :rtype: string
    :return: the pool_id of the created pool
    '''
    batch_client = self.batch_client()
    pool_list = list(batch_client.pool.list())
    for pool in pool_list:
        if pool.id == pool_id:
            # We already have a pool with this pool_id; is it busy?
            node_list = list(batch_client.compute_node.list(pool.id))
            for node in node_list:
                if node.running_tasks_count > 0:
                    logging.info(
                        "pool '{0}' exists and is busy".format(pool_id))
                    break
            return pool.id

    logging.info(
        "pool '{0}' does not exist and will be created".format(pool_id))
    user_identity = batchmodels.UserIdentity(
        auto_user=batchmodels.AutoUserSpecification(elevation_level='admin'))
    start_task = batchmodels.StartTask(
        command_line="cmd /c %AZ_BATCH_APP_PACKAGE_STARTUP%\\startup.bat",
        user_identity=user_identity,
        wait_for_success=True)

    new_pool = batchmodels.PoolAddParameter(id=pool_id,
                                            vm_size=vm_size,
                                            start_task=start_task)
    new_pool.target_dedicated = node_count
    new_pool.max_tasks_per_node = 1
    new_pool.cloud_service_configuration = batchmodels.CloudServiceConfiguration(
        os_family=4)
    new_pool.application_package_references = [
        batchmodels.ApplicationPackageReference("anaconda2"),
        batchmodels.ApplicationPackageReference("startup")
    ]

    # Join the pool's nodes to the vnet. The virtual network must be in the
    # same region and subscription as the Batch account, and for pools created
    # with a cloudServiceConfiguration the subnet id must have the form
    # /subscriptions/{0}/resourceGroups/{1}/providers/{2}/virtualNetworks/{3}/subnets/{4}
    new_pool.network_configuration = batchmodels.NetworkConfiguration(
        subnet_id=self.vnet + "/subnets/default")

    try:
        batch_client.pool.add(new_pool)
    except Exception as e:
        print(e)

    return pool_id
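# For reference, self.vnet above is expected to hold the virtual network's
# ARM resource id; the subscription, resource group, and vnet names below are
# placeholders, not values from the original source.
EXAMPLE_VNET_ID = (
    "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/my-rg"
    "/providers/Microsoft.Network/virtualNetworks/my-vnet")
# self.vnet + "/subnets/default" then yields the
# /subscriptions/{0}/resourceGroups/{1}/providers/{2}/virtualNetworks/{3}/subnets/{4}
# form that the Batch service validates.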
def create_job_schedule(batch_client, job_schedule_id, vm_size, vm_count):
    """Creates an Azure Batch pool and job schedule with the specified ids.

    :param batch_client: The batch client to use.
    :type batch_client: `batchserviceclient.BatchServiceClient`
    :param str job_schedule_id: The id of the job schedule to create
    :param str vm_size: vm size (sku)
    :param int vm_count: number of vms to allocate
    """
    pool_info = batchmodels.PoolInformation(
        auto_pool_specification=batchmodels.AutoPoolSpecification(
            auto_pool_id_prefix="JobScheduler",
            pool=batchmodels.PoolSpecification(
                vm_size=vm_size,
                target_dedicated_nodes=vm_count,
                virtual_machine_configuration=batchmodels.VirtualMachineConfiguration(
                    image_reference=batchmodels.ImageReference(
                        publisher="Canonical",
                        offer="UbuntuServer",
                        sku="18.04-LTS",
                        version="latest"
                    ),
                    node_agent_sku_id="batch.node.ubuntu 18.04"
                ),
                start_task=batchmodels.StartTask(
                    command_line="/bin/bash -c "
                    "\"$AZ_BATCH_APP_PACKAGE_azure_batch_1/azure_batch/job_schedular_node_startup_tasks.sh\"",
                    wait_for_success=True,
                    user_identity=batchmodels.UserIdentity(
                        auto_user=batchmodels.AutoUserSpecification(
                            scope=batchmodels.AutoUserScope.pool,
                            elevation_level=batchmodels.ElevationLevel.admin)
                    ),
                ),
                application_package_references=[
                    batchmodels.ApplicationPackageReference(
                        application_id="azure_batch", version="1")
                ],
            ),
            keep_alive=False,
            pool_lifetime_option=batchmodels.PoolLifetimeOption.job
        )
    )

    job_spec = batchmodels.JobSpecification(
        pool_info=pool_info,
        # Terminate the job once all of its tasks are complete, so that a new
        # job can be created under the schedule at the next recurrence.
        on_all_tasks_complete=batchmodels.OnAllTasksComplete.terminate_job,
        job_manager_task=batchmodels.JobManagerTask(
            id="JobManagerTask",
            # The command the job manager task runs on each recurrence.
            command_line="/bin/bash -c \" python3 "
            "$AZ_BATCH_APP_PACKAGE_azure_batch_1/azure_batch/azure_batch_main.py\""
        ))

    # Set the recurrence interval of the job schedule.
    schedule = batchmodels.Schedule(
        recurrence_interval=datetime.timedelta(days=15))

    scheduled_job = batchmodels.JobScheduleAddParameter(
        id=job_schedule_id,
        schedule=schedule,
        job_specification=job_spec)

    batch_client.job_schedule.add(cloud_job_schedule=scheduled_job)
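# A minimal driver for create_job_schedule, assuming shared-key auth. The
# account name, key, URL, and schedule id below are placeholders, not values
# from the original source (older SDK versions name the batch_url parameter
# base_url).
from azure.batch import BatchServiceClient
from azure.batch.batch_auth import SharedKeyCredentials

credentials = SharedKeyCredentials('mybatchaccount', '<account-key>')
batch_client = BatchServiceClient(
    credentials, batch_url='https://mybatchaccount.eastus.batch.azure.com')

# Create the schedule only if it does not already exist.
if not batch_client.job_schedule.exists('training-schedule'):
    create_job_schedule(batch_client, 'training-schedule',
                        vm_size='STANDARD_D2_V3', vm_count=2)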