def workspace_volume_path(self):
    """Returns the absolute local path within the container onto which the broker's container volume is mounted

    :returns: The absolute local path within the container for the broker's volume
    :rtype: string
    """

    return get_workspace_volume_path(self.name)
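# --- Illustrative sketch (not part of Scale) ----------------------------------
# get_workspace_volume_path presumably maps a workspace name to a deterministic
# mount point inside the container. A minimal stand-in, assuming a fixed
# '/wksp' mount root (the actual helper and its root directory live elsewhere
# in Scale and may differ):
import os

def _sketch_get_workspace_volume_path(name):
    """Hypothetical stand-in for get_workspace_volume_path"""
    return os.path.join('/wksp', name)  # e.g. 'products' -> '/wksp/products'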
def _get_workspace_docker_params(self, job_exe, task_workspaces, workspaces, volume_create, docker_volumes):
    """Returns the Docker parameters needed for the given task workspaces

    :param job_exe: The job execution model (must not be queued) with related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param task_workspaces: List of the task workspaces
    :type task_workspaces: [:class:`job.configuration.job_parameter.TaskWorkspace`]
    :param workspaces: A dict of all workspaces stored by name
    :type workspaces: {string: :class:`storage.models.Workspace`}
    :param volume_create: Indicates if new volumes need to be created for these workspaces
    :type volume_create: bool
    :param docker_volumes: A list to add Docker volume names to
    :type docker_volumes: [string]
    :returns: The Docker parameters needed by the given workspaces
    :rtype: [:class:`job.configuration.job_parameter.DockerParam`]
    :raises Exception: If the job execution is still queued
    """

    params = []
    for task_workspace in task_workspaces:
        name = task_workspace.name
        mode = task_workspace.mode
        if name in workspaces:
            workspace = workspaces[name]
            if workspace.volume:
                vol = workspace.volume
                if vol.host:
                    # Host mount is special, no volume name, just the host mount path
                    volume_name = vol.remote_path
                elif volume_create:
                    # Create job_exe workspace volume for first time
                    volume_create_cmd = '$(docker volume create --driver=%s --name=%s %s)'
                    volume_name = get_workspace_volume_name(job_exe, name)
                    docker_volumes.append(volume_name)
                    volume_name = volume_create_cmd % (vol.driver, volume_name, vol.remote_path)
                else:
                    # Volume already created, re-use name
                    volume_name = get_workspace_volume_name(job_exe, name)
                workspace_volume = '%s:%s:%s' % (volume_name, get_workspace_volume_path(name), mode)
                params.append(DockerParam('volume', workspace_volume))
    return params
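# --- Usage sketch (hypothetical values) ----------------------------------------
# Each DockerParam('volume', ...) produced above renders to a '--volume=' flag
# on the task's docker command line, in Docker's <name>:<container-path>:<mode>
# form. Assuming DockerParam exposes 'flag' and 'value' attributes, the
# rendering amounts to:
def _sketch_render_docker_args(params):
    """Hypothetical rendering of DockerParam objects into CLI arguments"""
    return ['--%s=%s' % (p.flag, p.value) for p in params]

# e.g. a host-mounted read-only workspace might render as:
#   ['--volume=/nfs/input:/wksp/input:ro']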
def _get_workspace_docker_params(self, framework_id, job_exe_id, task_workspaces, workspaces, volume_create):
    """Returns the Docker parameters needed for the given task workspaces

    :param framework_id: The scheduling framework ID
    :type framework_id: string
    :param job_exe_id: The job execution ID
    :type job_exe_id: int
    :param task_workspaces: List of the task workspaces
    :type task_workspaces: [:class:`job.configuration.configuration.job_configuration.TaskWorkspace`]
    :param workspaces: A dict of all workspaces stored by name
    :type workspaces: {string: :class:`storage.models.Workspace`}
    :param volume_create: Indicates if new volumes need to be created for these workspaces
    :type volume_create: bool
    :returns: The Docker parameters needed by the given workspaces
    :rtype: [:class:`job.configuration.configuration.job_configuration.DockerParam`]
    """

    params = []
    for task_workspace in task_workspaces:
        name = task_workspace.name
        mode = task_workspace.mode
        if name in workspaces:
            workspace = workspaces[name]
            if workspace.volume:
                vol = workspace.volume
                if vol.host:
                    # Host mount is special, no volume name, just the host mount path
                    volume_name = vol.remote_path
                elif volume_create:
                    # Create job_exe workspace volume for first time
                    volume_create_cmd = '$(docker volume create --driver=%s --name=%s %s)'
                    volume_name = get_workspace_volume_name(framework_id, job_exe_id, name)
                    volume_name = volume_create_cmd % (vol.driver, volume_name, vol.remote_path)
                else:
                    # Volume already created, re-use name
                    volume_name = get_workspace_volume_name(framework_id, job_exe_id, name)
                workspace_volume = '%s:%s:%s' % (volume_name, get_workspace_volume_path(name), mode)
                params.append(DockerParam('volume', workspace_volume))
    return params
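# --- Note on the volume_create_cmd trick ----------------------------------------
# 'docker volume create' prints the created volume's name on stdout, so wrapping
# the command in '$(...)' defers volume creation to the shell that eventually
# runs the docker command: the substitution output becomes the volume name in
# the '-v <name>:<path>:<mode>' flag. For an NFS-backed workspace the rendered
# value might look like (all names hypothetical):
#
#   $(docker volume create --driver=nfs --name=scale_wksp_products \
#       nfs-server:/exports/products):/wksp/products:rw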
def _get_workspace_docker_params(self, job_exe, task_workspaces, workspaces, volume_create, docker_volumes):
    """Returns the Docker parameters needed for the given task workspaces

    :param job_exe: The job execution model (must not be queued) with related job and job_type fields
    :type job_exe: :class:`job.models.JobExecution`
    :param task_workspaces: List of the task workspaces
    :type task_workspaces: [:class:`job.configuration.configuration.job_configuration.TaskWorkspace`]
    :param workspaces: A dict of all workspaces stored by name
    :type workspaces: {string: :class:`storage.models.Workspace`}
    :param volume_create: Indicates if new volumes need to be created for these workspaces
    :type volume_create: bool
    :param docker_volumes: A list to add Docker volume names to
    :type docker_volumes: [string]
    :returns: The Docker parameters needed by the given workspaces
    :rtype: [:class:`job.configuration.configuration.job_configuration.DockerParam`]
    :raises Exception: If the job execution is still queued
    """

    params = []
    for task_workspace in task_workspaces:
        name = task_workspace.name
        mode = task_workspace.mode
        if name in workspaces:
            workspace = workspaces[name]
            if workspace.volume:
                vol = workspace.volume
                if vol.host:
                    # Host mount is special, no volume name, just the host mount path
                    volume_name = vol.remote_path
                elif volume_create:
                    # Create job_exe workspace volume for first time
                    volume_create_cmd = '$(docker volume create --driver=%s --name=%s %s)'
                    volume_name = get_workspace_volume_name(job_exe, name)
                    docker_volumes.append(volume_name)
                    volume_name = volume_create_cmd % (vol.driver, volume_name, vol.remote_path)
                else:
                    # Volume already created, re-use name
                    volume_name = get_workspace_volume_name(job_exe, name)
                workspace_volume = '%s:%s:%s' % (volume_name, get_workspace_volume_path(name), mode)
                params.append(DockerParam('volume', workspace_volume))
    return params
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    :param config: The execution configuration
    :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'syslog')
        # Must explicitly specify RFC3164 to ensure compatibility with logstash in Docker 1.11+
        syslog_format = DockerParameter('log-opt', 'syslog-format=rfc3164')
        log_address = DockerParameter('log-opt', 'syslog-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            pre_task_tag = DockerParameter('log-opt', 'tag=%s' % config.get_task_id('pre'))
            config.add_to_task('pre', docker_params=[log_driver, syslog_format, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s' % config.get_task_id('post'))
            config.add_to_task('post', docker_params=[log_driver, syslog_format, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            es_urls = None
            # Use connection pool to get up-to-date list of elasticsearch nodes
            if settings.ELASTICSEARCH:
                hosts = [host.host for host in settings.ELASTICSEARCH.transport.connection_pool.connections]
                es_urls = ','.join(hosts)
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'SCALE_ELASTICSEARCH_URLS=%s' % es_urls)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s' % config.get_task_id('main'))
        config.add_to_task('main', docker_params=[log_driver, syslog_format, log_address, main_task_tag])
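# --- Resulting docker flags (illustrative) --------------------------------------
# With LOGGING_ADDRESS set, the logging parameters above render into docker
# command-line flags along these lines (address and tag hypothetical):
#
#   --log-driver=syslog \
#   --log-opt=syslog-format=rfc3164 \
#   --log-opt=syslog-address=tcp://logstash.example.com:5000 \
#   --log-opt=tag=<task-id>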
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        nvidia_docker_label = None
        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources
            if resource.name == "gpus" and int(resource.value) > 0:
                gpu_list = GPUManager.get_nvidia_docker_label(job_exe.node_id, job_exe.job_id)
                nvidia_docker_label = DockerParameter('env',
                                                      'NVIDIA_VISIBLE_DEVICES={}'.format(gpu_list.strip(',')))

        # Configure env vars for Scale meta-data
        env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
        env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
        if job_exe.recipe_id:
            env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
        if job_exe.batch_id:
            env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            logger.debug(self._workspaces)
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Labels for metric grouping
    job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
    job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
    job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
    job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
    main_label = DockerParameter('label', 'scale-task-type=main')

    if nvidia_docker_label:
        nvidia_runtime_param = DockerParameter('runtime', 'nvidia')
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label, nvidia_docker_label,
                                                  nvidia_runtime_param])
    else:
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label])

    if not job_type.is_system:
        pre_label = DockerParameter('label', 'scale-task-type=pre')
        post_label = DockerParameter('label', 'scale-task-type=post')
        config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                 job_execution_id_label, pre_label])
        config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'fluentd')
        fluent_precision = DockerParameter('log-opt', 'fluentd-sub-second-precision=true')
        log_address = DockerParameter('log-opt', 'fluentd-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('pre'),
                                                                              job_type.name, job_type.version,
                                                                              job_exe.job_id, job_exe.exe_num))
            config.add_to_task('pre', docker_params=[log_driver, fluent_precision, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('post'),
                                                                               job_type.name, job_type.version,
                                                                               job_exe.job_id, job_exe.exe_num))
            config.add_to_task('post', docker_params=[log_driver, fluent_precision, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'ELASTICSEARCH_URL=%s' % settings.ELASTICSEARCH_URL)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('main'),
                                                                           job_type.name, job_type.version,
                                                                           job_exe.job_id, job_exe.exe_num))
        config.add_to_task('main', docker_params=[log_driver, fluent_precision, log_address, main_task_tag])
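# --- Parsing the fluentd tag (illustrative) --------------------------------------
# The tag above packs five '|'-separated fields, so a downstream log consumer can
# recover the task and job identity from it. A minimal sketch (tag value
# hypothetical):
def _sketch_parse_fluentd_tag(tag):
    """Hypothetical parse of the 'task_id|name|version|job_id|exe_num' tag"""
    task_id, job_type_name, job_type_version, job_id, exe_num = tag.split('|')
    return task_id, job_type_name, job_type_version, int(job_id), int(exe_num)

# e.g. _sketch_parse_fluentd_tag('scale_pre_1234|my-job|1.0.0|1234|1')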