def get_mount_volume(self, mount_name, volume_name, container_path, mode):
    """Returns the volume that has been configured for the given mount name. If the given mount is not defined in
    this configuration, None is returned.

    :param mount_name: The name of the mount defined in the job type
    :type mount_name: string
    :param volume_name: The name of the volume
    :type volume_name: string
    :param container_path: The path within the container onto which the volume will be mounted
    :type container_path: string
    :param mode: Either 'ro' for read-only or 'rw' for read-write
    :type mode: string
    :returns: The volume that should be mounted into the job container, possibly None
    :rtype: :class:`job.execution.configuration.volume.Volume`
    """

    if mount_name not in self._configuration['mounts']:
        return None

    volume = None
    mount_config = self._configuration['mounts'][mount_name]
    mount_type = mount_config['type']
    if mount_type == 'host':
        host_path = mount_config['host_path']
        volume = Volume(volume_name, container_path, mode, is_host=True, host_path=host_path)
    elif mount_type == 'volume':
        driver = mount_config['driver']
        driver_opts = mount_config['driver_opts']
        volume = Volume(volume_name, container_path, mode, is_host=False, driver=driver, driver_opts=driver_opts)
    return volume
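# Example usage (a minimal sketch; the mount and volume names, container path,
# and mode are hypothetical, and assume a configuration whose 'mounts' section
# defines 'input_mount' as a host mount):
#
#     volume = configuration.get_mount_volume('input_mount', 'scale_mount_input_mount',
#                                             '/scale/input', 'ro')
#     # For a 'host' mount this yields a Volume with is_host=True and the
#     # configured host_path; an undefined mount name yields None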
def get_volumes(self, task_type):
    """Returns the Docker volumes for the given task type

    :param task_type: The task type
    :type task_type: string
    :returns: The dict of Docker volumes stored by volume name
    :rtype: dict
    """

    volumes = {}
    for task_dict in self._configuration['tasks']:
        if task_dict['type'] == task_type:
            if 'volumes' in task_dict:
                for name, vol_dict in task_dict['volumes'].items():
                    if vol_dict['type'] == 'host':
                        vol = Volume(name, vol_dict['container_path'], vol_dict['mode'], is_host=True,
                                     host_path=vol_dict['host_path'])
                    else:
                        driver = None
                        driver_opts = None
                        if 'driver' in vol_dict:
                            driver = vol_dict['driver']
                        if 'driver_opts' in vol_dict:
                            driver_opts = vol_dict['driver_opts']
                        vol = Volume(name, vol_dict['container_path'], vol_dict['mode'], is_host=False,
                                     driver=driver, driver_opts=driver_opts)
                    volumes[name] = vol
    return volumes
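# Example usage (a minimal sketch; assumes this configuration's 'tasks' list
# includes a 'main' task with a 'volumes' section):
#
#     for name, volume in configuration.get_volumes('main').items():
#         # Each Volume carries the container path and mode ('ro' or 'rw')
#         # needed to build the Docker parameters for the task
#         print name, volume.container_path, volume.mode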
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources

        # Configure env vars for Scale meta-data
        env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
        env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
        if job_exe.recipe_id:
            env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
        if job_exe.batch_id:
            env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            logger.debug(self._workspaces)
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Labels for metric grouping
    job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
    job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
    job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
    job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
    main_label = DockerParameter('label', 'scale-task-type=main')
    config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                              job_execution_id_label, main_label])

    if not job_type.is_system:
        pre_label = DockerParameter('label', 'scale-task-type=pre')
        post_label = DockerParameter('label', 'scale-task-type=post')
        config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                 job_execution_id_label, pre_label])
        config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'syslog')
        # Must explicitly specify RFC3164 to ensure compatibility with logstash in Docker 1.11+
        syslog_format = DockerParameter('log-opt', 'syslog-format=rfc3164')
        log_address = DockerParameter('log-opt', 'syslog-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('pre'), job_type.name))
            config.add_to_task('pre', docker_params=[log_driver, syslog_format, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('post'), job_type.name))
            config.add_to_task('post', docker_params=[log_driver, syslog_format, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            es_urls = None
            # Use connection pool to get up-to-date list of elasticsearch nodes
            if settings.ELASTICSEARCH:
                hosts = [host.host for host in settings.ELASTICSEARCH.transport.connection_pool.connections]
                es_urls = ','.join(hosts)
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'SCALE_ELASTICSEARCH_URLS=%s' % es_urls)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('main'), job_type.name))
        config.add_to_task('main', docker_params=[log_driver, syslog_format, log_address, main_task_tag])
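# For reference, the syslog logging parameters built above are equivalent to
# these Docker run options (values illustrative):
#
#     --log-driver=syslog --log-opt syslog-format=rfc3164 \
#     --log-opt syslog-address=<LOGGING_ADDRESS> \
#     --log-opt tag=<task_id>|<job_type_name>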
def _configure_regular_job(config, job_exe, job_type, system_logging_level):
    """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
    input/output mounts, etc

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param system_logging_level: The logging level to be passed in through environment
    :type system_logging_level: str
    """

    config.create_tasks(['pull', 'pre', 'main', 'post'])
    config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
    config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
    config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

    # Configure input workspaces
    ro_input_workspaces = {}
    rw_input_workspaces = {}
    for input_workspace in config.get_input_workspace_names():
        ro_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RO)
        rw_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RW)
    config.add_to_task('pre', workspaces=ro_input_workspaces)
    config.add_to_task('main', workspaces=ro_input_workspaces)
    # Post tasks have access to input workspaces in case input files need to be moved as part of parse results
    config.add_to_task('post', workspaces=rw_input_workspaces)

    # Configure output workspaces
    output_workspaces = {}
    for output_workspace in config.get_output_workspace_names():
        output_workspaces[output_workspace] = TaskWorkspace(output_workspace, MODE_RW)
    config.add_to_task('post', workspaces=output_workspaces)

    # Configure input/output mounts
    input_mnt_name = 'scale_input_mount'
    output_mnt_name = 'scale_output_mount'
    input_vol_name = get_job_exe_input_vol_name(job_exe)
    output_vol_name = get_job_exe_output_vol_name(job_exe)
    input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
    input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
    output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
    output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)
    config.add_to_task('pre', mount_volumes={input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
    config.add_to_task('main', mount_volumes={input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw})
    config.add_to_task('post', mount_volumes={output_mnt_name: output_vol_ro},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

    # Configure output directory
    # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
    # job types
    env_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
    args = config._get_task_dict('main')['args']
    # TODO: Remove old-style logic for command parameter injection with v6
    if not JobInterfaceSunset.is_seed_dict(job_type.manifest):
        args = JobInterface.replace_command_parameters(args, env_vars)
    else:
        args = environment_expansion(env_vars, args, remove_extras=True)
    config.add_to_task('main', args=args, env_vars=env_vars)

    # Configure task resources
    resources = job_exe.get_resources()
    # Pull-task and pre-task require the full amount of resources
    config.add_to_task('pull', resources=resources)
    config.add_to_task('pre', resources=resources)
    # Main-task no longer requires the input file space
    resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
    config.add_to_task('main', resources=resources)
    # Post-task no longer requires any disk space
    resources.remove_resource('disk')
    config.add_to_task('post', resources=resources)
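# For example, given a hypothetical allocation of cpus=4.0, mem=2048.0 and
# disk=1024.0 with job_exe.input_file_size = 100.0, the task resources above
# work out to:
#
#     pull/pre: cpus=4.0, mem=2048.0, disk=1024.0   (full allocation)
#     main:     cpus=4.0, mem=2048.0, disk=924.0    (input file space subtracted)
#     post:     cpus=4.0, mem=2048.0                (disk removed entirely)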
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        nvidia_docker_label = None

        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources
            if resource.name == 'gpus' and int(resource.value) > 0:
                gpu_list = GPUManager.get_nvidia_docker_label(job_exe.node_id, job_exe.job_id)
                nvidia_docker_label = DockerParameter('env',
                                                      'NVIDIA_VISIBLE_DEVICES={}'.format(gpu_list.strip(',')))

        # Configure env vars for Scale meta-data
        env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
        env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
        if job_exe.recipe_id:
            env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
        if job_exe.batch_id:
            env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            logger.debug(self._workspaces)
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Labels for metric grouping
    job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
    job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
    job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
    job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
    main_label = DockerParameter('label', 'scale-task-type=main')

    # nvidia_docker_label holds whatever value the last task type iterated above
    # produced; when GPUs were allocated, run the main task under the nvidia runtime
    if nvidia_docker_label:
        nvidia_runtime_param = DockerParameter('runtime', 'nvidia')
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label, nvidia_docker_label,
                                                  nvidia_runtime_param])
    else:
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label])

    if not job_type.is_system:
        pre_label = DockerParameter('label', 'scale-task-type=pre')
        post_label = DockerParameter('label', 'scale-task-type=post')
        config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                 job_execution_id_label, pre_label])
        config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'fluentd')
        fluent_precision = DockerParameter('log-opt', 'fluentd-sub-second-precision=true')
        log_address = DockerParameter('log-opt', 'fluentd-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('pre'),
                                                                              job_type.name, job_type.version,
                                                                              job_exe.job_id, job_exe.exe_num))
            config.add_to_task('pre', docker_params=[log_driver, fluent_precision, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('post'),
                                                                               job_type.name, job_type.version,
                                                                               job_exe.job_id, job_exe.exe_num))
            config.add_to_task('post', docker_params=[log_driver, fluent_precision, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'ELASTICSEARCH_URL=%s' % settings.ELASTICSEARCH_URL)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('main'),
                                                                           job_type.name, job_type.version,
                                                                           job_exe.job_id, job_exe.exe_num))
        config.add_to_task('main', docker_params=[log_driver, fluent_precision, log_address, main_task_tag])
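# For reference, the fluentd tag built above has five pipe-delimited fields,
# so downstream log routing can recover the task and job context:
#
#     tag=<task_id>|<job_type_name>|<job_type_version>|<job_id>|<exe_num>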