def get_injected_env_vars(self, input_files_dict):
    """Apply all execution time values to job data

    Walks the old-style ``input_data`` list from :meth:`get_dict` and builds the
    environment variable mapping that is injected into the job's container.
    Property inputs map to their raw value; single-file inputs map to the file's
    path under the input directory; multiple-file inputs map to their directory.

    :param input_files_dict: Mapping of input names to lists of InputFiles
    :type input_files_dict: {str, [:class:`job.execution.configuration.input_file.InputFile`]}
    :return: Mapping of all input keys to their true file / property values
    :rtype: {str, str}
    """

    env_vars = {}

    for data_input in self.get_dict()['input_data']:
        input_name = data_input['name']
        # Property input: inject the raw value directly
        if 'value' in data_input:
            env_vars[normalize_env_var_name(input_name)] = data_input['value']
        # Single-file input: inject the full path to the file within the input dir
        if 'file_id' in data_input:
            input_file = input_files_dict[input_name][0]
            file_name = os.path.basename(input_file.workspace_path)
            if input_file.local_file_name:
                # Prefer the local (possibly de-conflicted) name over the workspace name
                file_name = input_file.local_file_name
            env_vars[normalize_env_var_name(input_name)] = os.path.join(
                SCALE_JOB_EXE_INPUT_PATH, input_name, file_name)
        # Multiple-file input: inject the directory that holds all of the files
        elif 'file_ids' in data_input:
            env_vars[normalize_env_var_name(input_name)] = os.path.join(
                SCALE_JOB_EXE_INPUT_PATH, input_name)

    return env_vars
def _check_for_name_collisions(self):
    """Ensures all names that map to environment variables are unique, and throws a
    :class:`job.seed.exceptions.InvalidSeedManifestDefinition` if they are not unique.

    Per Seed specification for implementors we must validate that all reserved keywords,
    settings and inputs are unique as they are ultimately injected as environment variables.

    :raises InvalidSeedManifestDefinition: If any normalized environment variable name is duplicated
    """

    # Include reserved keywords
    env_vars = ["OUTPUT_DIR"]
    env_vars += [normalize_env_var_name(setting['name']) for setting in self.get_settings()]
    env_vars += [normalize_env_var_name(input_file['name']) for input_file in self.get_input_files()]
    env_vars += [normalize_env_var_name(json['name']) for json in self.get_input_json()]
    # Scalar resources are injected with an ALLOCATED_ prefix, so collide in that namespace
    env_vars += [normalize_env_var_name('ALLOCATED_' + resource['name'])
                 for resource in self.get_scalar_resources()]

    # Any duplicate normalized name is a collision
    if len(env_vars) != len(set(env_vars)):
        # BUG FIX: the implicit string concatenation previously read "...settingsand input names."
        raise InvalidSeedManifestDefinition(
            'NAME_COLLISION_ERROR',
            'Collisions are not allowed between reserved keywords, resources, settings and input names.')
def get_injected_env_vars(self, input_files):
    """Inject all execution time values to job data mappings

    File inputs map to a path under the input directory (the directory itself when the
    input holds multiple files); JSON inputs map to their raw value.

    :param input_files: Mapping of input names to InputFiles
    :type input_files: {str, :class:`job.execution.configuration.input_file.InputFile`}
    :return: Mapping of all input keys to their true file / property values
    :rtype: {str, str}
    """

    env_vars = {}

    for file_input in self._new_data.values.values():
        if isinstance(file_input, FileValue):
            env_var_name = normalize_env_var_name(file_input.name)
            if len(file_input.file_ids) > 1:
                # When we have input for multiple files, map in the entire directory
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, file_input.name)
            else:
                input_file = input_files[file_input.name][0]
                file_name = os.path.basename(input_file.workspace_path)
                if input_file.local_file_name:
                    # Prefer the local (possibly de-conflicted) name over the workspace name
                    file_name = input_file.local_file_name
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, file_input.name, file_name)

    for json_input in self._new_data.values.values():
        # BUG FIX: this previously tested isinstance(file_input, JsonValue) - the stale loop
        # variable from the file loop above - so JSON inputs were matched (or skipped) based on
        # the last file value instead of the value being iterated.
        if isinstance(json_input, JsonValue):
            env_vars[normalize_env_var_name(json_input.name)] = json_input.value

    return env_vars
def get_injected_env_vars(self, input_files, interface):
    """Inject all execution time values to job data mappings

    File inputs resolve to container paths under the input directory; an input declared
    ``multiple`` on the interface resolves to its directory instead of a single file.
    JSON inputs resolve to the string form of their value.

    :param input_files: Mapping of input names to InputFiles
    :type input_files: {str, :class:`job.execution.configuration.input_file.InputFile`}
    :param interface: The interface to which this data is being passed
    :type interface: :class:`data.interface.interface.Interface`
    :return: Mapping of all input keys to their true file / property values
    :rtype: {str, str}
    """

    env_vars = {}

    # First pass: file-based inputs
    for data_value in self._new_data.values.values():
        if not isinstance(data_value, FileValue):
            continue
        env_var_name = normalize_env_var_name(data_value.name)
        declared = data_value.name in interface.parameters
        if not declared:
            logger.warning("File input %s not specified in interface %s" % (data_value.name, interface))
        if declared and interface.parameters[data_value.name].multiple:
            # When we have input for multiple files, map in the entire directory
            env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, data_value.name)
        else:
            first_file = input_files[data_value.name][0]
            # Prefer the local (de-conflicted) name when one was assigned
            chosen_name = first_file.local_file_name if first_file.local_file_name else first_file.file_name
            env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, data_value.name, chosen_name)

    # Second pass: JSON-based inputs
    for data_value in self._new_data.values.values():
        if isinstance(data_value, JsonValue):
            env_vars[normalize_env_var_name(data_value.name)] = str(data_value.value)

    return env_vars
def _configure_secrets(self, config, job_exe, job_type, interface):
    """Creates a copy of the configuration, configures secrets (masked in one of the copies), and applies any
    final configuration

    The passed-in ``config`` keeps masked ('*****') secret values; the returned copy carries the real ones.

    :param config: The execution configuration, where the secrets will be masked out
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param interface: The job interface
    :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
    :returns: The copy of the execution configuration that contains the secrets
    :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    """

    # Copy the configuration
    config_with_secrets = config.create_copy()

    # Configure settings values, some are secret
    if job_type.is_system:
        # System jobs only have a main task
        config.add_to_task('main', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('main', settings=self._system_settings)
    else:
        config.add_to_task('pre', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('pre', settings=self._system_settings)
        config.add_to_task('post', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('post', settings=self._system_settings)
        job_config = job_type.get_job_configuration()
        secret_settings = secrets_mgr.retrieve_job_type_secrets(job_type.get_secrets_key())
        # Populate the main task in both copies; secrets are masked in `config` only
        for _config, secrets_hidden in [(config, True), (config_with_secrets, False)]:
            task_settings = {}
            for setting in interface.get_settings():
                name = setting['name']
                if setting['secret']:
                    value = None
                    if name in secret_settings:
                        value = secret_settings[name]
                    if value is not None and secrets_hidden:
                        # Mask the secret value in the non-secret copy
                        value = '*****'
                else:
                    value = job_config.get_setting_value(name)
                # NOTE: parses as (('required' in setting and setting['required']) or value is not None),
                # so required settings are recorded even when their value is None
                if 'required' in setting and setting['required'] or value is not None:
                    task_settings[name] = value

            # TODO: command args and env var replacement from the interface should be removed once Scale drops
            # support for old-style job types
            args = config._get_task_dict('main')['args']
            if JobInterfaceSunset.is_seed_dict(interface.definition):
                env_vars = task_settings
            # TODO: Remove this else block when old-style job types are removed
            else:
                args = JobInterface.replace_command_parameters(args, task_settings)
                env_vars = interface.populate_env_vars_arguments(task_settings)

            _config.add_to_task('main', args=args, env_vars=env_vars, settings=task_settings)

    # Configure env vars for settings
    for _config in [config, config_with_secrets]:
        for task_type in _config.get_task_types():
            env_vars = {}
            for name, value in _config.get_settings(task_type).items():
                if value is not None:
                    env_name = normalize_env_var_name(name)
                    env_vars[env_name] = value
            _config.add_to_task(task_type, env_vars=env_vars)

    # Configure Docker parameters for env vars and Docker volumes
    for _config in [config, config_with_secrets]:
        existing_volumes = set()
        for task_type in _config.get_task_types():
            docker_params = []
            for name, value in _config.get_env_vars(task_type).items():
                docker_params.append(DockerParameter('env', '%s=%s' % (name, value)))
            for name, volume in _config.get_volumes(task_type).items():
                # Only the first task that mounts a given volume creates it
                docker_params.append(volume.to_docker_param(is_created=(name in existing_volumes)))
                existing_volumes.add(name)
            _config.add_to_task(task_type, docker_params=docker_params)

    # TODO: this feature should be removed once Scale drops support for job type docker params
    # Configure docker parameters listed in job type
    if job_type.docker_params:
        docker_params = []
        for key, value in job_type.docker_params.items():
            docker_params.append(DockerParameter(key, value))
        if docker_params:
            config.add_to_task('main', docker_params=docker_params)
            config_with_secrets.add_to_task('main', docker_params=docker_params)

    return config_with_secrets
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    Sets task IDs, per-task resource/meta-data env vars and workspace volumes, Docker
    labels for metric grouping, and (when configured) syslog logging parameters.

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources

        # Configure env vars for Scale meta-data
        # NOTE: unicode() indicates this module still targets Python 2
        env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
        env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
        if job_exe.recipe_id:
            env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
        if job_exe.batch_id:
            env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            logger.debug(self._workspaces)
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    # Host-mounted workspace: bind the remote path directly
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Labels for metric grouping
    job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
    job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
    job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
    job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
    main_label = DockerParameter('label', 'scale-task-type=main')
    config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                             job_execution_id_label, main_label])

    if not job_type.is_system:
        # Non-system jobs also have pre and post tasks
        pre_label = DockerParameter('label', 'scale-task-type=pre')
        post_label = DockerParameter('label', 'scale-task-type=post')
        config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                 job_execution_id_label, pre_label])
        config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'syslog')
        # Must explicitly specify RFC3164 to ensure compatibility with logstash in Docker 1.11+
        syslog_format = DockerParameter('log-opt', 'syslog-format=rfc3164')
        log_address = DockerParameter('log-opt', 'syslog-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('pre'), job_type.name))
            config.add_to_task('pre', docker_params=[log_driver, syslog_format, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('post'), job_type.name))
            config.add_to_task('post', docker_params=[log_driver, syslog_format, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            es_urls = None
            # Use connection pool to get up-to-date list of elasticsearch nodes
            if settings.ELASTICSEARCH:
                hosts = [host.host for host in settings.ELASTICSEARCH.transport.connection_pool.connections]
                es_urls = ','.join(hosts)
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'SCALE_ELASTICSEARCH_URLS=%s' % es_urls)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('main'), job_type.name))
        config.add_to_task('main', docker_params=[log_driver, syslog_format, log_address, main_task_tag])
def _configure_all_tasks(self, config, job_exe, job_type):
    """Configures the given execution with items that apply to all tasks

    Sets task IDs, per-task resource/meta-data env vars and workspace volumes, Docker
    labels for metric grouping, GPU runtime parameters when GPUs are allocated, and
    (when configured) fluentd logging parameters.

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    """

    config.set_task_ids(job_exe.get_cluster_id())

    for task_type in config.get_task_types():
        # Configure env vars describing allocated task resources
        env_vars = {}
        # NOTE(review): nvidia_docker_label is re-initialized on every task_type iteration but
        # only consumed after this loop ends, so the value from the LAST task type wins -
        # confirm that is intended (it looks like it was meant per 'main' task only)
        nvidia_docker_label = None
        for resource in config.get_resources(task_type).resources:
            env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
            env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources
            if resource.name == "gpus" and int(resource.value) > 0:
                # Restrict the container to the GPUs reserved for this job on this node
                gpu_list = GPUManager.get_nvidia_docker_label(job_exe.node_id, job_exe.job_id)
                nvidia_docker_label = DockerParameter('env',
                                                      'NVIDIA_VISIBLE_DEVICES={}'.format(gpu_list.strip(',')))

        # Configure env vars for Scale meta-data
        # NOTE: unicode() indicates this module still targets Python 2
        env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
        env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
        if job_exe.recipe_id:
            env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
        if job_exe.batch_id:
            env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

        # Configure workspace volumes
        workspace_volumes = {}
        for task_workspace in config.get_workspaces(task_type):
            logger.debug(self._workspaces)
            workspace_model = self._workspaces[task_workspace.name]
            # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
            if workspace_model.volume:
                vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                cont_path = get_workspace_volume_path(workspace_model.name)
                if workspace_model.volume.host:
                    # Host-mounted workspace: bind the remote path directly
                    host_path = workspace_model.volume.remote_path
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                else:
                    driver = workspace_model.volume.driver
                    driver_opts = {}
                    # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                    if driver == 'nfs':
                        driver_opts = {'share': workspace_model.volume.remote_path}
                    volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                    driver_opts=driver_opts)
                workspace_volumes[task_workspace.name] = volume

        config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

    # Labels for metric grouping
    job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
    job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
    job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
    job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
    main_label = DockerParameter('label', 'scale-task-type=main')
    if nvidia_docker_label:
        # GPU jobs additionally need the nvidia runtime and device-visibility env var
        nvidia_runtime_param = DockerParameter('runtime', 'nvidia')
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label, nvidia_docker_label,
                                                  nvidia_runtime_param])
    else:
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label])

    if not job_type.is_system:
        # Non-system jobs also have pre and post tasks
        pre_label = DockerParameter('label', 'scale-task-type=pre')
        post_label = DockerParameter('label', 'scale-task-type=post')
        config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                 job_execution_id_label, pre_label])
        config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, post_label])

    # Configure tasks for logging
    if settings.LOGGING_ADDRESS is not None:
        log_driver = DockerParameter('log-driver', 'fluentd')
        # Sub-second precision keeps interleaved log lines correctly ordered
        fluent_precision = DockerParameter('log-opt', 'fluentd-sub-second-precision=true')
        log_address = DockerParameter('log-opt', 'fluentd-address=%s' % settings.LOGGING_ADDRESS)
        if not job_type.is_system:
            # Tag format: task_id|job_type_name|job_type_version|job_id|exe_num
            pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('pre'),
                                                                              job_type.name,
                                                                              job_type.version,
                                                                              job_exe.job_id,
                                                                              job_exe.exe_num))
            config.add_to_task('pre', docker_params=[log_driver, fluent_precision, log_address, pre_task_tag])
            post_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('post'),
                                                                               job_type.name,
                                                                               job_type.version,
                                                                               job_exe.job_id,
                                                                               job_exe.exe_num))
            config.add_to_task('post', docker_params=[log_driver, fluent_precision, log_address, post_task_tag])
            # TODO: remove es_urls parameter when Scale no longer supports old style job types
            # Post task needs ElasticSearch URL to grab logs for old artifact registration
            es_param = DockerParameter('env', 'ELASTICSEARCH_URL=%s' % settings.ELASTICSEARCH_URL)
            config.add_to_task('post', docker_params=[es_param])
        main_task_tag = DockerParameter('log-opt', 'tag=%s|%s|%s|%s|%s' % (config.get_task_id('main'),
                                                                           job_type.name,
                                                                           job_type.version,
                                                                           job_exe.job_id,
                                                                           job_exe.exe_num))
        config.add_to_task('main', docker_params=[log_driver, fluent_precision, log_address, main_task_tag])