def _perform_post_steps(self, job_exe):
    """Performs the post-job steps for the given job execution and stores the execution output

    :param job_exe: The job execution
    :type job_exe: :class:`job.models.JobExecution`
    """

    job_interface = job_exe.job_type.get_job_interface()
    job_data = job_exe.job.get_job_data()

    stdout_and_stderr = None
    try:
        stdout_and_stderr, _last_modified = job_exe.get_log_text()
    except Exception:
        logger.exception('Failed to retrieve job execution logs')
    if stdout_and_stderr is None:
        stdout_and_stderr = ''

    with transaction.atomic():
        if JobInterfaceSunset.is_seed_dict(job_interface.definition):
            job_results = JobResults()
            job_results.perform_post_steps(job_interface, job_data, job_exe)
        else:
            job_results, results_manifest = job_interface.perform_post_steps(job_exe, job_data, stdout_and_stderr)

        job_exe_output = JobExecutionOutput()
        job_exe_output.job_exe_id = job_exe.id
        job_exe_output.job_id = job_exe.job_id
        job_exe_output.job_type_id = job_exe.job_type_id
        job_exe_output.exe_num = job_exe.exe_num
        job_exe_output.output = job_results.get_dict()
        job_exe_output.save()
def configure_queued_job(self, job):
    """Creates and returns an execution configuration for the given queued job. The given job model should have
    its related job_type, job_type_rev, and batch models populated.

    :param job: The queued job model
    :type job: :class:`job.models.Job`
    :returns: The execution configuration for the queued job
    :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    """

    config = ExecutionConfiguration()
    data = job.get_job_data()

    # Add input file meta-data
    input_files_dict = self._create_input_file_dict(data)
    config.set_input_files(input_files_dict)

    # Set up env vars for job's input data
    input_values = data.get_injected_input_values(input_files_dict)
    env_vars = data.get_injected_env_vars(input_files_dict)

    task_workspaces = {}
    if job.job_type.is_system:
        # Add any workspaces needed for this system job
        task_workspaces = QueuedExecutionConfigurator._system_job_workspaces(job)
    else:
        # Set any output workspaces needed
        if job.input and 'version' in job.input and job.input['version'] == '1.0':
            # Set output workspaces using legacy job data
            self._cache_workspace_names(data.get_output_workspace_ids())
            output_workspaces = {}
            for output, workspace_id in data.get_output_workspaces().items():
                output_workspaces[output] = self._cached_workspace_names[workspace_id]
            config.set_output_workspaces(output_workspaces)
        else:
            # Set output workspaces from job configuration
            output_workspaces = {}
            job_config = job.job_type.get_job_configuration()
            interface = JobInterfaceSunset.create(job.job_type.manifest, do_validate=False)
            for output_name in interface.get_file_output_names():
                output_workspace = job_config.get_output_workspace(output_name)
                if output_workspace:
                    output_workspaces[output_name] = output_workspace
            config.set_output_workspaces(output_workspaces)

    # Create main task with fields populated from input data
    args = job.get_job_interface().get_injected_command_args(input_values, env_vars)
    config.create_tasks(['main'])
    config.add_to_task('main', args=args, env_vars=env_vars, workspaces=task_workspaces)
    return config
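# Usage sketch for configure_queued_job (illustrative, not part of the original module):
# shows how a caller could build an execution configuration for one queued job. The
# queued_job_id value is hypothetical; Job.objects.get_jobs_with_related() and the
# QueuedExecutionConfigurator constructor taking an input-file map both appear in
# queue_jobs() later in this section.
#
#   queued_jobs = Job.objects.get_jobs_with_related([queued_job_id])
#   configurator = QueuedExecutionConfigurator(input_files={})
#   for job in queued_jobs:
#       exe_config = configurator.configure_queued_job(job)
#       print(exe_config.get_dict())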
def get_job_interface(self):
    """Returns the interface for this queued job

    :returns: The job interface
    :rtype: :class:`job.configuration.interface.job_interface.JobInterface`
    """

    return JobInterfaceSunset.create(self.interface, do_validate=False)
def _configure_main_task(config, job_exe, job_type, interface):
    """Configures the main task for the given execution with items specific to the main task

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param interface: The job interface
    :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
    """

    # Set shared memory if required by this job type
    shared_mem = job_type.get_shared_mem_required()
    if shared_mem > 0:
        shared_mem = int(math.ceil(shared_mem))

        if JobInterfaceSunset.is_seed_dict(job_type.manifest):
            env_vars = {'ALLOCATED_SHAREDMEM': '%.1f' % float(shared_mem)}
        # Remove legacy code in v6
        else:
            env_vars = {'ALLOCATED_SHARED_MEM': '%.1f' % float(shared_mem)}

        config.add_to_task('main',
                           docker_params=[DockerParameter('shm-size', '%dm' % shared_mem)],
                           env_vars=env_vars)

    job_config = job_type.get_job_configuration()
    mount_volumes = {}
    for mount in interface.get_mounts():
        name = mount['name']
        mode = mount['mode']
        path = mount['path']
        volume_name = get_mount_volume_name(job_exe, name)
        volume = job_config.get_mount_volume(name, volume_name, path, mode)
        if volume:
            mount_volumes[name] = volume
        else:
            mount_volumes[name] = None
    config.add_to_task('main', mount_volumes=mount_volumes)
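# Worked example for the shared-memory branch above (values hypothetical): a job type
# requiring 512 MiB of shared memory would produce
#
#   DockerParameter('shm-size', '512m')            # presumably rendered as --shm-size for docker run
#   env_vars = {'ALLOCATED_SHAREDMEM': '512.0'}    # Seed-style job types (legacy types get ALLOCATED_SHARED_MEM)
#
# so the containerized job can size its /dev/shm usage from the environment variable.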
def queue_jobs(self, jobs, requeue=False, priority=None):
    """Queues the given jobs. The caller must have obtained model locks on the job models in an atomic
    transaction. Any jobs that are not in a valid status for being queued, are without job input, or are
    superseded will be ignored.

    :param jobs: The job models to put on the queue
    :type jobs: list
    :param requeue: Whether this is a re-queue (True) or a first queue (False)
    :type requeue: bool
    :param priority: An optional argument to reset the jobs' priority when they are queued
    :type priority: int
    :returns: The list of job IDs that were successfully QUEUED
    :rtype: list
    """

    when_queued = timezone.now()

    # Set job models to QUEUED
    queued_job_ids = Job.objects.update_jobs_to_queued(jobs, when_queued, requeue=requeue)
    if not queued_job_ids:
        return queued_job_ids  # Done if nothing was queued

    # Retrieve the related job_type, job_type_rev, and batch models for the queued jobs
    queued_jobs = Job.objects.get_jobs_with_related(queued_job_ids)

    # Query for all input files of the queued jobs
    input_files = {}
    input_file_ids = set()
    for job in queued_jobs:
        input_file_ids.update(job.get_job_data().get_input_file_ids())
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file

    # Bulk create queue models
    queues = []
    configurator = QueuedExecutionConfigurator(input_files)
    for job in queued_jobs:
        config = configurator.configure_queued_job(job)

        manifest = None
        if JobInterfaceSunset.is_seed_dict(job.job_type.manifest):
            manifest = SeedManifest(job.job_type.manifest)

        if priority:
            queued_priority = priority
        elif job.priority:
            queued_priority = job.priority
        elif job.batch and job.batch.get_configuration().priority:
            queued_priority = job.batch.get_configuration().priority
        else:
            queued_priority = job.job_type.get_job_configuration().priority

        queue = Queue()
        queue.job_type_id = job.job_type_id
        queue.job_id = job.id
        queue.recipe_id = job.recipe_id
        queue.batch_id = job.batch_id
        queue.exe_num = job.num_exes
        queue.input_file_size = job.input_file_size if job.input_file_size else 0.0
        queue.is_canceled = False
        queue.priority = queued_priority
        queue.timeout = manifest.get_timeout() if manifest else job.timeout
        queue.interface = job.get_job_interface().get_dict()
        queue.configuration = config.get_dict()
        queue.resources = job.get_resources().get_json().get_dict()
        queue.queued = when_queued
        queues.append(queue)

    if queues:
        self.bulk_create(queues)

    return queued_job_ids
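# Caller sketch for queue_jobs (assumption: queue_jobs is exposed on Queue's default
# manager; the caller is responsible for the row locks the docstring requires).
# job_ids is hypothetical.
#
#   with transaction.atomic():
#       jobs = list(Job.objects.select_for_update().filter(id__in=job_ids))
#       queued_ids = Queue.objects.queue_jobs(jobs, requeue=False, priority=None)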
def validate(self, interface_dict):
    """Validates the configuration against the interface to find setting and mount usages

    :param interface_dict: The interface for the job type
    :type interface_dict: dict
    :returns: A list of warnings discovered during validation.
    :rtype: [:class:`job.configuration.data.job_data.ValidationWarning`]
    """

    warnings = []

    # TODO: In v6 remove sunset and just use SeedManifest class
    interface = JobInterfaceSunset.create(interface_dict)

    settings_to_delete = []
    if 'settings' in self._configuration and interface.get_settings():
        # Remove settings not used in the interface
        interface_setting_names = [setting['name'] for setting in interface.get_settings()]
        for setting_name in self._configuration['settings']:
            if setting_name not in interface_setting_names:
                warning_str = 'Setting %s will be ignored due to no matching interface designation.' % setting_name
                settings_to_delete.append({'name': setting_name, 'warning': warning_str})

        # Detect any secrets and remove them as settings in configuration
        interface_secret_names = [setting['name'] for setting in interface.get_settings() if setting['secret']]
        for setting_name in interface_secret_names:
            if setting_name in self._configuration['settings']:
                if setting_name not in [s['name'] for s in settings_to_delete]:
                    settings_to_delete.append({'name': setting_name, 'warning': None})

    elif 'settings' in self._configuration:
        # Remove all settings
        for setting_name in self._configuration['settings']:
            warning_str = 'Setting %s will be ignored due to no matching interface designation.' % setting_name
            settings_to_delete.append({'name': setting_name, 'warning': warning_str})

    for setting in settings_to_delete:
        del self._configuration['settings'][setting['name']]
        if setting['warning']:
            warnings.append(ValidationWarning('settings', setting['warning']))

    mounts_to_delete = []
    if interface.get_mounts() and 'mounts' in self._configuration:
        # Remove mounts not used in the interface
        interface_mount_names = [mount['name'] for mount in interface.get_mounts()]
        for mount_name, _mount_value in self._configuration['mounts'].items():
            if mount_name not in interface_mount_names:
                warning_str = 'Mount %s will be ignored due to no matching interface designation.' % mount_name
                mounts_to_delete.append({'name': mount_name, 'warning': warning_str})

    elif 'mounts' in self._configuration:
        # Remove all mounts
        for mount_name, _mount_value in self._configuration['mounts'].items():
            warning_str = 'Mount %s will be ignored due to no matching interface designation.' % mount_name
            mounts_to_delete.append({'name': mount_name, 'warning': warning_str})

    for mount in mounts_to_delete:
        del self._configuration['mounts'][mount['name']]
        warnings.append(ValidationWarning('mounts', mount['warning']))

    logger.info(warnings)
    return warnings
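# Usage sketch for validate() (hedged: `job_configuration` stands in for an instance of
# the enclosing configuration class, which is not shown in this section):
#
#   warnings = job_configuration.validate(job_type.manifest)
#   for warning in warnings:
#       logger.warning('Configuration warning: %s', warning)
#
# Settings and mounts with no matching interface entry are dropped from the configuration
# in place (each producing a warning), and settings that the interface marks as secret are
# removed from the configuration as well, without a warning.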
def _perform_job_type_manifest_iteration(self):
    """Performs a single iteration of updating job type interfaces
    """

    # Get job type ID
    jt_qry = JobType.objects.all()
    if self._current_job_type_id:
        jt_qry = jt_qry.filter(id__gt=self._current_job_type_id)
    for jt in jt_qry.order_by('id').only('id')[:1]:
        jt_id = jt.id
        break

    jt = JobType.objects.get(pk=jt_id)
    if not JobInterfaceSunset.is_seed_dict(jt.manifest):
        jt.is_active = False
        jt.is_paused = True

        old_name_version = jt.name + ' ' + jt.version
        jt.name = 'legacy-' + jt.name.replace('_', '-')

        if not jt.manifest:
            jt.manifest = {}

        input_files = []
        input_json = []
        output_files = []

        global INTERFACE_NAME_COUNTER
        INTERFACE_NAME_COUNTER = 0

        for input in jt.manifest.get('input_data', []):
            type = input.get('type', '')
            if 'file' not in type:
                json = {}
                json['name'] = get_unique_name(input.get('name'))
                json['type'] = 'string'
                json['required'] = input.get('required', True)
                input_json.append(json)
                continue
            file = {}
            file['name'] = get_unique_name(input.get('name'))
            file['required'] = input.get('required', True)
            file['partial'] = input.get('partial', False)
            file['mediaTypes'] = input.get('media_types', [])
            file['multiple'] = (type == 'files')
            input_files.append(file)

        for output in jt.manifest.get('output_data', []):
            type = output.get('type', '')
            file = {}
            file['name'] = get_unique_name(output.get('name'))
            file['required'] = output.get('required', True)
            file['mediaType'] = output.get('media_type', '')
            file['multiple'] = (type == 'files')
            file['pattern'] = "*.*"
            output_files.append(file)

        mounts = []
        for mount in jt.manifest.get('mounts', []):
            mt = {}
            mt['name'] = get_unique_name(mount.get('name'))
            mt['path'] = mount.get('path')
            mt['mode'] = mount.get('mode', 'ro')
            mounts.append(mt)

        settings = []
        for setting in jt.manifest.get('settings', []):
            s = {}
            s['name'] = get_unique_name(setting.get('name'))
            s['secret'] = setting.get('secret', False)
            settings.append(s)

        for var in jt.manifest.get('env_vars', []):
            s = {}
            name = get_unique_name(var.get('name'))
            name = 'ENV_' + name
            s['name'] = name
            settings.append(s)

        new_manifest = {
            'seedVersion': '1.0.0',
            'job': {
                'name': jt.name,
                'jobVersion': '0.0.0',
                'packageVersion': '1.0.0',
                'title': 'Legacy Title',
                'description': 'legacy job type: ' + old_name_version,
                'tags': [],
                'maintainer': {
                    'name': 'Legacy',
                    'email': '*****@*****.**'
                },
                'timeout': 3600,
                'interface': {
                    'command': jt.manifest.get('command', ''),
                    'inputs': {
                        'files': input_files,
                        'json': input_json
                    },
                    'outputs': {
                        'files': output_files,
                        'json': []
                    },
                    'mounts': mounts,
                    'settings': settings
                },
                'resources': {
                    'scalar': [
                        {'name': 'cpus', 'value': 1.0},
                        {'name': 'mem', 'value': 1024.0},
                        {'name': 'disk', 'value': 1000.0, 'inputMultiplier': 4.0}
                    ]
                },
                'errors': []
            }
        }

        jt.manifest = new_manifest
        SeedManifest(jt.manifest, do_validate=True)
        jt.save()

        for jtr in JobTypeRevision.objects.filter(job_type_id=jt.id).iterator():
            jtr.manifest = jt.manifest
            jtr.save()

    self._current_job_type_id = jt_id
    self._updated_job_type += 1
    if self._updated_job_type > self._total_job_type:
        self._updated_job_type = self._total_job_type
    percent = (float(self._updated_job_type) / float(self._total_job_type)) * 100.00
    logger.info('Completed %s of %s job types (%.1f%%)', self._updated_job_type, self._total_job_type, percent)
def _configure_secrets(self, config, job_exe, job_type, interface):
    """Creates a copy of the configuration, configures secrets (masked in one of the copies), and applies any
    final configuration

    :param config: The execution configuration, where the secrets will be masked out
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param interface: The job interface
    :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
    :returns: The copy of the execution configuration that contains the secrets
    :rtype: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    """

    # Copy the configuration
    config_with_secrets = config.create_copy()

    # Configure settings values, some are secret
    if job_type.is_system:
        config.add_to_task('main', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('main', settings=self._system_settings)
    else:
        config.add_to_task('pre', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('pre', settings=self._system_settings)
        config.add_to_task('post', settings=self._system_settings_hidden)
        config_with_secrets.add_to_task('post', settings=self._system_settings)
        job_config = job_type.get_job_configuration()
        secret_settings = secrets_mgr.retrieve_job_type_secrets(job_type.get_secrets_key())
        for _config, secrets_hidden in [(config, True), (config_with_secrets, False)]:
            task_settings = {}
            for setting in interface.get_settings():
                name = setting['name']
                if setting['secret']:
                    value = None
                    if name in secret_settings:
                        value = secret_settings[name]
                        if value is not None and secrets_hidden:
                            value = '*****'
                else:
                    value = job_config.get_setting_value(name)
                if ('required' in setting and setting['required']) or value is not None:
                    task_settings[name] = value

            # TODO: command args and env var replacement from the interface should be removed once Scale drops
            # support for old-style job types
            args = config._get_task_dict('main')['args']
            if JobInterfaceSunset.is_seed_dict(interface.definition):
                env_vars = task_settings
            # TODO: Remove this else block when old-style job types are removed
            else:
                args = JobInterface.replace_command_parameters(args, task_settings)
                env_vars = interface.populate_env_vars_arguments(task_settings)
            _config.add_to_task('main', args=args, env_vars=env_vars, settings=task_settings)

    # Configure env vars for settings
    for _config in [config, config_with_secrets]:
        for task_type in _config.get_task_types():
            env_vars = {}
            for name, value in _config.get_settings(task_type).items():
                if value is not None:
                    env_name = normalize_env_var_name(name)
                    env_vars[env_name] = value
            _config.add_to_task(task_type, env_vars=env_vars)

    # Configure Docker parameters for env vars and Docker volumes
    for _config in [config, config_with_secrets]:
        existing_volumes = set()
        for task_type in _config.get_task_types():
            docker_params = []
            for name, value in _config.get_env_vars(task_type).items():
                docker_params.append(DockerParameter('env', '%s=%s' % (name, value)))
            for name, volume in _config.get_volumes(task_type).items():
                docker_params.append(volume.to_docker_param(is_created=(name in existing_volumes)))
                existing_volumes.add(name)
            _config.add_to_task(task_type, docker_params=docker_params)

    # TODO: this feature should be removed once Scale drops support for job type docker params
    # Configure docker parameters listed in job type
    if job_type.docker_params:
        docker_params = []
        for key, value in job_type.docker_params.items():
            docker_params.append(DockerParameter(key, value))
        if docker_params:
            config.add_to_task('main', docker_params=docker_params)
            config_with_secrets.add_to_task('main', docker_params=docker_params)

    return config_with_secrets
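# Handling note for _configure_secrets (sketch; the launch helper name is hypothetical):
# the returned copy carries real secret values and should only flow to the task launch
# path, while `config` keeps the '*****' masks and is the copy that is safe to persist
# or expose via get_dict().
#
#   config_with_secrets = self._configure_secrets(config, job_exe, job_type, interface)
#   persistable = config.get_dict()          # masked settings only
#   launch(config_with_secrets)              # hypothetical launch helper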
def _configure_regular_job(config, job_exe, job_type, system_logging_level):
    """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
    input/output mounts, etc

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model
    :type job_type: :class:`job.models.JobType`
    :param system_logging_level: The logging level to be passed in through environment
    :type system_logging_level: str
    """

    config.create_tasks(['pull', 'pre', 'main', 'post'])
    config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
    config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
    config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

    # Configure input workspaces
    ro_input_workspaces = {}
    rw_input_workspaces = {}
    for input_workspace in config.get_input_workspace_names():
        ro_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RO)
        rw_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RW)
    config.add_to_task('pre', workspaces=ro_input_workspaces)
    config.add_to_task('main', workspaces=ro_input_workspaces)
    # Post tasks have access to input workspaces in case input files need to be moved as part of parse results
    config.add_to_task('post', workspaces=rw_input_workspaces)

    # Configure output workspaces
    output_workspaces = {}
    for output_workspace in config.get_output_workspace_names():
        output_workspaces[output_workspace] = TaskWorkspace(output_workspace, MODE_RW)
    config.add_to_task('post', workspaces=output_workspaces)

    # Configure input/output mounts
    input_mnt_name = 'scale_input_mount'
    output_mnt_name = 'scale_output_mount'
    input_vol_name = get_job_exe_input_vol_name(job_exe)
    output_vol_name = get_job_exe_output_vol_name(job_exe)
    input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
    input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
    output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
    output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)
    config.add_to_task('pre', mount_volumes={input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
    config.add_to_task('main', mount_volumes={input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw})
    config.add_to_task('post', mount_volumes={output_mnt_name: output_vol_ro},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

    # Configure output directory
    # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
    # job types
    env_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
    args = config._get_task_dict('main')['args']
    # TODO: Remove old-style command parameter injection with v6
    if not JobInterfaceSunset.is_seed_dict(job_type.manifest):
        args = JobInterface.replace_command_parameters(args, env_vars)
    else:
        args = environment_expansion(env_vars, args, remove_extras=True)
    config.add_to_task('main', args=args, env_vars=env_vars)

    # Configure task resources
    resources = job_exe.get_resources()
    # Pull-task and pre-task require full amount of resources
    config.add_to_task('pull', resources=resources)
    config.add_to_task('pre', resources=resources)
    # Main-task no longer requires the input file space
    resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
    config.add_to_task('main', resources=resources)
    # Post-task no longer requires any disk space
    resources.remove_resource('disk')
    config.add_to_task('post', resources=resources)
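# Resource bookkeeping sketch for the tail of _configure_regular_job (numbers hypothetical):
# with a 100.0 MiB input payload and an initial allocation that includes disk=500.0, the
# tasks end up with
#
#   pull / pre : disk=500.0   (full allocation; input staging happens in these tasks)
#   main       : disk=400.0   (after resources.subtract(NodeResources([Disk(100.0)])))
#   post       : no disk      (after resources.remove_resource('disk'))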
def convert_interface_to_manifest(apps, schema_editor):
    # Go through all of the JobType models and convert legacy interfaces to Seed manifests
    # Also inactivate/pause them
    JobType = apps.get_model('job', 'JobType')
    JobTypeRevision = apps.get_model('job', 'JobTypeRevision')
    RecipeTypeJobLink = apps.get_model('recipe', 'RecipeTypeJobLink')
    RecipeType = apps.get_model('recipe', 'RecipeType')

    unique = 0
    for jt in JobType.objects.all().iterator():
        if JobInterfaceSunset.is_seed_dict(jt.manifest):
            continue
        jt.is_active = False
        jt.is_paused = True

        old_name = jt.name
        old_name_version = jt.name + ' ' + jt.version
        jt.name = 'legacy-' + jt.name.replace('_', '-')

        if not jt.manifest:
            jt.manifest = {}

        input_files = []
        input_json = []
        output_files = []

        global INTERFACE_NAME_COUNTER
        INTERFACE_NAME_COUNTER = 0

        for input in jt.manifest.get('input_data', []):
            type = input.get('type', '')
            if 'file' not in type:
                json = {}
                json['name'] = get_unique_name(input.get('name'))
                json['type'] = 'string'
                json['required'] = input.get('required', True)
                input_json.append(json)
                continue
            file = {}
            file['name'] = get_unique_name(input.get('name'))
            file['required'] = input.get('required', True)
            file['partial'] = input.get('partial', False)
            file['mediaTypes'] = input.get('media_types', [])
            file['multiple'] = (type == 'files')
            input_files.append(file)

        for output in jt.manifest.get('output_data', []):
            type = output.get('type', '')
            file = {}
            file['name'] = get_unique_name(output.get('name'))
            file['required'] = output.get('required', True)
            file['mediaType'] = output.get('media_type', '')
            file['multiple'] = (type == 'files')
            file['pattern'] = "*.*"
            output_files.append(file)

        mounts = []
        for mount in jt.manifest.get('mounts', []):
            mt = {}
            mt['name'] = get_unique_name(mount.get('name'))
            mt['path'] = mount.get('path')
            mt['mode'] = mount.get('mode', 'ro')
            mounts.append(mt)

        settings = []
        for setting in jt.manifest.get('settings', []):
            s = {}
            s['name'] = get_unique_name(setting.get('name'))
            s['secret'] = setting.get('secret', False)
            settings.append(s)

        for var in jt.manifest.get('env_vars', []):
            s = {}
            name = get_unique_name(var.get('name'))
            name = 'ENV_' + name
            s['name'] = name
            settings.append(s)

        errors = []
        ec = jt.error_mapping.get('exit_codes', {})
        for exit_code, error_name in ec.items():
            error = {
                'code': int(exit_code),
                'name': get_unique_name(error_name),
                'title': 'Error Name',
                'description': 'Error Description',
                'category': 'algorithm'
            }
            errors.append(error)

        new_manifest = {
            'seedVersion': '1.0.0',
            'job': {
                'name': jt.name,
                'jobVersion': '0.0.0',
                'packageVersion': '1.0.0',
                'title': 'LEGACY ' + jt.title,
                'description': jt.description,
                'tags': [jt.category, old_name_version],
                'maintainer': {
                    'name': jt.author_name,
                    'email': '*****@*****.**',
                    'url': jt.author_url
                },
                'timeout': jt.timeout,
                'interface': {
                    'command': jt.manifest.get('command', ''),
                    'inputs': {
                        'files': input_files,
                        'json': input_json
                    },
                    'outputs': {
                        'files': output_files,
                        'json': []
                    },
                    'mounts': mounts,
                    'settings': settings
                },
                'resources': {
                    'scalar': [
                        {'name': 'cpus', 'value': jt.cpus_required},
                        {'name': 'mem', 'value': jt.mem_const_required, 'inputMultiplier': jt.mem_mult_required},
                        {'name': 'sharedMem', 'value': jt.shared_mem_required},
                        {'name': 'disk', 'value': jt.disk_out_const_required, 'inputMultiplier': jt.disk_out_mult_required}
                    ]
                },
                'errors': errors
            }
        }

        jt.manifest = new_manifest
        SeedManifest(jt.manifest, do_validate=True)
        jt.save()

        for jtr in JobTypeRevision.objects.filter(job_type_id=jt.id).iterator():
            jtr.manifest = jt.manifest
            jtr.save()

        # Update any recipe types that reference the updated job name
        for rtjl in RecipeTypeJobLink.objects.all().filter(job_type_id=jt.id).iterator():
            recipe_type = RecipeType.objects.get(id=rtjl.recipe_type_id)
            definition = recipe_type.definition
            changed = False

            # v6
            if 'nodes' in definition:
                for node in definition['nodes']:
                    jt_node = node['node_type']
                    if (jt_node['node_type'] == 'job' and
                            jt_node['job_type_name'].replace('_', '-') == old_name and
                            jt_node['job_type_version'] == jt.version):
                        node['node_type']['job_type_name'] = jt.name
                        changed = True
            # v5
            elif 'jobs' in definition:
                for job in definition['jobs']:
                    jt_node = job['job_type']
                    if jt_node['name'].replace('_', '-') == old_name and jt_node['version'] == jt.version:
                        job['job_type']['name'] = jt.name
                        changed = True

            if changed:
                recipe_type.definition = definition
                recipe_type.save()