Example #1
    def _system_job_workspaces(job):
        """Returns any workspaces needed for the main task if this job is a system job. The given job model should have
        its related job_type and job_type_rev models populated.

        :param job: The queued job model
        :type job: :class:`job.models.Job`
        :returns: A dict where workspaces are stored by name
        :rtype: dict
        """

        workspaces = {}
        data = job.get_job_data()

        # Configure ingest workspace based on input data values
        if job.job_type.name == 'scale-ingest':
            workspace_name = None
            new_workspace_name = None
            prop_dict = data.get_property_values(['Ingest ID', 'workspace', 'new_workspace'])
            if 'workspace' in prop_dict:
                workspace_name = prop_dict['workspace']
                if 'new_workspace' in prop_dict:
                    new_workspace_name = prop_dict['new_workspace']
            else:
                # Old ingest jobs do not have the workspace(s) in their data, so we need to query the ingest model
                if 'Ingest ID' in prop_dict:
                    ingest_id = int(prop_dict['Ingest ID'])
                    from ingest.models import Ingest
                    ingest = Ingest.objects.select_related('workspace', 'new_workspace').get(id=ingest_id)
                    workspace_name = ingest.workspace.name
                    if ingest.new_workspace:
                        new_workspace_name = ingest.new_workspace.name
            if workspace_name:
                workspaces[workspace_name] = TaskWorkspace(workspace_name, MODE_RW)
            if new_workspace_name:
                workspaces[new_workspace_name] = TaskWorkspace(new_workspace_name, MODE_RW)

        # Configure Strike workspace based on current configuration
        if job.job_type.name == 'scale-strike':
            strike_id = data.get_property_values(['Strike ID'])['Strike ID']
            from ingest.models import Strike
            strike = Strike.objects.get(id=strike_id)
            workspace_name = strike.get_strike_configuration().get_workspace()
            workspaces[workspace_name] = TaskWorkspace(workspace_name, MODE_RW)

        # Configure Scan workspace based on current configuration
        if job.job_type.name == 'scale-scan':
            scan_id = data.get_property_values(['Scan ID'])['Scan ID']
            from ingest.models import Scan
            scan = Scan.objects.get(id=scan_id)
            workspace_name = scan.get_scan_configuration().get_workspace()
            workspaces[workspace_name] = TaskWorkspace(workspace_name, MODE_RW)

        # Configure Scale Delete Files workspaces based on input workspaces
        if job.job_type.name == 'scale-delete-files':
            wrkspc_list = json.loads(data.get_property_values(['workspaces'])['workspaces'])
            workspaces = {w_name: TaskWorkspace(w_name, MODE_RW) for d in wrkspc_list for w_name, _v in d.items()}

        return workspaces
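A minimal call-site sketch for the helper above. The `queued_job_id` variable and the surrounding scheduler context are hypothetical, the `select_related` call mirrors the docstring's requirement that `job_type` and `job_type_rev` be populated, and it is assumed that `TaskWorkspace` exposes the `name` and `mode` it was constructed with:

    # Hypothetical usage: build the main-task workspaces for a queued system job.
    # Shown as a plain function call for brevity; in context this is a method.
    job = Job.objects.select_related('job_type', 'job_type_rev').get(id=queued_job_id)
    workspaces = _system_job_workspaces(job)
    for name, task_workspace in workspaces.items():
        # All system-job workspaces are mounted read-write (MODE_RW)
        print('%s mounted with mode %s' % (name, task_workspace.mode))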
Example #2
    def get_workspaces(self, task_type):
        """Returns the workspaces for the given task type

        :param task_type: The task type
        :type task_type: str
        :returns: The list of workspaces
        :rtype: [:class:`job.execution.configuration.workspace.TaskWorkspace`]
        """

        workspaces = []
        for task_dict in self._configuration['tasks']:
            if task_dict['type'] == task_type:
                if 'workspaces' in task_dict:
                    for name, workspace_dict in task_dict['workspaces'].items():
                        workspaces.append(TaskWorkspace(name, workspace_dict['mode']))
        return workspaces
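For reference, a sketch of the shape of `self._configuration` that this loop walks. The field names `tasks`, `type`, `workspaces`, and `mode` come straight from the method; the task and workspace names and mode values are illustrative:

    # Illustrative configuration dict; get_workspaces('main') on this would
    # return TaskWorkspace('input', 'ro') and TaskWorkspace('products', 'rw')
    _configuration = {
        'tasks': [
            {'type': 'pre', 'workspaces': {'input': {'mode': 'ro'}}},
            {'type': 'main', 'workspaces': {'input': {'mode': 'ro'}, 'products': {'mode': 'rw'}}},
        ]
    }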
Example #3
    def _configure_regular_job(config, job_exe, job_type, system_logging_level):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param system_logging_level: The logging level to be passed in through environment
        :type system_logging_level: str
        """

        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

        # Configure input workspaces
        ro_input_workspaces = {}
        rw_input_workspaces = {}
        for input_workspace in config.get_input_workspace_names():
            ro_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RO)
            rw_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RW)
        config.add_to_task('pre', workspaces=ro_input_workspaces)
        config.add_to_task('main', workspaces=ro_input_workspaces)
        # Post tasks have access to input workspaces in case input files need to be moved as part of parsing results
        config.add_to_task('post', workspaces=rw_input_workspaces)

        # Configure output workspaces
        output_workspaces = {}
        for output_workspace in config.get_output_workspace_names():
            output_workspaces[output_workspace] = TaskWorkspace(output_workspace, MODE_RW)
        config.add_to_task('post', workspaces=output_workspaces)

        # Configure input/output mounts
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
        input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
        output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
        output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)

        config.add_to_task('pre', mount_volumes={input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
        config.add_to_task('main', mount_volumes={input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw})
        config.add_to_task('post', mount_volumes={output_mnt_name: output_vol_ro},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

        # Configure output directory
        # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
        # job types
        env_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        args = config._get_task_dict('main')['args']

        # TODO: Remove old-style logic for command parameter injection with v6
        if not JobInterfaceSunset.is_seed_dict(job_type.manifest):
            args = JobInterface.replace_command_parameters(args, env_vars)
        else:
            args = environment_expansion(env_vars, args, remove_extras=True)
        config.add_to_task('main', args=args, env_vars=env_vars)

        # Configure task resources
        resources = job_exe.get_resources()
        # The pull-task and pre-task require the full amount of resources
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # The main task no longer requires disk space for the input files
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # The post-task no longer requires any disk space
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
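And a hedged sketch of how such a configuration routine might be invoked when scheduling an execution. The `exe_id` variable and the 'INFO' logging level are placeholders; `ExecutionConfiguration` and `JobExecution` are the types named in the docstring, and reading the job type via `job_exe.job_type` is an assumption about the model's relations:

    # Hypothetical call site: configure a freshly scheduled, non-system execution
    config = ExecutionConfiguration()
    job_exe = JobExecution.objects.select_related('job_type').get(id=exe_id)  # assumed relation
    _configure_regular_job(config, job_exe, job_exe.job_type, 'INFO')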