Example #1
    def _configure_secrets(self, config, job_exe, job_type, interface):
        """Creates a copy of the configuration, configures secrets (masked in one of the copies), and applies any final
        configuration

        :param config: The execution configuration, where the secrets will be masked out
        :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param interface: The job interface
        :type interface: :class:`job.configuration.interface.job_interface.JobInterface`
        :returns: The copy of the execution configuration that contains the secrets
        :rtype: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
        """

        # Copy the configuration
        config_with_secrets = config.create_copy()

        # Configure settings values, some are secret
        if job_type.is_system:
            config.add_to_task('main', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('main', settings=self._system_settings)
        else:
            config.add_to_task('pre', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('pre', settings=self._system_settings)
            config.add_to_task('post', settings=self._system_settings_hidden)
            config_with_secrets.add_to_task('post', settings=self._system_settings)
            job_config = job_type.get_job_configuration()
            secret_settings = secrets_mgr.retrieve_job_type_secrets(job_type.get_secrets_key())
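            # Two passes over the interface settings: the first masks secret values in config,
            # the second keeps the real values in config_with_secrets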
            for _config, secrets_hidden in [(config, True), (config_with_secrets, False)]:
                task_settings = {}
                # TODO: use better interface method once we switch to Seed
                for setting in interface.get_dict()['settings']:
                    name = setting['name']
                    if setting['secret']:
                        value = None
                        if name in secret_settings:
                            value = secret_settings[name]
                            if value is not None and secrets_hidden:
                                value = '*****'
                    else:
                        value = job_config.get_setting_value(name)
                    if setting['required'] or value is not None:
                        task_settings[name] = value
                # TODO: command args and env var replacement from the interface should be removed once Scale drops
                # support for old-style job types
                args = _config._get_task_dict('main')['args']
                args = JobInterface._replace_command_parameters(args, task_settings)
                env_vars = interface.populate_env_vars_arguments(task_settings)
                _config.add_to_task('main', args=args, env_vars=env_vars, settings=task_settings)

        # Configure env vars for settings
        for _config in [config, config_with_secrets]:
            for task_type in _config.get_task_types():
                env_vars = {}
                for name, value in _config.get_settings(task_type).items():
                    if value is not None:
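                        # e.g. 'db-host' becomes 'DB_HOST' (assumed normalization behavior)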
                        env_name = normalize_env_var_name(name)
                        env_vars[env_name] = value
                _config.add_to_task(task_type, env_vars=env_vars)

        # Configure Docker parameters for env vars and Docker volumes
        for _config in [config, config_with_secrets]:
            existing_volumes = set()
            for task_type in _config.get_task_types():
                docker_params = []
                for name, value in _config.get_env_vars(task_type).items():
                    docker_params.append(DockerParameter('env', '%s=%s' % (name, value)))
                for name, volume in _config.get_volumes(task_type).items():
                    docker_params.append(volume.to_docker_param(is_created=(name in existing_volumes)))
                    existing_volumes.add(name)
                _config.add_to_task(task_type, docker_params=docker_params)

        # TODO: this feature should be removed once Scale drops support for job type docker params
        # Configure docker parameters listed in job type
        if job_type.docker_params:
            docker_params = []
            for key, value in job_type.docker_params.items():
                docker_params.append(DockerParameter(key, value))
            if docker_params:
                config.add_to_task('main', docker_params=docker_params)
                config_with_secrets.add_to_task('main', docker_params=docker_params)

        return config_with_secrets
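The contract worth noting: the method mutates the given config in place (masking secret values as '*****') and returns a separate copy that carries the real values. A minimal usage sketch follows, assuming a ScheduledExecutionConfigurator instance and already-populated models; the surrounding wiring is hypothetical, not taken from Scale's scheduler:

    # Hypothetical wiring: the constructor argument and model objects are assumed
    configurator = ScheduledExecutionConfigurator(workspaces)
    interface = job_type.get_job_interface()
    config_with_secrets = configurator._configure_secrets(config, job_exe, job_type, interface)
    # 'config' now has secrets masked as '*****' and is safe to persist or log;
    # 'config_with_secrets' holds real values and should only reach the task launcher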
Example #2
    def configure_queued_job(self, job):
        """Creates and returns an execution configuration for the given queued job. The given job model should have its
        related job_type and job_type_rev models populated.

        :param job: The queued job model
        :type job: :class:`job.models.Job`
        :returns: The execution configuration for the queued job
        :rtype: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
        """

        config = ExecutionConfiguration()
        data = job.get_job_data()

        # Add input file meta-data
        input_files_dict = self._create_input_file_dict(data)
        config.set_input_files(input_files_dict)

        # Set up env vars for job's input data
        env_vars = {}
        input_values = {}
        # TODO: refactor after Seed upgrade
        # This step ensures that every input defaults to a blank value when none is provided
        for input_data_dict in job.get_job_interface().definition['input_data']:
            input_values[input_data_dict['name']] = ''  # Everything gets a blank value by default
        # TODO: refactor this to use JobData method after Seed upgrade
        for data_input in data.get_dict()['input_data']:
            input_name = data_input['name']
            env_var_name = normalize_env_var_name(input_name)
            if 'value' in data_input:
                env_vars[env_var_name] = data_input['value']
                input_values[input_name] = data_input['value']
            if 'file_id' in data_input:
                input_file = input_files_dict[input_name][0]
                file_name = os.path.basename(input_file.workspace_path)
                if input_file.local_file_name:
                    file_name = input_file.local_file_name
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name, file_name)
                input_values[input_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name, file_name)
            elif 'file_ids' in data_input:
                env_vars[env_var_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name)
                input_values[input_name] = os.path.join(SCALE_JOB_EXE_INPUT_PATH, input_name)

        task_workspaces = {}
        if job.job_type.is_system:
            # Add any workspaces needed for this system job
            task_workspaces = QueuedExecutionConfigurator._system_job_workspaces(job)
        else:
            # Set any output workspaces needed
            # TODO: In the future, output workspaces can be moved from job data to configuration, moving this step to
            # the ScheduledExecutionConfigurator
            self._cache_workspace_names(data.get_output_workspace_ids())
            output_workspaces = {}
            for output, workspace_id in data.get_output_workspaces().items():
                output_workspaces[output] = self._cached_workspace_names[workspace_id]
            config.set_output_workspaces(output_workspaces)

        # Create main task with fields populated from input data
        args = job.get_job_interface().get_command_args()
        # TODO: command arg input param replacement can be removed when old-style job type support is dropped
        args = JobInterface._replace_command_parameters(args, input_values)
        config.create_tasks(['main'])
        config.add_to_task('main', args=args, env_vars=env_vars, workspaces=task_workspaces)
        return config
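Because this runs at queue time, before a node is assigned, the returned configuration holds only what can be derived from the job's data and interface; node-specific details are filled in later during scheduling. A minimal usage sketch (Python 2, matching the codebase); the query and constructor argument below are assumptions, not Scale's actual call sites:

    # Hypothetical usage: fetch the job with its related models populated, as the docstring requires
    job = Job.objects.select_related('job_type', 'job_type_rev').get(pk=job_id)
    configurator = QueuedExecutionConfigurator(input_files)  # input file dict assumed
    exe_config = configurator.configure_queued_job(job)
    # The 'main' task now has its args, env vars, and workspaces populated from the input data
    print exe_config.get_dict()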
Example #3
    @staticmethod
    def _configure_regular_job(config, job_exe, job_type):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc

        :param config: The execution configuration
        :type config: :class:`job.configuration.json.execution.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull', args=create_pull_command(job_type.docker_image))
        env_vars = {'SCALE_JOB_ID': unicode(job_exe.job_id), 'SCALE_EXE_NUM': unicode(job_exe.exe_num)}
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS, env_vars=env_vars)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS, env_vars=env_vars)

        # Configure input workspaces
        ro_input_workspaces = {}
        rw_input_workspaces = {}
        for input_workspace in config.get_input_workspace_names():
            ro_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RO)
            rw_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RW)
        config.add_to_task('pre', workspaces=ro_input_workspaces)
        config.add_to_task('main', workspaces=ro_input_workspaces)
        # Post tasks have access to input workspaces in case input files need to be moved as part of parse results
        config.add_to_task('post', workspaces=rw_input_workspaces)

        # Configure output workspaces
        output_workspaces = {}
        for output_workspace in config.get_output_workspace_names():
            output_workspaces[output_workspace] = TaskWorkspace(output_workspace, MODE_RW)
        config.add_to_task('post', workspaces=output_workspaces)

        # Configure input/output mounts
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
        input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
        output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
        output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)
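        # The pre task writes the input files, so it mounts the input volume read-write;
        # the main task only reads them, and the post task only reads the output volume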
        config.add_to_task('pre', mount_volumes={input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw})
        config.add_to_task('main', mount_volumes={input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw})
        config.add_to_task('post', mount_volumes={output_mnt_name: output_vol_ro})

        # Configure output directory
        # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
        # job types
        env_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        args = config._get_task_dict('main')['args']
        args = JobInterface._replace_command_parameters(args, env_vars)
        config.add_to_task('main', args=args, env_vars=env_vars)

        # Configure task resources
        resources = job_exe.get_resources()
        # Pull-task and pre-task require full amount of resources
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # Main-task no longer requires the input file space
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # Post-task no longer requires any disk space
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
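The resource hand-off narrows at each stage: pull and pre receive the full allocation, main gives back the disk claimed by the input files, and post keeps no disk at all. A sketch of that arithmetic with invented numbers; the Cpus and Mem classes are assumed by analogy with the Disk usage above:

    # Illustrative only: values are made up and the resource API is assumed
    resources = NodeResources([Cpus(1.0), Mem(1024.0), Disk(500.0)])
    resources.subtract(NodeResources([Disk(100.0)]))  # main task: input file space released
    # resources now holds 1.0 CPUs, 1024.0 MiB memory, 400.0 MiB disk
    resources.remove_resource('disk')                 # post task: all disk released
    # resources now holds 1.0 CPUs, 1024.0 MiB memory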