Example #1
    def get_mount_volume(self, mount_name, volume_name, container_path, mode):
        """Returns the volume that has been configured for the given mount name. If the given mount is not defined in
        this configuration, None is returned.

        :param mount_name: The name of the mount defined in the job type
        :type mount_name: string
        :param volume_name: The name of the volume
        :type volume_name: string
        :param container_path: The path within the container onto which the volume will be mounted
        :type container_path: string
        :param mode: Either 'ro' for read-only or 'rw' for read-write
        :type mode: string
        :returns: The volume that should be mounted into the job container, possibly None
        :rtype: :class:`job.execution.configuration.volume.Volume`
        """

        if mount_name not in self._configuration['mounts']:
            return None

        volume = None
        mount_config = self._configuration['mounts'][mount_name]
        mount_type = mount_config['type']
        if mount_type == 'host':
            host_path = mount_config['host_path']
            volume = Volume(volume_name, container_path, mode, is_host=True, host_path=host_path)
        elif mount_type == 'volume':
            driver = mount_config['driver']
            driver_opts = mount_config['driver_opts']
            volume = Volume(volume_name, container_path, mode, is_host=False, driver=driver, driver_opts=driver_opts)

        return volume
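
For context, this method reads only the 'mounts' section of the configuration. A minimal sketch of the two mount shapes it understands, with hypothetical values (the job_config object, mount names, and paths below are made up for illustration; the Volume attribute names are inferred from the constructor keywords):

    configuration = {
        'mounts': {
            'input_data': {'type': 'host', 'host_path': '/data/input'},
            'shared_scratch': {'type': 'volume', 'driver': 'nfs',
                               'driver_opts': {'share': 'nfs-server:/exports/scratch'}},
        },
    }

    # Resolve a mount into a Volume to be attached to the job container
    volume = job_config.get_mount_volume('input_data', 'scale_input', '/scale/input', 'ro')
    # volume.is_host is True and volume.host_path is '/data/input'

    # A mount name that is not configured yields None
    assert job_config.get_mount_volume('no_such_mount', 'v', '/p', 'ro') is None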
Example #2
    def get_volumes(self, task_type):
        """Returns the Docker volumes for the given task type

        :param task_type: The task type
        :type task_type: string
        :returns: The dict of Docker volumes, keyed by volume name
        :rtype: dict
        """

        volumes = {}
        for task_dict in self._configuration['tasks']:
            if task_dict['type'] == task_type:
                if 'volumes' in task_dict:
                    for name, vol_dict in task_dict['volumes'].items():
                        if vol_dict['type'] == 'host':
                            vol = Volume(name,
                                         vol_dict['container_path'],
                                         vol_dict['mode'],
                                         is_host=True,
                                         host_path=vol_dict['host_path'])
                        else:
                            driver = None
                            driver_opts = None
                            if 'driver' in vol_dict:
                                driver = vol_dict['driver']
                            if 'driver_opts' in vol_dict:
                                driver_opts = vol_dict['driver_opts']
                            vol = Volume(name,
                                         vol_dict['container_path'],
                                         vol_dict['mode'],
                                         is_host=False,
                                         driver=driver,
                                         driver_opts=driver_opts)
                        volumes[name] = vol
        return volumes
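
Likewise, a sketch of the 'tasks' configuration this method walks; only tasks whose type matches contribute, and the result is keyed by volume name (all values below are hypothetical):

    configuration = {
        'tasks': [
            {'type': 'main',
             'volumes': {
                 'wksp_vol': {'type': 'host', 'container_path': '/wksp',
                              'mode': 'ro', 'host_path': '/data/wksp'},
                 'out_vol': {'type': 'volume', 'container_path': '/out',
                             'mode': 'rw', 'driver': 'local'},
             }},
            {'type': 'pre'},  # no 'volumes' key, so it contributes nothing
        ],
    }

    volumes = exe_config.get_volumes('main')
    # {'wksp_vol': <host-path Volume>, 'out_vol': <driver-backed Volume>}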
Example #3
    def _configure_all_tasks(self, config, job_exe, job_type):
        """Configures the given execution with items that apply to all tasks

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.set_task_ids(job_exe.get_cluster_id())

        for task_type in config.get_task_types():
            # Configure env vars describing allocated task resources
            env_vars = {}
            for resource in config.get_resources(task_type).resources:
                env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
                env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources

            # Configure env vars for Scale meta-data
            env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
            env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
            if job_exe.recipe_id:
                env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
            if job_exe.batch_id:
                env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

            # Configure workspace volumes
            workspace_volumes = {}
            for task_workspace in config.get_workspaces(task_type):
                logger.debug(self._workspaces)
                workspace_model = self._workspaces[task_workspace.name]
                # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
                if workspace_model.volume:
                    vol_name = get_workspace_volume_name(job_exe, task_workspace.name)
                    cont_path = get_workspace_volume_path(workspace_model.name)
                    if workspace_model.volume.host:
                        host_path = workspace_model.volume.remote_path
                        volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=True, host_path=host_path)
                    else:
                        driver = workspace_model.volume.driver
                        driver_opts = {}
                        # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                        if driver == 'nfs':
                            driver_opts = {'share': workspace_model.volume.remote_path}
                        volume = Volume(vol_name, cont_path, task_workspace.mode, is_host=False, driver=driver,
                                        driver_opts=driver_opts)
                    workspace_volumes[task_workspace.name] = volume

            config.add_to_task(task_type, env_vars=env_vars, wksp_volumes=workspace_volumes)

        # Labels for metric grouping
        job_id_label = DockerParameter('label', 'scale-job-id={}'.format(job_exe.job_id))
        job_execution_id_label = DockerParameter('label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
        job_type_name_label = DockerParameter('label', 'scale-job-type-name={}'.format(job_type.name))
        job_type_version_label = DockerParameter('label', 'scale-job-type-version={}'.format(job_type.version))
        main_label = DockerParameter('label', 'scale-task-type=main')
        config.add_to_task('main', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                  job_execution_id_label, main_label])
        if not job_type.is_system:
            pre_label = DockerParameter('label', 'scale-task-type=pre')
            post_label = DockerParameter('label', 'scale-task-type=post')
            config.add_to_task('pre', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                     job_execution_id_label, pre_label])
            config.add_to_task('post', docker_params=[job_id_label, job_type_name_label, job_type_version_label,
                                                      job_execution_id_label, post_label])

        # Configure tasks for logging
        if settings.LOGGING_ADDRESS is not None:
            log_driver = DockerParameter('log-driver', 'syslog')
            # Must explicitly specify RFC3164 to ensure compatibility with logstash in Docker 1.11+
            syslog_format = DockerParameter('log-opt', 'syslog-format=rfc3164')
            log_address = DockerParameter('log-opt', 'syslog-address=%s' % settings.LOGGING_ADDRESS)
            if not job_type.is_system:
                pre_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('pre'), job_type.name))
                config.add_to_task('pre', docker_params=[log_driver, syslog_format, log_address, pre_task_tag])
                post_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('post'), job_type.name))
                config.add_to_task('post', docker_params=[log_driver, syslog_format, log_address, post_task_tag])
                # TODO: remove es_urls parameter when Scale no longer supports old-style job types
                es_urls = None
                # Use connection pool to get up-to-date list of elasticsearch nodes
                if settings.ELASTICSEARCH:
                    hosts = [host.host for host in settings.ELASTICSEARCH.transport.connection_pool.connections]
                    es_urls = ','.join(hosts)
                # Post task needs ElasticSearch URL to grab logs for old artifact registration
                es_param = DockerParameter('env', 'SCALE_ELASTICSEARCH_URLS=%s' % es_urls)
                config.add_to_task('post', docker_params=[es_param])
            main_task_tag = DockerParameter('log-opt', 'tag=%s|%s' % (config.get_task_id('main'), job_type.name))
            config.add_to_task('main', docker_params=[log_driver, syslog_format, log_address, main_task_tag])
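
Assuming each DockerParameter(name, value) ultimately surfaces as a '--name value' flag on the docker run command line (that translation happens outside this snippet), the main task above ends up with labels plus syslog logging options along these lines; the attribute names and concrete values here are illustrative only:

    # Hypothetical rendering of the main task's Docker parameters
    params = [DockerParameter('label', 'scale-job-id=42'),
              DockerParameter('label', 'scale-task-type=main'),
              DockerParameter('log-driver', 'syslog'),
              DockerParameter('log-opt', 'syslog-format=rfc3164'),
              DockerParameter('log-opt', 'syslog-address=tcp://logstash.example:514'),
              DockerParameter('log-opt', 'tag=main_task_id|my-job-type')]
    # Assumes DockerParameter exposes its two constructor args as .name and .value
    flags = ' '.join('--%s %s' % (p.name, p.value) for p in params)
    # --label scale-job-id=42 --label scale-task-type=main --log-driver syslog ...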
Example #4
    def _configure_regular_job(config, job_exe, job_type, system_logging_level):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc.

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param system_logging_level: The logging level to be passed in through the environment
        :type system_logging_level: str
        """

        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

        # Configure input workspaces
        ro_input_workspaces = {}
        rw_input_workspaces = {}
        for input_workspace in config.get_input_workspace_names():
            ro_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RO)
            rw_input_workspaces[input_workspace] = TaskWorkspace(input_workspace, MODE_RW)
        config.add_to_task('pre', workspaces=ro_input_workspaces)
        config.add_to_task('main', workspaces=ro_input_workspaces)
        # Post tasks have access to input workspaces in case input files need to be moved as part of parse results
        config.add_to_task('post', workspaces=rw_input_workspaces)

        # Configure output workspaces
        output_workspaces = {}
        for output_workspace in config.get_output_workspace_names():
            output_workspaces[output_workspace] = TaskWorkspace(output_workspace, MODE_RW)
        config.add_to_task('post', workspaces=output_workspaces)

        # Configure input/output mounts
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        input_vol_ro = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO, is_host=False)
        input_vol_rw = Volume(input_vol_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW, is_host=False)
        output_vol_ro = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO, is_host=False)
        output_vol_rw = Volume(output_vol_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW, is_host=False)

        config.add_to_task('pre', mount_volumes={input_mnt_name: input_vol_rw, output_mnt_name: output_vol_rw},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
        config.add_to_task('main', mount_volumes={input_mnt_name: input_vol_ro, output_mnt_name: output_vol_rw})
        config.add_to_task('post', mount_volumes={output_mnt_name: output_vol_ro},
                           env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

        # Configure output directory
        # TODO: original output dir and command arg replacement can be removed when Scale no longer supports old-style
        # job types
        env_vars = {'job_output_dir': SCALE_JOB_EXE_OUTPUT_PATH, 'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        args = config._get_task_dict('main')['args']

        # TODO: Remove old-style logic for command parameter injection with v6
        if not JobInterfaceSunset.is_seed_dict(job_type.manifest):
            args = JobInterface.replace_command_parameters(args, env_vars)
        else:
            args = environment_expansion(env_vars, args, remove_extras=True)
        config.add_to_task('main', args=args, env_vars=env_vars)

        # Configure task resources
        resources = job_exe.get_resources()
        # Pull-task and pre-task require full amount of resources
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # Main-task no longer requires the input file space
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # Post-task no longer requires any disk space
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
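
The resource whittling at the end follows a simple progression: pull and pre receive the full allocation, main gives back the disk consumed by the input files, and post keeps no disk at all. A sketch with hypothetical numbers (Cpus and Mem are assumed resource classes alongside the Disk class used above):

    resources = NodeResources([Cpus(1.0), Mem(1024.0), Disk(500.0)])  # hypothetical allocation
    # pull/pre tasks: 1.0 CPU, 1024.0 mem, 500.0 disk
    resources.subtract(NodeResources([Disk(100.0)]))  # input files occupied 100.0 disk
    # main task: 1.0 CPU, 1024.0 mem, 400.0 disk
    resources.remove_resource('disk')
    # post task: 1.0 CPU, 1024.0 mem, no disk requirement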
Example #5
    def _configure_all_tasks(self, config, job_exe, job_type):
        """Configures the given execution with items that apply to all tasks

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        """

        config.set_task_ids(job_exe.get_cluster_id())

        # Initialized outside the task loop so a GPU allocation found for one
        # task type is not reset to None by a later iteration
        nvidia_docker_label = None

        for task_type in config.get_task_types():
            # Configure env vars describing allocated task resources
            env_vars = {}

            for resource in config.get_resources(task_type).resources:
                env_name = 'ALLOCATED_%s' % normalize_env_var_name(resource.name)
                env_vars[env_name] = '%.1f' % resource.value  # Assumes scalar resources
                if resource.name == 'gpus' and int(resource.value) > 0:
                    gpu_list = GPUManager.get_nvidia_docker_label(job_exe.node_id, job_exe.job_id)
                    nvidia_docker_label = DockerParameter(
                        'env', 'NVIDIA_VISIBLE_DEVICES={}'.format(gpu_list.strip(',')))

            # Configure env vars for Scale meta-data
            env_vars['SCALE_JOB_ID'] = unicode(job_exe.job_id)
            env_vars['SCALE_EXE_NUM'] = unicode(job_exe.exe_num)
            if job_exe.recipe_id:
                env_vars['SCALE_RECIPE_ID'] = unicode(job_exe.recipe_id)
            if job_exe.batch_id:
                env_vars['SCALE_BATCH_ID'] = unicode(job_exe.batch_id)

            # Configure workspace volumes
            workspace_volumes = {}
            for task_workspace in config.get_workspaces(task_type):
                logger.debug(self._workspaces)
                workspace_model = self._workspaces[task_workspace.name]
                # TODO: Should refactor workspace broker to return a Volume object and remove BrokerVolume
                if workspace_model.volume:
                    vol_name = get_workspace_volume_name(
                        job_exe, task_workspace.name)
                    cont_path = get_workspace_volume_path(workspace_model.name)
                    if workspace_model.volume.host:
                        host_path = workspace_model.volume.remote_path
                        volume = Volume(vol_name,
                                        cont_path,
                                        task_workspace.mode,
                                        is_host=True,
                                        host_path=host_path)
                    else:
                        driver = workspace_model.volume.driver
                        driver_opts = {}
                        # TODO: Hack alert for nfs broker, as stated above, we should return Volume from broker
                        if driver == 'nfs':
                            driver_opts = {
                                'share': workspace_model.volume.remote_path
                            }
                        volume = Volume(vol_name,
                                        cont_path,
                                        task_workspace.mode,
                                        is_host=False,
                                        driver=driver,
                                        driver_opts=driver_opts)
                    workspace_volumes[task_workspace.name] = volume

            config.add_to_task(task_type,
                               env_vars=env_vars,
                               wksp_volumes=workspace_volumes)

        # Labels for metric grouping
        job_id_label = DockerParameter(
            'label', 'scale-job-id={}'.format(job_exe.job_id))
        job_execution_id_label = DockerParameter(
            'label', 'scale-job-execution-id={}'.format(job_exe.exe_num))
        job_type_name_label = DockerParameter(
            'label', 'scale-job-type-name={}'.format(job_type.name))
        job_type_version_label = DockerParameter(
            'label', 'scale-job-type-version={}'.format(job_type.version))
        main_label = DockerParameter('label', 'scale-task-type=main')
        if nvidia_docker_label:
            nvidia_runtime_param = DockerParameter('runtime', 'nvidia')
            config.add_to_task('main',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, main_label,
                                   nvidia_docker_label, nvidia_runtime_param
                               ])
        else:
            config.add_to_task('main',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, main_label
                               ])

        if not job_type.is_system:
            pre_label = DockerParameter('label', 'scale-task-type=pre')
            post_label = DockerParameter('label', 'scale-task-type=post')
            config.add_to_task('pre',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, pre_label
                               ])
            config.add_to_task('post',
                               docker_params=[
                                   job_id_label, job_type_name_label,
                                   job_type_version_label,
                                   job_execution_id_label, post_label
                               ])

        # Configure tasks for logging
        if settings.LOGGING_ADDRESS is not None:
            log_driver = DockerParameter('log-driver', 'fluentd')
            fluent_precision = DockerParameter(
                'log-opt', 'fluentd-sub-second-precision=true')
            log_address = DockerParameter(
                'log-opt', 'fluentd-address=%s' % settings.LOGGING_ADDRESS)
            if not job_type.is_system:
                pre_task_tag = DockerParameter(
                    'log-opt', 'tag=%s|%s|%s|%s|%s' %
                    (config.get_task_id('pre'), job_type.name,
                     job_type.version, job_exe.job_id, job_exe.exe_num))
                config.add_to_task('pre',
                                   docker_params=[
                                       log_driver, fluent_precision,
                                       log_address, pre_task_tag
                                   ])
                post_task_tag = DockerParameter(
                    'log-opt', 'tag=%s|%s|%s|%s|%s' %
                    (config.get_task_id('post'), job_type.name,
                     job_type.version, job_exe.job_id, job_exe.exe_num))
                config.add_to_task('post',
                                   docker_params=[
                                       log_driver, fluent_precision,
                                       log_address, post_task_tag
                                   ])
                # TODO: remove this env var when Scale no longer supports old-style job types
                # Post task needs the Elasticsearch URL to grab logs for old artifact registration
                es_param = DockerParameter(
                    'env', 'ELASTICSEARCH_URL=%s' % settings.ELASTICSEARCH_URL)
                config.add_to_task('post', docker_params=[es_param])
            main_task_tag = DockerParameter(
                'log-opt', 'tag=%s|%s|%s|%s|%s' %
                (config.get_task_id('main'), job_type.name, job_type.version,
                 job_exe.job_id, job_exe.exe_num))
            config.add_to_task('main',
                               docker_params=[
                                   log_driver, fluent_precision, log_address,
                                   main_task_tag
                               ])
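
Compared with the syslog tag in Example #3 (task ID and job type name only), the fluentd tag carries five pipe-delimited fields, letting the log collector route and group entries without extra lookups. A sketch of what the main task's tag might resolve to, with made-up values:

    # tag=<task_id>|<job_type_name>|<job_type_version>|<job_id>|<exe_num>
    tag = 'tag=%s|%s|%s|%s|%s' % ('main_abc123', 'my-job-type', '1.0.0', 42, 3)
    # 'tag=main_abc123|my-job-type|1.0.0|42|3'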