Example #1
    def _configure_regular_job(config, job_exe, job_type,
                               system_logging_level):
        """Configures the given execution as a regular (non-system) job by adding pre and post tasks,
        input/output mounts, etc

        :param config: The execution configuration
        :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
        :param job_exe: The job execution model being scheduled
        :type job_exe: :class:`job.models.JobExecution`
        :param job_type: The job type model
        :type job_type: :class:`job.models.JobType`
        :param system_logging_level: The logging level to be passed in through environment
        :type system_logging_level: str
        """
        config.create_tasks(['pull', 'pre', 'main', 'post'])
        config.add_to_task('pull',
                           args=create_pull_command(job_exe.docker_image))
        config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
        config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

        # Configure input workspaces
        ro_input_workspaces = {}
        rw_input_workspaces = {}
        for input_workspace in config.get_input_workspace_names():
            ro_input_workspaces[input_workspace] = TaskWorkspace(
                input_workspace, MODE_RO)
            rw_input_workspaces[input_workspace] = TaskWorkspace(
                input_workspace, MODE_RW)
        config.add_to_task('pre', workspaces=ro_input_workspaces)
        config.add_to_task('main', workspaces=ro_input_workspaces)
        # Post tasks have access to input workspaces in case input files need to be moved as part of parse results
        config.add_to_task('post', workspaces=rw_input_workspaces)

        # Configure output workspaces
        output_workspaces = {}
        for output_workspace in config.get_output_workspace_names():
            output_workspaces[output_workspace] = TaskWorkspace(
                output_workspace, MODE_RW)
        config.add_to_task('post', workspaces=output_workspaces)

        # Configure input/output mounts
        input_mnt_name = 'scale_input_mount'
        output_mnt_name = 'scale_output_mount'
        input_vol_name = get_job_exe_input_vol_name(job_exe)
        output_vol_name = get_job_exe_output_vol_name(job_exe)
        input_vol_ro = Volume(input_vol_name,
                              SCALE_JOB_EXE_INPUT_PATH,
                              MODE_RO,
                              is_host=False)
        input_vol_rw = Volume(input_vol_name,
                              SCALE_JOB_EXE_INPUT_PATH,
                              MODE_RW,
                              is_host=False)
        output_vol_ro = Volume(output_vol_name,
                               SCALE_JOB_EXE_OUTPUT_PATH,
                               MODE_RO,
                               is_host=False)
        output_vol_rw = Volume(output_vol_name,
                               SCALE_JOB_EXE_OUTPUT_PATH,
                               MODE_RW,
                               is_host=False)

        config.add_to_task(
            'pre',
            mount_volumes={
                input_mnt_name: input_vol_rw,
                output_mnt_name: output_vol_rw
            },
            env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
        config.add_to_task('main',
                           mount_volumes={
                               input_mnt_name: input_vol_ro,
                               output_mnt_name: output_vol_rw
                           })
        config.add_to_task(
            'post',
            mount_volumes={output_mnt_name: output_vol_ro},
            env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

        # Configure output directory
        env_vars = {'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
        args = config._get_task_dict('main')['args']

        args = environment_expansion(env_vars, args)

        config.add_to_task('main', args=args, env_vars=env_vars)

        # Configure task resources
        resources = job_exe.get_resources()
        # Pull-task and pre-task require the full amount of resources
        config.add_to_task('pull', resources=resources)
        config.add_to_task('pre', resources=resources)
        # Main-task no longer requires the input file space
        resources.subtract(NodeResources([Disk(job_exe.input_file_size)]))
        config.add_to_task('main', resources=resources)
        # Post-task no longer requires any disk space
        resources.remove_resource('disk')
        config.add_to_task('post', resources=resources)
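
The environment_expansion call above substitutes environment-variable placeholders (such as ${OUTPUT_DIR}) into the main task's arguments. A minimal sketch of that behavior, assuming ${NAME}-style placeholders; the real helper lives in Scale's util package and may handle more cases:

def environment_expansion_sketch(env_vars, cmd_string):
    # Illustrative only: replace each ${NAME} placeholder with its value
    for name, value in env_vars.items():
        cmd_string = cmd_string.replace('${%s}' % name, str(value))
    return cmd_string

# e.g. environment_expansion_sketch({'OUTPUT_DIR': '/scale/output'}, 'run.sh ${OUTPUT_DIR}')
# -> 'run.sh /scale/output'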
Example #2
    def _schedule_waiting_tasks(self, nodes, running_job_exes, when):
        """Schedules all waiting tasks for which there are sufficient resources and updates the resource manager with
        any resource shortages. All scheduling nodes that have fulfilled all of their waiting tasks will be returned so
        new job executions can be added to them.

        :param nodes: The dict of scheduling nodes stored by node ID
        :type nodes: dict
        :param running_job_exes: The currently running job executions
        :type running_job_exes: list
        :param when: The current time
        :type when: :class:`datetime.datetime`
        :returns: The dict of scheduling nodes stored by node ID that have no more waiting tasks
        :rtype: dict
        """

        fulfilled_nodes = {}  # {Node ID: SchedulingNode}
        waiting_tasks = []

        # Schedule waiting node tasks first
        for node in nodes.values():
            has_waiting_tasks = node.accept_node_tasks(when, waiting_tasks)
            if node.is_ready_for_next_job_task and not has_waiting_tasks:
                # A node can only be fulfilled if it is able to run waiting tasks and it has no more waiting tasks
                fulfilled_nodes[node.node_id] = node

        # Schedule job executions already on the node waiting for their next task
        node_lost_job_exes_ids = []
        for running_job_exe in running_job_exes:
            if running_job_exe.node_id not in nodes:  # Unknown/lost node
                node_lost_job_exes_ids.append(running_job_exe.id)
            else:
                node = nodes[running_job_exe.node_id]
                if not node.is_ready_for_next_job_task or node.agent_id != running_job_exe.agent_id:
                    # Node is deprecated, offline, or has switched agent IDs
                    node_lost_job_exes_ids.append(running_job_exe.id)
                elif running_job_exe.is_next_task_ready():
                    has_waiting_tasks = node.accept_job_exe_next_task(
                        running_job_exe, waiting_tasks)
                    if has_waiting_tasks and node.node_id in fulfilled_nodes:
                        # Node has tasks waiting for resources
                        del fulfilled_nodes[node.node_id]
        # Handle any running job executions that have lost their node or become starved
        finished_job_exes = job_exe_mgr.check_for_starvation(when)
        if node_lost_job_exes_ids:
            finished_job_exes.extend(
                job_exe_mgr.lost_job_exes(node_lost_job_exes_ids, when))
        for finished_job_exe in finished_job_exes:
            cleanup_mgr.add_job_execution(finished_job_exe)

        # Update waiting task counts and calculate shortages
        agent_shortages = {}  # {Agent ID: NodeResources}
        new_waiting_tasks = {}  # {Task ID: int}
        for task in waiting_tasks:
            if task.id in self._waiting_tasks:
                count = self._waiting_tasks[task.id] + 1
            else:
                count = 1
            new_waiting_tasks[task.id] = count
            if count >= TASK_SHORTAGE_WAIT_COUNT:
                # This task has waited too long for resources, generate a shortage
                if task.agent_id in agent_shortages:
                    agent_shortages[task.agent_id].add(task.get_resources())
                else:
                    resources = NodeResources()
                    resources.add(task.get_resources())
                    agent_shortages[task.agent_id] = resources
        self._waiting_tasks = new_waiting_tasks
        resource_mgr.set_agent_shortages(agent_shortages)

        return fulfilled_nodes
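
The waiting-task bookkeeping above only reports a shortage once a task has waited through several scheduling cycles. A stripped-down sketch of that counting pattern, assuming TASK_SHORTAGE_WAIT_COUNT is an integer threshold (the real value lives in the scheduler's constants):

TASK_SHORTAGE_WAIT_COUNT = 10  # assumed value for illustration

def update_waiting_counts(previous_counts, waiting_task_ids):
    # Returns the new {task ID: wait count} dict and the IDs that have waited too long
    new_counts = {}
    starved_ids = set()
    for task_id in waiting_task_ids:
        count = previous_counts.get(task_id, 0) + 1
        new_counts[task_id] = count
        if count >= TASK_SHORTAGE_WAIT_COUNT:
            starved_ids.add(task_id)
    return new_counts, starved_ids

# Tasks that stop waiting drop out of the dict, which resets their count, matching
# the way _waiting_tasks is rebuilt from scratch each cycle above.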
Example #3
    def get_resources(self):
        """See :meth:`job.tasks.base_task.Task.get_resources`
        """

        return NodeResources([Cpus(0.1), Mem(32.0)])
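
For context, NodeResources supports simple arithmetic over typed scalar resources, which the scheduler leans on throughout these examples. A hedged usage sketch, assuming the NodeResources, Cpus, Mem and Disk classes from Scale's node.resources package seen elsewhere in this listing:

node_total = NodeResources([Cpus(4.0), Mem(2048.0), Disk(1024.0)])
task_needs = NodeResources([Cpus(0.1), Mem(32.0)])

if node_total.is_sufficient_to_meet(task_needs):
    node_total.subtract(task_needs)  # reserve this task's share of the node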
Example #4
    def _launch_tasks(self, client, nodes):
        """Launches all of the tasks that have been scheduled on the given nodes

        :param client: The Mesos scheduler client
        :type client: :class:`mesoshttp.client.MesosClient`
        :param nodes: The dict of all scheduling nodes stored by node ID
        :type nodes: dict
        :returns: The number of tasks that were launched and the number of offers accepted
        :rtype: tuple
        """

        started = now()

        # Start and launch tasks in the task manager
        all_tasks = []
        for node in nodes.values():
            node.start_job_exe_tasks()
            all_tasks.extend(node.allocated_tasks)
        task_mgr.launch_tasks(all_tasks, started)

        # Launch tasks in Mesos
        node_count = 0
        total_node_count = 0
        total_offer_count = 0
        total_task_count = 0
        total_offer_resources = NodeResources()
        total_task_resources = NodeResources()
        for node in nodes.values():
            mesos_offers = []
            mesos_tasks = []
            offers = node.allocated_offers
            for offer in offers:
                total_offer_count += 1
                total_offer_resources.add(offer.resources)
                mesos_offers.append(offer.mesos_offer)
            tasks = node.allocated_tasks
            for task in tasks:
                total_task_resources.add(task.get_resources())
                mesos_tasks.append(create_mesos_task(task))
            task_count = len(tasks)
            total_task_count += task_count
            if task_count:
                node_count += 1
            if mesos_offers:
                total_node_count += 1
                try:
                    client.combine_offers(mesos_offers, mesos_tasks)
                except Exception:
                    logger.exception(
                        'Error occurred while launching tasks on node %s',
                        node.hostname)

        duration = now() - started
        msg = 'Launching tasks took %.3f seconds'
        if duration > LAUNCH_TASK_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())

        declined_resources = NodeResources()
        declined_resources.add(total_offer_resources)
        declined_resources.subtract(total_task_resources)
        if total_offer_count:
            logger.info(
                'Accepted %d offer(s) from %d node(s), launched %d task(s) with %s on %d node(s), declined %s',
                total_offer_count, total_node_count, total_task_count,
                total_task_resources, node_count, declined_resources)
        return total_task_count, total_offer_count
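
The declined figure logged above is simply the offered total minus what the launched tasks consumed. A small worked illustration using the same NodeResources arithmetic (all values made up):

declined = NodeResources()
declined.add(NodeResources([Cpus(8.0), Mem(4096.0)]))       # total_offer_resources
declined.subtract(NodeResources([Cpus(6.0), Mem(3072.0)]))  # total_task_resources
# declined now holds 2.0 CPUs and 1024.0 MiB of memory, reported as declined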
Example #5
    def _schedule_new_job_exes(self, framework_id, nodes, job_types,
                               job_type_limits, job_type_resources,
                               workspaces):
        """Schedules new job executions from the queue and adds them to the appropriate node

        :param framework_id: The scheduling framework ID
        :type framework_id: string
        :param nodes: The dict of scheduling nodes stored by node ID where every node has fulfilled all waiting tasks
        :type nodes: dict
        :param job_types: The dict of job type models stored by job type ID
        :type job_types: dict
        :param job_type_limits: The dict of job type IDs mapping to job type limits
        :type job_type_limits: dict
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :param workspaces: A dict of all workspaces stored by name
        :type workspaces: dict
        :returns: The number of new job executions that were scheduled
        :rtype: int
        """

        # Can only use nodes that are ready for new job executions
        available_nodes = {}  # {Node ID: SchedulingNode}
        for node in nodes.values():
            if node.is_ready_for_new_job:
                available_nodes[node.node_id] = node

        try:
            scheduled_job_exes = self._process_queue(available_nodes,
                                                     job_types,
                                                     job_type_limits,
                                                     job_type_resources,
                                                     workspaces)
            running_job_exes = self._process_scheduled_job_executions(
                framework_id, scheduled_job_exes, job_types, workspaces)
            all_running_job_exes = []
            for node_id in running_job_exes:
                all_running_job_exes.extend(running_job_exes[node_id])
            job_exe_mgr.schedule_job_exes(
                all_running_job_exes,
                create_running_job_messages(all_running_job_exes))
            node_ids = set()
            job_exe_count = 0
            scheduled_resources = NodeResources()
            for node_id in running_job_exes:
                if node_id in nodes:
                    nodes[node_id].add_scheduled_job_exes(
                        running_job_exes[node_id])
                    for running_job_exe in running_job_exes[node_id]:
                        first_task = running_job_exe.next_task()
                        if first_task:
                            node_ids.add(node_id)
                            scheduled_resources.add(first_task.get_resources())
                            job_exe_count += 1
                else:
                    logger.error('Scheduled jobs on an unknown node')
            if job_exe_count:
                logger.info('Scheduled %d new job(s) with %s on %d node(s)',
                            job_exe_count, scheduled_resources, len(node_ids))
        except DatabaseError:
            logger.exception(
                'Error occurred while scheduling new jobs from the queue')
            job_exe_count = 0
            for node in available_nodes.values():
                node.reset_new_job_exes()

        return job_exe_count
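
When the DatabaseError handler fires, each node rolls back its tentative allocations via reset_new_job_exes() (defined on SchedulingNode below). The rollback is plain bookkeeping: the queued executions' resources move out of allocated_resources and back into the node's remaining pool, equivalent to this illustrative fragment:

rollback = NodeResources()
for queued_job_exe in allocated_queued_job_exes:  # the node's tentatively accepted jobs
    rollback.add(queued_job_exe.required_resources)
allocated_resources.subtract(rollback)
remaining_resources.add(rollback)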
Example #6
class SchedulingNode(object):
    """This class manages scheduling for a node.
    """

    def __init__(self, agent_id, node, tasks, running_job_exes, resource_set):
        """Constructor

        :param agent_id: The agent ID
        :type agent_id: string
        :param node: The node
        :type node: :class:`scheduler.node.node_class.Node`
        :param tasks: The current tasks running on the node
        :type tasks: list
        :param running_job_exes: The current job executions running on the node
        :type running_job_exes: list
        :param resource_set: The set of resources for the node
        :type resource_set: :class:`scheduler.resources.agent.ResourceSet`
        """

        self.agent_id = agent_id  # Set agent ID separately from node since it can change during scheduling
        self.hostname = node.hostname
        self.node_id = node.id
        self.is_ready_for_new_job = node.is_ready_for_new_job()  # Cache this for consistency
        self.is_ready_for_next_job_task = node.is_ready_for_next_job_task()  # Cache this for consistency
        self.is_ready_for_system_task = node.is_ready_for_system_task()  # Cache this for consistency
        self.allocated_offers = []
        self.allocated_resources = NodeResources()
        self.allocated_tasks = []  # Tasks that have been allocated resources from this node

        self._node = node
        self._allocated_queued_job_exes = []  # New queued job executions that have been allocated resources
        self._allocated_running_job_exes = []  # Running job executions that have been allocated resources
        self._running_job_exes = running_job_exes
        self._running_tasks = tasks

        self._offered_resources = NodeResources()  # The amount of resources that were originally offered
        self._offered_resources.add(resource_set.offered_resources)
        self._remaining_resources = NodeResources()
        self._remaining_resources.add(self._offered_resources)
        self._task_resources = resource_set.task_resources
        self._watermark_resources = resource_set.watermark_resources

    def accept_job_exe_next_task(self, job_exe, waiting_tasks):
        """Asks the node if it can accept the next task for the given job execution. If the next task is waiting on
        resources, the task is added to the given waiting list. This should be used for job executions that have already
        been scheduled on this node, not new job executions.

        :param job_exe: The job execution to accept
        :type job_exe: :class:`job.execution.job_exe.RunningJobExecution`
        :param waiting_tasks: List of tasks that are waiting for resources
        :type waiting_tasks: [:class:`job.tasks.base_task.Task`]
        :returns: True if waiting tasks were added to the list, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_next_job_task:
            return False

        task = job_exe.next_task()
        if not task:
            return False
        task_resources = task.get_resources()
        if self._remaining_resources.is_sufficient_to_meet(task_resources):
            self._allocated_running_job_exes.append(job_exe)
            self.allocated_resources.add(task_resources)
            self._remaining_resources.subtract(task_resources)
            return False

        # Not enough resources, so add task to waiting list
        waiting_tasks.append(task)
        return True

    def accept_new_job_exe(self, job_exe):
        """Asks the node if it can accept the given new job execution

        :param job_exe: The new job execution
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :returns: True if the new job execution was accepted, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_new_job:
            return False

        resources = job_exe.required_resources
        if self._remaining_resources.is_sufficient_to_meet(resources):
            self._allocated_queued_job_exes.append(job_exe)
            self.allocated_resources.add(resources)
            self._remaining_resources.subtract(resources)
            job_exe.scheduled(self.agent_id, self.node_id, resources)
            return True

        return False

    def accept_node_tasks(self, when, waiting_tasks):
        """Asks the node to accept any node tasks that need to be scheduled. If any node tasks are waiting on resources,
        those tasks are added to the given waiting list.

        :param when: The current time
        :type when: :class:`datetime.datetime`
        :param waiting_tasks: List of tasks that are waiting for resources
        :type waiting_tasks: [:class:`job.tasks.base_task.Task`]
        :returns: True if waiting tasks were added to the list, False otherwise
        :rtype: bool
        """

        result = False
        for task in self._node.get_next_tasks(when):
            task_resources = task.get_resources()
            if self._remaining_resources.is_sufficient_to_meet(task_resources):
                self.allocated_tasks.append(task)
                self.allocated_resources.add(task_resources)
                self._remaining_resources.subtract(task_resources)
            else:
                waiting_tasks.append(task)
                result = True
        return result

    def accept_system_task(self, system_task):
        """Asks the node if it can accept the given system task

        :param system_task: The system task
        :type system_task: :class:`job.tasks.base_task.Task`
        :returns: True if the system task was accepted, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_system_task:
            return False

        task_resources = system_task.get_resources()
        if self._remaining_resources.is_sufficient_to_meet(task_resources):
            system_task.agent_id = self.agent_id  # Must set agent ID for task
            self.allocated_tasks.append(system_task)
            self.allocated_resources.add(task_resources)
            self._remaining_resources.subtract(task_resources)
            return True

        return False

    def add_allocated_offers(self, offers):
        """Adds the resource offers that have been allocated to run this node's tasks. If the offer resources are not
        enough to cover the current allocation, job executions and tasks are removed as necessary.

        :param offers: The resource offers to add
        :type offers: list
        """

        offer_resources = NodeResources()
        for offer in offers:
            offer_resources.add(offer.resources)

        self.allocated_offers = offers

        # If the offers are not enough to cover what we allocated, drop all job execution tasks
        if not offer_resources.is_sufficient_to_meet(self.allocated_resources):
            job_exe_resources = NodeResources()
            for job_exe in self._allocated_running_job_exes:
                task = job_exe.next_task()
                if task:
                    job_exe_resources.add(task.get_resources())
            self._allocated_running_job_exes = []
            self.allocated_resources.subtract(job_exe_resources)
            self._remaining_resources.add(job_exe_resources)

        # If the offers are still not enough to cover what we allocated, drop all tasks
        if not offer_resources.is_sufficient_to_meet(self.allocated_resources):
            self.allocated_tasks = []
            self.allocated_resources = NodeResources()
            self._remaining_resources = NodeResources()
            self._remaining_resources.add(self._offered_resources)

    def add_scheduled_job_exes(self, job_exes):
        """Hands the node its queued job executions that have now been scheduled in the database and are now running

        :param job_exes: The running job executions that have now been scheduled in the database
        :type job_exes: list
        """

        self._allocated_queued_job_exes = []
        self._allocated_running_job_exes.extend(job_exes)

    def reset_new_job_exes(self):
        """Resets the allocated new job executions and deallocates any resources associated with them
        """

        if not self._allocated_queued_job_exes:
            return

        resources = NodeResources()
        for new_job_exe in self._allocated_queued_job_exes:
            resources.add(new_job_exe.required_resources)

        self._allocated_queued_job_exes = []
        self.allocated_resources.subtract(resources)
        self._remaining_resources.add(resources)

    def score_job_exe_for_reservation(self, job_exe, job_type_resources):
        """Returns an integer score (lower is better) indicating how well this node is a fit for reserving (temporarily
        blocking additional job executions of lower priority) for the given job execution. If the job execution cannot
        reserve this node, None is returned.

        :param job_exe: The job execution to score
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit reserving this node is for this job execution, possibly
            None
        :rtype: int
        """

        # Calculate available resources for lower priority jobs
        available_resources = NodeResources()
        available_resources.add(self._watermark_resources)
        for running_task in self._running_tasks:  # Remove resources for system tasks
            if not isinstance(running_task, JobExecutionTask):
                available_resources.subtract(running_task.get_resources())
        for running_job_exe in self._running_job_exes:  # Remove resources for existing jobs of equal/higher priority
            if running_job_exe.priority <= job_exe.priority:
                task = running_job_exe.current_task
                if not task:
                    task = running_job_exe.next_task()
                if task:
                    available_resources.subtract(task.get_resources())
        for queued_job_exe in self._allocated_queued_job_exes:  # Remove resources for new jobs of equal/higher priority
            if queued_job_exe.priority <= job_exe.priority:
                available_resources.subtract(queued_job_exe.required_resources)

        # If there are enough resources (unused plus used by lower priority jobs) to eventually run this job, then
        # reserve this node to block lower priority jobs
        if not available_resources.is_sufficient_to_meet(job_exe.required_resources):
            return None

        available_resources.subtract(job_exe.required_resources)
        # Score is the number of job types that can fit within the estimated remaining resources. A better (lower) score
        # indicates a higher utilization of this node, reducing resource fragmentation.
        score = 0
        for job_type_resource in job_type_resources:
            if available_resources.is_sufficient_to_meet(job_type_resource):
                score += 1

        return score

    def score_job_exe_for_scheduling(self, job_exe, job_type_resources):
        """Returns an integer score (lower is better) indicating how well the given job execution fits on this node for
        scheduling. If the job execution cannot be scheduled on this node, None is returned.

        :param job_exe: The job execution to score
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit this job execution is for this node, possibly None
        :rtype: int
        """

        return self._score_resources_for_scheduling(job_exe.required_resources, job_type_resources)

    def score_system_task_for_scheduling(self, system_task, job_type_resources):
        """Returns an integer score (lower is better) indicating how well the given system task fits on this node for
        scheduling. If the system task cannot be scheduled on this node, None is returned.

        :param system_task: The system task to score
        :type system_task: :class:`job.tasks.base_task.Task`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit this system task is for this node, possibly None
        :rtype: int
        """

        return self._score_resources_for_scheduling(system_task.get_resources(), job_type_resources)

    def start_job_exe_tasks(self):
        """Tells the node to start the next task on all scheduled job executions
        """

        for job_exe in self._allocated_running_job_exes:
            task = job_exe.start_next_task()
            if task:
                self.allocated_tasks.append(task)
        self._allocated_running_job_exes = []

    def _score_resources_for_scheduling(self, resources, job_type_resources):
        """Returns an integer score (lower is better) indicating how well the given resources fit on this node for
        scheduling. If the resources cannot be scheduled on this node, None is returned.

        :param resources: The resources to score
        :type resources: :class:`node.resources.node_resources.NodeResources`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit these resources are for this node, possibly None
        :rtype: int
        """

        if not self._remaining_resources.is_sufficient_to_meet(resources):
            return None

        # Calculate our best guess of the total resources still available to Scale on this node by starting with the
        # watermark resource level and subtracting resources for currently running and allocated tasks
        total_resources_available = NodeResources()
        total_resources_available.add(self._watermark_resources)
        total_resources_available.subtract(self._task_resources)
        total_resources_available.subtract(self.allocated_resources)
        total_resources_available.subtract(resources)

        # Score is the number of job types that can fit within the estimated resources on this node still available to
        # Scale. A better (lower) score indicates a higher utilization of this node, reducing resource fragmentation.
        score = 0
        for job_type_resource in job_type_resources:
            if total_resources_available.is_sufficient_to_meet(job_type_resource):
                score += 1

        return score
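
A typical consumer of these scoring methods asks every candidate node and keeps the best (lowest) non-None score. A hedged sketch of that selection loop, assuming a dict of SchedulingNode objects keyed by node ID:

def pick_best_node(nodes, job_exe, job_type_resources):
    # Returns the node with the lowest scheduling score, or None if no node fits
    best_node = None
    best_score = None
    for node in nodes.values():
        score = node.score_job_exe_for_scheduling(job_exe, job_type_resources)
        if score is None:
            continue  # this node cannot run the job execution
        if best_score is None or score < best_score:
            best_node, best_score = node, score
    return best_node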
Example #7
class AgentResources(object):
    """This class represents an agent's set of resource offers."""
    def __init__(self, agent_id):
        """Constructor

        :param agent_id: The agent ID
        :type agent_id: string
        """

        self.agent_id = agent_id
        self._offers = {}  # {Offer ID: ResourceOffer}
        self._recent_watermark_resources = NodeResources()  # Recent watermark, used to provide a rolling watermark
        self._task_resources = NodeResources()  # Total resources for current tasks
        self._watermark_resources = NodeResources()  # Highest level of offer + task resources

        self._offer_resources = None  # Resources from offers
        self._shortage_resources = None  # Resources that agent needs to fulfill current obligations
        self._total_resources = None
        self._update_resources()

    def allocate_offers(self, resources, when):
        """Directs the agent to allocate offers sufficient to match the given resources. Any offers that have been held
        too long will automatically be included. It's possible that the offer resources returned are less than
        requested.

        :param resources: The requested resources
        :type resources: :class:`node.resources.node_resources.NodeResources`
        :param when: The current time
        :type when: :class:`datetime.datetime`
        :returns: The list of allocated offers
        :rtype: [:class:`scheduler.resources.offer.ResourceOffer`]
        """

        allocated_offers = {}
        allocated_resources = NodeResources()
        available_offer_ids = set(self._offers.keys())
        # Automatically include all offers that have been held too long
        for offer in self._offers.values():
            if when - offer.received >= MAX_OFFER_HOLD_DURATION:
                allocated_offers[offer.id] = offer
                allocated_resources.add(offer.resources)
                available_offer_ids.discard(offer.id)

        if self._offer_resources.is_sufficient_to_meet(resources):
            # We have enough resources to meet the request, so keep allocating offers until we get enough
            while not allocated_resources.is_sufficient_to_meet(resources):
                if len(available_offer_ids) == 0:
                    # We unexpectedly ran out of offers (can occur due to resource rounding error)
                    break
                offer_id = available_offer_ids.pop()
                offer = self._offers[offer_id]
                allocated_offers[offer_id] = offer
                allocated_resources.add(offer.resources)

        # Remove allocated offers and return them
        for offer in allocated_offers.values():
            del self._offers[offer.id]
        self._update_resources()
        return allocated_offers.values()

    def generate_status_json(self,
                             node_dict,
                             total_running=None,
                             total_offered=None,
                             total_watermark=None,
                             total=None):
        """Generates the portion of the status JSON that describes the resources for this agent

        :param node_dict: The dict for this agent's node within the status JSON
        :type node_dict: dict
        :param total_running: The total running resources to add up, possibly None
        :type total_running: :class:`node.resources.node_resources.NodeResources`
        :param total_offered: The total offered resources to add up, possibly None
        :type total_offered: :class:`node.resources.node_resources.NodeResources`
        :param total_watermark: The total watermark resources to add up, possibly None
        :type total_watermark: :class:`node.resources.node_resources.NodeResources`
        :param total: The total resources to add up, possibly None
        :type total: :class:`node.resources.node_resources.NodeResources`
        :returns: The total number of offers this agent has
        :rtype: int
        """

        if self._total_resources:
            total_resources = self._total_resources
        else:
            total_resources = self._watermark_resources
        free_resources = self._watermark_resources.copy()
        free_resources.subtract(self._task_resources)
        free_resources.subtract(self._offer_resources)
        free_resources.round_values()
        unavailable_resources = total_resources.copy()
        unavailable_resources.subtract(self._watermark_resources)
        unavailable_resources.round_values()
        resources_dict = {}

        if total_running:
            total_running.add(self._task_resources)
        if total_offered:
            total_offered.add(self._offer_resources)
        if total_watermark:
            total_watermark.add(self._watermark_resources)
        if total:
            total.add(total_resources)
        self._task_resources.generate_status_json(resources_dict, 'running')
        self._offer_resources.generate_status_json(resources_dict, 'offered')
        free_resources.generate_status_json(resources_dict, 'free')
        unavailable_resources.generate_status_json(resources_dict,
                                                   'unavailable')
        total_resources.generate_status_json(resources_dict, 'total')

        # Fill in any missing values
        for resource in total_resources.resources:
            resource_dict = resources_dict[resource.name]
            if 'running' not in resource_dict:
                resource_dict['running'] = 0.0
            if 'offered' not in resource_dict:
                resource_dict['offered'] = 0.0
            if 'free' not in resource_dict:
                resource_dict['free'] = 0.0
            if 'unavailable' not in resource_dict:
                resource_dict['unavailable'] = 0.0

        num_offers = len(self._offers)
        node_dict['num_offers'] = num_offers
        node_dict['resources'] = resources_dict
        return num_offers

    def has_total_resources(self):
        """Indicates whether this agent knows its total resources or not

        :returns: True if agent knows its total resources, False otherwise
        :rtype: bool
        """

        return self._total_resources is not None

    def refresh_resources(self, offers, tasks):
        """Refreshes the agent's resources by setting the current running tasks and adding new resource offers. Returns
        a copy of the set of resources for the agent.

        :param offers: The new resource offers to add
        :type offers: [:class:`scheduler.resources.offer.ResourceOffer`]
        :param tasks: The current tasks running on the agent
        :type tasks: [:class:`job.tasks.base_task.Task`]
        :returns: A copy of the set of agent resources
        :rtype: :class:`scheduler.resources.agent.ResourceSet`
        """

        # Add new offers
        for offer in offers:
            if offer.id not in self._offers:
                self._offers[offer.id] = offer

        self._update_resources(tasks)

        offered_resources = self._offer_resources.copy()
        task_resources = self._task_resources.copy()
        watermark_resources = self._watermark_resources.copy()
        return ResourceSet(offered_resources, task_resources,
                           watermark_resources)

    def rescind_offers(self, offer_ids):
        """Rescinds the offers with the given IDs

        :param offer_ids: The list of IDs of the offers to remove
        :type offer_ids: [str]
        """

        for offer_id in offer_ids:
            if offer_id in self._offers:
                del self._offers[offer_id]
        self._update_resources()

    def reset_watermark(self):
        """Resets the agent's watermark to the highest recent value
        """

        self._watermark_resources = self._recent_watermark_resources
        self._recent_watermark_resources = NodeResources()
        self._update_resources()

    def set_shortage(self, shortage_resources=None):
        """Sets the resource shortage for the agent, if any

        :param shortage_resources: The resource shortage
        :type shortage_resources: :class:`node.resources.node_resources.NodeResources`
        """

        if shortage_resources:
            logger.warning('Agent %s has a shortage of %s', self.agent_id,
                           shortage_resources)
            shortage_resources.round_values()
        self._shortage_resources = shortage_resources

    def set_total(self, total_resources):
        """Sets the total resources for the agent

        :param total_resources: The total resources
        :type total_resources: :class:`node.resources.node_resources.NodeResources`
        """

        self._total_resources = total_resources

    def _update_resources(self, tasks=None):
        """Updates the agent's resources from its current offers and tasks

        :param tasks: The new list of current tasks running on the agent, possibly None
        :type tasks: list
        """

        # Add up offered resources
        self._offer_resources = NodeResources()
        for offer in self._offers.values():
            self._offer_resources.add(offer.resources)

        # Recalculate task resources if needed
        if tasks is not None:
            self._task_resources = NodeResources()
            for task in tasks:
                self._task_resources.add(task.get_resources())

        # Increase watermark if needed
        available_resources = self._offer_resources.copy()
        available_resources.add(self._task_resources)
        self._watermark_resources.increase_up_to(available_resources)
        self._recent_watermark_resources.increase_up_to(available_resources)

        # Make sure watermark does not exceed total (can happen when we get task resources back before task update)
        if self._total_resources and not self._total_resources.is_sufficient_to_meet(
                self._watermark_resources):
            self._watermark_resources.limit_to(self._total_resources)
            self._recent_watermark_resources.limit_to(self._total_resources)
            # Since watermark was limited to not be higher than total, we're going to limit offered resources so that
            # offered + task = watermark
            max_offered = self._watermark_resources.copy()
            max_offered.subtract(self._task_resources)
            self._offer_resources.limit_to(max_offered)

        # Round values to deal with float precision issues
        self._offer_resources.round_values()
        self._task_resources.round_values()
        self._watermark_resources.round_values()
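
The two watermark fields implement a rolling maximum: _watermark_resources is the high-water mark used for scoring and status, while _recent_watermark_resources tracks the peak of the current window so reset_watermark() can lower the estimate when an agent's offers shrink. A hedged usage sketch:

agent = AgentResources('agent-1234')  # hypothetical agent ID
# Offers and tasks arrive over time; both watermarks ratchet upward via
# increase_up_to() inside _update_resources().
agent.reset_watermark()  # start a new window: watermark <- recent peak, recent <- zero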
Example #8
    def generate_status_json(self, status_dict):
        """Generates the portion of the status JSON that describes the resources

        :param status_dict: The status JSON dict
        :type status_dict: dict
        """

        num_offers = 0
        total_running = NodeResources()
        total_offered = NodeResources()
        total_watermark = NodeResources()
        total_resources = NodeResources()

        with self._agent_resources_lock:
            for node_dict in status_dict['nodes']:
                agent_id = node_dict['agent_id']
                is_active = node_dict['is_active']
                if agent_id in self._agent_resources:
                    agent_resources = self._agent_resources[agent_id]
                    if is_active:
                        num_offers += agent_resources.generate_status_json(
                            node_dict, total_running, total_offered,
                            total_watermark, total_resources)
                    else:
                        agent_resources.generate_status_json(node_dict)

        free_resources = total_watermark.copy()
        free_resources.subtract(total_running)
        free_resources.subtract(total_offered)
        unavailable_resources = total_resources.copy()
        unavailable_resources.subtract(total_watermark)
        resources_dict = {}

        total_running.round_values()
        total_offered.round_values()
        free_resources.round_values()
        unavailable_resources.round_values()
        total_resources.round_values()
        total_running.generate_status_json(resources_dict, 'running')
        total_offered.generate_status_json(resources_dict, 'offered')
        free_resources.generate_status_json(resources_dict, 'free')
        unavailable_resources.generate_status_json(resources_dict,
                                                   'unavailable')
        total_resources.generate_status_json(resources_dict, 'total')

        # Fill in any missing values
        for resource in total_resources.resources:
            resource_dict = resources_dict[resource.name]
            if 'running' not in resource_dict:
                resource_dict['running'] = 0.0
            if 'offered' not in resource_dict:
                resource_dict['offered'] = 0.0
            if 'free' not in resource_dict:
                resource_dict['free'] = 0.0
            if 'unavailable' not in resource_dict:
                resource_dict['unavailable'] = 0.0

        status_dict['num_offers'] = num_offers
        status_dict['resources'] = resources_dict
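
The resulting status JSON nests one dict per resource name under 'resources', with the five categories always present (the per-node entries under 'nodes' are filled in separately above). An illustrative shape, with made-up values:

status_dict = {
    'num_offers': 3,
    'resources': {
        'cpus': {'running': 2.0, 'offered': 1.5, 'free': 0.5,
                 'unavailable': 0.0, 'total': 4.0},
        'mem': {'running': 1024.0, 'offered': 512.0, 'free': 512.0,
                'unavailable': 0.0, 'total': 2048.0},
    },
}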
Example #9
    def offers(self, offers):
        """
        Invoked when resources have been offered to this framework. A single
        offer will only contain resources from a single agent.  Resources
        associated with an offer will not be re-offered to _this_ framework
        until either (a) this framework has rejected those resources
        or (b) those resources have been rescinded.  Note that resources may be
        concurrently offered to more than one framework at a time (depending
        on the allocator being used).  In that case, the first framework to
        launch tasks using those resources will be able to use them while the
        other frameworks will have those resources rescinded (or if a
        framework has already launched tasks with those resources then those
        tasks will fail with a TASK_LOST status and a message saying as much).
        """

        started = now()

        agents = {}
        offered_nodes = []
        resource_offers = []
        total_resources = NodeResources()
        skipped_roles = set()
        for offer in offers:
            scale_offer = from_mesos_offer(offer)
            offer_id = scale_offer.id.value
            agent_id = scale_offer.agent_id.value
            framework_id = scale_offer.framework_id.value
            hostname = scale_offer.hostname
            offered_nodes.append(hostname)
            # ignore offers while we're paused
            if scheduler_mgr.config.is_paused:
                offer.decline()
                continue
            resource_list = []
            for resource in scale_offer.resources:
                # Only accept resources that are of SCALAR type and have a role matching our accept list
                if resource.type == RESOURCE_TYPE_SCALAR:
                    if resource.role in settings.ACCEPTED_RESOURCE_ROLE:
                        logger.debug(
                            'Received scalar resource %s with value %i associated with role %s',
                            resource.name, resource.scalar.value,
                            resource.role)
                        resource_list.append(
                            ScalarResource(resource.name,
                                           resource.scalar.value))
                    else:
                        skipped_roles.add(resource.role)
                        offer.decline()

            logger.debug("Number of resources: %i" % len(resource_list))

            # Only register the agent if offers are being received
            if len(resource_list) > 0:
                resources = NodeResources(resource_list)
                total_resources.add(resources)
                agents[agent_id] = Agent(agent_id, hostname)
                resource_offers.append(
                    ResourceOffer(offer_id, agent_id, framework_id, resources,
                                  started, offer))

        logger.debug("Offer analysis complete with %i resource offers." %
                     len(resource_offers))

        node_mgr.register_agents(agents.values())
        logger.debug("Agents registered.")
        resource_mgr.add_new_offers(resource_offers)
        logger.debug("Resource offers added.")
        Node.objects.update_node_offers(offered_nodes, now())
        logger.debug("Node offer times updated.")

        num_offers = len(resource_offers)
        logger.info('Received %d offer(s) with %s from %d node(s)', num_offers,
                    total_resources, len(agents))
        if len(skipped_roles):
            logger.warning(
                'Skipped offers from roles that are not marked as accepted: %s',
                ','.join(skipped_roles))
        scheduler_mgr.add_new_offer_count(num_offers)

        duration = now() - started
        msg = 'Scheduler resourceOffers() took %.3f seconds'
        if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
            logger.warning(msg, duration.total_seconds())
        else:
            logger.debug(msg, duration.total_seconds())
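
The role filter above depends on the ACCEPTED_RESOURCE_ROLE Django setting; resources carrying any other role cause the offer to be declined. A hedged configuration sketch ('*' is the default Mesos role; the exact format should be checked against Scale's settings module):

# settings.py (illustrative)
ACCEPTED_RESOURCE_ROLE = ['*']  # accept only unreserved resources
# ACCEPTED_RESOURCE_ROLE = ['*', 'scale']  # also accept resources reserved for a 'scale' role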