def _configure_regular_job(config, job_exe, job_type, system_logging_level):
    """Sets up the given execution configuration for a regular (non-system) job: creates the pull, pre, main and post
    tasks and hands each of them its command arguments, workspaces, input/output volumes, environment variables and
    resources

    :param config: The execution configuration
    :type config: :class:`job.execution.configuration.json.exe_config.ExecutionConfiguration`
    :param job_exe: The job execution model being scheduled
    :type job_exe: :class:`job.models.JobExecution`
    :param job_type: The job type model (not referenced by this function)
    :type job_type: :class:`job.models.JobType`
    :param system_logging_level: The logging level passed to the pre and post tasks through their environment
    :type system_logging_level: str
    """

    config.create_tasks(['pull', 'pre', 'main', 'post'])
    config.add_to_task('pull', args=create_pull_command(job_exe.docker_image))
    config.add_to_task('pre', args=PRE_TASK_COMMAND_ARGS)
    config.add_to_task('post', args=POST_TASK_COMMAND_ARGS)

    # Input workspaces: read-only for the pre and main tasks, read-write for the post task because input files may
    # need to be moved as part of parse results
    input_names = list(config.get_input_workspace_names())
    read_only_inputs = {name: TaskWorkspace(name, MODE_RO) for name in input_names}
    read_write_inputs = {name: TaskWorkspace(name, MODE_RW) for name in input_names}
    config.add_to_task('pre', workspaces=read_only_inputs)
    config.add_to_task('main', workspaces=read_only_inputs)
    config.add_to_task('post', workspaces=read_write_inputs)

    # Output workspaces are only given to the post task, always read-write
    writable_outputs = {name: TaskWorkspace(name, MODE_RW) for name in config.get_output_workspace_names()}
    config.add_to_task('post', workspaces=writable_outputs)

    # Input/output mounts
    in_mount = 'scale_input_mount'
    out_mount = 'scale_output_mount'
    in_volume_name = get_job_exe_input_vol_name(job_exe)
    out_volume_name = get_job_exe_output_vol_name(job_exe)

    def make_volume(volume_name, container_path, mode):
        """Builds a non-host volume for this job execution"""
        return Volume(volume_name, container_path, mode, is_host=False)

    input_ro = make_volume(in_volume_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RO)
    input_rw = make_volume(in_volume_name, SCALE_JOB_EXE_INPUT_PATH, MODE_RW)
    output_ro = make_volume(out_volume_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RO)
    output_rw = make_volume(out_volume_name, SCALE_JOB_EXE_OUTPUT_PATH, MODE_RW)

    # Pre task writes both volumes, main task reads input and writes output, post task only reads output
    config.add_to_task('pre', mount_volumes={in_mount: input_rw, out_mount: output_rw},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})
    config.add_to_task('main', mount_volumes={in_mount: input_ro, out_mount: output_rw})
    config.add_to_task('post', mount_volumes={out_mount: output_ro},
                       env_vars={'SYSTEM_LOGGING_LEVEL': system_logging_level})

    # Expose the output directory to the main task, both as an env var and expanded into its command arguments
    output_env = {'OUTPUT_DIR': SCALE_JOB_EXE_OUTPUT_PATH}
    main_args = environment_expansion(output_env, config._get_task_dict('main')['args'])
    config.add_to_task('main', args=main_args, env_vars=output_env)

    # Task resources: the same resources object is shrunk step by step, so the order of these calls matters
    task_resources = job_exe.get_resources()
    for full_resource_task in ('pull', 'pre'):
        # Pull-task and pre-task require the full amount of resources
        config.add_to_task(full_resource_task, resources=task_resources)
    # Main-task no longer requires the input file space
    input_disk = NodeResources([Disk(job_exe.input_file_size)])
    task_resources.subtract(input_disk)
    config.add_to_task('main', resources=task_resources)
    # Post-task no longer requires any disk space
    task_resources.remove_resource('disk')
    config.add_to_task('post', resources=task_resources)
def _schedule_waiting_tasks(self, nodes, running_job_exes, when):
    """Tries to schedule every waiting task (node tasks first, then the next task of each running job execution) and
    reports resource shortages to the resource manager for tasks that have waited too long. The nodes that are able to
    run job tasks and ended up with no waiting tasks are returned so that new job executions can be added to them.

    :param nodes: The dict of scheduling nodes stored by node ID
    :type nodes: dict
    :param running_job_exes: The currently running job executions
    :type running_job_exes: list
    :param when: The current time
    :type when: :class:`datetime.datetime`
    :returns: The dict of scheduling nodes stored by node ID that have no more waiting tasks
    :rtype: dict
    """

    tasks_without_resources = []
    nodes_without_waiting = {}  # {Node ID: SchedulingNode}

    # Node tasks get the first pick of each node's resources
    for sched_node in nodes.values():
        still_waiting = sched_node.accept_node_tasks(when, tasks_without_resources)
        if still_waiting or not sched_node.is_ready_for_next_job_task:
            continue
        # Only a node that can run job tasks and has nothing left waiting counts as fulfilled
        nodes_without_waiting[sched_node.node_id] = sched_node

    # Next, job executions already placed on a node that are waiting for their next task
    lost_job_exe_ids = []
    for job_exe in running_job_exes:
        if job_exe.node_id not in nodes:
            # Unknown/lost node
            lost_job_exe_ids.append(job_exe.id)
            continue

        sched_node = nodes[job_exe.node_id]
        if not sched_node.is_ready_for_next_job_task or sched_node.agent_id != job_exe.agent_id:
            # Node is deprecated, offline, or has switched agent IDs
            lost_job_exe_ids.append(job_exe.id)
            continue

        if not job_exe.is_next_task_ready():
            continue
        if sched_node.accept_job_exe_next_task(job_exe, tasks_without_resources):
            # Node has a task waiting for resources, so it is no longer fulfilled
            nodes_without_waiting.pop(sched_node.node_id, None)

    # Job executions that starved or lost their node are finished and handed over for cleanup
    finished_job_exes = job_exe_mgr.check_for_starvation(when)
    if lost_job_exe_ids:
        finished_job_exes.extend(job_exe_mgr.lost_job_exes(lost_job_exe_ids, when))
    for finished in finished_job_exes:
        cleanup_mgr.add_job_execution(finished)

    # Count how many consecutive passes each task has waited and turn long waits into per-agent shortages
    shortages_by_agent = {}  # {Agent ID: NodeResources}
    wait_counts = {}  # {Task ID: int}
    for task in tasks_without_resources:
        times_waited = self._waiting_tasks.get(task.id, 0) + 1
        wait_counts[task.id] = times_waited
        if times_waited < TASK_SHORTAGE_WAIT_COUNT:
            continue
        # This task has waited too long for resources, generate a shortage
        if task.agent_id not in shortages_by_agent:
            shortages_by_agent[task.agent_id] = NodeResources()
        shortages_by_agent[task.agent_id].add(task.get_resources())

    self._waiting_tasks = wait_counts
    resource_mgr.set_agent_shortages(shortages_by_agent)

    return nodes_without_waiting
def get_resources(self):
    """See :meth:`job.tasks.base_task.Task.get_resources`
    """

    # Fixed footprint for this task: a Cpus value of 0.1 and a Mem value of 32.0
    cpus = Cpus(0.1)
    mem = Mem(32.0)
    return NodeResources([cpus, mem])
def _launch_tasks(self, client, nodes):
    """Starts every task scheduled on the given nodes, registers the tasks with the task manager and then launches
    them in Mesos by combining each node's allocated offers with its tasks

    :param client: The Mesos scheduler client
    :type client: :class:`mesoshttp.client.MesosClient`
    :param nodes: The dict of all scheduling nodes stored by node ID
    :type nodes: dict
    :returns: The number of tasks that were launched and the number of offers accepted
    :rtype: tuple
    """

    started = now()

    # Start the job execution tasks on each node and register everything allocated with the task manager
    tasks_to_launch = []
    for sched_node in nodes.values():
        sched_node.start_job_exe_tasks()
        tasks_to_launch.extend(sched_node.allocated_tasks)
    task_mgr.launch_tasks(tasks_to_launch, started)

    # Launch tasks in Mesos, one combined call per node that has offers
    nodes_with_tasks = 0
    nodes_with_offers = 0
    offer_total = 0
    task_total = 0
    offered_resources = NodeResources()
    launched_resources = NodeResources()
    for sched_node in nodes.values():
        mesos_offers = []
        for offer in sched_node.allocated_offers:
            offered_resources.add(offer.resources)
            mesos_offers.append(offer.mesos_offer)
        offer_total += len(mesos_offers)

        mesos_tasks = []
        node_tasks = sched_node.allocated_tasks
        for task in node_tasks:
            launched_resources.add(task.get_resources())
            mesos_tasks.append(create_mesos_task(task))
        node_task_count = len(node_tasks)
        task_total += node_task_count
        if node_task_count:
            nodes_with_tasks += 1

        if not mesos_offers:
            continue
        nodes_with_offers += 1
        try:
            client.combine_offers(mesos_offers, mesos_tasks)
        except Exception:
            # A failure on one node is logged and must not stop the launch on the remaining nodes
            logger.exception('Error occurred while launching tasks on node %s', sched_node.hostname)

    elapsed = now() - started
    log_duration = logger.warning if elapsed > LAUNCH_TASK_WARN_THRESHOLD else logger.debug
    log_duration('Launching tasks took %.3f seconds', elapsed.total_seconds())

    # Whatever was offered but not consumed by a task is reported as declined
    declined_resources = NodeResources()
    declined_resources.add(offered_resources)
    declined_resources.subtract(launched_resources)
    if offer_total:
        logger.info('Accepted %d offer(s) from %d node(s), launched %d task(s) with %s on %d node(s), declined %s',
                    offer_total, nodes_with_offers, task_total, launched_resources, nodes_with_tasks,
                    declined_resources)

    return task_total, offer_total
def _schedule_new_job_exes(self, framework_id, nodes, job_types, job_type_limits, job_type_resources, workspaces):
    """Pulls new job executions off of the queue, schedules them and hands them to the node they were placed on

    :param framework_id: The scheduling framework ID
    :type framework_id: string
    :param nodes: The dict of scheduling nodes stored by node ID where every node has fulfilled all waiting tasks
    :type nodes: dict
    :param job_types: The dict of job type models stored by job type ID
    :type job_types: dict
    :param job_type_limits: The dict of job type IDs mapping to job type limits
    :type job_type_limits: dict
    :param job_type_resources: The list of all of the job type resource requirements
    :type job_type_resources: list
    :param workspaces: A dict of all workspaces stored by name
    :type workspaces: dict
    :returns: The number of new job executions that were scheduled
    :rtype: int
    """

    # Only nodes that are ready for new job executions take part
    available_nodes = {node.node_id: node for node in nodes.values() if node.is_ready_for_new_job}  # {Node ID: node}

    try:
        scheduled_job_exes = self._process_queue(available_nodes, job_types, job_type_limits, job_type_resources,
                                                 workspaces)
        exes_by_node = self._process_scheduled_job_executions(framework_id, scheduled_job_exes, job_types,
                                                              workspaces)

        flattened_job_exes = []
        for node_job_exes in exes_by_node.values():
            flattened_job_exes.extend(node_job_exes)
        job_exe_mgr.schedule_job_exes(flattened_job_exes, create_running_job_messages(flattened_job_exes))

        used_node_ids = set()
        job_exe_count = 0
        scheduled_resources = NodeResources()
        for node_id, node_job_exes in exes_by_node.items():
            if node_id not in nodes:
                logger.error('Scheduled jobs on an unknown node')
                continue

            nodes[node_id].add_scheduled_job_exes(node_job_exes)
            for running_job_exe in node_job_exes:
                first_task = running_job_exe.next_task()
                if not first_task:
                    continue
                # Only job executions that actually have a first task are counted
                used_node_ids.add(node_id)
                scheduled_resources.add(first_task.get_resources())
                job_exe_count += 1

        if job_exe_count:
            logger.info('Scheduled %d new job(s) with %s on %d node(s)', job_exe_count, scheduled_resources,
                        len(used_node_ids))
    except DatabaseError:
        logger.exception('Error occurred while scheduling new jobs from the queue')
        # Nothing was scheduled, so give back everything the available nodes had set aside for new job executions
        job_exe_count = 0
        for node in available_nodes.values():
            node.reset_new_job_exes()

    return job_exe_count
def _score_resources_for_scheduling(self, resources, job_type_resources):
    """Scores (lower is better) how well the given resources fit on this node for scheduling. None is returned when
    the node's remaining resources cannot meet the given resources.

    :param resources: The resources to score
    :type resources: :class:`node.resources.node_resources.NodeResources`
    :param job_type_resources: The list of all of the job type resource requirements
    :type job_type_resources: list
    :returns: The integer score indicating how good of a fit these resources are for this node, possibly None
    :rtype: int
    """

    if not self._remaining_resources.is_sufficient_to_meet(resources):
        return None

    # Best guess of what would still be available to Scale on this node: start at the watermark level and take away
    # the running task resources, the resources already allocated in this pass and the resources being scored
    estimate = NodeResources()
    estimate.add(self._watermark_resources)
    for consumed in (self._task_resources, self.allocated_resources, resources):
        estimate.subtract(consumed)

    # The score is the number of job types that would still fit in that estimate. Fewer job types fitting means the
    # node is utilized more fully, which reduces resource fragmentation.
    return sum(1 for needed in job_type_resources if estimate.is_sufficient_to_meet(needed))
class SchedulingNode(object):
    """Tracks, for a single node and a single scheduling pass, which tasks and job executions have been given
    resources and how many of the offered resources remain.
    """

    def __init__(self, agent_id, node, tasks, running_job_exes, resource_set):
        """Constructor

        :param agent_id: The agent ID
        :type agent_id: string
        :param node: The node
        :type node: :class:`scheduler.node.node_class.Node`
        :param tasks: The current tasks running on the node
        :type tasks: list
        :param running_job_exes: The current job executions running on the node
        :type running_job_exes: list
        :param resource_set: The set of resources for the node
        :type resource_set: :class:`scheduler.resources.agent.ResourceSet`
        """

        # The agent ID is kept separately from the node since it can change during scheduling
        self.agent_id = agent_id
        self.hostname = node.hostname
        self.node_id = node.id

        # The readiness flags are read once here so they stay consistent for the whole pass
        self.is_ready_for_new_job = node.is_ready_for_new_job()
        self.is_ready_for_next_job_task = node.is_ready_for_next_job_task()
        self.is_ready_for_system_task = node.is_ready_for_system_task()

        self.allocated_offers = []
        self.allocated_resources = NodeResources()
        self.allocated_tasks = []  # Tasks that have been allocated resources from this node

        self._node = node
        self._allocated_queued_job_exes = []  # New queued job executions that have been allocated resources
        self._allocated_running_job_exes = []  # Running job executions that have been allocated resources
        self._running_job_exes = running_job_exes
        self._running_tasks = tasks

        # Private copies: what was originally offered, and what is left of it after allocations
        self._offered_resources = NodeResources()
        self._offered_resources.add(resource_set.offered_resources)
        self._remaining_resources = NodeResources()
        self._remaining_resources.add(self._offered_resources)
        self._task_resources = resource_set.task_resources
        self._watermark_resources = resource_set.watermark_resources

    def accept_job_exe_next_task(self, job_exe, waiting_tasks):
        """Tries to allocate resources for the next task of a job execution that is already scheduled on this node
        (not for new job executions). When the node lacks the resources, the task is appended to the waiting list.

        :param job_exe: The job execution to accept
        :type job_exe: :class:`job.execution.job_exe.RunningJobExecution`
        :param waiting_tasks: List of tasks that are waiting for resources
        :type waiting_tasks: [:class:`job.tasks.base_task.Task`]
        :returns: True if waiting tasks were added to the list, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_next_job_task:
            return False

        next_task = job_exe.next_task()
        if not next_task:
            return False

        needed = next_task.get_resources()
        if not self._remaining_resources.is_sufficient_to_meet(needed):
            # Not enough resources, so the task has to wait
            waiting_tasks.append(next_task)
            return True

        self._allocated_running_job_exes.append(job_exe)
        self._reserve_resources(needed)
        return False

    def accept_new_job_exe(self, job_exe):
        """Tries to allocate resources on this node for the given new job execution

        :param job_exe: The new job execution
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :returns: True if the new job execution was accepted, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_new_job:
            return False

        needed = job_exe.required_resources
        if not self._remaining_resources.is_sufficient_to_meet(needed):
            return False

        self._allocated_queued_job_exes.append(job_exe)
        self._reserve_resources(needed)
        job_exe.scheduled(self.agent_id, self.node_id, needed)
        return True

    def accept_node_tasks(self, when, waiting_tasks):
        """Allocates resources for the node tasks that need to be scheduled. Node tasks that do not fit are appended
        to the given waiting list.

        :param when: The current time
        :type when: :class:`datetime.datetime`
        :param waiting_tasks: List of tasks that are waiting for resources
        :type waiting_tasks: [:class:`job.tasks.base_task.Task`]
        :returns: True if waiting tasks were added to the list, False otherwise
        :rtype: bool
        """

        any_waiting = False
        for node_task in self._node.get_next_tasks(when):
            needed = node_task.get_resources()
            if self._remaining_resources.is_sufficient_to_meet(needed):
                self.allocated_tasks.append(node_task)
                self._reserve_resources(needed)
                continue
            waiting_tasks.append(node_task)
            any_waiting = True
        return any_waiting

    def accept_system_task(self, system_task):
        """Tries to allocate resources on this node for the given system task

        :param system_task: The system task
        :type system_task: :class:`job.tasks.base_task.Task`
        :returns: True if the system task was accepted, False otherwise
        :rtype: bool
        """

        if not self.is_ready_for_system_task:
            return False

        needed = system_task.get_resources()
        if not self._remaining_resources.is_sufficient_to_meet(needed):
            return False

        system_task.agent_id = self.agent_id  # Must set agent ID for task
        self.allocated_tasks.append(system_task)
        self._reserve_resources(needed)
        return True

    def add_allocated_offers(self, offers):
        """Stores the resource offers that have been allocated to run this node's tasks. When the offers do not
        cover the current allocation, job execution tasks are dropped first and, if that is still not enough, all
        remaining tasks are dropped as well.

        :param offers: The resource offers to add
        :type offers: list
        """

        total_offered = NodeResources()
        for offer in offers:
            total_offered.add(offer.resources)
        self.allocated_offers = offers

        if total_offered.is_sufficient_to_meet(self.allocated_resources):
            return

        # First fallback: give back the resources of every job execution task
        dropped = NodeResources()
        for job_exe in self._allocated_running_job_exes:
            dropped_task = job_exe.next_task()
            if dropped_task:
                dropped.add(dropped_task.get_resources())
        self._allocated_running_job_exes = []
        self._release_resources(dropped)

        if total_offered.is_sufficient_to_meet(self.allocated_resources):
            return

        # Second fallback: drop all tasks and start over from the originally offered resources
        self.allocated_tasks = []
        self.allocated_resources = NodeResources()
        self._remaining_resources = NodeResources()
        self._remaining_resources.add(self._offered_resources)

    def add_scheduled_job_exes(self, job_exes):
        """Replaces this node's queued job executions with their running counterparts, now that they have been
        scheduled in the database

        :param job_exes: The running job executions that have now been scheduled in the database
        :type job_exes: list
        """

        self._allocated_queued_job_exes = []
        self._allocated_running_job_exes.extend(job_exes)

    def reset_new_job_exes(self):
        """Forgets the allocated new job executions and gives their resources back to the node
        """

        if not self._allocated_queued_job_exes:
            return

        freed = NodeResources()
        for queued_job_exe in self._allocated_queued_job_exes:
            freed.add(queued_job_exe.required_resources)
        self._allocated_queued_job_exes = []
        self._release_resources(freed)

    def score_job_exe_for_reservation(self, job_exe, job_type_resources):
        """Scores (lower is better) how well this node fits being reserved (temporarily blocking additional job
        executions of lower priority) for the given job execution. None is returned when the job execution cannot
        reserve this node.

        :param job_exe: The job execution to score
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit reserving this node is for this job execution,
            possibly None
        :rtype: int
        """

        # Start from the watermark and take away everything that the given job execution cannot displace
        reservable = NodeResources()
        reservable.add(self._watermark_resources)

        # System tasks (anything that is not a job execution task)
        for running_task in self._running_tasks:
            if isinstance(running_task, JobExecutionTask):
                continue
            reservable.subtract(running_task.get_resources())

        # Existing jobs of equal/higher priority
        for running_job_exe in self._running_job_exes:
            equal_or_higher = running_job_exe.priority <= job_exe.priority
            if not equal_or_higher:
                continue
            blocking_task = running_job_exe.current_task or running_job_exe.next_task()
            if blocking_task:
                reservable.subtract(blocking_task.get_resources())

        # New jobs of equal/higher priority
        for queued_job_exe in self._allocated_queued_job_exes:
            if queued_job_exe.priority <= job_exe.priority:
                reservable.subtract(queued_job_exe.required_resources)

        # Reserve only if the unused resources plus those used by lower priority jobs could eventually run this job
        if not reservable.is_sufficient_to_meet(job_exe.required_resources):
            return None
        reservable.subtract(job_exe.required_resources)

        return self._count_fitting_job_types(reservable, job_type_resources)

    def score_job_exe_for_scheduling(self, job_exe, job_type_resources):
        """Scores (lower is better) how well the given job execution fits on this node for scheduling. None is
        returned when the job execution cannot be scheduled on this node.

        :param job_exe: The job execution to score
        :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit this job execution is for this node, possibly None
        :rtype: int
        """

        needed = job_exe.required_resources
        return self._score_resources_for_scheduling(needed, job_type_resources)

    def score_system_task_for_scheduling(self, system_task, job_type_resources):
        """Scores (lower is better) how well the given system task fits on this node for scheduling. None is
        returned when the system task cannot be scheduled on this node.

        :param system_task: The system task to score
        :type system_task: :class:`job.tasks.base_task.Task`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit this system task is for this node, possibly None
        :rtype: int
        """

        needed = system_task.get_resources()
        return self._score_resources_for_scheduling(needed, job_type_resources)

    def start_job_exe_tasks(self):
        """Starts the next task of every scheduled job execution and adds the started tasks to the allocated tasks
        """

        for scheduled_job_exe in self._allocated_running_job_exes:
            started_task = scheduled_job_exe.start_next_task()
            if started_task:
                self.allocated_tasks.append(started_task)
        self._allocated_running_job_exes = []

    def _score_resources_for_scheduling(self, resources, job_type_resources):
        """Scores (lower is better) how well the given resources fit on this node for scheduling. None is returned
        when the node's remaining resources cannot meet the given resources.

        :param resources: The resources to score
        :type resources: :class:`node.resources.node_resources.NodeResources`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The integer score indicating how good of a fit these resources are for this node, possibly None
        :rtype: int
        """

        if not self._remaining_resources.is_sufficient_to_meet(resources):
            return None

        # Best guess of what would still be available to Scale on this node: start at the watermark level and take
        # away the running task resources, the resources already allocated and the resources being scored
        estimate = NodeResources()
        estimate.add(self._watermark_resources)
        for consumed in (self._task_resources, self.allocated_resources, resources):
            estimate.subtract(consumed)

        return self._count_fitting_job_types(estimate, job_type_resources)

    def _reserve_resources(self, resources):
        """Moves the given resources from the remaining pool into the allocated pool

        :param resources: The resources being allocated
        :type resources: :class:`node.resources.node_resources.NodeResources`
        """

        self.allocated_resources.add(resources)
        self._remaining_resources.subtract(resources)

    def _release_resources(self, resources):
        """Moves the given resources from the allocated pool back into the remaining pool

        :param resources: The resources being deallocated
        :type resources: :class:`node.resources.node_resources.NodeResources`
        """

        self.allocated_resources.subtract(resources)
        self._remaining_resources.add(resources)

    @staticmethod
    def _count_fitting_job_types(available_resources, job_type_resources):
        """Counts the job types whose resource requirements fit within the given resources. Used as the score: a
        lower count indicates a higher utilization of the node, reducing resource fragmentation.

        :param available_resources: The estimated resources that would still be available
        :type available_resources: :class:`node.resources.node_resources.NodeResources`
        :param job_type_resources: The list of all of the job type resource requirements
        :type job_type_resources: list
        :returns: The number of job types that fit
        :rtype: int
        """

        return sum(1 for needed in job_type_resources if available_resources.is_sufficient_to_meet(needed))
def score_job_exe_for_reservation(self, job_exe, job_type_resources):
    """Scores (lower is better) how well this node fits being reserved (temporarily blocking additional job
    executions of lower priority) for the given job execution. None is returned when the job execution cannot reserve
    this node.

    :param job_exe: The job execution to score
    :type job_exe: :class:`queue.job_exe.QueuedJobExecution`
    :param job_type_resources: The list of all of the job type resource requirements
    :type job_type_resources: list
    :returns: The integer score indicating how good of a fit reserving this node is for this job execution, possibly
        None
    :rtype: int
    """

    # Start from the watermark and take away everything that the given job execution cannot displace
    reservable = NodeResources()
    reservable.add(self._watermark_resources)

    # System tasks (anything that is not a job execution task)
    for running_task in self._running_tasks:
        if isinstance(running_task, JobExecutionTask):
            continue
        reservable.subtract(running_task.get_resources())

    # Existing jobs of equal/higher priority
    for running_job_exe in self._running_job_exes:
        equal_or_higher = running_job_exe.priority <= job_exe.priority
        if not equal_or_higher:
            continue
        blocking_task = running_job_exe.current_task or running_job_exe.next_task()
        if blocking_task:
            reservable.subtract(blocking_task.get_resources())

    # New jobs of equal/higher priority
    for queued_job_exe in self._allocated_queued_job_exes:
        if queued_job_exe.priority <= job_exe.priority:
            reservable.subtract(queued_job_exe.required_resources)

    # Reserve only if the unused resources plus those used by lower priority jobs could eventually run this job
    if not reservable.is_sufficient_to_meet(job_exe.required_resources):
        return None
    reservable.subtract(job_exe.required_resources)

    # The score is the number of job types that still fit within the estimated remaining resources. Fewer job types
    # fitting means the node is utilized more fully, which reduces resource fragmentation.
    return sum(1 for needed in job_type_resources if reservable.is_sufficient_to_meet(needed))
def add_allocated_offers(self, offers):
    """Stores the resource offers that have been allocated to run this node's tasks. When the offers do not cover the
    current allocation, job execution tasks are dropped first and, if that is still not enough, all remaining tasks
    are dropped as well.

    :param offers: The resource offers to add
    :type offers: list
    """

    total_offered = NodeResources()
    for offer in offers:
        total_offered.add(offer.resources)
    self.allocated_offers = offers

    if total_offered.is_sufficient_to_meet(self.allocated_resources):
        return

    # First fallback: give back the resources of every job execution task
    dropped = NodeResources()
    for job_exe in self._allocated_running_job_exes:
        dropped_task = job_exe.next_task()
        if dropped_task:
            dropped.add(dropped_task.get_resources())
    self._allocated_running_job_exes = []
    self.allocated_resources.subtract(dropped)
    self._remaining_resources.add(dropped)

    if total_offered.is_sufficient_to_meet(self.allocated_resources):
        return

    # Second fallback: drop all tasks and start over from the originally offered resources
    self.allocated_tasks = []
    self.allocated_resources = NodeResources()
    self._remaining_resources = NodeResources()
    self._remaining_resources.add(self._offered_resources)
class AgentResources(object):
    """This class represents an agent's set of resource offers."""

    def __init__(self, agent_id):
        """Constructor

        :param agent_id: The agent ID
        :type agent_id: string
        """

        self.agent_id = agent_id
        self._offers = {}  # {Offer ID: ResourceOffer}
        self._recent_watermark_resources = NodeResources()  # Recent watermark, used to provide a rolling watermark
        self._task_resources = NodeResources()  # Total resources for current tasks
        self._watermark_resources = NodeResources()  # Highest level of offer + task resources
        self._offer_resources = None  # Resources from offers
        self._shortage_resources = None  # Resources that agent needs to fulfill current obligations
        self._total_resources = None  # Total resources for the agent, None until set_total() is called

        self._update_resources()

    def allocate_offers(self, resources, when):
        """Directs the agent to allocate offers sufficient to match the given resources. Any offers that have been
        held too long will automatically be included. It's possible that the offer resources returned are less than
        requested.

        :param resources: The requested resources
        :type resources: :class:`node.resources.node_resources.NodeResources`
        :param when: The current time
        :type when: :class:`datetime.datetime`
        :returns: The list of allocated offers
        :rtype: [:class:`scheduler.resources.offer.ResourceOffer`]
        """

        allocated_offers = {}
        allocated_resources = NodeResources()
        available_offer_ids = set(self._offers.keys())

        # Automatically include all offers that have been held too long
        for offer in self._offers.values():
            if when - offer.received >= MAX_OFFER_HOLD_DURATION:
                allocated_offers[offer.id] = offer
                allocated_resources.add(offer.resources)
                available_offer_ids.discard(offer.id)

        if self._offer_resources.is_sufficient_to_meet(resources):
            # We have enough resources to meet the request, so keep allocating offers until we get enough
            while not allocated_resources.is_sufficient_to_meet(resources):
                if not available_offer_ids:
                    # We unexpectedly ran out of offers (can occur due to resource rounding error)
                    break
                offer_id = available_offer_ids.pop()
                offer = self._offers[offer_id]
                allocated_offers[offer_id] = offer
                allocated_resources.add(offer.resources)

        # Remove allocated offers and return them
        for offer in allocated_offers.values():
            del self._offers[offer.id]
        self._update_resources()

        # Return a real list (as documented) rather than a dict view, so the result behaves the same on Python 2 and 3
        return list(allocated_offers.values())

    def generate_status_json(self, node_dict, total_running=None, total_offered=None, total_watermark=None,
                             total=None):
        """Generates the portion of the status JSON that describes the resources for this agent

        :param node_dict: The dict for this agent's node within the status JSON
        :type node_dict: dict
        :param total_running: The total running resources to add up, possibly None
        :type total_running: :class:`node.resources.node_resources.NodeResources`
        :param total_offered: The total offered resources to add up, possibly None
        :type total_offered: :class:`node.resources.node_resources.NodeResources`
        :param total_watermark: The total watermark resources to add up, possibly None
        :type total_watermark: :class:`node.resources.node_resources.NodeResources`
        :param total: The total resources to add up, possibly None
        :type total: :class:`node.resources.node_resources.NodeResources`
        :returns: The total number of offers this agent has
        :rtype: int
        """

        # Fall back to the watermark while the agent's total is unknown (same test as has_total_resources())
        if self._total_resources is not None:
            total_resources = self._total_resources
        else:
            total_resources = self._watermark_resources

        # Free = watermark minus what is running and what is currently offered
        free_resources = self._watermark_resources.copy()
        free_resources.subtract(self._task_resources)
        free_resources.subtract(self._offer_resources)
        free_resources.round_values()

        # Unavailable = the part of the total that lies above the watermark
        unavailable_resources = total_resources.copy()
        unavailable_resources.subtract(self._watermark_resources)
        unavailable_resources.round_values()

        # The accumulators are optional; only None means "not provided", an empty accumulator must still be updated
        if total_running is not None:
            total_running.add(self._task_resources)
        if total_offered is not None:
            total_offered.add(self._offer_resources)
        if total_watermark is not None:
            total_watermark.add(self._watermark_resources)
        if total is not None:
            total.add(total_resources)

        resources_dict = {}
        self._task_resources.generate_status_json(resources_dict, 'running')
        self._offer_resources.generate_status_json(resources_dict, 'offered')
        free_resources.generate_status_json(resources_dict, 'free')
        unavailable_resources.generate_status_json(resources_dict, 'unavailable')
        total_resources.generate_status_json(resources_dict, 'total')

        # Fill in any missing values
        for resource in total_resources.resources:
            resource_dict = resources_dict[resource.name]
            for key in ('running', 'offered', 'free', 'unavailable'):
                resource_dict.setdefault(key, 0.0)

        num_offers = len(self._offers)
        node_dict['num_offers'] = num_offers
        node_dict['resources'] = resources_dict
        return num_offers

    def has_total_resources(self):
        """Indicates whether this agent knows its total resources or not

        :returns: True if agent knows its total resources, False otherwise
        :rtype: bool
        """

        return self._total_resources is not None

    def refresh_resources(self, offers, tasks):
        """Refreshes the agent's resources by setting the current running tasks and adding new resource offers.
        Returns a copy of the set of resources for the agent.

        :param offers: The new resource offers to add
        :type offers: [:class:`scheduler.resources.offer.ResourceOffer`]
        :param tasks: The current tasks running on the agent
        :type tasks: [:class:`job.tasks.base_task.Task`]
        :returns: A copy of the set of agent resources
        :rtype: :class:`scheduler.resources.agent.ResourceSet`
        """

        # Add new offers, an offer whose ID is already known is left untouched
        for offer in offers:
            if offer.id not in self._offers:
                self._offers[offer.id] = offer

        self._update_resources(tasks)

        offered_resources = self._offer_resources.copy()
        task_resources = self._task_resources.copy()
        watermark_resources = self._watermark_resources.copy()
        return ResourceSet(offered_resources, task_resources, watermark_resources)

    def rescind_offers(self, offer_ids):
        """Rescinds the offers with the given IDs. IDs that are not known to this agent are ignored.

        :param offer_ids: The list of IDs of the offers to remove
        :type offer_ids: [str]
        """

        for offer_id in offer_ids:
            self._offers.pop(offer_id, None)
        self._update_resources()

    def reset_watermark(self):
        """Resets the agent's watermark to the highest recent value
        """

        self._watermark_resources = self._recent_watermark_resources
        self._recent_watermark_resources = NodeResources()
        self._update_resources()

    def set_shortage(self, shortage_resources=None):
        """Sets the resource shortage for the agent, if any

        :param shortage_resources: The resource shortage, None to clear the shortage
        :type shortage_resources: :class:`node.resources.node_resources.NodeResources`
        """

        if shortage_resources is not None:
            logger.warning('Agent %s has a shortage of %s', self.agent_id, shortage_resources)
            shortage_resources.round_values()
        self._shortage_resources = shortage_resources

    def set_total(self, total_resources):
        """Sets the total resources for the agent

        :param total_resources: The total resources
        :type total_resources: :class:`node.resources.node_resources.NodeResources`
        """

        self._total_resources = total_resources

    def _update_resources(self, tasks=None):
        """Updates the agent's resources from its current offers and tasks

        :param tasks: The new list of current tasks running on the agent, possibly None
        :type tasks: list
        """

        # Add up offered resources
        self._offer_resources = NodeResources()
        for offer in self._offers.values():
            self._offer_resources.add(offer.resources)

        # Recalculate task resources if needed
        if tasks is not None:
            self._task_resources = NodeResources()
            for task in tasks:
                self._task_resources.add(task.get_resources())

        # Increase watermark if needed
        available_resources = self._offer_resources.copy()
        available_resources.add(self._task_resources)
        self._watermark_resources.increase_up_to(available_resources)
        self._recent_watermark_resources.increase_up_to(available_resources)

        # Make sure watermark does not exceed total (can happen when we get task resources back before task update)
        total_known = self._total_resources is not None
        if total_known and not self._total_resources.is_sufficient_to_meet(self._watermark_resources):
            self._watermark_resources.limit_to(self._total_resources)
            self._recent_watermark_resources.limit_to(self._total_resources)
            # Since watermark was limited to not be higher than total, we're going to limit offered resources so that
            # offered + task = watermark
            max_offered = self._watermark_resources.copy()
            max_offered.subtract(self._task_resources)
            self._offer_resources.limit_to(max_offered)

        # Round values to deal with float precision issues
        self._offer_resources.round_values()
        self._task_resources.round_values()
        self._watermark_resources.round_values()
def generate_status_json(self, status_dict):
    """Generates the portion of the status JSON that describes the resources

    Every entry of ``status_dict['nodes']`` whose agent ID is tracked by this manager is filled in by that agent's
    own ``generate_status_json()``. Only active nodes are handed the running totals, so only they contribute to the
    summed values written to ``status_dict['resources']`` and ``status_dict['num_offers']``.

    :param status_dict: The status JSON dict
    :type status_dict: dict
    """

    offer_count = 0
    running = NodeResources()
    offered = NodeResources()
    watermark = NodeResources()
    total = NodeResources()

    with self._agent_resources_lock:
        for node in status_dict['nodes']:
            agent_id, is_active = node['agent_id'], node['is_active']
            if agent_id not in self._agent_resources:
                continue
            agent = self._agent_resources[agent_id]
            if not is_active:
                # Inactive nodes still get their own resource section, but are left out of the totals
                agent.generate_status_json(node)
                continue
            offer_count += agent.generate_status_json(node, running, offered, watermark, total)

    # free = watermark - running - offered
    free = watermark.copy()
    free.subtract(running)
    free.subtract(offered)
    # unavailable = total - watermark
    unavailable = total.copy()
    unavailable.subtract(watermark)

    # Order matters: it is the order in which the sections are written into the resources dict
    sections = (
        (running, 'running'),
        (offered, 'offered'),
        (free, 'free'),
        (unavailable, 'unavailable'),
        (total, 'total'),
    )
    for section_resources, _ in sections:
        section_resources.round_values()

    summary = {}
    for section_resources, section_name in sections:
        section_resources.generate_status_json(summary, section_name)

    # Fill in any missing values
    for resource in total.resources:
        entry = summary[resource.name]
        for section_name in ('running', 'offered', 'free', 'unavailable'):
            entry.setdefault(section_name, 0.0)

    status_dict['num_offers'] = offer_count
    status_dict['resources'] = summary
def offers(self, offers):
    """
    Invoked when resources have been offered to this framework. A single offer will only contain resources from a
    single agent. Resources associated with an offer will not be re-offered to _this_ framework until either (a)
    this framework has rejected those resources or (b) those resources have been rescinded. Note that resources may
    be concurrently offered to more than one framework at a time (depending on the allocator being used). In that
    case, the first framework to launch tasks using those resources will be able to use them while the other
    frameworks will have those resources rescinded (or if a framework has already launched tasks with those
    resources then those tasks will fail with a TASK_LOST status and a message saying as much).

    Each offer is converted with ``from_mesos_offer()``. While the scheduler is paused every offer is declined.
    Otherwise the scalar resources whose role is in ``settings.ACCEPTED_RESOURCE_ROLE`` are collected; offers that
    yield at least one such resource are registered with the node and resource managers.

    :param offers: The offers received from Mesos; each must support ``decline()`` and be convertible by
        ``from_mesos_offer()``
    :type offers: list
    """

    started = now()

    agents = {}
    offered_nodes = []
    resource_offers = []
    total_resources = NodeResources()
    skipped_roles = set()

    for offer in offers:
        scale_offer = from_mesos_offer(offer)
        offer_id = scale_offer.id.value
        agent_id = scale_offer.agent_id.value
        framework_id = scale_offer.framework_id.value
        hostname = scale_offer.hostname
        # Recorded before the pause check, so hosts of paused offers are still passed to update_node_offers()
        offered_nodes.append(hostname)

        # ignore offers while we're paused
        if scheduler_mgr.config.is_paused:
            offer.decline()
            continue

        resource_list = []
        declined = False
        for resource in scale_offer.resources:
            # Only accept resource that are of SCALAR type and have a role matching our accept list
            if resource.type != RESOURCE_TYPE_SCALAR:
                continue
            if resource.role in settings.ACCEPTED_RESOURCE_ROLE:
                logger.debug('Received scalar resource %s with value %i associated with role %s',
                             resource.name, resource.scalar.value, resource.role)
                resource_list.append(ScalarResource(resource.name, resource.scalar.value))
            else:
                skipped_roles.add(resource.role)
                # Decline at most once per offer, however many of its resources carry a non-accepted role
                if not declined:
                    offer.decline()
                    declined = True
        # NOTE(review): an offer declined above is still forwarded below when it also carries accepted
        # resources (unchanged from the previous behavior) - confirm this is intended

        logger.debug('Number of resources: %i', len(resource_list))

        # Only register agent, if offers are being received
        if resource_list:
            resources = NodeResources(resource_list)
            total_resources.add(resources)
            agents[agent_id] = Agent(agent_id, hostname)
            resource_offers.append(ResourceOffer(offer_id, agent_id, framework_id, resources, started, offer))

    logger.debug('Offer analysis complete with %i resource offers.', len(resource_offers))

    node_mgr.register_agents(agents.values())
    logger.debug('Agents registered.')
    resource_mgr.add_new_offers(resource_offers)
    logger.debug('Resource offers added.')
    Node.objects.update_node_offers(offered_nodes, now())
    logger.debug('Node offer times updated.')

    num_offers = len(resource_offers)
    logger.info('Received %d offer(s) with %s from %d node(s)', num_offers, total_resources, len(agents))
    if skipped_roles:
        # Sorted so the warning text is stable from one call to the next
        logger.warning('Skipped offers from roles that are not marked as accepted: %s',
                       ','.join(sorted(skipped_roles)))
    scheduler_mgr.add_new_offer_count(num_offers)

    # Warn when handling the offers took longer than the normal threshold
    duration = now() - started
    msg = 'Scheduler resourceOffers() took %.3f seconds'
    if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
        logger.warning(msg, duration.total_seconds())
    else:
        logger.debug(msg, duration.total_seconds())