def test_job_type_limit(self):
    """Tests calling perform_scheduling() with a job type limit"""

    Queue.objects.all().delete()
    limited_job_type = job_test_utils.create_seed_job_type()
    limited_job_type.max_scheduled = 4
    limited_job_type.save()
    running_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_1.agent_id,
                                                        job_type=limited_job_type, node=self.node_1)
    # Queue up six more executions of the limited job type
    for _ in range(6):
        queue_test_utils.create_queue(job_type=limited_job_type)
    job_type_mgr.sync_with_database()
    # One job of this type is already running
    job_exe_mgr.schedule_job_exes([running_exe], [])

    offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                            NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                            NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([offer_1, offer_2])

    scheduling_manager = SchedulingManager()
    task_count = scheduling_manager.perform_scheduling(self._client, now())
    # One is already running, should only be able to schedule 3 more
    self.assertEqual(task_count, 3)
def _schedule_new_job_exes(self, framework_id, nodes, job_types, job_type_limits, job_type_resources,
                           workspaces):
    """Schedules new job executions from the queue and adds them to the appropriate node

    :param framework_id: The scheduling framework ID
    :type framework_id: string
    :param nodes: The dict of scheduling nodes stored by node ID where every node has fulfilled all
        waiting tasks
    :type nodes: dict
    :param job_types: The dict of job type models stored by job type ID
    :type job_types: dict
    :param job_type_limits: The dict of job type IDs mapping to job type limits
    :type job_type_limits: dict
    :param job_type_resources: The list of all of the job type resource requirements
    :type job_type_resources: list
    :param workspaces: A dict of all workspaces stored by name
    :type workspaces: dict
    :returns: The number of new job executions that were scheduled
    :rtype: int
    """

    # Can only use nodes that are ready for new job executions
    ready_nodes = {node.node_id: node for node in nodes.values() if node.is_ready_for_new_job}

    scheduled_count = 0
    try:
        queued_exes = self._process_queue(ready_nodes, job_types, job_type_limits,
                                          job_type_resources, workspaces)
        running_by_node = self._process_scheduled_job_executions(framework_id, queued_exes,
                                                                 job_types, workspaces)

        # Flatten the per-node dict so all new running executions are scheduled in one call
        all_running = []
        for exe_list in running_by_node.values():
            all_running.extend(exe_list)
        job_exe_mgr.schedule_job_exes(all_running, create_running_job_messages(all_running))

        nodes_with_new_tasks = set()
        total_resources = NodeResources()
        for node_id, exe_list in running_by_node.items():
            if node_id not in nodes:
                logger.error('Scheduled jobs on an unknown node')
                continue
            nodes[node_id].add_scheduled_job_exes(exe_list)
            # Count only executions that actually produced a first task
            for running_exe in exe_list:
                first_task = running_exe.next_task()
                if first_task:
                    nodes_with_new_tasks.add(node_id)
                    total_resources.add(first_task.get_resources())
                    scheduled_count += 1
        if scheduled_count:
            logger.info('Scheduled %d new job(s) with %s on %d node(s)', scheduled_count,
                        total_resources, len(nodes_with_new_tasks))
    except DatabaseError:
        logger.exception('Error occurred while scheduling new jobs from the queue')
        # Roll back in-memory node state so the next cycle can retry these executions
        scheduled_count = 0
        for node in ready_nodes.values():
            node.reset_new_job_exes()

    return scheduled_count
def test_job_type_limit(self, mock_taskinfo):
    """Tests running the scheduling thread with a job type limit"""
    mock_taskinfo.return_value = MagicMock()

    Queue.objects.all().delete()
    limited_job_type = job_test_utils.create_job_type()
    limited_job_type.max_scheduled = 4
    limited_job_type.save()
    job_exe_1 = job_test_utils.create_job_exe(job_type=limited_job_type, status='RUNNING')
    # Queue up six more executions of the limited job type
    for _ in range(6):
        queue_test_utils.create_queue(job_type=limited_job_type)
    job_type_mgr.sync_with_database()
    # One job of this type is already running
    job_exe_mgr.schedule_job_exes([RunningJobExecution(job_exe_1)])

    offer_1 = ResourceOffer('offer_1', self.node_agent_1,
                            NodeResources(cpus=200.0, mem=102400.0, disk=102400.0))
    offer_2 = ResourceOffer('offer_2', self.node_agent_2,
                            NodeResources(cpus=200.0, mem=204800.0, disk=204800.0))
    offer_mgr.add_new_offers([offer_1, offer_2])

    # Ignore Docker pull tasks
    for node in node_mgr.get_nodes():
        node._is_image_pulled = True
    # Ignore cleanup tasks
    for node in node_mgr.get_nodes():
        node._initial_cleanup_completed()
        node._update_state()

    num_tasks = self._scheduling_thread._perform_scheduling()
    # One is already running, should only be able to schedule 3 more
    self.assertEqual(num_tasks, 3)
def _schedule_accepted_tasks(self):
    """Schedules all of the tasks that have been accepted

    :returns: The number of Mesos tasks that were scheduled
    :rtype: int
    """

    started = now()
    all_tasks = []
    node_task_lists = {}  # {Node ID: [Mesos Tasks]}
    accepted_queued_exes = []

    node_offers_list = offer_mgr.pop_offers_with_accepted_job_exes()
    for node_offers in node_offers_list:
        node_tasks = []
        node_task_lists[node_offers.node.id] = node_tasks
        # Add cleanup tasks
        for task in node_offers.get_accepted_tasks():
            all_tasks.append(task)
            node_tasks.append(create_mesos_task(task))
        # Start next task for already running job executions that were accepted
        for running_job_exe in node_offers.get_accepted_running_job_exes():
            next_task = running_job_exe.start_next_task()
            if next_task:
                all_tasks.append(next_task)
                node_tasks.append(create_mesos_task(next_task))
        # Gather up queued job executions that were accepted
        accepted_queued_exes.extend(node_offers.get_accepted_new_job_exes())

    try:
        # Schedule queued job executions and start their first tasks
        workspaces = workspace_mgr.get_workspaces()
        new_running_exes = self._schedule_queued_job_executions(accepted_queued_exes, workspaces)
        job_exe_mgr.schedule_job_exes(new_running_exes)
        for new_exe in new_running_exes:
            first_task = new_exe.start_next_task()
            if first_task:
                all_tasks.append(first_task)
                node_task_lists[new_exe.node_id].append(create_mesos_task(first_task))
    except OperationalError:
        logger.exception('Failed to schedule queued job executions')

    # Launch tasks on Mesos
    task_mgr.launch_tasks(all_tasks, started)
    total_task_count = 0
    launched_node_count = 0
    for node_offers in node_offers_list:
        node_tasks = node_task_lists[node_offers.node.id]
        task_count = len(node_tasks)
        total_task_count += task_count
        if task_count:
            launched_node_count += 1
        # launchTasks is invoked even with an empty task list so the offers are consumed
        mesos_offer_ids = []
        for offer_id in node_offers.offer_ids:
            mesos_offer_id = mesos_pb2.OfferID()
            mesos_offer_id.value = offer_id
            mesos_offer_ids.append(mesos_offer_id)
        self._driver.launchTasks(mesos_offer_ids, node_tasks)

    if total_task_count:
        logger.info('Launched %i Mesos task(s) on %i node(s)', total_task_count,
                    launched_node_count)
    return total_task_count