def _create_next_tasks(self, when):
    """Creates the next tasks that need to be run for this node. Caller must have obtained the thread lock.

    :param when: The current time
    :type when: :class:`datetime.datetime`
    """

    # If we have a cleanup task, check that the node's agent ID has not changed
    if self._cleanup_task and self._cleanup_task.agent_id != self._agent_id:
        self._cleanup_task = None
    if not self._cleanup_task and self._is_ready_for_cleanup_task(when):
        self._cleanup_task = self._cleanup.create_next_task(self._agent_id, self._hostname,
                                                            self._is_initial_cleanup_completed)

    # If we have a health task, check that the node's agent ID has not changed
    if self._health_task and self._health_task.agent_id != self._agent_id:
        self._health_task = None
    if not self._health_task and self._is_ready_for_health_task(when):
        self._health_task = HealthTask(scheduler_mgr.framework_id, self._agent_id)

    # If we have a pull task, check that the node's agent ID has not changed
    if self._pull_task and self._pull_task.agent_id != self._agent_id:
        self._pull_task = None
    if not self._pull_task and self._is_ready_for_pull_task(when):
        self._pull_task = PullTask(scheduler_mgr.framework_id, self._agent_id)
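# Hedged usage note (illustrative only; the lock attribute name is hypothetical,
# not taken from this repo): the docstring above requires callers to hold the
# node's thread lock, so a call site would look roughly like:
#
#     with self._lock:
#         self._create_next_tasks(now())
#
# Holding the lock keeps the cleanup/health/pull task fields from being cleared
# or replaced by another thread between the agent-ID check and the assignment.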
def test_score_job_exe_for_reservation_insufficient_resources(self):
    """Tests calling score_job_exe_for_reservation() when there are not enough resources to reserve for the job"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    task = HealthTask('1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
    job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                      priority=1000)
    job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                      priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [task], [job_exe_1, job_exe_2], resource_set)
    queue_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_1 = QueuedJobExecution(queue_model_1)
    queue_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_2 = QueuedJobExecution(queue_model_2)
    scheduling_node.accept_new_job_exe(job_exe_1)
    scheduling_node.accept_new_job_exe(job_exe_2)

    # We are going to try to reserve the node for a job execution with priority 120
    # Calculate available resources for reservation:
    # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15)
    # - Higher Priority New Job Exes (8, 40) = 135.9 CPUs, 613 MiB memory
    # This new job should NOT fit for reservation
    queue_model = queue_test_utils.create_queue(priority=120, cpus_required=140.0, mem_required=600.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)
    job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])

    score = scheduling_node.score_job_exe_for_reservation(job_exe, [job_type_resource_1])
    self.assertIsNone(score)
def test_accept_node_tasks_insufficient_resources(self):
    """Tests calling accept_node_tasks() when there are not enough resources"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    health_task = HealthTask('1234', 'agent_1')
    pull_task = PullTask('1234', 'agent_1')
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    node.get_next_tasks = MagicMock()
    node.get_next_tasks.return_value = [health_task, pull_task]
    offered_resources = NodeResources([Cpus(0.0), Mem(50.0)])
    task_resources = NodeResources()
    watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    resource_set = ResourceSet(offered_resources, task_resources, watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

    waiting_tasks = []
    had_waiting_task = scheduling_node.accept_node_tasks(now(), waiting_tasks)

    self.assertTrue(had_waiting_task)
    self.assertEqual(len(scheduling_node.allocated_tasks), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(offered_resources))
    self.assertListEqual(waiting_tasks, [health_task, pull_task])
def test_accept_node_tasks(self):
    """Tests successfully calling accept_node_tasks()"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    health_task = HealthTask('1234', 'agent_1')
    pull_task = PullTask('1234', 'agent_1')
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    node.get_next_tasks = MagicMock()
    node.get_next_tasks.return_value = [health_task, pull_task]
    node_task_resources = NodeResources()
    node_task_resources.add(health_task.get_resources())
    node_task_resources.add(pull_task.get_resources())
    offered_resources = NodeResources([Cpus(100.0), Mem(5000.0)])
    expected_remaining_resources = NodeResources()
    expected_remaining_resources.add(offered_resources)
    expected_remaining_resources.subtract(node_task_resources)
    task_resources = NodeResources()
    watermark_resources = NodeResources([Cpus(100.0), Mem(5000.0)])
    resource_set = ResourceSet(offered_resources, task_resources, watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

    waiting_tasks = []
    had_waiting_task = scheduling_node.accept_node_tasks(now(), waiting_tasks)

    self.assertFalse(had_waiting_task)
    self.assertEqual(len(scheduling_node.allocated_tasks), 2)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(node_task_resources))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(expected_remaining_resources))
    self.assertListEqual(waiting_tasks, [])
def test_score_job_exe_for_reservation(self):
    """Tests calling score_job_exe_for_reservation() successfully"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    task = HealthTask('1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
    job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                      priority=1000)
    job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                      priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [task], [job_exe_1, job_exe_2], resource_set)
    queue_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_1 = QueuedJobExecution(queue_model_1)
    queue_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_2 = QueuedJobExecution(queue_model_2)
    scheduling_node.accept_new_job_exe(job_exe_1)
    scheduling_node.accept_new_job_exe(job_exe_2)

    # We are going to try to reserve the node for a job execution with priority 120
    # Calculate available resources for reservation:
    # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15)
    # - Higher Priority New Job Exes (8, 40) = 135.9 CPUs, 613 MiB memory
    # This new job should fit for reservation
    queue_model = queue_test_utils.create_queue(priority=120, cpus_required=130.0, mem_required=600.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)

    # Expected available resources "left" on the node: 5.9 CPUs and 13 MiB memory
    # (available above minus the new job we are scoring)
    # The first 2 job types should fit and the next 2 are too big, so the score should be 2
    job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])
    job_type_resource_2 = NodeResources([Cpus(5.5), Mem(12.0)])
    job_type_resource_3 = NodeResources([Cpus(6.0), Mem(10.0)])
    job_type_resource_4 = NodeResources([Cpus(2.0), Mem(14.0)])

    score = scheduling_node.score_job_exe_for_reservation(job_exe, [job_type_resource_1, job_type_resource_2,
                                                                    job_type_resource_3, job_type_resource_4])
    self.assertEqual(score, 2)
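# A minimal sketch (hypothetical helper, not Scale's real implementation) of the
# scoring arithmetic the two reservation tests above verify: subtract the system
# tasks, the higher-priority job executions, and the candidate job from the
# watermark, then count how many of the given job-type resource sets still fit.
def _sketch_reservation_score(watermark, deductions, job_type_resources):
    """All arguments are (cpus, mem) tuples; job_type_resources is a list of them."""

    cpus, mem = watermark
    for d_cpus, d_mem in deductions:
        cpus -= d_cpus
        mem -= d_mem
    if cpus < 0 or mem < 0:
        return None  # The candidate job does not fit, so the node cannot be reserved
    return sum(1 for (c, m) in job_type_resources if c <= cpus and m <= mem)

# For the successful test above: (200, 700) minus (0.1, 32), (56, 15), (8, 40),
# and the candidate (130, 600) leaves (5.9, 13), so exactly 2 of the 4 job-type
# resource sets fit and the expected score is 2. In the insufficient-resources
# test the candidate needs 140 CPUs > 135.9, so the sketch returns None.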
def test_add_allocated_offers_remove_all_tasks(self):
    """Tests calling add_allocated_offers() when there are not enough resources for the job exes or node tasks"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    health_task = HealthTask('1234', 'agent_1')
    pull_task = PullTask('1234', 'agent_1')
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    node.get_next_tasks = MagicMock()
    node.get_next_tasks.return_value = [health_task, pull_task]
    offered_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
    running_job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                              resources=NodeResources([Cpus(1.0), Mem(10.0)]))
    running_job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                              resources=NodeResources([Cpus(2.0), Mem(20.0)]))
    node_task_resources = NodeResources()
    node_task_resources.add(health_task.get_resources())
    node_task_resources.add(pull_task.get_resources())
    all_required_resources = NodeResources()
    all_required_resources.add(node_task_resources)
    all_required_resources.add(running_job_exe_1.next_task().get_resources())
    all_required_resources.add(running_job_exe_2.next_task().get_resources())
    expected_remaining_resources = NodeResources()
    expected_remaining_resources.add(offered_resources)
    expected_remaining_resources.subtract(node_task_resources)

    # Set up node with node tasks and job exes (there would never be queued job exes since they would be
    # scheduled before add_allocated_offers() was called)
    scheduling_node.accept_node_tasks(now(), [])
    scheduling_node.accept_job_exe_next_task(running_job_exe_1, [])
    scheduling_node.accept_job_exe_next_task(running_job_exe_2, [])
    self.assertEqual(len(scheduling_node.allocated_tasks), 2)
    self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(all_required_resources))

    # Set up offers (not enough for job exes or node tasks)
    offer_1 = ResourceOffer('offer_1', 'agent_1', '1234', NodeResources([Cpus(0.1), Mem(600.0)]), now(), None)

    scheduling_node.add_allocated_offers([offer_1])

    self.assertListEqual(scheduling_node.allocated_offers, [offer_1])
    # All allocated tasks and job exes should be gone
    self.assertEqual(len(scheduling_node.allocated_tasks), 0)
    self.assertEqual(len(scheduling_node._allocated_running_job_exes), 0)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(offered_resources))
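# Hedged sketch (hypothetical names, not the real add_allocated_offers() logic)
# of the outcome the test above asserts: when the combined offers cannot cover
# the resources already allocated to node tasks and job executions, every
# allocation is removed rather than a partial set being kept. Here the offer of
# (0.1 CPUs, 600 MiB) covers the memory but not the CPUs of the allocations.
def _sketch_apply_offers(offered, allocated):
    """Both arguments are (cpus, mem) tuples; returns the retained allocation."""

    offered_cpus, offered_mem = offered
    allocated_cpus, allocated_mem = allocated
    if offered_cpus < allocated_cpus or offered_mem < allocated_mem:
        return (0.0, 0.0)  # Insufficient offers: all tasks and job exes are deallocated
    return allocated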