def test_score_job_exe_for_reservation_insufficient_resources(self):
    """Tests that score_job_exe_for_reservation() returns None when the job exe does not fit for reservation"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock(return_value=True)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    offered = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered, NodeResources(), watermark)

    # The health task's resources are 0.1 CPUs and 32 MiB memory
    health_task = HealthTask('1234', 'agent_1')

    # Two job exes already running on the node, one at priority 1000 and one at priority 100
    running_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                          resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                          priority=1000)
    running_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                          resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                          priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [health_task], [running_exe_1, running_exe_2], resource_set)

    # None of the queued job exes in this test need any disk space
    no_disk = {'disk_in_required': 0.0, 'disk_out_required': 0.0, 'disk_total_required': 0.0}

    # Two new job exes offered to the node from the queue
    queued_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0, **no_disk)
    queued_exe_1 = QueuedJobExecution(queued_model_1)
    queued_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0, **no_disk)
    queued_exe_2 = QueuedJobExecution(queued_model_2)
    scheduling_node.accept_new_job_exe(queued_exe_1)
    scheduling_node.accept_new_job_exe(queued_exe_2)

    # Try to reserve the node for a job execution with priority 120.
    # Resources available for the reservation:
    #   watermark (200, 700)
    #   - system tasks (0.1, 32)
    #   - higher priority existing job exes (56, 15)
    #   - higher priority new job exes (8, 40)
    #   = 135.9 CPUs, 613 memory
    # The job below asks for 140 CPUs, so it should NOT fit for reservation.
    reserve_model = queue_test_utils.create_queue(priority=120, cpus_required=140.0, mem_required=600.0, **no_disk)
    reserve_exe = QueuedJobExecution(reserve_model)
    job_type_resources = [NodeResources([Cpus(2.0), Mem(10.0)])]

    score = scheduling_node.score_job_exe_for_reservation(reserve_exe, job_type_resources)

    self.assertIsNone(score)
def test_accept_new_job_exe_no_jobs(self):
    """Tests that accept_new_job_exe() rejects a job exe when the node is not ready for new jobs"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    # This node reports that it is not ready for a new job
    node.is_ready_for_new_job = MagicMock(return_value=False)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    resource_set = ResourceSet(NodeResources([Cpus(10.0), Mem(50.0)]), NodeResources(),
                               NodeResources([Cpus(100.0), Mem(500.0)]))
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

    queue_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0, disk_in_required=0.0,
                                                disk_out_required=0.0, disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)

    was_accepted = scheduling_node.accept_new_job_exe(job_exe)

    self.assertFalse(was_accepted)
    # Nothing should be allocated and the full offer should still be remaining
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    nothing_allocated = scheduling_node.allocated_resources.is_equal(NodeResources())
    self.assertTrue(nothing_allocated)
    offer_untouched = scheduling_node._remaining_resources.is_equal(NodeResources([Cpus(10.0), Mem(50.0)]))
    self.assertTrue(offer_untouched)
    # The job exe should not have been scheduled on any node
    self.assertIsNone(job_exe._scheduled_node_id)
def test_accept_new_job_exe_gpu_partial_node_other_task(self):
    """Tests that accept_new_job_exe() rejects a job exe that requires more GPUs than the node is offering"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock(return_value=True)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    # A single GPU is offered and the task resources also include one GPU
    offered = NodeResources([Cpus(10.0), Mem(50.0), Gpus(1.0)])
    task_usage = NodeResources([Gpus(1.0)])
    watermark = NodeResources([Cpus(100.0), Mem(500.0), Gpus(1.0)])
    scheduling_node = SchedulingNode('agent_1', node, [], [], ResourceSet(offered, task_usage, watermark))

    # The queued job exe asks for 2 GPUs
    queue_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0, disk_in_required=0.0,
                                                disk_out_required=0.0, disk_total_required=0.0, gpus_required=2)
    job_exe = QueuedJobExecution(queue_model)

    was_accepted = scheduling_node.accept_new_job_exe(job_exe)

    self.assertFalse(was_accepted)
def test_reset_new_job_exes(self):
    """Tests that reset_new_job_exes() clears the accepted job exes and restores the remaining resources"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock(return_value=True)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    offered_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark = NodeResources([Cpus(100.0), Mem(500.0)])
    scheduling_node = SchedulingNode('agent_1', node, [], [],
                                     ResourceSet(offered_resources, NodeResources(), watermark))

    # Neither queued job exe needs any disk space
    no_disk = {'disk_in_required': 0.0, 'disk_out_required': 0.0, 'disk_total_required': 0.0}
    first_model = queue_test_utils.create_queue(cpus_required=2.0, mem_required=60.0, **no_disk)
    first_exe = QueuedJobExecution(first_model)
    second_model = queue_test_utils.create_queue(cpus_required=4.5, mem_required=400.0, **no_disk)
    second_exe = QueuedJobExecution(second_model)

    # Expected allocation is the sum of both job exes' required resources
    expected_allocation = NodeResources()
    for queued_exe in (first_exe, second_exe):
        expected_allocation.add(queued_exe.required_resources)

    # Set up the node with both queued job exes
    for queued_exe in (first_exe, second_exe):
        scheduling_node.accept_new_job_exe(queued_exe)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 2)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(expected_allocation))

    # After the reset everything should be back to square one
    scheduling_node.reset_new_job_exes()
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(offered_resources))
def test_accept_new_job_exe_gpu_partial_node(self):
    """Tests that accept_new_job_exe() accepts a job exe that requires fewer GPUs than the node is offering"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock(return_value=True)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    # Four GPUs are offered, while the queued job exe below only asks for one
    offered = NodeResources([Cpus(10.0), Mem(50.0), Gpus(4.0)])
    watermark = NodeResources([Cpus(100.0), Mem(500.0), Gpus(4.0)])
    scheduling_node = SchedulingNode('agent_1', node, [], [], ResourceSet(offered, NodeResources(), watermark))

    queue_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0, disk_in_required=0.0,
                                                disk_out_required=0.0, disk_total_required=0.0, gpus_required=1)
    job_exe = QueuedJobExecution(queue_model)

    was_accepted = scheduling_node.accept_new_job_exe(job_exe)

    self.assertTrue(was_accepted)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 1)
    # Verify the greedy GPU allocation logic: all 4 offered GPUs are expected to be allocated
    expected_allocated = NodeResources([Cpus(1.0), Mem(10.0), Gpus(4.0)])
    self.assertTrue(scheduling_node.allocated_resources.is_equal(expected_allocated))
    expected_remaining = NodeResources([Cpus(9.0), Mem(40.0)])
    self.assertTrue(scheduling_node._remaining_resources.is_equal(expected_remaining))
    self.assertEqual(job_exe._scheduled_node_id, node.id)
def test_score_job_exe_for_reservation(self):
    """Tests that score_job_exe_for_reservation() returns the expected score when the job exe fits"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock(return_value=True)
    node.is_ready_for_next_job_task = MagicMock(return_value=True)

    offered = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered, NodeResources(), watermark)

    # The health task's resources are 0.1 CPUs and 32 MiB memory
    health_task = HealthTask('1234', 'agent_1')

    # Two job exes already running on the node, one at priority 1000 and one at priority 100
    running_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                          resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                          priority=1000)
    running_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                          resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                          priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [health_task], [running_exe_1, running_exe_2], resource_set)

    # None of the queued job exes in this test need any disk space
    no_disk = {'disk_in_required': 0.0, 'disk_out_required': 0.0, 'disk_total_required': 0.0}

    # Two new job exes offered to the node from the queue
    queued_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0, **no_disk)
    queued_exe_1 = QueuedJobExecution(queued_model_1)
    queued_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0, **no_disk)
    queued_exe_2 = QueuedJobExecution(queued_model_2)
    scheduling_node.accept_new_job_exe(queued_exe_1)
    scheduling_node.accept_new_job_exe(queued_exe_2)

    # Try to reserve the node for a job execution with priority 120.
    # Resources available for the reservation:
    #   watermark (200, 700)
    #   - system tasks (0.1, 32)
    #   - higher priority existing job exes (56, 15)
    #   - higher priority new job exes (8, 40)
    #   = 135.9 CPUs, 613 memory
    # The job below asks for 130 CPUs and 600 memory, so it should fit for reservation.
    reserve_model = queue_test_utils.create_queue(priority=120, cpus_required=130.0, mem_required=600.0, **no_disk)
    reserve_exe = QueuedJobExecution(reserve_model)

    # Expected 5.9 CPUs and 13 MiB memory "left" on the node (available above - the job being scored).
    # The first 2 job types should fit and the next 2 are too big, so the score should be 2.
    job_type_resources = [
        NodeResources([Cpus(2.0), Mem(10.0)]),
        NodeResources([Cpus(5.5), Mem(12.0)]),
        NodeResources([Cpus(6.0), Mem(10.0)]),
        NodeResources([Cpus(2.0), Mem(14.0)]),
    ]

    score = scheduling_node.score_job_exe_for_reservation(reserve_exe, job_type_resources)

    self.assertEqual(score, 2)