def setUp(self):
    """Set up a SchedulingThread with two registered agents and two queued jobs.

    Relies on module-level manager singletons (scheduler_mgr, offer_mgr,
    node_mgr, job_type_mgr); clears/re-syncs them so each test starts clean.
    """
    django.setup()
    Scheduler.objects.initialize_scheduler()
    self._driver = MagicMock()  # stand-in for the Mesos scheduler driver
    scheduler_mgr.sync_with_database()
    offer_mgr.clear()
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    node_mgr.clear()
    node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
    # Patch the Mesos master API so the node sync sees our fake slaves
    with patch('scheduler.node.manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        node_mgr.sync_with_database('master_host', 5050)
    # Ignore initial cleanup tasks
    for node in node_mgr.get_nodes():
        node.initial_cleanup_completed()
    # Two queued jobs with known resource requirements for scheduling tests
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    job_type_mgr.sync_with_database()
    self._scheduling_thread = SchedulingThread(self._driver, '123')
def test_score_job_exe_for_reservation_insufficient_resources(self):
    """Tests calling score_job_exe_for_reservation() when there are not enough resources to reserve for the
    job
    """
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    task = HealthTask('1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
    # Two already-running job executions with differing priorities
    job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                      priority=1000)
    job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                      priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [task], [job_exe_1, job_exe_2], resource_set)
    # Accept two new (queued) job executions on the node as well
    queue_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_1 = QueuedJobExecution(queue_model_1)
    queue_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_2 = QueuedJobExecution(queue_model_2)
    scheduling_node.accept_new_job_exe(job_exe_1)
    scheduling_node.accept_new_job_exe(job_exe_2)

    # We are going to try to reserve the node for a job execution with priority 120
    # Calculate available resources for reservation:
    # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15) - Higher Priority
    # New Job Exes (8, 40) = 135.9 CPUs, 613 memory
    # This new job should NOT fit for reservation (needs 140 CPUs > 135.9 available)
    queue_model = queue_test_utils.create_queue(priority=120, cpus_required=140.0, mem_required=600.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)
    job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])

    # None indicates the node could not be reserved for this job
    score = scheduling_node.score_job_exe_for_reservation(job_exe, [job_type_resource_1])
    self.assertIsNone(score)
def setUp(self):
    """Set up one active node and one paused node, each with a running job
    execution, plus queued jobs sized to fit or exceed node resources.
    """
    django.setup()
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node = node_test_utils.create_node(slave_id=self.node_agent)
    self.paused_node = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node.is_paused = True
    # Running job execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING', node=self.paused_node)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0
    # Running job execution on the active node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING', node=self.node)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0
    # Queued jobs: two that should be schedulable, three that exceed one resource each
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0, disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    """Set up a SchedulingThread using per-test manager instances (rather than
    module-level singletons) with two synced agents and two queued jobs.
    """
    django.setup()
    Scheduler.objects.initialize_scheduler()
    self._driver = MagicMock()  # stand-in for the Mesos scheduler driver
    # Fresh manager instances so state cannot leak between tests
    self._job_exe_manager = RunningJobExecutionManager()
    self._job_type_manager = JobTypeManager()
    self._node_manager = NodeManager()
    self._offer_manager = OfferManager()
    self._scheduler_manager = SchedulerManager()
    self._workspace_manager = WorkspaceManager()
    self._scheduler_manager.sync_with_database()
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.node_1 = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent_1)
    self.node_2 = node_test_utils.create_node(hostname='host_2', slave_id=self.node_agent_2)
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    self._node_manager.add_agent_ids([self.node_agent_1, self.node_agent_2])
    # Patch the Mesos master API so the node sync sees our fake slaves
    with patch('scheduler.sync.node_manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        self._node_manager.sync_with_database('master_host', 5050)
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self._job_type_manager.sync_with_database()
    self._scheduling_thread = SchedulingThread(self._driver, '123', self._job_exe_manager,
                                               self._job_type_manager, self._node_manager,
                                               self._offer_manager, self._scheduler_manager,
                                               self._workspace_manager)
def setUp(self):
    """Set up Node wrappers (one ready, one paused) around node models, running
    job executions on each, and queued jobs of varying resource demands.
    """
    django.setup()
    Scheduler.objects.initialize_scheduler()
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node_model = node_test_utils.create_node(slave_id=self.node_agent)
    self.node = Node(self.node_agent, self.node_model)
    # Mark the node ready by skipping image pull and initial cleanup
    self.node._is_image_pulled = True
    self.node._initial_cleanup_completed()
    self.node._update_state()
    self.paused_node_model = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node_model.is_paused = True
    self.paused_node = Node(self.node_agent_paused, self.paused_node_model)
    # Running job execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING', node=self.paused_node_model)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0
    # Running job execution on the ready node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING', node=self.node_model)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0
    # Queued jobs: two schedulable, three that each exceed one resource type
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0, disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    """Set up Node wrappers (one ready, one paused), several running job
    executions with varied scheduled resources, and queued jobs including
    ones that exceed CPU, memory, or disk capacity.
    """
    django.setup()
    Scheduler.objects.initialize_scheduler()
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node_model = node_test_utils.create_node(slave_id=self.node_agent)
    self.node = Node(self.node_agent, self.node_model)
    self.node.initial_cleanup_completed()  # skip the initial cleanup task
    self.paused_node_model = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node_model.is_paused = True
    self.paused_node = Node(self.node_agent_paused, self.paused_node_model)
    # Two typical running job executions
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_2.cpus_scheduled = 4.0
    self.running_job_exe_2.mem_scheduled = 1024.0
    self.running_job_exe_2.disk_in_scheduled = 500.0
    self.running_job_exe_2.disk_out_scheduled = 50.0
    self.running_job_exe_2.disk_total_scheduled = 550.0
    # Running job executions that each dominate one resource type
    self.running_job_exe_high_cpus = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_cpus.cpus_scheduled = 200.0
    self.running_job_exe_high_cpus.mem_scheduled = 512.0
    self.running_job_exe_high_cpus.disk_in_scheduled = 100.0
    self.running_job_exe_high_cpus.disk_out_scheduled = 200.0
    self.running_job_exe_high_cpus.disk_total_scheduled = 300.0
    self.running_job_exe_high_mem = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_mem.cpus_scheduled = 2.0
    self.running_job_exe_high_mem.mem_scheduled = 1048576.0
    self.running_job_exe_high_mem.disk_in_scheduled = 100.0
    self.running_job_exe_high_mem.disk_out_scheduled = 200.0
    self.running_job_exe_high_mem.disk_total_scheduled = 300.0
    self.running_job_exe_high_disk = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_disk.cpus_scheduled = 2.0
    self.running_job_exe_high_disk.mem_scheduled = 512.0
    self.running_job_exe_high_disk.disk_in_scheduled = 10000.0
    self.running_job_exe_high_disk.disk_out_scheduled = 20000.0
    self.running_job_exe_high_disk.disk_total_scheduled = 30000.0
    # Queued jobs: two schedulable, three that each exceed one resource type
    self.queue_1 = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0, disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    """Set up one active and one paused node model, a running job execution on
    each, and queued jobs including ones too big for any node.
    """
    django.setup()
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node = node_test_utils.create_node(slave_id=self.node_agent)
    self.paused_node = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node.is_paused = True
    # Running job execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING', node=self.paused_node)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0
    # Running job execution on the active node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING', node=self.node)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0
    # Queued jobs: two schedulable, three that each exceed one resource type
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0, disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    """Set up one valid job type and one with an invalid configuration, each
    with a queued execution, to exercise scheduling failure handling.
    """
    django.setup()
    self.node = node_test_utils.create_node()
    self.job_type_1 = job_test_utils.create_job_type()
    # job_exe_2 has an invalid JSON and will not schedule correctly
    self.job_type_2 = job_test_utils.create_job_type(configuration={'INVALID': 'SCHEMA'})
    self.queue_1 = queue_test_utils.create_queue(job_type=self.job_type_1)
    self.queue_2 = queue_test_utils.create_queue(job_type=self.job_type_2)
    self.job_exe_1 = self.queue_1.job_exe
    self.job_exe_2 = self.queue_2.job_exe
def setUp(self):
    """Set up the scheduler with two registered agents, suppressing message
    handler, cleanup, health check, image pull, and system tasks so only job
    scheduling is exercised; creates three queued jobs of varying sizes.
    """
    django.setup()
    reset_error_cache()
    self.framework_id = '1234'
    Scheduler.objects.initialize_scheduler()
    Scheduler.objects.update(num_message_handlers=0)  # Prevent message handler tasks from scheduling
    self._client = MagicMock()  # stand-in for the Mesos HTTP client
    scheduler_mgr.sync_with_database()
    scheduler_mgr.update_from_mesos(framework_id=self.framework_id)
    resource_mgr.clear()
    job_exe_mgr.clear()
    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    self.agent_3 = Agent('agent_3', 'host_2')  # not registered; available for dynamic-agent tests
    node_mgr.clear()
    node_mgr.register_agents([self.agent_1, self.agent_2])
    node_mgr.sync_with_database(scheduler_mgr.config)
    # Ignore initial cleanup, health check, and image pull tasks
    for node in node_mgr.get_nodes():
        node._last_health_task = now()
        node._initial_cleanup_completed()
        node._is_image_pulled = True
        node._update_state()
        if node.agent_id == 'agent_1':
            self.node_1_id = node.id
    cleanup_mgr.update_nodes(node_mgr.get_nodes())
    self.node_1 = Node.objects.get(id=self.node_1_id)
    # Ignore system tasks
    system_task_mgr._is_db_update_completed = True
    # Queued jobs: two that fit a node, one sized beyond any single agent
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_large = queue_test_utils.create_queue(resources=NodeResources([Cpus(125.0), Mem(12048.0),
                                                                              Disk(12048.0)]))
    job_type_mgr.sync_with_database()
def test_accept_new_job_exe_gpu_partial_node_other_task(self):
    """Tests that accept_new_job_exe() rejects a job whose GPU requirement exceeds
    what remains when another task already holds the node's only GPU
    """
    # Node mock that would otherwise accept new jobs and next tasks
    mock_node = MagicMock()
    mock_node.hostname = 'host_1'
    mock_node.id = 1
    mock_node.is_ready_for_new_job = MagicMock()
    mock_node.is_ready_for_new_job.return_value = True
    mock_node.is_ready_for_next_job_task = MagicMock()
    mock_node.is_ready_for_next_job_task.return_value = True

    # Only 1 GPU offered, and an existing task is already using 1 GPU
    offered = NodeResources([Cpus(10.0), Mem(50.0), Gpus(1.0)])
    in_use_by_tasks = NodeResources([Gpus(1.0)])
    watermark = NodeResources([Cpus(100.0), Mem(500.0), Gpus(1.0)])
    resources = ResourceSet(offered, in_use_by_tasks, watermark)
    scheduling_node = SchedulingNode('agent_1', mock_node, [], [], resources)

    # The queued job wants 2 GPUs — more than this node can ever provide
    queued_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0,
                                                 disk_in_required=0.0, disk_out_required=0.0,
                                                 disk_total_required=0.0, gpus_required=2)
    queued_exe = QueuedJobExecution(queued_model)

    self.assertFalse(scheduling_node.accept_new_job_exe(queued_exe))
def test_get_queue_lifo(self):
    """Tests calling QueueManager.get_queue() in LIFO mode

    The most recently queued entry must be returned first. Collecting the
    results into a list fixes two weaknesses of the old first/else loop: the
    test no longer passes vacuously when get_queue() yields nothing, and any
    unexpected extra rows now fail the comparison instead of being compared
    against queue_1 indefinitely.
    """
    time_1 = now()
    time_2 = time_1 + datetime.timedelta(seconds=1)
    queue_1 = queue_test_utils.create_queue(priority=100, queued=time_1)
    queue_2 = queue_test_utils.create_queue(priority=100, queued=time_2)

    # Call method to test
    job_exe_ids = [queue.job_exe_id for queue in Queue.objects.get_queue(QUEUE_ORDER_LIFO)]

    # LIFO: newest (queue_2) first, and exactly these two entries
    self.assertListEqual(job_exe_ids, [queue_2.job_exe_id, queue_1.job_exe_id])
def test_score_job_exe_for_scheduling_insufficient_resources(self):
    """Tests calling score_job_exe_for_scheduling() when there are not enough resources to schedule the job"""
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    task_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, task_resources, watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

    # Allocate 10 CPUs and 50 MiB memory to existing job execution
    job_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                    resources=NodeResources([Cpus(10.0), Mem(50.0)]))
    scheduling_node.accept_job_exe_next_task(job_exe, [])

    # Should have 10 CPUs and 50 MiB memory left, so this job execution is too big
    queue_model = queue_test_utils.create_queue(cpus_required=15.0, mem_required=40.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)

    # None indicates the job could not be scheduled on this node
    score = scheduling_node.score_job_exe_for_scheduling(job_exe, [])
    self.assertIsNone(score)
def test_accept_new_job_exe_no_jobs(self):
    """Tests that accept_new_job_exe() refuses a job, and leaves all node state
    untouched, when the node is not ready for new jobs
    """
    # Node mock that refuses new jobs (but would accept next tasks)
    mock_node = MagicMock()
    mock_node.hostname = 'host_1'
    mock_node.id = 1
    mock_node.is_ready_for_new_job = MagicMock()
    mock_node.is_ready_for_new_job.return_value = False
    mock_node.is_ready_for_next_job_task = MagicMock()
    mock_node.is_ready_for_next_job_task.return_value = True

    offered = NodeResources([Cpus(10.0), Mem(50.0)])
    in_use_by_tasks = NodeResources()
    watermark = NodeResources([Cpus(100.0), Mem(500.0)])
    resources = ResourceSet(offered, in_use_by_tasks, watermark)
    scheduling_node = SchedulingNode('agent_1', mock_node, [], [], resources)

    # A small job that would easily fit if the node were accepting work
    queued_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0,
                                                 disk_in_required=0.0, disk_out_required=0.0,
                                                 disk_total_required=0.0)
    queued_exe = QueuedJobExecution(queued_model)

    self.assertFalse(scheduling_node.accept_new_job_exe(queued_exe))
    # Nothing was allocated, remaining resources equal the full offer, and the
    # job execution was never bound to the node
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(NodeResources([Cpus(10.0), Mem(50.0)])))
    self.assertIsNone(queued_exe._scheduled_node_id)
def test_reset_new_job_exes(self):
    """Tests calling reset_new_job_exes() successfully"""
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
    queue_model_1 = queue_test_utils.create_queue(cpus_required=2.0, mem_required=60.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_1 = QueuedJobExecution(queue_model_1)
    queue_model_2 = queue_test_utils.create_queue(cpus_required=4.5, mem_required=400.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_2 = QueuedJobExecution(queue_model_2)
    # Combined resources of both queued jobs, for the expected allocation
    allocated_resources = NodeResources()
    allocated_resources.add(job_exe_1.required_resources)
    allocated_resources.add(job_exe_2.required_resources)

    # Set up node with queued job exes
    scheduling_node.accept_new_job_exe(job_exe_1)
    scheduling_node.accept_new_job_exe(job_exe_2)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 2)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(allocated_resources))

    # Reset queued job exes and check that everything is back to square one
    scheduling_node.reset_new_job_exes()
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(offered_resources))
def test_job_type_limit(self, mock_taskinfo):
    """Tests running the scheduling thread with a job type limit

    With max_scheduled = 4 and one execution already running, only 3 of the 6
    queued jobs may be scheduled.
    """
    mock_taskinfo.return_value = MagicMock()

    Queue.objects.all().delete()
    job_type_with_limit = job_test_utils.create_job_type()
    job_type_with_limit.max_scheduled = 4
    job_type_with_limit.save()
    job_exe_1 = job_test_utils.create_job_exe(job_type=job_type_with_limit, status='RUNNING')
    # Queue six jobs of the limited type; the old per-queue local variables
    # (queue_1_limit .. queue_6_limit) were never used, so they are dropped
    for _ in range(6):
        queue_test_utils.create_queue(job_type=job_type_with_limit)
    self._job_type_manager.sync_with_database()
    # One job of this type is already running
    self._job_exe_manager.add_job_exes([RunningJobExecution(job_exe_1)])
    # Offers with more than enough resources for every queued job
    offer_1 = ResourceOffer('offer_1', self.node_agent_1,
                            NodeResources(cpus=200.0, mem=102400.0, disk=102400.0))
    offer_2 = ResourceOffer('offer_2', self.node_agent_2,
                            NodeResources(cpus=200.0, mem=204800.0, disk=204800.0))
    self._offer_manager.add_new_offers([offer_1, offer_2])

    num_tasks = self._scheduling_thread._perform_scheduling()
    self.assertEqual(num_tasks, 3)  # One is already running, should only be able to schedule 3 more
def setUp(self):
    """Set up a SchedulingThread with two synced agents, suppressing the nodes'
    cleanup and health check tasks, plus two queued jobs.
    """
    django.setup()
    Scheduler.objects.initialize_scheduler()
    self._driver = MagicMock()  # stand-in for the Mesos scheduler driver
    scheduler_mgr.sync_with_database()
    offer_mgr.clear()
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    node_mgr.clear()
    node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
    # Patch the Mesos master API so the node sync sees our fake slaves
    with patch('scheduler.node.manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        node_mgr.sync_with_database('master_host', 5050)
    # Ignore initial cleanup tasks and health check tasks
    for node in node_mgr.get_nodes():
        # NOTE(review): '_last_heath_task' looks like a typo of '_last_health_task'
        # (the spelling used elsewhere in this suite) — confirm against the Node
        # class attribute name; if the class spells it 'health', this assignment
        # creates a dead attribute and health tasks are NOT suppressed
        node._last_heath_task = now()
        node._initial_cleanup_completed()
        node._update_state()
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    job_type_mgr.sync_with_database()
    self._scheduling_thread = SchedulingThread(self._driver, '123')
def test_score_job_exe_for_scheduling(self):
    """Tests calling score_job_exe_for_scheduling() successfully"""
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    task_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, task_resources, watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

    # Allocate 10 CPUs and 50 MiB memory to existing job execution
    job_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                    resources=NodeResources([Cpus(10.0), Mem(50.0)]))
    scheduling_node.accept_job_exe_next_task(job_exe, [])

    # Should have 10 CPUs and 50 MiB memory left, so this should be scheduled
    queue_model = queue_test_utils.create_queue(cpus_required=5.0, mem_required=40.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)

    # Expected available 85 CPUs and 110 MiB memory "left" on node
    # (watermark - current tasks - allocated - new job we are scoring)
    # First 2 job types should fit, next 2 are too big, so score should be 2
    job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])
    job_type_resource_2 = NodeResources([Cpus(85.0), Mem(109.0)])
    job_type_resource_3 = NodeResources([Cpus(86.0), Mem(10.0)])
    job_type_resource_4 = NodeResources([Cpus(2.0), Mem(111.0)])

    score = scheduling_node.score_job_exe_for_scheduling(job_exe,
                                                         [job_type_resource_1, job_type_resource_2,
                                                          job_type_resource_3, job_type_resource_4])
    self.assertEqual(score, 2)
def test_accept_new_job_exe_gpu_partial_node(self):
    """Tests successfully calling accept_new_job_exe() when job requires less GPUs than available"""
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(10.0), Mem(50.0), Gpus(4.0)])
    task_resources = NodeResources()
    watermark_resources = NodeResources([Cpus(100.0), Mem(500.0), Gpus(4.0)])
    resource_set = ResourceSet(offered_resources, task_resources, watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
    # Job only asks for 1 of the node's 4 GPUs
    queue_model = queue_test_utils.create_queue(cpus_required=1.0, mem_required=10.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0, gpus_required=1)
    job_exe = QueuedJobExecution(queue_model)

    accepted = scheduling_node.accept_new_job_exe(job_exe)
    self.assertTrue(accepted)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 1)
    # Verify that our greedy GPU allocation logic is working: all 4 GPUs are
    # allocated (not just the 1 requested), leaving no GPUs remaining
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources([Cpus(1.0), Mem(10.0),
                                                                                Gpus(4.0)])))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(NodeResources([Cpus(9.0), Mem(40.0)])))
    self.assertEqual(job_exe._scheduled_node_id, node.id)
def test_job_type_limit(self):
    """Tests calling perform_scheduling() with a job type limit

    With max_scheduled = 4 and one execution already running, only 3 of the 6
    queued jobs may be scheduled.
    """
    Queue.objects.all().delete()
    job_type_with_limit = job_test_utils.create_seed_job_type()
    job_type_with_limit.max_scheduled = 4
    job_type_with_limit.save()
    running_job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_1.agent_id,
                                                              job_type=job_type_with_limit, node=self.node_1)
    # Queue six jobs of the limited type
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    job_type_mgr.sync_with_database()
    # One job of this type is already running
    job_exe_mgr.schedule_job_exes([running_job_exe_1], [])
    # Offers with enough resources for several (but not all) queued jobs
    offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                            NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                            NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([offer_1, offer_2])

    scheduling_manager = SchedulingManager()
    num_tasks = scheduling_manager.perform_scheduling(self._client, now())
    self.assertEqual(num_tasks, 3)  # One is already running, should only be able to schedule 3 more
def setUp(self):
    """Set up a single job type and a queued execution with a known priority."""
    django.setup()
    self.job_type = job_test_utils.create_job_type()
    self.queue = queue_test_utils.create_queue(job_type=self.job_type, priority=123)
def test_job_type_limit(self, mock_taskinfo):
    """Tests running the scheduling thread with a job type limit

    With max_scheduled = 4 and one execution already running, only 3 of the 6
    queued jobs may be scheduled.
    """
    mock_taskinfo.return_value = MagicMock()

    Queue.objects.all().delete()
    job_type_with_limit = job_test_utils.create_job_type()
    job_type_with_limit.max_scheduled = 4
    job_type_with_limit.save()
    job_exe_1 = job_test_utils.create_job_exe(job_type=job_type_with_limit, status='RUNNING')
    # Queue six jobs of the limited type
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    queue_test_utils.create_queue(job_type=job_type_with_limit)
    job_type_mgr.sync_with_database()
    # One job of this type is already running
    job_exe_mgr.schedule_job_exes([RunningJobExecution(job_exe_1)])
    # Offers with more than enough resources for every queued job
    offer_1 = ResourceOffer('offer_1', self.node_agent_1,
                            NodeResources(cpus=200.0, mem=102400.0, disk=102400.0))
    offer_2 = ResourceOffer('offer_2', self.node_agent_2,
                            NodeResources(cpus=200.0, mem=204800.0, disk=204800.0))
    offer_mgr.add_new_offers([offer_1, offer_2])
    # Ignore Docker pull tasks
    for node in node_mgr.get_nodes():
        node._is_image_pulled = True
    # Ignore cleanup tasks
    for node in node_mgr.get_nodes():
        node._initial_cleanup_completed()
        node._update_state()

    num_tasks = self._scheduling_thread._perform_scheduling()
    self.assertEqual(num_tasks, 3)  # One is already running, should only be able to schedule 3 more
def test_score_job_exe_for_reservation(self):
    """Tests calling score_job_exe_for_reservation() successfully"""
    # Mock node that accepts both new jobs and next tasks
    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
    watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    task = HealthTask('1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
    # Two already-running job executions with differing priorities
    job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(10.0), Mem(50.0)]),
                                                      priority=1000)
    job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                      resources=NodeResources([Cpus(56.0), Mem(15.0)]),
                                                      priority=100)
    scheduling_node = SchedulingNode('agent_1', node, [task], [job_exe_1, job_exe_2], resource_set)
    # Accept two new (queued) job executions on the node as well
    queue_model_1 = queue_test_utils.create_queue(priority=100, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_1 = QueuedJobExecution(queue_model_1)
    queue_model_2 = queue_test_utils.create_queue(priority=1000, cpus_required=8.0, mem_required=40.0,
                                                  disk_in_required=0.0, disk_out_required=0.0,
                                                  disk_total_required=0.0)
    job_exe_2 = QueuedJobExecution(queue_model_2)
    scheduling_node.accept_new_job_exe(job_exe_1)
    scheduling_node.accept_new_job_exe(job_exe_2)

    # We are going to try to reserve the node for a job execution with priority 120
    # Calculate available resources for reservation:
    # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15) - Higher Priority
    # New Job Exes (8, 40) = 135.9 CPUs, 613 memory
    # This new job should fit for reservation
    queue_model = queue_test_utils.create_queue(priority=120, cpus_required=130.0, mem_required=600.0,
                                                disk_in_required=0.0, disk_out_required=0.0,
                                                disk_total_required=0.0)
    job_exe = QueuedJobExecution(queue_model)

    # Expected available 5.9 CPUs and 13 MiB memory "left" on node
    # (available above - new job we are scoring)
    # First 2 job types should fit, next 2 are too big, so score should be 2
    job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])
    job_type_resource_2 = NodeResources([Cpus(5.5), Mem(12.0)])
    job_type_resource_3 = NodeResources([Cpus(6.0), Mem(10.0)])
    job_type_resource_4 = NodeResources([Cpus(2.0), Mem(14.0)])

    score = scheduling_node.score_job_exe_for_reservation(job_exe,
                                                          [job_type_resource_1, job_type_resource_2,
                                                           job_type_resource_3, job_type_resource_4])
    self.assertEqual(score, 2)