def test_schedule_system_tasks(self):
    """Tests that perform_scheduling() schedules the pending system tasks"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    # Empty the queue so that no queued job executions are left to schedule
    Queue.objects.all().delete()
    # Mark the database update as not completed so a database update task gets scheduled
    system_task_mgr._is_db_update_completed = False
    # Request two message handler tasks
    Scheduler.objects.update(num_message_handlers=2)
    scheduler_mgr.sync_with_database()

    scheduled_count = SchedulingManager().perform_scheduling(self._client, now())

    # One database update task plus two message handler tasks
    self.assertEqual(scheduled_count, 3)
def test_canceled_queue_model(self):
    """Tests that perform_scheduling() handles a queue model that has been canceled"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    self.queue_1.is_canceled = True
    self.queue_1.save()

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._client, now())

    # Only the non-canceled queued job execution counts as a scheduled task
    self.assertEqual(scheduled_count, 1)

    # Both queue models get a job execution (queue_1 canceled, queue_2 running) and leave the queue
    for queue in (self.queue_1, self.queue_2):
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 1)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 0)

    # The job execution manager must hold a message for the canceled job execution
    message_types = [message.type for message in job_exe_mgr.get_messages()]
    self.assertIn('create_job_exe_ends', message_types)
def test_max_resources(self):
    """Tests successfully calculating the max resources in a cluster"""

    offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                            NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None)
    offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                            NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    offer_3 = ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                            NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None)
    resource_mgr.add_new_offers([offer_1, offer_2, offer_3])
    resource_mgr.refresh_agent_resources([], now())

    # Named max_resources (not max) so the builtin max() is not shadowed
    max_resources = resource_mgr.get_max_available_resources()

    # The expected values match the per-resource maximum across the two agents, with the two agent_2 offers
    # summed: cpus 25 + 225 (agent_2), mem 22048 (agent_1), disk 2048 + 22048 (agent_2)
    self.assertTrue(max_resources.is_equal(NodeResources([Cpus(250.0), Mem(22048.0), Disk(24096.0)])))
def test_all_available_resources(self):
    """Tests that the resource manager reports the resources available across the cluster"""

    new_offers = [
        ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                      NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
        ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
    ]
    resource_mgr.add_new_offers(new_offers)
    resource_mgr.refresh_agent_resources([], now())

    # Expected values are the sums over all three offers
    expected = {'mem': 25120.0, 'gpus': 0.0, 'disk': 25120.0, 'cpus': 252.0}
    self.assertDictEqual(resource_mgr.get_all_available_resources(), expected)
def test_job_type_limit(self):
    """Tests that perform_scheduling() respects a job type's max_scheduled limit"""

    Queue.objects.all().delete()

    limited_job_type = job_test_utils.create_seed_job_type()
    limited_job_type.max_scheduled = 4
    limited_job_type.save()

    already_running = job_test_utils.create_running_job_exe(agent_id=self.agent_1.agent_id,
                                                            job_type=limited_job_type, node=self.node_1)

    # Queue six jobs of the limited type, more than the limit allows
    for _ in range(6):
        queue_test_utils.create_queue(job_type=limited_job_type)
    job_type_mgr.sync_with_database()

    # Register the job execution of this type that is already running
    job_exe_mgr.schedule_job_exes([already_running], [])

    no_cpu_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                 NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([no_cpu_offer, large_offer])

    scheduled_count = SchedulingManager().perform_scheduling(self._client, now())

    # Limit of 4 with one already running leaves room for only 3 more
    self.assertEqual(scheduled_count, 3)
def test_successful_schedule(self):
    """Tests a successful call to perform_scheduling()"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._client, now())

    # Only the two smaller queued job executions get scheduled
    self.assertEqual(scheduled_count, 2)

    # Job execution models exist for the scheduled jobs only, and their queue models are gone
    expected_exe_counts = ((self.queue_1, 1), (self.queue_2, 1), (self.queue_large, 0))
    for queue, expected_count in expected_exe_counts:
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), expected_count)
    scheduled_queue_ids = [self.queue_1.id, self.queue_2.id]
    self.assertEqual(Queue.objects.filter(id__in=scheduled_queue_ids).count(), 0)
def test_missing_workspace(self):
    """Tests that perform_scheduling() schedules nothing when a queued job's workspace is unknown to the scheduler"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    # Give both queued jobs an output workspace
    for queued in (self.queue_1, self.queue_2):
        queue_model = Queue.objects.get(id=queued.id)
        exe_config = queue_model.get_execution_configuration()
        exe_config.set_output_workspaces({'my_output': 'my_workspace'})
        queue_model.configuration = exe_config.get_dict()
        queue_model.save()

    manager = SchedulingManager()

    # Make the workspace manager report no workspaces while scheduling runs
    with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
        mock_get_workspaces.return_value = {}
        scheduled_count = manager.perform_scheduling(self._client, now())

    # Nothing is scheduled: no job executions are created and both queue models remain
    self.assertEqual(scheduled_count, 0)
    for queued in (self.queue_1, self.queue_2):
        self.assertEqual(JobExecution.objects.filter(job_id=queued.job_id).count(), 0)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
def test_update_all_cluster_resources(self):
    """Tests that update_all_cluster_resources() stores the cluster resource totals in the database"""

    new_offers = [
        ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                      NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
        ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
    ]
    resource_mgr.add_new_offers(new_offers)
    resource_mgr.refresh_agent_resources([], now())

    # No ClusterResources row exists before the update
    self.assertIsNone(ClusterResources.objects.first())

    resource_mgr.update_all_cluster_resources()

    # After the update the row holds the sums over all three offers
    stored = ClusterResources.objects.first()
    self.assertIsNotNone(stored)
    self.assertEqual(stored.mem, 25120.0)
    self.assertEqual(stored.gpus, 0.0)
    self.assertEqual(stored.disk, 25120.0)
    self.assertEqual(stored.cpus, 252.0)
def test_paused_job_type(self):
    """Tests that perform_scheduling() skips queued jobs whose job type is paused"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    # Pause the job type of queue_1 and let the job type manager pick up the change
    self.queue_1.job_type.is_paused = True
    self.queue_1.job_type.save()
    job_type_mgr.sync_with_database()

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._client, now())

    # Only the queued job execution with the non-paused job type is scheduled
    self.assertEqual(scheduled_count, 1)
    for queue, expected_count in ((self.queue_1, 0), (self.queue_2, 1)):
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), expected_count)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 1)
def test_missing_job_types(self):
    """Tests that perform_scheduling() schedules nothing when queued job types are unknown to the scheduler"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    manager = SchedulingManager()

    # Make the job type manager report no job types while scheduling runs
    with patch('scheduler.scheduling.manager.job_type_mgr.get_job_types') as mock_get_job_types:
        mock_get_job_types.return_value = {}
        scheduled_count = manager.perform_scheduling(self._client, now())

    # Nothing is scheduled: no job executions are created and both queue models remain
    self.assertEqual(scheduled_count, 0)
    for queue in (self.queue_1, self.queue_2):
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 0)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
def test_paused_scheduler(self):
    """Tests that perform_scheduling() schedules nothing while the scheduler is paused"""

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([small_offer, large_offer])

    Scheduler.objects.update(is_paused=True)
    scheduler_mgr.sync_with_database()
    # Propagate the paused scheduler configuration to the nodes
    node_mgr.sync_with_database(scheduler_mgr.config)
    # Leave the database update marked as incomplete; the assertion below checks that no system task is
    # scheduled for it while paused
    system_task_mgr._is_db_update_completed = False

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._client, now())

    # Nothing is scheduled: no job executions are created and both queue models remain
    self.assertEqual(scheduled_count, 0)
    for queue in (self.queue_1, self.queue_2):
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), 0)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 2)
def test_get_queued_resources(self):
    """Tests that get_queued_resources() reports cluster resources, queue lengths and resource totals"""

    new_offers = [
        ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                      NodeResources([Cpus(2.0), Mem(22048.0), Disk(1024.0)]), now(), None),
        ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
        ResourceOffer('offer_3', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(225.0), Mem(1024.0), Disk(22048.0)]), now(), None),
    ]
    resource_mgr.add_new_offers(new_offers)
    resource_mgr.refresh_agent_resources([], now())

    # No ClusterResources row exists before the update
    self.assertIsNone(ClusterResources.objects.first())

    resource_mgr.update_all_cluster_resources()

    # After the update the row holds the sums over all three offers
    stored = ClusterResources.objects.first()
    self.assertIsNotNone(stored)
    self.assertEqual(stored.mem, 25120.0)
    self.assertEqual(stored.gpus, 0.0)
    self.assertEqual(stored.disk, 25120.0)
    self.assertEqual(stored.cpus, 252.0)

    expected = {
        "cluster_resources": {'cpus': 252, 'disk': 25120, 'gpus': 0, 'mem': 25120},
        "queue_lengths": {'PENDING': 0, 'QUEUED': 3, 'RUNNING': 0},
        "total_resources": {
            'PENDING': {},
            'QUEUED': {'cpus': 3.0, 'mem': 384.0},
            'RUNNING': {},
        },
    }
    self.assertDictEqual(resource_mgr.get_queued_resources(), expected)
def test_node_with_new_agent_id(self):
    """Tests that perform_scheduling() uses a node's new agent ID after the node re-registers"""

    # Host 2 is lost and comes back with the new agent ID of agent_3
    node_mgr.lost_node(self.agent_2)
    node_mgr.register_agents([self.agent_3])
    node_mgr.sync_with_database(scheduler_mgr.config)

    new_agent_offer = ResourceOffer('offer', self.agent_3.agent_id, self.framework_id,
                                    NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([new_agent_offer])

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._client, now())

    # Both queued job executions are scheduled
    self.assertEqual(scheduled_count, 2)

    # Two client calls are expected: one checking for the driver, one launching the tasks
    client_calls = self._client.method_calls
    self.assertEqual(2, len(client_calls))

    # The launched tasks are the second positional argument of the second call (index 1)
    launch_call = client_calls[1]
    launched_tasks = launch_call[1][1]
    for launched_task in launched_tasks:
        self.assertEqual(self.agent_3.agent_id, launched_task['agent_id']['value'])
def setUp(self):
    """Clears the resource manager and registers one offer for each of two agents"""

    django.setup()

    resource_mgr.clear()

    self.framework_id = '1234'
    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')

    initial_offers = [
        ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                      NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None),
        ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                      NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None),
    ]
    resource_mgr.add_new_offers(initial_offers)
    resource_mgr.refresh_agent_resources([], now())
def resourceOffers(self, driver, offers):
    """
    Invoked when resources have been offered to this framework. Each offer holds resources from exactly one slave.
    Offered resources are not re-offered to _this_ framework until either (a) this framework has rejected them
    (see SchedulerDriver.launchTasks) or (b) they have been rescinded (see Scheduler.offerRescinded). Depending on
    the allocator in use, the same resources may be offered to several frameworks concurrently. The first
    framework to launch tasks with them gets to use them; the other frameworks have the resources rescinded, or,
    if they already launched tasks with them, those tasks fail with a TASK_LOST status and a message saying as
    much.

    See documentation for :meth:`mesos_api.mesos.Scheduler.resourceOffers`.
    """

    started = now()

    agents_by_id = {}
    new_offers = []
    offered_total = NodeResources()

    for mesos_offer in offers:
        offer_id = mesos_offer.id.value
        agent_id = mesos_offer.slave_id.value
        framework_id = mesos_offer.framework_id.value
        hostname = mesos_offer.hostname

        # Keep only resources of the SCALAR type (type 0)
        scalar_resources = [ScalarResource(res.name, res.scalar.value)
                            for res in mesos_offer.resources if res.type == 0]
        offered = NodeResources(scalar_resources)
        offered_total.add(offered)

        # Later offers from the same agent overwrite the earlier Agent entry
        agents_by_id[agent_id] = Agent(agent_id, hostname)
        new_offers.append(ResourceOffer(offer_id, agent_id, framework_id, offered, started))

    node_mgr.register_agents(agents_by_id.values())
    resource_mgr.add_new_offers(new_offers)

    offer_count = len(new_offers)
    logger.info('Received %d offer(s) with %s from %d node(s)', offer_count, offered_total, len(agents_by_id))
    scheduler_mgr.add_new_offer_count(offer_count)

    # Log at warning level when handling the offers took longer than the normal threshold
    elapsed = now() - started
    timing_msg = 'Scheduler resourceOffers() took %.3f seconds'
    if elapsed > ScaleScheduler.NORMAL_WARN_THRESHOLD:
        logger.warning(timing_msg, elapsed.total_seconds())
    else:
        logger.debug(timing_msg, elapsed.total_seconds())
def test_no_default_workspace(self, mock_taskinfo):
    """Tests that perform_scheduling() only schedules the queued job without output data when the queued jobs
    have no output workspace defined"""

    mock_taskinfo.return_value = MagicMock()

    small_offer = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now())
    large_offer = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now())
    resource_mgr.add_new_offers([small_offer, large_offer])

    # First queued job: output data but no workspace defined, so it is expected to fail scheduling
    with_output = Queue.objects.get(id=self.queue_1.id)
    with_output.get_job_interface().definition['output_data'] = [{'name': 'my_output', 'type': 'file'}]
    with_output.configuration = with_output.get_execution_configuration().get_dict()
    with_output.save()

    # Second queued job: no output data and no workspace, so it is expected to pass
    without_output = Queue.objects.get(id=self.queue_2.id)
    without_output.configuration = without_output.get_execution_configuration().get_dict()
    without_output.save()

    manager = SchedulingManager()

    # Have the workspace manager return a workspace while scheduling runs
    workspace = {
        'name': 'my_workspace',
        'title': 'My Workspace',
        'description': 'My workspaces',
        'is_active': True,
        'json_config': {'version': '1.0', 'broker': {'type': 'host', 'host_path': '/host/path'}},
    }
    with patch('scheduler.scheduling.manager.workspace_mgr.get_workspaces') as mock_get_workspaces:
        mock_get_workspaces.return_value = workspace
        scheduled_count = manager.perform_scheduling(self._driver, now())

    # Only queue_2 is scheduled
    self.assertEqual(scheduled_count, 1)
    for queue, expected_count in ((self.queue_1, 0), (self.queue_2, 1)):
        self.assertEqual(JobExecution.objects.filter(job_id=queue.job_id).count(), expected_count)
    self.assertEqual(Queue.objects.filter(id__in=[self.queue_1.id, self.queue_2.id]).count(), 1)
def test_node_with_new_agent_id(self, mock_taskinfo):
    """Tests that perform_scheduling() uses a node's new agent ID after the node re-registers"""

    mock_taskinfo.return_value = MagicMock()

    # Host 2 is lost and comes back with the new agent ID of agent_3
    node_mgr.lost_node(self.agent_2)
    node_mgr.register_agents([self.agent_3])
    node_mgr.sync_with_database(scheduler_mgr.config)

    new_agent_offer = ResourceOffer('offer', self.agent_3.agent_id, self.framework_id,
                                    NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now())
    resource_mgr.add_new_offers([new_agent_offer])

    manager = SchedulingManager()
    scheduled_count = manager.perform_scheduling(self._driver, now())

    # Both queued job executions are scheduled
    self.assertEqual(scheduled_count, 2)

    # Exactly one driver call is expected; its second positional argument holds the launched tasks
    driver_calls = self._driver.method_calls
    self.assertEqual(1, len(driver_calls))
    launch_call = driver_calls[0]
    launched_tasks = launch_call[1][1]
    for launched_task in launched_tasks:
        self.assertEqual(self.agent_3.agent_id, launched_task.slave_id.value)
def offers(self, offers):
    """
    Invoked when resources have been offered to this framework. A single offer will only contain resources from a
    single agent.  Resources associated with an offer will not be re-offered to _this_ framework until either
    (a) this framework has rejected those resources or (b) those resources have been rescinded.  Note that
    resources may be concurrently offered to more than one framework at a time (depending on the allocator being
    used). In that case, the first framework to launch tasks using those resources will be able to use them while
    the other frameworks will have those resources rescinded (or if a framework has already launched tasks with
    those resources then those tasks will fail with a TASK_LOST status and a message saying as much).

    Every offer is declined while the scheduler is paused. Otherwise only SCALAR resources whose role is in
    settings.ACCEPTED_RESOURCE_ROLE are kept, and only offers with at least one kept resource are registered
    with the node and resource managers.

    :param offers: The offers received; each one is converted with from_mesos_offer() and must provide decline()
    """

    started = now()

    agents = {}
    offered_nodes = []
    resource_offers = []
    total_resources = NodeResources()
    skipped_roles = set()

    for offer in offers:
        scale_offer = from_mesos_offer(offer)
        offer_id = scale_offer.id.value
        agent_id = scale_offer.agent_id.value
        framework_id = scale_offer.framework_id.value
        hostname = scale_offer.hostname
        # The hostname is recorded even for offers that end up declined
        offered_nodes.append(hostname)

        # Ignore offers while we're paused
        if scheduler_mgr.config.is_paused:
            offer.decline()
            continue

        resource_list = []
        for resource in scale_offer.resources:
            # Only accept resources that are of SCALAR type and have a role matching our accept list
            if resource.type == RESOURCE_TYPE_SCALAR:
                if resource.role in settings.ACCEPTED_RESOURCE_ROLE:
                    # Lazy logging arguments: the message is only formatted when debug logging is enabled
                    logger.debug("Received scalar resource %s with value %i associated with role %s",
                                 resource.name, resource.scalar.value, resource.role)
                    resource_list.append(ScalarResource(resource.name, resource.scalar.value))
                else:
                    skipped_roles.add(resource.role)
                    # NOTE(review): decline() runs once per resource with an unaccepted role, so one offer can
                    # be declined several times and can still be added to resource_offers below when it also
                    # carries accepted resources - confirm this is intended
                    offer.decline()

        logger.debug("Number of resources: %i", len(resource_list))

        # Only register agent, if offers are being received
        if resource_list:
            resources = NodeResources(resource_list)
            total_resources.add(resources)
            agents[agent_id] = Agent(agent_id, hostname)
            resource_offers.append(ResourceOffer(offer_id, agent_id, framework_id, resources, started, offer))

    logger.debug("Offer analysis complete with %i resource offers.", len(resource_offers))

    node_mgr.register_agents(agents.values())
    logger.debug("Agents registered.")
    resource_mgr.add_new_offers(resource_offers)
    logger.debug("Resource offers added.")
    Node.objects.update_node_offers(offered_nodes, now())
    logger.debug("Node offer times updated.")

    num_offers = len(resource_offers)
    logger.info('Received %d offer(s) with %s from %d node(s)', num_offers, total_resources, len(agents))
    if skipped_roles:
        logger.warning('Skipped offers from roles that are not marked as accepted: %s', ','.join(skipped_roles))
    scheduler_mgr.add_new_offer_count(num_offers)

    # Log at warning level when handling the offers took longer than the normal threshold
    duration = now() - started
    msg = 'Scheduler resourceOffers() took %.3f seconds'
    if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
        logger.warning(msg, duration.total_seconds())
    else:
        logger.debug(msg, duration.total_seconds())
def test_add_allocated_offers_remove_all_tasks(self):
    """Tests calling add_allocated_offers() when there are not enough resources for the job exes or node tasks"""

    node = MagicMock()
    node.hostname = 'host_1'
    node.id = 1
    health_task = HealthTask('1234', 'agent_1')
    pull_task = PullTask('1234', 'agent_1')
    node.is_ready_for_new_job = MagicMock()
    node.is_ready_for_new_job.return_value = True
    node.is_ready_for_next_job_task = MagicMock()
    node.is_ready_for_next_job_task.return_value = True
    node.get_next_tasks = MagicMock()
    node.get_next_tasks.return_value = [health_task, pull_task]
    offered_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
    resource_set = ResourceSet(offered_resources, NodeResources(), watermark_resources)
    scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
    running_job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                              resources=NodeResources([Cpus(1.0), Mem(10.0)]))
    running_job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                              resources=NodeResources([Cpus(2.0), Mem(20.0)]))

    # Resources required by the node tasks plus the next task of each running job exe
    node_task_resources = NodeResources()
    node_task_resources.add(health_task.get_resources())
    node_task_resources.add(pull_task.get_resources())
    all_required_resources = NodeResources()
    all_required_resources.add(node_task_resources)
    all_required_resources.add(running_job_exe_1.next_task().get_resources())
    all_required_resources.add(running_job_exe_2.next_task().get_resources())

    # Set up node with node tasks and job exes (there would never be queued job exes since they would be
    # scheduled before add_allocated_offers() was called)
    scheduling_node.accept_node_tasks(now(), [])
    scheduling_node.accept_job_exe_next_task(running_job_exe_1, [])
    scheduling_node.accept_job_exe_next_task(running_job_exe_2, [])
    self.assertEqual(len(scheduling_node.allocated_tasks), 2)
    self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(all_required_resources))

    # Set up offers (not enough for job exes or node tasks)
    offer_1 = ResourceOffer('offer_1', 'agent_1', '1234', NodeResources([Cpus(0.1), Mem(600.0)]), now(), None)

    scheduling_node.add_allocated_offers([offer_1])
    self.assertListEqual(scheduling_node.allocated_offers, [offer_1])

    # All allocated tasks and job exes should be gone, leaving the full offered resources as remaining
    self.assertEqual(len(scheduling_node.allocated_tasks), 0)
    self.assertEqual(len(scheduling_node._allocated_running_job_exes), 0)
    self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
    self.assertTrue(scheduling_node.allocated_resources.is_equal(NodeResources()))
    self.assertTrue(scheduling_node._remaining_resources.is_equal(offered_resources))