def test_lost_node(self):
    """Tests that accepted job executions are discarded once their node is lost

    A queued job execution and a running job execution are both accepted against offers from the same agent. After
    that agent is reported lost, popping the offers with accepted job executions must yield nothing.
    """

    small_resources = NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)
    large_resources = NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)
    offers = [
        ResourceOffer('offer_1', self.node_agent, small_resources),
        ResourceOffer('offer_2', self.node_agent, large_resources),
    ]

    offer_manager = OfferManager()
    offer_manager.add_new_offers(offers)
    offer_manager.update_nodes([self.node, self.paused_node])
    offer_manager.ready_new_offers()

    # Both kinds of job execution should be accepted while the node is still available
    queued_job_exe = QueuedJobExecution(self.queue_1)
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.ACCEPTED)
    running_job_exe = RunningJobExecution(self.running_job_exe_2)
    self.assertEqual(offer_manager.consider_next_task(running_job_exe), OfferManager.ACCEPTED)

    # Losing the node should leave no offers with accepted job executions behind
    offer_manager.lost_node(self.node_agent)
    remaining_offers = offer_manager.pop_offers_with_accepted_job_exes()
    self.assertEqual(len(remaining_offers), 0)
def setUp(self):
    """Builds the fixtures for the scheduling thread tests

    Initializes the scheduler, creates one instance of each manager, registers two nodes (with the Mesos slave
    lookup patched out), creates two queue entries, and finally constructs the scheduling thread under test.
    """

    django.setup()

    Scheduler.objects.initialize_scheduler()

    # The Mesos driver is mocked; all managers are real instances
    self._driver = MagicMock()
    self._job_exe_manager = RunningJobExecutionManager()
    self._job_type_manager = JobTypeManager()
    self._node_manager = NodeManager()
    self._offer_manager = OfferManager()
    self._scheduler_manager = SchedulerManager()
    self._workspace_manager = WorkspaceManager()

    self._scheduler_manager.sync_with_database()

    # Two nodes, each identified by its own agent ID
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    hostname_1 = 'host_1'
    hostname_2 = 'host_2'
    self.node_1 = node_test_utils.create_node(hostname=hostname_1, slave_id=self.node_agent_1)
    self.node_2 = node_test_utils.create_node(hostname=hostname_2, slave_id=self.node_agent_2)
    self.slave_infos = [
        SlaveInfo(hostname_1, slave_id=self.node_agent_1),
        SlaveInfo(hostname_2, slave_id=self.node_agent_2),
    ]
    self._node_manager.add_agent_ids([self.node_agent_1, self.node_agent_2])

    # Patch the slave lookup so the node sync uses the canned slave infos above
    with patch('scheduler.sync.node_manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        self._node_manager.sync_with_database('master_host', 5050)

    self.queue_1 = queue_test_utils.create_queue(
        cpus_required=4.0,
        mem_required=1024.0,
        disk_in_required=100.0,
        disk_out_required=200.0,
        disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(
        cpus_required=8.0,
        mem_required=512.0,
        disk_in_required=400.0,
        disk_out_required=45.0,
        disk_total_required=445.0)
    self._job_type_manager.sync_with_database()

    thread_args = (
        self._driver,
        '123',
        self._job_exe_manager,
        self._job_type_manager,
        self._node_manager,
        self._offer_manager,
        self._scheduler_manager,
        self._workspace_manager,
    )
    self._scheduling_thread = SchedulingThread(*thread_args)
def __init__(self):
    """Constructor

    Creates one instance of each manager. The driver, framework ID, master hostname/port and the thread attributes
    all start out as None.
    """

    # Managers are created immediately
    self._job_exe_manager = RunningJobExecutionManager()
    self._job_type_manager = JobTypeManager()
    self._node_manager = NodeManager()
    self._offer_manager = OfferManager()
    self._scheduler_manager = SchedulerManager()
    self._workspace_manager = WorkspaceManager()

    # Connection details are not known at construction time
    self._driver = self._framework_id = None
    self._master_hostname = self._master_port = None

    # No threads exist yet
    self._db_sync_thread = self._recon_thread = self._scheduling_thread = None
def test_no_ready_offers(self):
    """Tests that job executions are turned away while no offers have been readied

    Offers are added to the manager, but the nodes are never updated and the offers are never readied, so a queued
    job execution finds no nodes available and a running job execution sees its node as offline.
    """

    paused_agent_offer = ResourceOffer(
        'offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0))
    agent_offer = ResourceOffer(
        'offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0))

    offer_manager = OfferManager()
    offer_manager.add_new_offers([paused_agent_offer, agent_offer])

    queued_job_exe = QueuedJobExecution(self.queue_1)
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.NO_NODES_AVAILABLE)

    running_job_exe = RunningJobExecution(self.running_job_exe_1)
    self.assertEqual(offer_manager.consider_next_task(running_job_exe), OfferManager.NODE_OFFLINE)
def test_high_disk(self):
    """Tests that a queued job execution requiring too much disk space is rejected

    The rejection must be reported as NOT_ENOUGH_DISK and no offer may end up with an accepted job execution.
    """

    paused_agent_offer = ResourceOffer(
        'offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0))
    agent_offer = ResourceOffer(
        'offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0))

    offer_manager = OfferManager()
    offer_manager.add_new_offers([paused_agent_offer, agent_offer])
    offer_manager.update_nodes([self.node, self.paused_node])
    offer_manager.ready_new_offers()

    high_disk_job_exe = QueuedJobExecution(self.queue_high_disk)
    self.assertEqual(offer_manager.consider_new_job_exe(high_disk_job_exe), OfferManager.NOT_ENOUGH_DISK)

    # Nothing was accepted, so there is nothing to pop
    accepted_offers = offer_manager.pop_offers_with_accepted_job_exes()
    self.assertEqual(len(accepted_offers), 0)
def test_all_offers_paused(self):
    """Tests that a queued job execution is rejected when every offer belongs to a paused node

    Both offers come from the paused agent and only the paused node is given to the manager, so the result must be
    NO_NODES_AVAILABLE and no offer may end up with an accepted job execution.
    """

    first_paused_offer = ResourceOffer(
        'offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0))
    second_paused_offer = ResourceOffer(
        'offer_2', self.node_agent_paused, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0))

    offer_manager = OfferManager()
    offer_manager.add_new_offers([first_paused_offer, second_paused_offer])
    offer_manager.update_nodes([self.paused_node])
    offer_manager.ready_new_offers()

    queued_job_exe = QueuedJobExecution(self.queue_1)
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.NO_NODES_AVAILABLE)

    # Nothing was accepted, so there is nothing to pop
    accepted_offers = offer_manager.pop_offers_with_accepted_job_exes()
    self.assertEqual(len(accepted_offers), 0)
def test_offers_with_no_nodes(self):
    """Tests considering job executions when the manager has not been given any nodes

    ready_new_offers() is called without a preceding update_nodes(), so a queued job execution finds no nodes
    available and a running job execution sees its node as not ready.
    """

    paused_agent_offer = ResourceOffer(
        'offer_1', self.node_agent_paused, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0))
    agent_offer = ResourceOffer(
        'offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0))

    offer_manager = OfferManager()
    offer_manager.add_new_offers([paused_agent_offer, agent_offer])
    offer_manager.ready_new_offers()

    queued_job_exe = QueuedJobExecution(self.queue_1)
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.NO_NODES_AVAILABLE)

    running_job_exe = RunningJobExecution(self.running_job_exe_1)
    self.assertEqual(offer_manager.consider_next_task(running_job_exe), OfferManager.NODE_NOT_READY)
def __init__(self, executor):
    """Constructor

    Stores the given executor and creates one instance of each manager. The driver, framework ID, master
    hostname/port and the thread attributes all start out as None.

    :param executor: The executor to use for launching tasks
    :type executor: :class:`mesos_pb2.ExecutorInfo`
    """

    self._executor = executor

    # Managers are created immediately
    self._job_exe_manager = RunningJobExecutionManager()
    self._job_type_manager = JobTypeManager()
    self._node_manager = NodeManager()
    self._offer_manager = OfferManager()
    self._scheduler_manager = SchedulerManager()

    # Connection details are not known at construction time
    self._driver = self._framework_id = None
    self._master_hostname = self._master_port = None

    # No threads exist yet
    self._db_sync_thread = self._recon_thread = self._scheduling_thread = None
def test_lost_node_that_comes_back(self):
    """Tests that when a lost node comes back, it can schedule tasks again

    After the node is lost, a queued job execution must find no nodes available. Once the node has a new agent ID
    and a fresh offer for that ID has been readied, the same job execution must be accepted.
    """

    old_offers = [
        ResourceOffer('offer_1', self.node_agent, NodeResources(cpus=2.0, mem=1024.0, disk=1024.0)),
        ResourceOffer('offer_2', self.node_agent, NodeResources(cpus=25.0, mem=2048.0, disk=2048.0)),
    ]

    offer_manager = OfferManager()
    offer_manager.add_new_offers(old_offers)
    offer_manager.update_nodes([self.node])
    offer_manager.ready_new_offers()

    # Node goes down and comes back up with new agent ID
    offer_manager.lost_node(self.node_agent)
    new_node_agent = 'i_am_a_new_node_agent'
    self.node.update_from_mesos(agent_id=new_node_agent)

    queued_job_exe = QueuedJobExecution(self.queue_1)

    # Offers for previous agent should be gone, do not schedule the job exe
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.NO_NODES_AVAILABLE)

    new_offer = ResourceOffer('offer_3', new_node_agent, NodeResources(cpus=35.0, mem=3048.0, disk=3048.0))
    offer_manager.add_new_offers([new_offer])
    offer_manager.update_nodes([self.node])
    offer_manager.ready_new_offers()

    # New offers have come in for new agent ID, should schedule job exe now
    self.assertEqual(offer_manager.consider_new_job_exe(queued_job_exe), OfferManager.ACCEPTED)
    accepted_offers = offer_manager.pop_offers_with_accepted_job_exes()
    self.assertEqual(len(accepted_offers), 1)