def setUp(self):
    django.setup()

    Scheduler.objects.initialize_scheduler()

    self._driver = MagicMock()
    scheduler_mgr.sync_with_database()

    offer_mgr.clear()

    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    node_mgr.clear()
    node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
    with patch('scheduler.node.manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        node_mgr.sync_with_database('master_host', 5050)
    # Ignore initial cleanup tasks
    for node in node_mgr.get_nodes():
        node.initial_cleanup_completed()

    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0, disk_in_required=100.0,
                                                 disk_out_required=200.0, disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0, disk_in_required=400.0,
                                                 disk_out_required=45.0, disk_total_required=445.0)

    job_type_mgr.sync_with_database()

    self._scheduling_thread = SchedulingThread(self._driver, '123')
def _perform_sync(self):
    """Performs the sync with the database
    """
    scheduler_mgr.sync_with_database()
    job_type_mgr.sync_with_database()
    workspace_mgr.sync_with_database()

    mesos_master = scheduler_mgr.mesos_address
    node_mgr.sync_with_database(mesos_master.hostname, mesos_master.port)

    self._sync_running_job_executions()
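# For context, a minimal sketch of how a sync helper like _perform_sync()
# above is typically driven: a loop that re-syncs on a fixed throttle and
# logs failures rather than letting one bad sync kill the thread. The
# SyncThread name, THROTTLE_SECS value, and _running flag are illustrative
# assumptions, not APIs from the codebase above.
import logging
import time

logger = logging.getLogger(__name__)


class SyncThread(object):
    """Illustrative driver for a periodic database sync"""

    THROTTLE_SECS = 10  # assumed sync interval

    def __init__(self):
        self._running = True

    def _perform_sync(self):
        """Stand-in for the _perform_sync() method shown above"""
        pass

    def run(self):
        """Calls _perform_sync() on a fixed interval until shutdown"""
        while self._running:
            started = time.time()
            try:
                self._perform_sync()
            except Exception:
                logger.exception('Database sync failed, will retry')
            # Sleep off whatever remains of the throttle interval
            remaining = self.THROTTLE_SECS - (time.time() - started)
            if remaining > 0:
                time.sleep(remaining)

    def shutdown(self):
        """Signals the run() loop to exit after the current iteration"""
        self._running = False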
def _execute(self):
    """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`
    """
    scheduler_mgr.sync_with_database()
    job_type_mgr.sync_with_database()
    workspace_mgr.sync_with_database()

    node_mgr.sync_with_database(scheduler_mgr.config)
    cleanup_mgr.update_nodes(node_mgr.get_nodes())

    mesos_master = scheduler_mgr.mesos_address
    resource_mgr.sync_with_mesos(mesos_master.hostname, mesos_master.port)

    # Kill running tasks for canceled job executions
    for task_to_kill in job_exe_mgr.sync_with_database():
        pb_task_to_kill = mesos_pb2.TaskID()
        pb_task_to_kill.value = task_to_kill.id
        logger.info('Killing task %s', task_to_kill.id)
        self._driver.killTask(pb_task_to_kill)

    if settings.SECRETS_URL:
        secrets_mgr.sync_with_backend()
def _execute(self):
    """See :meth:`scheduler.threads.base_thread.BaseSchedulerThread._execute`
    """
    scheduler_mgr.sync_with_database()
    job_type_mgr.sync_with_database()
    workspace_mgr.sync_with_database()

    node_mgr.sync_with_database(scheduler_mgr.config)
    cleanup_mgr.update_nodes(node_mgr.get_nodes())

    mesos_master = scheduler_mgr.mesos_address
    if mesos_master:
        resource_mgr.sync_with_mesos(mesos_master.hostname, mesos_master.port)

    # Handle canceled job executions; job_exe_mgr.sync_with_database() returns
    # the job executions that finished since the last sync, so it is called
    # exactly once per pass
    for finished_job_exe in job_exe_mgr.sync_with_database():
        cleanup_mgr.add_job_execution(finished_job_exe)

    if settings.SECRETS_URL:
        secrets_mgr.sync_with_backend()
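# A hedged sketch of how an _execute()-style sync loop might be wired into
# scheduler startup using the standard threading module. It reuses the
# illustrative SyncThread from the sketch above; none of these names come
# from the codebase itself.
import threading


def start_sync_thread():
    """Starts the illustrative SyncThread on a daemon thread and returns
    both handles so the caller can shut it down cleanly
    """
    sync_thread = SyncThread()
    thread = threading.Thread(target=sync_thread.run, name='database-sync')
    thread.daemon = True  # do not block interpreter shutdown
    thread.start()
    return sync_thread, thread


# Usage: run the sync loop for the scheduler's lifetime, then stop it
# sync_thread, thread = start_sync_thread()
# ...
# sync_thread.shutdown()
# thread.join(timeout=30)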
def setUp(self):
    django.setup()

    Scheduler.objects.initialize_scheduler()

    self._driver = MagicMock()
    scheduler_mgr.sync_with_database()

    offer_mgr.clear()

    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    node_mgr.clear()
    node_mgr.register_agent_ids([self.node_agent_1, self.node_agent_2])
    with patch('scheduler.node.manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        node_mgr.sync_with_database('master_host', 5050)
    # Ignore initial cleanup tasks and health check tasks
    for node in node_mgr.get_nodes():
        node._last_health_task = now()
        node._initial_cleanup_completed()
        node._update_state()

    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0, disk_in_required=100.0,
                                                 disk_out_required=200.0, disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0, disk_in_required=400.0,
                                                 disk_out_required=45.0, disk_total_required=445.0)

    job_type_mgr.sync_with_database()

    self._scheduling_thread = SchedulingThread(self._driver, '123')
def test_node_with_new_agent_id(self):
    """Tests successfully calling perform_scheduling() when a node gets a new agent ID"""

    # Host 2 gets new agent ID of agent_3
    node_mgr.lost_node(self.agent_2)
    node_mgr.register_agents([self.agent_3])
    node_mgr.sync_with_database(scheduler_mgr.config)

    offer = ResourceOffer('offer', self.agent_3.agent_id, self.framework_id,
                          NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([offer])

    scheduling_manager = SchedulingManager()
    num_tasks = scheduling_manager.perform_scheduling(self._client, now())
    self.assertEqual(num_tasks, 2)  # Schedule both queued job executions

    # Check that created tasks have the correct agent ID
    calls = self._client.method_calls
    # One call checks for the driver and a second launches the tasks
    self.assertEqual(2, len(calls))
    # Get the launched tasks off of the 2nd call (index 1)
    mesos_tasks = calls[1][1][1]
    for mesos_task in mesos_tasks:
        self.assertEqual(self.agent_3.agent_id, mesos_task['agent_id']['value'])
def test_generate_nodes_status(self):
    """Tests the _generate_nodes_status method"""

    # Setup nodes
    from scheduler.node.manager import node_mgr
    node_mgr.clear()

    nodes = dependency_mgr._generate_nodes_status()
    self.assertDictEqual(nodes, {'OK': False,
                                 'detail': {'msg': 'No nodes reported'},
                                 'errors': [{'NODES_OFFLINE': 'No nodes reported.'}],
                                 'warnings': []})

    node_mgr.register_agents([self.agent_1, self.agent_2, self.agent_3, self.agent_4, self.agent_5,
                              self.agent_6, self.agent_7, self.agent_8, self.agent_9, self.agent_10])
    node_mgr.sync_with_database(scheduler_mgr.config)
    nodes = node_mgr.get_nodes()
    self.assertEqual(len(nodes), 10)

    nodes = dependency_mgr._generate_nodes_status()
    self.assertDictEqual(nodes, {'OK': True,
                                 'detail': {'msg': 'Enough nodes are online to function.'},
                                 'errors': [],
                                 'warnings': []})

    node_mgr.lost_node(self.agent_1.agent_id)
    node_mgr.lost_node(self.agent_2.agent_id)
    node_mgr.lost_node(self.agent_3.agent_id)
    node_mgr.lost_node(self.agent_4.agent_id)

    nodes = dependency_mgr._generate_nodes_status()
    self.assertDictEqual(nodes, {'OK': False,
                                 'detail': {'msg': 'Over a third of nodes are in an error state'},
                                 'errors': [{'NODES_ERRORED': 'Over a third of the nodes are offline or degraded.'}],
                                 'warnings': [{'NODES_OFFLINE': '4 nodes are offline'}]})
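# For reference, a minimal sketch of the kind of roll-up the test above
# exercises. The function name, the is_online/is_degraded attributes, and
# the one-third threshold are assumptions inferred from the assertions, not
# the real dependency_mgr._generate_nodes_status() implementation.
def generate_nodes_status(nodes):
    """Illustrative node status roll-up matching the shape asserted above"""
    if not nodes:
        return {'OK': False,
                'detail': {'msg': 'No nodes reported'},
                'errors': [{'NODES_OFFLINE': 'No nodes reported.'}],
                'warnings': []}

    # Partition nodes by assumed health flags
    offline = [node for node in nodes if not node.is_online]
    degraded = [node for node in nodes if node.is_online and node.is_degraded]

    warnings = []
    if offline:
        warnings.append({'NODES_OFFLINE': '%d nodes are offline' % len(offline)})

    # Error out once more than a third of the nodes are unhealthy
    if len(offline) + len(degraded) > len(nodes) / 3.0:
        return {'OK': False,
                'detail': {'msg': 'Over a third of nodes are in an error state'},
                'errors': [{'NODES_ERRORED': 'Over a third of the nodes are offline or degraded.'}],
                'warnings': warnings}

    return {'OK': True,
            'detail': {'msg': 'Enough nodes are online to function.'},
            'errors': [],
            'warnings': warnings}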