def test_job_exe_no_offers(self):
    """Tests the NodeManager where a node is running an exe and has not given offers to Scale in 1 hour.

    Expected behavior: the scheduler and the DB are in sync and the node is still active.
    """

    last_offer = now() - datetime.timedelta(hours=1)
    node_mgr = NodeManager()
    node_mgr.register_agents([self.agent_1])
    node_mgr.sync_with_database(scheduler_mgr.config)

    # Add job to node
    job_test_utils.create_running_job_exe(agent_id=self.agent_1, node=self.node_1)

    # Set last_offer_received to 1 hour ago
    Node.objects.filter(id=self.node_1.id).update(last_offer_received=last_offer)

    # This inspects what nodes are running jobs and what nodes need to be removed if they
    # have not sent offers in the last 5 minutes
    node_mgr.sync_with_database(scheduler_mgr.config)

    # Get the DB and Scheduler state and make sure they are consistent
    db_record = Node.objects.get(id=self.node_1.id)
    scheduler_record = node_mgr.get_node(self.agent_1.agent_id)

    # The third positional argument of assertEqual is the failure message, not an expected value, so
    # comparing the two flags to each other would also pass when both are False. Check each flag on its own.
    self.assertIsNotNone(scheduler_record)
    self.assertTrue(db_record.is_active)
    self.assertTrue(scheduler_record._is_active)
def test_lost_known_node(self, mock_get_slaves):
    """Tests that a node lost after a database sync stays in the manager but is no longer online"""

    mock_get_slaves.return_value = self.slave_infos
    agent_ids = [self.node_agent_1, self.node_agent_2]

    node_manager = NodeManager()
    node_manager.register_agent_ids(agent_ids)
    node_manager.sync_with_database('master_host', 5050)
    node_manager.lost_node(self.node_agent_2)

    # Both nodes are still tracked by the manager
    self.assertEqual(len(node_manager.get_nodes()), 2)

    # Node 1 was never lost, node 2 was
    remaining_node = node_manager.get_node(self.node_agent_1)
    self.assertTrue(remaining_node.is_online)
    lost_node = node_manager.get_node(self.node_agent_2)
    self.assertFalse(lost_node.is_online)
def test_lost_unknown_node(self, mock_get_slaves):
    """Tests that a node lost before the first database sync never shows up in the manager"""

    mock_get_slaves.return_value = self.slave_infos
    agent_ids = [self.node_agent_1, self.node_agent_2]

    node_manager = NodeManager()
    node_manager.register_agent_ids(agent_ids)
    # Node 2 is lost while still unknown, i.e. before any sync with the database
    node_manager.lost_node(self.node_agent_2)
    node_manager.sync_with_database('master_host', 5050)

    # Only node 1 should be present
    self.assertEqual(len(node_manager.get_nodes()), 1)
    known_node = node_manager.get_node(self.node_agent_1)
    self.assertEqual(known_node.hostname, self.node_1.hostname)
    self.assertTrue(known_node.is_online)
    self.assertIsNone(node_manager.get_node(self.node_agent_2))
def test_successful_update(self):
    """Tests that registering two agents and syncing with the database yields two nodes"""

    agents = [self.agent_1, self.agent_2]

    node_manager = NodeManager()
    node_manager.register_agents(agents)
    node_manager.sync_with_database(scheduler_mgr.config)

    self.assertEqual(len(node_manager.get_nodes()), 2)
def test_sync_with_renamed_node(self):
    """Tests that a database sync still succeeds after a node model's hostname is changed in the database"""

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])

    # First sync, before the hostname changes
    node_manager.sync_with_database(scheduler_mgr.config)

    renamed_node = self.node_1
    renamed_node.hostname = 'new_host_1'
    renamed_node.save()

    # Second sync, the test passes as long as nothing is raised here
    node_manager.sync_with_database(scheduler_mgr.config)
def test_get_initial_cleanup_tasks(self):
    """Tests that the manager hands out one initial cleanup task per node once nodes are synced"""

    current_time = now()
    node_manager = NodeManager()

    # With no nodes registered there is nothing to schedule
    tasks_before_sync = node_manager.get_next_tasks(current_time)
    self.assertListEqual(tasks_before_sync, [])

    node_manager.register_agents([self.agent_1, self.agent_2])
    node_manager.sync_with_database(scheduler_mgr.config)
    for synced_node in node_manager.get_nodes():
        synced_node._last_health_task = current_time

    cleanup_tasks = node_manager.get_next_tasks(current_time)
    self.assertEqual(len(cleanup_tasks), 2)
    for cleanup_task in cleanup_tasks:
        self.assertTrue(cleanup_task.is_initial_cleanup)
def test_successful_update(self, mock_get_slaves):
    """Tests that registering two agent IDs and syncing with the database yields two nodes"""

    mock_get_slaves.return_value = self.slave_infos
    agent_ids = [self.node_agent_1, self.node_agent_2]

    node_manager = NodeManager()
    node_manager.register_agent_ids(agent_ids)
    node_manager.sync_with_database('master_host', 5050)

    self.assertEqual(len(node_manager.get_nodes()), 2)
def test_sync_with_renamed_node(self, mock_get_slaves):
    """Tests that a database sync still succeeds after a node model's hostname is changed in the database"""

    mock_get_slaves.return_value = self.slave_infos

    node_manager = NodeManager()
    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])

    # First sync, before the hostname changes
    node_manager.sync_with_database('master_host', 5050)

    renamed_node = self.node_1
    renamed_node.hostname = 'new_host_1'
    renamed_node.save()

    # Second sync, the test passes as long as nothing is raised here
    node_manager.sync_with_database('master_host', 5050)
def test_get_initial_cleanup_tasks(self, mock_get_slaves):
    """Tests that the manager hands out one initial cleanup task per node once nodes are synced"""

    mock_get_slaves.return_value = self.slave_infos
    current_time = now()
    node_manager = NodeManager()

    # With no nodes registered there is nothing to schedule
    tasks_before_sync = node_manager.get_next_tasks(current_time)
    self.assertListEqual(tasks_before_sync, [])

    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])
    node_manager.sync_with_database('master_host', 5050)
    for synced_node in node_manager.get_nodes():
        # NOTE(review): attribute is spelled "_last_heath_task" here, while other tests in this file set
        # "_last_health_task" - confirm which name the Node class targeted by this test actually uses
        synced_node._last_heath_task = current_time

    cleanup_tasks = node_manager.get_next_tasks(current_time)
    self.assertEqual(len(cleanup_tasks), 2)
    for cleanup_task in cleanup_tasks:
        self.assertTrue(cleanup_task.is_initial_cleanup)
def test_get_pull_tasks(self):
    """Tests getting Docker pull tasks from the manager"""

    when = now()
    manager = NodeManager()
    manager.register_agents([self.agent_1, self.agent_2])
    manager.sync_with_database(scheduler_mgr.config)

    # Move every node past its initial cleanup so that the next tasks handed out are the pull tasks
    for node in manager.get_nodes():
        node._last_health_task = when
        node._initial_cleanup_completed()
        node._update_state()

    tasks = manager.get_next_tasks(when)
    self.assertEqual(len(tasks), 2)
    for task in tasks:
        # assertIsInstance reports the offending object and expected type on failure
        self.assertIsInstance(task, PullTask)
def test_sync_node_model(self):
    """Tests that a node model deactivated in the database is seen as inactive after the next sync"""

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])

    # First sync, node 1 is still active in the database
    node_manager.sync_with_database(scheduler_mgr.config)

    # Deactivate node 1 in the database
    self.node_1.is_active = False
    self.node_1.save()

    # Second sync picks up the model change
    node_manager.sync_with_database(scheduler_mgr.config)

    matching_nodes = [
        candidate for candidate in node_manager.get_nodes()
        if candidate.hostname == self.node_1.hostname
    ]
    for matched_node in matching_nodes:
        self.assertFalse(matched_node.is_active)
    # Node 1 must still be present in the manager
    self.assertTrue(bool(matching_nodes))
def test_no_job_exe_no_offers(self):
    """Tests the NodeManager where a node is not running an exe and has not given offers to Scale in 1 hour.

    Expected behavior: the node is no longer returned by the manager and its DB model has is_active=False.
    """

    one_hour_ago = now() - datetime.timedelta(hours=1)

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1])
    node_manager.sync_with_database(scheduler_mgr.config)

    # Pretend the last offer from the node arrived an hour ago
    stale_nodes = Node.objects.filter(id=self.node_1.id)
    stale_nodes.update(last_offer_received=one_hour_ago)

    # This inspects what nodes are running jobs and what nodes need to be removed if they
    # have not sent offers in the last 5 minutes
    node_manager.sync_with_database(scheduler_mgr.config)

    # Check the scheduler state against the DB state
    node_model = Node.objects.get(id=self.node_1.id)
    self.assertIsNone(node_manager.get_node(self.agent_1.agent_id))
    self.assertEqual(node_model.is_active, False)
def test_get_pull_tasks(self, mock_get_slaves):
    """Tests getting Docker pull tasks from the manager"""

    mock_get_slaves.return_value = self.slave_infos
    when = now()
    manager = NodeManager()
    manager.register_agent_ids([self.node_agent_1, self.node_agent_2])
    manager.sync_with_database('master_host', 5050)

    # Move every node past its initial cleanup so that the next tasks handed out are the pull tasks
    for node in manager.get_nodes():
        # NOTE(review): attribute is spelled "_last_heath_task" here, while other tests in this file set
        # "_last_health_task" - confirm which name the Node class targeted by this test actually uses
        node._last_heath_task = when
        node._initial_cleanup_completed()
        node._update_state()

    tasks = manager.get_next_tasks(when)
    self.assertEqual(len(tasks), 2)
    for task in tasks:
        # assertIsInstance reports the offending object and expected type on failure
        self.assertIsInstance(task, PullTask)
def test_sync_node_model(self, mock_get_slaves):
    """Tests that a node model deactivated in the database is seen as inactive after the next sync"""

    mock_get_slaves.return_value = self.slave_infos

    node_manager = NodeManager()
    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])

    # First sync, node 1 is still active in the database
    node_manager.sync_with_database('master_host', 5050)

    # Deactivate node 1 in the database
    self.node_1.is_active = False
    self.node_1.save()

    # Second sync picks up the model change
    node_manager.sync_with_database('master_host', 5050)

    matching_nodes = [
        candidate for candidate in node_manager.get_nodes()
        if candidate.hostname == self.node_1.hostname
    ]
    for matched_node in matching_nodes:
        self.assertFalse(matched_node.is_active)
    # Node 1 must still be present in the manager
    self.assertTrue(bool(matching_nodes))
def test_lost_unknown_node(self):
    """Tests that a node lost before the first database sync never shows up in the manager"""

    lost_agent_id = self.agent_2.agent_id

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])
    # Node 2 is lost while still unknown, i.e. before any sync with the database
    node_manager.lost_node(lost_agent_id)
    node_manager.sync_with_database(scheduler_mgr.config)

    # Only node 1 should be present
    self.assertEqual(len(node_manager.get_nodes()), 1)
    known_node = node_manager.get_node(self.agent_1.agent_id)
    self.assertEqual(known_node.hostname, self.node_1.hostname)
    self.assertTrue(known_node._is_online)
    self.assertIsNone(node_manager.get_node(self.agent_2.agent_id))
def test_sync_and_remove_node_model(self):
    """Tests that a node which is both inactive in the database and lost is dropped by the next sync"""

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])

    # First sync, node 1 is still active and online
    node_manager.sync_with_database(scheduler_mgr.config)

    # Deactivate node 1 in the database
    self.node_1.is_active = False
    self.node_1.save()

    # Then the scheduler loses it
    node_manager.lost_node(self.agent_1.agent_id)

    # Second sync
    node_manager.sync_with_database(scheduler_mgr.config)

    # No remaining node may carry node 1's hostname
    hostname_matches = [
        candidate.hostname == self.node_1.hostname
        for candidate in node_manager.get_nodes()
    ]
    self.assertFalse(any(hostname_matches))
def test_lost_known_node(self, mock_get_slaves):
    """Tests that a node lost after a database sync stays in the manager but is no longer online"""

    mock_get_slaves.return_value = self.slave_infos
    agent_ids = [self.node_agent_1, self.node_agent_2]

    node_manager = NodeManager()
    node_manager.register_agent_ids(agent_ids)
    node_manager.sync_with_database('master_host', 5050)
    node_manager.lost_node(self.node_agent_2)

    # Both nodes are still tracked by the manager
    self.assertEqual(len(node_manager.get_nodes()), 2)

    # Node 1 was never lost, node 2 was
    remaining_node = node_manager.get_node(self.node_agent_1)
    self.assertTrue(remaining_node._is_online)
    lost_node = node_manager.get_node(self.node_agent_2)
    self.assertFalse(lost_node._is_online)
def test_lost_unknown_node(self, mock_get_slaves):
    """Tests that a node lost before the first database sync never shows up in the manager"""

    mock_get_slaves.return_value = self.slave_infos
    agent_ids = [self.node_agent_1, self.node_agent_2]

    node_manager = NodeManager()
    node_manager.register_agent_ids(agent_ids)
    # Node 2 is lost while still unknown, i.e. before any sync with the database
    node_manager.lost_node(self.node_agent_2)
    node_manager.sync_with_database('master_host', 5050)

    # Only node 1 should be present
    self.assertEqual(len(node_manager.get_nodes()), 1)
    known_node = node_manager.get_node(self.node_agent_1)
    self.assertEqual(known_node.hostname, self.node_1.hostname)
    self.assertTrue(known_node._is_online)
    self.assertIsNone(node_manager.get_node(self.node_agent_2))
def test_job_exe_clean_task(self):
    """Tests that the NodeManager returns a cleanup task for a job execution handed to the CleanupManager"""

    current_time = now()

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])
    node_manager.sync_with_database(scheduler_mgr.config)

    cleanup_manager = CleanupManager()
    cleanup_manager.update_nodes(node_manager.get_nodes())

    initial_tasks = node_manager.get_next_tasks(current_time)
    task_manager = TaskManager()

    # Launch each initial task and immediately report it as finished
    for initial_task in initial_tasks:
        task_manager.launch_tasks([initial_task], now())
        finished_update = job_test_utils.create_task_status_update(
            initial_task.id,
            initial_task.agent_id,
            TaskStatusUpdate.FINISHED,
            now(),
        )
        task_manager.handle_task_update(finished_update)
        node_manager.handle_task_update(finished_update)

    # Flag the image pull as done on every node so that no pull tasks are handed out
    for synced_node in node_manager.get_nodes():
        synced_node._image_pull_completed()
        synced_node._update_state()

    # Register a running job execution on node 1 for cleanup
    running_job_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_1, node=self.node_1)
    cleanup_manager.add_job_execution(running_job_exe)

    # Exactly one, non-initial, cleanup task is expected and it targets agent 1
    next_tasks = node_manager.get_next_tasks(current_time)
    self.assertEqual(len(next_tasks), 1)
    cleanup_task = next_tasks[0]
    self.assertEqual(cleanup_task.agent_id, self.agent_1.agent_id)
    self.assertFalse(cleanup_task.is_initial_cleanup)
    self.assertEqual(len(cleanup_task.job_exes), 1)
def test_sync_and_remove_node_model(self, mock_get_slaves):
    """Tests that a node which is both inactive in the database and lost is dropped by the next sync"""

    mock_get_slaves.return_value = self.slave_infos

    node_manager = NodeManager()
    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])

    # First sync, node 1 is still active and online
    node_manager.sync_with_database('master_host', 5050)

    # Deactivate node 1 in the database
    self.node_1.is_active = False
    self.node_1.save()

    # Then the scheduler loses it
    node_manager.lost_node(self.node_agent_1)

    # Second sync
    node_manager.sync_with_database('master_host', 5050)

    # No remaining node may carry node 1's hostname
    hostname_matches = [
        candidate.hostname == self.node_1.hostname
        for candidate in node_manager.get_nodes()
    ]
    self.assertFalse(any(hostname_matches))
def test_pull_task_change_agent_id(self):
    """Tests the NodeManager where a node's agent ID changes during a pull task"""

    when = now()
    manager = NodeManager()
    manager.register_agents([self.agent_1, self.agent_2])
    manager.sync_with_database(scheduler_mgr.config)

    # Move every node past its initial cleanup so that the next tasks handed out are the pull tasks
    for node in manager.get_nodes():
        node._last_health_task = when
        node._initial_cleanup_completed()
        node._update_state()

    # Launch the pull tasks and remember the one that belongs to agent 2
    tasks = manager.get_next_tasks(when)
    task_mgr = TaskManager()
    task_2 = None
    for task in tasks:
        task_mgr.launch_tasks([task], when)
        if task.agent_id == self.agent_2.agent_id:
            task_2 = task
    # Fail with a clear message here rather than with an AttributeError on task_2.id further down
    self.assertIsNotNone(task_2, 'No task was returned for agent 2')

    # Node 2 changes agent ID to 3
    manager.lost_node(self.agent_2.agent_id)
    manager.register_agents([self.agent_3])
    manager.sync_with_database(scheduler_mgr.config)
    for node in manager.get_nodes():
        node._last_health_task = when
        node._initial_cleanup_completed()
        node._update_state()

    # Should get new Docker pull task for node 2
    tasks = manager.get_next_tasks(when)
    self.assertEqual(len(tasks), 1)
    new_task_2 = tasks[0]
    self.assertEqual(new_task_2.agent_id, self.agent_3.agent_id)

    # Task update comes back for original node 2 Docker pull task, manager should ignore with no exception
    update = job_test_utils.create_task_status_update(task_2.id, task_2.agent_id, TaskStatusUpdate.FAILED, when)
    task_mgr.handle_task_update(update)
    manager.handle_task_update(update)
def test_change_agent_id_with_inactive_node(self):
    """Tests that an inactive node which re-registers under a new agent ID is tracked under that new ID"""

    old_agent_id = self.agent_2.agent_id
    new_agent_id = self.agent_3.agent_id

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])
    node_manager.sync_with_database(scheduler_mgr.config)

    # Deactivate node 2 in the database and let the manager pick that up
    node_2_id = node_manager.get_node(old_agent_id).id
    Node.objects.filter(id=node_2_id).update(is_active=False)
    node_manager.sync_with_database(scheduler_mgr.config)

    # The old agent is lost and agent 3 registers in its place
    node_manager.lost_node(old_agent_id)
    node_manager.register_agents([self.agent_3])
    node_manager.sync_with_database(scheduler_mgr.config)

    # Two nodes are expected: one for agent 1 and one for agent 3, both online
    self.assertEqual(len(node_manager.get_nodes()), 2)

    first_node = node_manager.get_node(self.agent_1.agent_id)
    self.assertEqual(first_node.hostname, self.node_1.hostname)
    self.assertTrue(first_node._is_online)

    # The old agent ID no longer resolves to a node
    self.assertIsNone(node_manager.get_node(old_agent_id))

    # The node behind the new agent ID keeps its hostname and stays inactive
    moved_node = node_manager.get_node(new_agent_id)
    self.assertEqual(moved_node.hostname, 'host_2')
    self.assertTrue(moved_node._is_online)
    self.assertFalse(moved_node._is_active)
def test_change_agent_id(self):
    """Tests that a node which re-registers under a new agent ID is tracked under that new ID"""

    old_agent_id = self.agent_2.agent_id
    new_agent_id = self.agent_3.agent_id

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])
    node_manager.sync_with_database(scheduler_mgr.config)

    # The old agent is lost and agent 3 registers in its place
    node_manager.lost_node(old_agent_id)
    node_manager.register_agents([self.agent_3])
    node_manager.sync_with_database(scheduler_mgr.config)

    # Two nodes are expected: one for agent 1 and one for agent 3, both online
    self.assertEqual(len(node_manager.get_nodes()), 2)

    first_node = node_manager.get_node(self.agent_1.agent_id)
    self.assertEqual(first_node.hostname, self.node_1.hostname)
    self.assertTrue(first_node._is_online)

    # The old agent ID no longer resolves to a node
    self.assertIsNone(node_manager.get_node(old_agent_id))

    moved_node = node_manager.get_node(new_agent_id)
    self.assertEqual(moved_node.hostname, 'host_2')
    self.assertTrue(moved_node._is_online)
def test_lost_known_node(self):
    """Tests that a node lost after a database sync stays in the manager but is no longer online"""

    lost_agent_id = self.agent_2.agent_id

    node_manager = NodeManager()
    node_manager.register_agents([self.agent_1, self.agent_2])
    node_manager.sync_with_database(scheduler_mgr.config)
    node_manager.lost_node(lost_agent_id)

    # Both nodes are still tracked by the manager
    self.assertEqual(len(node_manager.get_nodes()), 2)

    # Node 1 was never lost, node 2 was
    remaining_node = node_manager.get_node(self.agent_1.agent_id)
    self.assertTrue(remaining_node._is_online)
    lost_node = node_manager.get_node(self.agent_2.agent_id)
    self.assertFalse(lost_node._is_online)
def test_change_agent_id(self, mock_get_slaves):
    """Tests that a node which re-registers under a new agent ID is tracked under that new ID"""

    mock_get_slaves.return_value = self.slave_infos

    node_manager = NodeManager()
    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])
    node_manager.sync_with_database('master_host', 5050)

    # From here on the mocked slave lookup returns the updated slave infos
    mock_get_slaves.return_value = self.slave_infos_updated

    # The old agent is lost and agent 3 registers in its place
    node_manager.lost_node(self.node_agent_2)
    node_manager.register_agent_ids([self.node_agent_3])
    node_manager.sync_with_database('master_host', 5050)

    # Two nodes are expected: one for agent 1 and one for agent 3, both online
    self.assertEqual(len(node_manager.get_nodes()), 2)

    first_node = node_manager.get_node(self.node_agent_1)
    self.assertEqual(first_node.hostname, self.node_1.hostname)
    self.assertTrue(first_node.is_online)

    # The old agent ID no longer resolves to a node
    self.assertIsNone(node_manager.get_node(self.node_agent_2))

    moved_node = node_manager.get_node(self.node_agent_3)
    self.assertEqual(moved_node.hostname, 'host_2')
    self.assertTrue(moved_node.is_online)
def test_change_agent_id_with_inactive_node(self, mock_get_slaves):
    """Tests that an inactive node which re-registers under a new agent ID is tracked under that new ID"""

    mock_get_slaves.return_value = self.slave_infos

    node_manager = NodeManager()
    node_manager.register_agent_ids([self.node_agent_1, self.node_agent_2])
    node_manager.sync_with_database('master_host', 5050)

    # Deactivate node 2 in the database and let the manager pick that up
    node_2_id = node_manager.get_node(self.node_agent_2).id
    Node.objects.filter(id=node_2_id).update(is_active=False)
    node_manager.sync_with_database('master_host', 5050)

    # From here on the mocked slave lookup returns the updated slave infos
    mock_get_slaves.return_value = self.slave_infos_updated

    # The old agent is lost and agent 3 registers in its place
    node_manager.lost_node(self.node_agent_2)
    node_manager.register_agent_ids([self.node_agent_3])
    node_manager.sync_with_database('master_host', 5050)

    # Two nodes are expected: one for agent 1 and one for agent 3, both online
    self.assertEqual(len(node_manager.get_nodes()), 2)

    first_node = node_manager.get_node(self.node_agent_1)
    self.assertEqual(first_node.hostname, self.node_1.hostname)
    self.assertTrue(first_node._is_online)

    # The old agent ID no longer resolves to a node
    self.assertIsNone(node_manager.get_node(self.node_agent_2))

    # The node behind the new agent ID keeps its hostname and stays inactive
    moved_node = node_manager.get_node(self.node_agent_3)
    self.assertEqual(moved_node.hostname, 'host_2')
    self.assertTrue(moved_node._is_online)
    self.assertFalse(moved_node._is_active)