def test_get_nodes_running_job_exes(self):
    """Tests calling NodeManager.get_nodes_running_job_exes()"""

    # Set up three nodes
    node_1 = node_test_utils.create_node(hostname='node_1')
    node_2 = node_test_utils.create_node(hostname='node_2')
    node_3 = node_test_utils.create_node(hostname='node_3')

    # With no job executions at all, no nodes should be reported
    self.assertEqual(Node.objects.get_nodes_running_job_exes(), [])

    # Executions in terminal statuses must not count as running
    job_test_utils.create_job_exe(node=node_3, status='COMPLETED')
    job_test_utils.create_job_exe(node=node_3, status='FAILED')
    job_test_utils.create_job_exe(node=node_3, status='CANCELED')
    self.assertEqual(Node.objects.get_nodes_running_job_exes(), [])

    # A single running execution should surface exactly its node's id
    job_test_utils.create_running_job_exe(node=node_1)
    running_nodes = Node.objects.get_nodes_running_job_exes()
    self.assertEqual(len(running_nodes), 1)
    self.assertEqual(running_nodes[0], node_1.id)

    # Create another running job_exe (using a different way to create a
    # running job_exe for testing completeness)
    job_test_utils.create_job_exe(node=node_2, status='RUNNING')
    running_nodes = Node.objects.get_nodes_running_job_exes()
    self.assertEqual(len(running_nodes), 2)
    self.assertIn(node_1.id, running_nodes)
    self.assertIn(node_2.id, running_nodes)
def test_schedule_job_executions(self):
    """Tests that scheduling queued executions sets status, node and resources"""

    job_exe_1 = job_test_utils.create_job_exe(status='QUEUED')
    job_exe_2 = job_test_utils.create_job_exe(status='QUEUED')
    node_1 = node_test_utils.create_node()
    node_2 = node_test_utils.create_node()
    resources_1 = JobResources(cpus=1, mem=2, disk_in=3, disk_out=4, disk_total=7)
    resources_2 = JobResources(cpus=10, mem=11, disk_in=12, disk_out=13, disk_total=25)

    job_exes = JobExecution.objects.schedule_job_executions(
        [(job_exe_1, node_1, resources_1), (job_exe_2, node_2, resources_2)])

    # Expected (node id, cpus, mem, disk_in, disk_out, disk_total) per execution
    expected = {
        job_exe_1.id: (node_1.id, 1, 2, 3, 4, 7),
        job_exe_2.id: (node_2.id, 10, 11, 12, 13, 25),
    }
    for scheduled in job_exes:
        node_id, cpus, mem, disk_in, disk_out, disk_total = expected[scheduled.id]
        self.assertEqual(scheduled.status, 'RUNNING')
        self.assertEqual(scheduled.job.status, 'RUNNING')
        self.assertEqual(scheduled.node_id, node_id)
        self.assertIsNotNone(scheduled.started)
        self.assertEqual(scheduled.cpus_scheduled, cpus)
        self.assertEqual(scheduled.mem_scheduled, mem)
        self.assertEqual(scheduled.disk_in_scheduled, disk_in)
        self.assertEqual(scheduled.disk_out_scheduled, disk_out)
        self.assertEqual(scheduled.disk_total_scheduled, disk_total)
        self.assertEqual(scheduled.requires_cleanup, scheduled.job.job_type.requires_cleanup)
def setUp(self):
    django.setup()

    Scheduler.objects.initialize_scheduler()

    # Mesos driver stub and the scheduler's manager collaborators
    self._driver = MagicMock()
    self._job_exe_manager = RunningJobExecutionManager()
    self._job_type_manager = JobTypeManager()
    self._node_manager = NodeManager()
    self._offer_manager = OfferManager()
    self._scheduler_manager = SchedulerManager()
    self._workspace_manager = WorkspaceManager()
    self._scheduler_manager.sync_with_database()

    # Two registered agents with matching node models and slave infos
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.node_1 = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent_1)
    self.node_2 = node_test_utils.create_node(hostname='host_2', slave_id=self.node_agent_2)
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
    self._node_manager.add_agent_ids([self.node_agent_1, self.node_agent_2])
    with patch('scheduler.sync.node_manager.api.get_slaves') as mock_get_slaves:
        mock_get_slaves.return_value = self.slave_infos
        self._node_manager.sync_with_database('master_host', 5050)

    # Two queue entries with known resource requirements
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self._job_type_manager.sync_with_database()

    # The thread under test, wired to the managers above
    self._scheduling_thread = SchedulingThread(self._driver, '123', self._job_exe_manager,
                                               self._job_type_manager, self._node_manager,
                                               self._offer_manager, self._scheduler_manager,
                                               self._workspace_manager)
def setUp(self):
    django.setup()

    # One active node and one paused node
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node = node_test_utils.create_node(slave_id=self.node_agent)
    self.paused_node = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node.is_paused = True  # set in memory only; model is not re-saved

    # Running execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING', node=self.paused_node)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0

    # Running execution on the active node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING', node=self.node)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0

    # Queue entries: two schedulable, three that exceed node resources
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0,
                                                         disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    django.setup()

    # Authenticate the test client and create two plain nodes
    rest.login_client(self.client)
    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
def setUp(self):
    django.setup()

    # Shared resources: one unlimited, two limited, one limited and non-global
    self.resource_no_limit = shared_resource_test_utils.create_resource()
    self.resource_1 = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT)
    self.resource_2 = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT)
    self.resource_restricted = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT,
                                                                          is_global=False)

    self.job_type_1 = job_test_utils.create_job_type()
    self.job_type_1a = job_test_utils.create_job_type()
    self.job_type_2 = job_test_utils.create_job_type()
    self.non_global_job = job_test_utils.create_job_type()

    # Tie job types to the shared resources they consume
    shared_resource_test_utils.create_requirement(job_type=self.job_type_1,
                                                  shared_resource=self.resource_1,
                                                  usage=JOB_TYPE_1_USAGE)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_1a,
                                                  shared_resource=self.resource_1,
                                                  usage=JOB_TYPE_1A_USAGE)
    shared_resource_test_utils.create_requirement(job_type=self.non_global_job,
                                                  shared_resource=self.resource_restricted,
                                                  usage=JOB_TYPE_3_USAGE)
    self.global_job_types = [self.job_type_1, self.job_type_2]

    # Only one node is granted access to the restricted resource
    self.node_without_special_access = node_test_utils.create_node()
    self.node_with_special_access = node_test_utils.create_node()
    self.resource_restricted.nodes.add(self.node_with_special_access)
def setUp(self):
    django.setup()

    # Five job types with identical resources and increasing priority value
    cpus = 50
    mem = 500
    disk = 50
    self.job_type_1 = job_test_utils.create_job_type(priority=1, cpus=cpus, mem=mem, disk=disk)
    self.job_type_2 = job_test_utils.create_job_type(priority=2, cpus=cpus, mem=mem, disk=disk)
    self.job_type_3 = job_test_utils.create_job_type(priority=3, cpus=cpus, mem=mem, disk=disk)
    self.job_type_4 = job_test_utils.create_job_type(priority=4, cpus=cpus, mem=mem, disk=disk)
    self.job_type_5 = job_test_utils.create_job_type(priority=5, cpus=cpus, mem=mem, disk=disk)

    # Shared resources: unlimited, non-global, and limited to 1000
    resource_1 = shared_resource_test_utils.create_resource()
    resource_2 = shared_resource_test_utils.create_resource(is_global=False)
    resource_3 = shared_resource_test_utils.create_resource(limit=1000)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_1,
                                                  shared_resource=resource_1)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_2,
                                                  shared_resource=resource_2)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_3,
                                                  shared_resource=resource_3, usage=400)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_4,
                                                  shared_resource=resource_3, usage=200)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_5,
                                                  shared_resource=resource_3, usage=100)

    self.trigger_event_1 = trigger_test_utils.create_trigger_event()
    self.node_1 = node_test_utils.create_node()
    self.node_2 = node_test_utils.create_node()
    self.node_3 = node_test_utils.create_node()
def setUp(self):
    django.setup()

    # Shared resources with and without limits; one is non-global (restricted)
    self.resource_no_limit = shared_resource_test_utils.create_resource()
    self.resource_1 = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT)
    self.resource_2 = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT)
    self.resource_restricted = shared_resource_test_utils.create_resource(limit=RESOURCE_LIMIT,
                                                                          is_global=False)

    # Job types; the last one uses the restricted resource
    self.job_type_1 = job_test_utils.create_job_type()
    self.job_type_1a = job_test_utils.create_job_type()
    self.job_type_2 = job_test_utils.create_job_type()
    self.non_global_job = job_test_utils.create_job_type()

    shared_resource_test_utils.create_requirement(job_type=self.job_type_1,
                                                  shared_resource=self.resource_1,
                                                  usage=JOB_TYPE_1_USAGE)
    shared_resource_test_utils.create_requirement(job_type=self.job_type_1a,
                                                  shared_resource=self.resource_1,
                                                  usage=JOB_TYPE_1A_USAGE)
    shared_resource_test_utils.create_requirement(job_type=self.non_global_job,
                                                  shared_resource=self.resource_restricted,
                                                  usage=JOB_TYPE_3_USAGE)
    self.global_job_types = [self.job_type_1, self.job_type_2]

    # Grant a single node access to the restricted resource
    self.node_without_special_access = node_test_utils.create_node()
    self.node_with_special_access = node_test_utils.create_node()
    self.resource_restricted.nodes.add(self.node_with_special_access)
def setUp(self):
    django.setup()

    Scheduler.objects.initialize_scheduler()

    # Active node, driven into its ready state
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node_model = node_test_utils.create_node(slave_id=self.node_agent)
    self.node = Node(self.node_agent, self.node_model)
    self.node._is_image_pulled = True
    self.node._initial_cleanup_completed()
    self.node._update_state()

    # Paused node (pause flag set in memory only)
    self.paused_node_model = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node_model.is_paused = True
    self.paused_node = Node(self.node_agent_paused, self.paused_node_model)

    # Running execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING',
                                                           node=self.paused_node_model)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0

    # Running execution on the active node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING',
                                                           node=self.node_model)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0

    # Queue entries: two schedulable, three that exceed available resources
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0,
                                                         disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    django.setup()

    # Three nodes plus a scheduler row pointing at a local master
    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()
    Scheduler.objects.create(id=1, master_hostname='localhost', master_port=5050)
def setUp(self):
    django.setup()

    Scheduler.objects.initialize_scheduler()

    # Active node (cleanup completed) and a paused node
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node_model = node_test_utils.create_node(slave_id=self.node_agent)
    self.node = Node(self.node_agent, self.node_model)
    self.node.initial_cleanup_completed()
    self.paused_node_model = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node_model.is_paused = True
    self.paused_node = Node(self.node_agent_paused, self.paused_node_model)

    # Running executions with various scheduled resource footprints
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0

    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_2.cpus_scheduled = 4.0
    self.running_job_exe_2.mem_scheduled = 1024.0
    self.running_job_exe_2.disk_in_scheduled = 500.0
    self.running_job_exe_2.disk_out_scheduled = 50.0
    self.running_job_exe_2.disk_total_scheduled = 550.0

    self.running_job_exe_high_cpus = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_cpus.cpus_scheduled = 200.0
    self.running_job_exe_high_cpus.mem_scheduled = 512.0
    self.running_job_exe_high_cpus.disk_in_scheduled = 100.0
    self.running_job_exe_high_cpus.disk_out_scheduled = 200.0
    self.running_job_exe_high_cpus.disk_total_scheduled = 300.0

    self.running_job_exe_high_mem = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_mem.cpus_scheduled = 2.0
    self.running_job_exe_high_mem.mem_scheduled = 1048576.0
    self.running_job_exe_high_mem.disk_in_scheduled = 100.0
    self.running_job_exe_high_mem.disk_out_scheduled = 200.0
    self.running_job_exe_high_mem.disk_total_scheduled = 300.0

    self.running_job_exe_high_disk = job_test_utils.create_job_exe(status='RUNNING')
    self.running_job_exe_high_disk.cpus_scheduled = 2.0
    self.running_job_exe_high_disk.mem_scheduled = 512.0
    self.running_job_exe_high_disk.disk_in_scheduled = 10000.0
    self.running_job_exe_high_disk.disk_out_scheduled = 20000.0
    self.running_job_exe_high_disk.disk_total_scheduled = 30000.0

    # Queue entries: two schedulable, three that exceed available resources
    self.queue_1 = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0,
                                                         disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    django.setup()

    # Three agent IDs; only the first two have node models
    self.node_agent_1 = "agent_1"
    self.node_agent_2 = "agent_2"
    self.node_agent_3 = "agent_3"
    self.node_1 = node_test_utils.create_node(hostname="host_1", slave_id=self.node_agent_1)
    self.node_2 = node_test_utils.create_node(hostname="host_2", slave_id=self.node_agent_2)
    self.job_exe_1 = job_test_utils.create_job_exe(node=self.node_1)
def setUp(self):
    django.setup()

    # Authenticate as a staff user, then create three nodes and the scheduler row
    rest.login_client(self.client, is_staff=True)
    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()
    Scheduler.objects.create(id=1)
def test_execute(self):
    """Tests calling RunningJobs.execute() successfully"""

    node_1 = node_test_utils.create_node()
    node_2 = node_test_utils.create_node()
    job_1 = job_test_utils.create_job(num_exes=1, status='QUEUED')
    job_2 = job_test_utils.create_job(num_exes=2, status='QUEUED')
    job_3 = job_test_utils.create_job(num_exes=1, status='COMPLETED')
    job_4 = job_test_utils.create_job(num_exes=1, status='FAILED')
    job_5 = job_test_utils.create_job(num_exes=1, status='CANCELED')
    job_ids = [job_1.id, job_2.id, job_3.id, job_4.id, job_5.id]

    # Add jobs to message
    started = now()
    message = RunningJobs(started)
    if message.can_fit_more():
        message.add_running_job(job_1.id, job_1.num_exes, node_1.id)
    if message.can_fit_more():
        # This message is for the first execution number
        message.add_running_job(job_2.id, 1, node_1.id)
    if message.can_fit_more():
        message.add_running_job(job_3.id, job_3.num_exes, node_2.id)
    if message.can_fit_more():
        message.add_running_job(job_4.id, job_4.num_exes, node_2.id)
    if message.can_fit_more():
        message.add_running_job(job_5.id, job_5.num_exes, node_2.id)

    # Execute message
    self.assertTrue(message.execute())

    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    # Job 1 moves to RUNNING; job 2 is untouched since it has already moved on
    # to exe_num 2; jobs 3-5 keep their terminal statuses but still record
    # their started time and node.
    expected = [('RUNNING', started, node_1.id),
                ('QUEUED', None, None),
                ('COMPLETED', started, node_2.id),
                ('FAILED', started, node_2.id),
                ('CANCELED', started, node_2.id)]
    for job, (exp_status, exp_started, exp_node_id) in zip(jobs, expected):
        self.assertEqual(job.status, exp_status)
        self.assertEqual(job.started, exp_started)
        self.assertEqual(job.node_id, exp_node_id)
def setUp(self):
    django.setup()

    # One active node and one paused node
    self.node_agent = 'agent_1'
    self.node_agent_paused = 'agent_paused'
    self.node = node_test_utils.create_node(slave_id=self.node_agent)
    self.paused_node = node_test_utils.create_node(slave_id=self.node_agent_paused)
    self.paused_node.is_paused = True  # set in memory only; model is not re-saved

    # Running execution on the paused node
    self.running_job_exe_1 = job_test_utils.create_job_exe(status='RUNNING',
                                                           node=self.paused_node)
    self.running_job_exe_1.cpus_scheduled = 2.0
    self.running_job_exe_1.mem_scheduled = 512.0
    self.running_job_exe_1.disk_in_scheduled = 100.0
    self.running_job_exe_1.disk_out_scheduled = 200.0
    self.running_job_exe_1.disk_total_scheduled = 300.0

    # Running execution on the active node
    self.running_job_exe_2 = job_test_utils.create_job_exe(status='RUNNING', node=self.node)
    self.running_job_exe_2.cpus_scheduled = 2.0
    self.running_job_exe_2.mem_scheduled = 512.0
    self.running_job_exe_2.disk_in_scheduled = 100.0
    self.running_job_exe_2.disk_out_scheduled = 200.0
    self.running_job_exe_2.disk_total_scheduled = 300.0

    # Queue entries: two schedulable, three that exceed node resources
    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0,
                                                 disk_in_required=100.0, disk_out_required=200.0,
                                                 disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0,
                                                 disk_in_required=400.0, disk_out_required=45.0,
                                                 disk_total_required=445.0)
    self.queue_high_cpus = queue_test_utils.create_queue(cpus_required=200.0, mem_required=1024.0,
                                                         disk_in_required=100.0, disk_out_required=200.0,
                                                         disk_total_required=300.0)
    self.queue_high_mem = queue_test_utils.create_queue(cpus_required=2.0, mem_required=10240.0,
                                                        disk_in_required=100.0, disk_out_required=200.0,
                                                        disk_total_required=300.0)
    self.queue_high_disk = queue_test_utils.create_queue(cpus_required=2.0, mem_required=1024.0,
                                                         disk_in_required=10000.0,
                                                         disk_out_required=20000.0,
                                                         disk_total_required=30000.0)
def setUp(self):
    django.setup()

    # Clear error cache so test works correctly
    CACHED_ERRORS.clear()

    # Two running executions on the same agent, each on its own node
    self.agent_id = 'agent'
    self.node_model_1 = node_test_utils.create_node()
    self.job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                           node=self.node_model_1)
    self.node_model_2 = node_test_utils.create_node()
    self.job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                           node=self.node_model_2)

    self.job_exe_mgr = JobExecutionManager()
def setUp(self):
    django.setup()

    # Clear error cache so tests work correctly
    reset_error_cache()

    # Two running executions on the same agent, each on its own node
    self.agent_id = 'agent'
    self.node_model_1 = node_test_utils.create_node()
    self.job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                           node=self.node_model_1)
    self.node_model_2 = node_test_utils.create_node()
    self.job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id,
                                                           node=self.node_model_2)

    self.task_mgr = TaskManager()
    self.job_exe_mgr = JobExecutionManager()
def setUp(self):
    django.setup()

    self.scheduler = Scheduler.objects.create(id=1, master_hostname='master', master_port=5050)
    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()
    self.job = job_test_utils.create_job(status='COMPLETED')

    data_error = error_test_utils.create_error(category='DATA')
    system_error = error_test_utils.create_error(category='SYSTEM')

    # Execution history, oldest first: data failure and system failure on
    # node2, system failure on node1, success on node1, still running on node3
    job_exe_1 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=data_error,
                                              node=self.node2)
    job_exe_1.created = now() - timedelta(hours=3)
    job_exe_1.job_completed = now() - timedelta(hours=2)
    job_exe_1.save()

    job_exe_2 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=system_error,
                                              node=self.node2)
    job_exe_2.created = now() - timedelta(hours=3)
    job_exe_2.job_completed = now() - timedelta(hours=2)
    job_exe_2.save()

    job_exe_3 = job_test_utils.create_job_exe(job=self.job, status='FAILED', error=system_error,
                                              node=self.node1)
    job_exe_3.created = now() - timedelta(hours=2)
    job_exe_3.job_completed = now() - timedelta(hours=1)
    job_exe_3.save()

    job_exe_4 = job_test_utils.create_job_exe(job=self.job, status='COMPLETED', node=self.node1)
    job_exe_4.created = now() - timedelta(hours=1)
    job_exe_4.job_completed = now()
    job_exe_4.save()

    job_exe_5 = job_test_utils.create_job_exe(job=self.job, status='RUNNING', node=self.node3)
    job_exe_5.created = now()
    job_exe_5.save()
def test_json(self):
    """Tests converting a RunningJobs message to and from JSON"""

    node_1 = node_test_utils.create_node()
    node_2 = node_test_utils.create_node()
    job_1 = job_test_utils.create_job(num_exes=1, status='QUEUED')
    job_2 = job_test_utils.create_job(num_exes=2, status='QUEUED')
    job_3 = job_test_utils.create_job(num_exes=10, status='QUEUED')
    job_4 = job_test_utils.create_job(num_exes=2, status='QUEUED')
    job_5 = job_test_utils.create_job(num_exes=1, status='QUEUED')
    job_ids = [job_1.id, job_2.id, job_3.id, job_4.id, job_5.id]

    # Add jobs to message
    started = now()
    message = RunningJobs(started)
    for job, node in [(job_1, node_1), (job_2, node_1), (job_3, node_2),
                      (job_4, node_2), (job_5, node_2)]:
        if message.can_fit_more():
            message.add_running_job(job.id, job.num_exes, node.id)

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = RunningJobs.from_json(message_json_dict)
    self.assertTrue(new_message.execute())

    # Every job should now be RUNNING on its assigned node
    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    expected_node_ids = [node_1.id, node_1.id, node_2.id, node_2.id, node_2.id]
    for job, expected_node_id in zip(jobs, expected_node_ids):
        self.assertEqual(job.status, 'RUNNING')
        self.assertEqual(job.started, started)
        self.assertEqual(job.node_id, expected_node_id)
def setUp(self):
    django.setup()

    # Clear error cache so test works correctly
    CACHED_BUILTIN_ERRORS.clear()

    # Two running executions wrapped in RunningJobExecution, one per node
    self.node_model_1 = node_test_utils.create_node()
    self.job_exe_model_1 = job_test_utils.create_job_exe(status='RUNNING',
                                                         node=self.node_model_1)
    self.job_exe_1 = RunningJobExecution(self.job_exe_model_1)
    self.node_model_2 = node_test_utils.create_node()
    self.job_exe_model_2 = job_test_utils.create_job_exe(status='RUNNING',
                                                         node=self.node_model_2)
    self.job_exe_2 = RunningJobExecution(self.job_exe_model_2)

    self.job_exe_mgr = JobExecutionManager()
def create_job_exe(job_type=None, job=None, status='RUNNING', error=None,
                   command_arguments='test_arg', timeout=None, node=None, created=None,
                   queued=None, started=None, pre_started=None, pre_completed=None,
                   job_started=None, job_completed=None, post_started=None,
                   post_completed=None, ended=None, last_modified=None):
    '''Creates a job execution model for unit testing

    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    '''
    when = timezone.now()

    # Fill in defaults for anything the caller did not supply
    job = job or create_job(job_type=job_type)
    timeout = timeout or job.timeout
    node = node or node_utils.create_node()
    created = created or when
    queued = queued or when
    started = started or when
    last_modified = last_modified or when

    return JobExecution.objects.create(job=job, status=status, error=error,
                                       command_arguments=command_arguments, timeout=timeout,
                                       node=node, created=created, queued=queued,
                                       started=started, pre_started=pre_started,
                                       pre_completed=pre_completed, job_started=job_started,
                                       job_completed=job_completed, post_started=post_started,
                                       post_completed=post_completed, ended=ended,
                                       last_modified=last_modified)
def setUp(self):
    django.setup()

    scheduler_mgr.config.is_paused = False

    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    # Will represent a new agent ID for host 2
    self.agent_3 = Agent('agent_3', 'host_2')
    self.node_1 = node_test_utils.create_node(hostname='host_1')
def setUp(self):
    django.setup()

    # A single node with one execution, plus the task manager under test
    self.node_agent = 'agent_1'
    self.node = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent)
    self.job_exe = job_test_utils.create_job_exe(node=self.node)
    self.task_mgr = TaskManager()
def setUp(self):
    django.setup()

    # Two agents; only agent 1 has a node model, agent 2 exists only as slave info
    self.node_agent_1 = 'agent_1'
    self.node_agent_2 = 'agent_2'
    self.node_1 = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent_1)
    self.slave_infos = [SlaveInfo('host_1', slave_id=self.node_agent_1),
                        SlaveInfo('host_2', slave_id=self.node_agent_2)]
def test_paused_node(self):
    """Tests not returning tasks when its node is paused"""

    paused_node = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent)
    paused_node.is_paused = True
    node = Node(self.node_agent, paused_node)
    cleanup = NodeCleanup(node)

    # No task should be offered while the node is paused
    self.assertIsNone(cleanup.get_next_task())
def create_running_job_exe(agent_id='agent_1', job_type=None, job=None, node=None, timeout=None,
                           input_file_size=10.0, queued=None, started=None, resources=None,
                           priority=None, num_exes=1):
    """Creates a running job execution for unit testing

    :returns: The running job execution
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """
    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status='RUNNING', input_file_size=input_file_size,
                         num_exes=num_exes)
    job_type = job.job_type

    # Configuration that occurs at queue time
    input_files = {}
    input_file_ids = job.get_job_data().get_input_file_ids()
    if input_file_ids:
        for input_file in ScaleFile.objects.get_files_for_queued_jobs(input_file_ids):
            input_files[input_file.id] = input_file
    exe_config = QueuedExecutionConfigurator(input_files).configure_queued_job(job)

    # Build and save the execution model, defaulting any values not supplied
    job_exe = JobExecution()
    job_exe.set_cluster_id('1234', job.id, job.num_exes)
    job_exe.job = job
    job_exe.job_type = job_type
    job_exe.exe_num = job.num_exes
    job_exe.node = node or node_utils.create_node()
    job_exe.timeout = timeout or job.timeout
    job_exe.input_file_size = input_file_size
    resources = resources or job.get_resources()
    job_exe.resources = resources.get_json().get_dict()
    job_exe.configuration = exe_config.get_dict()
    job_exe.queued = queued or when
    job_exe.started = started or (when + datetime.timedelta(seconds=1))
    job_exe.save()

    priority = priority or job.priority

    # Configuration that occurs at schedule time
    workspaces = {workspace.name: workspace for workspace in Workspace.objects.all()}
    secret_config = ScheduledExecutionConfigurator(workspaces).configure_scheduled_job(
        job_exe, job_type, job_type.get_job_interface(), 'INFO')
    return RunningJobExecution(agent_id, job_exe, job_type, secret_config, priority)
def test_paused_node_cleanup_task(self):
    """Tests not returning cleanup task when its node is paused"""

    when = now()
    paused_node = node_test_utils.create_node(hostname='host_1_paused', slave_id='agent_paused')
    paused_node.is_paused = True
    node = Node('agent_paused', paused_node, self.scheduler)
    # Turn off health task
    node._last_health_task = when

    # No task due to paused node
    self.assertListEqual([], node.get_next_tasks(when))
def setUp(self):
    django.setup()

    self.node = node_test_utils.create_node()
    self.job_type_1 = job_test_utils.create_job_type()
    # job_exe_2 has an invalid JSON and will not schedule correctly
    self.job_type_2 = job_test_utils.create_job_type(configuration={'INVALID': 'SCHEMA'})

    self.queue_1 = queue_test_utils.create_queue(job_type=self.job_type_1)
    self.queue_2 = queue_test_utils.create_queue(job_type=self.job_type_2)
    self.job_exe_1 = self.queue_1.job_exe
    self.job_exe_2 = self.queue_2.job_exe
def test_paused_node_pull_task(self):
    """Tests not returning pull task when its node is paused"""

    when = now()
    paused_node = node_test_utils.create_node(hostname='host_1_paused', slave_id='agent_paused')
    paused_node.is_paused = True
    node = Node('agent_paused', paused_node, self.scheduler)
    node._last_health_task = when  # turn off health task
    node._initial_cleanup_completed()
    node._update_state()

    # No task due to paused node
    self.assertListEqual([], node.get_next_tasks(when))
def setUp(self):
    django.setup()

    self.scheduler = Scheduler.objects.create(id=1, master_hostname='master', master_port=5050)
    self.node1 = node_test_utils.create_node()
    self.node2 = node_test_utils.create_node()
    self.node3 = node_test_utils.create_node()
    self.job = job_test_utils.create_job(status=u'COMPLETED')

    data_error = error_test_utils.create_error(category=u'DATA')
    system_error = error_test_utils.create_error(category=u'SYSTEM')

    # Execution history for the job, oldest first: two failures on node2, a
    # failure and a success on node1, and a still-running execution on node3
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=data_error,
                                  node=self.node2, created=now() - timedelta(hours=3),
                                  job_completed=now() - timedelta(hours=2))
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=system_error,
                                  node=self.node2, created=now() - timedelta(hours=3),
                                  job_completed=now() - timedelta(hours=2))
    job_test_utils.create_job_exe(job=self.job, status=u'FAILED', error=system_error,
                                  node=self.node1, created=now() - timedelta(hours=2),
                                  job_completed=now() - timedelta(hours=1))
    job_test_utils.create_job_exe(job=self.job, status=u'COMPLETED', node=self.node1,
                                  created=now() - timedelta(hours=1), job_completed=now())
    job_test_utils.create_job_exe(job=self.job, status=u'RUNNING', node=self.node3,
                                  created=now())
def setUp(self):
    """Creates the two node models each test case needs."""
    django.setup()
    self.node1, self.node2 = (node_test_utils.create_node() for _ in range(2))
def create_job_exe(job_type=None, job=None, exe_num=None, node=None, timeout=None, input_file_size=10.0,
                   queued=None, started=None, status='RUNNING', error=None, ended=None, output=None,
                   task_results=None):
    """Creates a job_exe model for unit testing, may also create job_exe_end and job_exe_output models
    depending on status

    Any argument left as None is filled with a sensible default: the job (and its job type) are created
    if not given, the exe number comes from the job, timestamps are derived from the current time, and a
    node is created if none is supplied. A job_exe_end model is saved for terminal statuses
    (COMPLETED/FAILED/CANCELED) and a job_exe_output model for COMPLETED status or when an output is
    provided.

    :returns: The job_exe model
    :rtype: :class:`job.execution.job_exe.RunningJobExecution`
    """

    when = timezone.now()
    if not job:
        job = create_job(job_type=job_type, status=status, input_file_size=input_file_size)
    # Always take the job type from the job so the two stay consistent,
    # even when a job_type argument was also given
    job_type = job.job_type

    job_exe = JobExecution()
    job_exe.job = job
    job_exe.job_type = job_type
    if not exe_num:
        exe_num = job.num_exes
    job_exe.exe_num = exe_num
    job_exe.set_cluster_id('1234', job.id, job_exe.exe_num)
    if not node:
        node = node_utils.create_node()
    job_exe.node = node
    if not timeout:
        timeout = job.timeout
    job_exe.timeout = timeout
    job_exe.input_file_size = input_file_size
    job_exe.resources = job.get_resources().get_json().get_dict()
    job_exe.configuration = ExecutionConfiguration().get_dict()
    if not queued:
        queued = when
    job_exe.queued = queued
    if not started:
        # Default start is one second after queue time so the ordering is realistic
        started = when + datetime.timedelta(seconds=1)
    job_exe.started = started
    job_exe.save()

    # Terminal statuses also get a job_exe_end record mirroring the execution
    if status in ['COMPLETED', 'FAILED', 'CANCELED']:
        job_exe_end = JobExecutionEnd()
        job_exe_end.job_exe_id = job_exe.id
        job_exe_end.job = job_exe.job
        job_exe_end.job_type = job_exe.job_type
        job_exe_end.exe_num = job_exe.exe_num
        if not task_results:
            task_results = TaskResults()
        job_exe_end.task_results = task_results.get_dict()
        job_exe_end.status = status
        # A failure always needs an error model; create a generic one if the
        # caller did not supply one
        if status == 'FAILED' and not error:
            error = error_test_utils.create_error()
        job_exe_end.error = error
        job_exe_end.node = node
        job_exe_end.queued = queued
        job_exe_end.started = started
        job_exe_end.seed_started = task_results.get_task_started('main')
        job_exe_end.seed_ended = task_results.get_task_ended('main')
        if not ended:
            ended = started + datetime.timedelta(seconds=1)
        job_exe_end.ended = ended
        job_exe_end.save()

    # Completed executions (or any execution given explicit output) also get a
    # job_exe_output record
    if status == 'COMPLETED' or output:
        job_exe_output = JobExecutionOutput()
        job_exe_output.job_exe_id = job_exe.id
        job_exe_output.job = job_exe.job
        job_exe_output.job_type = job_exe.job_type
        job_exe_output.exe_num = job_exe.exe_num
        if not output:
            output = JobResults()
        job_exe_output.output = output.get_dict()
        job_exe_output.save()

    return job_exe
def test_successful_supersede(self):
    """Tests calling QueueManager.queue_new_recipe() successfully when superseding a recipe."""

    # Queue initial recipe and complete its first job
    node = node_test_utils.create_node()
    recipe_id = Queue.objects.queue_new_recipe(self.recipe_type, self.data, self.event)
    recipe = Recipe.objects.get(id=recipe_id)
    recipe_job_1 = RecipeJob.objects.select_related('job__job_exe').get(recipe_id=recipe_id, job_name='Job 1')
    job_exe_1 = JobExecution.objects.get(job_id=recipe_job_1.job_id)
    queued_job_exe = QueuedJobExecution(Queue.objects.get(job_exe_id=job_exe_1.id))
    queued_job_exe.accepted(node, JobResources(cpus=10, mem=1000, disk_in=1000, disk_out=1000,
                                               disk_total=2000))
    Queue.objects.schedule_job_executions('123', [queued_job_exe], {})
    # Attach an output product so job 2's input connection can be satisfied
    results = JobResults()
    results.add_file_list_parameter('Test Output 1', [product_test_utils.create_product().file_id])
    JobExecution.objects.filter(id=job_exe_1.id).update(results=results.get_dict())
    Queue.objects.handle_job_completion(job_exe_1.id, now())

    # Create a new recipe type that has a new version of job 2 (job 1 is identical)
    new_job_type_2 = job_test_utils.create_job_type(name=self.job_type_2.name, version='New Version',
                                                    interface=self.job_type_2.interface)
    new_definition = {
        'version': '1.0',
        'input_data': [{
            'name': 'Recipe Input',
            'type': 'file',
            'media_types': ['text/plain'],
        }],
        'jobs': [{
            'name': 'New Job 1',
            'job_type': {
                'name': self.job_type_1.name,
                'version': self.job_type_1.version,
            },
            'recipe_inputs': [{
                'recipe_input': 'Recipe Input',
                'job_input': 'Test Input 1',
            }]
        }, {
            'name': 'New Job 2',
            'job_type': {
                'name': new_job_type_2.name,
                'version': new_job_type_2.version,
            },
            'dependencies': [{
                'name': 'New Job 1',
                'connections': [{
                    'output': 'Test Output 1',
                    'input': 'Test Input 2',
                }]
            }]
        }]
    }
    # Same recipe type name with a new definition creates a new revision
    new_recipe_type = recipe_test_utils.create_recipe_type(name=self.recipe_type.name,
                                                           definition=new_definition)
    event = trigger_test_utils.create_trigger_event()
    recipe_job_1 = RecipeJob.objects.select_related('job').get(recipe_id=recipe_id, job_name='Job 1')
    recipe_job_2 = RecipeJob.objects.select_related('job').get(recipe_id=recipe_id, job_name='Job 2')
    superseded_jobs = {'Job 1': recipe_job_1.job, 'Job 2': recipe_job_2.job}
    # Delta between the old and new recipe graphs drives which jobs are copied
    # versus re-queued
    graph_a = self.recipe_type.get_recipe_definition().get_graph()
    graph_b = new_recipe_type.get_recipe_definition().get_graph()
    delta = RecipeGraphDelta(graph_a, graph_b)

    # Queue new recipe that supersedes the old recipe
    new_recipe_id = Queue.objects.queue_new_recipe(new_recipe_type, None, event, recipe, delta,
                                                   superseded_jobs)

    # Ensure old recipe is superseded
    recipe = Recipe.objects.get(id=recipe_id)
    self.assertTrue(recipe.is_superseded)

    # Ensure new recipe supersedes old recipe
    new_recipe = Recipe.objects.get(id=new_recipe_id)
    self.assertEqual(new_recipe.superseded_recipe_id, recipe_id)

    # Ensure that job 1 is already completed (it was copied from original recipe) and that job 2 is queued
    new_recipe_job_1 = RecipeJob.objects.select_related('job').get(recipe_id=new_recipe_id,
                                                                   job_name='New Job 1')
    new_recipe_job_2 = RecipeJob.objects.select_related('job').get(recipe_id=new_recipe_id,
                                                                   job_name='New Job 2')
    self.assertEqual(new_recipe_job_1.job.status, 'COMPLETED')
    self.assertFalse(new_recipe_job_1.is_original)
    self.assertEqual(new_recipe_job_2.job.status, 'QUEUED')
    self.assertTrue(new_recipe_job_2.is_original)

    # Complete both the old and new job 2 and check that only the new recipe completes
    job_exe_2 = JobExecution.objects.get(job_id=recipe_job_2.job_id)
    queued_job_exe_2 = QueuedJobExecution(Queue.objects.get(job_exe_id=job_exe_2.id))
    queued_job_exe_2.accepted(node, JobResources(cpus=10, mem=1000, disk_in=1000, disk_out=1000,
                                                 disk_total=2000))
    Queue.objects.schedule_job_executions('123', [queued_job_exe_2], {})
    Queue.objects.handle_job_completion(job_exe_2.id, now())
    new_job_exe_2 = JobExecution.objects.get(job_id=new_recipe_job_2.job_id)
    new_queued_job_exe_2 = QueuedJobExecution(Queue.objects.get(job_exe_id=new_job_exe_2.id))
    new_queued_job_exe_2.accepted(node, JobResources(cpus=10, mem=1000, disk_in=1000, disk_out=1000,
                                                     disk_total=2000))
    Queue.objects.schedule_job_executions('123', [new_queued_job_exe_2], {})
    Queue.objects.handle_job_completion(new_job_exe_2.id, now())
    recipe = Recipe.objects.get(id=recipe.id)
    new_recipe = Recipe.objects.get(id=new_recipe.id)
    # The superseded recipe must never be marked completed; only its successor
    self.assertIsNone(recipe.completed)
    self.assertIsNotNone(new_recipe.completed)
def test_running_executions(self):
    """Tests the metrics with running executions that complete"""

    node_model_1 = node_test_utils.create_node()
    node_model_2 = node_test_utils.create_node()
    job_type_1 = job_test_utils.create_seed_job_type()
    job_type_2 = job_test_utils.create_seed_job_type()
    # Node 1: three executions of job type 1, one of job type 2 (4 total)
    job_exe_1 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_1, node=node_model_1)
    job_exe_2 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_1, node=node_model_1)
    job_exe_3 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_1, node=node_model_1)
    job_exe_4 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_1)
    # Node 2: two executions of job type 1, five of job type 2 (7 total)
    job_exe_5 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_1, node=node_model_2)
    job_exe_6 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_1, node=node_model_2)
    job_exe_7 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_2)
    job_exe_8 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_2)
    job_exe_9 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_2)
    job_exe_10 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_2)
    job_exe_11 = job_test_utils.create_running_job_exe(agent_id='agent', job_type=job_type_2, node=node_model_2)

    # NOTE: this test is intentionally exhaustive; the repetition is needed for thorough coverage
    self.metrics.add_running_job_exes([
        job_exe_1, job_exe_2, job_exe_3, job_exe_4, job_exe_5, job_exe_6, job_exe_7, job_exe_8, job_exe_9,
        job_exe_10, job_exe_11
    ])
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, now())

    # Check expected totals: everything is still running, nothing finished
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 4)
    for job_type_dict in node_list_dict[0]['job_executions']['running']['by_job_type']:
        if job_type_dict['job_type_id'] == job_type_1.id:
            self.assertEqual(job_type_dict['count'], 3)
        elif job_type_dict['job_type_id'] == job_type_2.id:
            self.assertEqual(job_type_dict['count'], 1)
        else:
            self.fail('Unexpected job type ID')
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 7)
    for job_type_dict in node_list_dict[1]['job_executions']['running']['by_job_type']:
        if job_type_dict['job_type_id'] == job_type_1.id:
            self.assertEqual(job_type_dict['count'], 2)
        elif job_type_dict['job_type_id'] == job_type_2.id:
            self.assertEqual(job_type_dict['count'], 5)
        else:
            self.fail('Unexpected job type ID')
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)

    # Finish some job executions on node 1: one completed, one DATA failure, one ALGORITHM failure
    end_time_1 = now()
    job_exe_1._set_final_status('COMPLETED', end_time_1)
    job_exe_2._set_final_status('FAILED', end_time_1, error=self.data_error)
    job_exe_4._set_final_status('FAILED', end_time_1, error=self.alg_error)
    self.metrics.job_exe_finished(job_exe_1)
    self.metrics.job_exe_finished(job_exe_2)
    self.metrics.job_exe_finished(job_exe_4)
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, end_time_1 + datetime.timedelta(seconds=1))

    # Check expected totals: node 1 now has 1 running, 1 completed, 2 failed (1 alg, 1 data)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['running']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['completed']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 2)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type'][0]['job_type_id'], job_type_2.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['failed']['data']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 0)
    # Node 2 is untouched: still 7 running
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 7)
    for job_type_dict in node_list_dict[1]['job_executions']['running']['by_job_type']:
        if job_type_dict['job_type_id'] == job_type_1.id:
            self.assertEqual(job_type_dict['count'], 2)
        elif job_type_dict['job_type_id'] == job_type_2.id:
            self.assertEqual(job_type_dict['count'], 5)
        else:
            self.fail('Unexpected job type ID')
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)

    # Finish some job executions (all executions still on node 2), one time block later
    end_time_2 = end_time_1 + FinishedJobExeMetricsOverTime.BLOCK_LENGTH
    job_exe_5._set_final_status('COMPLETED', end_time_2)
    job_exe_6._set_final_status('COMPLETED', end_time_2)
    job_exe_7._set_final_status('COMPLETED', end_time_2)
    job_exe_8._set_final_status('COMPLETED', end_time_2)
    job_exe_9._set_final_status('COMPLETED', end_time_2)
    job_exe_10._set_final_status('COMPLETED', end_time_2)
    job_exe_11._set_final_status('COMPLETED', end_time_2)
    self.metrics.job_exe_finished(job_exe_5)
    self.metrics.job_exe_finished(job_exe_6)
    self.metrics.job_exe_finished(job_exe_7)
    self.metrics.job_exe_finished(job_exe_8)
    self.metrics.job_exe_finished(job_exe_9)
    self.metrics.job_exe_finished(job_exe_10)
    self.metrics.job_exe_finished(job_exe_11)
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, end_time_2)

    # Check expected totals: node 1 unchanged (its finished exes haven't rolled off yet)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['running']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['completed']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 2)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['by_job_type'][0]['job_type_id'], job_type_2.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['failed']['data']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 0)
    # Node 2: all 7 executions have completed
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 7)
    for job_type_dict in node_list_dict[1]['job_executions']['completed']['by_job_type']:
        if job_type_dict['job_type_id'] == job_type_1.id:
            self.assertEqual(job_type_dict['count'], 2)
        elif job_type_dict['job_type_id'] == job_type_2.id:
            self.assertEqual(job_type_dict['count'], 5)
        else:
            self.fail('Unexpected job type ID')
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)

    # Let all finished job executions roll off by time, only running remaining
    end_time_3 = end_time_2 + FinishedJobExeMetricsOverTime.TOTAL_TIME_PERIOD
    end_time_3 += FinishedJobExeMetricsOverTime.BLOCK_LENGTH + datetime.timedelta(seconds=1)
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, end_time_3)

    # Check expected totals: only node 1's single still-running execution remains
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 1)
    self.assertEqual(len(node_list_dict[0]['job_executions']['running']['by_job_type']), 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['count'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['by_job_type'][0]['job_type_id'], job_type_1.id)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)
def test_init_with_database(self):
    """Tests calling init_with_database() successfully to load in job executions from the database"""

    # First block of job executions, placed two time blocks in the past
    end_time_1 = now() - FinishedJobExeMetricsOverTime.BLOCK_LENGTH - FinishedJobExeMetricsOverTime.BLOCK_LENGTH
    node_model_1 = node_test_utils.create_node()
    job_type_1 = job_test_utils.create_seed_job_type()
    job_type_2 = job_test_utils.create_seed_job_type()
    # Node 1: 2 completed, 3 algorithm failures, 1 data failure, 2 system failures
    job_exe_model_1 = job_test_utils.create_job_exe(job_type=job_type_1, status='COMPLETED',
                                                    ended=end_time_1, node=node_model_1)
    job_exe_model_2 = job_test_utils.create_job_exe(job_type=job_type_1, status='COMPLETED',
                                                    ended=end_time_1, node=node_model_1)
    job_exe_model_3 = job_test_utils.create_job_exe(job_type=job_type_1, status='FAILED',
                                                    ended=end_time_1, error=self.alg_error,
                                                    node=node_model_1)
    job_exe_model_4 = job_test_utils.create_job_exe(job_type=job_type_1, status='FAILED',
                                                    ended=end_time_1, error=self.alg_error,
                                                    node=node_model_1)
    job_exe_model_5 = job_test_utils.create_job_exe(job_type=job_type_1, status='FAILED',
                                                    ended=end_time_1, error=self.alg_error,
                                                    node=node_model_1)
    job_exe_model_6 = job_test_utils.create_job_exe(job_type=job_type_1, status='FAILED',
                                                    ended=end_time_1, error=self.data_error,
                                                    node=node_model_1)
    job_exe_model_7 = job_test_utils.create_job_exe(job_type=job_type_1, status='FAILED',
                                                    ended=end_time_1, error=self.system_error,
                                                    node=node_model_1)
    job_exe_model_8 = job_test_utils.create_job_exe(job_type=job_type_2, status='FAILED',
                                                    ended=end_time_1, error=self.system_error,
                                                    node=node_model_1)
    node_model_2 = node_test_utils.create_node()
    # Node 2: 2 completed, 1 data failure
    job_exe_model_9 = job_test_utils.create_job_exe(job_type=job_type_1, status='COMPLETED',
                                                    ended=end_time_1, node=node_model_2)
    job_exe_model_10 = job_test_utils.create_job_exe(job_type=job_type_2, status='COMPLETED',
                                                     ended=end_time_1, node=node_model_2)
    job_exe_model_11 = job_test_utils.create_job_exe(job_type=job_type_2, status='FAILED',
                                                     ended=end_time_1, error=self.data_error,
                                                     node=node_model_2)

    # Second block of job executions (one time block over from first set of executions)
    end_time_2 = end_time_1 + FinishedJobExeMetricsOverTime.BLOCK_LENGTH
    job_exe_model_12 = job_test_utils.create_job_exe(job_type=job_type_2, status='FAILED',
                                                     ended=end_time_2, error=self.system_error,
                                                     node=node_model_1)
    job_exe_model_13 = job_test_utils.create_job_exe(job_type=job_type_2, status='FAILED',
                                                     ended=end_time_2, error=self.system_error,
                                                     node=node_model_1)
    job_exe_model_14 = job_test_utils.create_job_exe(job_type=job_type_2, status='COMPLETED',
                                                     ended=end_time_2, node=node_model_2)

    # Load all initial executions from database
    self.metrics.init_with_database()

    # Generate JSON which should include both sets of job executions
    right_now = end_time_2 + datetime.timedelta(seconds=30)
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, right_now)

    # Check expected totals (both blocks counted)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 2)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 8)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 3)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 1)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 4)
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 3)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 1)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 1)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)

    # Generate JSON which should include only second set of job executions (first set rolled off by time)
    later = end_time_1 + FinishedJobExeMetricsOverTime.TOTAL_TIME_PERIOD + datetime.timedelta(seconds=1)
    later += FinishedJobExeMetricsOverTime.BLOCK_LENGTH
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, later)

    # Check expected totals (only the second block remains)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 2)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 2)
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 1)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)

    # Generate JSON where all job executions should have rolled off by time
    later = later + FinishedJobExeMetricsOverTime.TOTAL_TIME_PERIOD
    node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
    self.metrics.generate_status_json(node_list_dict, later)

    # Check expected totals (everything rolled off)
    self.assertEqual(node_list_dict[0]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[0]['job_executions']['failed']['system']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['running']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['completed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['algorithm']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
    self.assertEqual(node_list_dict[1]['job_executions']['failed']['system']['total'], 0)
def create_job_exe(job_type=None, job=None, status='RUNNING', configuration=None, error=None,
                   command_arguments='test_arg', timeout=None, node=None, created=None, queued=None,
                   started=None, pre_started=None, pre_completed=None, job_started=None,
                   job_completed=None, post_started=None, post_completed=None, ended=None,
                   last_modified=None):
    """Creates a job execution model for unit testing

    Arguments left as None fall back to sensible defaults: a job (and node) are created on demand,
    the timeout comes from the job, and created/queued/started/last_modified default to now.

    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    default_time = timezone.now()

    # Fill in missing pieces before building the model
    job = job or create_job(job_type=job_type)
    configuration = configuration or ExecutionConfiguration().get_dict()
    timeout = timeout or job.timeout
    node = node or node_utils.create_node()

    # Timestamps all default to the same "now" value
    created = created or default_time
    queued = queued or default_time
    started = started or default_time
    last_modified = last_modified or default_time

    job_exe = JobExecution.objects.create(job=job, status=status, error=error,
                                          configuration=configuration,
                                          command_arguments=command_arguments, timeout=timeout,
                                          node=node, created=created, queued=queued, started=started,
                                          pre_started=pre_started, pre_completed=pre_completed,
                                          job_started=job_started, job_completed=job_completed,
                                          post_started=post_started, post_completed=post_completed,
                                          ended=ended, last_modified=last_modified)
    job_exe.set_cluster_id('1234')
    return job_exe
def setUp(self):
    """Creates a node with a known agent ID and one job execution running on it."""
    django.setup()

    self.node_agent = 'agent_1'
    node_model = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent)
    self.node = node_model
    self.job_exe = job_test_utils.create_job_exe(node=node_model)