Example #1
File: test_node.py Project: sau29/scale
    def test_score_job_exe_for_reservation_insufficient_resources(self):
        """Tests calling score_job_exe_for_reservation() when there are not enough resources to reserve for the job"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
        watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
        resource_set = ResourceSet(offered_resources, NodeResources(),
                                   watermark_resources)
        task = HealthTask(
            '1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
        job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(10.0), Mem(50.0)]),
            priority=1000)
        job_exe_2 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(56.0), Mem(15.0)]),
            priority=100)
        scheduling_node = SchedulingNode('agent_1', node, [task],
                                         [job_exe_1, job_exe_2], resource_set)
        queue_model_1 = queue_test_utils.create_queue(priority=100,
                                                      cpus_required=8.0,
                                                      mem_required=40.0,
                                                      disk_in_required=0.0,
                                                      disk_out_required=0.0,
                                                      disk_total_required=0.0)
        job_exe_1 = QueuedJobExecution(queue_model_1)
        queue_model_2 = queue_test_utils.create_queue(priority=1000,
                                                      cpus_required=8.0,
                                                      mem_required=40.0,
                                                      disk_in_required=0.0,
                                                      disk_out_required=0.0,
                                                      disk_total_required=0.0)
        job_exe_2 = QueuedJobExecution(queue_model_2)
        scheduling_node.accept_new_job_exe(job_exe_1)
        scheduling_node.accept_new_job_exe(job_exe_2)

        # We are going to try to reserve the node for a job execution with priority 120
        # Calculate available resources for reservation:
        # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15) - Higher Priority
        # New Job Exes (8, 40) = 135.9 CPUs, 613 MiB memory
        # This new job should NOT fit for reservation
        queue_model = queue_test_utils.create_queue(priority=120,
                                                    cpus_required=140.0,
                                                    mem_required=600.0,
                                                    disk_in_required=0.0,
                                                    disk_out_required=0.0,
                                                    disk_total_required=0.0)
        job_exe = QueuedJobExecution(queue_model)
        job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])

        score = scheduling_node.score_job_exe_for_reservation(
            job_exe, [job_type_resource_1])
        self.assertIsNone(score)
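The comment block above does the reservation math in prose; a standalone sketch of the same arithmetic, using plain tuples rather than Scale's NodeResources API, makes the "does not fit" outcome easy to verify (the numbers come from the test; the tuple layout is purely illustrative):

watermark = (200.0, 700.0)           # (CPUs, MiB memory)
system_tasks = (0.1, 32.0)           # HealthTask resources
higher_pri_existing = (56.0, 15.0)   # running job exe with priority 100 (higher priority than 120)
higher_pri_new = (8.0, 40.0)         # accepted queued job exe with priority 100

available = tuple(round(w - s - e - n, 1) for w, s, e, n in
                  zip(watermark, system_tasks, higher_pri_existing, higher_pri_new))
print(available)                     # (135.9, 613.0)

requested = (140.0, 600.0)           # the priority-120 job exe being scored
fits = all(req <= avail for req, avail in zip(requested, available))
print(fits)                          # False, so score_job_exe_for_reservation() returns None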
Example #2
File: test_node.py Project: sau29/scale
    def test_start_job_exe_tasks(self):
        """Tests calling start_job_exe_tasks() successfully"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
        watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
        resource_set = ResourceSet(offered_resources, NodeResources(),
                                   watermark_resources)
        scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
        job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(10.0), Mem(50.0)]))
        job_exe_2 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(5.0), Mem(25.0)]))
        scheduling_node.accept_job_exe_next_task(job_exe_1, [])
        scheduling_node.accept_job_exe_next_task(job_exe_2, [])
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)

        # Execution canceled, so it will not have a next task to start
        job_exe_1.execution_canceled(now())

        scheduling_node.start_job_exe_tasks()
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 0)
        self.assertEqual(len(scheduling_node.allocated_tasks),
                         1)  # Only job_exe_2 had a next task
Example #3
    def test_job_exe_no_offers(self):
        """Tests the NodeManager where a node is running an exe and has not given offers to Scale in 1 hour.
           Expected behavior: The node is scheduler and DB are in sync and the node is still active"""

        last_offer = now() - datetime.timedelta(hours=1)
        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1])
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Add job to node
        job_test_utils.create_running_job_exe(agent_id=self.agent_1,
                                              node=self.node_1)

        # Set last_offer_received to 1 hour ago
        Node.objects.filter(id=self.node_1.id).update(
            last_offer_received=last_offer)

        # This inspects what nodes are running jobs and what nodes need to be removed if they
        # have not sent offers in the last 5 minutes
        node_mgr.sync_with_database(scheduler_mgr.config)

        # Get the DB and Scheduler state and make sure they are consistent
        db_record = Node.objects.get(id=self.node_1.id)
        scheduler_record = node_mgr.get_node(self.agent_1.agent_id)

        self.assertEqual(db_record.is_active, scheduler_record._is_active)
        self.assertTrue(db_record.is_active)
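The timing here revolves around the five-minute offer window mentioned in the comment above: a node that stopped sending offers an hour ago would normally be a removal candidate, but it is still running a job execution, so it stays active. A minimal, self-contained sketch of that kind of staleness rule (the function and threshold below are illustrative, not Scale's actual implementation):

import datetime

OFFER_TIMEOUT = datetime.timedelta(minutes=5)  # assumed threshold, per the comment above

def should_deactivate(last_offer_received, has_running_job_exes, current_time):
    """Illustrative rule: deactivate only stale nodes with nothing running on them."""
    stale = current_time - last_offer_received > OFFER_TIMEOUT
    return stale and not has_running_job_exes

current = datetime.datetime(2024, 1, 1, 12, 0, 0)
hour_ago = current - datetime.timedelta(hours=1)

print(should_deactivate(hour_ago, has_running_job_exes=True, current_time=current))   # False
print(should_deactivate(hour_ago, has_running_job_exes=False, current_time=current))  # True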
Example #4
    def test_get_nodes_running_job_exes(self):
        """Tests calling NodeManager.get_nodes_running_job_exes()"""

        # Create nodes
        node_1 = node_test_utils.create_node(hostname='node_1')
        node_2 = node_test_utils.create_node(hostname='node_2')
        node_3 = node_test_utils.create_node(hostname='node_3')

        # No running jobs; should be empty
        nodes_w_jobs = Node.objects.get_nodes_running_job_exes()
        self.assertEqual(nodes_w_jobs, [])

        job_test_utils.create_job_exe(node=node_3, status='COMPLETED')
        job_test_utils.create_job_exe(node=node_3, status='FAILED')
        job_test_utils.create_job_exe(node=node_3, status='CANCELED')

        # 0 running jobs
        self.assertEqual(Node.objects.get_nodes_running_job_exes(), [])
        
        # Create a running job_exe
        job_test_utils.create_running_job_exe(node=node_1)

        # 1 running job on node_1
        nodes_w_jobs = Node.objects.get_nodes_running_job_exes()
        self.assertEqual(len(nodes_w_jobs), 1)
        self.assertEqual(nodes_w_jobs[0], node_1.id)

        # Create another running job_exe (created a different way, for completeness)
        job_test_utils.create_job_exe(node=node_2, status='RUNNING')

        # 2 running job_exes
        nodes_w_jobs = Node.objects.get_nodes_running_job_exes()
        self.assertEqual(len(nodes_w_jobs), 2)
        self.assertIn(node_1.id, nodes_w_jobs)
        self.assertIn(node_2.id, nodes_w_jobs)
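The assertions above pin down the contract of get_nodes_running_job_exes(): it returns the distinct IDs of nodes with at least one RUNNING job execution, and nodes whose executions have all finished are excluded. A plain-Python sketch of that selection over an in-memory list (purely illustrative, not the real manager query):

job_exes = [
    {'node_id': 3, 'status': 'COMPLETED'},
    {'node_id': 3, 'status': 'FAILED'},
    {'node_id': 3, 'status': 'CANCELED'},
    {'node_id': 1, 'status': 'RUNNING'},
    {'node_id': 2, 'status': 'RUNNING'},
]

nodes_running = sorted({exe['node_id'] for exe in job_exes if exe['status'] == 'RUNNING'})
print(nodes_running)  # [1, 2] -- node 3 only has finished executions, so it is excluded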
Example #5
    def setUp(self):
        django.setup()

        # Clear error cache so test works correctly
        CACHED_ERRORS.clear()

        self.agent_id = 'agent'
        self.node_model_1 = node_test_utils.create_node()
        self.job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_1)
        self.node_model_2 = node_test_utils.create_node()
        self.job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_2)

        self.job_exe_mgr = JobExecutionManager()
Example #6
File: test_manager.py Project: sau29/scale
    def setUp(self):
        django.setup()

        # Clear error cache so tests work correctly
        reset_error_cache()

        self.agent_id = 'agent'
        self.node_model_1 = node_test_utils.create_node()
        self.job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_1)
        self.node_model_2 = node_test_utils.create_node()
        self.job_exe_2 = job_test_utils.create_running_job_exe(agent_id=self.agent_id, node=self.node_model_2)

        self.task_mgr = TaskManager()
        self.job_exe_mgr = JobExecutionManager()
Example #7
    def test_job_exe_clean_task(self):
        """Tests the NodeManager where a cleanup task is returned to clean up a job execution"""

        when = now()
        node_mgr = NodeManager()
        node_mgr.register_agents([self.agent_1, self.agent_2])
        node_mgr.sync_with_database(scheduler_mgr.config)
        cleanup_mgr = CleanupManager()
        cleanup_mgr.update_nodes(node_mgr.get_nodes())
        tasks = node_mgr.get_next_tasks(when)

        task_mgr = TaskManager()
        # Complete initial cleanup tasks
        for task in tasks:
            task_mgr.launch_tasks([task], now())
            update = job_test_utils.create_task_status_update(
                task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
            task_mgr.handle_task_update(update)
            node_mgr.handle_task_update(update)

        # Mark image pull done to get rid of image tasks
        for node in node_mgr.get_nodes():
            node._image_pull_completed()
            node._update_state()

        job_exe = job_test_utils.create_running_job_exe(agent_id=self.agent_1,
                                                        node=self.node_1)
        # Add a job execution to clean up and get the cleanup task for it
        cleanup_mgr.add_job_execution(job_exe)
        tasks = node_mgr.get_next_tasks(when)
        self.assertEqual(len(tasks), 1)
        task = tasks[0]
        self.assertEqual(task.agent_id, self.agent_1.agent_id)
        self.assertFalse(task.is_initial_cleanup)
        self.assertEqual(len(task.job_exes), 1)
Example #8
    def test_json(self):
        """Tests coverting a RestartScheduler message to and from JSON"""

        started = now()
        scheduler_restarted = started + datetime.timedelta(seconds=30)
        running_job_exe = job_test_utils.create_running_job_exe(
            started=started)

        # Create message
        message = RestartScheduler()
        message.when = scheduler_restarted

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = RestartScheduler.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        failed_jobs_msg = None
        job_exe_end_msg = None
        self.assertEqual(len(new_message.new_messages), 2)
        for msg in new_message.new_messages:
            if msg.type == 'failed_jobs':
                failed_jobs_msg = msg
            elif msg.type == 'create_job_exe_ends':
                job_exe_end_msg = msg
        self.assertEqual(failed_jobs_msg._failed_jobs.values()[0][0].job_id,
                         running_job_exe.job_id)
        self.assertEqual(job_exe_end_msg._job_exe_ends[0].job_exe_id,
                         running_job_exe.id)
Example #9
File: test_node.py Project: sau29/scale
    def test_accept_job_exe_next_task_canceled(self):
        """Tests calling accept_job_exe_next_task() when job exe gets canceled (no next task)"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(10.0), Mem(50.0)])
        task_resources = NodeResources()
        watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
        resource_set = ResourceSet(offered_resources, task_resources,
                                   watermark_resources)
        scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)

        job_exe = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(1.0), Mem(10.0)]))
        waiting_tasks = []

        job_exe.execution_canceled(now())
        had_waiting_task = scheduling_node.accept_job_exe_next_task(
            job_exe, waiting_tasks)
        self.assertFalse(had_waiting_task)
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 0)
        self.assertTrue(
            scheduling_node.allocated_resources.is_equal(NodeResources()))
        self.assertTrue(
            scheduling_node._remaining_resources.is_equal(
                NodeResources([Cpus(10.0), Mem(50.0)])))
        self.assertListEqual(waiting_tasks, [])
Example #10
File: test_node.py Project: sau29/scale
    def test_score_job_exe_for_scheduling_insufficient_resources(self):
        """Tests calling score_job_exe_for_scheduling() when there are not enough resources to schedule the job"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
        task_resources = NodeResources([Cpus(100.0), Mem(500.0)])
        watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
        resource_set = ResourceSet(offered_resources, task_resources,
                                   watermark_resources)
        scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
        # Allocate 10 CPUs and 50 MiB memory to existing job execution
        job_exe = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(10.0), Mem(50.0)]))
        scheduling_node.accept_job_exe_next_task(job_exe, [])

        # Should have 10 CPUs and 50 MiB memory left, so this job execution is too big
        queue_model = queue_test_utils.create_queue(cpus_required=15.0,
                                                    mem_required=40.0,
                                                    disk_in_required=0.0,
                                                    disk_out_required=0.0,
                                                    disk_total_required=0.0)
        job_exe = QueuedJobExecution(queue_model)

        score = scheduling_node.score_job_exe_for_scheduling(job_exe, [])
        self.assertIsNone(score)
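The arithmetic behind the None score is small: 20 CPUs / 100 MiB were offered, 10 / 50 are already allocated to the running job exe's next task, and the remaining 10 / 50 cannot hold a 15 / 40 request. A quick sketch of that check with plain tuples (illustrative only, not Scale's API):

offered = (20.0, 100.0)    # (CPUs, MiB) offered to the node
allocated = (10.0, 50.0)   # already allocated to the running job exe's next task
remaining = tuple(o - a for o, a in zip(offered, allocated))

required = (15.0, 40.0)    # the queued job exe being scored
fits = all(req <= rem for req, rem in zip(required, remaining))
print(remaining, fits)     # (10.0, 50.0) False -> score_job_exe_for_scheduling() returns None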
Example #11
    def test_job_type_limit(self):
        """Tests calling perform_scheduling() with a job type limit"""
        Queue.objects.all().delete()
        job_type_with_limit = job_test_utils.create_seed_job_type()
        job_type_with_limit.max_scheduled = 4
        job_type_with_limit.save()
        running_job_exe_1 = job_test_utils.create_running_job_exe(agent_id=self.agent_1.agent_id,
                                                                  job_type=job_type_with_limit, node=self.node_1)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        queue_test_utils.create_queue(job_type=job_type_with_limit)
        job_type_mgr.sync_with_database()
        # One job of this type is already running
        job_exe_mgr.schedule_job_exes([running_job_exe_1], [])

        offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                                NodeResources([Cpus(0.0), Mem(1024.0), Disk(1024.0)]), now(), None)
        offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                                NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
        resource_mgr.add_new_offers([offer_1, offer_2])

        scheduling_manager = SchedulingManager()
        num_tasks = scheduling_manager.perform_scheduling(self._client, now())
        self.assertEqual(num_tasks, 3)  # One is already running, should only be able to schedule 3 more
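The expected value of num_tasks follows directly from the job type limit: max_scheduled is 4, one execution of that type is already running, and six are queued, so only three more can be scheduled. A tiny sketch of that bookkeeping (numbers lifted from the test above; the variable names are illustrative):

max_scheduled = 4      # job_type_with_limit.max_scheduled
already_running = 1    # running_job_exe_1
queued = 6             # six queue entries created above

open_slots = max(max_scheduled - already_running, 0)
schedulable = min(open_slots, queued)
print(schedulable)     # 3 -> matches the assertion on num_tasks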
Example #12
    def setUp(self):
        django.setup()

        self.scheduler = Scheduler()
        self.node_agent = 'agent_1'
        self.node = node_test_utils.create_node(hostname='host_1', slave_id=self.node_agent)
        self.job_exe = job_test_utils.create_running_job_exe(agent_id=self.node_agent, node=self.node)
        self.task_mgr = TaskManager()
Example #13
File: test_node.py Project: sau29/scale
    def test_score_job_exe_for_scheduling(self):
        """Tests calling score_job_exe_for_scheduling() successfully"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
        task_resources = NodeResources([Cpus(100.0), Mem(500.0)])
        watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
        resource_set = ResourceSet(offered_resources, task_resources,
                                   watermark_resources)
        scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
        # Allocate 10 CPUs and 50 MiB memory to existing job execution
        job_exe = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(10.0), Mem(50.0)]))
        scheduling_node.accept_job_exe_next_task(job_exe, [])

        # Should have 10 CPUs and 50 MiB memory left, so this should be scheduled
        queue_model = queue_test_utils.create_queue(cpus_required=5.0,
                                                    mem_required=40.0,
                                                    disk_in_required=0.0,
                                                    disk_out_required=0.0,
                                                    disk_total_required=0.0)
        job_exe = QueuedJobExecution(queue_model)
        # Expected available 85 CPUs and 110 MiB memory "left" on node
        # (watermark - current tasks - allocated - new job we are scoring)
        # First 2 job types should fit, next 2 are too big, so score should be 2
        job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])
        job_type_resource_2 = NodeResources([Cpus(85.0), Mem(109.0)])
        job_type_resource_3 = NodeResources([Cpus(86.0), Mem(10.0)])
        job_type_resource_4 = NodeResources([Cpus(2.0), Mem(111.0)])

        score = scheduling_node.score_job_exe_for_scheduling(
            job_exe, [
                job_type_resource_1, job_type_resource_2, job_type_resource_3,
                job_type_resource_4
            ])
        self.assertEqual(score, 2)
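Per the comment above, the score is simply the number of supplied job type resource profiles that would still fit in the 85 CPUs / 110 MiB expected to remain after this job execution is placed. A standalone sketch of that counting step (plain tuples, illustrative only):

remaining_after_new_job = (85.0, 110.0)  # (CPUs, MiB) per the comment above

job_type_resources = [
    (2.0, 10.0),    # fits
    (85.0, 109.0),  # fits in both dimensions
    (86.0, 10.0),   # too many CPUs
    (2.0, 111.0),   # too much memory
]

score = sum(1 for cpus, mem in job_type_resources
            if cpus <= remaining_after_new_job[0] and mem <= remaining_after_new_job[1])
print(score)  # 2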
Example #14
File: test_app.py Project: sau29/scale
    def test_timed_out_system_job_task(self):
        """Tests running through a job execution where a system job task times out"""

        ingest_job_type = Ingest.objects.get_ingest_job_type()
        ingest_job_type.max_tries = 1
        ingest_job_type.save()
        running_job_exe = job_test_utils.create_running_job_exe(
            agent_id='agent_1', job_type=ingest_job_type, num_exes=1)

        # Start job-task and then task times out
        when_launched = now() + timedelta(seconds=1)
        job_task_started = when_launched + timedelta(seconds=1)
        when_timed_out = job_task_started + timedelta(seconds=1)
        job_task = running_job_exe.start_next_task()
        self.task_mgr.launch_tasks([job_task], when_launched)
        update = job_test_utils.create_task_status_update(
            job_task.id, 'agent', TaskStatusUpdate.RUNNING, job_task_started)
        self.task_mgr.handle_task_update(update)
        running_job_exe.task_update(update)
        running_job_exe.execution_timed_out(job_task, when_timed_out)

        # Not finished until killed task update arrives
        self.assertFalse(running_job_exe.is_finished())
        self.assertEqual(running_job_exe.status, 'FAILED')
        self.assertEqual(running_job_exe.error_category, 'SYSTEM')
        self.assertEqual(running_job_exe.error.name, 'ingest-timeout')
        self.assertEqual(running_job_exe.finished, when_timed_out)
        self.assertFalse(running_job_exe.is_next_task_ready())

        # Killed task update arrives, job execution is now finished
        job_task_kill = when_timed_out + timedelta(seconds=1)
        update = job_test_utils.create_task_status_update(
            job_task.id, 'agent', TaskStatusUpdate.KILLED, job_task_kill)
        self.task_mgr.handle_task_update(update)
        running_job_exe.task_update(update)
        self.assertTrue(running_job_exe.is_finished())
        self.assertEqual(running_job_exe.status, 'FAILED')
        self.assertEqual(running_job_exe.error_category, 'SYSTEM')
        self.assertEqual(running_job_exe.error.name, 'ingest-timeout')
        self.assertEqual(running_job_exe.finished, when_timed_out)
        self.assertFalse(running_job_exe.is_next_task_ready())
Example #15
File: test_node.py Project: sau29/scale
    def test_add_allocated_offers_remove_all_tasks(self):
        """Tests calling add_allocated_offers() when there are not enough resources for the job exes or node tasks"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        health_task = HealthTask('1234', 'agent_1')
        pull_task = PullTask('1234', 'agent_1')
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        node.get_next_tasks = MagicMock()
        node.get_next_tasks.return_value = [health_task, pull_task]
        offered_resources = NodeResources([Cpus(100.0), Mem(500.0)])
        watermark_resources = NodeResources([Cpus(100.0), Mem(500.0)])
        resource_set = ResourceSet(offered_resources, NodeResources(),
                                   watermark_resources)
        scheduling_node = SchedulingNode('agent_1', node, [], [], resource_set)
        running_job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(1.0), Mem(10.0)]))
        running_job_exe_2 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(2.0), Mem(20.0)]))
        node_task_resources = NodeResources()
        node_task_resources.add(health_task.get_resources())
        node_task_resources.add(pull_task.get_resources())
        all_required_resources = NodeResources()
        all_required_resources.add(node_task_resources)
        all_required_resources.add(
            running_job_exe_1.next_task().get_resources())
        all_required_resources.add(
            running_job_exe_2.next_task().get_resources())
        expected_remaining_resources = NodeResources()
        expected_remaining_resources.add(offered_resources)
        expected_remaining_resources.subtract(node_task_resources)

        # Set up node with node tasks and job exes (there would never be queued job exes since they would be scheduled
        # before add_allocated_offers() was called)
        scheduling_node.accept_node_tasks(now(), [])
        scheduling_node.accept_job_exe_next_task(running_job_exe_1, [])
        scheduling_node.accept_job_exe_next_task(running_job_exe_2, [])
        self.assertEqual(len(scheduling_node.allocated_tasks), 2)
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 2)
        self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
        self.assertTrue(
            scheduling_node.allocated_resources.is_equal(
                all_required_resources))

        # Set up offers (not enough for job exes or node tasks)
        offer_1 = ResourceOffer('offer_1', 'agent_1', '1234',
                                NodeResources([Cpus(0.1),
                                               Mem(600.0)]), now(), None)

        scheduling_node.add_allocated_offers([offer_1])
        self.assertListEqual(scheduling_node.allocated_offers, [offer_1])
        # All allocated tasks and job exes should be gone
        self.assertEqual(len(scheduling_node.allocated_tasks), 0)
        self.assertEqual(len(scheduling_node._allocated_running_job_exes), 0)
        self.assertEqual(len(scheduling_node._allocated_queued_job_exes), 0)
        self.assertTrue(
            scheduling_node.allocated_resources.is_equal(NodeResources()))
        self.assertTrue(
            scheduling_node._remaining_resources.is_equal(offered_resources))
Example #16
    def setUp(self):
        django.setup()

        self.conditions = NodeConditions('test_node')
        self.job_exes = [job_test_utils.create_running_job_exe()]
        self.job_ids = [exe.job_id for exe in self.job_exes]
Example #17
File: test_node.py Project: sau29/scale
    def test_score_job_exe_for_reservation(self):
        """Tests calling score_job_exe_for_reservation() successfully"""

        node = MagicMock()
        node.hostname = 'host_1'
        node.id = 1
        node.is_ready_for_new_job = MagicMock()
        node.is_ready_for_new_job.return_value = True
        node.is_ready_for_next_job_task = MagicMock()
        node.is_ready_for_next_job_task.return_value = True
        offered_resources = NodeResources([Cpus(20.0), Mem(100.0)])
        watermark_resources = NodeResources([Cpus(200.0), Mem(700.0)])
        resource_set = ResourceSet(offered_resources, NodeResources(),
                                   watermark_resources)
        task = HealthTask(
            '1234', 'agent_1')  # Resources are 0.1 CPUs and 32 MiB memory
        job_exe_1 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(10.0), Mem(50.0)]),
            priority=1000)
        job_exe_2 = job_test_utils.create_running_job_exe(
            agent_id=self.agent_id,
            resources=NodeResources([Cpus(56.0), Mem(15.0)]),
            priority=100)
        scheduling_node = SchedulingNode('agent_1', node, [task],
                                         [job_exe_1, job_exe_2], resource_set)
        queue_model_1 = queue_test_utils.create_queue(priority=100,
                                                      cpus_required=8.0,
                                                      mem_required=40.0,
                                                      disk_in_required=0.0,
                                                      disk_out_required=0.0,
                                                      disk_total_required=0.0)
        job_exe_1 = QueuedJobExecution(queue_model_1)
        queue_model_2 = queue_test_utils.create_queue(priority=1000,
                                                      cpus_required=8.0,
                                                      mem_required=40.0,
                                                      disk_in_required=0.0,
                                                      disk_out_required=0.0,
                                                      disk_total_required=0.0)
        job_exe_2 = QueuedJobExecution(queue_model_2)
        scheduling_node.accept_new_job_exe(job_exe_1)
        scheduling_node.accept_new_job_exe(job_exe_2)

        # We are going to try to reserve the node for a job execution with priority 120
        # Calculate available resources for reservation:
        # Watermark (200, 700) - System Tasks (0.1, 32) - Higher Priority Existing Job Exes (56, 15) - Higher Priority
        # New Job Exes (8, 40) = 135.9 CPUs, 613 MiB memory
        # This new job should fit for reservation
        queue_model = queue_test_utils.create_queue(priority=120,
                                                    cpus_required=130.0,
                                                    mem_required=600.0,
                                                    disk_in_required=0.0,
                                                    disk_out_required=0.0,
                                                    disk_total_required=0.0)
        job_exe = QueuedJobExecution(queue_model)
        # Expected available 5.9 CPUs and 13 MiB memory "left" on node
        # (available above - new job we are scoring)
        # First 2 job types should fit, next 2 are too big, so score should be 2
        job_type_resource_1 = NodeResources([Cpus(2.0), Mem(10.0)])
        job_type_resource_2 = NodeResources([Cpus(5.5), Mem(12.0)])
        job_type_resource_3 = NodeResources([Cpus(6.0), Mem(10.0)])
        job_type_resource_4 = NodeResources([Cpus(2.0), Mem(14.0)])

        score = scheduling_node.score_job_exe_for_reservation(
            job_exe, [
                job_type_resource_1, job_type_resource_2, job_type_resource_3,
                job_type_resource_4
            ])
        self.assertEqual(score, 2)
Example #18
    def test_execute(self):
        """Tests calling RestartScheduler.execute() successfully"""

        started = now()
        scheduler_restarted = started + datetime.timedelta(seconds=30)
        started_later = scheduler_restarted + datetime.timedelta(seconds=30)
        running_job_exe_1 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_2 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_3 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_4 = job_test_utils.create_running_job_exe(
            started=started_later)  # After scheduler restart

        # Set job 1 so it is still QUEUED
        Job.objects.filter(id=running_job_exe_1.job_id).update(status='QUEUED')

        # Set job 3 to COMPLETED, so it should not be failed by scheduler restart
        Job.objects.filter(id=running_job_exe_3.job_id).update(
            status='COMPLETED')

        # Create message
        message = RestartScheduler()
        message.when = scheduler_restarted

        # Execute message
        result = message.execute()
        self.assertTrue(result)

        failed_jobs_msg = None
        job_exe_end_msg = None
        self.assertEqual(len(message.new_messages), 2)
        for msg in message.new_messages:
            if msg.type == 'failed_jobs':
                failed_jobs_msg = msg
            elif msg.type == 'create_job_exe_ends':
                job_exe_end_msg = msg

        error = get_builtin_error('scheduler-lost')
        # Jobs 1 and 2 should be in messages to be failed, Jobs 3 and 4 should not be included
        expected_failed_jobs = {
            FailedJob(running_job_exe_1.job_id, running_job_exe_1.exe_num,
                      error.id),
            FailedJob(running_job_exe_2.job_id, running_job_exe_2.exe_num,
                      error.id)
        }
        expected_failed_job_exe_ids = {
            running_job_exe_1.id, running_job_exe_2.id
        }
        self.assertSetEqual(set(failed_jobs_msg._failed_jobs.values()[0]),
                            expected_failed_jobs)
        failed_job_exe_ids = set()
        for job_exe_end_model in job_exe_end_msg._job_exe_ends:
            failed_job_exe_ids.add(job_exe_end_model.job_exe_id)
        self.assertSetEqual(failed_job_exe_ids, expected_failed_job_exe_ids)

        # Test executing message again, should get same result
        message_json_dict = message.to_json()
        message = RestartScheduler.from_json(message_json_dict)
        result = message.execute()
        self.assertTrue(result)

        # Jobs 1 and 2 should be in messages to be failed, Jobs 3 and 4 should not be included
        expected_failed_jobs = {
            FailedJob(running_job_exe_1.job_id, running_job_exe_1.exe_num,
                      error.id),
            FailedJob(running_job_exe_2.job_id, running_job_exe_2.exe_num,
                      error.id)
        }
        expected_failed_job_exe_ids = {
            running_job_exe_1.id, running_job_exe_2.id
        }
        self.assertSetEqual(set(failed_jobs_msg._failed_jobs.values()[0]),
                            expected_failed_jobs)
        failed_job_exe_ids = set()
        for job_exe_end_model in job_exe_end_msg._job_exe_ends:
            failed_job_exe_ids.add(job_exe_end_model.job_exe_id)
        self.assertSetEqual(failed_job_exe_ids, expected_failed_job_exe_ids)
Example #19
    def test_json(self):
        """Tests converting a CreateJobExecutionEnd message to and from JSON"""

        job_exe_1 = job_test_utils.create_running_job_exe()
        job_exe_2 = job_test_utils.create_running_job_exe()
        job_exe_3 = job_test_utils.create_running_job_exe()
        job_exe_4 = job_test_utils.create_running_job_exe()
        job_exe_5 = job_test_utils.create_running_job_exe()
        job_exe_ids = [
            job_exe_1.id, job_exe_2.id, job_exe_3.id, job_exe_4.id,
            job_exe_5.id
        ]

        # Execution that was immediately canceled
        job_exe_1.execution_canceled(now())

        # Execution that was canceled after a task launched
        task_2 = job_exe_2.start_next_task()
        task_2.launch(now())
        job_exe_2.execution_canceled(now())
        update = job_test_utils.create_task_status_update(
            task_2.id, task_2.agent_id, TaskStatusUpdate.KILLED, now())
        task_2.update(update)
        job_exe_2.task_update(update)

        # Execution where a task timed out
        task_3 = job_exe_3.start_next_task()
        task_3.launch(now())
        job_exe_3.execution_timed_out(task_3, now())
        update = job_test_utils.create_task_status_update(
            task_3.id, task_3.agent_id, TaskStatusUpdate.KILLED, now())
        task_3.update(update)
        job_exe_3.task_update(update)

        # Execution where a task failed
        task_4 = job_exe_4.start_next_task()
        task_4.launch(now())
        update = job_test_utils.create_task_status_update(
            task_4.id, task_4.agent_id, TaskStatusUpdate.FAILED, now())
        task_4.update(update)
        job_exe_4.task_update(update)

        # Execution that completed
        while not job_exe_5.is_finished():
            task_5 = job_exe_5.start_next_task()
            task_5.launch(now())
            update = job_test_utils.create_task_status_update(
                task_5.id, task_5.agent_id, TaskStatusUpdate.RUNNING, now())
            task_5.update(update)
            job_exe_5.task_update(update)
            update = job_test_utils.create_task_status_update(
                task_5.id, task_5.agent_id, TaskStatusUpdate.FINISHED, now())
            task_5.update(update)
            job_exe_5.task_update(update)

        # Add models to message
        message = CreateJobExecutionEnd()
        if message.can_fit_more():
            message.add_job_exe_end(job_exe_1.create_job_exe_end_model())
        if message.can_fit_more():
            message.add_job_exe_end(job_exe_2.create_job_exe_end_model())
        if message.can_fit_more():
            message.add_job_exe_end(job_exe_3.create_job_exe_end_model())
        if message.can_fit_more():
            message.add_job_exe_end(job_exe_4.create_job_exe_end_model())
        if message.can_fit_more():
            message.add_job_exe_end(job_exe_5.create_job_exe_end_model())

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = CreateJobExecutionEnd.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        job_exe_ends = JobExecutionEnd.objects.filter(
            job_exe_id__in=job_exe_ids).order_by('job_exe_id')
        self.assertEqual(len(job_exe_ends), 5)
        self.assertEqual(job_exe_ends[0].status, 'CANCELED')
        self.assertEqual(job_exe_ends[1].status, 'CANCELED')
        self.assertEqual(job_exe_ends[2].status, 'FAILED')
        self.assertEqual(job_exe_ends[3].status, 'FAILED')
        self.assertEqual(job_exe_ends[4].status, 'COMPLETED')
Example #20
    def test_execute(self):
        """Tests calling CreateJobExecutionEnd.execute() successfully"""

        # Add 3 job_exe_end models to messages 1, 2, and 3
        message_1 = CreateJobExecutionEnd()
        message_2 = CreateJobExecutionEnd()
        message_3 = CreateJobExecutionEnd()
        job_exe_ids = []
        for _ in range(3):
            job_exe = job_test_utils.create_running_job_exe()
            job_exe_ids.append(job_exe.id)
            while not job_exe.is_finished():
                task = job_exe.start_next_task()
                task.launch(now())
                update = job_test_utils.create_task_status_update(
                    task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
                task.update(update)
                job_exe.task_update(update)
                update = job_test_utils.create_task_status_update(
                    task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
                task.update(update)
                job_exe.task_update(update)
            message_1.add_job_exe_end(job_exe.create_job_exe_end_model())
            # Test having duplicate models
            message_1.add_job_exe_end(job_exe.create_job_exe_end_model())
            message_2.add_job_exe_end(job_exe.create_job_exe_end_model())
            message_3.add_job_exe_end(job_exe.create_job_exe_end_model())

        # Execute message 1 with 3 job_exe_end models
        message_1.execute()
        self.assertEqual(
            JobExecutionEnd.objects.filter(job_exe_id__in=job_exe_ids).count(),
            3)

        # Add more job_exe_end models to messages 2 and 3
        while message_2.can_fit_more():
            job_exe = job_test_utils.create_running_job_exe()
            job_exe_ids.append(job_exe.id)
            while not job_exe.is_finished():
                task = job_exe.start_next_task()
                task.launch(now())
                update = job_test_utils.create_task_status_update(
                    task.id, task.agent_id, TaskStatusUpdate.RUNNING, now())
                task.update(update)
                job_exe.task_update(update)
                update = job_test_utils.create_task_status_update(
                    task.id, task.agent_id, TaskStatusUpdate.FINISHED, now())
                task.update(update)
                job_exe.task_update(update)
            message_2.add_job_exe_end(job_exe.create_job_exe_end_model())
            message_3.add_job_exe_end(job_exe.create_job_exe_end_model())

        # Execute message 2 with the same 3 job_exe_end models from before, plus new ones
        # Old models should not cause an error and only new ones should get created
        message_2.execute()
        self.assertEqual(
            JobExecutionEnd.objects.filter(job_exe_id__in=job_exe_ids).count(),
            len(job_exe_ids))

        # Execute message 3 with all old models
        # Old models should not cause an error and no new ones should get created
        message_3.execute()
        self.assertEqual(
            JobExecutionEnd.objects.filter(job_exe_id__in=job_exe_ids).count(),
            len(job_exe_ids))
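The test depends on execute() being idempotent: re-submitting job_exe_end models that already exist must neither raise nor create duplicate rows, whether the duplicates occur within one message or across messages. A minimal sketch of insert-if-absent keyed on job_exe_id (illustrative only, not the actual message implementation):

existing_ids = set()  # stands in for job_exe_ids already persisted as job_exe_end rows

def execute(job_exe_end_models):
    """Insert only models whose job_exe_id has not been seen before; return how many were created."""
    created = 0
    for model in job_exe_end_models:
        if model['job_exe_id'] not in existing_ids:
            existing_ids.add(model['job_exe_id'])
            created += 1
    return created

batch = [{'job_exe_id': 1}, {'job_exe_id': 1}, {'job_exe_id': 2}]  # includes a duplicate
print(execute(batch))  # 2 -- the in-batch duplicate is ignored
print(execute(batch))  # 0 -- re-executing the same message creates nothing new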
Example #21
    def test_running_executions(self):
        """Tests the metrics with running executions that complete"""

        node_model_1 = node_test_utils.create_node()
        node_model_2 = node_test_utils.create_node()
        job_type_1 = job_test_utils.create_seed_job_type()
        job_type_2 = job_test_utils.create_seed_job_type()
        job_exe_1 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_1,
                                                          node=node_model_1)
        job_exe_2 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_1,
                                                          node=node_model_1)
        job_exe_3 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_1,
                                                          node=node_model_1)
        job_exe_4 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_2,
                                                          node=node_model_1)
        job_exe_5 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_1,
                                                          node=node_model_2)
        job_exe_6 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_1,
                                                          node=node_model_2)
        job_exe_7 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_2,
                                                          node=node_model_2)
        job_exe_8 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_2,
                                                          node=node_model_2)
        job_exe_9 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                          job_type=job_type_2,
                                                          node=node_model_2)
        job_exe_10 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                           job_type=job_type_2,
                                                           node=node_model_2)
        job_exe_11 = job_test_utils.create_running_job_exe(agent_id='agent',
                                                           job_type=job_type_2,
                                                           node=node_model_2)

        # NOTE: This unit test is about to get CRAZY. I apologize for the complexity, but this is needed for
        # thorough testing
        self.metrics.add_running_job_exes([
            job_exe_1, job_exe_2, job_exe_3, job_exe_4, job_exe_5, job_exe_6,
            job_exe_7, job_exe_8, job_exe_9, job_exe_10, job_exe_11
        ])
        node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
        self.metrics.generate_status_json(node_list_dict, now())

        # Check expected totals
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['total'], 4)
        for job_type_dict in node_list_dict[0]['job_executions']['running'][
                'by_job_type']:
            if job_type_dict['job_type_id'] == job_type_1.id:
                self.assertEqual(job_type_dict['count'], 3)
            elif job_type_dict['job_type_id'] == job_type_2.id:
                self.assertEqual(job_type_dict['count'], 1)
            else:
                self.fail('Unexpected job type ID')
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['system']['total'],
            0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['running']['total'], 7)
        for job_type_dict in node_list_dict[1]['job_executions']['running'][
                'by_job_type']:
            if job_type_dict['job_type_id'] == job_type_1.id:
                self.assertEqual(job_type_dict['count'], 2)
            elif job_type_dict['job_type_id'] == job_type_2.id:
                self.assertEqual(job_type_dict['count'], 5)
            else:
                self.fail('Unexpected job type ID')
        self.assertEqual(
            node_list_dict[1]['job_executions']['completed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['system']['total'],
            0)

        # Finish some job executions
        end_time_1 = now()
        job_exe_1._set_final_status('COMPLETED', end_time_1)
        job_exe_2._set_final_status('FAILED',
                                    end_time_1,
                                    error=self.data_error)
        job_exe_4._set_final_status('FAILED', end_time_1, error=self.alg_error)
        self.metrics.job_exe_finished(job_exe_1)
        self.metrics.job_exe_finished(job_exe_2)
        self.metrics.job_exe_finished(job_exe_4)
        node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
        self.metrics.generate_status_json(
            node_list_dict, end_time_1 + datetime.timedelta(seconds=1))

        # Check expected totals
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['running']['by_job_type']),
            1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['completed']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['by_job_type'][0]
            ['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['by_job_type'][0]
            ['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['total'], 2)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['failed']['algorithm']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['by_job_type'][0]['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['by_job_type'][0]['job_type_id'], job_type_2.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['failed']['data']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']
            ['by_job_type'][0]['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']
            ['by_job_type'][0]['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['system']['total'],
            0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['running']['total'], 7)
        for job_type_dict in node_list_dict[1]['job_executions']['running'][
                'by_job_type']:
            if job_type_dict['job_type_id'] == job_type_1.id:
                self.assertEqual(job_type_dict['count'], 2)
            elif job_type_dict['job_type_id'] == job_type_2.id:
                self.assertEqual(job_type_dict['count'], 5)
            else:
                self.fail('Unexpected job type ID')
        self.assertEqual(
            node_list_dict[1]['job_executions']['completed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['system']['total'],
            0)

        # Finish the remaining job executions (all of which are on node 2)
        end_time_2 = end_time_1 + FinishedJobExeMetricsOverTime.BLOCK_LENGTH
        job_exe_5._set_final_status('COMPLETED', end_time_2)
        job_exe_6._set_final_status('COMPLETED', end_time_2)
        job_exe_7._set_final_status('COMPLETED', end_time_2)
        job_exe_8._set_final_status('COMPLETED', end_time_2)
        job_exe_9._set_final_status('COMPLETED', end_time_2)
        job_exe_10._set_final_status('COMPLETED', end_time_2)
        job_exe_11._set_final_status('COMPLETED', end_time_2)
        self.metrics.job_exe_finished(job_exe_5)
        self.metrics.job_exe_finished(job_exe_6)
        self.metrics.job_exe_finished(job_exe_7)
        self.metrics.job_exe_finished(job_exe_8)
        self.metrics.job_exe_finished(job_exe_9)
        self.metrics.job_exe_finished(job_exe_10)
        self.metrics.job_exe_finished(job_exe_11)
        node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
        self.metrics.generate_status_json(node_list_dict, end_time_2)

        # Check expected totals
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['running']['by_job_type']),
            1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['completed']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['by_job_type'][0]
            ['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['by_job_type'][0]
            ['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['total'], 2)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['failed']['algorithm']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['by_job_type'][0]['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['by_job_type'][0]['job_type_id'], job_type_2.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['failed']['data']
                ['by_job_type']), 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']
            ['by_job_type'][0]['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']
            ['by_job_type'][0]['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['system']['total'],
            0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['running']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['completed']['total'], 7)
        for job_type_dict in node_list_dict[1]['job_executions']['completed'][
                'by_job_type']:
            if job_type_dict['job_type_id'] == job_type_1.id:
                self.assertEqual(job_type_dict['count'], 2)
            elif job_type_dict['job_type_id'] == job_type_2.id:
                self.assertEqual(job_type_dict['count'], 5)
            else:
                self.fail('Unexpected job type ID')
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['system']['total'],
            0)

        # Let all finished job executions roll off over time; only running executions remain
        end_time_3 = end_time_2 + FinishedJobExeMetricsOverTime.TOTAL_TIME_PERIOD
        end_time_3 += FinishedJobExeMetricsOverTime.BLOCK_LENGTH + datetime.timedelta(
            seconds=1)
        node_list_dict = [{'id': node_model_1.id}, {'id': node_model_2.id}]
        self.metrics.generate_status_json(node_list_dict, end_time_3)

        # Check expected totals
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['total'], 1)
        self.assertEqual(
            len(node_list_dict[0]['job_executions']['running']['by_job_type']),
            1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['count'], 1)
        self.assertEqual(
            node_list_dict[0]['job_executions']['running']['by_job_type'][0]
            ['job_type_id'], job_type_1.id)
        self.assertEqual(
            node_list_dict[0]['job_executions']['completed']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[0]['job_executions']['failed']['system']['total'],
            0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['running']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['completed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['algorithm']
            ['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['data']['total'], 0)
        self.assertEqual(
            node_list_dict[1]['job_executions']['failed']['system']['total'],
            0)
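The per-node totals asserted throughout this test are just counts of running executions grouped by node and by job type (4 on node 1 and 7 on node 2 at the start). A compact sketch of that aggregation with collections.Counter, mirroring the initial layout above (illustrative of the bookkeeping only, not Scale's metrics code):

from collections import Counter

running_exes = [
    # (node, job_type) pairs mirroring job_exe_1 through job_exe_11 above
    (1, 'type_1'), (1, 'type_1'), (1, 'type_1'), (1, 'type_2'),
    (2, 'type_1'), (2, 'type_1'),
    (2, 'type_2'), (2, 'type_2'), (2, 'type_2'), (2, 'type_2'), (2, 'type_2'),
]

per_node_total = Counter(node for node, _ in running_exes)
per_node_by_type = Counter(running_exes)

print(per_node_total[1], per_node_total[2])                              # 4 7
print(per_node_by_type[(1, 'type_1')], per_node_by_type[(2, 'type_2')])  # 3 5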