예제 #1
0
 def test_job_request_abort(self):
     # Requesting an abort on a scheduler Job must leave every host queue
     # entry of that job flagged as aborted.
     django_job = self._create_job(hosts=[5, 6])
     job = scheduler_models.Job(django_job.id)
     job.request_abort()
     django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
     # Guard against a vacuous pass: with no entries the loop below would
     # never execute a single assertion.
     self.assertTrue(django_hqes, 'job has no host queue entries to check')
     for hqe in django_hqes:
         self.assertTrue(hqe.aborted)
예제 #2
0
    def test_next_group_name(self):
        # Called without a group name, the first name handed out is
        # expected to be 'group0'.
        model_job = self._create_job(metahosts=[1])
        scheduler_job = scheduler_models.Job(id=model_job.id)
        first_name = scheduler_job._next_group_name()
        self.assertEqual('group0', first_name)

        # Record 'my_rack.group0' as the execution subdir of every entry of
        # the job; the next name requested for 'my/rack' must then be
        # 'my_rack.group1'.
        entries = model_job.hostqueueentry_set.filter()
        for entry in entries:
            entry.execution_subdir = 'my_rack.group0'
            entry.save()
        next_name = scheduler_job._next_group_name('my/rack')
        self.assertEqual('my_rack.group1', next_name)
 def test_atomic_group_scheduling_fail(self):
     # A job whose synch_count exceeds the number of machines in its atomic
     # group is expected to be aborted right away.
     django_job = self._create_job(synchronous=True, atomic_group=1)
     django_job.synch_count = 4
     django_job.save()
     scheduler_job = scheduler_models.Job(id=django_job.id)

     # Run the scheduler and make sure nothing unexpected got scheduled.
     self._run_scheduler()
     self._check_for_extra_schedulings()

     # The job must own exactly one queue entry, and it must be aborted.
     entries = scheduler_job.get_host_queue_entries()
     self.assertEqual(1, len(entries))
     actual_status = entries[0].status
     self.assertEqual(actual_status, models.HostQueueEntry.Status.ABORTED)
예제 #4
0
    def test_run_if_ready_delays(self):
        # Exercises Job.run_if_ready() on an atomic group job with two hosts:
        # first the delayed "re-check readiness" task it queues, then the
        # conditions under which the delay is bypassed and run() is invoked.
        # Also tests Job.run_with_ready_delay() on atomic group jobs.
        #
        # NOTE: the statements below are order-sensitive; each
        # check_playback() verifies the expectations recorded just before it.
        django_job = self._create_job(hosts=[5, 6], atomic_group=1)
        job = scheduler_models.Job(django_job.id)
        self.assertEqual(1, job.synch_count)
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        self.assertEqual(2, len(django_hqes))
        self.assertEqual(2, django_hqes[0].atomic_group.max_number_of_machines)

        def set_hqe_status(django_hqe, status):
            # Persist the status on the Django HQE and set the same status
            # on the host of the matching scheduler-side HQE.
            django_hqe.status = status
            django_hqe.save()
            scheduler_models.HostQueueEntry(
                django_hqe.id).host.set_status(status)

        # An initial state, our synch_count is 1
        set_hqe_status(django_hqes[0], models.HostQueueEntry.Status.VERIFYING)
        set_hqe_status(django_hqes[1], models.HostQueueEntry.Status.PENDING)

        # So that we don't depend on the config file value during the test.
        self.assertTrue(scheduler_config.config.
                        secs_to_wait_for_atomic_group_hosts is not None)
        self.god.stub_with(scheduler_config.config,
                           'secs_to_wait_for_atomic_group_hosts', 123456)

        # Get the pending one as a scheduler_models.HostQueueEntry object.
        hqe = scheduler_models.HostQueueEntry(django_hqes[1].id)
        self.assertFalse(job._delay_ready_task)
        self.assertTrue(job.is_ready())

        # Ready with one pending, one verifying and an atomic group should
        # result in a DelayCallTask to re-check if we're ready a while later.
        job.run_if_ready(hqe)
        self.assertEqual('Waiting', hqe.status)
        self._dispatcher._schedule_delay_tasks()
        self.assertEqual('Pending', hqe.status)
        agent = self._dispatcher._agents[0]
        self.assertTrue(job._delay_ready_task)
        self.assertTrue(isinstance(agent, monitor_db.Agent))
        self.assertTrue(agent.task)
        delay_task = agent.task
        self.assertTrue(isinstance(delay_task,
                                   scheduler_models.DelayedCallTask))
        self.assertFalse(delay_task.is_done())

        # Replace the collaborators with mocks so the delayed callback can be
        # driven directly with controlled pending counts.
        self.god.stub_function(delay_task, 'abort')

        self.god.stub_function(job, 'run')

        self.god.stub_function(job, '_pending_count')
        self.god.stub_with(job, 'synch_count', 9)
        self.god.stub_function(job, 'request_abort')

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and does not call run if there are not enough hosts
        # in pending after the delay.
        job._pending_count.expect_call().and_return(0)
        job.request_abort.expect_call()
        delay_task._callback()
        self.god.check_playback()

        # Test that the DelayedCallTask's callback queued up above does the
        # correct thing and returns the Agent returned by job.run() if
        # there are still enough hosts pending after the delay.
        job.synch_count = 4
        job._pending_count.expect_call().and_return(4)
        job.run.expect_call(hqe)
        delay_task._callback()
        self.god.check_playback()

        job._pending_count.expect_call().and_return(4)

        # Adjust the delay deadline so that enough time has passed.
        job._delay_ready_task.end_time = time.time() - 111111
        job.run.expect_call(hqe)
        # ...the delay_expired condition should cause us to call run()
        self._dispatcher._handle_agents()
        self.god.check_playback()
        delay_task.success = False

        # Adjust the delay deadline back so that enough time has not passed.
        # No expectations are recorded here, so none of the stubbed methods
        # may be called while the agents are handled.
        job._delay_ready_task.end_time = time.time() + 111111
        self._dispatcher._handle_agents()
        self.god.check_playback()

        # Now max_number_of_machines HQEs are in pending state.  Remaining
        # delay will now be ignored.
        other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
        self.god.unstub(job, 'run')
        self.god.unstub(job, '_pending_count')
        self.god.unstub(job, 'synch_count')
        self.god.unstub(job, 'request_abort')
        # ...the over_max_threshold test should cause us to call run()
        delay_task.abort.expect_call()
        other_hqe.on_pending()
        self.assertEqual('Starting', other_hqe.status)
        self.assertEqual('Starting', hqe.status)
        self.god.stub_function(job, 'run')
        self.god.unstub(delay_task, 'abort')

        hqe.set_status('Pending')
        other_hqe.set_status('Pending')
        # Now we're not over the max for the atomic group.  But all assigned
        # hosts are in pending state.  over_max_threshold should make us run().
        hqe.atomic_group.max_number_of_machines += 1
        hqe.atomic_group.save()
        job.run.expect_call(hqe)
        hqe.on_pending()
        self.god.check_playback()
        # Restore the atomic group's original machine limit.
        hqe.atomic_group.max_number_of_machines -= 1
        hqe.atomic_group.save()

        other_hqe = scheduler_models.HostQueueEntry(django_hqes[0].id)
        self.assertTrue(hqe.job is other_hqe.job)
        # DBObject classes should reuse instances so these should be the same.
        self.assertEqual(job, other_hqe.job)
        self.assertEqual(other_hqe.job, hqe.job)
        # Be sure our delay was not lost during the other_hqe construction.
        self.assertEqual(job._delay_ready_task, delay_task)
        self.assertTrue(job._delay_ready_task)
        self.assertFalse(job._delay_ready_task.is_done())
        self.assertFalse(job._delay_ready_task.aborted)

        # We want the real run() to be called below.
        self.god.unstub(job, 'run')

        # We pass in the other HQE this time the same way it would happen
        # for real when one host finishes verifying and enters pending.
        job.run_if_ready(other_hqe)

        # The delayed task must be aborted by the actual run() call above.
        self.assertTrue(job._delay_ready_task.aborted)
        self.assertFalse(job._delay_ready_task.success)
        self.assertTrue(job._delay_ready_task.is_done())

        # Check that job run() and _finish_run() were called by the above:
        self._dispatcher._schedule_running_host_queue_entries()
        agent = self._dispatcher._agents[0]
        self.assertTrue(agent.task)
        task = agent.task
        self.assertTrue(isinstance(task, monitor_db.QueueTask))
        # Requery these hqes in order to verify the status from the DB.
        django_hqes = list(models.HostQueueEntry.objects.filter(job=job.id))
        for entry in django_hqes:
            self.assertEqual(models.HostQueueEntry.Status.STARTING,
                             entry.status)

        # We're already running, but more calls to run_with_ready_delay can
        # continue to come in as straggler hosts enter Pending.  Make sure we
        # don't do anything: run() is stubbed with no expected calls.
        self.god.stub_function(job, 'run')
        job.run_with_ready_delay(hqe)
        self.god.check_playback()
        self.god.unstub(job, 'run')