Example #1
0
    def _create_finished_job_exe_messages(self, finished_job_exes):
        """Builds the completed-job and failed-job messages for a batch of finished job executions

        Executions whose status is neither COMPLETED nor FAILED contribute to no message.

        :param finished_job_exes: The finished job executions
        :type finished_job_exes: list
        :returns: The messages
        :rtype: list
        """

        timestamp = now()
        succeeded = []
        errored = []

        for exe in finished_job_exes:
            if exe.status == 'COMPLETED':
                succeeded.append(CompletedJob(exe.job_id, exe.exe_num))
                continue
            if exe.status == 'FAILED':
                errored.append(
                    FailedJob(exe.job_id, exe.exe_num, exe.error.id))

        # Both groups of messages share the single timestamp taken above
        result = create_completed_jobs_messages(succeeded, timestamp)
        result.extend(create_failed_jobs_messages(errored, timestamp))
        return result
Example #2
0
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`

        Fails every unfinished job execution that started before ``self.when`` with the
        built-in 'scheduler-lost' error, queuing the follow-up messages on ``self.new_messages``.
        """

        scheduler_lost = get_builtin_error('scheduler-lost')
        blank_results = TaskResults(do_validate=False)  # Blank
        jobs_to_fail = []
        exe_end_models = []

        # Collect the unfinished executions that started before self.when
        for exe in JobExecution.objects.get_unfinished_job_exes():
            if not exe.started < self.when:
                continue
            jobs_to_fail.append(
                FailedJob(exe.job_id, exe.exe_num, scheduler_lost.id))
            end_model = exe.create_job_exe_end_model(blank_results, 'FAILED',
                                                     scheduler_lost.id,
                                                     self.when)
            exe_end_models.append(end_model)

        if not jobs_to_fail:
            # Nothing to fail, so no follow-up messages are created
            return True

        # Create messages to fail unfinished jobs and executions
        logger.info(
            'Failing %d job(s) that had started but not finished prior to scheduler restart',
            len(jobs_to_fail))
        self.new_messages.extend(
            create_failed_jobs_messages(jobs_to_fail, self.when))
        self.new_messages.extend(create_job_exe_end_messages(exe_end_models))
        return True
Example #3
0
    def test_execute(self):
        """Tests calling FailedJobs.execute() successfully

        Covers jobs that are retried, jobs that are failed (max tries used up or an error that
        cannot be retried) and jobs that are ignored (mismatched exe_num, already FAILED,
        CANCELED). The message is then rebuilt from JSON and executed a second time.
        """

        error_1 = error_test_utils.create_error(should_be_retried=True)
        error_2 = error_test_utils.create_error(should_be_retried=False)

        data = JobData()
        job_1 = job_test_utils.create_job(num_exes=1,
                                          status='QUEUED',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_2 = job_test_utils.create_job(num_exes=1,
                                          status='RUNNING',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_3 = job_test_utils.create_job(num_exes=1,
                                          status='RUNNING',
                                          data=data.get_dict(),
                                          max_tries=1)
        job_4 = job_test_utils.create_job(num_exes=1,
                                          status='RUNNING',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_5 = job_test_utils.create_job(num_exes=1,
                                          status='RUNNING',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_6 = job_test_utils.create_job(num_exes=1,
                                          status='FAILED',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_7 = job_test_utils.create_job(num_exes=0, status='CANCELED')
        job_ids = [
            job_1.id, job_2.id, job_3.id, job_4.id, job_5.id, job_6.id,
            job_7.id
        ]

        # NOTE(review): imported locally, presumably to avoid a circular import - confirm
        from recipe.test import utils as recipe_test_utils
        recipe_1 = recipe_test_utils.create_recipe()
        recipe_test_utils.create_recipe_job(recipe=recipe_1, job=job_3)
        recipe_2 = recipe_test_utils.create_recipe()
        recipe_test_utils.create_recipe_job(recipe=recipe_2, job=job_4)

        when_ended = now()

        # Add jobs to message
        message = FailedJobs()
        message.ended = when_ended
        failed_jobs = [
            FailedJob(job_1.id, job_1.num_exes, error_1.id),
            FailedJob(job_2.id, job_2.num_exes, error_1.id),
            FailedJob(job_3.id, job_3.num_exes, error_1.id),
            # Error that cannot be retried
            FailedJob(job_4.id, job_4.num_exes, error_2.id),
            # Mismatched exe_num
            FailedJob(job_5.id, job_5.num_exes - 1, error_1.id),
            FailedJob(job_6.id, job_6.num_exes, error_1.id),
            FailedJob(job_7.id, job_7.num_exes - 1, error_1.id),
        ]
        for failed_job in failed_jobs:
            if message.can_fit_more():
                message.add_failed_job(failed_job)

        # Execute message
        result = message.execute()
        self.assertTrue(result)

        jobs = Job.objects.filter(id__in=job_ids).order_by('id')
        queued_jobs_msg = None
        update_recipes_msg = None
        self.assertEqual(len(message.new_messages), 2)
        for msg in message.new_messages:
            if msg.type == 'queued_jobs':
                queued_jobs_msg = msg
            elif msg.type == 'update_recipes':
                update_recipes_msg = msg
        # Fail with a clear assertion (not an AttributeError) if a message is missing
        self.assertIsNotNone(queued_jobs_msg)
        self.assertIsNotNone(update_recipes_msg)
        self.assertEqual(len(queued_jobs_msg._queued_jobs),
                         2)  # 2 jobs should have been retried
        self.assertEqual(len(update_recipes_msg._recipe_ids),
                         2)  # 2 jobs should have been failed

        # Job 1 should be retried and put back on the queue
        self.assertEqual(jobs[0].status, 'QUEUED')
        self.assertEqual(jobs[0].num_exes, 1)
        self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
        # Job 2 should be retried and put back on the queue
        self.assertEqual(jobs[1].status, 'RUNNING')
        self.assertEqual(jobs[1].num_exes, 1)
        self.assertEqual(queued_jobs_msg._queued_jobs[1].job_id, job_2.id)
        # Job 3 should be failed since max_tries is used up
        self.assertEqual(jobs[2].status, 'FAILED')
        self.assertEqual(jobs[2].num_exes, 1)
        self.assertEqual(jobs[2].error_id, error_1.id)
        self.assertEqual(jobs[2].ended, when_ended)
        self.assertIn(recipe_1.id, update_recipes_msg._recipe_ids)
        # Job 4 should be failed since error cannot be retried
        self.assertEqual(jobs[3].status, 'FAILED')
        self.assertEqual(jobs[3].num_exes, 1)
        self.assertEqual(jobs[3].error_id, error_2.id)
        self.assertEqual(jobs[3].ended, when_ended)
        self.assertIn(recipe_2.id, update_recipes_msg._recipe_ids)
        # Job 5 should be ignored since mismatched exe_num
        self.assertEqual(jobs[4].status, 'RUNNING')
        self.assertEqual(jobs[4].num_exes, 1)
        # Job 6 should be ignored since it is already failed
        self.assertEqual(jobs[5].status, 'FAILED')
        self.assertEqual(jobs[5].num_exes, 1)
        # Job 7 should be ignored since it is canceled
        self.assertEqual(jobs[6].status, 'CANCELED')
        self.assertEqual(jobs[6].num_exes, 0)

        # Test executing message again
        message_json_dict = message.to_json()
        message = FailedJobs.from_json(message_json_dict)
        result = message.execute()
        self.assertTrue(result)

        jobs = Job.objects.filter(id__in=job_ids).order_by('id')
        self.assertEqual(len(message.new_messages), 1)
        queued_jobs_msg = message.new_messages[0]
        self.assertEqual(queued_jobs_msg.type, 'queued_jobs')
        # The same 2 jobs should have been retried
        self.assertEqual(len(queued_jobs_msg._queued_jobs), 2)

        # Job 1 should be retried and put back on the queue
        self.assertEqual(jobs[0].status, 'QUEUED')
        self.assertEqual(jobs[0].num_exes, 1)
        self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
        # Job 2 should be retried and put back on the queue
        self.assertEqual(jobs[1].status, 'RUNNING')
        self.assertEqual(jobs[1].num_exes, 1)
        self.assertEqual(queued_jobs_msg._queued_jobs[1].job_id, job_2.id)
        # Job 3 should be failed from first execution
        self.assertEqual(jobs[2].status, 'FAILED')
        self.assertEqual(jobs[2].num_exes, 1)
        self.assertEqual(jobs[2].error_id, error_1.id)
        # Job 4 should be failed from first execution
        self.assertEqual(jobs[3].status, 'FAILED')
        self.assertEqual(jobs[3].num_exes, 1)
        self.assertEqual(jobs[3].error_id, error_2.id)
        # Job 5 should be ignored since mismatched exe_num
        self.assertEqual(jobs[4].status, 'RUNNING')
        self.assertEqual(jobs[4].num_exes, 1)
        # Job 6 should be ignored since it is already failed
        self.assertEqual(jobs[5].status, 'FAILED')
        self.assertEqual(jobs[5].num_exes, 1)
        # Job 7 should be ignored since it is canceled
        self.assertEqual(jobs[6].status, 'CANCELED')
        self.assertEqual(jobs[6].num_exes, 0)
Example #4
0
    def test_json(self):
        """Tests converting a FailedJobs message to and from JSON

        The message is serialized with to_json(), rebuilt with from_json(), and the rebuilt
        message is executed; the assertions check the resulting job states and new messages.
        """

        error = error_test_utils.create_error(should_be_retried=True)

        data = JobData()
        job_1 = job_test_utils.create_job(num_exes=1,
                                          status='QUEUED',
                                          data=data.get_dict(),
                                          max_tries=2)
        job_2 = job_test_utils.create_job(num_exes=1,
                                          status='RUNNING',
                                          data=data.get_dict(),
                                          max_tries=1)
        job_3 = job_test_utils.create_job(num_exes=0, status='PENDING')
        job_ids = [job_1.id, job_2.id, job_3.id]

        # NOTE(review): imported locally, presumably to avoid a circular import - confirm
        from recipe.test import utils as recipe_test_utils
        recipe_1 = recipe_test_utils.create_recipe()
        recipe_test_utils.create_recipe_job(recipe=recipe_1, job=job_2)

        when_ended = now()

        # Add jobs to message
        message = FailedJobs()
        message.ended = when_ended
        for job in (job_1, job_2, job_3):
            if message.can_fit_more():
                message.add_failed_job(
                    FailedJob(job.id, job.num_exes, error.id))

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = FailedJobs.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        jobs = Job.objects.filter(id__in=job_ids).order_by('id')
        queued_jobs_msg = None
        update_recipes_msg = None
        self.assertEqual(len(new_message.new_messages), 2)
        for msg in new_message.new_messages:
            if msg.type == 'queued_jobs':
                queued_jobs_msg = msg
            elif msg.type == 'update_recipes':
                update_recipes_msg = msg
        # Fail with a clear assertion (not an AttributeError) if a message is missing
        self.assertIsNotNone(queued_jobs_msg)
        self.assertIsNotNone(update_recipes_msg)
        # Job 1 should be retried and put back on the queue
        self.assertEqual(jobs[0].status, 'QUEUED')
        self.assertEqual(jobs[0].num_exes, 1)
        self.assertEqual(len(queued_jobs_msg._queued_jobs), 1)
        self.assertEqual(queued_jobs_msg._queued_jobs[0].job_id, job_1.id)
        # Job 2 should be failed since max_tries is used up
        self.assertEqual(jobs[1].status, 'FAILED')
        self.assertEqual(jobs[1].num_exes, 1)
        self.assertEqual(jobs[1].error_id, error.id)
        self.assertEqual(jobs[1].ended, when_ended)
        self.assertEqual(len(update_recipes_msg._recipe_ids), 1)
        self.assertIn(recipe_1.id, update_recipes_msg._recipe_ids)
        # Job 3 should ignore update
        self.assertEqual(jobs[2].status, 'PENDING')
        self.assertEqual(jobs[2].num_exes, 0)
Example #5
0
    def test_execute(self):
        """Tests calling RestartScheduler.execute() successfully

        The message is executed once and then again after a JSON round trip. The messages
        produced by each execution are checked, so the second run is verified against its own
        output rather than against the messages left over from the first run.
        """

        started = now()
        scheduler_restarted = started + datetime.timedelta(seconds=30)
        started_later = scheduler_restarted + datetime.timedelta(seconds=30)
        running_job_exe_1 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_2 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_3 = job_test_utils.create_running_job_exe(
            started=started)
        running_job_exe_4 = job_test_utils.create_running_job_exe(
            started=started_later)  # After scheduler restart

        # Set job 1 so it is still QUEUED
        Job.objects.filter(id=running_job_exe_1.job_id).update(status='QUEUED')

        # Set job 3 to COMPLETED, so it should not be failed by scheduler restart
        Job.objects.filter(id=running_job_exe_3.job_id).update(
            status='COMPLETED')

        # Create message
        message = RestartScheduler()
        message.when = scheduler_restarted

        # Execute message
        result = message.execute()
        self.assertTrue(result)

        error = get_builtin_error('scheduler-lost')
        # Jobs 1 and 2 should be in messages to be failed, Jobs 3 and 4 should not be included
        expected_failed_jobs = {
            FailedJob(running_job_exe_1.job_id, running_job_exe_1.exe_num,
                      error.id),
            FailedJob(running_job_exe_2.job_id, running_job_exe_2.exe_num,
                      error.id)
        }
        expected_failed_job_exe_ids = {
            running_job_exe_1.id, running_job_exe_2.id
        }

        def assert_new_messages(executed_message):
            """Checks the messages created by one execution of the given message"""
            failed_jobs_msg = None
            job_exe_end_msg = None
            self.assertEqual(len(executed_message.new_messages), 2)
            for msg in executed_message.new_messages:
                if msg.type == 'failed_jobs':
                    failed_jobs_msg = msg
                elif msg.type == 'create_job_exe_ends':
                    job_exe_end_msg = msg
            # Fail with a clear assertion (not an AttributeError) if a message is missing
            self.assertIsNotNone(failed_jobs_msg)
            self.assertIsNotNone(job_exe_end_msg)

            # list() is required on Python 3, where dict.values() returns a
            # view that cannot be indexed
            failed_jobs = list(failed_jobs_msg._failed_jobs.values())[0]
            self.assertSetEqual(set(failed_jobs), expected_failed_jobs)
            failed_job_exe_ids = set()
            for job_exe_end_model in job_exe_end_msg._job_exe_ends:
                failed_job_exe_ids.add(job_exe_end_model.job_exe_id)
            self.assertSetEqual(failed_job_exe_ids,
                                expected_failed_job_exe_ids)

        assert_new_messages(message)

        # Test executing message again, should get same result
        message_json_dict = message.to_json()
        message = RestartScheduler.from_json(message_json_dict)
        result = message.execute()
        self.assertTrue(result)

        # Check the messages produced by the second execution itself
        assert_new_messages(message)