Ejemplo n.º 1
0
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`

        Sorts every requested job into one of two follow-up actions. Jobs that can be re-queued and whose
        current execution number matches the requested one get their max tries increased and are passed to
        the queued jobs messages. Of the remaining jobs, those that can be un-canceled are passed to the
        uncancel messages. Requested jobs that have no matching job model are logged and skipped.

        :returns: Always True
        :rtype: bool
        """

        when = now()
        job_ids = [requeue_job.job_id for requeue_job in self._requeue_jobs]
        jobs_to_requeue = []
        job_ids_to_uncancel = []

        # Find jobs that can be re-queued and have valid exe_num
        job_models = {job.id: job for job in Job.objects.get_basic_jobs(job_ids)}
        for requeue_job in self._requeue_jobs:
            job_model = job_models.get(requeue_job.job_id)
            if job_model is None:
                # No model came back for this ID; skip it rather than failing the whole batch with a KeyError
                logger.warning('Job %s could not be found and will not be re-queued', requeue_job.job_id)
                continue
            if job_model.can_be_requeued() and job_model.num_exes == requeue_job.exe_num:
                jobs_to_requeue.append(QueuedJob(job_model.id, job_model.num_exes))
            elif job_model.can_be_uncanceled():
                job_ids_to_uncancel.append(job_model.id)
        job_ids_to_requeue = [job.job_id for job in jobs_to_requeue]

        # Reset max_tries for jobs that will be re-queued
        if job_ids_to_requeue:
            logger.info('There are %d job(s) to re-queue, increasing max tries', len(job_ids_to_requeue))
            Job.objects.increment_max_tries(job_ids_to_requeue, when)

        # Create messages to queue the jobs
        self.new_messages.extend(create_queued_jobs_messages(jobs_to_requeue, requeue=True, priority=self.priority))

        # Create messages to uncancel jobs
        self.new_messages.extend(create_uncancel_jobs_messages(job_ids_to_uncancel, when))

        return True
Ejemplo n.º 2
0
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`

        Processes the input of this message's job and, when the job has no executions yet, creates the
        message that places it on the queue. The method stops early, with only a log entry, when the job
        does not exist, when its status is neither PENDING nor BLOCKED, or when it has no input and no
        recipe. When generating input from the recipe raises InvalidData, a cancel message is created for
        the job instead.

        :returns: True on every path
        :rtype: bool
        """

        from queue.messages.queued_jobs import create_queued_jobs_messages, QueuedJob

        try:
            job_model = Job.objects.get_job_with_interfaces(self.job_id)
        except Job.DoesNotExist:
            logger.exception('Failed to get job %d - job does not exist. Message will not re-run.', self.job_id)
            return True

        # Any other status is treated as input that was already processed
        if job_model.status not in ('PENDING', 'BLOCKED'):
            logger.warning('Job %d input has already been processed. Message will not re-run', self.job_id)
            return True

        missing_input = not job_model.has_input()

        # Without a recipe there is nothing to generate the missing input from
        if missing_input and not job_model.recipe:
            logger.error('Job %d has no input and is not in a recipe. Message will not re-run.', self.job_id)
            return True

        if missing_input:
            try:
                self._generate_input_data_from_recipe(job_model)
            except InvalidData:
                logger.exception(
                    'Recipe created invalid input data for job %d. Message will not re-run. Cancelling job that cannot be queued.',
                    self.job_id)
                cancel_messages = create_cancel_jobs_messages([self.job_id], now())
                self.new_messages.extend(cancel_messages)
                return True

        # Process the job's input on a locked job model
        with transaction.atomic():
            locked_job = Job.objects.get_locked_job(self.job_id)
            Job.objects.process_job_input(locked_job)

        # Only a job without any executions is sent on to the queue
        if locked_job.num_exes != 0:
            return True

        logger.info('Processed input for job %d, sending message to queue job', self.job_id)
        first_execution = QueuedJob(locked_job.id, 0)
        self.new_messages.extend(create_queued_jobs_messages([first_execution], requeue=False))
        return True
Ejemplo n.º 3
0
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`

        Processes the input of this message's job and, when the job has no executions yet, creates the
        message that places it on the queue. The method stops early, with only a log entry, when the job
        does not exist, when it has no input and no recipe, or when generating input from the recipe raises
        InvalidData.

        :returns: True on every path
        :rtype: bool
        """

        from queue.messages.queued_jobs import create_queued_jobs_messages, QueuedJob

        try:
            job = Job.objects.get_job_with_interfaces(self.job_id)
        except Job.DoesNotExist:
            # A job that no longer exists cannot be processed; log it instead of letting the lookup error escape
            logger.exception(
                'Failed to get job %d - job does not exist. Message will not re-run.',
                self.job_id)
            return True

        if not job.has_input():
            # Without a recipe there is nothing to generate the missing input from
            if not job.recipe:
                logger.error(
                    'Job %d has no input and is not in a recipe. Message will not re-run.',
                    self.job_id)
                return True

            try:
                self._generate_input_data_from_recipe(job)
            except InvalidData:
                logger.exception(
                    'Recipe created invalid input data for job %d. Message will not re-run.',
                    self.job_id)
                return True

        # Lock job model and process job's input data
        with transaction.atomic():
            job = Job.objects.get_locked_job(self.job_id)
            Job.objects.process_job_input(job)

        # Create message to queue the job
        if job.num_exes == 0:
            logger.info(
                'Processed input for job %d, sending message to queue job',
                self.job_id)
            self.new_messages.extend(
                create_queued_jobs_messages([QueuedJob(job.id, 0)],
                                            requeue=False))

        return True
Ejemplo n.º 4
0
    def execute(self):
        """See :meth:`messaging.messages.message.CommandMessage.execute`

        Handles the failed jobs carried by this message, which are grouped by error ID. A job is ignored
        when it has no matching job model, when it cannot be failed, or when its execution number differs
        from the reported one. Every other job is either placed back on the queue for another try or set to
        FAILED with its error. Messages are also created to update the root recipes of the locked jobs and
        to update recipe metrics for all job IDs in this message.

        :returns: Always True
        :rtype: bool
        """

        from queue.messages.queued_jobs import create_queued_jobs_messages, QueuedJob

        job_ids = []
        for job_list in self._failed_jobs.values():
            for failed_job in job_list:
                job_ids.append(failed_job.job_id)

        root_recipe_ids = set()
        with transaction.atomic():
            # Retrieve locked job models
            job_models = {}
            for job in Job.objects.get_locked_jobs(job_ids):
                job_models[job.id] = job
                if job.root_recipe_id:
                    root_recipe_ids.add(job.root_recipe_id)

            # Get job models with related fields (these replace the locked models in the lookup)
            # TODO: once long running job types are gone, the related fields are not needed
            for job in Job.objects.get_jobs_with_related(job_ids):
                job_models[job.id] = job

            jobs_to_retry = []
            for error_id, job_list in self._failed_jobs.items():
                error = get_error(error_id)
                jobs_to_fail = []
                for failed_job in job_list:
                    job_model = job_models.get(failed_job.job_id)
                    if job_model is None:
                        # No model came back for this ID; skip it rather than failing the whole batch with a KeyError
                        logger.warning('Job %s could not be found and will not be updated', failed_job.job_id)
                        continue

                    # If job cannot be failed or execution number does not match, then this update is obsolete
                    if not job_model.can_be_failed() or job_model.num_exes != failed_job.exe_num:
                        # Ignore this job
                        continue

                    # Re-try job if error supports re-try and there are more tries left
                    retry = error.should_be_retried and job_model.num_exes < job_model.max_tries
                    # Also re-try long running jobs
                    retry = retry or job_model.job_type.is_long_running
                    # Do not re-try superseded jobs
                    retry = retry and not job_model.is_superseded

                    if retry:
                        jobs_to_retry.append(QueuedJob(job_model.id, job_model.num_exes))
                    else:
                        jobs_to_fail.append(job_model)

                # Update jobs that failed with this error
                if jobs_to_fail:
                    failed_job_ids = Job.objects.update_jobs_to_failed(jobs_to_fail, error_id, self.ended)
                    logger.info('Set %d job(s) to FAILED status with error %s', len(failed_job_ids), error.name)

            # Need to update recipes of failed jobs so that dependent jobs are BLOCKED
            if root_recipe_ids:
                from recipe.messages.update_recipe import create_update_recipe_messages_from_node
                self.new_messages.extend(create_update_recipe_messages_from_node(root_recipe_ids))

            # Place jobs to retry back onto the queue
            if jobs_to_retry:
                self.new_messages.extend(create_queued_jobs_messages(jobs_to_retry, requeue=True))

        # Send messages to update recipe metrics
        from recipe.messages.update_recipe_metrics import create_update_recipe_metrics_messages_from_jobs
        self.new_messages.extend(create_update_recipe_metrics_messages_from_jobs(job_ids))

        return True