def fail_jobs_if_not_in_celery(self, node_jobs, active_tasks, celery_task_start_time, isolated=False):
    """Mark tasks as failed (or errored, for isolated nodes) when they claim
    to be running but have no corresponding active Celery task.

    :param node_jobs: iterable of task records believed to be running on this node
    :param active_tasks: collection of celery task ids currently known to Celery
    :param celery_task_start_time: tasks modified after this time are skipped,
        since Celery may not have picked them up yet
    :param isolated: True when checking isolated-node controller daemons; the
        task status is then set to 'error' instead of 'failed'
    """
    for task in node_jobs:
        if (task.celery_task_id not in active_tasks and
                not hasattr(settings, 'IGNORE_CELERY_INSPECTOR')):
            # Workflow jobs do not run in celery workers; skip them.
            if isinstance(task, WorkflowJob):
                continue
            # Task was modified after the celery inspection snapshot was
            # taken; celery may simply not have registered it yet.
            if task.modified > celery_task_start_time:
                continue
            new_status = 'error' if isolated else 'failed'
            task.status = new_status
            if isolated:
                # TODO: cancel and reap artifacts of lost jobs from heartbeat
                task.job_explanation += ' '.join((
                    # Fixed: trailing space on this fragment produced a
                    # double space after "its" once joined with ' '.
                    'Task was marked as running in Tower but its',
                    'controller management daemon was not present in',
                    'Celery, so it has been marked as failed.',
                    'Task may still be running, but contactability is unknown.'
                ))
            else:
                task.job_explanation += ' '.join((
                    'Task was marked as running in Tower but was not present in',
                    'Celery, so it has been marked as failed.',
                ))
            try:
                task.save(update_fields=['status', 'job_explanation'])
            except DatabaseError:
                # Row may have been deleted out from under us; best-effort only.
                logger.error("Task {} DB error in marking failed. Job possibly deleted.".format(task.log_format))
                continue
            awx_tasks._send_notification_templates(task, 'failed')
            task.websocket_emit_status(new_status)
            logger.error("{}Task {} has no record in celery. Marking as failed".format(
                'Isolated ' if isolated else '', task.log_format))
def fail_jobs_if_not_in_celery(self, node_jobs, active_tasks, celery_task_start_time):
    """Fail any task that claims to be running but has no active Celery record.

    NOTE(review): this definition duplicates — and, being defined later,
    shadows — the variant above that also accepts ``isolated``; consider
    consolidating the two.

    :param node_jobs: iterable of task records believed to be running
    :param active_tasks: celery task ids currently reported as active
    :param celery_task_start_time: tasks modified after this cutoff are skipped
    """
    inspector_ignored = hasattr(settings, 'IGNORE_CELERY_INSPECTOR')
    for job in node_jobs:
        if inspector_ignored or job.celery_task_id in active_tasks:
            continue
        # Workflow jobs never execute inside celery workers.
        if isinstance(job, WorkflowJob):
            continue
        # Recently-modified tasks may not have reached celery yet.
        if job.modified > celery_task_start_time:
            continue
        job.status = 'failed'
        job.job_explanation += ' '.join((
            'Task was marked as running in Tower but was not present in',
            'Celery, so it has been marked as failed.',
        ))
        try:
            job.save(update_fields=['status', 'job_explanation'])
        except DatabaseError:
            logger.error(
                "Task {} DB error in marking failed. Job possibly deleted."
                .format(job.log_format))
            continue
        awx_tasks._send_notification_templates(job, 'failed')
        job.websocket_emit_status('failed')
        logger.error(
            "Task {} has no record in celery. Marking as failed".
            format(job.log_format))
def schedule(self):
    """Run one scheduling pass under an advisory lock, then send workflow-job
    notifications after the atomic session has committed."""
    with transaction.atomic():
        # Only one scheduler may run at a time across the cluster; bail out
        # immediately rather than wait if another instance holds the lock.
        with advisory_lock('task_manager_lock', wait=False) as acquired:
            if acquired is False:
                logger.debug("Not running scheduler, another task holds lock")
                return
            logger.debug("Starting Scheduler")
            self.cleanup_inconsistent_celery_tasks()
            completed_workflow_ids = self._schedule()
    # These queries rely on modifications committed by the atomic scheduling
    # session above, so they must run outside the transaction block.
    for wfj in WorkflowJob.objects.filter(id__in=completed_workflow_ids):
        outcome = 'succeeded' if wfj.status == 'successful' else 'failed'
        awx_tasks._send_notification_templates(wfj, outcome)