def check_and_cancel_missing_tasks():
    """Cancel unexecuted Tasks whose backing RQ job can no longer be found.

    Jobs can be dropped from the Redis queues and "forgotten", for example when Redis is
    restarted. Such a Task would never be marked completed in Pulp and never be cleaned up,
    leaving stray resource locks behind that cause workers to deadlock.

    Two groups of tasks are examined, and a task is canceled when fetching its job from RQ
    raises ``NoSuchJobError``:

    * incomplete tasks assigned to an online worker, looked up by the task's primary key;
    * tasks that have no worker assigned, looked up by ``_resource_job_id``.
    """
    redis = connection.get_redis_connection()

    def _cancel_unless_job_exists(task_pk, job_id):
        """Cancel the task ``task_pk`` when RQ has no job stored under ``job_id``."""
        try:
            Job.fetch(str(job_id), connection=redis)
        except NoSuchJobError:
            cancel(task_pk)

    online_workers = Worker.objects.online_workers()
    unfinished_on_workers = Task.objects.filter(
        state__in=TASK_INCOMPLETE_STATES,
        worker__in=online_workers,
    )
    for assigned in unfinished_on_workers:
        _cancel_unless_job_exists(assigned.pk, assigned.pk)

    # Also go through all of the tasks that were still queued up on the resource manager.
    # NOTE(review): this query is not restricted to TASK_INCOMPLETE_STATES -- confirm that
    # including finished, worker-less tasks here is intended.
    for unassigned in Task.objects.filter(worker__isnull=True):
        _cancel_unless_job_exists(unassigned.pk, unassigned._resource_job_id)
def mark_worker_offline(worker_name, normal_shutdown=False):
    """
    Mark the :class:`~pulpcore.app.models.Worker` as offline and cancel associated tasks.

    If the worker shut down normally, info level messages are logged; otherwise an error level
    message is logged. The default is to assume the worker did not shut down normally.

    The worker record is looked up by name among workers that are neither gracefully stopped
    nor already cleaned up. If no such record exists, nothing further is done. Otherwise every
    incomplete task of the worker is canceled, ``release_resources()`` is called on each of its
    remaining tasks, and the worker is saved with ``cleaned_up`` set (plus
    ``gracefully_stopped`` when ``normal_shutdown`` is True).

    Args:
        worker_name (str): The name of the worker
        normal_shutdown (bool): True if the worker shutdown normally, False otherwise. Defaults
            to False.
    """
    if not normal_shutdown:
        msg = _("The worker named %(name)s is missing. Canceling the tasks in its queue.")
        _logger.error(msg % {"name": worker_name})
    else:
        # Translate the template first and interpolate afterwards: formatting inside ``_()``
        # would turn the worker name into part of the msgid, so no catalog entry could match.
        _logger.info(_("Worker '{name}' shutdown").format(name=worker_name))
        _logger.info(_("Cleaning up shutdown worker '{name}'.").format(name=worker_name))

    try:
        worker = Worker.objects.get(
            name=worker_name, gracefully_stopped=False, cleaned_up=False
        )
    except Worker.DoesNotExist:
        # No worker with that name is awaiting cleanup; there is nothing to do.
        return

    # Cancel all of the tasks that were assigned to this worker's queue
    for task in worker.tasks.filter(state__in=TASK_INCOMPLETE_STATES):
        cancel(task.pk)

    # Ensure all locks are released for those tasks that are in final states also
    for task in worker.tasks.exclude(state__in=TASK_INCOMPLETE_STATES):
        task.release_resources()

    if normal_shutdown:
        worker.gracefully_stopped = True
    worker.cleaned_up = True
    worker.save()
def mark_worker_offline(worker_name, normal_shutdown=False):
    """
    Flag a :class:`~pulpcore.app.models.Worker` as offline and cancel its pending tasks.

    An error level message is logged when the worker did not shut down normally (the default
    assumption); for a normal shutdown an info level message is logged instead.

    The worker is looked up by name among records that are neither gracefully stopped nor
    cleaned up. When none is found the function returns without further action. Otherwise all
    of the worker's incomplete tasks are explicitly canceled and the record is saved with
    ``cleaned_up`` set, and with ``gracefully_stopped`` set when ``normal_shutdown`` is True.

    Args:
        worker_name (str): The name of the worker
        normal_shutdown (bool): True if the worker shutdown normally, False otherwise. Defaults
            to False.
    """
    if normal_shutdown:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % worker_name)
    else:
        missing_template = _(
            "The worker named %(name)s is missing. Canceling the tasks in its queue."
        )
        _logger.error(missing_template % {"name": worker_name})

    lookup = {"name": worker_name, "gracefully_stopped": False, "cleaned_up": False}
    try:
        offline_worker = Worker.objects.get(**lookup)
    except Worker.DoesNotExist:
        # No matching worker is awaiting cleanup.
        return

    # Cancel every task still pending on this worker's queue.
    pending_tasks = offline_worker.tasks.filter(state__in=TASK_INCOMPLETE_STATES)
    for pending in pending_tasks:
        cancel(pending.pk)

    if normal_shutdown:
        offline_worker.gracefully_stopped = True
    offline_worker.cleaned_up = True
    offline_worker.save()