Beispiel #1
0
def check_and_cancel_missing_tasks():
    """Cancel Tasks whose backing RQ job can no longer be found in Redis.

    Jobs can vanish from the Redis queues (for example after Redis restarts). A Task
    whose job was lost would never be marked as completed in Pulp and never be
    cleaned up, leaving stray resource locks behind that cause workers to deadlock.

    Two sets of tasks are examined, in this order:

    1. Tasks in an incomplete state that are assigned to an online worker; the RQ
       job is looked up by the task's primary key.
    2. Tasks that have no worker assigned; the RQ job is looked up by the task's
       ``_resource_job_id``.

    Every task whose job lookup raises ``NoSuchJobError`` is canceled.
    """
    redis_conn = connection.get_redis_connection()

    def _cancel_if_job_missing(job_id, task_pk):
        """Cancel the task ``task_pk`` when RQ has no job stored under ``job_id``."""
        try:
            Job.fetch(str(job_id), connection=redis_conn)
        except NoSuchJobError:
            cancel(task_pk)

    # Tasks already handed to a worker that is still online but not yet finished.
    online_unfinished = Task.objects.filter(
        state__in=TASK_INCOMPLETE_STATES,
        worker__in=Worker.objects.online_workers(),
    )
    for assigned in online_unfinished:
        _cancel_if_job_missing(assigned.pk, assigned.pk)

    # Tasks that were still queued up on the resource manager (no worker yet).
    # NOTE(review): this query is not restricted to incomplete states -- confirm that
    # cancel() tolerates tasks which have already reached a final state.
    for queued in Task.objects.filter(worker__isnull=True):
        _cancel_if_job_missing(queued._resource_job_id, queued.pk)
Beispiel #2
0
def mark_worker_offline(worker_name, normal_shutdown=False):
    """
    Mark the :class:`~pulpcore.app.models.Worker` as offline and cancel associated tasks.

    If the worker did not shut down normally, an error level message is logged; otherwise
    info level messages are logged. Default is to assume the worker did not shut down normally.

    The worker record is looked up by name among workers that are neither gracefully stopped
    nor already cleaned up. If no such record exists, nothing beyond the logging happens.

    For a matching worker:

    * every task of the worker that is in an incomplete state is explicitly canceled,
    * every other task of the worker has ``release_resources()`` called on it, so locks held
      by tasks in final states are released as well,
    * ``gracefully_stopped`` is set when ``normal_shutdown`` is true, ``cleaned_up`` is always
      set, and the worker is saved.

    Args:
        worker_name (str): The name of the worker
        normal_shutdown (bool): True if the worker shutdown normally, False otherwise. Defaults to
                                False.
    """
    if not normal_shutdown:
        msg = _(
            "The worker named %(name)s is missing. Canceling the tasks in its queue."
        )
        _logger.error(msg % {"name": worker_name})
    else:
        # Interpolate only AFTER the translation lookup: formatting inside _() would hand
        # gettext the already-filled-in text, which can never match a catalog msgid.
        _logger.info(_("Worker '{name}' shutdown").format(name=worker_name))
        _logger.info(
            _("Cleaning up shutdown worker '{name}'.").format(name=worker_name)
        )

    try:
        worker = Worker.objects.get(name=worker_name,
                                    gracefully_stopped=False,
                                    cleaned_up=False)
    except Worker.DoesNotExist:
        # No worker with that name is awaiting cleanup; deliberately a no-op.
        pass
    else:
        # Cancel all of the tasks that were assigned to this worker's queue
        for task in worker.tasks.filter(state__in=TASK_INCOMPLETE_STATES):
            cancel(task.pk)

        # Ensure all locks are released for those tasks that are in final states also
        for task in worker.tasks.exclude(state__in=TASK_INCOMPLETE_STATES):
            task.release_resources()

        if normal_shutdown:
            worker.gracefully_stopped = True

        worker.cleaned_up = True
        worker.save()
Beispiel #3
0
def mark_worker_offline(worker_name, normal_shutdown=False):
    """
    Mark the :class:`~pulpcore.app.models.Worker` as offline and cancel associated tasks.

    A message is always logged first: at info level when the worker shut down normally, at
    error level otherwise. Default is to assume the worker did not shut down normally.

    The worker is then looked up by name among workers that are neither gracefully stopped nor
    already cleaned up. When no such worker exists, the function returns without doing anything
    else. Otherwise every task of that worker which is still in an incomplete state is
    explicitly canceled, ``gracefully_stopped`` is set for a normal shutdown, ``cleaned_up`` is
    set unconditionally, and the worker is saved.

    Args:
        worker_name (str): The name of the worker
        normal_shutdown (bool): True if the worker shutdown normally, False otherwise. Defaults to
                                False.
    """
    if normal_shutdown:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % worker_name)
    else:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': worker_name})

    try:
        worker = Worker.objects.get(name=worker_name, gracefully_stopped=False, cleaned_up=False)
    except Worker.DoesNotExist:
        # Unknown worker, or one that was already stopped/cleaned up: nothing left to do.
        return

    # Cancel everything that was assigned to this worker's queue and has not finished yet.
    unfinished = worker.tasks.filter(state__in=TASK_INCOMPLETE_STATES)
    for pending in unfinished:
        cancel(pending.pk)

    if normal_shutdown:
        worker.gracefully_stopped = True
    worker.cleaned_up = True
    worker.save()