Example #1
File: app.py  Project: alexxa/pulp
    def _record_heartbeat(self, consumer):
        """
        This method creates or updates the worker record

        :param consumer: The consumer instance
        :type  consumer: celery.worker.consumer.Consumer
        """
        name = consumer.hostname
        # Update the worker record timestamp and handle logging new workers
        worker_watcher.handle_worker_heartbeat(name)

        # If the worker is a resource manager, update the associated ResourceManagerLock timestamp
        if name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME):
            ResourceManagerLock.objects(name=name).update_one(set__timestamp=datetime.utcnow(),
                                                              upsert=False)
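Note: the heartbeat and lock updates above rely on mongoengine's atomic update_one() with set__ field modifiers; the only behavioural difference between the two writes is the upsert flag. Below is a minimal sketch of that pattern against a hypothetical HeartbeatRecord document (not one of Pulp's real models), assuming a local MongoDB instance and the mongoengine package.

from datetime import datetime

import mongoengine


class HeartbeatRecord(mongoengine.Document):
    """Hypothetical stand-in for a worker or lock document with a timestamp field."""
    name = mongoengine.StringField(required=True, unique=True)
    timestamp = mongoengine.DateTimeField()


mongoengine.connect('heartbeat_demo')

# upsert=True creates the record if it does not exist (how Worker heartbeats are written in
# the later examples); upsert=False only touches an existing record (how the
# ResourceManagerLock timestamp is refreshed above, so a missing lock is never silently
# recreated).
HeartbeatRecord.objects(name='worker@host1').update_one(
    set__timestamp=datetime.utcnow(), upsert=True)
HeartbeatRecord.objects(name='worker@host1').update_one(
    set__timestamp=datetime.utcnow(), upsert=False)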
Example #2
    def _record_heartbeat(self, consumer):
        """
        This method creates or updates the worker record

        :param consumer: The consumer instance
        :type  consumer: celery.worker.consumer.Consumer
        """
        name = consumer.hostname
        # Update the worker record timestamp and handle logging new workers
        worker_watcher.handle_worker_heartbeat(name)

        # If the worker is a resource manager, update the associated ResourceManagerLock timestamp
        if name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME):
            ResourceManagerLock.objects(name=name).update_one(
                set__timestamp=datetime.utcnow(), upsert=False)
Example #3
def _delete_worker(name, normal_shutdown=False):
    """
    Delete the Worker with _id name from the database, cancel any associated tasks and reservations

    If the worker shut down normally, no message is logged; otherwise an error-level message is
    logged. The default is to assume the worker did not shut down normally.

    Any resource reservations associated with this worker are cleaned up by this function.

    Any tasks associated with this worker are explicitly canceled.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shut down normally, False otherwise. Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    if normal_shutdown is False:
        msg = _(
            'The worker named %(name)s is missing. Canceling the tasks in its queue.'
        )
        msg = msg % {'name': name}
        _logger.error(msg)
    else:
        msg = _("Cleaning up shutdown worker '%s'.") % name
        _logger.info(msg)

    # Delete the worker document
    Worker.objects(name=name).delete()

    # Delete all reserved_resource documents for the worker
    ReservedResource.objects(worker_name=name).delete()

    # If the worker is a resource manager, we also need to delete the associated lock
    if name.startswith(RESOURCE_MANAGER_WORKER_NAME):
        ResourceManagerLock.objects(name=name).delete()

    # If the worker is a scheduler, we also need to delete the associated lock
    if name.startswith(SCHEDULER_WORKER_NAME):
        CeleryBeatLock.objects(name=name).delete()

    # Cancel all of the tasks that were assigned to this worker's queue
    for task_status in TaskStatus.objects(
            worker_name=name, state__in=constants.CALL_INCOMPLETE_STATES):
        cancel(task_status['task_id'])
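For reference, a couple of illustrative calls to _delete_worker from Example #3 (the worker name below is hypothetical):

# A worker whose heartbeat went stale: logs at error level and cancels its queued tasks.
_delete_worker('reserved_resource_worker-0@host1.example.com')

# A worker that announced a clean shutdown: logs at info level instead.
_delete_worker('reserved_resource_worker-0@host1.example.com', normal_shutdown=True)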
Example #4
def get_resource_manager_lock(name):
    """
    Tries to acquire the resource manager lock. If the lock cannot be acquired immediately, it
    will wait until the currently active instance becomes unavailable, at which point the worker
    cleanup routine will clear the lock for us to acquire. A worker record will be created so that
    the waiting resource manager will appear in the Status API. We override the SIGTERM signal
    handler so that the worker record will be immediately cleaned up if the process is killed
    while in this state.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    with custom_sigterm_handler(name):
        # Whether this is the first lock availability check for this instance
        _first_check = True

        while True:
            # Create / update the worker record so that Pulp knows we exist
            Worker.objects(name=name).update_one(
                set__last_heartbeat=datetime.utcnow(), upsert=True)
            try:
                lock.save()

                msg = _(
                    "Resource manager '%s' has acquired the resource manager lock"
                ) % name
                _logger.info(msg)
                break
            except mongoengine.NotUniqueError:
                # Only log the message the first time
                if _first_check:
                    msg = _(
                        "Resource manager '%s' attempted to acquire the resource manager "
                        "lock but was unable to do so. It will retry every %d seconds until "
                        "the lock can be acquired."
                    ) % (name, constants.CELERY_CHECK_INTERVAL)
                    _logger.info(msg)
                    _first_check = False

                time.sleep(constants.CELERY_CHECK_INTERVAL)
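Example #4 wraps its wait loop in custom_sigterm_handler(name) so that the worker record is removed if the process is terminated while it is still waiting for the lock. That helper is not included in this listing; a context manager along the following lines would have the described effect (a sketch only, not Pulp's actual implementation):

import contextlib
import signal
import sys


@contextlib.contextmanager
def custom_sigterm_handler(name):
    """Sketch of a SIGTERM handler that cleans up the worker record before exiting."""
    def handler(signum, frame):
        # Remove our worker record so the waiting resource manager disappears from the
        # Status API, then exit; _delete_worker is shown in Example #3.
        _delete_worker(name, normal_shutdown=True)
        sys.exit(0)

    old_handler = signal.signal(signal.SIGTERM, handler)
    try:
        yield
    finally:
        # Restore whatever handler was installed before we took over.
        signal.signal(signal.SIGTERM, old_handler)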
Example #5
File: tasks.py  Project: alexxa/pulp
def _delete_worker(name, normal_shutdown=False):
    """
    Delete the Worker with _id name from the database, cancel any associated tasks and reservations

    If the worker shut down normally, no message is logged; otherwise an error-level message is
    logged. The default is to assume the worker did not shut down normally.

    Any resource reservations associated with this worker are cleaned up by this function.

    Any tasks associated with this worker are explicitly canceled.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shut down normally, False otherwise. Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    if normal_shutdown is False:
        msg = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        msg = msg % {'name': name}
        _logger.error(msg)
    else:
        msg = _("Cleaning up shutdown worker '%s'.") % name
        _logger.info(msg)

    # Delete the worker document
    Worker.objects(name=name).delete()

    # Delete all reserved_resource documents for the worker
    ReservedResource.objects(worker_name=name).delete()

    # If the worker is a resource manager, we also need to delete the associated lock
    if name.startswith(RESOURCE_MANAGER_WORKER_NAME):
        ResourceManagerLock.objects(name=name).delete()

    # If the worker is a scheduler, we also need to delete the associated lock
    if name.startswith(SCHEDULER_WORKER_NAME):
        CeleryBeatLock.objects(name=name).delete()

    # Cancel all of the tasks that were assigned to this worker's queue
    for task_status in TaskStatus.objects(worker_name=name,
                                          state__in=constants.CALL_INCOMPLETE_STATES):
        cancel(task_status['task_id'], revoke_task=False)
Example #6
def get_resource_manager_lock(name):
    """
    Tries to acquire the resource manager lock.

    If the lock cannot be acquired immediately, it will wait until the
    currently active instance becomes unavailable, at which point the worker
    cleanup routine will clear the lock for us to acquire. A worker record will
    be created so that the waiting resource manager will appear in the Status
    API. This worker record will be cleaned up through the regular worker
    shutdown routine.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Whether this is the first lock availability check for this instance
    _first_check = True

    while True:

        now = dateutils.ensure_tz(datetime.utcnow())
        old_timestamp = now - timedelta(
            seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

        ResourceManagerLock.objects(timestamp__lte=old_timestamp).delete()

        # Create / update the worker record so that Pulp knows we exist
        Worker.objects(name=name).update_one(
            set__last_heartbeat=datetime.utcnow(), upsert=True)
        try:
            lock.timestamp = now
            lock.save()

            msg = _(
                "Resource manager '%s' has acquired the resource manager lock"
            ) % name
            _logger.debug(msg)

            if not _first_check:
                msg = _(
                    "Failover occurred: '%s' is now the primary resource manager"
                ) % name
                _logger.warning(msg)

            break
        except mongoengine.NotUniqueError:
            # Only log the message the first time
            if _first_check:
                _logger.info(
                    _("Hot spare pulp_resource_manager instance '%(name)s' detected."
                      ) % {'name': name})
                _first_check = False

            time.sleep(constants.PULP_PROCESS_HEARTBEAT_INTERVAL)
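Example #6 adds a timeout-based takeover to the basic retry loop: before each save() attempt it deletes any lock whose timestamp is older than constants.PULP_PROCESS_TIMEOUT_INTERVAL seconds, so a hot spare can claim the lock once the primary stops heartbeating. A minimal sketch of that staleness arithmetic, with a hypothetical 90-second timeout standing in for the constant:

from datetime import datetime, timedelta, timezone

TIMEOUT_SECONDS = 90  # stand-in for constants.PULP_PROCESS_TIMEOUT_INTERVAL

now = datetime.now(timezone.utc)
old_timestamp = now - timedelta(seconds=TIMEOUT_SECONDS)

# e.g. the primary's last recorded heartbeat, two minutes ago
last_seen = now - timedelta(seconds=120)

if last_seen <= old_timestamp:
    # Example #6 expresses the same comparison as a bulk query instead:
    #     ResourceManagerLock.objects(timestamp__lte=old_timestamp).delete()
    print('lock is stale; a hot spare may delete it and take over')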
Example #7
File: app.py  Project: pulp/pulp
def get_resource_manager_lock(name):
    """
    Tries to acquire the resource manager lock.

    If the lock cannot be acquired immediately, it will wait until the
    currently active instance becomes unavailable, at which point the worker
    cleanup routine will clear the lock for us to acquire. A worker record will
    be created so that the waiting resource manager will appear in the Status
    API. This worker record will be cleaned up through the regular worker
    shutdown routine.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Whether this is the first lock availability check for this instance
    _first_check = True

    while True:
        # Create / update the worker record so that Pulp knows we exist
        Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(),
                                             upsert=True)
        try:
            lock.save()

            msg = _("Resource manager '%s' has acquired the resource manager lock") % name
            _logger.info(msg)
            break
        except mongoengine.NotUniqueError:
            # Only log the message the first time
            if _first_check:
                msg = _("Resource manager '%(name)s' attempted to acquire the the resource manager "
                        "lock but was unable to do so. It will retry every %(interval)d seconds "
                        "until the lock can be acquired.") % \
                    {'name': name, 'interval': constants.CELERY_CHECK_INTERVAL}
                _logger.info(msg)
                _first_check = False

            time.sleep(constants.CELERY_CHECK_INTERVAL)
Example #8
File: app.py  Project: alexxa/pulp
def get_resource_manager_lock(name):
    """
    Tries to acquire the resource manager lock.

    If the lock cannot be acquired immediately, it will wait until the
    currently active instance becomes unavailable, at which point the worker
    cleanup routine will clear the lock for us to acquire. A worker record will
    be created so that the waiting resource manager will appear in the Status
    API. This worker record will be cleaned up through the regular worker
    shutdown routine.

    :param name:   The hostname of the worker
    :type  name:   basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Whether this is the first lock availability check for this instance
    _first_check = True

    while True:

        now = dateutils.ensure_tz(datetime.utcnow())
        old_timestamp = now - timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)

        ResourceManagerLock.objects(timestamp__lte=old_timestamp).delete()

        # Create / update the worker record so that Pulp knows we exist
        Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(),
                                             upsert=True)
        try:
            lock.timestamp = now
            lock.save()

            msg = _("Resource manager '%s' has acquired the resource manager lock") % name
            _logger.debug(msg)

            if not _first_check:
                msg = _("Failover occurred: '%s' is now the primary resource manager") % name
                _logger.warning(msg)

            break
        except mongoengine.NotUniqueError:
            # Only log the message the first time
            if _first_check:
                _logger.info(_("Hot spare pulp_resource_manager instance '%(name)s' detected.")
                             % {'name': name})
                _first_check = False

            time.sleep(PULP_PROCESS_HEARTBEAT_INTERVAL)
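The NotUniqueError handling in Examples #4 and #6-#8 only works because the lock document enforces uniqueness at the database level. A hypothetical minimal definition of such a document is sketched below, assuming mongoengine; Pulp's actual ResourceManagerLock schema may differ.

import mongoengine


class DemoResourceManagerLock(mongoengine.Document):
    """Hypothetical single-document lock, for illustration only."""
    name = mongoengine.StringField(required=True)
    timestamp = mongoengine.DateTimeField()
    # A constant field with a unique index means at most one lock document can exist, so a
    # second resource manager's save() raises mongoengine.NotUniqueError, which the retry
    # loops above catch and sleep on.
    lock = mongoengine.StringField(default='resource_manager_lock', required=True, unique=True)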