Example #1
0
def babysit():
    """
    Babysit the workers, updating our tables with information about their queues.

    The workers Celery currently reports are compared against the AvailableQueue records in the
    database:

    * reserved workers that are not consuming from a queue of their own name are subscribed to one
    * reserved workers that have no AvailableQueue record get one
    * records flagged as missing whose worker is reporting again have the flag cleared
    * records whose worker is not reporting are flagged with the current UTC time; once that flag
      is more than five minutes old, a _delete_queue task is dispatched to the
      RESOURCE_MANAGER_QUEUE
    """
    # Inspect the available workers to build our state variables. Celery's inspect() returns None
    # instead of an empty dict when no worker replies, so normalize that to "no workers".
    active_queues = controller.inspect().active_queues() or {}
    # Now we need the entire list of AvailableQueues from the database, though we only need their
    # _id and missing_since attributes. This is preferable to using a Map/Reduce operation to get
    # Mongo to tell us which workers Celery knows about that aren't found in the database.
    all_queues_criteria = Criteria(filters={}, fields=('_id', 'missing_since'))
    all_queues = list(resources.filter_available_queues(all_queues_criteria))
    all_queues_set = set(q.name for q in all_queues)

    active_queues_set = set()
    for worker, queues in active_queues.items():
        # Only reserved task workers are tracked in the available_queues collection. The prefix is
        # compared literally so that regex metacharacters in it cannot cause false matches.
        if not worker.startswith(RESERVED_WORKER_NAME_PREFIX):
            continue
        # Make sure that this worker is subscribed to a queue of its own name. If not, subscribe
        # it to one.
        if worker not in [queue['name'] for queue in queues]:
            controller.add_consumer(queue=worker, destination=(worker,))
        active_queues_set.add(worker)

    # Determine which queues are in active_queues_set that aren't in all_queues_set. These are new
    # workers and we need to add them to the database.
    for worker in (active_queues_set - all_queues_set):
        resources.get_or_create_available_queue(worker)

    # If there are any AvailableQueues that have their missing_since attribute set and they are
    # present now, let's set their missing_since attribute back to None.
    missing_since_queues = set(q.name for q in all_queues if q.missing_since is not None)
    for queue in (active_queues_set & missing_since_queues):
        active_queue = resources.get_or_create_available_queue(queue)
        active_queue.missing_since = None
        active_queue.save()

    # Now we must delete queues for workers that don't exist anymore, but only if they've been
    # missing for at least five minutes.
    for queue in (all_queues_set - active_queues_set):
        # all_queues only carries a subset of the fields, so fetch the full record before saving
        matches = list(resources.filter_available_queues(Criteria(filters={'_id': queue})))
        if not matches:
            # The record was removed after all_queues was read; there is nothing left to flag or
            # delete.
            continue
        missing_queue = matches[0]

        # We only want to delete this queue if it has been missing for at least 5 minutes. If this
        # AvailableQueue doesn't have a missing_since attribute, that means it has just now gone
        # missing. Let's mark its missing_since attribute and continue.
        if missing_queue.missing_since is None:
            missing_queue.missing_since = datetime.utcnow()
            missing_queue.save()
            continue

        # This queue has been missing for some time. Let's check to see if it's been 5 minutes yet,
        # and if it has, let's delete it.
        if missing_queue.missing_since < datetime.utcnow() - timedelta(minutes=5):
            _delete_queue.apply_async(args=(queue,), queue=RESOURCE_MANAGER_QUEUE)
Example #2
0
def _release_resource(resource_id):
    """
    Do not queue this task yourself, but always use the _queue_release_resource() task instead.
    Please see the docblock on that function for an explanation.

    When a resource-reserving task is complete, this method must be called with the
    resource_id so that we know when it is safe to unmap a resource_id from its given queue name.

    The reservation count is decremented on the ReservedResource and on the AvailableQueue of the
    worker it was assigned to. If either record no longer exists, that is silently accepted.

    :param resource_id: The resource that is no longer in use
    :type  resource_id: basestring
    """
    dedicated_queue_suffix = '.dq'
    try:
        reserved_resource = ReservedResource(resource_id)
        reserved_resource.decrement_num_reservations()
        # Now we need to decrement the AvailableQueue that the reserved_resource was using. If the
        # ReservedResource does not exist for some reason, we won't know its assigned_queue, but
        # these next lines won't execute anyway.
        # Remove the '.dq' suffix from the queue name to get the worker name. str.rstrip() must
        # not be used for this: it strips any trailing run of the characters '.', 'd' and 'q', so
        # it would also eat the end of a worker name such as 'worker@prod'.
        assigned_queue = reserved_resource.assigned_queue
        if assigned_queue.endswith(dedicated_queue_suffix):
            worker_name = assigned_queue[:-len(dedicated_queue_suffix)]
        else:
            worker_name = assigned_queue
        aqc = Criteria(filters={'_id': worker_name})
        aq_list = list(resources.filter_available_queues(aqc))
        # A worker whose AvailableQueue has already been removed has nothing to decrement; treat
        # that the same way as the DoesNotExist case below.
        if aq_list:
            aq_list[0].decrement_num_reservations()
    except DoesNotExist:
        # If we are trying to decrement the count on one of these objects, and they don't exist,
        # that's OK
        pass
Example #3
0
def _reserve_resource(resource_id):
    """
    When you wish to queue a task that needs to reserve a resource, you should make a call to this
    function() first, queueing it in the RESOURCE_MANAGER_QUEUE. This Task will return the
    name of the queue you should put your task in.

    Please be sure to also add a task to run _queue_release_resource() in the same queue name that
    this function returns to you. It is important that _release_resource() is called after your task
    is completed, regardless of whether your task completes successfully or not.

    :param resource_id: The name of the resource you wish to reserve for your task. The system
                        will ensure that no other tasks that want that same reservation will run
                        concurrently with yours.
    :type  resource_id: basestring
    :return:            The name of a queue that you should put your task in
    :rtype:             basestring
    """
    dedicated_queue_suffix = '.dq'
    reserved_resource = resources.get_or_create_reserved_resource(resource_id)
    if reserved_resource.assigned_queue is None:
        # The assigned_queue will be None if the reserved_resource was just created, so we'll
        # need to assign a queue to it
        # get the dedicated queue name by adding '.dq' to the end of the worker name
        least_busy_worker = resources.get_least_busy_available_queue().name
        reserved_resource.assigned_queue = least_busy_worker + dedicated_queue_suffix
        reserved_resource.save()
    else:
        # The assigned_queue is set, so we just need to increment the num_reservations on the
        # reserved resource
        reserved_resource.increment_num_reservations()

    # Remove the '.dq' suffix from the queue name to get the worker name. str.rstrip() must not be
    # used for this: it strips any trailing run of the characters '.', 'd' and 'q', so it would
    # also eat the end of a worker name such as 'worker@prod'.
    assigned_queue = reserved_resource.assigned_queue
    if assigned_queue.endswith(dedicated_queue_suffix):
        worker_name = assigned_queue[:-len(dedicated_queue_suffix)]
    else:
        worker_name = assigned_queue
    aqc = Criteria(filters={'_id': worker_name})
    aq_list = list(resources.filter_available_queues(aqc))
    aq_list[0].increment_num_reservations()
    return assigned_queue
Example #4
0
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is parsed (and logged) first. Events coming from the resource manager are then
    ignored. For any other worker, the AvailableQueue whose _id is the worker's name is looked
    up: when it exists its last_heartbeat is set to the event's timestamp, otherwise a new
    AvailableQueue is saved for the worker and an info message is logged.

    :param event: A celery event to handle.
    :type event: dict
    """
    event_info = _parse_and_log_event(event)

    # Heartbeats from the resource manager need no bookkeeping
    if _is_resource_manager(event):
        return

    worker_name = event_info['worker_name']
    lookup = Criteria(filters={'_id': worker_name},
                      fields=('_id', 'last_heartbeat', 'num_reservations'))
    known_workers = list(resources.filter_available_queues(lookup))

    if not known_workers:
        # First heartbeat seen from this worker: record it
        discovered_queue = AvailableQueue(worker_name, event_info['timestamp'])
        _logger.info(_("New worker '%(worker_name)s' discovered") % event_info)
        discovered_queue.save()
        return

    # Known worker: only the heartbeat time needs refreshing
    AvailableQueue.get_collection().find_and_modify(
        query={'_id': worker_name},
        update={'$set': {'last_heartbeat': event_info['timestamp']}}
    )
Example #5
0
    def test_filter(self):
        """
        Filter the saved AvailableQueues and check that the expected instances come back in order.
        """
        # Save three queues; the filter below should only match the last two.
        now = datetime.utcnow()
        for name, reservations in (('queue_1', 1), ('queue_2', 2), ('queue_3', 3)):
            AvailableQueue(name, now, reservations).save()
        criteria = Criteria(filters={'_id': {'$gt': 'queue_1'}}, sort=[('_id', pymongo.ASCENDING)])

        aqs = resources.filter_available_queues(criteria)

        # The return value must be a generator. Once that is established, turn it into a list so
        # the individual results can be indexed.
        self.assertEqual(type(aqs), types.GeneratorType)
        aqs = list(aqs)
        self.assertEqual(all(isinstance(aq, AvailableQueue) for aq in aqs), True)
        expected = (('queue_2', 2), ('queue_3', 3))
        for position, (name, reservations) in enumerate(expected):
            self.assertEqual(aqs[position].name, name)
            self.assertEqual(aqs[position].num_reservations, reservations)
Example #6
0
    def test_criteria_passed_to_mongo(self, get_collection):
        """
        The Criteria given to filter_available_queues() must be handed to the collection's query().
        """
        id_criteria = Criteria(filters={'_id': 'some_id'})

        found = list(resources.filter_available_queues(id_criteria))

        mocked_collection = get_collection.return_value
        mocked_collection.query.assert_called_once_with(id_criteria)
        self.assertEqual(found, [])
Example #7
0
    def check_workers(self):
        """
        Look for missing workers, and dispatch a cleanup task if one goes missing.

        A worker counts as missing when the last_heartbeat of its AvailableQueue is older than
        utcnow() - WORKER_TIMEOUT_SECONDS.
        NOTE(review): this comparison assumes heartbeat times are stored as naive UTC - confirm.

        For every such worker an error is logged and a _delete_queue task is dispatched to the
        RESOURCE_MANAGER_QUEUE with the worker's name. A debug message is logged before the
        search starts.
        """
        timeout_seconds = self.WORKER_TIMEOUT_SECONDS
        _logger.debug(_('Looking for workers missing for more than %s seconds') % timeout_seconds)

        heartbeat_cutoff = datetime.utcnow() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
        stale_filter = {'last_heartbeat': {'$lt': heartbeat_cutoff}}
        stale_criteria = Criteria(filters=stale_filter,
                                  fields=('_id', 'last_heartbeat', 'num_reservations'))

        # Materialize the results before dispatching anything
        missing_workers = list(resources.filter_available_queues(stale_criteria))
        for missing_worker in missing_workers:
            _logger.error(
                _("Workers '%s' has gone missing, removing from list of workers") %
                missing_worker.name)
            _delete_queue.apply_async(args=(missing_worker.name,), queue=RESOURCE_MANAGER_QUEUE)
Example #8
0
def _delete_queue(queue, normal_shutdown=False):
    """
    Delete the AvailableQueue with _id queue from the database. This Task can only safely be
    performed by the resource manager at this time, so be sure to queue it in the
    RESOURCE_MANAGER_QUEUE.

    Nothing happens when no AvailableQueue with that _id exists. Otherwise every task assigned to
    the queue that is still in an incomplete state is canceled before the queue is deleted.

    An error level message is logged unless the worker shut down normally. The default is to
    assume that it did not.

    :param queue: The name of the queue you wish to delete. In the database, the _id field is the
                  name.
    :type  queue: basestring
    :param normal_shutdown: True if the worker associated with the queue shutdown normally, False
                            otherwise.  Defaults to False.
    :type normal_shutdown:  bool
    """
    matches = list(resources.filter_available_queues(Criteria(filters={'_id': queue})))
    if not matches:
        # _delete_queue() may be called when the database holds no entry for this queue.
        # https://bugzilla.redhat.com/show_bug.cgi?id=1091922
        return
    available_queue = matches[0]

    if normal_shutdown is False:
        logger.error(
            _('The worker named %(name)s is missing. Canceling the tasks in its queue.') %
            {'name': available_queue.name})

    # Cancel every task assigned to this queue that has not finished yet
    incomplete_tasks = Criteria(filters={
        'queue': available_queue.name,
        'state': {'$in': constants.CALL_INCOMPLETE_STATES},
    })
    for task in TaskStatusManager.find_by_criteria(incomplete_tasks):
        cancel(task['task_id'])

    # With its tasks canceled, the queue itself can go
    available_queue.delete()
Example #9
0
def _delete_queue(queue):
    """
    Delete the AvailableQueue with _id queue from the database. This Task can only safely be
    performed by the resource manager at this time, so be sure to queue it in the
    RESOURCE_MANAGER_QUEUE.

    If no AvailableQueue with that _id exists, the task returns without doing anything. Otherwise
    an error is logged, every task assigned to the queue that is still in an incomplete state is
    canceled, and the queue is deleted.

    :param queue: The name of the queue you wish to delete. In the database, the _id field is the
                  name.
    :type  queue: basestring
    """
    queue_list = list(resources.filter_available_queues(Criteria(filters={'_id': queue})))
    if not queue_list:
        # The queue may already be gone (for example when this task was dispatched more than once
        # for the same worker). Indexing the empty result would raise IndexError.
        return
    queue = queue_list[0]

    # Cancel all of the tasks that were assigned to this queue
    msg = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
    msg = msg % {'name': queue.name}
    logger.error(msg)
    for task in TaskStatusManager.find_by_criteria(
            Criteria(
                filters={'queue': queue.name,
                         'state': {'$in': dispatch_constants.CALL_INCOMPLETE_STATES}})):
        cancel(task['task_id'])

    # Finally, delete the queue
    queue.delete()