Beispiel #1
0
    def check_workers(self):
        """
        Find workers whose heartbeat is stale and queue a cleanup task for each of them.

        A worker is treated as missing when its last_heartbeat is older than
        utcnow() - WORKER_TIMEOUT_SECONDS. The cutoff is built from utcnow() because the
        heartbeat times are expected to be stored as UTC datetimes, which makes the two
        values directly comparable.

        For every missing worker an error is logged and a _delete_worker task is dispatched
        on RESOURCE_MANAGER_QUEUE, asking the resource manager to delete the Worker and clean
        up any associated work.

        This method logs at the debug and error levels.
        """
        _logger.debug(
            _('Looking for workers missing for more than %s seconds') % self.WORKER_TIMEOUT_SECONDS)

        cutoff = datetime.utcnow() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
        stale_filter = {'last_heartbeat': {'$lt': cutoff}}
        criteria = Criteria(filters=stale_filter,
                            fields=('_id', 'last_heartbeat', 'num_reservations'))

        # Materialize the query result up front so that every lookup has completed before the
        # first cleanup task is dispatched.
        missing_workers = list(resources.filter_workers(criteria))
        for missing in missing_workers:
            _logger.error(
                _("Workers '%s' has gone missing, removing from list of workers") % missing.name)
            _delete_worker.apply_async(args=(missing.name, ), queue=RESOURCE_MANAGER_QUEUE)
Beispiel #2
0
    def test_filter(self):
        """
        Save three workers, filter for two of them, and check the results look correct.
        """
        # All three workers share the same heartbeat time; only their names differ.
        heartbeat = datetime.utcnow()
        for worker_name in ('worker_1', 'worker_2', 'worker_3'):
            Worker(worker_name, heartbeat).save()

        # Anything sorting after 'worker_1' should match, returned in ascending _id order.
        id_filter = {'_id': {'$gt': 'worker_1'}}
        criteria = Criteria(filters=id_filter, sort=[('_id', pymongo.ASCENDING)])

        result = resources.filter_workers(criteria)

        # The return value must be a generator. Once that is confirmed, turn it into a list so
        # the individual instances are easy to inspect.
        self.assertEqual(type(result), types.GeneratorType)
        found = list(result)
        self.assertTrue(all([isinstance(item, Worker) for item in found]))
        first, second = found[0], found[1]
        self.assertEqual(first.name, 'worker_2')
        self.assertEqual(second.name, 'worker_3')
Beispiel #3
0
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is parsed and logged by _parse_and_log_event. The Worker entries are then
    searched for one whose _id matches the worker name from the event. When none exists, a
    new Worker is created, announced at the info level, and saved. When one exists, its
    last_heartbeat is set to the timestamp carried by the event.

    :param event: A celery event to handle.
    :type event: dict
    """
    event_info = _parse_and_log_event(event)
    worker_id_query = {'_id': event_info['worker_name']}

    lookup = Criteria(filters=worker_id_query, fields=('_id', 'last_heartbeat'))
    known_workers = list(resources.filter_workers(lookup))

    if not known_workers:
        # First heartbeat seen from this worker: record it as a new entry.
        new_worker = Worker(event_info['worker_name'], event_info['timestamp'])
        _logger.info(_("New worker '%(worker_name)s' discovered") % event_info)
        new_worker.save()
        return

    # The worker is already known, so only its heartbeat time needs refreshing.
    heartbeat_update = {'$set': {'last_heartbeat': event_info['timestamp']}}
    Worker.get_collection().find_and_modify(
        query={'_id': event_info['worker_name']},
        update=heartbeat_update
    )
Beispiel #4
0
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is parsed and logged first. If _is_resource_manager reports that the event
    came from the resource manager, nothing else is done. Otherwise the Worker entries are
    searched for one whose _id matches the worker name from the event. When none exists, a
    new Worker is created, announced at the info level, and saved. When one exists, its
    last_heartbeat is set to the timestamp carried by the event.

    :param event: A celery event to handle.
    :type event: dict
    """
    event_info = _parse_and_log_event(event)

    # Heartbeats from the resource manager are logged above but never tracked as workers.
    if _is_resource_manager(event):
        return

    worker_id_query = {'_id': event_info['worker_name']}
    lookup = Criteria(filters=worker_id_query,
                      fields=('_id', 'last_heartbeat', 'num_reservations'))
    known_workers = list(resources.filter_workers(lookup))

    if not known_workers:
        # First heartbeat seen from this worker: record it as a new entry.
        new_worker = Worker(event_info['worker_name'], event_info['timestamp'])
        _logger.info(_("New worker '%(worker_name)s' discovered") % event_info)
        new_worker.save()
        return

    # The worker is already known, so only its heartbeat time needs refreshing.
    heartbeat_update = {'$set': {'last_heartbeat': event_info['timestamp']}}
    Worker.get_collection().find_and_modify(
        query={'_id': event_info['worker_name']},
        update=heartbeat_update)
Beispiel #5
0
def get_workers():
    """
    Fetch every worker by filtering with an empty (unrestricted) Criteria.

    :returns:          the workers with their heartbeats, exactly as handed back by
                       resources.filter_workers
    :rtype:            iterable (NOTE(review): presumably a generator of Worker objects rather
                       than a concrete list -- confirm against filter_workers)
    """
    return resources.filter_workers(Criteria())
Beispiel #6
0
def get_workers():
    """
    Fetch every worker by filtering with an empty (unrestricted) Criteria.

    :returns:          the workers with their heartbeats, exactly as handed back by
                       resources.filter_workers
    :rtype:            iterable (NOTE(review): presumably a generator of Worker objects rather
                       than a concrete list -- confirm against filter_workers)
    """
    return resources.filter_workers(Criteria())
Beispiel #7
0
    def test_criteria_passed_to_mongo(self, get_collection):
        """
        Check that filter_workers hands the Criteria it receives on to the collection's query().
        """
        criteria = Criteria(filters={'_id': 'some_id'})

        # Drain the result so the underlying query is actually issued.
        results = list(resources.filter_workers(criteria))

        query_call = get_collection.return_value.query
        query_call.assert_called_once_with(criteria)
        self.assertEqual(results, [])
Beispiel #8
0
    def test_criteria_passed_to_mongo(self, get_collection):
        """
        Check that filter_workers hands the Criteria it receives on to the collection's query().
        """
        criteria = Criteria(filters={'_id': 'some_id'})

        # Drain the result so the underlying query is actually issued.
        results = list(resources.filter_workers(criteria))

        query_call = get_collection.return_value.query
        query_call.assert_called_once_with(criteria)
        self.assertEqual(results, [])
Beispiel #9
0
def _delete_worker(name, normal_shutdown=False):
    """
    Remove the Worker whose _id is name, then clean up its reservations and cancel its tasks.

    Unless normal_shutdown is True, an error level message is logged saying the worker is
    missing; the default assumes the worker did not shut down normally.

    The steps are performed in this order:

    1. the Worker document is deleted, if one is found;
    2. all reserved_resource documents naming this worker are removed;
    3. every task assigned to this worker that is still in one of
       constants.CALL_INCOMPLETE_STATES is explicitly canceled.

    :param name:            The name of the worker you wish to delete. In the database, the _id
                            field is the name.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.  Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        logger.error(template % {'name': name})

    # Step 1: drop the worker document when there is one
    matches = list(resources.filter_workers(Criteria(filters={'_id': name})))
    if matches:
        matches[0].delete()

    # Step 2: drop every reserved_resource document held for the worker
    ReservedResource.get_collection().remove({'worker_name': name})

    # Step 3: cancel the unfinished tasks that were assigned to this worker
    worker = Worker.from_bson({'_id': name})
    unfinished = Criteria(filters={
        'worker_name': worker.name,
        'state': {'$in': constants.CALL_INCOMPLETE_STATES},
    })
    for task in TaskStatusManager.find_by_criteria(unfinished):
        cancel(task['task_id'])
Beispiel #10
0
def _delete_worker(name, normal_shutdown=False):
    """
    Cancel the unfinished tasks of the Worker whose _id is name, then delete that Worker.

    This Task can only safely be performed by the resource manager at this time, so be sure
    to queue it in the RESOURCE_MANAGER_QUEUE.

    When no Worker with that name exists, the function returns without doing or logging
    anything. Otherwise, unless normal_shutdown is True, an error level message is logged
    saying the worker is missing; the default assumes the worker did not shut down normally.

    :param name:            The name of the worker you wish to delete. In the database, the _id
                            field is the name.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.  Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    matches = list(resources.filter_workers(Criteria(filters={'_id': name})))
    if not matches:
        # _delete_worker() may be called when the database holds no entry for this worker.
        # https://bugzilla.redhat.com/show_bug.cgi?id=1091922
        return
    worker = matches[0]

    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        logger.error(template % {'name': worker.name})

    # Cancel every unfinished task that was routed to this worker's queue
    unfinished = Criteria(filters={
        'queue': worker.queue_name,
        'state': {'$in': constants.CALL_INCOMPLETE_STATES},
    })
    for task in TaskStatusManager.find_by_criteria(unfinished):
        cancel(task['task_id'])

    # The worker itself goes last, after its tasks have been canceled
    worker.delete()
Beispiel #11
0
    def test_filter(self):
        """
        Save three workers, filter for two of them, and check the results look correct.
        """
        # All three workers share the same heartbeat time; only their names differ.
        heartbeat = datetime.utcnow()
        for worker_name in ('worker_1', 'worker_2', 'worker_3'):
            Worker(worker_name, heartbeat).save()

        # Anything sorting after 'worker_1' should match, returned in ascending _id order.
        id_filter = {'_id': {'$gt': 'worker_1'}}
        criteria = Criteria(filters=id_filter, sort=[('_id', pymongo.ASCENDING)])

        result = resources.filter_workers(criteria)

        # The return value must be a generator. Once that is confirmed, turn it into a list so
        # the individual instances are easy to inspect.
        self.assertEqual(type(result), types.GeneratorType)
        found = list(result)
        self.assertTrue(all([isinstance(item, Worker) for item in found]))
        first, second = found[0], found[1]
        self.assertEqual(first.name, 'worker_2')
        self.assertEqual(second.name, 'worker_3')
Beispiel #12
0
    def check_workers(self):
        """
        Find workers whose heartbeat is stale and run the cleanup for each of them.

        A worker is treated as missing when its last_heartbeat is older than
        utcnow() - WORKER_TIMEOUT_SECONDS. The cutoff is built from utcnow() because the
        heartbeat times are expected to be stored as UTC datetimes, which makes the two
        values directly comparable.

        For every missing worker an error is logged and _delete_worker is called directly with
        the worker's name to delete the Worker and clean up any associated work.

        This method logs at the debug and error levels.
        """
        _logger.debug(
            _('Looking for workers missing for more than %s seconds') % self.WORKER_TIMEOUT_SECONDS)

        cutoff = datetime.utcnow() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
        stale_filter = {'last_heartbeat': {'$lt': cutoff}}
        criteria = Criteria(filters=stale_filter, fields=('_id', 'last_heartbeat'))

        # Materialize the query result up front so that every lookup has completed before the
        # first worker is deleted.
        missing_workers = list(resources.filter_workers(criteria))
        for missing in missing_workers:
            _logger.error(
                _("Workers '%s' has gone missing, removing from list of workers") % missing.name)
            _delete_worker(missing.name)
Beispiel #13
0
def _delete_worker(name, normal_shutdown=False):
    """
    Remove the Worker whose _id is name, then clean up its reservations and cancel its tasks.

    Unless normal_shutdown is True, an error level message is logged saying the worker is
    missing; the default assumes the worker did not shut down normally.

    The steps are performed in this order:

    1. the Worker document is deleted, if one is found;
    2. all reserved_resource documents naming this worker are removed;
    3. every TaskStatus assigned to this worker that is still in one of
       constants.CALL_INCOMPLETE_STATES is explicitly canceled.

    :param name:            The name of the worker you wish to delete. In the database, the _id
                            field is the name.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.  Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': name})

    # Step 1: drop the worker document when there is one
    matches = list(resources.filter_workers(Criteria(filters={'_id': name})))
    if matches:
        matches[0].delete()

    # Step 2: drop every reserved_resource document held for the worker
    ReservedResource.get_collection().remove({'worker_name': name})

    # Step 3: cancel the unfinished tasks that were assigned to this worker
    worker = Worker.from_bson({'_id': name})
    unfinished_statuses = TaskStatus.objects(worker_name=worker.name,
                                             state__in=constants.CALL_INCOMPLETE_STATES)
    for task_status in unfinished_statuses:
        cancel(task_status['task_id'])
Beispiel #14
0
def _delete_worker(name, normal_shutdown=False):
    """
    Cancel the unfinished tasks of the Worker whose _id is name, then delete that Worker.

    This Task can only safely be performed by the resource manager at this time, so be sure
    to queue it in the RESOURCE_MANAGER_QUEUE.

    When no Worker with that name exists, the function returns without doing or logging
    anything. Otherwise, unless normal_shutdown is True, an error level message is logged
    saying the worker is missing; the default assumes the worker did not shut down normally.

    :param name:            The name of the worker you wish to delete. In the database, the _id
                            field is the name.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.  Defaults to
                            False.
    :type normal_shutdown:  bool
    """
    matches = list(resources.filter_workers(Criteria(filters={'_id': name})))
    if not matches:
        # _delete_worker() may be called when the database holds no entry for this worker.
        # https://bugzilla.redhat.com/show_bug.cgi?id=1091922
        return
    worker = matches[0]

    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        logger.error(template % {'name': worker.name})

    # Cancel every unfinished task that was routed to this worker's queue
    unfinished = Criteria(filters={
        'queue': worker.queue_name,
        'state': {'$in': constants.CALL_INCOMPLETE_STATES},
    })
    for task in TaskStatusManager.find_by_criteria(unfinished):
        cancel(task['task_id'])

    # The worker itself goes last, after its tasks have been canceled
    worker.delete()