Beispiel #1
0
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker and everything that is tied to it.

    The Worker document matching name is deleted, the ReservedResource documents recorded for
    that worker are removed, every TaskStatus of the worker whose state is in
    constants.CALL_INCOMPLETE_STATES is passed to cancel(), and finally the worker's working
    directory is deleted.

    An error level message is logged only when normal_shutdown is False, which is also the
    default: unless told otherwise, the worker is assumed to have gone missing.

    :param name:            The name of the worker to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shut down normally, False otherwise. Defaults
                            to False.
    :type normal_shutdown:  bool
    """
    # Identity comparison: only the literal False produces the error message
    if normal_shutdown is False:
        _logger.error(
            _('The worker named %(name)s is missing. Canceling the tasks in its queue.') %
            {'name': name})

    # Drop the worker document, then the reservations recorded under its name
    Worker.objects(name=name).delete()
    reservations = ReservedResource.get_collection()
    reservations.remove({'worker_name': name})

    # Cancel the incomplete tasks that are still recorded for this worker
    incomplete_tasks = TaskStatus.objects(worker_name=name,
                                          state__in=constants.CALL_INCOMPLETE_STATES)
    for status in incomplete_tasks:
        cancel(status['task_id'])

    # Last step: remove the worker's working directory
    common_utils.delete_worker_working_directory(name)
Beispiel #2
0
    def test_resource_not_in_resource_map(self):
        """
        Call _release_resource() with a resource id that no reservation uses. The call is
        expected to complete without raising and to leave both the Worker collection and the
        ReservedResource collection exactly as they were.
        """
        # Two workers ...
        first_worker = Worker(WORKER_1, datetime.utcnow())
        first_worker.save()
        second_worker = Worker(WORKER_2, datetime.utcnow())
        second_worker.save()
        # ... and one reservation for each of them
        first_reservation = ReservedResource(uuid.uuid4(), first_worker.name, 'resource_1')
        first_reservation.save()
        second_reservation = ReservedResource(uuid.uuid4(), second_worker.name, 'resource_2')
        second_reservation.save()

        # Neither reservation above uses this resource id
        tasks._release_resource('made_up_resource_id')

        # Both workers must still be present
        self.assertEqual(Worker.objects().count(), 2)
        for saved_worker in (first_worker, second_worker):
            self.assertTrue(Worker.objects().get(name=saved_worker.name))
        # Both reservations must still be present and unchanged
        collection = ReservedResource.get_collection()
        self.assertEqual(collection.count(), 2)
        expectations = ((first_reservation, 'resource_1'), (second_reservation, 'resource_2'))
        for reservation, resource_id in expectations:
            document = collection.find_one({'_id': reservation.task_id})
            self.assertEqual(document['worker_name'], reservation.worker_name)
            self.assertEqual(document['resource_id'], resource_id)
Beispiel #3
0
    def check_workers(self):
        """
        Find workers whose heartbeat is too old and delete them.

        A worker counts as missing when its last_heartbeat is earlier than
        utcnow() - WORKER_TIMEOUT_SECONDS. The cutoff is a naive UTC datetime, so the stored
        heartbeat times are expected to be naive UTC as well.

        Every missing worker is reported and then handed to _delete_worker(), which is called
        directly with the worker's name.

        This method logs at the debug and error levels.
        """
        debug_template = _('Looking for workers missing for more than %s seconds')
        _logger.debug(debug_template % self.WORKER_TIMEOUT_SECONDS)

        # Any heartbeat older than this cutoff marks its worker as missing
        cutoff = datetime.utcnow() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
        for missing in Worker.objects(last_heartbeat__lt=cutoff):
            _logger.error(
                _("Workers '%s' has gone missing, removing from list of workers") % missing.name)
            _delete_worker(missing.name)
Beispiel #4
0
def _get_unreserved_worker():
    """
    Pick a Worker that currently has no reserved_resource entries.

    Workers whose name does not pass _is_worker() are never considered. Which of several
    eligible workers is returned is not specified.

    :raises NoWorkers: If no eligible worker is free of reserved_resource entries.

    :returns:          A Worker instance that has no reserved_resource entries associated
                       with it.
    :rtype:            pulp.server.db.model.resources.Worker
    """

    # Index every known Worker by its name
    workers_by_name = {}
    for worker in Worker.objects():
        workers_by_name[worker['name']] = worker

    # Names of the workers that hold at least one reservation
    reservation_docs = ReservedResource.get_collection().find()
    reserved_names = [doc['worker_name'] for doc in reservation_docs]

    # Eligible names minus reserved names. A generator expression is used because set
    # comprehensions need python 2.7+.
    eligible_names = set(name for name in workers_by_name if _is_worker(name))
    unreserved_names = eligible_names - set(reserved_names)

    try:
        chosen_name = unreserved_names.pop()
    except KeyError:
        # pop() on an empty set: all workers are reserved
        raise NoWorkers()
    return workers_by_name[chosen_name]
Beispiel #5
0
def _get_unreserved_worker():
    """
    Pick a Worker that currently has no reserved_resource entries.

    Workers whose name does not pass _is_worker() are never considered. Which of several
    eligible workers is returned is not specified.

    :raises NoWorkers: If no eligible worker is free of reserved_resource entries.

    :returns:          A Worker instance that has no reserved_resource entries associated
                       with it.
    :rtype:            pulp.server.db.model.resources.Worker
    """

    # Index every known Worker by its name
    workers_by_name = {}
    for worker in Worker.objects():
        workers_by_name[worker['name']] = worker

    # Names of the workers that hold at least one reservation
    reservation_docs = ReservedResource.get_collection().find()
    reserved_names = [doc['worker_name'] for doc in reservation_docs]

    # Eligible names minus reserved names. A generator expression is used because set
    # comprehensions need python 2.7+.
    eligible_names = set(name for name in workers_by_name if _is_worker(name))
    unreserved_names = eligible_names - set(reserved_names)

    try:
        chosen_name = unreserved_names.pop()
    except KeyError:
        # pop() on an empty set: all workers are reserved
        raise NoWorkers()
    return workers_by_name[chosen_name]
Beispiel #6
0
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is first passed through _parse_and_log_event(). If no Worker with the parsed
    worker name exists yet, the discovery of a new worker is logged at the info level. In
    either case the last_heartbeat of the Worker with that name is then set to the event's
    timestamp through an upsert, so a missing Worker entry is created by the same call.

    :param event: A celery event to handle.
    :type event: dict
    """
    event_info = _parse_and_log_event(event)
    worker_name = event_info['worker_name']

    # Only announce workers that have no entry yet
    known_worker = Worker.objects(name=worker_name).first()
    if not known_worker:
        _logger.info(_("New worker '%(worker_name)s' discovered") % event_info)

    # Record the heartbeat; upsert=True covers the newly discovered case
    matching_workers = Worker.objects(name=worker_name)
    matching_workers.update_one(set__last_heartbeat=event_info['timestamp'], upsert=True)
Beispiel #7
0
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is first passed through _parse_and_log_event(). If no Worker with the parsed
    worker name exists yet, the discovery of a new worker is logged at the info level. In
    either case the last_heartbeat of the Worker with that name is then set to the event's
    timestamp through an upsert, so a missing Worker entry is created by the same call.

    :param event: A celery event to handle.
    :type event: dict
    """
    event_info = _parse_and_log_event(event)
    worker_name = event_info['worker_name']

    # Only announce workers that have no entry yet
    known_worker = Worker.objects(name=worker_name).first()
    if not known_worker:
        _logger.info(_("New worker '%(worker_name)s' discovered") % event_info)

    # Record the heartbeat; upsert=True covers the newly discovered case
    matching_workers = Worker.objects(name=worker_name)
    matching_workers.update_one(set__last_heartbeat=event_info['timestamp'], upsert=True)
Beispiel #8
0
    def test_resource_not_in_resource_map(self):
        """
        Call _release_resource() with a resource id that no reservation uses. The call is
        expected to complete without raising and to leave both the Worker collection and the
        ReservedResource collection exactly as they were.
        """
        # Two workers ...
        first_worker = Worker(WORKER_1, datetime.utcnow())
        first_worker.save()
        second_worker = Worker(WORKER_2, datetime.utcnow())
        second_worker.save()
        # ... and one reservation for each of them
        first_reservation = ReservedResource(uuid.uuid4(), first_worker.name, 'resource_1')
        first_reservation.save()
        second_reservation = ReservedResource(uuid.uuid4(), second_worker.name, 'resource_2')
        second_reservation.save()

        # Neither reservation above uses this resource id
        tasks._release_resource('made_up_resource_id')

        # Both workers must still be present
        self.assertEqual(Worker.objects().count(), 2)
        for saved_worker in (first_worker, second_worker):
            self.assertTrue(Worker.objects().get(name=saved_worker.name))
        # Both reservations must still be present and unchanged
        collection = ReservedResource.get_collection()
        self.assertEqual(collection.count(), 2)
        expectations = ((first_reservation, 'resource_1'), (second_reservation, 'resource_2'))
        for reservation, resource_id in expectations:
            document = collection.find_one({'_id': reservation.task_id})
            self.assertEqual(document['worker_name'], reservation.worker_name)
            self.assertEqual(document['resource_id'], resource_id)
Beispiel #9
0
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker and everything that is tied to it.

    The Worker document matching name is deleted, the ReservedResource documents recorded for
    that worker are removed, every TaskStatus of the worker whose state is in
    constants.CALL_INCOMPLETE_STATES is passed to cancel(), and finally the worker's working
    directory is deleted.

    An error level message is logged only when normal_shutdown is False, which is also the
    default: unless told otherwise, the worker is assumed to have gone missing.

    :param name:            The name of the worker to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shut down normally, False otherwise. Defaults
                            to False.
    :type normal_shutdown:  bool
    """
    # Identity comparison: only the literal False produces the error message
    if normal_shutdown is False:
        _logger.error(
            _('The worker named %(name)s is missing. Canceling the tasks in its queue.') %
            {'name': name})

    # Drop the worker document, then the reservations recorded under its name
    Worker.objects(name=name).delete()
    reservations = ReservedResource.get_collection()
    reservations.remove({'worker_name': name})

    # Cancel the incomplete tasks that are still recorded for this worker
    incomplete_tasks = TaskStatus.objects(worker_name=name,
                                          state__in=constants.CALL_INCOMPLETE_STATES)
    for status in incomplete_tasks:
        cancel(status['task_id'])

    # Last step: remove the worker's working directory
    common_utils.delete_worker_working_directory(name)
Beispiel #10
0
def get_worker_for_reservation(resource_id):
    """
    Look up the Worker that holds a reservation for resource_id.

    One ReservedResource document with a matching resource_id is fetched, and the worker name
    stored in it is used to query the Worker collection.

    :param resource_id:    The name of the resource you wish to reserve for your task.
    :type resource_id:     basestring

    :raises NoWorkers:     If no reservation exists for resource_id.

    :returns:              The first Worker whose name matches the reservation's worker_name.
                           NOTE(review): presumably None when that Worker document no longer
                           exists -- confirm against the behavior of first().
    :rtype:                pulp.server.db.model.resources.Worker
    """
    reservations = ReservedResource.get_collection()
    reservation = reservations.find_one({'resource_id': resource_id})
    if not reservation:
        # Nothing is reserved under this resource id
        raise NoWorkers()
    return Worker.objects(name=reservation['worker_name']).first()
Beispiel #11
0
def get_worker_for_reservation(resource_id):
    """
    Look up the Worker that holds a reservation for resource_id.

    One ReservedResource document with a matching resource_id is fetched, and the worker name
    stored in it is used to query the Worker collection.

    :param resource_id:    The name of the resource you wish to reserve for your task.
    :type resource_id:     basestring

    :raises NoWorkers:     If no reservation exists for resource_id.

    :returns:              The first Worker whose name matches the reservation's worker_name.
                           NOTE(review): presumably None when that Worker document no longer
                           exists -- confirm against the behavior of first().
    :rtype:                pulp.server.db.model.resources.Worker
    """
    reservations = ReservedResource.get_collection()
    reservation = reservations.find_one({'resource_id': resource_id})
    if not reservation:
        # Nothing is reserved under this resource id
        raise NoWorkers()
    return Worker.objects(name=reservation['worker_name']).first()
Beispiel #12
0
    def check_workers(self):
        """
        Find workers whose heartbeat is too old and delete them.

        A worker counts as missing when its last_heartbeat is earlier than
        utcnow() - WORKER_TIMEOUT_SECONDS. The cutoff is a naive UTC datetime, so the stored
        heartbeat times are expected to be naive UTC as well.

        Every missing worker is reported and then handed to _delete_worker(), which is called
        directly with the worker's name.

        This method logs at the debug and error levels.
        """
        debug_template = _('Looking for workers missing for more than %s seconds')
        _logger.debug(debug_template % self.WORKER_TIMEOUT_SECONDS)

        # Any heartbeat older than this cutoff marks its worker as missing
        cutoff = datetime.utcnow() - timedelta(seconds=self.WORKER_TIMEOUT_SECONDS)
        for missing in Worker.objects(last_heartbeat__lt=cutoff):
            _logger.error(
                _("Workers '%s' has gone missing, removing from list of workers") % missing.name)
            _delete_worker(missing.name)
Beispiel #13
0
 def tearDown(self):
     """
     Remove every Worker, ReservedResource and TaskStatus document.
     """
     all_workers = Worker.objects()
     all_workers.delete()
     reservations = ReservedResource.get_collection()
     reservations.remove()
     all_statuses = TaskStatus.objects()
     all_statuses.delete()
Beispiel #14
0
def get_workers():
    """
    Fetch all known workers.

    :returns:          the unfiltered result of Worker.objects(), i.e. all workers with their
                       heartbeats
    :rtype:            iterable of Worker. NOTE(review): presumably a query set rather than a
                       plain list -- confirm before relying on list-only operations.
    """
    all_workers = Worker.objects()
    return all_workers
Beispiel #15
0
def get_workers():
    """
    Fetch all known workers.

    :returns:          the unfiltered result of Worker.objects(), i.e. all workers with their
                       heartbeats
    :rtype:            iterable of Worker. NOTE(review): presumably a query set rather than a
                       plain list -- confirm before relying on list-only operations.
    """
    all_workers = Worker.objects()
    return all_workers