def _release_resource(task_id):
    """
    Drop the resource reservation that was made on behalf of a task.

    Do not queue this task yourself. It is dispatched automatically, behind your own task, by
    the _queue_reserved_task task.

    Before the reservation is removed, every TaskStatus for task_id that is still in
    constants.CALL_RUNNING_STATE is handed to Task.on_failure() together with a RuntimeError
    explaining that the task exited immediately. Afterwards all ReservedResource objects
    stored under task_id are deleted, whether or not such a TaskStatus was found.

    :param task_id: The UUID of the task that requested the reservation
    :type  task_id: basestring
    """
    still_running = TaskStatus.objects.filter(
        task_id=task_id, state=constants.CALL_RUNNING_STATE)

    for _stale_status in still_running:
        failure_handler = Task()
        template = _('The task status %(task_id)s exited immediately for some reason. Marking as '
                     'errored. Check the logs for more details')
        failure = RuntimeError(template % {'task_id': task_id})

        # Minimal stand-in for the "einfo" argument of on_failure(); only a traceback
        # attribute is provided and there is no real traceback to report.
        class MyEinfo(object):
            traceback = None

        failure_handler.on_failure(failure, task_id, (), {}, MyEinfo)

    ReservedResource.objects(task_id=task_id).delete()
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker's database records and cancel the tasks assigned to it.

    The Worker document called name and every ReservedResource recorded for that worker are
    deleted. Each TaskStatus of the worker whose state is listed in
    constants.CALL_INCOMPLETE_STATES is then passed to cancel(). Unless the caller reports a
    normal shutdown, an error level message is logged first.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise. The error
                            message is logged only for the literal value False, which is also
                            the default.
    :type  normal_shutdown: bool
    """
    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': name})

    # Worker document first, then the reservations it was holding
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # Whatever was still pending or running in this worker's queue gets cancelled
    unfinished = TaskStatus.objects(worker_name=name,
                                    state__in=constants.CALL_INCOMPLETE_STATES)
    for status in unfinished:
        cancel(status['task_id'])
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker's database records, cancel its tasks and delete its working directory.

    The Worker document called name and every ReservedResource recorded for that worker are
    deleted. Each TaskStatus of the worker whose state is listed in
    constants.CALL_INCOMPLETE_STATES is passed to cancel(), and finally
    common_utils.delete_worker_working_directory() is called for the worker. Unless the caller
    reports a normal shutdown, an error level message is logged first.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise. The error
                            message is logged only for the literal value False, which is also
                            the default.
    :type  normal_shutdown: bool
    """
    if normal_shutdown is False:
        _logger.error(
            _('The worker named %(name)s is missing. Canceling the tasks in its queue.') %
            {'name': name})

    # Worker document first, then the reservations it was holding
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # Whatever was still pending or running in this worker's queue gets cancelled
    incomplete_statuses = TaskStatus.objects(worker_name=name,
                                             state__in=constants.CALL_INCOMPLETE_STATES)
    for incomplete in incomplete_statuses:
        cancel(incomplete['task_id'])

    # Last step: the worker's working directory
    common_utils.delete_worker_working_directory(name)
def _release_resource(task_id):
    """
    Drop the resource reservation that was made on behalf of a task.

    Do not queue this task yourself. It is dispatched automatically, behind your own task, by
    the _queue_reserved_task task. Every ReservedResource object stored under task_id is
    deleted.

    :param task_id: The UUID of the task that requested the reservation
    :type  task_id: basestring
    """
    reservations = ReservedResource.objects(task_id=task_id)
    reservations.delete()
def test_resource_in_resource_map(self):
    """
    Test _release_resource() with a valid resource. Only the reservation belonging to the
    released task should disappear from the database.
    """
    # Two workers that share the same heartbeat timestamp
    heartbeat = datetime.utcnow()
    first_worker = Worker(name=WORKER_1, last_heartbeat=heartbeat)
    first_worker.save()
    second_worker = Worker(name=WORKER_2, last_heartbeat=heartbeat)
    second_worker.save()

    # One reservation per worker; the second one is the one we are going to release
    kept = ReservedResource(task_id=str(uuid.uuid4()), worker_name=first_worker.name,
                            resource_id='resource_1')
    kept.save()
    released = ReservedResource(task_id=str(uuid.uuid4()), worker_name=second_worker.name,
                                resource_id='resource_2')
    released.save()

    tasks._release_resource(released.task_id)

    # Only the reservation for resource_1 may be left, and it must be untouched
    self.assertEqual(ReservedResource.objects.count(), 1)
    survivor = ReservedResource.objects.get(task_id=kept.task_id)
    self.assertEqual(survivor['worker_name'], kept.worker_name)
    self.assertEqual(survivor['resource_id'], 'resource_1')
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker's database records and locks, and cancel the tasks assigned to it.

    The Worker document called name and every ReservedResource recorded for that worker are
    deleted. When name starts with RESOURCE_MANAGER_WORKER_NAME the matching
    ResourceManagerLock is deleted, and when it starts with SCHEDULER_WORKER_NAME the matching
    CeleryBeatLock is deleted. Each TaskStatus of the worker whose state is listed in
    constants.CALL_INCOMPLETE_STATES is then passed to cancel() with revoke_task=False.

    An error level message is logged when normal_shutdown is the literal False; for any other
    value an info level message is logged instead.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise. Defaults to
                            False.
    :type  normal_shutdown: bool
    """
    if normal_shutdown is False:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': name})
    else:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % name)

    # Worker document first, then the reservations it was holding
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # Resource managers and schedulers are recognised by their name prefix and each own a
    # lock document that has to go away together with the worker.
    lock_models_by_prefix = (
        (RESOURCE_MANAGER_WORKER_NAME, ResourceManagerLock),
        (SCHEDULER_WORKER_NAME, CeleryBeatLock),
    )
    for prefix, lock_model in lock_models_by_prefix:
        if name.startswith(prefix):
            lock_model.objects(name=name).delete()

    # Whatever was still pending or running in this worker's queue gets cancelled
    unfinished = TaskStatus.objects(worker_name=name,
                                    state__in=constants.CALL_INCOMPLETE_STATES)
    for status in unfinished:
        cancel(status['task_id'], revoke_task=False)
def _queue_reserved_task(name, task_id, resource_id, inner_args, inner_kwargs):
    """
    Reserve a resource on a worker and dispatch another ("inner") task to that worker.

    A worker is looked up first through get_worker_for_reservation(resource_id) and, if that
    raises NoWorkers, through _get_unreserved_worker(). If both raise NoWorkers the lookup is
    repeated after a 0.25 second pause, until one of them yields a worker.

    A ReservedResource tying task_id and resource_id to the chosen worker is then saved, and
    the inner task registered as celery.tasks[name] is dispatched with
    apply_async(*inner_args, **inner_kwargs). Whether or not that dispatch raises,
    _release_resource is queued for task_id using the same routing key and exchange.

    :param name:         The name of the task to be called
    :type  name:         basestring
    :param task_id:      The UUID to be set on the task being called. By providing the UUID,
                         the caller can have an asynchronous reference to the inner task that
                         will be dispatched.
    :type  task_id:      basestring
    :param resource_id:  The name of the resource you wish to reserve for your task. The system
                         will ensure that no other tasks that want that same reservation will
                         run concurrently with yours.
    :type  resource_id:  basestring
    :param inner_args:   Arguments unpacked with * into the inner task's apply_async() call
    :type  inner_args:   list
    :param inner_kwargs: Arguments unpacked with ** into the inner task's apply_async() call.
                         It is modified in place: the 'routing_key', 'exchange' and 'task_id'
                         entries are set before dispatching.
    :type  inner_kwargs: dict
    :return: None
    """
    while True:
        # Preferred choice: the worker that already holds a reservation for this resource
        try:
            worker = get_worker_for_reservation(resource_id)
            break
        except NoWorkers:
            pass

        # Otherwise any worker that has no reservations at all
        try:
            worker = _get_unreserved_worker()
            break
        except NoWorkers:
            pass

        # No worker is ready for this work, so we need to wait
        time.sleep(0.25)

    reservation = ReservedResource(task_id=task_id, worker_name=worker['name'],
                                   resource_id=resource_id)
    reservation.save()

    # Route the inner task to the chosen worker and give it the caller's UUID
    inner_kwargs['routing_key'] = worker.name
    inner_kwargs['exchange'] = DEDICATED_QUEUE_EXCHANGE
    inner_kwargs['task_id'] = task_id

    inner_task = celery.tasks[name]
    try:
        inner_task.apply_async(*inner_args, **inner_kwargs)
    finally:
        # Queue the release even if the dispatch above failed
        _release_resource.apply_async((task_id, ), routing_key=worker.name,
                                      exchange=DEDICATED_QUEUE_EXCHANGE)
def _delete_worker(name, normal_shutdown=False):
    """
    Remove a worker's database records and locks, and cancel the tasks assigned to it.

    The Worker document called name and every ReservedResource recorded for that worker are
    deleted. When name starts with RESOURCE_MANAGER_WORKER_NAME the matching
    ResourceManagerLock is deleted, and when it starts with SCHEDULER_WORKER_NAME the matching
    CeleryBeatLock is deleted. Each TaskStatus of the worker whose state is listed in
    constants.CALL_INCOMPLETE_STATES is then passed to cancel() with revoke_task=False.

    An error level message is logged when normal_shutdown is the literal False; for any other
    value an info level message is logged instead.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise. Defaults to
                            False.
    :type  normal_shutdown: bool
    """
    if normal_shutdown is not False:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % name)
    else:
        missing_msg = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(missing_msg % {'name': name})

    # Worker document first, then the reservations it was holding
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # A resource manager owns a ResourceManagerLock that must be removed with it
    is_resource_manager = name.startswith(RESOURCE_MANAGER_WORKER_NAME)
    if is_resource_manager:
        ResourceManagerLock.objects(name=name).delete()

    # A scheduler owns a CeleryBeatLock that must be removed with it
    is_scheduler = name.startswith(SCHEDULER_WORKER_NAME)
    if is_scheduler:
        CeleryBeatLock.objects(name=name).delete()

    # Whatever was still pending or running in this worker's queue gets cancelled
    incomplete_statuses = TaskStatus.objects(worker_name=name,
                                             state__in=constants.CALL_INCOMPLETE_STATES)
    for incomplete in incomplete_statuses:
        cancel(incomplete['task_id'], revoke_task=False)
def _release_resource(task_id):
    """
    Drop the resource reservation that was made on behalf of a task.

    Do not queue this task yourself. It is dispatched automatically, behind your own task, by
    the _queue_reserved_task task.

    Before the reservation is removed, every TaskStatus for task_id that is still in
    constants.CALL_RUNNING_STATE is handed to Task.on_failure() together with a
    PulpCodedException carrying error_codes.PLP0049. Afterwards all ReservedResource objects
    stored under task_id are deleted, whether or not such a TaskStatus was found.

    :param task_id: The UUID of the task that requested the reservation
    :type  task_id: basestring
    """
    still_running = TaskStatus.objects.filter(task_id=task_id,
                                              state=constants.CALL_RUNNING_STATE)

    for _stale_status in still_running:
        failure_handler = Task()
        failure = PulpCodedException(error_codes.PLP0049, task_id=task_id)

        # Minimal stand-in for the "einfo" argument of on_failure(); only a traceback
        # attribute is provided and there is no real traceback to report.
        class MyEinfo(object):
            traceback = None

        failure_handler.on_failure(failure, task_id, (), {}, MyEinfo)

    ReservedResource.objects(task_id=task_id).delete()
def test_resource_not_in_resource_map(self):
    """
    Test _release_resource() with a resource that is not in the database. This should be
    gracefully handled, and result in no changes to the database.
    """
    # Set up two workers. Fields are passed by keyword so the test does not depend on the
    # order in which the document classes declare their fields.
    worker_1 = Worker(name=WORKER_1, last_heartbeat=datetime.utcnow())
    worker_1.save()
    worker_2 = Worker(name=WORKER_2, last_heartbeat=datetime.utcnow())
    worker_2.save()

    # Set up two resource reservations, using our workers from above
    reserved_resource_1 = ReservedResource(task_id=str(uuid.uuid4()),
                                           worker_name=worker_1.name,
                                           resource_id='resource_1')
    reserved_resource_1.save()
    reserved_resource_2 = ReservedResource(task_id=str(uuid.uuid4()),
                                           worker_name=worker_2.name,
                                           resource_id='resource_2')
    reserved_resource_2.save()

    # This should not raise any Exception, but should also not alter either the Worker
    # collection or the ReservedResource collection
    tasks._release_resource('made_up_resource_id')

    # Make sure that the workers collection has not been altered
    self.assertEqual(Worker.objects().count(), 2)
    worker_1 = Worker.objects().get(name=worker_1.name)
    self.assertTrue(worker_1)
    worker_2 = Worker.objects().get(name=worker_2.name)
    self.assertTrue(worker_2)

    # Make sure that the reserved resources collection has not been altered
    self.assertEqual(ReservedResource.objects.count(), 2)
    rr_1 = ReservedResource.objects.get(task_id=reserved_resource_1.task_id)
    self.assertEqual(rr_1['worker_name'], reserved_resource_1.worker_name)
    self.assertEqual(rr_1['resource_id'], 'resource_1')
    rr_2 = ReservedResource.objects.get(task_id=reserved_resource_2.task_id)
    self.assertEqual(rr_2['worker_name'], reserved_resource_2.worker_name)
    self.assertEqual(rr_2['resource_id'], 'resource_2')
def _release_resource(task_id):
    """
    Release the reservation held for a task, flagging the task as failed if it never finished.

    Do not queue this task yourself. It is dispatched automatically, behind your own task, by
    the _queue_reserved_task task.

    Any TaskStatus for task_id found in constants.CALL_RUNNING_STATE is passed to
    Task.on_failure() along with a RuntimeError stating that the task exited immediately.
    All ReservedResource objects stored under task_id are deleted afterwards, regardless of
    whether such a TaskStatus existed.

    :param task_id: The UUID of the task that requested the reservation
    :type  task_id: basestring
    """
    unfinished_statuses = TaskStatus.objects.filter(task_id=task_id,
                                                    state=constants.CALL_RUNNING_STATE)

    for _unfinished in unfinished_statuses:
        reporter = Task()
        text = _('The task status %(task_id)s exited immediately for some reason. Marking as '
                 'errored. Check the logs for more details')
        error = RuntimeError(text % {'task_id': task_id})

        # on_failure() expects an "einfo" object; this placeholder only offers an empty
        # traceback attribute.
        class MyEinfo(object):
            traceback = None

        reporter.on_failure(error, task_id, (), {}, MyEinfo)

    ReservedResource.objects(task_id=task_id).delete()
def test_resource_not_in_resource_map(self):
    """
    Test _release_resource() with a task id that has no reservation in the database. The call
    must be handled gracefully and leave both collections exactly as they were.
    """
    # Two workers, each with its own heartbeat timestamp
    first_worker = Worker(name=WORKER_1, last_heartbeat=datetime.utcnow())
    first_worker.save()
    second_worker = Worker(name=WORKER_2, last_heartbeat=datetime.utcnow())
    second_worker.save()

    # One reservation per worker
    first_reservation = ReservedResource(task_id=str(uuid.uuid4()),
                                         worker_name=first_worker.name,
                                         resource_id='resource_1')
    first_reservation.save()
    second_reservation = ReservedResource(task_id=str(uuid.uuid4()),
                                          worker_name=second_worker.name,
                                          resource_id='resource_2')
    second_reservation.save()

    # Must not raise, and must not touch either the Worker or the ReservedResource collection
    tasks._release_resource('made_up_resource_id')

    # Both workers are still there
    self.assertEqual(Worker.objects().count(), 2)
    for worker_name in (first_worker.name, second_worker.name):
        self.assertTrue(Worker.objects().get(name=worker_name))

    # Both reservations are still there and unchanged
    self.assertEqual(ReservedResource.objects.count(), 2)
    expectations = (
        (first_reservation, 'resource_1'),
        (second_reservation, 'resource_2'),
    )
    for saved, expected_resource_id in expectations:
        stored = ReservedResource.objects.get(task_id=saved.task_id)
        self.assertEqual(stored['worker_name'], saved.worker_name)
        self.assertEqual(stored['resource_id'], expected_resource_id)
def get_worker_for_reservation(resource_id):
    """
    Return the Worker that currently holds a reservation for resource_id.

    The first ReservedResource whose resource_id matches is looked up, and the Worker named by
    its 'worker_name' entry is returned.

    :param resource_id: The name of the resource you wish to reserve for your task.
    :type  resource_id: basestring
    :raises NoWorkers:  If no ReservedResource entry exists for resource_id.
    :returns:           The first Worker whose name matches the reservation's worker_name.
                        NOTE(review): this is the result of .first() on the Worker query, so
                        it is presumably None when no such Worker document exists - confirm.
    :rtype:             pulp.server.db.model.resources.Worker
    """
    existing = ReservedResource.objects(resource_id=resource_id).first()
    if not existing:
        raise NoWorkers()

    holder_name = existing['worker_name']
    return Worker.objects(name=holder_name).first()
def _queue_reserved_task_list(name, task_id, resource_id_list, inner_args, inner_kwargs):
    """
    Reserve several resources on one worker and dispatch another ("inner") task to it.

    The worker is chosen by get_worker_for_reservation_list(resource_id_list). One
    ReservedResource per entry of resource_id_list is saved for task_id and that worker, and
    the inner task registered as celery.tasks[name] is dispatched with
    apply_async(*inner_args, **inner_kwargs). Whether or not that dispatch raises,
    _release_resource is queued for task_id using the same routing key and exchange.
    See _queue_reserved_task for the single-resource variant.

    :param name:             The name of the task to be called
    :type  name:             basestring
    :param task_id:          The UUID to be set on the task being called. By providing the
                             UUID, the caller can have an asynchronous reference to the inner
                             task that will be dispatched.
    :type  task_id:          basestring
    :param resource_id_list: A list of names of the resources you wish to reserve for your
                             task. The system will ensure that no other tasks that want any of
                             the same reservations will run concurrently with yours.
    :type  resource_id_list: list
    :param inner_args:       Arguments unpacked with * into the inner task's apply_async() call
    :type  inner_args:       list
    :param inner_kwargs:     Arguments unpacked with ** into the inner task's apply_async()
                             call. It is modified in place: the 'routing_key', 'exchange' and
                             'task_id' entries are set before dispatching.
    :type  inner_kwargs:     dict
    :return: None
    """
    _logger.debug('_queue_reserved_task_list for task %s and ids [%s]' %
                  (task_id, resource_id_list))

    # A single worker has to own every resource in the list
    worker = get_worker_for_reservation_list(resource_id_list)

    # Record one reservation per requested resource against that worker
    for resource_id in resource_id_list:
        _logger.debug('...saving RR for RID %s' % resource_id)
        reservation = ReservedResource(task_id=task_id, worker_name=worker['name'],
                                       resource_id=resource_id)
        reservation.save()

    # Route the inner task to the chosen worker and give it the caller's UUID
    inner_kwargs['routing_key'] = worker.name
    inner_kwargs['exchange'] = DEDICATED_QUEUE_EXCHANGE
    inner_kwargs['task_id'] = task_id

    inner_task = celery.tasks[name]
    try:
        inner_task.apply_async(*inner_args, **inner_kwargs)
    finally:
        # Queue the release of all reservations even if the dispatch above failed
        _release_resource.apply_async((task_id, ), routing_key=worker.name,
                                      exchange=DEDICATED_QUEUE_EXCHANGE)
def get_worker_for_reservation_list(resources):
    """
    Return the Worker that should process a task reserving every resource in 'resources'.

    Each pass collects the distinct worker names of all ReservedResource entries whose
    resource_id is in 'resources' and then decides:

    * exactly one worker holds any of them: that Worker is returned;
    * no worker holds any of them: the Worker from _get_unreserved_worker() is returned,
      unless it raises NoWorkers;
    * several workers hold them, or no unreserved worker was available: wait 0.25 seconds
      and look again.

    The call therefore blocks until the request can be fulfilled.

    :param resources: A list of the names of the resources you wish to reserve for your task.
    :type  resources: list
    :returns:         The Worker that either already holds a reservation for at least one of
                      'resources', or that had no reservations at all.
                      NOTE(review): the single-holder case returns the result of .first() on
                      the Worker query, so it is presumably None when that Worker document no
                      longer exists - confirm.
    :rtype:           pulp.server.db.model.resources.Worker
    """
    # Arguments are handed to the logger instead of using '%' directly: a bare
    # "'...%s' % resources" raises TypeError when 'resources' is a tuple.
    _logger.debug('get_worker_for_reservation_list [%s]', resources)

    # We leave this loop once we find a Worker to return - otherwise, sleep and try again
    while True:
        holders = {reservation['worker_name']
                   for reservation in ReservedResource.objects(resource_id__in=resources)}
        holder_count = len(holders)
        _logger.debug('...num-RR is %d', holder_count)

        if holder_count == 1:
            # Exactly one worker holds any of the desired resources
            _logger.debug('...one-holds')
            return Worker.objects(name=next(iter(holders))).first()

        if holder_count == 0:
            # No worker holds any of the desired resources
            _logger.debug('...zero-holds')
            try:
                return _get_unreserved_worker()
            except NoWorkers:
                _logger.debug('...unresolved NoWorkers - WAIT')
        else:
            # The resources are spread over several workers; wait for them to be released
            _logger.debug('...multiple-holds - WAIT')

        time.sleep(0.25)