def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is handed to _parse_and_log_event() first. The Worker records are then
    filtered by the worker name taken from the parsed event. When a matching record is
    found, its 'last_heartbeat' is set to the event's timestamp. When none is found, a
    new Worker is built from the name and timestamp, its discovery is logged at the
    info level, and it is saved.

    :param event: A celery event to handle.
    :type event: dict
    """
    heartbeat = _parse_and_log_event(event)
    worker_name = heartbeat['worker_name']

    criteria = Criteria(filters={'_id': worker_name}, fields=('_id', 'last_heartbeat'))
    known_workers = list(resources.filter_workers(criteria))

    if not known_workers:
        # First heartbeat seen for this name: record the worker and announce it.
        discovered = Worker(worker_name, heartbeat['timestamp'])
        _logger.info(_("New worker '%(worker_name)s' discovered") % heartbeat)
        discovered.save()
        return

    # The worker is already recorded, so only its heartbeat timestamp is refreshed.
    Worker.get_collection().find_and_modify(
        query={'_id': worker_name},
        update={'$set': {'last_heartbeat': heartbeat['timestamp']}})
def handle_worker_heartbeat(event):
    """
    Celery event handler for 'worker-heartbeat' events.

    The event is handed to _parse_and_log_event() first. If _is_resource_manager()
    reports that the event came from the resource manager, nothing else is done.
    Otherwise the Worker records are filtered by the worker name taken from the parsed
    event. When a matching record is found, its 'last_heartbeat' is set to the event's
    timestamp. When none is found, a new Worker is built from the name and timestamp,
    its discovery is logged at the info level, and it is saved.

    :param event: A celery event to handle.
    :type event: dict
    """
    heartbeat = _parse_and_log_event(event)

    # Heartbeats from the resource manager are parsed and logged above but not tracked.
    if _is_resource_manager(event):
        return

    worker_name = heartbeat['worker_name']
    criteria = Criteria(filters={'_id': worker_name},
                        fields=('_id', 'last_heartbeat', 'num_reservations'))
    known_workers = list(resources.filter_workers(criteria))

    if not known_workers:
        # First heartbeat seen for this name: record the worker and announce it.
        discovered = Worker(worker_name, heartbeat['timestamp'])
        _logger.info(_("New worker '%(worker_name)s' discovered") % heartbeat)
        discovered.save()
        return

    # The worker is already recorded, so only its heartbeat timestamp is refreshed.
    Worker.get_collection().find_and_modify(
        query={'_id': worker_name},
        update={'$set': {'last_heartbeat': heartbeat['timestamp']}})
def test__delete_worker(self, logger, cancel, mock_add_consumer):
    """
    Deleting a Worker must cancel exactly the incomplete Tasks that were assigned to it
    and must remove that Worker, and only that Worker, from the database.
    """
    # Register both workers by feeding a heartbeat for each through the watcher.
    for hostname in (WORKER_1, WORKER_2):
        worker_watcher.handle_worker_heartbeat({
            'timestamp': time.time(),
            'type': 'worker-heartbeat',
            'hostname': hostname,
        })

    # WORKER_2 gets two incomplete tasks (running and waiting), which are the ones
    # expected to be canceled, plus a finished task that must be left alone. WORKER_1
    # keeps a running task of its own to prove that other workers are not touched.
    task_fixtures = (
        ('task_1', WORKER_2_QUEUE, CALL_RUNNING_STATE),
        ('task_2', WORKER_2_QUEUE, CALL_WAITING_STATE),
        ('task_3', WORKER_2_QUEUE, CALL_FINISHED_STATE),
        ('task_4', WORKER_1_QUEUE, CALL_RUNNING_STATE),
    )
    for task_id, queue_name, task_state in task_fixtures:
        TaskStatusManager.create_task_status(task_id, queue_name, state=task_state)

    # Sanity check on the setup: WORKER_2 must exist before it is deleted.
    workers = Worker.get_collection()
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 1)

    # Delete WORKER_2.
    tasks._delete_worker.apply_async(args=(WORKER_2,), queue=tasks.RESOURCE_MANAGER_QUEUE)

    # cancel() must have been invoked once each for task_1 and task_2. The order of the
    # two calls is not guaranteed, so their positional arguments are compared as a set;
    # task_3 must not be among them.
    self.assertEqual(cancel.call_count, 2)
    canceled_args = set(recorded[1] for recorded in cancel.mock_calls)
    self.assertEqual(canceled_args, set([('task_1',), ('task_2',)]))

    # The logger mock itself is never called directly; the first call recorded on it
    # must carry the cancellation message for WORKER_2.
    self.assertEqual(logger.call_count, 0)
    first_logged_message = logger.mock_calls[0][1][0]
    self.assertTrue(WORKER_2 in first_logged_message)
    self.assertTrue('Canceling the tasks' in first_logged_message)

    # WORKER_2 is gone, WORKER_1 is still there.
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 0)
    self.assertEqual(workers.find({'_id': WORKER_1}).count(), 1)
def test__delete_worker(self, logger, cancel, mock_add_consumer):
    """
    Deleting a Worker must cancel exactly the incomplete Tasks that were assigned to it
    and must remove that Worker, and only that Worker, from the database.
    """
    # Register both workers by feeding a heartbeat for each through the watcher.
    for hostname in (WORKER_1, WORKER_2):
        worker_watcher.handle_worker_heartbeat({
            'timestamp': time.time(),
            'type': 'worker-heartbeat',
            'hostname': hostname,
        })

    # WORKER_2 gets two incomplete tasks (running and waiting), which are the ones
    # expected to be canceled, plus a finished task that must be left alone. WORKER_1
    # keeps a running task of its own to prove that other workers are not touched.
    task_fixtures = (
        ('task_1', WORKER_2_QUEUE, CALL_RUNNING_STATE),
        ('task_2', WORKER_2_QUEUE, CALL_WAITING_STATE),
        ('task_3', WORKER_2_QUEUE, CALL_FINISHED_STATE),
        ('task_4', WORKER_1_QUEUE, CALL_RUNNING_STATE),
    )
    for task_id, queue_name, task_state in task_fixtures:
        TaskStatusManager.create_task_status(task_id, queue_name, state=task_state)

    # Sanity check on the setup: WORKER_2 must exist before it is deleted.
    workers = Worker.get_collection()
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 1)

    # Delete WORKER_2.
    tasks._delete_worker.apply_async(args=(WORKER_2,), queue=tasks.RESOURCE_MANAGER_QUEUE)

    # cancel() must have been invoked once each for task_1 and task_2. The order of the
    # two calls is not guaranteed, so their positional arguments are compared as a set;
    # task_3 must not be among them.
    self.assertEqual(cancel.call_count, 2)
    canceled_args = set(recorded[1] for recorded in cancel.mock_calls)
    self.assertEqual(canceled_args, set([('task_1',), ('task_2',)]))

    # The logger mock itself is never called directly; the first call recorded on it
    # must carry the cancellation message for WORKER_2.
    self.assertEqual(logger.call_count, 0)
    first_logged_message = logger.mock_calls[0][1][0]
    self.assertTrue(WORKER_2 in first_logged_message)
    self.assertTrue('Canceling the tasks' in first_logged_message)

    # WORKER_2 is gone, WORKER_1 is still there.
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 0)
    self.assertEqual(workers.find({'_id': WORKER_1}).count(), 1)
def test__release_resource_not_in__resource_map(self):
    """
    Call _release_resource() with a resource id that is not in the database. The call
    is expected to complete without raising and to leave both the Worker and the
    ReservedResource collections unchanged.
    """
    # Two workers...
    worker_1 = Worker(WORKER_1, datetime.utcnow())
    worker_1.save()
    worker_2 = Worker(WORKER_2, datetime.utcnow())
    worker_2.save()
    # ...each with one reserved resource attached.
    reserved_resource_1 = ReservedResource('resource_1', worker_1.name, 7)
    reserved_resource_1.save()
    reserved_resource_2 = ReservedResource('resource_2', worker_2.name, 3)
    reserved_resource_2.save()

    # Releasing an unknown resource must not raise.
    tasks._release_resource('made_up_resource_id')

    # Both workers are still present, and nothing else has appeared.
    workers = Worker.get_collection()
    self.assertEqual(workers.count(), 2)
    for saved_worker in (worker_1, worker_2):
        self.assertTrue(workers.find_one({'_id': saved_worker.name}))

    # Both reservations are still present with their original queue and count.
    reservations = ReservedResource.get_collection()
    self.assertEqual(reservations.count(), 2)
    expectations = ((reserved_resource_1, 7), (reserved_resource_2, 3))
    for saved_reservation, expected_count in expectations:
        stored = reservations.find_one({'_id': saved_reservation.name})
        self.assertEqual(stored['assigned_queue'], saved_reservation.assigned_queue)
        self.assertEqual(stored['num_reservations'], expected_count)
def test_resource_not_in_resource_map(self):
    """
    Call _release_resource() with a resource id that is not in the database. The call
    is expected to complete without raising and to leave both the Worker and the
    ReservedResource collections unchanged.
    """
    # Two workers...
    worker_1 = Worker(WORKER_1, datetime.utcnow())
    worker_1.save()
    worker_2 = Worker(WORKER_2, datetime.utcnow())
    worker_2.save()
    # ...each with one reserved resource attached.
    reserved_resource_1 = ReservedResource(uuid.uuid4(), worker_1.name, 'resource_1')
    reserved_resource_1.save()
    reserved_resource_2 = ReservedResource(uuid.uuid4(), worker_2.name, 'resource_2')
    reserved_resource_2.save()

    # Releasing an unknown resource must not raise.
    tasks._release_resource('made_up_resource_id')

    # Both workers are still present, and nothing else has appeared.
    workers = Worker.get_collection()
    self.assertEqual(workers.count(), 2)
    for saved_worker in (worker_1, worker_2):
        self.assertTrue(workers.find_one({'_id': saved_worker.name}))

    # Both reservations are still present with their original worker and resource id.
    reservations = ReservedResource.get_collection()
    self.assertEqual(reservations.count(), 2)
    expectations = (
        (reserved_resource_1, 'resource_1'),
        (reserved_resource_2, 'resource_2'),
    )
    for saved_reservation, expected_resource_id in expectations:
        stored = reservations.find_one({'_id': saved_reservation.task_id})
        self.assertEqual(stored['worker_name'], saved_reservation.worker_name)
        self.assertEqual(stored['resource_id'], expected_resource_id)
def tearDown(self):
    """
    Clean up the Worker, ReservedResource and TaskStatus data after a test.
    """
    # Worker and ReservedResource are cleaned through their collections, in that order.
    for model in (Worker, ReservedResource):
        model.get_collection().remove()
    # TaskStatus is cleaned through its objects() interface instead.
    TaskStatus.objects().delete()
def tearDown(self):
    """
    Clean up the Worker, ReservedResource and TaskStatus collections after a test.
    """
    # Each model is cleaned through its own collection, in the order listed.
    for model in (Worker, ReservedResource, TaskStatus):
        model.get_collection().remove()