def testForemanMessageHandler(self):
  with test_lib.ConfigOverrider({
      "Database.useForReads": True,
      "Database.useForReads.message_handlers": True
  }):
    with mock.patch.object(foreman.Foreman, "AssignTasksToClient") as instr:
      worker_obj = worker_lib.GRRWorker(token=self.token)

      # Send a message to the Foreman.
      session_id = administrative.Foreman.well_known_session_id
      client_id = rdf_client.ClientURN("C.1100110011001100")
      self.SendResponse(
          session_id,
          rdf_protodict.DataBlob(),
          client_id=client_id,
          well_known=True)

      done = threading.Event()

      def handle(l):
        worker_obj._ProcessMessageHandlerRequests(l)
        done.set()

      data_store.REL_DB.RegisterMessageHandler(
          handle, worker_obj.well_known_flow_lease_time, limit=1000)
      self.assertTrue(done.wait(10))

      # Make sure there are no leftover requests.
      self.assertEqual(data_store.REL_DB.ReadMessageHandlerRequests(), [])

      instr.assert_called_once_with(client_id)
def testKillNotificationsScheduledForFlows(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)

  try:
    with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
      flow.StartFlow(
          flow_name=WorkerStuckableTestFlow.__name__,
          client_id=self.client_id,
          token=self.token,
          sync=False)

      # Process all messages
      worker_obj.RunOnce()
      # Wait until worker thread starts processing the flow.
      WorkerStuckableTestFlow.WaitUntilWorkerStartsProcessing()

    # Assert that there are no stuck notifications in the worker's queue.
    with queue_manager.QueueManager(token=self.token) as manager:
      for queue in worker_obj.queues:
        notifications = manager.GetNotificationsByPriority(queue)
        self.assertFalse(manager.STUCK_PRIORITY in notifications)
  finally:
    # Release the semaphore so that worker thread unblocks and finishes
    # processing the flow.
    WorkerStuckableTestFlow.LetWorkerFinishProcessing()
    worker_obj.thread_pool.Join()
def testWellKnownFlowResponsesAreProcessedOnlyOnce(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)

  # Send a message to a WellKnownFlow - ClientStatsAuto.
  client_id = rdf_client.ClientURN("C.1100110011001100")
  self.SendResponse(
      rdfvalue.SessionID(queue=queues.STATS, flow_name="Stats"),
      data=rdf_client.ClientStats(RSS_size=1234),
      client_id=client_id,
      well_known=True)

  # Process all messages
  worker_obj.RunOnce()
  worker_obj.thread_pool.Join()

  client = aff4.FACTORY.Open(client_id.Add("stats"), token=self.token)
  stats = client.Get(client.Schema.STATS)
  self.assertEqual(stats.RSS_size, 1234)

  aff4.FACTORY.Delete(client_id.Add("stats"), token=self.token)

  # Process all messages once again - there should be no actual processing
  # done, as all the responses were processed last time.
  worker_obj.RunOnce()
  worker_obj.thread_pool.Join()

  # Check that stats haven't changed as no new responses were processed.
  client = aff4.FACTORY.Open(client_id.Add("stats"), token=self.token)
  self.assertIsNone(client.Get(client.Schema.STATS))
def testCPULimitForFlows(self):
  """This tests that the client actions are limited properly."""
  result = {}
  client_mock = action_mocks.CPULimitClientMock(result)
  client_mock = flow_test_lib.MockClient(
      self.client_id, client_mock, token=self.token)

  client_mock.EnableResourceUsage(
      user_cpu_usage=[10], system_cpu_usage=[10], network_usage=[1000])

  worker_obj = worker_lib.GRRWorker(token=self.token)

  flow.StartFlow(
      client_id=self.client_id,
      flow_name=flow_test_lib.CPULimitFlow.__name__,
      cpu_limit=1000,
      network_bytes_limit=10000,
      token=self.token)

  self._Process([client_mock], worker_obj)

  self.assertEqual(result["cpulimit"], [1000, 980, 960])
  self.assertEqual(result["networklimit"], [10000, 9000, 8000])

  return result
def testNoKillNotificationsScheduledForHunts(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)

  try:
    with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
      with implementation.StartHunt(
          hunt_name=WorkerStuckableHunt.__name__,
          client_rate=0,
          token=self.token) as hunt:
        hunt.GetRunner().Start()

      implementation.GRRHunt.StartClients(hunt.session_id, [self.client_id])

      # Process all messages
      while worker_obj.RunOnce():
        pass
      # Wait until worker thread starts processing the flow.
      WorkerStuckableHunt.WaitUntilWorkerStartsProcessing()

    # Assert that there are no stuck notifications in the worker's queue.
    with queue_manager.QueueManager(token=self.token) as manager:
      for queue in worker_obj.queues:
        notifications = manager.GetNotificationsByPriority(queue)
        self.assertFalse(manager.STUCK_PRIORITY in notifications)
  finally:
    # Release the semaphore so that worker thread unblocks and finishes
    # processing the flow.
    WorkerStuckableHunt.LetWorkerFinishProcessing()
    worker_obj.thread_pool.Join()
def CheckNotificationsDisappear(self, session_id):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  manager = queue_manager.QueueManager(token=self.token)
  notification = rdf_flows.GrrNotification(session_id=session_id)
  with data_store.DB.GetMutationPool() as pool:
    manager.NotifyQueue(notification, mutation_pool=pool)

  notifications = manager.GetNotificationsByPriority(queues.FLOWS).get(
      notification.priority, [])

  # Check the notification is there. With multiple worker queue shards we can
  # get other notifications such as for audit event listeners, so we need to
  # filter out ours.
  notifications = [x for x in notifications if x.session_id == session_id]
  self.assertEqual(len(notifications), 1)

  # Process all messages
  worker_obj.RunOnce()
  worker_obj.thread_pool.Join()

  notifications = manager.GetNotificationsByPriority(queues.FLOWS).get(
      notification.priority, [])
  notifications = [x for x in notifications if x.session_id == session_id]

  # Check the notification is now gone.
  self.assertEqual(len(notifications), 0)
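# A hypothetical call site for CheckNotificationsDisappear, sketched here for
# clarity. The flow name matches the test flows used elsewhere in this file,
# but this exact test method is illustrative and not part of the suite:
#
#   def testNotificationsDisappearAfterProcessing(self):
#     session_id = flow.StartFlow(
#         client_id=self.client_id,
#         flow_name="WorkerSendingTestFlow",
#         token=self.token)
#     self.CheckNotificationsDisappear(session_id)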
def testNonStuckFlowDoesNotGetTerminated(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)
  stuck_flows_timeout = flow_runner.FlowRunner.stuck_flows_timeout

  with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
    session_id = flow.StartFlow(
        flow_name="WorkerSendingTestFlow",
        client_id=self.client_id,
        token=self.token,
        sync=False)

    # Process all messages
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

  flow_obj = aff4.FACTORY.Open(session_id, token=self.token)
  self.assertEqual(flow_obj.context.state,
                   rdf_flow_runner.FlowContext.State.RUNNING)

  # Set the time to max worker flow duration + 1 minute. If the 'kill'
  # notification isn't deleted we should get it now.
  future_time = initial_time + rdfvalue.Duration("1m") + stuck_flows_timeout
  with test_lib.FakeTime(future_time.AsSecondsSinceEpoch()):
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

  flow_obj = aff4.FACTORY.Open(session_id, token=self.token)
  # Check that flow didn't get terminated due to a logic bug.
  self.assertEqual(flow_obj.context.state,
                   rdf_flow_runner.FlowContext.State.RUNNING)
def testHeartBeatingFlowIsNotTreatedAsStuck(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)

  stuck_flows_timeout = flow_runner.FlowRunner.stuck_flows_timeout
  lease_timeout = rdfvalue.Duration(worker_lib.GRRWorker.flow_lease_time)

  WorkerStuckableTestFlow.Reset(heartbeat=True)
  try:
    with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
      session_id = flow.StartFlow(
          flow_name=WorkerStuckableTestFlow.__name__,
          client_id=self.client_id,
          token=self.token,
          sync=False)

      # Process all messages
      worker_obj.RunOnce()
      # Wait until worker thread starts processing the flow.
      WorkerStuckableTestFlow.WaitUntilWorkerStartsProcessing()

    # Increase the time in steps, using LetFlowHeartBeat/WaitForFlowHeartBeat
    # to control the flow execution that happens in the parallel thread.
    current_time = rdfvalue.RDFDatetime(initial_time)
    future_time = initial_time + stuck_flows_timeout + rdfvalue.Duration("1m")
    while current_time <= future_time:
      current_time += lease_timeout - rdfvalue.Duration("1s")

      with test_lib.FakeTime(current_time.AsSecondsSinceEpoch()):
        checked_flow = aff4.FACTORY.Open(session_id, token=self.token)
        WorkerStuckableTestFlow.LetFlowHeartBeat()
        WorkerStuckableTestFlow.WaitForFlowHeartBeat(
            last_heartbeat=current_time > future_time)

    # Now current_time is > future_time, where future_time is the time
    # when the stuck flow should have been killed. Calling RunOnce() here,
    # because if the flow is going to be killed, it will be killed
    # during the worker.RunOnce() call.
    with test_lib.FakeTime(current_time.AsSecondsSinceEpoch()):
      worker_obj.RunOnce()

    # Check that the flow wasn't killed forcefully.
    checked_flow = aff4.FACTORY.Open(session_id, token=self.token)
    self.assertEqual(checked_flow.context.state,
                     rdf_flow_runner.FlowContext.State.RUNNING)
  finally:
    # Release the semaphore so that worker thread unblocks and finishes
    # processing the flow.
    with test_lib.FakeTime(current_time.AsSecondsSinceEpoch()):
      WorkerStuckableTestFlow.LetWorkerFinishProcessing()
      worker_obj.thread_pool.Join()

  # Check that the flow has finished normally.
  checked_flow = aff4.FACTORY.Open(session_id, token=self.token)
  self.assertEqual(checked_flow.context.state,
                   rdf_flow_runner.FlowContext.State.TERMINATED)
def main(argv):
  """Main."""
  del argv  # Unused.

  config.CONFIG.AddContext(contexts.WORKER_CONTEXT,
                           "Context applied when running a worker.")

  # Initialise flows and config_lib
  server_startup.Init()

  fleetspeak_connector.Init()

  token = access_control.ACLToken(username="******").SetUID()
  worker_obj = worker_lib.GRRWorker(token=token)
  worker_obj.Run()
def testNotificationReschedulingTTL(self):
  """Test that notifications are not rescheduled forever."""

  with test_lib.FakeTime(10000):
    worker_obj = worker_lib.GRRWorker(token=self.token)
    flow_obj = self.FlowSetup("RaisingTestFlow")
    session_id = flow_obj.session_id
    flow_obj.Close()

    with queue_manager.QueueManager(token=self.token) as manager:
      notification = rdf_flows.GrrNotification(
          session_id=session_id, timestamp=time.time(), last_status=1)
      with data_store.DB.GetMutationPool() as pool:
        manager.NotifyQueue(notification, mutation_pool=pool)

      notifications = manager.GetNotifications(queues.FLOWS)
      # Check the notification is there.
      notifications = [n for n in notifications if n.session_id == session_id]
      self.assertEqual(len(notifications), 1)

  delay = flow_runner.FlowRunner.notification_retry_interval

  ttl = notification.ttl
  for i in xrange(ttl - 1):
    with test_lib.FakeTime(10000 + 100 + delay * (i + 1)):
      # Process all messages.
      worker_obj.RunOnce()
      worker_obj.thread_pool.Join()

      notifications = manager.GetNotifications(queues.FLOWS)
      # Check the notification is for the correct session_id.
      notifications = [n for n in notifications if n.session_id == session_id]
      self.assertEqual(len(notifications), 1)

  with test_lib.FakeTime(10000 + 100 + delay * ttl):
    # Process all messages.
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

    notifications = manager.GetNotifications(queues.FLOWS)
    self.assertEqual(len(notifications), 0)
def StartFlowAndWorker(client_id, flow_name, **kwargs):
  """Launches the flow and worker and waits for it to finish.

  Args:
    client_id: The client common name we issue the request to.
    flow_name: The name of the flow to launch.
    **kwargs: passthrough to flow.

  Returns:
    A flow session id.

  Note: you need raw access to run this flow as it requires running a worker.
  """
  # Empty token, only works with raw access.
  queue = rdfvalue.RDFURN("DEBUG-%s-" % getpass.getuser())
  if "token" in kwargs:
    token = kwargs.pop("token")
  else:
    token = access_control.ACLToken(username="******")

  session_id = flow.StartAFF4Flow(
      client_id=client_id,
      flow_name=flow_name,
      queue=queue,
      token=token,
      **kwargs)
  worker_thrd = worker_lib.GRRWorker(
      queues=[queue], token=token, threadpool_size=1)
  while True:
    try:
      worker_thrd.RunOnce()
    except KeyboardInterrupt:
      print("exiting")
      worker_thrd.thread_pool.Join()
      break

    time.sleep(2)

    with aff4.FACTORY.Open(session_id, token=token) as flow_obj:
      if not flow_obj.GetRunner().IsRunning():
        break

  # Terminate the worker threads
  worker_thrd.thread_pool.Join()

  return session_id
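# A hedged usage sketch for StartFlowAndWorker. The client id and flow name
# below are illustrative placeholders, not values taken from this module;
# running this requires raw datastore access, since the helper spins up an
# in-process debug worker on a private per-user queue:
#
#   session_id = StartFlowAndWorker(
#       client_id="C.1000000000000000", flow_name="ListProcesses")
#   print("Flow %s finished." % session_id)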
def main(argv):
  """Main."""
  del argv  # Unused.

  if flags.FLAGS.version:
    print("GRR worker {}".format(config_server.VERSION["packageversion"]))
    return

  config.CONFIG.AddContext(contexts.WORKER_CONTEXT,
                           "Context applied when running a worker.")

  # Initialise flows and config_lib
  server_startup.Init()

  fleetspeak_connector.Init()

  worker_obj = worker_lib.GRRWorker()
  worker_obj.Run()
def _testProcessMessagesWellKnown(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)

  # Send a message to a WellKnownFlow - ClientStatsAuto.
  session_id = administrative.GetClientStatsAuto.well_known_session_id
  client_id = rdf_client.ClientURN("C.1100110011001100")

  if data_store.RelationalDBReadEnabled(category="message_handlers"):
    done = threading.Event()

    def handle(l):
      worker_obj._ProcessMessageHandlerRequests(l)
      done.set()

    data_store.REL_DB.RegisterMessageHandler(
        handle, worker_obj.well_known_flow_lease_time, limit=1000)
    self.SendResponse(
        session_id,
        data=rdf_client_stats.ClientStats(RSS_size=1234),
        client_id=client_id,
        well_known=True)
    self.assertTrue(done.wait(10))
  else:
    self.SendResponse(
        session_id,
        data=rdf_client_stats.ClientStats(RSS_size=1234),
        client_id=client_id,
        well_known=True)

    # Process all messages
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

  client = aff4.FACTORY.Open(client_id.Add("stats"), token=self.token)
  stats = client.Get(client.Schema.STATS)
  self.assertEqual(stats.RSS_size, 1234)

  # Make sure no notifications have been sent.
  user = aff4.FACTORY.Open(
      "aff4:/users/%s" % self.token.username, token=self.token)
  notifications = user.Get(user.Schema.PENDING_NOTIFICATIONS)
  self.assertIsNone(notifications)
def testStuckNotificationGetsDeletedAfterTheFlowIsTerminated(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)
  stuck_flows_timeout = flow_runner.FlowRunner.stuck_flows_timeout

  try:
    with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
      session_id = flow.StartFlow(
          flow_name=WorkerStuckableTestFlow.__name__,
          client_id=self.client_id,
          token=self.token,
          sync=False)

      # Process all messages
      worker_obj.RunOnce()
      # Wait until worker thread starts processing the flow.
      WorkerStuckableTestFlow.WaitUntilWorkerStartsProcessing()

    # Set the time to max worker flow duration + 1 minute. The flow is
    # currently blocked because of the way the semaphores are set up.
    # Worker should consider the flow to be stuck and terminate it.
    future_time = (
        initial_time + rdfvalue.Duration("1m") + stuck_flows_timeout)
    with test_lib.FakeTime(future_time.AsSecondsSinceEpoch()):
      worker_obj.RunOnce()

    killed_flow = aff4.FACTORY.Open(session_id, token=self.token)
    self.assertEqual(killed_flow.context.state,
                     rdf_flow_runner.FlowContext.State.ERROR)
    self.assertEqual(killed_flow.context.status,
                     "Terminated by user test. Reason: Stuck in the worker")

    # Check that stuck notification has been removed.
    qm = queue_manager.QueueManager(token=self.token)
    notifications_by_priority = qm.GetNotificationsByPriority(queues.FLOWS)
    self.assertTrue(qm.STUCK_PRIORITY not in notifications_by_priority)
  finally:
    # Release the semaphore so that worker thread unblocks and finishes
    # processing the flow.
    WorkerStuckableTestFlow.LetWorkerFinishProcessing()
    worker_obj.thread_pool.Join()
def testNotificationRacesAreResolved(self):
  # We need a random flow object for this test.
  session_id = flow.StartFlow(
      client_id=self.client_id,
      flow_name="WorkerSendingTestFlow",
      token=self.token)
  worker_obj = worker_lib.GRRWorker(token=self.token)
  manager = queue_manager.QueueManager(token=self.token)
  manager.DeleteNotification(session_id)
  manager.Flush()

  # We simulate a race condition here - the notification for request #1 is
  # there but the actual request #1 is not. The worker should pick up the
  # notification, notice that the request #1 is not there yet and reschedule
  # the notification.
  notification = rdf_flows.GrrNotification(
      session_id=session_id, last_status=1)
  with data_store.DB.GetMutationPool() as pool:
    manager.NotifyQueue(notification, mutation_pool=pool)

  notifications = manager.GetNotifications(queues.FLOWS)
  # Check the notification is there.
  notifications = [n for n in notifications if n.session_id == session_id]
  self.assertEqual(len(notifications), 1)

  # Process all messages
  worker_obj.RunOnce()
  worker_obj.thread_pool.Join()

  delay = flow_runner.FlowRunner.notification_retry_interval
  with test_lib.FakeTime(time.time() + 10 + delay):
    requeued_notifications = manager.GetNotifications(queues.FLOWS)
    # Check that there is a new notification.
    notifications = [n for n in notifications if n.session_id == session_id]
    self.assertEqual(len(requeued_notifications), 1)

    self.assertEqual(requeued_notifications[0].first_queued,
                     notifications[0].first_queued)
    self.assertNotEqual(requeued_notifications[0].timestamp,
                        notifications[0].timestamp)
def testStuckFlowGetsTerminated(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)
  initial_time = rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100)

  try:
    with test_lib.FakeTime(initial_time.AsSecondsSinceEpoch()):
      session_id = flow.StartFlow(
          flow_name=WorkerStuckableTestFlow.__name__,
          client_id=self.client_id,
          token=self.token,
          sync=False)

      # Process all messages
      while worker_obj.RunOnce():
        pass
      # Wait until worker thread starts processing the flow.
      WorkerStuckableTestFlow.WaitUntilWorkerStartsProcessing()

    # Set the time to max worker flow duration + 1 minute. The flow is
    # currently blocked because of the way semaphores are set up.
    # Worker should consider the flow to be stuck and terminate it.
    stuck_flows_timeout = flow_runner.FlowRunner.stuck_flows_timeout
    future_time = (
        initial_time + rdfvalue.Duration("1m") + stuck_flows_timeout)

    with test_lib.FakeTime(future_time.AsSecondsSinceEpoch()):
      worker_obj.RunOnce()
  finally:
    # Release the semaphore so that worker thread unblocks and finishes
    # processing the flow.
    WorkerStuckableTestFlow.LetWorkerFinishProcessing()
    worker_obj.thread_pool.Join()

  killed_flow = aff4.FACTORY.Open(session_id, token=self.token)
  self.assertEqual(killed_flow.context.state,
                   rdf_flow_runner.FlowContext.State.ERROR)
  self.assertEqual(killed_flow.context.status,
                   "Terminated by user test. Reason: Stuck in the worker")
def testNoNotificationRescheduling(self):
  """Test that no notifications are rescheduled when a flow raises."""

  with test_lib.FakeTime(10000):
    flow_obj = self.FlowSetup("RaisingTestFlow")
    session_id = flow_obj.session_id
    flow_obj.Close()

    # Send the flow some messages.
    self.SendResponse(session_id, "Hello1", request_id=1)
    self.SendResponse(session_id, "Hello2", request_id=2)
    self.SendResponse(session_id, "Hello3", request_id=3)

    worker_obj = worker_lib.GRRWorker(token=self.token)

    # Process all messages.
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

  delay = flow_runner.FlowRunner.notification_retry_interval
  with test_lib.FakeTime(10000 + 100 + delay):
    manager = queue_manager.QueueManager(token=self.token)
    self.assertFalse(manager.GetNotificationsForAllShards(session_id.Queue()))
def _TestWorker(self):
  worker = worker_lib.GRRWorker(token=self.token)
  self.addCleanup(worker.Shutdown)
  return worker
def _TestWorker(self):
  self.worker = worker_lib.GRRWorker(token=self.token)
  return self.worker
def testCPULimitForHunts(self):
  worker_obj = worker_lib.GRRWorker(token=self.token)

  client_ids = ["C.%016X" % i for i in xrange(10, 20)]
  result = {}
  client_mocks = []
  for client_id in client_ids:
    client_mock = action_mocks.CPULimitClientMock(result)
    client_mock = flow_test_lib.MockClient(
        rdf_client.ClientURN(client_id), client_mock, token=self.token)

    client_mock.EnableResourceUsage(
        user_cpu_usage=[10], system_cpu_usage=[10], network_usage=[1000])
    client_mocks.append(client_mock)

  flow_runner_args = rdf_flow_runner.FlowRunnerArgs(
      flow_name=flow_test_lib.CPULimitFlow.__name__)
  with implementation.StartHunt(
      hunt_name=standard.GenericHunt.__name__,
      flow_runner_args=flow_runner_args,
      cpu_limit=5000,
      per_client_cpu_limit=10000,
      network_bytes_limit=1000000,
      client_rate=0,
      token=self.token) as hunt:
    hunt.GetRunner().Start()

  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[:1])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[1:2])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[2:3])
  self._Process(client_mocks, worker_obj)

  # The limiting factor here is the overall hunt limit of 5000 cpu
  # seconds. Clients that finish should decrease the remaining quota
  # and the following clients should get the reduced quota.
  self.assertEqual(result["cpulimit"], [
      5000.0, 4980.0, 4960.0, 4940.0, 4920.0, 4900.0, 4880.0, 4860.0, 4840.0
  ])
  self.assertEqual(result["networklimit"], [
      1000000, 999000, 998000, 997000, 996000, 995000, 994000, 993000, 992000
  ])

  result.clear()

  with implementation.StartHunt(
      hunt_name=standard.GenericHunt.__name__,
      flow_runner_args=flow_runner_args,
      per_client_cpu_limit=3000,
      per_client_network_limit_bytes=3000000,
      client_rate=0,
      token=self.token) as hunt:
    hunt.GetRunner().Start()

  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[:1])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[1:2])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[2:3])
  self._Process(client_mocks, worker_obj)

  # This time, the per client limit is 3000s / 3000000 bytes. Every
  # client should get the same limit.
  self.assertEqual(result["cpulimit"], [
      3000.0, 2980.0, 2960.0, 3000.0, 2980.0, 2960.0, 3000.0, 2980.0, 2960.0
  ])
  self.assertEqual(result["networklimit"], [
      3000000, 2999000, 2998000, 3000000, 2999000, 2998000, 3000000, 2999000,
      2998000
  ])
  result.clear()

  for client_mock in client_mocks:
    client_mock.EnableResourceUsage(
        user_cpu_usage=[500], system_cpu_usage=[500], network_usage=[1000000])

  with implementation.StartHunt(
      hunt_name=standard.GenericHunt.__name__,
      flow_runner_args=flow_runner_args,
      per_client_cpu_limit=3000,
      cpu_limit=5000,
      per_client_network_limit_bytes=3000000,
      network_bytes_limit=5000000,
      client_rate=0,
      token=self.token) as hunt:
    hunt.GetRunner().Start()

  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[:1])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[1:2])
  self._Process(client_mocks, worker_obj)
  implementation.GRRHunt.StartClients(hunt.session_id, client_ids[2:3])
  self._Process(client_mocks, worker_obj)

  # The first client gets the full per client limit of 3000s, and
  # uses all of it. The hunt has a limit of just 5000 total so the
  # second client gets started with a limit of 2000. It can only run
  # two of three states; the last client will not be started at all
  # because the hunt is out of quota.
  self.assertEqual(result["cpulimit"],
                   [3000.0, 2000.0, 1000.0, 2000.0, 1000.0])
  self.assertEqual(result["networklimit"],
                   [3000000, 2000000, 1000000, 2000000, 1000000])

  errors = list(hunt.GetClientsErrors())
  self.assertEqual(len(errors), 2)
  # Client side out of cpu.
  self.assertIn("CPU limit exceeded", errors[0].log_message)
  # Server side out of cpu.
  self.assertIn("Out of CPU quota", errors[1].backtrace)
def testNoValidStatusRaceIsResolved(self):
  # This tests for the regression of a long standing race condition we saw
  # where notifications would trigger the reading of another request that
  # arrives later but wasn't completely written to the database yet.
  # Timestamp based notification handling should eliminate this bug.

  # We need a random flow object for this test.
  session_id = flow.StartFlow(
      client_id=self.client_id,
      flow_name="WorkerSendingTestFlow",
      token=self.token)
  worker_obj = worker_lib.GRRWorker(token=self.token)
  manager = queue_manager.QueueManager(token=self.token)

  manager.DeleteNotification(session_id)
  manager.Flush()

  # We have a first request that is complete (request_id 1, response_id 1).
  self.SendResponse(session_id, "Response 1")

  # However, we also have request #2 already coming in. The race is that
  # the queue manager might write the status notification to
  # session_id/state as "status:00000002" but not the status response
  # itself yet under session_id/state/request:00000002

  request_id = 2
  response_id = 1
  flow_manager = queue_manager.QueueManager(token=self.token)
  flow_manager.FreezeTimestamp()

  flow_manager.QueueResponse(
      rdf_flows.GrrMessage(
          source=self.client_id,
          session_id=session_id,
          payload=rdf_protodict.DataBlob(string="Response 2"),
          request_id=request_id,
          auth_state="AUTHENTICATED",
          response_id=response_id))

  status = rdf_flows.GrrMessage(
      source=self.client_id,
      session_id=session_id,
      payload=rdf_flows.GrrStatus(
          status=rdf_flows.GrrStatus.ReturnedStatus.OK),
      request_id=request_id,
      response_id=response_id + 1,
      auth_state="AUTHENTICATED",
      type=rdf_flows.GrrMessage.Type.STATUS)

  # Now we write half the status information.
  data_store.DB.StoreRequestsAndResponses(new_responses=[(status, None)])

  # We make the race even a bit harder by saying the new notification gets
  # written right before the old one gets deleted. If we are not careful
  # here, we delete the new notification as well and the flow becomes stuck.

  # pylint: disable=invalid-name
  def WriteNotification(self, arg_session_id, start=None, end=None):
    if arg_session_id == session_id:
      flow_manager.QueueNotification(session_id=arg_session_id)
      flow_manager.Flush()

    self.DeleteNotification.old_target(
        self, arg_session_id, start=start, end=end)

  # pylint: enable=invalid-name

  with utils.Stubber(queue_manager.QueueManager, "DeleteNotification",
                     WriteNotification):
    # This should process request 1 but not touch request 2.
    worker_obj.RunOnce()
    worker_obj.thread_pool.Join()

  flow_obj = aff4.FACTORY.Open(session_id, token=self.token)
  self.assertFalse(flow_obj.context.backtrace)
  self.assertNotEqual(flow_obj.context.state,
                      rdf_flow_runner.FlowContext.State.ERROR)

  request_data = data_store.DB.ReadResponsesForRequestId(session_id, 2)
  request_data.sort(key=lambda msg: msg.response_id)
  self.assertEqual(len(request_data), 2)

  # Make sure the status and the original request are still there.
  self.assertEqual(request_data[0].args_rdf_name, "DataBlob")
  self.assertEqual(request_data[1].args_rdf_name, "GrrStatus")

  # But there is nothing for request 1.
  request_data = data_store.DB.ReadResponsesForRequestId(session_id, 1)
  self.assertEqual(request_data, [])

  # The notification for request 2 should have survived.
  with queue_manager.QueueManager(token=self.token) as manager:
    notifications = manager.GetNotifications(queues.FLOWS)
    self.assertEqual(len(notifications), 1)
    notification = notifications[0]
    self.assertEqual(notification.session_id, session_id)
    self.assertEqual(notification.timestamp, flow_manager.frozen_timestamp)

  self.assertEqual(RESULTS, ["Response 1"])

  # The last missing piece of request 2 is the actual status message.
  flow_manager.QueueResponse(status)
  flow_manager.Flush()

  # Now make sure request 2 runs as expected.
  worker_obj.RunOnce()
  worker_obj.thread_pool.Join()

  self.assertEqual(RESULTS, ["Response 1", "Response 2"])
def testProcessMessages(self):
  """Test processing of several inbound messages."""

  # Create a couple of flows.
  flow_obj = self.FlowSetup("WorkerSendingTestFlow")
  session_id_1 = flow_obj.session_id
  flow_obj.Close()

  flow_obj = self.FlowSetup("WorkerSendingTestFlow2")
  session_id_2 = flow_obj.session_id
  flow_obj.Close()

  manager = queue_manager.QueueManager(token=self.token)
  # Check that client queue has messages.
  tasks_on_client_queue = manager.Query(self.client_id.Queue(), 100)

  # Should have 10 requests from WorkerSendingTestFlow and 1 from
  # SendingTestFlow2.
  self.assertEqual(len(tasks_on_client_queue), 11)

  # Send each of the flows a repeated message.
  self.SendResponse(session_id_1, "Hello1")
  self.SendResponse(session_id_2, "Hello2")
  self.SendResponse(session_id_1, "Hello1")
  self.SendResponse(session_id_2, "Hello2")

  worker_obj = worker_lib.GRRWorker(token=self.token)

  # Process all messages.
  worker_obj.RunOnce()

  worker_obj.thread_pool.Join()

  # Ensure both requests ran exactly once.
  RESULTS.sort()
  self.assertEqual(2, len(RESULTS))
  self.assertEqual("Hello1", RESULTS[0])
  self.assertEqual("Hello2", RESULTS[1])

  # Check that client queue is cleared - should have 2 fewer messages (since
  # two were completed).
  tasks_on_client_queue = manager.Query(self.client_id.Queue(), 100)
  self.assertEqual(len(tasks_on_client_queue), 9)

  # Ensure that processed requests are removed from the state subject.
  outstanding_requests = list(
      data_store.DB.ReadRequestsAndResponses(session_id_1))
  self.assertEqual(len(outstanding_requests), 9)
  for request, _ in outstanding_requests:
    self.assertNotEqual(request.request.request_id, 0)

  # This flow is still in state Incoming.
  flow_obj = aff4.FACTORY.Open(session_id_1, token=self.token)
  self.assertTrue(flow_obj.context.state !=
                  rdf_flow_runner.FlowContext.State.TERMINATED)
  self.assertEqual(flow_obj.context.current_state, "Incoming")
  # This flow should be done.
  flow_obj = aff4.FACTORY.Open(session_id_2, token=self.token)
  self.assertTrue(flow_obj.context.state ==
                  rdf_flow_runner.FlowContext.State.TERMINATED)
  self.assertEqual(flow_obj.context.current_state, "End")