def FetchRequestsAndResponses(self, session_id):
    """Yields every stored response of a well known flow.

    Well known flows do not have real requests, so each response that is
    found is paired with a placeholder request state.

    Args:
      session_id: The session_id to get the responses for.

    Yields:
      A (request, responses) tuple, where request is a RequestState with
      id 0 and responses is a list holding a single response message.
    """
    response_subject = session_id.Add("state/request:00000000")
    time_range = (0, self.frozen_timestamp or rdfvalue.RDFDatetime().Now())

    rows = self.data_store.ResolveRegex(
        response_subject,
        self.FLOW_RESPONSE_REGEX,
        token=self.token,
        limit=self.response_limit,
        timestamp=time_range)

    # The predicate format is flow:response:REQUEST_ID:RESPONSE_ID. For well
    # known flows both request_id and response_id are randomized.
    for _, payload, _ in sorted(rows):
        yield rdfvalue.RequestState(id=0), [rdfvalue.GrrMessage(payload)]
def SendOKStatus(self, response_id, session_id):
    """Sends an OK status for request 1 and stores it on the request state.

    Args:
      response_id: The response id to put on the status message.
      session_id: The session the status message is addressed to.

    Returns:
      The status message that was sent.
    """
    ok_status = rdfvalue.GrrStatus(
        status=rdfvalue.GrrStatus.ReturnedStatus.OK)

    message = rdfvalue.GrrMessage(
        request_id=1,
        response_id=response_id,
        session_id=session_id,
        type=rdfvalue.GrrMessage.Type.STATUS,
        auth_state=rdfvalue.GrrMessage.AuthorizationState.AUTHENTICATED)
    message.payload = ok_status
    self.SendMessage(message)

    # Now also set the state on the RequestState: read the stored request
    # state back, attach the status and write it to the same location.
    state_subject = message.session_id.Add("state")
    request_predicate = (
        queue_manager.QueueManager.FLOW_REQUEST_TEMPLATE % message.request_id)

    serialized_state, _ = data_store.DB.Resolve(
        state_subject, request_predicate, token=self.token)
    request_state = rdfvalue.RequestState(serialized_state)
    request_state.status = ok_status

    data_store.DB.Set(
        state_subject, request_predicate, request_state, token=self.token)

    return message
def FetchCompletedRequests(self, session_id, timestamp=None):
    """Yields the requests that already have a status message stored.

    Args:
      session_id: The session_id whose requests are inspected.
      timestamp: Optional (start, end) range passed to the data store query.
        Defaults to everything up to the frozen timestamp, or now.

    Yields:
      (RequestState, status GrrMessage) tuples in sorted request id order.
    """
    if timestamp is None:
        timestamp = (0, self.frozen_timestamp or rdfvalue.RDFDatetime().Now())

    state_subject = session_id.Add("state")
    serialized_requests = {}
    serialized_statuses = {}

    rows = self.data_store.ResolveRegex(
        state_subject,
        [self.FLOW_REQUEST_REGEX, self.FLOW_STATUS_REGEX],
        token=self.token,
        limit=self.request_limit,
        timestamp=timestamp)

    # Sort each row into the status or request bucket, based on the second
    # component of its colon separated predicate.
    for predicate, payload, _ in rows:
        fields = predicate.split(":", 3)
        kind, request_id = fields[1], fields[2]
        if kind == "status":
            serialized_statuses[request_id] = payload
        else:
            serialized_requests[request_id] = payload

    for request_id, payload in sorted(serialized_requests.items()):
        if request_id not in serialized_statuses:
            continue
        yield (rdfvalue.RequestState(payload),
               rdfvalue.GrrMessage(serialized_statuses[request_id]))
def FetchRequestsAndResponses(self, session_id, timestamp=None):
    """Fetches all outstanding requests and responses for this flow.

    All requests are read first and their responses are then fetched with a
    single multi-subject query, to prevent round trips.

    Args:
      session_id: The session_id to get the requests/responses for.
      timestamp: Tuple (start, end) with a time range. Fetched requests and
        responses will have a timestamp in this range.

    Yields:
      A tuple (request, list of response messages) in ascending order of
      request subject. The responses are sorted by response_id.

    Raises:
      MoreDataException: When there is more data available than read by the
        limited query.
    """
    state_subject = session_id.Add("state")

    if timestamp is None:
        # TODO(user): remove int() conversion when datastores
        # accept RDFDatetime instead of ints.
        newest = int(self.frozen_timestamp or rdfvalue.RDFDatetime().Now())
        timestamp = (0, newest)

    # Get some requests, keyed by the subject their responses live under.
    serialized_requests = {}
    request_rows = self.data_store.ResolveRegex(
        state_subject,
        self.FLOW_REQUEST_REGEX,
        token=self.token,
        limit=self.request_limit,
        timestamp=timestamp)
    for predicate, payload, _ in request_rows:
        request_id = predicate.split(":", 1)[1]
        serialized_requests[str(state_subject.Add(request_id))] = payload

    # And the responses for them.
    responses_by_urn = dict(
        self.data_store.MultiResolveRegex(
            serialized_requests.keys(),
            self.FLOW_RESPONSE_REGEX,
            limit=self.response_limit,
            token=self.token,
            timestamp=timestamp))

    for urn, payload in sorted(serialized_requests.items()):
        request = rdfvalue.RequestState(payload)
        responses = [rdfvalue.GrrMessage(blob)
                     for _, blob, _ in responses_by_urn.get(urn, [])]
        responses.sort(key=lambda msg: msg.response_id)
        yield (request, responses)

    # Reaching the query limit means there may be requests we did not read.
    if len(serialized_requests) >= self.request_limit:
        raise MoreDataException()
def testDestroyFlowStates(self):
    """Check that we can efficiently destroy the flow's request queues."""
    session_id = rdfvalue.SessionID(flow_name="test2")
    state_subject = session_id.Add("state")
    response_subject = session_id.Add("state/request:00000001")

    queued_request = rdfvalue.RequestState(
        id=1,
        client_id=self.client_id,
        next_state="TestState",
        session_id=session_id)
    queued_response = rdfvalue.GrrMessage(request_id=1, response_id=1)

    with queue_manager.QueueManager(token=self.token) as manager:
        manager.QueueRequest(session_id, queued_request)
        manager.QueueResponse(session_id, queued_response)

    # Check the request and responses are there.
    # NOTE(review): the reads happen after the `with` block has exited,
    # presumably so that the queued writes are committed first - confirm.
    fetched = list(manager.FetchRequestsAndResponses(session_id))
    self.assertEqual(len(fetched), 1)
    self.assertEqual(fetched[0][0], queued_request)

    # Ensure the rows are in the data store:
    request_rows = data_store.DB.ResolveRegex(
        state_subject, ".*", token=self.token)
    self.assertEqual(request_rows[0][0], "flow:request:00000001")

    response_rows = data_store.DB.ResolveRegex(
        response_subject, ".*", token=self.token)
    self.assertEqual(response_rows[0][0], "flow:response:00000001:00000001")

    with queue_manager.QueueManager(token=self.token) as manager:
        manager.DestroyFlowStates(session_id)

    fetched = list(manager.FetchRequestsAndResponses(session_id))
    self.assertEqual(len(fetched), 0)

    # Ensure the rows are gone from the data store.
    self.assertEqual(
        data_store.DB.ResolveRegex(response_subject, ".*", token=self.token),
        [])
    self.assertEqual(
        data_store.DB.ResolveRegex(state_subject, ".*", token=self.token),
        [])
def GetFlowRequests(self, flow_urns, token):
    """Returns all outstanding requests for the flows in flow_urns.

    Args:
      flow_urns: Iterable of flow URNs whose "state" subjects are queried.
      token: The access token to use for the data store query.

    Returns:
      A dict keyed by the subject the data store reports for each result
      (presumably the flow's "state" URN - verify against the data store).
      Each value is a list of the parsed rows: a GrrMessage for predicates
      containing "status", a RequestState otherwise. Rows that fail to
      parse are logged and skipped.
    """
    state_urns = [flow_urn.Add("state") for flow_urn in flow_urns]

    flow_requests = {}
    for state_urn, rows in data_store.DB.MultiResolveRegex(
            state_urns, "flow:.*", token=token):
        for predicate, serialized, _ in rows:
            try:
                if "status" in predicate:
                    parsed = rdfvalue.GrrMessage(serialized)
                else:
                    parsed = rdfvalue.RequestState(serialized)
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: one unparsable row must not hide the others.
                logging.warning("Error while parsing: %s", e)
                continue

            flow_requests.setdefault(state_urn, []).append(parsed)

    return flow_requests
def DestroyFlowStates(self, session_id):
    """Deletes all states in this flow and dequeues all client messages.

    Args:
      session_id: The session_id of the flow whose states are removed.
    """
    state_subject = session_id.Add("state")

    request_rows = self.data_store.ResolveRegex(
        state_subject,
        self.FLOW_REQUEST_REGEX,
        token=self.token,
        limit=self.request_limit)

    for _, payload, _ in request_rows:
        request = rdfvalue.RequestState(payload)

        # Efficiently drop all responses to this request.
        self.data_store.DeleteSubject(
            self.GetFlowResponseSubject(session_id, request.id),
            token=self.token)

        # Only requests that carry a client message have something queued
        # for the client that needs to be removed.
        if not request.HasField("request"):
            continue
        self.DeQueueClientRequest(request.client_id, request.request.task_id)

    # Now drop all the requests at once.
    self.data_store.DeleteSubject(state_subject, token=self.token)
def testDrainUpdateSessionRequestStates(self):
    """Draining the flow requests and preparing messages."""
    # This flow sends 10 messages on Start()
    flow_obj = self.FlowSetup("SendingTestFlow")
    session_id = flow_obj.session_id

    # There should be 10 messages in the client's task queue
    manager = queue_manager.QueueManager(token=self.token)
    tasks = manager.Query(self.client_id, 100)
    self.assertEqual(len(tasks), 10)

    # Check that the request state stored for each task carries the task_id
    # of the client message that was queued for it.
    for task in tasks:
        serialized_state, _ = data_store.DB.Resolve(
            session_id.Add("state"),
            manager.FLOW_REQUEST_TEMPLATE % task.request_id,
            token=self.token)
        request_state = rdfvalue.RequestState(serialized_state)
        self.assertEqual(request_state.request.task_id, task.task_id)

    # Now ask the server to drain the outbound messages into the
    # message list.
    drain_count = 5
    response = rdfvalue.MessageList()
    self.server.DrainTaskSchedulerQueueForClient(
        self.client_id, drain_count, response)

    # Check that we received only as many messages as we asked for
    self.assertEqual(len(response.job), drain_count)

    # Verify every drained message, not just the first four of them.
    for i in range(drain_count):
        self.assertEqual(response.job[i].session_id, session_id)
        self.assertEqual(response.job[i].name, "Test")
def StartClients(cls, hunt_id, client_ids, token=None):
    """Schedules the given clients on a hunt.

    This method is called by the foreman for each client it discovers. Note
    that it is performance sensitive, since it is called by the foreman for
    every client which needs to be scheduled.

    Args:
      hunt_id: The hunt to schedule.
      client_ids: List of clients that should be added to the hunt.
      token: An optional access token to use.
    """
    if not token:
        token = access_control.ACLToken(username="******", reason="hunting")

    authenticated = rdfvalue.GrrMessage.AuthorizationState.AUTHENTICATED
    status_type = rdfvalue.GrrMessage.Type.STATUS

    with queue_manager.QueueManager(token=token) as flow_manager:
        for client_id in client_ids:
            # Build a special request/response pair addressed to the hunt
            # flow. The request_id is randomized so we do not overwrite
            # other messages in the queue.
            request_state = rdfvalue.RequestState(
                id=utils.PRNG.GetULong(),
                session_id=hunt_id,
                client_id=client_id,
                next_state="AddClient")
            flow_manager.QueueRequest(hunt_id, request_state)

            # The response is a bare status message for that request.
            status_message = rdfvalue.GrrMessage(
                session_id=hunt_id,
                request_id=request_state.id,
                response_id=1,
                auth_state=authenticated,
                type=status_type,
                payload=rdfvalue.GrrStatus())
            flow_manager.QueueResponse(hunt_id, status_message)

            # And notify the worker about it.
            flow_manager.QueueNotification(session_id=hunt_id)
def testDeleteFlowRequestStates(self):
    """Check that we can efficiently destroy a single flow request."""
    session_id = rdfvalue.SessionID(flow_name="test3")

    queued_request = rdfvalue.RequestState(
        id=1,
        client_id=self.client_id,
        next_state="TestState",
        session_id=session_id)
    queued_response = rdfvalue.GrrMessage(request_id=1, response_id=1)

    with queue_manager.QueueManager(token=self.token) as manager:
        manager.QueueRequest(session_id, queued_request)
        manager.QueueResponse(session_id, queued_response)

    # Check the request and responses are there.
    fetched = list(manager.FetchRequestsAndResponses(session_id))
    self.assertEqual(len(fetched), 1)
    self.assertEqual(fetched[0][0], queued_request)

    with queue_manager.QueueManager(token=self.token) as manager:
        manager.DeleteFlowRequestStates(session_id, queued_request)

    # Nothing is left once the single request has been deleted.
    fetched = list(manager.FetchRequestsAndResponses(session_id))
    self.assertEqual(len(fetched), 0)
def testQueueing(self):
    """Tests that queueing and fetching of requests and responses work."""
    session_id = rdfvalue.SessionID(flow_name="test")

    first_request = rdfvalue.RequestState(
        id=1,
        client_id=self.client_id,
        next_state="TestState",
        session_id=session_id)
    with queue_manager.QueueManager(token=self.token) as manager:
        manager.QueueRequest(session_id, first_request)

    # We only have one unanswered request on the queue.
    pending = list(manager.FetchRequestsAndResponses(session_id))
    self.assertEqual(len(pending), 1)
    self.assertEqual(pending[0], (first_request, []))

    # FetchCompletedRequests should return nothing now.
    self.assertEqual(list(manager.FetchCompletedRequests(session_id)), [])

    # Now queue more requests and responses. Each request gets nine normal
    # responses followed by one status message.
    plain_responses = 9
    with queue_manager.QueueManager(token=self.token) as manager:
        # Start with request 2 - leave request 1 un-responded to.
        for request_id in range(2, 5):
            manager.QueueRequest(
                session_id,
                rdfvalue.RequestState(
                    id=request_id,
                    client_id=self.client_id,
                    next_state="TestState",
                    session_id=session_id))

            for response_id in range(1, plain_responses + 1):
                # Normal message.
                manager.QueueResponse(
                    session_id,
                    rdfvalue.GrrMessage(
                        request_id=request_id, response_id=response_id))

            # And a status message.
            manager.QueueResponse(
                session_id,
                rdfvalue.GrrMessage(
                    request_id=request_id,
                    response_id=plain_responses + 1,
                    type=rdfvalue.GrrMessage.Type.STATUS))

    completed_requests = list(manager.FetchCompletedRequests(session_id))
    self.assertEqual(len(completed_requests), 3)

    # First completed message is request_id = 2 with 10 responses.
    first_completed = completed_requests[0]
    self.assertEqual(first_completed[0].id, 2)

    # Last message is the status message.
    self.assertEqual(first_completed[-1].type,
                     rdfvalue.GrrMessage.Type.STATUS)
    self.assertEqual(first_completed[-1].response_id, 10)

    # Now fetch all the completed responses.
    completed_response = list(manager.FetchCompletedResponses(session_id))
    self.assertEqual(len(completed_response), 3)
    for expected_id, (request, responses) in enumerate(completed_response, 2):
        self.assertEqual(request.id, expected_id)
        self.assertEqual(len(responses), 10)

    # Now check if the limit is enforced. The limit refers to the total
    # number of responses to return, and we ask for a maximum of 15.
    hit_limit = False
    last_id = 0
    try:
        limited = manager.FetchCompletedResponses(session_id, limit=15)
        for last_id, (request, responses) in enumerate(limited, 2):
            self.assertEqual(request.id, last_id)
            self.assertEqual(len(responses), 10)
    except queue_manager.MoreDataException:
        hit_limit = True

    # The id of the last request that was returned before the limit hit.
    self.assertEqual(last_id, 3)

    # Make sure the manager told us that more data is available.
    self.assertTrue(hit_limit)
def CallState(self, messages=None, next_state="", client_id=None,
              request_data=None, start_time=None):
    """This method is used to asynchronously schedule a new hunt state.

    The state will be invoked in a later time and receive all the messages
    we send.

    Args:
      messages: A list of rdfvalues to send. If the last one is not a
        GrrStatus, we append an OK Status. The caller's list is not
        modified.
      next_state: The state in this hunt to be invoked with the responses.
      client_id: ClientURN to use in scheduled requests.
      request_data: Any dict provided here will be available in the
        RequestState protobuf. The Responses object maintains a reference
        to this protobuf for use in the execution of the state method. (so
        you can access this data by responses.request).
      start_time: Schedule the state at this time. This delays notification
        and messages for processing into the future.

    Raises:
      ValueError: on arguments error.
      flow_runner.FlowRunnerError: if a message is not an RDFValue.
    """
    if not next_state:
        raise ValueError("next_state can't be empty.")

    # Work on a copy so the status message appended below does not leak
    # into the list owned by the caller.
    messages = [] if messages is None else list(messages)

    # Now we construct a special response which will be sent to the hunt
    # flow. Randomize the request_id so we do not overwrite other messages
    # in the queue.
    request_state = rdfvalue.RequestState(
        id=utils.PRNG.GetULong(),
        session_id=self.context.session_id,
        client_id=client_id,
        next_state=next_state)
    if request_data:
        request_state.data = rdfvalue.Dict().FromDict(request_data)

    self.QueueRequest(request_state, timestamp=start_time)

    # Add the status message if needed. This has to happen before sending,
    # so that the status is queued together with the other messages.
    if not messages or not isinstance(messages[-1], rdfvalue.GrrStatus):
        messages.append(rdfvalue.GrrStatus())

    # Send all the messages. Response ids start at 1.
    for response_id, payload in enumerate(messages, 1):
        if not isinstance(payload, rdfvalue.RDFValue):
            raise flow_runner.FlowRunnerError(
                "Bad message %s of type %s." % (payload, type(payload)))

        msg = rdfvalue.GrrMessage(
            session_id=self.session_id,
            request_id=request_state.id,
            response_id=response_id,
            auth_state=rdfvalue.GrrMessage.AuthorizationState.AUTHENTICATED,
            payload=payload,
            type=rdfvalue.GrrMessage.Type.MESSAGE)
        if isinstance(payload, rdfvalue.GrrStatus):
            msg.type = rdfvalue.GrrMessage.Type.STATUS

        self.QueueResponse(msg, timestamp=start_time)

    # Notify the worker about it.
    self.QueueNotification(session_id=self.session_id, timestamp=start_time)
def CallFlow(self, flow_name=None, next_state=None, sync=True,
             request_data=None, client_id=None, base_session_id=None,
             output=None, **kwargs):
    """Starts a child flow whose responses are delivered to a state.

    The child flow may send back many responses which will be queued by the
    framework until the flow terminates. The final status message will
    cause the entire transaction to be committed to the specified state.

    Args:
      flow_name: The name of the flow to invoke.
      next_state: The state in this flow that responses to this message
        should go to.
      sync: If True start the flow inline on the calling thread, else
        schedule a worker to actually start the child flow.
      request_data: Any dict provided here will be available in the
        RequestState protobuf. The Responses object maintains a reference
        to this protobuf for use in the execution of the state method. (so
        you can access this data by responses.request). There is no format
        mandated on this data but it may be a serialized protobuf.
      client_id: If given, the flow is started for this client. Otherwise
        the client of the runner args is used.
      base_session_id: A URN which will be used to build a URN. Defaults to
        this runner's session_id.
      output: A relative output name for the child collection. Normally
        subflows do not write their own collections, but this can be
        specified to change this behaviour.
      **kwargs: Arguments for the child flow. "logs_collection_urn" and
        "write_intermediate_results" are consumed here and passed on
        explicitly.

    Raises:
      FlowRunnerError: If next_state is not one of the allowed next states.

    Returns:
      The URN of the child flow which was created.
    """
    # Check that the next state is allowed. This is only enforced when
    # requests are processed in order.
    if (self.process_requests_in_order and next_state and
            next_state not in self.context.next_states):
        raise FlowRunnerError(
            "Flow %s: State '%s' called to '%s' which is "
            "not declared in decorator." % (
                self.__class__.__name__,
                self.context.current_state,
                next_state))

    client_id = client_id or self.args.client_id

    # Prepare a request state and add it to our queue - any responses from
    # the child flow will return to the request state and the stated
    # next_state. There is no actual request message here because we
    # directly invoke the child flow rather than queue anything for it.
    state = rdfvalue.RequestState(
        id=self.GetNextOutboundId(),
        session_id=utils.SmartUnicode(self.session_id),
        client_id=client_id,
        next_state=next_state,
        response_count=0)
    if request_data:
        state.data = rdfvalue.Dict().FromDict(request_data)

    # If the urn is passed explicitly (e.g. from the hunt runner) use that,
    # otherwise use the urn from the flow_runner args. If both are None,
    # create a new collection and give the urn to the flow object.
    explicit_logs_urn = kwargs.pop("logs_collection_urn", None)
    logs_urn = self._GetLogsCollectionURN(
        explicit_logs_urn or self.args.logs_collection_urn)

    # If we were called with write_intermediate_results, propagate down to
    # child flows. This allows write_intermediate_results to be set to True
    # either at the top level parent, or somewhere in the middle of the
    # call chain.
    write_intermediate = (
        kwargs.pop("write_intermediate_results", False) or
        getattr(self.args, "write_intermediate_results", False))

    # Create the new child flow but do not notify the user about it.
    child_urn = self.flow_obj.StartFlow(
        client_id=client_id,
        flow_name=flow_name,
        base_session_id=base_session_id or self.session_id,
        event_id=self.context.get("event_id"),
        request_state=state,
        token=self.token,
        notify_to_user=False,
        parent_flow=self.flow_obj,
        _store=self.data_store,
        sync=sync,
        output=output,
        queue=self.args.queue,
        write_intermediate_results=write_intermediate,
        logs_collection_urn=logs_urn,
        creator=self.context.creator,
        **kwargs)

    self.QueueRequest(state)

    return child_urn
def CallClient(self, action_name, request=None, next_state=None,
               client_id=None, request_data=None, start_time=None, **kwargs):
    """Calls the client asynchronously.

    This sends a message to the client to invoke an Action. The run action
    may send back many responses. These will be queued by the framework
    until a status message is sent by the client. The status message will
    cause the entire transaction to be committed to the specified state.

    Args:
      action_name: The function to call on the client.
      request: The request to send to the client. If not specified (or
        None) we create a new RDFValue using the kwargs.
      next_state: The state in this flow that responses to this message
        should go to.
      client_id: rdfvalue.ClientURN to send the request to. Defaults to the
        client of the runner args.
      request_data: A dict which will be available in the RequestState
        protobuf. The Responses object maintains a reference to this
        protobuf for use in the execution of the state method. (so you can
        access this data by responses.request). Valid values are strings,
        unicode and protobufs.
      start_time: Call the client at this time. This delays the client
        request into the future.
      **kwargs: These args will be used to construct the client action
        semantic protobuf.

    Raises:
      FlowRunnerError: If no client is available, or the CPU or network
        limit has been used up.
      RuntimeError: The client action is unknown, or the request passed to
        the client does not have the correct type.
    """
    if client_id is None:
        client_id = self.args.client_id

    if client_id is None:
        raise FlowRunnerError(
            "CallClient() is used on a flow which was not "
            "started with a client.")

    if not isinstance(client_id, rdfvalue.ClientURN):
        # Try turning it into a ClientURN
        client_id = rdfvalue.ClientURN(client_id)

    # Retrieve the correct rdfvalue to use for this client action.
    try:
        action = actions.ActionPlugin.classes[action_name]
    except KeyError:
        raise RuntimeError("Client action %s not found." % action_name)

    expected_type = action.in_rdfvalue
    if expected_type is None:
        if request:
            raise RuntimeError(
                "Client action %s does not expect args." % action_name)
    elif request is None:
        # Create a new rdf request.
        request = expected_type(**kwargs)
    elif not isinstance(request, expected_type):
        # The request type must match the client action requirements.
        raise RuntimeError("Client action expected %s but got %s" % (
            expected_type, type(request)))

    outbound_id = self.GetNextOutboundId()

    # Create a new request state
    state = rdfvalue.RequestState(
        id=outbound_id,
        session_id=self.session_id,
        next_state=next_state,
        client_id=client_id)
    if request_data is not None:
        state.data = rdfvalue.Dict(request_data)

    # Send the message with the request state
    msg = rdfvalue.GrrMessage(
        session_id=utils.SmartUnicode(self.session_id),
        name=action_name,
        request_id=outbound_id,
        priority=self.args.priority,
        require_fastpoll=self.args.require_fastpoll,
        queue=client_id.Queue(),
        payload=request)

    if self.context.remaining_cpu_quota:
        msg.cpu_limit = int(self.context.remaining_cpu_quota)

    # A configured CPU limit takes precedence over the remaining quota: the
    # client gets whatever is left after the CPU time already consumed.
    cpu_usage = self.context.client_resources.cpu_usage
    if self.context.args.cpu_limit:
        msg.cpu_limit = max(
            self.context.args.cpu_limit - cpu_usage.user_cpu_time -
            cpu_usage.system_cpu_time,
            0)
        if msg.cpu_limit == 0:
            raise FlowRunnerError("CPU limit exceeded.")

    # Same for the network budget, reduced by the bytes already sent.
    if self.context.args.network_bytes_limit:
        msg.network_bytes_limit = max(
            self.context.args.network_bytes_limit -
            self.context.network_bytes_sent,
            0)
        if msg.network_bytes_limit == 0:
            raise FlowRunnerError("Network limit exceeded.")

    state.request = msg

    self.QueueRequest(state, timestamp=start_time)
def CallState(self, messages=None, next_state="", request_data=None,
              start_time=None):
    """This method is used to schedule a new state on a different worker.

    This is basically the same as CallFlow() except we are calling
    ourselves. The state will be invoked in a later time and receive all
    the messages we send.

    Args:
      messages: A list of rdfvalues to send. If the last one is not a
        GrrStatus, we append an OK Status. The caller's list is not
        modified.
      next_state: The state in this flow to be invoked with the responses.
      request_data: Any dict provided here will be available in the
        RequestState protobuf. The Responses object maintains a reference
        to this protobuf for use in the execution of the state method. (so
        you can access this data by responses.request).
      start_time: Start the flow at this time. This delays notification for
        flow processing into the future. Note that the flow may still be
        processed earlier if there are client responses waiting.

    Raises:
      FlowRunnerError: if the next state is not valid, or a message is not
        an RDFValue.
    """
    # Check if the state is valid. The default keeps a missing (or empty)
    # state name from escaping as an AttributeError instead of the
    # documented FlowRunnerError.
    if not getattr(self.flow_obj, next_state, None):
        raise FlowRunnerError("Next state %s is invalid." % next_state)

    # Work on a copy so the status message appended below does not leak
    # into the list owned by the caller.
    messages = [] if messages is None else list(messages)

    # Queue the request that the messages below are responses to.
    request_state = rdfvalue.RequestState(
        id=self.GetNextOutboundId(),
        session_id=self.context.session_id,
        client_id=self.args.client_id,
        next_state=next_state)
    if request_data:
        request_state.data = rdfvalue.Dict().FromDict(request_data)

    self.QueueRequest(request_state, timestamp=start_time)

    # Add the status message if needed.
    if not messages or not isinstance(messages[-1], rdfvalue.GrrStatus):
        messages.append(rdfvalue.GrrStatus())

    # Send all the messages. Response ids start at 1.
    for response_id, payload in enumerate(messages, 1):
        if not isinstance(payload, rdfvalue.RDFValue):
            raise FlowRunnerError(
                "Bad message %s of type %s." % (payload, type(payload)))

        msg = rdfvalue.GrrMessage(
            session_id=self.session_id,
            request_id=request_state.id,
            response_id=response_id,
            auth_state=rdfvalue.GrrMessage.AuthorizationState.AUTHENTICATED,
            payload=payload,
            type=rdfvalue.GrrMessage.Type.MESSAGE)
        if isinstance(payload, rdfvalue.GrrStatus):
            msg.type = rdfvalue.GrrMessage.Type.STATUS

        self.QueueResponse(msg, start_time)

    # Notify the worker about it.
    self.QueueNotification(session_id=self.session_id, timestamp=start_time)