def StartFlow(client_id=None,
              cpu_limit=7200,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              parent_flow_obj=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record, runs the flow's Start state inline on the calling
    thread, writes the flow object to the relational datastore and then either
    merges the queued messages into the parent flow or flushes them.

    Args:
      client_id: ID of the client this flow should run on.
      cpu_limit: CPU limit in seconds for this flow.
      creator: Username that requested this flow.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute.
      flow_cls: Class of the flow that should be started.
      network_bytes_limit: Limit on the network traffic this flow can
        generate.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      parent_flow_obj: A parent flow object. None if this is a top level flow.
      **kwargs: Keywords used to populate flow_args. Anything left over after
        that is treated as an error.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: flow_cls is missing or not registered, or client_id is
        missing.
      type_info.UnknownArg: Keyword arguments remained after flow_args was
        populated.
    """
    # Fail with the documented exception type instead of an AttributeError on
    # `flow_cls.__name__` below.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError as e:
        stats.STATS.IncrementCounter("grr_flow_invalid_flow_count")
        raise ValueError(
            "Unable to locate flow %s" % flow_cls.__name__) from e

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    # NOTE(review): called even when flow_args was supplied; presumably it
    # moves matching keywords out of kwargs into flow_args -- the leftover
    # check below relies on kwargs being consumed. Confirm against the helper.
    FilterArgsFromSemanticProtobuf(flow_args, kwargs)

    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg(
            "Unknown parameters to StartFlow: %s" % kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(
        client_id=client_id,
        flow_class_name=flow_cls.__name__,
        args=flow_args,
        create_time=rdfvalue.RDFDatetime.Now(),
        creator=creator,
        output_plugins=output_plugins,
        original_flow=original_flow,
        flow_state="RUNNING")

    rdf_flow.flow_id = "%08X" % utils.PRNG.GetUInt32()

    # Decide once whether this is a nested flow so that ID construction and
    # message routing below can never disagree.
    has_parent = parent_flow_obj is not None

    if has_parent:
        parent_rdf_flow = parent_flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_request_id = parent_flow_obj.GetCurrentOutboundId()
        # A child flow takes over the parent's creator when one is set.
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    else:
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins,
            rdf_flow.long_flow_id,
            token=access_control.ACLToken(username=rdf_flow.creator))

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit

    logging.info(u"Scheduling %s(%s) on %s", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id)

    flow_obj = flow_cls(rdf_flow)

    # Just run the first state inline. NOTE: Running synchronously means
    # that this runs on the thread that starts the flow. The advantage is
    # that the Start method can raise any errors immediately.
    flow_obj.Start()
    flow_obj.PersistState()

    # The flow does not need to actually remain running.
    if not flow_obj.outstanding_requests:
        flow_obj.RunStateMethod("End")
        flow_obj.MarkDone()

    data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

    if has_parent:
        # We can optimize here and not write requests/responses to the
        # database since we have to do this for the parent flow at some point
        # anyways.
        parent_flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

        # Publish an audit event, only for top level flows.
        # TODO(amoser): split urn field into dedicated strings.
        events.Events.PublishEvent(
            "Audit",
            rdf_events.AuditEvent(
                user=creator,
                action="RUN_FLOW",
                flow_name=rdf_flow.flow_class_name,
                urn=rdf_flow.long_flow_id,
                client=client_id))

    return rdf_flow.flow_id
def StartFlow(client_id=None,
              cpu_limit=None,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              start_at=None,
              parent_flow_obj=None,
              parent_hunt_id=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record and either runs the flow's Start state inline on
    the calling thread (start_at is None) or schedules it via CallState for
    the given time. The flow object is then written to the relational
    datastore and queued messages are merged into the parent flow or flushed.

    Args:
      client_id: ID of the client this flow should run on.
      cpu_limit: CPU limit in seconds for this flow.
      creator: Username that requested this flow.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute.
      flow_cls: Class of the flow that should be started.
      network_bytes_limit: Limit on the network traffic this flow can
        generate.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      start_at: If specified, flow will be started not immediately, but at a
        given time.
      parent_flow_obj: A parent flow object. None if this is a top level flow.
      parent_hunt_id: String identifying parent hunt. Can't be passed together
        with parent_flow_obj.
      **kwargs: Keywords used to populate flow_args. Anything left over after
        that is treated as an error.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: Both parent_flow_obj and parent_hunt_id were given, flow_cls
        is missing or not registered, or client_id is missing.
      type_info.UnknownArg: Keyword arguments remained after flow_args was
        populated.
      CanNotStartFlowWithExistingIdError: A top-level flow with the chosen id
        already exists for this client.
    """
    has_parent = parent_flow_obj is not None

    if has_parent and parent_hunt_id is not None:
        raise ValueError(
            "parent_flow_obj and parent_hunt_id are mutually exclusive.")

    # Fail with the documented exception type instead of an AttributeError on
    # `flow_cls.__name__` below.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError as e:
        stats_collector_instance.Get().IncrementCounter(
            "grr_flow_invalid_flow_count")
        raise ValueError(
            "Unable to locate flow %s" % flow_cls.__name__) from e

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    # NOTE(review): called even when flow_args was supplied; presumably it
    # moves matching keywords out of kwargs into flow_args -- the leftover
    # check below relies on kwargs being consumed. Confirm against the helper.
    FilterArgsFromSemanticProtobuf(flow_args, kwargs)

    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg(
            "Unknown parameters to StartFlow: %s" % kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(
        client_id=client_id,
        flow_class_name=flow_cls.__name__,
        args=flow_args,
        create_time=rdfvalue.RDFDatetime.Now(),
        creator=creator,
        output_plugins=output_plugins,
        original_flow=original_flow,
        flow_state="RUNNING")

    # The mutual-exclusion check above guarantees there is no parent flow
    # whenever parent_hunt_id is set.
    if parent_hunt_id is not None:
        # Hunt-induced flows reuse the hunt's id as their flow id.
        rdf_flow.flow_id = parent_hunt_id
        if IsLegacyHunt(parent_hunt_id):
            # Legacy hunt ids lose their first two characters (presumably a
            # type prefix -- confirm against IsLegacyHunt).
            rdf_flow.flow_id = rdf_flow.flow_id[2:]
    else:
        rdf_flow.flow_id = RandomFlowId()

    # For better performance, only do conflicting IDs check for top-level
    # flows.
    if not has_parent:
        try:
            data_store.REL_DB.ReadFlowObject(client_id, rdf_flow.flow_id)
        except db.UnknownFlowError:
            pass
        else:
            # The read succeeded, so a flow with this id already exists.
            raise CanNotStartFlowWithExistingIdError(client_id,
                                                     rdf_flow.flow_id)

    if has_parent:
        # A flow is a nested flow.
        parent_rdf_flow = parent_flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_hunt_id = parent_rdf_flow.parent_hunt_id
        rdf_flow.parent_request_id = parent_flow_obj.GetCurrentOutboundId()
        # A child flow takes over the parent's creator when one is set.
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    else:
        # A flow is a root-level flow, hunt-induced or not.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
        if parent_hunt_id:
            rdf_flow.parent_hunt_id = parent_hunt_id

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins,
            rdf_flow.long_flow_id,
            token=access_control.ACLToken(username=rdf_flow.creator))

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit

    logging.info(u"Scheduling %s(%s) on %s (%s)", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id, start_at or "now")

    rdf_flow.current_state = "Start"

    flow_obj = flow_cls(rdf_flow)

    if start_at is None:
        # Store an initial version of the flow straight away. This is needed
        # so the database doesn't raise consistency errors due to missing
        # parent keys when writing logs / errors / results which might happen
        # in Start().
        data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

        # Just run the first state inline. NOTE: Running synchronously means
        # that this runs on the thread that starts the flow. The advantage is
        # that the Start method can raise any errors immediately.
        flow_obj.Start()

        # The flow does not need to actually remain running.
        if not flow_obj.outstanding_requests:
            flow_obj.RunStateMethod("End")
            # Additional check for the correct state in case the End method
            # raised and terminated the flow.
            if flow_obj.IsRunning():
                flow_obj.MarkDone()
    else:
        flow_obj.CallState("Start", start_time=start_at)

    flow_obj.PersistState()

    data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

    if has_parent:
        # We can optimize here and not write requests/responses to the
        # database since we have to do this for the parent flow at some point
        # anyways.
        parent_flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

        # Publish an audit event, only for top level flows.
        # TODO(amoser): split urn field into dedicated strings.
        events.Events.PublishEvent(
            "Audit",
            rdf_events.AuditEvent(
                user=creator,
                action="RUN_FLOW",
                flow_name=rdf_flow.flow_class_name,
                urn=rdf_flow.long_flow_id,
                client=client_id))

    return rdf_flow.flow_id
def StartAFF4Flow(args=None,
                  runner_args=None,
                  parent_flow=None,
                  sync=True,
                  token=None,
                  **kwargs):
    """The main factory function for creating and executing a new flow.

    Creates an anonymous AFF4 flow object, attaches args and runner args to
    it, creates a runner and then either calls Start inline (sync) or
    schedules the Start state through the runner.

    Args:
      args: An arg protocol buffer which is an instance of the required flow's
        args_type class attribute.
      runner_args: an instance of FlowRunnerArgs() protocol buffer which is
        used to initialize the runner for this flow.
      parent_flow: A parent flow or None if this is a top level flow.
      sync: If True, the Start method of this flow will be called inline.
        Otherwise we schedule the starting of this flow on another worker.
      token: Security credentials token identifying the user.
      **kwargs: Keywords used to populate runner_args and args. Anything left
        over after that is treated as an error.

    Returns:
      The session id (URN) of the flow.

    Raises:
      RuntimeError: The flow name is not registered, or a flow with a category
        was requested without runner_args.client_id.
      access_control.UnauthorizedAccess: No token was given.
      type_info.UnknownArg: Keyword arguments remained after runner_args and
        args were populated.
    """
    # Build the runner args from the keywords.
    if runner_args is None:
        runner_args = rdf_flow_runner.FlowRunnerArgs()

    # NOTE(review): called even when runner_args was supplied; presumably it
    # moves matching keywords out of kwargs into the proto -- the leftover
    # check further down relies on kwargs being consumed. Confirm.
    FilterArgsFromSemanticProtobuf(runner_args, kwargs)

    # Is the required flow a known flow?
    try:
        flow_cls = registry.AFF4FlowRegistry.FlowClassByName(
            runner_args.flow_name)
    except ValueError as e:
        stats.STATS.IncrementCounter("grr_flow_invalid_flow_count")
        raise RuntimeError(
            "Unable to locate flow %s" % runner_args.flow_name) from e

    # If no token is specified, raise.
    if not token:
        raise access_control.UnauthorizedAccess("A token must be specified.")

    # For the flow itself we use a supervisor token.
    token = token.SetUID()

    # Extend the expiry time of this token indefinitely. Python on Windows
    # only supports dates up to the year 3000.
    token.expiry = rdfvalue.RDFDatetime.FromHumanReadable("2997-01-01")

    if flow_cls.category and not runner_args.client_id:
        raise RuntimeError("Flow with category (user-visible flow) has to be "
                           "started on a client, but runner_args.client_id "
                           "is missing.")

    # We create an anonymous AFF4 object first, The runner will then generate
    # the appropriate URN.
    flow_obj = aff4.FACTORY.Create(None, flow_cls, token=token)

    # Now parse the flow args into the new object from the keywords.
    if args is None:
        args = flow_obj.args_type()

    FilterArgsFromSemanticProtobuf(args, kwargs)

    # Check that the flow args are valid.
    args.Validate()

    # Store the flow args.
    flow_obj.args = args
    flow_obj.runner_args = runner_args

    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg(
            "Unknown parameters to StartAFF4Flow: %s" % kwargs)

    # Decide once whether this is a top level flow so that runner creation
    # and audit publishing below can never disagree.
    is_top_level = parent_flow is None

    # Create a flow runner to run this flow with.
    parent_runner = None if is_top_level else parent_flow.runner

    runner = flow_obj.CreateRunner(
        parent_runner=parent_runner, runner_args=runner_args)

    logging.info(u"Scheduling %s(%s) on %s", flow_obj.urn,
                 runner_args.flow_name, runner_args.client_id)

    if sync:
        # Just run the first state inline. NOTE: Running synchronously means
        # that this runs on the thread that starts the flow. The advantage is
        # that the Start method can raise any errors immediately.
        flow_obj.Start()
    else:
        # Running Asynchronously: Schedule the start method on another worker.
        runner.CallState(next_state="Start")

    # The flow does not need to actually remain running.
    if not flow_obj.outstanding_requests:
        flow_obj.Terminate()

    flow_obj.Close()

    # Publish an audit event, only for top level flows.
    if is_top_level:
        events.Events.PublishEvent(
            "Audit",
            rdf_events.AuditEvent(
                user=token.username,
                action="RUN_FLOW",
                flow_name=runner_args.flow_name,
                urn=flow_obj.urn,
                client=runner_args.client_id),
            token=token)

    return flow_obj.urn
def StartFlow(client_id=None,
              cpu_limit=None,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              start_at=None,
              parent=None,
              runtime_limit=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record and either runs the flow's Start state inline on
    the calling thread (start_at is None) or schedules it via CallState for
    the given time. Exceptions raised while running Start/End inline are
    caught and recorded on the flow through flow_obj.Error instead of being
    propagated. The flow object is then written to the relational datastore
    and queued messages are merged into the parent flow or flushed.

    Args:
      client_id: ID of the client this flow should run on.
      cpu_limit: CPU limit in seconds for this flow.
      creator: Username that requested this flow.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute.
      flow_cls: Class of the flow that should be started.
      network_bytes_limit: Limit on the network traffic this flow can
        generate.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      start_at: If specified, flow will be started not immediately, but at a
        given time.
      parent: A FlowParent referencing the parent, or None for top-level
        flows.
      runtime_limit: Runtime limit as Duration for all ClientActions.
      **kwargs: Keywords used to populate flow_args. Anything left over after
        that is treated as an error.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: flow_cls is missing or not registered, client_id is missing,
        or the parent is of an unknown kind.
      type_info.UnknownArg: Keyword arguments remained after flow_args was
        populated.
      CanNotStartFlowWithExistingIdError: A flow with the chosen id already
        exists for this client.
    """
    # Fail with the documented exception type instead of an AttributeError on
    # `flow_cls.__name__` below.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError as e:
        GRR_FLOW_INVALID_FLOW_COUNT.Increment()
        raise ValueError(
            "Unable to locate flow %s" % flow_cls.__name__) from e

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    # NOTE(review): called even when flow_args was supplied; presumably it
    # moves matching keywords out of kwargs into flow_args -- the leftover
    # check below relies on kwargs being consumed. Confirm against the helper.
    FilterArgsFromSemanticProtobuf(flow_args, kwargs)

    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg(
            "Unknown parameters to StartFlow: %s" % kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(
        client_id=client_id,
        flow_class_name=flow_cls.__name__,
        args=flow_args,
        create_time=rdfvalue.RDFDatetime.Now(),
        creator=creator,
        output_plugins=output_plugins,
        original_flow=original_flow,
        flow_state="RUNNING")

    if parent is None:
        parent = FlowParent.FromRoot()

    if parent.is_hunt or parent.is_scheduled_flow:
        # When starting a flow from a hunt or ScheduledFlow, re-use the
        # parent's id to make it easy to find flows. For hunts, every client
        # has a top-level flow with the hunt's id.
        rdf_flow.flow_id = parent.id
    else:
        # For new top-level and child flows, assign a random ID.
        rdf_flow.flow_id = RandomFlowId()

    # For better performance, only do conflicting IDs check for top-level
    # flows.
    if not parent.is_flow:
        try:
            data_store.REL_DB.ReadFlowObject(client_id, rdf_flow.flow_id)
        except db.UnknownFlowError:
            pass
        else:
            # The read succeeded, so a flow with this id already exists.
            raise CanNotStartFlowWithExistingIdError(client_id,
                                                     rdf_flow.flow_id)

    if parent.is_flow:
        # A flow is a nested flow.
        parent_rdf_flow = parent.flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_hunt_id = parent_rdf_flow.parent_hunt_id
        rdf_flow.parent_request_id = parent.flow_obj.GetCurrentOutboundId()
        # A child flow takes over the parent's creator when one is set.
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    elif parent.is_hunt:
        # Root-level hunt-induced flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
        rdf_flow.parent_hunt_id = parent.id
    elif parent.is_root or parent.is_scheduled_flow:
        # A flow is a root-level non-hunt flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
    else:
        raise ValueError(f"Unknown flow parent type {parent}")

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins, rdf_flow.long_flow_id)

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit
    if runtime_limit is not None:
        rdf_flow.runtime_limit_us = runtime_limit

    logging.info(u"Starting %s(%s) on %s (%s)", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id, start_at or "now")

    rdf_flow.current_state = "Start"

    flow_obj = flow_cls(rdf_flow)

    # Prevent a race condition, where a flow is scheduled twice, because one
    # worker inserts the row and another worker silently updates the existing
    # row.
    allow_update = False

    if start_at is None:
        # Store an initial version of the flow straight away. This is needed
        # so the database doesn't raise consistency errors due to missing
        # parent keys when writing logs / errors / results which might happen
        # in Start().
        try:
            data_store.REL_DB.WriteFlowObject(
                flow_obj.rdf_flow, allow_update=False)
        except db.FlowExistsError as e:
            raise CanNotStartFlowWithExistingIdError(
                client_id, rdf_flow.flow_id) from e

        # The row now exists, so the final write below has to be an update.
        allow_update = True

        try:
            # Just run the first state inline. NOTE: Running synchronously
            # means that this runs on the thread that starts the flow. The
            # advantage is that the Start method can raise any errors
            # immediately.
            flow_obj.Start()

            # The flow does not need to actually remain running.
            if not flow_obj.outstanding_requests:
                flow_obj.RunStateMethod("End")
                # Additional check for the correct state in case the End
                # method raised and terminated the flow.
                if flow_obj.IsRunning():
                    flow_obj.MarkDone()
        except Exception as e:  # pylint: disable=broad-except
            # We catch all exceptions that happen in Start() and mark the
            # flow as failed.
            msg = compatibility.NativeStr(e)
            if compatibility.PY2:
                msg = msg.decode("utf-8", "replace")

            flow_obj.Error(error_message=msg, backtrace=traceback.format_exc())
    else:
        flow_obj.CallState("Start", start_time=start_at)

    flow_obj.PersistState()

    try:
        data_store.REL_DB.WriteFlowObject(
            flow_obj.rdf_flow, allow_update=allow_update)
    except db.FlowExistsError as e:
        raise CanNotStartFlowWithExistingIdError(
            client_id, rdf_flow.flow_id) from e

    if parent.is_flow:
        # We can optimize here and not write requests/responses to the
        # database since we have to do this for the parent flow at some point
        # anyways.
        parent.flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

    return rdf_flow.flow_id