Beispiel #1
0
def StartFlow(client_id=None,
              cpu_limit=7200,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              parent_flow_obj=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record, runs the flow's Start state inline on the calling
    thread, writes the record to the relational data store and then either
    merges the queued messages into the parent flow (nested flows) or flushes
    them and publishes an audit event (top-level flows).

    Args:
      client_id: ID of the client this flow should run on. Required.
      cpu_limit: CPU limit in seconds for this flow. Only applied when not
        None.
      creator: Username that requested this flow. For nested flows the
        parent's creator takes precedence when it is set.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute. When None, an empty
        flow_cls.args_type() is created.
      flow_cls: Class of the flow that should be started. Required.
      network_bytes_limit: Limit on the network traffic this flow can
        generate. Only applied when not None.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      parent_flow_obj: A parent flow object. None if this is a top level flow.
      **kwargs: Extra keywords handed to FilterArgsFromSemanticProtobuf
        together with flow_args. Anything still left afterwards is rejected.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: flow_cls is missing or is not a registered flow, or
        client_id is missing.
      type_info.UnknownArg: kwargs still contains entries after the flow args
        have been filled in.
    """
    # Fail with the documented ValueError rather than an AttributeError on
    # `None.__name__` when the caller did not pass a flow class.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError:
        stats.STATS.IncrementCounter("grr_flow_invalid_flow_count")
        raise ValueError("Unable to locate flow %s" % flow_cls.__name__)

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    FilterArgsFromSemanticProtobuf(flow_args, kwargs)
    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg("Unknown parameters to StartFlow: %s" %
                                   kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(client_id=client_id,
                                     flow_class_name=flow_cls.__name__,
                                     args=flow_args,
                                     create_time=rdfvalue.RDFDatetime.Now(),
                                     creator=creator,
                                     output_plugins=output_plugins,
                                     original_flow=original_flow,
                                     flow_state="RUNNING")

    # Flow ids are 8 upper-case hex digits drawn from the PRNG.
    rdf_flow.flow_id = "%08X" % utils.PRNG.GetUInt32()

    if parent_flow_obj:
        # Nested flow: the long id hangs off the parent's long id and the
        # parent's creator (when set) overrides the one passed in.
        parent_rdf_flow = parent_flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_request_id = parent_flow_obj.GetCurrentOutboundId()
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    else:
        # Top-level flow: the long id hangs off the client id.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins,
            rdf_flow.long_flow_id,
            token=access_control.ACLToken(username=rdf_flow.creator))

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit

    logging.info(u"Scheduling %s(%s) on %s", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id)

    flow_obj = flow_cls(rdf_flow)
    # Just run the first state inline. NOTE: Running synchronously means
    # that this runs on the thread that starts the flow. The advantage is
    # that the Start method can raise any errors immediately.
    flow_obj.Start()
    # NOTE(review): state is persisted before the End state below runs;
    # confirm End is not expected to change the persisted state.
    flow_obj.PersistState()

    # The flow does not need to actually remain running.
    if not flow_obj.outstanding_requests:
        flow_obj.RunStateMethod("End")
        flow_obj.MarkDone()

    data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

    if parent_flow_obj is not None:
        # We can optimize here and not write requests/responses to the database
        # since we have to do this for the parent flow at some point anyways.
        parent_flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

        # Publish an audit event, only for top level flows.
        # TODO(amoser): split urn field into dedicated strings.
        events.Events.PublishEvent(
            "Audit",
            rdf_events.AuditEvent(user=creator,
                                  action="RUN_FLOW",
                                  flow_name=rdf_flow.flow_class_name,
                                  urn=rdf_flow.long_flow_id,
                                  client=client_id))

    return rdf_flow.flow_id
Beispiel #2
0
def StartFlow(client_id=None,
              cpu_limit=None,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              start_at=None,
              parent_flow_obj=None,
              parent_hunt_id=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record, then either runs the flow's Start state inline on
    the calling thread (start_at is None) or schedules the Start state for
    start_at. The record is written to the relational data store and queued
    messages are merged into the parent flow (nested flows) or flushed, with
    an audit event published (top-level flows).

    Args:
      client_id: ID of the client this flow should run on. Required.
      cpu_limit: CPU limit in seconds for this flow. Only applied when not
        None.
      creator: Username that requested this flow. For nested flows the
        parent's creator takes precedence when it is set.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute. When None, an empty
        flow_cls.args_type() is created.
      flow_cls: Class of the flow that should be started. Required.
      network_bytes_limit: Limit on the network traffic this flow can
        generate. Only applied when not None.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      start_at: If specified, flow will be started not immediately, but at a
        given time.
      parent_flow_obj: A parent flow object. None if this is a top level flow.
      parent_hunt_id: String identifying parent hunt. Can't be passed together
        with parent_flow_obj.
      **kwargs: Extra keywords handed to FilterArgsFromSemanticProtobuf
        together with flow_args. Anything still left afterwards is rejected.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: parent_flow_obj and parent_hunt_id were both given, flow_cls
        is missing or is not a registered flow, or client_id is missing.
      type_info.UnknownArg: kwargs still contains entries after the flow args
        have been filled in.
      CanNotStartFlowWithExistingIdError: a top-level flow with the chosen id
        already exists for this client.
    """

    if parent_flow_obj is not None and parent_hunt_id is not None:
        raise ValueError(
            "parent_flow_obj and parent_hunt_id are mutually exclusive.")

    # Fail with the documented ValueError rather than an AttributeError on
    # `None.__name__` when the caller did not pass a flow class.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError:
        stats_collector_instance.Get().IncrementCounter(
            "grr_flow_invalid_flow_count")
        raise ValueError("Unable to locate flow %s" % flow_cls.__name__)

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    FilterArgsFromSemanticProtobuf(flow_args, kwargs)
    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg("Unknown parameters to StartFlow: %s" %
                                   kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(client_id=client_id,
                                     flow_class_name=flow_cls.__name__,
                                     args=flow_args,
                                     create_time=rdfvalue.RDFDatetime.Now(),
                                     creator=creator,
                                     output_plugins=output_plugins,
                                     original_flow=original_flow,
                                     flow_state="RUNNING")

    if parent_hunt_id is not None and parent_flow_obj is None:
        # Hunt-induced flows reuse the hunt id as their flow id; legacy hunt
        # ids have their first two characters dropped.
        rdf_flow.flow_id = parent_hunt_id
        if IsLegacyHunt(parent_hunt_id):
            rdf_flow.flow_id = rdf_flow.flow_id[2:]
    else:
        rdf_flow.flow_id = RandomFlowId()

    # For better performance, only do conflicting IDs check for top-level flows.
    if not parent_flow_obj:
        try:
            data_store.REL_DB.ReadFlowObject(client_id, rdf_flow.flow_id)
            # The read succeeded, so the id is already taken.
            raise CanNotStartFlowWithExistingIdError(client_id,
                                                     rdf_flow.flow_id)
        except db.UnknownFlowError:
            pass

    if parent_flow_obj:  # A flow is a nested flow.
        parent_rdf_flow = parent_flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_hunt_id = parent_rdf_flow.parent_hunt_id
        rdf_flow.parent_request_id = parent_flow_obj.GetCurrentOutboundId()
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    elif parent_hunt_id:  # A flow is a root-level hunt-induced flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
        rdf_flow.parent_hunt_id = parent_hunt_id
    else:  # A flow is a root-level non-hunt flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins,
            rdf_flow.long_flow_id,
            token=access_control.ACLToken(username=rdf_flow.creator))

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit

    logging.info(u"Scheduling %s(%s) on %s (%s)", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id, start_at or "now")

    rdf_flow.current_state = "Start"

    flow_obj = flow_cls(rdf_flow)
    if start_at is None:

        # Store an initial version of the flow straight away. This is needed so the
        # database doesn't raise consistency errors due to missing parent keys when
        # writing logs / errors / results which might happen in Start().
        data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

        # Just run the first state inline. NOTE: Running synchronously means
        # that this runs on the thread that starts the flow. The advantage is
        # that the Start method can raise any errors immediately.
        flow_obj.Start()

        # The flow does not need to actually remain running.
        if not flow_obj.outstanding_requests:
            flow_obj.RunStateMethod("End")
            # Additional check for the correct state in case the End method raised and
            # terminated the flow.
            if flow_obj.IsRunning():
                flow_obj.MarkDone()
    else:
        # Deferred start: schedule the Start state for the requested time.
        flow_obj.CallState("Start", start_time=start_at)

    flow_obj.PersistState()

    data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow)

    if parent_flow_obj is not None:
        # We can optimize here and not write requests/responses to the database
        # since we have to do this for the parent flow at some point anyways.
        parent_flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

        # Publish an audit event, only for top level flows.
        # TODO(amoser): split urn field into dedicated strings.
        events.Events.PublishEvent(
            "Audit",
            rdf_events.AuditEvent(user=creator,
                                  action="RUN_FLOW",
                                  flow_name=rdf_flow.flow_class_name,
                                  urn=rdf_flow.long_flow_id,
                                  client=client_id))

    return rdf_flow.flow_id
Beispiel #3
0
def StartAFF4Flow(args=None,
                  runner_args=None,
                  parent_flow=None,
                  sync=True,
                  token=None,
                  **kwargs):
    """Creates an AFF4 flow object, starts it and returns its URN.

    Args:
      args: An arg protocol buffer which is an instance of the required flow's
        args_type class attribute. When None, an empty one is created from the
        flow object's args_type.
      runner_args: An instance of the FlowRunnerArgs() protocol buffer used to
        initialize the runner for this flow. When None, an empty one is
        created.
      parent_flow: A parent flow, or None if this is a top level flow.
      sync: If True, the Start method of this flow is called inline. Otherwise
        the Start state is scheduled through the runner.
      token: Security credentials token identifying the user. Required.
      **kwargs: Keywords offered first to runner_args and then to args via
        FilterArgsFromSemanticProtobuf. Anything still left is rejected.

    Returns:
      The URN of the flow object (flow_obj.urn).

    Raises:
      RuntimeError: The flow name is not registered, or a flow with a category
        was requested without runner_args.client_id.
      access_control.UnauthorizedAccess: No token was supplied.
      type_info.UnknownArg: kwargs still contains entries after both protobufs
        have been filled in.
    """
    # Runner args come from the caller or start out empty; either way the
    # keywords get a chance to fill them in.
    if runner_args is None:
        runner_args = rdf_flow_runner.FlowRunnerArgs()
    FilterArgsFromSemanticProtobuf(runner_args, kwargs)

    # Resolve the flow class by name; an unknown name is counted and rejected.
    try:
        flow_cls = registry.AFF4FlowRegistry.FlowClassByName(
            runner_args.flow_name)
    except ValueError:
        stats.STATS.IncrementCounter("grr_flow_invalid_flow_count")
        raise RuntimeError("Unable to locate flow %s" % runner_args.flow_name)

    if not token:
        raise access_control.UnauthorizedAccess("A token must be specified.")

    # The flow itself runs with a supervisor token whose expiry is pushed far
    # into the future. Python on Windows only supports dates up to the year
    # 3000.
    supervisor_token = token.SetUID()
    supervisor_token.expiry = rdfvalue.RDFDatetime.FromHumanReadable(
        "2997-01-01")

    # Flows with a category are user-visible and must target a client.
    if flow_cls.category:
        if not runner_args.client_id:
            raise RuntimeError(
                "Flow with category (user-visible flow) has to be "
                "started on a client, but runner_args.client_id "
                "is missing.")

    # The AFF4 object is created anonymously; the runner generates the
    # appropriate URN later.
    flow_obj = aff4.FACTORY.Create(None, flow_cls, token=supervisor_token)

    # Fill the flow args from the remaining keywords, validate and attach.
    flow_args = args if args is not None else flow_obj.args_type()
    FilterArgsFromSemanticProtobuf(flow_args, kwargs)
    flow_args.Validate()
    flow_obj.args = flow_args
    flow_obj.runner_args = runner_args

    # Every keyword should have been consumed by now; leftovers are unknown.
    if kwargs:
        leftover_msg = "Unknown parameters to StartAFF4Flow: %s" % kwargs
        raise type_info.UnknownArg(leftover_msg)

    # Nested flows run on top of their parent's runner.
    parent_runner = parent_flow.runner if parent_flow else None
    runner = flow_obj.CreateRunner(parent_runner=parent_runner,
                                   runner_args=runner_args)

    logging.info(u"Scheduling %s(%s) on %s", flow_obj.urn,
                 runner_args.flow_name, runner_args.client_id)

    if not sync:
        # Asynchronous: the Start state is scheduled on another worker.
        runner.CallState(next_state="Start")
    else:
        # Synchronous: Start runs on the calling thread, so any error it
        # raises surfaces immediately.
        flow_obj.Start()

    # Nothing outstanding means the flow has no reason to keep running.
    if not flow_obj.outstanding_requests:
        flow_obj.Terminate()

    flow_obj.Close()

    # Audit events are published for top level flows only.
    if parent_flow is not None:
        return flow_obj.urn

    audit_event = rdf_events.AuditEvent(user=supervisor_token.username,
                                        action="RUN_FLOW",
                                        flow_name=runner_args.flow_name,
                                        urn=flow_obj.urn,
                                        client=runner_args.client_id)
    events.Events.PublishEvent("Audit", audit_event, token=supervisor_token)

    return flow_obj.urn
Beispiel #4
0
def StartFlow(client_id=None,
              cpu_limit=None,
              creator=None,
              flow_args=None,
              flow_cls=None,
              network_bytes_limit=None,
              original_flow=None,
              output_plugins=None,
              start_at=None,
              parent=None,
              runtime_limit=None,
              **kwargs):
    """The main factory function for creating and executing a new flow.

    Builds the flow record, then either runs the flow's Start state inline on
    the calling thread (start_at is None) or schedules the Start state for
    start_at. Exceptions raised while running inline are caught and recorded
    on the flow via flow_obj.Error() rather than propagated. The record is
    written to the relational data store and queued messages are merged into
    the parent flow (nested flows) or flushed (all other parents).

    Args:
      client_id: ID of the client this flow should run on. Required.
      cpu_limit: CPU limit in seconds for this flow. Only applied when not
        None.
      creator: Username that requested this flow. For nested flows the
        parent's creator takes precedence when it is set.
      flow_args: An arg protocol buffer which is an instance of the required
        flow's args_type class attribute. When None, an empty
        flow_cls.args_type() is created.
      flow_cls: Class of the flow that should be started. Required.
      network_bytes_limit: Limit on the network traffic this flow can
        generate. Only applied when not None.
      original_flow: A FlowReference object in case this flow was copied from
        another flow.
      output_plugins: An OutputPluginDescriptor object indicating what output
        plugins should be used for this flow.
      start_at: If specified, flow will be started not immediately, but at a
        given time.
      parent: A FlowParent referencing the parent, or None for top-level
        flows (FlowParent.FromRoot() is used in that case).
      runtime_limit: Runtime limit as Duration for all ClientActions. Only
        applied when not None.
      **kwargs: Extra keywords handed to FilterArgsFromSemanticProtobuf
        together with flow_args. Anything still left afterwards is rejected.

    Returns:
      The flow id of the new flow.

    Raises:
      ValueError: flow_cls is missing or is not a registered flow, client_id
        is missing, or the parent is of an unknown type.
      type_info.UnknownArg: kwargs still contains entries after the flow args
        have been filled in.
      CanNotStartFlowWithExistingIdError: a flow with the chosen id already
        exists for this client.
    """
    # Fail with the documented ValueError rather than an AttributeError on
    # `None.__name__` when the caller did not pass a flow class.
    if flow_cls is None:
        raise ValueError("flow_cls is needed to start a flow.")

    # Is the required flow a known flow?
    try:
        registry.FlowRegistry.FlowClassByName(flow_cls.__name__)
    except ValueError:
        GRR_FLOW_INVALID_FLOW_COUNT.Increment()
        raise ValueError("Unable to locate flow %s" % flow_cls.__name__)

    if not client_id:
        raise ValueError("Client_id is needed to start a flow.")

    # Now parse the flow args into the new object from the keywords.
    if flow_args is None:
        flow_args = flow_cls.args_type()

    FilterArgsFromSemanticProtobuf(flow_args, kwargs)
    # At this point we should exhaust all the keyword args. If any are left
    # over, we do not know what to do with them so raise.
    if kwargs:
        raise type_info.UnknownArg("Unknown parameters to StartFlow: %s" %
                                   kwargs)

    # Check that the flow args are valid.
    flow_args.Validate()

    rdf_flow = rdf_flow_objects.Flow(client_id=client_id,
                                     flow_class_name=flow_cls.__name__,
                                     args=flow_args,
                                     create_time=rdfvalue.RDFDatetime.Now(),
                                     creator=creator,
                                     output_plugins=output_plugins,
                                     original_flow=original_flow,
                                     flow_state="RUNNING")

    if parent is None:
        parent = FlowParent.FromRoot()

    if parent.is_hunt or parent.is_scheduled_flow:
        # When starting a flow from a hunt or ScheduledFlow, re-use the parent's id
        # to make it easy to find flows. For hunts, every client has a top-level
        # flow with the hunt's id.
        rdf_flow.flow_id = parent.id
    else:  # For new top-level and child flows, assign a random ID.
        rdf_flow.flow_id = RandomFlowId()

    # For better performance, only do conflicting IDs check for top-level flows.
    if not parent.is_flow:
        try:
            data_store.REL_DB.ReadFlowObject(client_id, rdf_flow.flow_id)
            # The read succeeded, so the id is already taken.
            raise CanNotStartFlowWithExistingIdError(client_id,
                                                     rdf_flow.flow_id)
        except db.UnknownFlowError:
            pass

    if parent.is_flow:  # A flow is a nested flow.
        parent_rdf_flow = parent.flow_obj.rdf_flow
        rdf_flow.long_flow_id = "%s/%s" % (parent_rdf_flow.long_flow_id,
                                           rdf_flow.flow_id)
        rdf_flow.parent_flow_id = parent_rdf_flow.flow_id
        rdf_flow.parent_hunt_id = parent_rdf_flow.parent_hunt_id
        rdf_flow.parent_request_id = parent.flow_obj.GetCurrentOutboundId()
        if parent_rdf_flow.creator:
            rdf_flow.creator = parent_rdf_flow.creator
    elif parent.is_hunt:  # Root-level hunt-induced flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
        rdf_flow.parent_hunt_id = parent.id
    elif parent.is_root or parent.is_scheduled_flow:
        # A flow is a root-level non-hunt flow.
        rdf_flow.long_flow_id = "%s/%s" % (client_id, rdf_flow.flow_id)
    else:
        raise ValueError(f"Unknown flow parent type {parent}")

    if output_plugins:
        rdf_flow.output_plugins_states = GetOutputPluginStates(
            output_plugins, rdf_flow.long_flow_id)

    if network_bytes_limit is not None:
        rdf_flow.network_bytes_limit = network_bytes_limit
    if cpu_limit is not None:
        rdf_flow.cpu_limit = cpu_limit
    if runtime_limit is not None:
        rdf_flow.runtime_limit_us = runtime_limit

    logging.info(u"Starting %s(%s) on %s (%s)", rdf_flow.long_flow_id,
                 rdf_flow.flow_class_name, client_id, start_at or "now")

    rdf_flow.current_state = "Start"

    flow_obj = flow_cls(rdf_flow)

    # Prevent a race condition, where a flow is scheduled twice, because one
    # worker inserts the row and another worker silently updates the existing row.
    allow_update = False

    if start_at is None:
        # Store an initial version of the flow straight away. This is needed so the
        # database doesn't raise consistency errors due to missing parent keys when
        # writing logs / errors / results which might happen in Start().
        try:
            data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow,
                                              allow_update=False)
        except db.FlowExistsError:
            raise CanNotStartFlowWithExistingIdError(client_id,
                                                     rdf_flow.flow_id)

        # The row now exists and belongs to this call, so the final write
        # below is allowed to update it.
        allow_update = True

        try:
            # Just run the first state inline. NOTE: Running synchronously means
            # that this runs on the thread that starts the flow. The advantage is
            # that the Start method can raise any errors immediately.
            flow_obj.Start()

            # The flow does not need to actually remain running.
            if not flow_obj.outstanding_requests:
                flow_obj.RunStateMethod("End")
                # Additional check for the correct state in case the End method raised
                # and terminated the flow.
                if flow_obj.IsRunning():
                    flow_obj.MarkDone()
        except Exception as e:  # pylint: disable=broad-except
            # We catch all exceptions that happen in Start() and mark the flow as
            # failed.
            msg = compatibility.NativeStr(e)
            if compatibility.PY2:
                msg = msg.decode("utf-8", "replace")

            flow_obj.Error(error_message=msg, backtrace=traceback.format_exc())

    else:
        # Deferred start: schedule the Start state for the requested time.
        flow_obj.CallState("Start", start_time=start_at)

    flow_obj.PersistState()

    try:
        data_store.REL_DB.WriteFlowObject(flow_obj.rdf_flow,
                                          allow_update=allow_update)
    except db.FlowExistsError:
        raise CanNotStartFlowWithExistingIdError(client_id, rdf_flow.flow_id)

    if parent.is_flow:
        # We can optimize here and not write requests/responses to the database
        # since we have to do this for the parent flow at some point anyways.
        parent.flow_obj.MergeQueuedMessages(flow_obj)
    else:
        flow_obj.FlushQueuedMessages()

    return rdf_flow.flow_id