Example #1
0
def _FlowStatusToClientResources(flow_obj, status_msg):
    """Builds a ClientResources record from a flow and its status message.

    Args:
      flow_obj: Flow object providing client_id and flow_id.
      status_msg: Status message providing CPU time and network usage.

    Returns:
      An rdf_client_stats.ClientResources describing the flow's resource use.
    """
    cpu = status_msg.cpu_time_used
    cpu_usage = rdf_client_stats.CpuSeconds(
        user_cpu_time=cpu.user_cpu_time,
        system_cpu_time=cpu.system_cpu_time)
    return rdf_client_stats.ClientResources(
        client_id=flow_obj.client_id,
        session_id=flow_obj.flow_id,
        cpu_usage=cpu_usage,
        network_bytes_sent=status_msg.network_bytes_sent)
Example #2
0
    def GenerateStatusMessage(self, message, response_id=1):
        """Builds an OK status GrrMessage replying to the given request.

        Args:
          message: The GrrMessage request being answered; its session_id,
            name and request_id are copied into the reply.
          response_id: Response id to use for the status message.

        Returns:
          A GrrMessage of type STATUS carrying a GrrStatus payload with the
          next simulated CPU and network usage values.
        """
        # Use the builtin next() instead of the Python-2-only .next() method
        # so this works under both Python 2 (2.6+) and Python 3.
        cpu_time_used = rdf_client_stats.CpuSeconds(
            user_cpu_time=next(self.user_cpu_usage),
            system_cpu_time=next(self.system_cpu_usage))
        network_bytes_sent = next(self.network_usage)

        return rdf_flows.GrrMessage(
            session_id=message.session_id,
            name=message.name,
            response_id=response_id,
            request_id=message.request_id,
            payload=rdf_flows.GrrStatus(
                status=rdf_flows.GrrStatus.ReturnedStatus.OK,
                cpu_time_used=cpu_time_used,
                network_bytes_sent=network_bytes_sent),
            type=rdf_flows.GrrMessage.Type.STATUS)
Example #3
0
def ProcessHuntFlowDone(flow_obj, status_msg=None):
    """Notifies the hunt about a given hunt-induced flow completion.

    Args:
      flow_obj: The completed flow object; flow_obj.parent_hunt_id selects
        the hunt to update.
      status_msg: Status message carrying the flow's resource usage.
        NOTE(review): the relational branch below dereferences
        status_msg.cpu_time_used unconditionally, so despite the None
        default a status_msg appears to be required on that path — confirm
        with callers.
    """

    if not hunt.IsLegacyHunt(flow_obj.parent_hunt_id):
        # Relational (non-AFF4) code path: account the flow's resource usage
        # against the hunt and bump completion counters.
        resources = rdf_client_stats.ClientResources(
            client_id=flow_obj.client_id,
            session_id=flow_obj.flow_id,
            cpu_usage=rdf_client_stats.CpuSeconds(
                user_cpu_time=status_msg.cpu_time_used.user_cpu_time,
                system_cpu_time=status_msg.cpu_time_used.system_cpu_time),
            network_bytes_sent=status_msg.network_bytes_sent)

        def UpdateFn(hunt_obj):
            # Applied under the datastore's hunt-update transaction.
            hunt_obj.num_successful_clients += 1
            if flow_obj.num_replies_sent:
                hunt_obj.num_clients_with_results += 1
            hunt_obj.client_resources_stats.RegisterResources(resources)

            return hunt_obj

        hunt_obj = data_store.REL_DB.UpdateHuntObject(flow_obj.parent_hunt_id,
                                                      UpdateFn)
        # Completion of a flow may push averages over limits or be the last
        # outstanding client — check both stop conditions.
        hunt_obj = hunt.StopHuntIfAverageLimitsExceeded(hunt_obj)
        hunt.CompleteHuntIfExpirationTimeReached(hunt_obj)
        return

    # Legacy AFF4 code path: update the hunt object under a lock.
    hunt_urn = rdfvalue.RDFURN("hunts").Add(flow_obj.parent_hunt_id)
    client_urn = rdf_client.ClientURN(flow_obj.client_id)

    with aff4.FACTORY.OpenWithLock(hunt_urn,
                                   lease_time=_HUNT_LEASE_TIME,
                                   blocking=True) as fd:
        # Legacy AFF4 code expects token to be set.
        fd.token = access_control.ACLToken(username=fd.creator)

        fd.RegisterCompletedClient(client_urn)
        if flow_obj.num_replies_sent:
            fd.RegisterClientWithResults(client_urn)
            fd.context.clients_with_results_count += 1

        fd.context.completed_clients_count += 1
        fd.context.results_count += flow_obj.num_replies_sent

        fd.GetRunner().SaveResourceUsage(flow_obj.client_id, status_msg)

        fd.StopHuntIfAverageLimitsExceeded()
Example #4
0
  def testReadHuntCountersCorrectlyAggregatesResultsAmongDifferentFlows(self):
    """Checks that ReadHuntCounters aggregates counts across a hunt's flows."""
    hunt_obj = rdf_hunt_objects.Hunt(description="foo")
    self.db.WriteHuntObject(hunt_obj)

    expectations = self._BuildFilterConditionExpectations(hunt_obj)
    counters = self.db.ReadHuntCounters(hunt_obj.hunt_id)

    # Per-condition client counts must match the fixture expectations.
    condition_checks = [
        (counters.num_clients, db.HuntFlowsCondition.UNSET),
        (counters.num_successful_clients,
         db.HuntFlowsCondition.SUCCEEDED_FLOWS_ONLY),
        (counters.num_failed_clients,
         db.HuntFlowsCondition.FAILED_FLOWS_ONLY),
        (counters.num_crashed_clients,
         db.HuntFlowsCondition.CRASHED_FLOWS_ONLY),
    ]
    for actual, condition in condition_checks:
      self.assertEqual(actual, len(expectations[condition]))

    # _BuildFilterConditionExpectations writes 10 sample results for a
    # single client.
    self.assertEqual(counters.num_clients_with_results, 1)
    self.assertEqual(counters.num_results, 10)

    # No flow carries resource metrics yet, so the totals are zero.
    self.assertEqual(counters.total_cpu_seconds, 0)
    self.assertEqual(counters.total_network_bytes_sent, 0)

    # Adding a finished flow with resource metrics must be reflected in the
    # aggregated totals.
    self._SetupHuntClientAndFlow(
        flow_state=rdf_flow_objects.Flow.FlowState.FINISHED,
        cpu_time_used=rdf_client_stats.CpuSeconds(
            user_cpu_time=4.5, system_cpu_time=10),
        network_bytes_sent=42,
        hunt_id=hunt_obj.hunt_id)
    counters = self.db.ReadHuntCounters(hunt_obj.hunt_id)
    self.assertAlmostEqual(counters.total_cpu_seconds, 14.5)
    self.assertEqual(counters.total_network_bytes_sent, 42)
Example #5
0
    def InitFromFlowObject(self,
                           flow_obj,
                           with_args=True,
                           with_progress=False,
                           with_state_and_context=False):
        """Initializes this API flow representation from a flow object.

        Args:
          flow_obj: The flow object to mirror into this API object.
          with_args: If True, copy the flow's arguments.
          with_progress: If True, compute and attach the flow's progress.
          with_state_and_context: If True, fill in the flow context and the
            persistent state data.

        Returns:
          Self, for call chaining.
        """
        try:
            self.flow_id = flow_obj.flow_id
            self.client_id = flow_obj.client_id

            # TODO(amoser): Get rid of all urns.
            self.urn = flow_obj.long_flow_id

            self.name = flow_obj.flow_class_name
            self.started_at = flow_obj.create_time
            self.last_active_at = flow_obj.last_update_time
            self.creator = flow_obj.creator

            if flow_obj.client_crash_info:
                self.state = "CLIENT_CRASHED"
            elif flow_obj.pending_termination:
                self.state = "ERROR"
                self.status = ("Pending termination: %s" %
                               flow_obj.pending_termination.reason)
            else:
                # Maps flow_state enum values onto API state names.
                context_state_map = {1: "RUNNING", 2: "TERMINATED", 3: "ERROR"}
                self.state = context_state_map[int(flow_obj.flow_state)]

            if with_state_and_context:
                outstanding_requests = (flow_obj.next_outbound_id -
                                        flow_obj.next_request_to_process)
                self.context = rdf_flow_runner.FlowContext(
                    # TODO(amoser): No need to set this in all cases once the legacy API
                    # is removed.
                    client_resources=rdf_client_stats.ClientResources(
                        cpu_usage=rdf_client_stats.CpuSeconds()),
                    create_time=flow_obj.create_time,
                    creator=flow_obj.creator,
                    current_state=flow_obj.current_state,
                    next_outbound_id=flow_obj.next_outbound_id,
                    outstanding_requests=outstanding_requests,
                    state=self.state,
                    # TODO(amoser): Get rid of all urns.
                    session_id=flow_obj.long_flow_id,
                )
                if flow_obj.output_plugins_states:
                    self.context.output_plugins_states = flow_obj.output_plugins_states
                if flow_obj.network_bytes_sent:
                    self.context.network_bytes_sent = flow_obj.network_bytes_sent
                    self.context.client_resources.network_bytes_sent = (
                        flow_obj.network_bytes_sent)
                if flow_obj.cpu_time_used:
                    self.context.client_resources.cpu_time_used = flow_obj.cpu_time_used
                if flow_obj.error_message:
                    self.context.status = flow_obj.error_message
                if flow_obj.backtrace:
                    self.context.backtrace = flow_obj.backtrace

            if with_args:
                try:
                    self.args = flow_obj.args
                except ValueError:
                    # If args class name has changed, ValueError will be raised. Handling
                    # this gracefully - we should still try to display some useful info
                    # about the flow.
                    pass

            if with_progress:
                flow_cls = self._GetFlowClass()
                if flow_cls:
                    self.progress = flow_cls(flow_obj).GetProgress()

            self.runner_args = rdf_flow_runner.FlowRunnerArgs(
                client_id=flow_obj.client_id,
                flow_name=flow_obj.flow_class_name,
                notify_to_user=flow_base.FlowBase(
                    flow_obj).ShouldSendNotifications())

            if flow_obj.output_plugins:
                self.runner_args.output_plugins = flow_obj.output_plugins

            if flow_obj.HasField("cpu_limit"):
                self.runner_args.cpu_limit = flow_obj.cpu_limit

            if flow_obj.HasField("network_bytes_limit"):
                # Fixed copy-paste bug: this previously overwrote cpu_limit
                # with the network bytes limit.
                self.runner_args.network_bytes_limit = (
                    flow_obj.network_bytes_limit)

            if flow_obj.original_flow.flow_id:
                self.original_flow = ApiFlowReference().FromFlowReference(
                    flow_obj.original_flow)

            if with_state_and_context and flow_obj.persistent_data.ToDict():
                self.state_data = (
                    api_call_handler_utils.ApiDataObject().InitFromDataObject(
                        flow_obj.persistent_data))

        except Exception as e:  # pylint: disable=broad-except
            # Deliberately broad: any failure while mirroring the flow is
            # surfaced as internal_error instead of breaking the API response.
            self.internal_error = "Error while opening flow: %s" % str(e)

        return self
Example #6
0
    def ReadHuntClientResourcesStats(self, hunt_id, cursor=None):
        """Read/calculate hunt client resources stats.

        Aggregates CPU and network usage over all top-level flows of the
        given hunt in a single SQL pass (sums, sums of squares and
        per-bin histogram counts), then fetches the 10 most CPU-expensive
        flows as "worst performers".

        Args:
          hunt_id: Hunt id string; converted to its integer DB form.
          cursor: MySQL cursor to run the queries on.
            NOTE(review): used unconditionally despite the None default —
            presumably injected by a transaction decorator; confirm.

        Returns:
          An rdf_stats.ClientResourcesStats with running stats, histograms
          and the worst_performers list filled in.
        """
        hunt_id_int = db_utils.HuntIDToInt(hunt_id)

        query = """
      SELECT
        COUNT(*),
        SUM(user_cpu_time_used_micros),
        SUM((user_cpu_time_used_micros) * (user_cpu_time_used_micros)),
        SUM(system_cpu_time_used_micros),
        SUM((system_cpu_time_used_micros) * (system_cpu_time_used_micros)),
        SUM(network_bytes_sent),
        SUM(network_bytes_sent * network_bytes_sent),
    """

        # CPU histogram bins are defined in seconds; the DB stores
        # microseconds, so scale the bin boundaries up by 1e6.
        scaled_bins = [
            int(1000000 * b)
            for b in rdf_stats.ClientResourcesStats.CPU_STATS_BINS
        ]

        # Append one per-bin COUNT expression per histogram bin; the result
        # row is therefore: 7 aggregate columns followed by the user-CPU,
        # system-CPU and network histogram bin counts, in that order.
        query += self._BinsToQuery(scaled_bins, "(user_cpu_time_used_micros)")
        query += ","
        query += self._BinsToQuery(scaled_bins,
                                   "(system_cpu_time_used_micros)")
        query += ","
        query += self._BinsToQuery(
            rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS,
            "network_bytes_sent")

        query += " FROM flows "
        query += "FORCE INDEX(flows_by_hunt) "
        # parent_flow_id IS NULL restricts to top-level (per-client) flows.
        query += "WHERE parent_hunt_id = %s AND parent_flow_id IS NULL"

        cursor.execute(query, [hunt_id_int])

        response = cursor.fetchone()
        (count, user_sum, user_sq_sum, system_sum, system_sq_sum, network_sum,
         network_sq_sum) = response[:7]

        # Micros -> seconds for sums; micros² -> seconds² (divide by 1e12)
        # for sums of squares. SUM() yields NULL on empty sets, hence "or 0".
        stats = rdf_stats.ClientResourcesStats(
            user_cpu_stats=rdf_stats.RunningStats(
                num=count,
                sum=db_utils.MicrosToSeconds(int(user_sum or 0)),
                sum_sq=int(user_sq_sum or 0) / 1e12,
            ),
            system_cpu_stats=rdf_stats.RunningStats(
                num=count,
                sum=db_utils.MicrosToSeconds(int(system_sum or 0)),
                sum_sq=int(system_sq_sum or 0) / 1e12,
            ),
            network_bytes_sent_stats=rdf_stats.RunningStats(
                num=count,
                sum=float(network_sum or 0),
                sum_sq=float(network_sq_sum or 0),
            ),
        )

        # Rebuild the three histograms from the per-bin counts, walking the
        # response row from offset 7 onwards in the same order the bin
        # expressions were appended above.
        offset = 7
        stats.user_cpu_stats.histogram = rdf_stats.StatsHistogram()
        for b_num, b_max_value in zip(
                response[offset:],
                rdf_stats.ClientResourcesStats.CPU_STATS_BINS):
            stats.user_cpu_stats.histogram.bins.append(
                rdf_stats.StatsHistogramBin(range_max_value=b_max_value,
                                            num=b_num))

        offset += len(rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
        stats.system_cpu_stats.histogram = rdf_stats.StatsHistogram()
        for b_num, b_max_value in zip(
                response[offset:],
                rdf_stats.ClientResourcesStats.CPU_STATS_BINS):
            stats.system_cpu_stats.histogram.bins.append(
                rdf_stats.StatsHistogramBin(range_max_value=b_max_value,
                                            num=b_num))

        offset += len(rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
        stats.network_bytes_sent_stats.histogram = rdf_stats.StatsHistogram()
        for b_num, b_max_value in zip(
                response[offset:],
                rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS):
            stats.network_bytes_sent_stats.histogram.bins.append(
                rdf_stats.StatsHistogramBin(range_max_value=b_max_value,
                                            num=b_num))

        # Second query: the 10 flows with the highest combined CPU usage
        # that consumed any resources at all.
        query = """
      SELECT
        client_id, flow_id, user_cpu_time_used_micros,
        system_cpu_time_used_micros, network_bytes_sent
      FROM flows
      FORCE INDEX(flows_by_hunt)
      WHERE parent_hunt_id = %s AND parent_flow_id IS NULL AND
            (user_cpu_time_used_micros > 0 OR
             system_cpu_time_used_micros > 0 OR
             network_bytes_sent > 0)
      ORDER BY (user_cpu_time_used_micros + user_cpu_time_used_micros) DESC
      LIMIT 10
    """

        cursor.execute(query, [hunt_id_int])

        for cid, fid, ucpu, scpu, nbs in cursor.fetchall():
            client_id = db_utils.IntToClientID(cid)
            flow_id = db_utils.IntToFlowID(fid)
            stats.worst_performers.append(
                rdf_client_stats.ClientResources(
                    client_id=client_id,
                    session_id=rdfvalue.RDFURN(client_id).Add(flow_id),
                    cpu_usage=rdf_client_stats.CpuSeconds(
                        user_cpu_time=db_utils.MicrosToSeconds(ucpu),
                        system_cpu_time=db_utils.MicrosToSeconds(scpu),
                    ),
                    network_bytes_sent=nbs))

        return stats
Example #7
0
  def testReadHuntClientResourcesStatsCorrectlyAggregatesData(self):
    """Verifies resource-usage stats aggregation over a hunt's flows."""
    hunt_obj = rdf_hunt_objects.Hunt(description="foo")
    self.db.WriteHuntObject(hunt_obj)

    user_cpu_model = rdf_stats.StatsHistogram.FromBins(
        rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
    system_cpu_model = rdf_stats.StatsHistogram.FromBins(
        rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
    network_model = rdf_stats.StatsHistogram.FromBins(
        rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS)

    # Create 10 finished flows with linearly growing resource usage and
    # keep model histograms in sync with what was written.
    flow_data = []
    for i in range(10):
      user_cpu = 4.5 + i
      system_cpu = 10 + i * 2
      network_bytes = 42 + i * 3

      client_id, flow_id = self._SetupHuntClientAndFlow(
          flow_state=rdf_flow_objects.Flow.FlowState.FINISHED,
          cpu_time_used=rdf_client_stats.CpuSeconds(
              user_cpu_time=user_cpu, system_cpu_time=system_cpu),
          network_bytes_sent=network_bytes,
          hunt_id=hunt_obj.hunt_id)

      user_cpu_model.RegisterValue(user_cpu)
      system_cpu_model.RegisterValue(system_cpu)
      network_model.RegisterValue(network_bytes)

      flow_data.append((client_id, flow_id, (user_cpu, system_cpu,
                                             network_bytes)))

    usage_stats = self.db.ReadHuntClientResourcesStats(hunt_obj.hunt_id)

    def check_running_stats(actual, expected_mean, expected_std, model):
      # Each RunningStats must cover all 10 flows and match the model
      # histogram bin by bin.
      self.assertEqual(actual.num, 10)
      self.assertAlmostEqual(actual.mean, expected_mean)
      self.assertAlmostEqual(actual.std, expected_std)
      self.assertLen(actual.histogram.bins, len(model.bins))
      for b, model_b in zip(actual.histogram.bins, model.bins):
        self.assertAlmostEqual(b.range_max_value, model_b.range_max_value)
        self.assertEqual(b.num, model_b.num)

    check_running_stats(usage_stats.user_cpu_stats, 9, 2.8722813232690143,
                        user_cpu_model)
    check_running_stats(usage_stats.system_cpu_stats, 19, 5.744562646538029,
                        system_cpu_model)
    check_running_stats(usage_stats.network_bytes_sent_stats, 55.5,
                        8.616843969807043, network_model)

    # Worst performers come back ordered by descending CPU usage, i.e. the
    # reverse of the insertion order above.
    self.assertLen(usage_stats.worst_performers, 10)
    for worst_performer, flow_d in zip(usage_stats.worst_performers,
                                       reversed(flow_data)):
      client_id, flow_id, (user_cpu, system_cpu, network_bytes) = flow_d
      self.assertEqual(worst_performer.client_id.Basename(), client_id)
      self.assertAlmostEqual(worst_performer.cpu_usage.user_cpu_time,
                             user_cpu)
      self.assertAlmostEqual(worst_performer.cpu_usage.system_cpu_time,
                             system_cpu)
      self.assertEqual(worst_performer.network_bytes_sent, network_bytes)
      self.assertEqual(worst_performer.session_id.Path(),
                       "/%s/%s" % (client_id, flow_id))