def _FlowStatusToClientResources(flow_obj, status_msg):
  """Builds a ClientResources record from a flow and its status message.

  Args:
    flow_obj: Flow object providing client_id and flow_id.
    status_msg: GrrStatus message carrying cpu_time_used and
      network_bytes_sent.

  Returns:
    An rdf_client_stats.ClientResources with the flow's identity and the
    resource usage copied from the status message.
  """
  cpu_usage = rdf_client_stats.CpuSeconds(
      user_cpu_time=status_msg.cpu_time_used.user_cpu_time,
      system_cpu_time=status_msg.cpu_time_used.system_cpu_time)
  return rdf_client_stats.ClientResources(
      client_id=flow_obj.client_id,
      session_id=flow_obj.flow_id,
      cpu_usage=cpu_usage,
      network_bytes_sent=status_msg.network_bytes_sent)
def GenerateStatusMessage(self, message, response_id=1):
  """Generates a STATUS GrrMessage responding to the given request message.

  Args:
    message: The GrrMessage request to respond to.
    response_id: Response id to use in the generated message.

  Returns:
    A GrrMessage of type STATUS whose GrrStatus payload reports OK and
    carries the next simulated CPU and network usage samples.
  """
  # Use the next() builtin rather than the iterator's .next() method: the
  # method form is Python-2-only, while next() works on both 2 and 3.
  cpu_time_used = rdf_client_stats.CpuSeconds(
      user_cpu_time=next(self.user_cpu_usage),
      system_cpu_time=next(self.system_cpu_usage))
  network_bytes_sent = next(self.network_usage)
  return rdf_flows.GrrMessage(
      session_id=message.session_id,
      name=message.name,
      response_id=response_id,
      request_id=message.request_id,
      payload=rdf_flows.GrrStatus(
          status=rdf_flows.GrrStatus.ReturnedStatus.OK,
          cpu_time_used=cpu_time_used,
          network_bytes_sent=network_bytes_sent),
      type=rdf_flows.GrrMessage.Type.STATUS)
def ProcessHuntFlowDone(flow_obj, status_msg=None):
  """Notifies hunt about a given hunt-induced flow completion.

  Args:
    flow_obj: The completed flow object.
    status_msg: GrrStatus message with the flow's final resource usage.
  """
  if not hunt.IsLegacyHunt(flow_obj.parent_hunt_id):
    # Relational hunts: atomically bump counters and register resources.
    # Reuse the shared helper instead of duplicating the ClientResources
    # construction inline.
    resources = _FlowStatusToClientResources(flow_obj, status_msg)

    def UpdateFn(hunt_obj):
      hunt_obj.num_successful_clients += 1
      if flow_obj.num_replies_sent:
        hunt_obj.num_clients_with_results += 1
      hunt_obj.client_resources_stats.RegisterResources(resources)
      return hunt_obj

    hunt_obj = data_store.REL_DB.UpdateHuntObject(flow_obj.parent_hunt_id,
                                                  UpdateFn)
    hunt_obj = hunt.StopHuntIfAverageLimitsExceeded(hunt_obj)
    hunt.CompleteHuntIfExpirationTimeReached(hunt_obj)
    return

  # Legacy AFF4 code path.
  hunt_urn = rdfvalue.RDFURN("hunts").Add(flow_obj.parent_hunt_id)
  client_urn = rdf_client.ClientURN(flow_obj.client_id)

  with aff4.FACTORY.OpenWithLock(
      hunt_urn, lease_time=_HUNT_LEASE_TIME, blocking=True) as fd:
    # Legacy AFF4 code expects token to be set.
    fd.token = access_control.ACLToken(username=fd.creator)

    fd.RegisterCompletedClient(client_urn)
    if flow_obj.num_replies_sent:
      fd.RegisterClientWithResults(client_urn)
      fd.context.clients_with_results_count += 1
    fd.context.completed_clients_count += 1
    fd.context.results_count += flow_obj.num_replies_sent

    fd.GetRunner().SaveResourceUsage(flow_obj.client_id, status_msg)
    fd.StopHuntIfAverageLimitsExceeded()
def testReadHuntCountersCorrectlyAggregatesResultsAmongDifferentFlows(self):
  """Checks that ReadHuntCounters aggregates data from multiple flows."""
  hunt_obj = rdf_hunt_objects.Hunt(description="foo")
  self.db.WriteHuntObject(hunt_obj)
  expectations = self._BuildFilterConditionExpectations(hunt_obj)

  counters = self.db.ReadHuntCounters(hunt_obj.hunt_id)

  # Each per-condition counter must match the number of flows generated
  # for that condition.
  for counter_value, condition in [
      (counters.num_clients, db.HuntFlowsCondition.UNSET),
      (counters.num_successful_clients,
       db.HuntFlowsCondition.SUCCEEDED_FLOWS_ONLY),
      (counters.num_failed_clients, db.HuntFlowsCondition.FAILED_FLOWS_ONLY),
      (counters.num_crashed_clients, db.HuntFlowsCondition.CRASHED_FLOWS_ONLY),
  ]:
    self.assertEqual(counter_value, len(expectations[condition]))

  # _BuildFilterConditionExpectations writes 10 sample results for one client.
  self.assertEqual(counters.num_clients_with_results, 1)
  self.assertEqual(counters.num_results, 10)

  # No flow with resource metrics was written yet, so totals must be zero.
  self.assertEqual(counters.total_cpu_seconds, 0)
  self.assertEqual(counters.total_network_bytes_sent, 0)

  # After adding a flow with resource metrics, total counters get updated.
  self._SetupHuntClientAndFlow(
      flow_state=rdf_flow_objects.Flow.FlowState.FINISHED,
      cpu_time_used=rdf_client_stats.CpuSeconds(
          user_cpu_time=4.5, system_cpu_time=10),
      network_bytes_sent=42,
      hunt_id=hunt_obj.hunt_id)

  counters = self.db.ReadHuntCounters(hunt_obj.hunt_id)
  self.assertAlmostEqual(counters.total_cpu_seconds, 14.5)
  self.assertEqual(counters.total_network_bytes_sent, 42)
def InitFromFlowObject(self,
                       flow_obj,
                       with_args=True,
                       with_progress=False,
                       with_state_and_context=False):
  """Initializes this API flow object from a flow object.

  Args:
    flow_obj: Flow object to copy the data from.
    with_args: If True, also copy the flow arguments.
    with_progress: If True, also compute and copy the flow progress.
    with_state_and_context: If True, also fill in context and state data.

  Returns:
    Self, for chaining. On any error, self.internal_error is set instead of
    raising.
  """
  try:
    self.flow_id = flow_obj.flow_id
    self.client_id = flow_obj.client_id

    # TODO(amoser): Get rid of all urns.
    self.urn = flow_obj.long_flow_id

    self.name = flow_obj.flow_class_name
    self.started_at = flow_obj.create_time
    self.last_active_at = flow_obj.last_update_time
    self.creator = flow_obj.creator

    if flow_obj.client_crash_info:
      self.state = "CLIENT_CRASHED"
    elif flow_obj.pending_termination:
      self.state = "ERROR"
      self.status = ("Pending termination: %s" %
                     flow_obj.pending_termination.reason)
    else:
      context_state_map = {1: "RUNNING", 2: "TERMINATED", 3: "ERROR"}
      self.state = context_state_map[int(flow_obj.flow_state)]

    if with_state_and_context:
      outstanding_requests = (
          flow_obj.next_outbound_id - flow_obj.next_request_to_process)
      self.context = rdf_flow_runner.FlowContext(
          # TODO(amoser): No need to set this in all cases once the legacy API
          # is removed.
          client_resources=rdf_client_stats.ClientResources(
              cpu_usage=rdf_client_stats.CpuSeconds()),
          create_time=flow_obj.create_time,
          creator=flow_obj.creator,
          current_state=flow_obj.current_state,
          next_outbound_id=flow_obj.next_outbound_id,
          outstanding_requests=outstanding_requests,
          state=self.state,
          # TODO(amoser): Get rid of all urns.
          session_id=flow_obj.long_flow_id,
      )
      if flow_obj.output_plugins_states:
        self.context.output_plugins_states = flow_obj.output_plugins_states
      if flow_obj.network_bytes_sent:
        self.context.network_bytes_sent = flow_obj.network_bytes_sent
        self.context.client_resources.network_bytes_sent = (
            flow_obj.network_bytes_sent)
      if flow_obj.cpu_time_used:
        self.context.client_resources.cpu_time_used = flow_obj.cpu_time_used
      if flow_obj.error_message:
        self.context.status = flow_obj.error_message
      if flow_obj.backtrace:
        self.context.backtrace = flow_obj.backtrace

    if with_args:
      try:
        self.args = flow_obj.args
      except ValueError:
        # If args class name has changed, ValueError will be raised. Handling
        # this gracefully - we should still try to display some useful info
        # about the flow.
        pass

    if with_progress:
      flow_cls = self._GetFlowClass()
      if flow_cls:
        self.progress = flow_cls(flow_obj).GetProgress()

    self.runner_args = rdf_flow_runner.FlowRunnerArgs(
        client_id=flow_obj.client_id,
        flow_name=flow_obj.flow_class_name,
        notify_to_user=flow_base.FlowBase(flow_obj).ShouldSendNotifications())

    if flow_obj.output_plugins:
      self.runner_args.output_plugins = flow_obj.output_plugins

    if flow_obj.HasField("cpu_limit"):
      self.runner_args.cpu_limit = flow_obj.cpu_limit
    if flow_obj.HasField("network_bytes_limit"):
      # Bug fix: this previously assigned the network limit to
      # runner_args.cpu_limit, clobbering the CPU limit and leaving
      # network_bytes_limit unset.
      self.runner_args.network_bytes_limit = flow_obj.network_bytes_limit

    if flow_obj.original_flow.flow_id:
      self.original_flow = ApiFlowReference().FromFlowReference(
          flow_obj.original_flow)

    if with_state_and_context and flow_obj.persistent_data.ToDict():
      self.state_data = (
          api_call_handler_utils.ApiDataObject().InitFromDataObject(
              flow_obj.persistent_data))
  except Exception as e:  # pylint: disable=broad-except
    self.internal_error = "Error while opening flow: %s" % str(e)

  return self
def ReadHuntClientResourcesStats(self, hunt_id, cursor=None):
  """Read/calculate hunt client resources stats.

  Args:
    hunt_id: Hunt id (string form) to aggregate stats for.
    cursor: MySQL cursor supplied by the connection-pool decorator.

  Returns:
    An rdf_stats.ClientResourcesStats with running stats, histograms and the
    10 worst-performing flows of the hunt.
  """
  hunt_id_int = db_utils.HuntIDToInt(hunt_id)

  # First query: COUNT plus sums and sums-of-squares feeding the
  # RunningStats objects, followed by one COUNT column per histogram bin
  # appended via _BinsToQuery below.
  query = """
    SELECT
      COUNT(*),
      SUM(user_cpu_time_used_micros),
      SUM((user_cpu_time_used_micros) * (user_cpu_time_used_micros)),
      SUM(system_cpu_time_used_micros),
      SUM((system_cpu_time_used_micros) * (system_cpu_time_used_micros)),
      SUM(network_bytes_sent),
      SUM(network_bytes_sent * network_bytes_sent),
  """

  # CPU bin boundaries are in seconds; the DB stores microseconds, so the
  # bins must be scaled up before being compared against column values.
  scaled_bins = [
      int(1000000 * b) for b in rdf_stats.ClientResourcesStats.CPU_STATS_BINS
  ]

  query += self._BinsToQuery(scaled_bins, "(user_cpu_time_used_micros)")
  query += ","
  query += self._BinsToQuery(scaled_bins, "(system_cpu_time_used_micros)")
  query += ","
  query += self._BinsToQuery(
      rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS, "network_bytes_sent")

  query += " FROM flows "
  query += "FORCE INDEX(flows_by_hunt) "
  # parent_flow_id IS NULL restricts aggregation to top-level hunt flows.
  query += "WHERE parent_hunt_id = %s AND parent_flow_id IS NULL"

  cursor.execute(query, [hunt_id_int])

  response = cursor.fetchone()
  # First 7 columns are the aggregate totals; the rest are histogram bins.
  (count, user_sum, user_sq_sum, system_sum, system_sq_sum, network_sum,
   network_sq_sum) = response[:7]

  stats = rdf_stats.ClientResourcesStats(
      user_cpu_stats=rdf_stats.RunningStats(
          num=count,
          sum=db_utils.MicrosToSeconds(int(user_sum or 0)),
          # micros^2 -> seconds^2.
          sum_sq=int(user_sq_sum or 0) / 1e12,
      ),
      system_cpu_stats=rdf_stats.RunningStats(
          num=count,
          sum=db_utils.MicrosToSeconds(int(system_sum or 0)),
          sum_sq=int(system_sq_sum or 0) / 1e12,
      ),
      network_bytes_sent_stats=rdf_stats.RunningStats(
          num=count,
          sum=float(network_sum or 0),
          sum_sq=float(network_sq_sum or 0),
      ),
  )

  # Histogram bin counts appear in the response in the same order the bin
  # conditions were appended to the query: user CPU, system CPU, network.
  offset = 7
  stats.user_cpu_stats.histogram = rdf_stats.StatsHistogram()
  for b_num, b_max_value in zip(response[offset:],
                                rdf_stats.ClientResourcesStats.CPU_STATS_BINS):
    stats.user_cpu_stats.histogram.bins.append(
        rdf_stats.StatsHistogramBin(range_max_value=b_max_value, num=b_num))

  offset += len(rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
  stats.system_cpu_stats.histogram = rdf_stats.StatsHistogram()
  for b_num, b_max_value in zip(response[offset:],
                                rdf_stats.ClientResourcesStats.CPU_STATS_BINS):
    stats.system_cpu_stats.histogram.bins.append(
        rdf_stats.StatsHistogramBin(range_max_value=b_max_value, num=b_num))

  offset += len(rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
  stats.network_bytes_sent_stats.histogram = rdf_stats.StatsHistogram()
  for b_num, b_max_value in zip(
      response[offset:], rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS):
    stats.network_bytes_sent_stats.histogram.bins.append(
        rdf_stats.StatsHistogramBin(range_max_value=b_max_value, num=b_num))

  # Second query: the 10 flows with the highest total CPU usage that
  # consumed any resources at all.
  query = """
    SELECT
      client_id, flow_id,
      user_cpu_time_used_micros, system_cpu_time_used_micros,
      network_bytes_sent
    FROM flows
    FORCE INDEX(flows_by_hunt)
    WHERE parent_hunt_id = %s AND parent_flow_id IS NULL
          AND (user_cpu_time_used_micros > 0
               OR system_cpu_time_used_micros > 0
               OR network_bytes_sent > 0)
    ORDER BY (user_cpu_time_used_micros + system_cpu_time_used_micros) DESC
    LIMIT 10
  """
  cursor.execute(query, [hunt_id_int])

  for cid, fid, ucpu, scpu, nbs in cursor.fetchall():
    client_id = db_utils.IntToClientID(cid)
    flow_id = db_utils.IntToFlowID(fid)
    stats.worst_performers.append(
        rdf_client_stats.ClientResources(
            client_id=client_id,
            # session_id keeps the legacy URN form /<client>/<flow>.
            session_id=rdfvalue.RDFURN(client_id).Add(flow_id),
            cpu_usage=rdf_client_stats.CpuSeconds(
                user_cpu_time=db_utils.MicrosToSeconds(ucpu),
                system_cpu_time=db_utils.MicrosToSeconds(scpu),
            ),
            network_bytes_sent=nbs))

  return stats
def testReadHuntClientResourcesStatsCorrectlyAggregatesData(self):
  """Checks aggregation of per-flow resource usage into hunt-level stats."""
  hunt_obj = rdf_hunt_objects.Hunt(description="foo")
  self.db.WriteHuntObject(hunt_obj)

  def AssertHistogramMatches(actual_histogram, model_histogram):
    # Compare an actual histogram against the expected model bin by bin.
    self.assertLen(actual_histogram.bins, len(model_histogram.bins))
    for actual_bin, model_bin in zip(actual_histogram.bins,
                                     model_histogram.bins):
      self.assertAlmostEqual(actual_bin.range_max_value,
                             model_bin.range_max_value)
      self.assertEqual(actual_bin.num, model_bin.num)

  expected_user_cpu_histogram = rdf_stats.StatsHistogram.FromBins(
      rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
  expected_system_cpu_histogram = rdf_stats.StatsHistogram.FromBins(
      rdf_stats.ClientResourcesStats.CPU_STATS_BINS)
  expected_network_histogram = rdf_stats.StatsHistogram.FromBins(
      rdf_stats.ClientResourcesStats.NETWORK_STATS_BINS)

  flow_data = []
  for i in range(10):
    user_cpu_time = 4.5 + i
    system_cpu_time = 10 + i * 2
    network_bytes_sent = 42 + i * 3
    client_id, flow_id = self._SetupHuntClientAndFlow(
        flow_state=rdf_flow_objects.Flow.FlowState.FINISHED,
        cpu_time_used=rdf_client_stats.CpuSeconds(
            user_cpu_time=user_cpu_time, system_cpu_time=system_cpu_time),
        network_bytes_sent=network_bytes_sent,
        hunt_id=hunt_obj.hunt_id)

    expected_user_cpu_histogram.RegisterValue(user_cpu_time)
    expected_system_cpu_histogram.RegisterValue(system_cpu_time)
    expected_network_histogram.RegisterValue(network_bytes_sent)

    flow_data.append((client_id, flow_id,
                      (user_cpu_time, system_cpu_time, network_bytes_sent)))

  usage_stats = self.db.ReadHuntClientResourcesStats(hunt_obj.hunt_id)

  self.assertEqual(usage_stats.user_cpu_stats.num, 10)
  self.assertAlmostEqual(usage_stats.user_cpu_stats.mean, 9)
  self.assertAlmostEqual(usage_stats.user_cpu_stats.std, 2.8722813232690143)
  AssertHistogramMatches(usage_stats.user_cpu_stats.histogram,
                         expected_user_cpu_histogram)

  self.assertEqual(usage_stats.system_cpu_stats.num, 10)
  self.assertAlmostEqual(usage_stats.system_cpu_stats.mean, 19)
  self.assertAlmostEqual(usage_stats.system_cpu_stats.std, 5.744562646538029)
  AssertHistogramMatches(usage_stats.system_cpu_stats.histogram,
                         expected_system_cpu_histogram)

  self.assertEqual(usage_stats.network_bytes_sent_stats.num, 10)
  self.assertAlmostEqual(usage_stats.network_bytes_sent_stats.mean, 55.5)
  self.assertAlmostEqual(usage_stats.network_bytes_sent_stats.std,
                         8.616843969807043)
  AssertHistogramMatches(usage_stats.network_bytes_sent_stats.histogram,
                         expected_network_histogram)

  # Worst performers are ordered by decreasing resource usage, which is the
  # reverse of the creation order above.
  self.assertLen(usage_stats.worst_performers, 10)
  for worst_performer, flow_d in zip(usage_stats.worst_performers,
                                     reversed(flow_data)):
    client_id, flow_id, (user_cpu_time, system_cpu_time,
                         network_bytes_sent) = flow_d
    self.assertEqual(worst_performer.client_id.Basename(), client_id)
    self.assertAlmostEqual(worst_performer.cpu_usage.user_cpu_time,
                           user_cpu_time)
    self.assertAlmostEqual(worst_performer.cpu_usage.system_cpu_time,
                           system_cpu_time)
    self.assertEqual(worst_performer.network_bytes_sent, network_bytes_sent)
    self.assertEqual(worst_performer.session_id.Path(),
                     "/%s/%s" % (client_id, flow_id))