def testInCallAcceptsRegularExpressions(self):
  # Initialize and write test data.
  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id="pid1",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(0))

  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id="pid1",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(90))
  self.stats_store.WriteStats(
      process_id="pid2",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(90))

  stats_data = self.stats_store.MultiReadStats(process_ids=["pid1", "pid2"])

  query = stats_store.StatsStoreDataQuery(stats_data)
  self.assertEqual(query.In("pid1").In(_SINGLE_DIM_COUNTER).SeriesCount(), 1)

  query = stats_store.StatsStoreDataQuery(stats_data)
  self.assertEqual(query.In("pid2").In(_SINGLE_DIM_COUNTER).SeriesCount(), 1)

  query = stats_store.StatsStoreDataQuery(stats_data)
  self.assertEqual(query.In("pid.*").In(_SINGLE_DIM_COUNTER).SeriesCount(), 2)
def testInTimeRangeLimitsQueriesByTime(self):
  # Write test data.
  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id=self.process_id,
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(42))

  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id=self.process_id,
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(100))

  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id=self.process_id,
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(140))

  # Read data back.
  stats_data = self.stats_store.ReadStats(process_id=self.process_id)

  # Check that InTimeRange works as expected.
  query = stats_store.StatsStoreDataQuery(stats_data)
  ts = query.In(_SINGLE_DIM_COUNTER).TakeValue().InTimeRange(
      rdfvalue.RDFDatetime.FromSecondsSinceEpoch(80),
      rdfvalue.RDFDatetime.FromSecondsSinceEpoch(120)).ts

  self.assertListEqual(ts.data, [[2, 100 * 1e6]])
def _GenerateStatsEntriesForMultiDimensionalMetric(process_id, metric_name,
                                                   metadata, timestamp):
  """Generates StatsStoreEntries for the given multi-dimensional metric.

  Args:
    process_id: Process identifier to use for the generated entries.
    metric_name: Name of the multi-dimensional metric.
    metadata: MetricMetadata for the metric.
    timestamp: Timestamp to use for the generated entries.

  Returns:
    A list of StatsStoreEntries containing current values for the metric's
    dimensions.
  """
  stats_entries = []
  for raw_field_values in stats_collector_instance.Get().GetMetricFields(
      metric_name):
    _ValidateFieldValues(raw_field_values, metadata)

    metric_value = stats_values.StatsStoreValue()
    for i, raw_field_value in enumerate(raw_field_values):
      field_value = stats_values.StatsStoreFieldValue()
      field_value.SetValue(raw_field_value,
                           metadata.fields_defs[i].field_type)
      metric_value.fields_values.Append(field_value)

    raw_metric_value = stats_collector_instance.Get().GetMetricValue(
        metric_name, fields=raw_field_values)
    metric_value.SetValue(raw_metric_value, metadata.value_type)

    stats_entries.append(
        stats_values.StatsStoreEntry(
            process_id=process_id,
            metric_name=metric_name,
            metric_value=metric_value,
            timestamp=timestamp))
  return stats_entries
def KillOldFlows(self):
  """Disables the cron flow if it has exceeded CRON_ARGS.lifetime.

  Returns:
    bool: True if the flow was killed.
  """
  if not self.IsRunning():
    return False

  start_time = self.Get(self.Schema.LAST_RUN_TIME)
  lifetime = self.Get(self.Schema.CRON_ARGS).lifetime
  elapsed = rdfvalue.RDFDatetime.Now() - start_time

  if lifetime and elapsed > lifetime:
    self.StopCurrentRun()
    stats_collector_instance.Get().IncrementCounter(
        "cron_job_timeout", fields=[self.urn.Basename()])
    stats_collector_instance.Get().RecordEvent(
        "cron_job_latency", elapsed.seconds, fields=[self.urn.Basename()])
    return True

  return False
def _ProcessRepliesWithHuntOutputPlugins(self, replies):
  if db_compat.IsLegacyHunt(self.rdf_flow.parent_hunt_id):
    return

  hunt_obj = data_store.REL_DB.ReadHuntObject(self.rdf_flow.parent_hunt_id)
  self.rdf_flow.output_plugins = hunt_obj.output_plugins
  self.rdf_flow.output_plugins_states = hunt_obj.output_plugins_states
  created_plugins = self._ProcessRepliesWithFlowOutputPlugins(replies)

  def UpdateFn(hunt_to_update):
    for plugin, state in zip(created_plugins,
                             hunt_to_update.output_plugins_states):
      if plugin is None:
        state.plugin_state["error_count"] += 1
      else:
        state.plugin_state["success_count"] += 1
        plugin.UpdateState(state.plugin_state)

    return hunt_to_update

  data_store.REL_DB.UpdateHuntObject(hunt_obj.hunt_id, UpdateFn)

  for plugin_def, created_plugin in zip(hunt_obj.output_plugins,
                                        created_plugins):
    if created_plugin is not None:
      stats_collector_instance.Get().IncrementCounter(
          "hunt_results_ran_through_plugin",
          delta=len(replies),
          fields=[plugin_def.plugin_name])
    else:
      stats_collector_instance.Get().IncrementCounter(
          "hunt_output_plugin_errors", fields=[plugin_def.plugin_name])
def testLatencyStatsAreCorrectlyRecorded(self):
  with test_lib.FakeTime(0):
    cron_manager = aff4_cronjobs.GetCronManager()
    cron_args = rdf_cronjobs.CreateCronJobArgs(
        flow_name="FakeCronJob", frequency="1w")

    cron_job_id = cron_manager.CreateJob(cron_args=cron_args,
                                         token=self.token)
    cron_manager.RunOnce(token=self.token)

  prev_metric_value = stats_collector_instance.Get().GetMetricValue(
      "cron_job_latency", fields=[cron_job_id])

  # Fast forward one minute.
  with test_lib.FakeTime(60):
    cron_manager.RunOnce(token=self.token)
    cron_job = cron_manager.ReadJob(cron_job_id, token=self.token)
    cron_flow_urn = cron_job.Get(cron_job.Schema.CURRENT_FLOW_URN)
    flow_test_lib.TestFlowHelper(
        cron_flow_urn, check_flow_errors=False, token=self.token)

    # This RunOnce call should determine that the flow has finished.
    cron_manager.RunOnce(token=self.token)

  # Check that stats got updated.
  current_metric_value = stats_collector_instance.Get().GetMetricValue(
      "cron_job_latency", fields=[cron_job_id])
  self.assertEqual(current_metric_value.count - prev_metric_value.count, 1)
  self.assertEqual(current_metric_value.sum - prev_metric_value.sum, 60)
def ProcessTask(self, target, args, name, queueing_time):
  """Processes the tasks."""
  if self.pool.name:
    time_in_queue = time.time() - queueing_time
    stats_collector_instance.Get().RecordEvent(
        _QUEUEING_TIME_METRIC, time_in_queue, fields=[self.pool.name])

  start_time = time.time()
  try:
    target(*args)
  # We can't let a worker die because one of the tasks it has to process
  # throws an exception. Therefore, we catch every error that is
  # raised in the call to target().
  except Exception as e:  # pylint: disable=broad-except
    if self.pool.name:
      stats_collector_instance.Get().IncrementCounter(
          _TASK_EXCEPTIONS_METRIC, fields=[self.pool.name])
    logging.exception("Caught exception in worker thread (%s): %s", name,
                      str(e))

  if self.pool.name:
    total_time = time.time() - start_time
    stats_collector_instance.Get().RecordEvent(
        _WORKING_TIME_METRIC, total_time, fields=[self.pool.name])
def testExportedFunctions(self):
  """Tests if the outstanding tasks variable is exported correctly."""
  signal_event, wait_event = threading.Event(), threading.Event()

  def RunFn():
    signal_event.set()
    wait_event.wait()

  pool_name = "test_pool3"
  pool = threadpool.ThreadPool.Factory(pool_name, 10)
  pool.Start()
  try:
    # First 10 tasks should be scheduled immediately, as we have max_threads
    # set to 10.
    for _ in range(10):
      signal_event.clear()
      pool.AddTask(RunFn, ())
      signal_event.wait(10)

    outstanding_tasks = stats_collector_instance.Get().GetMetricValue(
        threadpool._OUTSTANDING_TASKS_METRIC, fields=[pool_name])
    self.assertEqual(outstanding_tasks, 0)

    # Next 5 tasks should sit in the queue.
    for i in range(5):
      pool.AddTask(RunFn, ())
      outstanding_tasks = stats_collector_instance.Get().GetMetricValue(
          threadpool._OUTSTANDING_TASKS_METRIC, fields=[pool_name])
      self.assertEqual(outstanding_tasks, i + 1)
  finally:
    wait_event.set()
    pool.Stop()
def testUpdatesStatsCounterOnFailure(self):
  failing_plugin_descriptor = rdf_output_plugin.OutputPluginDescriptor(
      plugin_name="FailingDummyHuntOutputPlugin")
  self.StartHunt(output_plugins=[failing_plugin_descriptor])

  prev_success_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_results_ran_through_plugin",
      fields=["FailingDummyHuntOutputPlugin"])
  prev_errors_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_output_plugin_errors", fields=["FailingDummyHuntOutputPlugin"])

  self.AssignTasksToClients()
  self.RunHunt(failrate=-1)

  try:
    self.ProcessHuntOutputPlugins()
  except process_results.ResultsProcessingError:
    pass

  success_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_results_ran_through_plugin",
      fields=["FailingDummyHuntOutputPlugin"])
  errors_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_output_plugin_errors", fields=["FailingDummyHuntOutputPlugin"])

  self.assertEqual(success_count - prev_success_count, 0)
  self.assertEqual(errors_count - prev_errors_count, 1)
def _GetRemotePublicKey(self, common_name):
  try:
    # See if we have this client already cached.
    remote_key = self.pub_key_cache.Get(str(common_name))
    stats_collector_instance.Get().IncrementCounter(
        "grr_pub_key_cache", fields=["hits"])
    return remote_key
  except KeyError:
    stats_collector_instance.Get().IncrementCounter(
        "grr_pub_key_cache", fields=["misses"])

  # Fetch the client's cert and extract the key.
  client = aff4.FACTORY.Create(
      common_name,
      aff4.AFF4Object.classes["VFSGRRClient"],
      mode="rw",
      token=self.token)
  cert = client.Get(client.Schema.CERT)
  if not cert:
    stats_collector_instance.Get().IncrementCounter("grr_unique_clients")
    raise communicator.UnknownClientCert("Cert not found")

  if rdfvalue.RDFURN(cert.GetCN()) != rdfvalue.RDFURN(common_name):
    logging.error("Stored cert mismatch for %s", common_name)
    raise communicator.UnknownClientCert("Stored cert mismatch")

  self.client_cache.Put(common_name, client)
  stats_collector_instance.Get().SetGaugeValue(
      "grr_frontendserver_client_cache_size", len(self.client_cache))

  pub_key = cert.GetPublicKey()
  self.pub_key_cache.Put(common_name, pub_key)
  return pub_key
def testUpdatesStatsCounterOnOutputPluginFailure(self):
  plugin_descriptor = rdf_output_plugin.OutputPluginDescriptor(
      plugin_name="FailingDummyHuntOutputPlugin")

  prev_success_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_results_ran_through_plugin",
      fields=["FailingDummyHuntOutputPlugin"])
  prev_errors_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_output_plugin_errors", fields=["FailingDummyHuntOutputPlugin"])

  self._CreateAndRunHunt(
      num_clients=5,
      client_mock=hunt_test_lib.SampleHuntMock(failrate=-1),
      client_rule_set=foreman_rules.ForemanClientRuleSet(),
      client_rate=0,
      args=self.GetFileHuntArgs(),
      output_plugins=[plugin_descriptor])

  success_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_results_ran_through_plugin",
      fields=["FailingDummyHuntOutputPlugin"])
  errors_count = stats_collector_instance.Get().GetMetricValue(
      "hunt_output_plugin_errors", fields=["FailingDummyHuntOutputPlugin"])

  # 1 error for each client makes it 5 errors, 0 results.
  self.assertEqual(success_count - prev_success_count, 0)
  self.assertEqual(errors_count - prev_errors_count, 5)
def testError(self):
  with utils.Stubber(standard.RunHunt, "Run", Error):
    cron_manager = cronjobs.CronManager()
    create_flow_args = rdf_cronjobs.CreateCronJobArgs()

    job_id = cron_manager.CreateJob(cron_args=create_flow_args)

    prev_failure_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_failure", fields=[job_id])
    prev_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])

    cron_manager.RunOnce(token=self.token)
    cron_manager._GetThreadPool().Join()

    cron_job = cron_manager.ReadJob(job_id, token=self.token)
    self.assertFalse(cron_manager.JobIsRunning(cron_job))
    runs = cron_manager.ReadJobRuns(job_id)
    self.assertEqual(len(runs), 1)
    run = runs[0]

    self.assertEqual(cron_job.last_run_status, "ERROR")
    self.assertEqual(run.status, "ERROR")

    self.assertTrue(run.backtrace)
    self.assertIn("cron job error", run.backtrace)

    current_failure_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_failure", fields=[job_id])
    current_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])

    self.assertEqual(current_failure_value, prev_failure_value + 1)
    self.assertEqual(current_latency_value.count,
                     prev_latency_value.count + 1)
def testTaskRetransmissionsAreCorrectlyAccounted(self):
  test_queue = rdfvalue.RDFURN("fooSchedule")
  task = rdf_flows.GrrMessage(
      queue=test_queue,
      task_ttl=5,
      session_id="aff4:/Test",
      generate_task_id=True)

  manager = queue_manager.QueueManager(token=self.token)
  with data_store.DB.GetMutationPool() as pool:
    manager.Schedule([task], pool)

  # Get a lease on the task.
  tasks = manager.QueryAndOwn(test_queue, lease_seconds=100, limit=100)
  self.assertLen(tasks, 1)
  self.assertEqual(tasks[0].task_ttl, 4)

  self.assertEqual(
      stats_collector_instance.Get().GetMetricValue(
          "grr_task_retransmission_count"),
      self.retransmission_metric_value)

  # Get a lease on the task 100 seconds later.
  self._current_mock_time += 110
  tasks = manager.QueryAndOwn(test_queue, lease_seconds=100, limit=100)
  self.assertLen(tasks, 1)
  self.assertEqual(tasks[0].task_ttl, 3)

  self.assertEqual(
      stats_collector_instance.Get().GetMetricValue(
          "grr_task_retransmission_count"),
      self.retransmission_metric_value + 1)
def do_POST(self):  # pylint: disable=g-bad-name
  """Process encrypted message bundles."""
  self._IncrementActiveCount()
  try:
    if self.path.startswith("/upload"):
      stats_collector_instance.Get().IncrementCounter(
          "frontend_http_requests", fields=["upload", "http"])

      logging.error("Received a file upload request over HTTP, which is no "
                    "longer supported.")
      self.Send("File upload through HTTP is no longer supported",
                status=404)
    else:
      stats_collector_instance.Get().IncrementCounter(
          "frontend_http_requests", fields=["control", "http"])
      self.Control()

  except Exception as e:  # pylint: disable=broad-except
    if flags.FLAGS.debug:
      pdb.post_mortem()

    logging.exception("Had to respond with status 500.")
    self.Send("Error: %s" % e, status=500)
  finally:
    self._DecrementActiveCount()
def TerminateStuckRunIfNeeded(self, job):
  """Cleans up job state if the last run is stuck."""
  if job.current_run_id and job.last_run_time and job.lifetime:
    now = rdfvalue.RDFDatetime.Now()
    # We add an additional 10 minutes to give the job run a chance to kill
    # itself during one of the HeartBeat calls (HeartBeat checks whether a
    # cron job run has been running for too long and raises if it has).
    expiration_time = (
        job.last_run_time + job.lifetime + rdfvalue.DurationSeconds("10m"))
    if now > expiration_time:
      run = data_store.REL_DB.ReadCronJobRun(job.cron_job_id,
                                             job.current_run_id)
      run.status = "LIFETIME_EXCEEDED"
      run.finished_at = now
      data_store.REL_DB.WriteCronJobRun(run)
      data_store.REL_DB.UpdateCronJob(
          job.cron_job_id, current_run_id=None, last_run_status=run.status)
      stats_collector_instance.Get().RecordEvent(
          "cron_job_latency", (now - job.last_run_time).seconds,
          fields=[job.cron_job_id])
      stats_collector_instance.Get().IncrementCounter(
          "cron_job_timeout", fields=[job.cron_job_id])
      return True

  return False
def _CheckApprovalsForTokenWithoutReason(self, token, target):
  approval_root_urn = aff4.ROOT_URN.Add("ACL").Add(target.Path()).Add(
      token.username)
  try:
    cached_token = self.acl_cache.Get(approval_root_urn)
    stats_collector_instance.Get().IncrementCounter(
        "approval_searches", fields=["without_reason", "cache"])

    token.is_emergency = cached_token.is_emergency
    token.reason = cached_token.reason
    return True
  except KeyError:
    stats_collector_instance.Get().IncrementCounter(
        "approval_searches", fields=["without_reason", "data_store"])

    approved_token = security.Approval.GetApprovalForObject(
        target, token=token)

    token.reason = approved_token.reason
    token.is_emergency = approved_token.is_emergency

    self.acl_cache.Put(approval_root_urn, approved_token)
    return True
def _GetRemotePublicKey(self, common_name):
  remote_client_id = common_name.Basename()
  try:
    # See if we have this client already cached.
    remote_key = self.pub_key_cache.Get(remote_client_id)
    stats_collector_instance.Get().IncrementCounter(
        "grr_pub_key_cache", fields=["hits"])
    return remote_key
  except KeyError:
    stats_collector_instance.Get().IncrementCounter(
        "grr_pub_key_cache", fields=["misses"])

  try:
    md = data_store.REL_DB.ReadClientMetadata(remote_client_id)
  except db.UnknownClientError:
    stats_collector_instance.Get().IncrementCounter("grr_unique_clients")
    raise communicator.UnknownClientCertError("Cert not found")

  cert = md.certificate
  if cert is None:
    raise communicator.UnknownClientCertError("Cert not found")

  if rdfvalue.RDFURN(cert.GetCN()) != rdfvalue.RDFURN(common_name):
    logging.error("Stored cert mismatch for %s", common_name)
    raise communicator.UnknownClientCertError("Stored cert mismatch")

  pub_key = cert.GetPublicKey()
  self.pub_key_cache.Put(common_name, pub_key)
  return pub_key
def testTimeout(self):
  wait_event = threading.Event()
  signal_event = threading.Event()
  waiting_func = functools.partial(WaitAndSignal, wait_event, signal_event)

  fake_time = rdfvalue.RDFDatetime.Now()
  with utils.Stubber(standard.RunHunt, "Run", waiting_func):
    with test_lib.FakeTime(fake_time):
      cron_manager = cronjobs.CronManager()
      create_flow_args = rdf_cronjobs.CreateCronJobArgs()
      create_flow_args.lifetime = "1h"

      job_id = cron_manager.CreateJob(cron_args=create_flow_args)

      cron_manager.RunOnce(token=self.token)
      # Make sure the cron job has actually been started.
      signal_event.wait(10)

      cron_job = cron_manager.ReadJob(job_id, token=self.token)
      self.assertTrue(cron_manager.JobIsRunning(cron_job))
      runs = cron_manager.ReadJobRuns(job_id)
      self.assertEqual(len(runs), 1)
      run = runs[0]

      self.assertEqual(cron_job.current_run_id, run.run_id)
      self.assertEqual(run.status, "RUNNING")

    prev_timeout_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_timeout", fields=[job_id])
    prev_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])

    fake_time += rdfvalue.Duration("2h")
    with test_lib.FakeTime(fake_time):
      wait_event.set()
      cron_manager._GetThreadPool().Join()

      cron_job = cron_manager.ReadJob(job_id, token=self.token)
      runs = cron_manager.ReadJobRuns(job_id)
      self.assertEqual(len(runs), 1)
      run = runs[0]

      self.assertEqual(cron_job.last_run_status, "LIFETIME_EXCEEDED")
      self.assertEqual(run.status, "LIFETIME_EXCEEDED")

      # Check that timeout counter got updated.
      current_timeout_value = stats_collector_instance.Get().GetMetricValue(
          "cron_job_timeout", fields=[job_id])
      self.assertEqual(current_timeout_value - prev_timeout_value, 1)

      # Check that latency stat got updated.
      current_latency_value = stats_collector_instance.Get().GetMetricValue(
          "cron_job_latency", fields=[job_id])
      self.assertEqual(
          current_latency_value.count - prev_latency_value.count, 1)
      self.assertEqual(current_latency_value.sum - prev_latency_value.sum,
                       rdfvalue.Duration("2h").seconds)
def SetMaster(self, master=True):
  """Switch the is_master stat variable."""
  if master:
    logging.info("data center is now active.")
    stats_collector_instance.Get().SetGaugeValue("is_master", 1)
    self.is_master = True
  else:
    logging.info("data center became inactive.")
    stats_collector_instance.Get().SetGaugeValue("is_master", 0)
    self.is_master = False
def testAggregateViaSumAggregatesMultipleTimeSeriesIntoOne(self):
  # Write test data.
  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id="pid1",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(0))

  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id="pid2",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(0))

  stats_collector_instance.Get().IncrementCounter(_SINGLE_DIM_COUNTER)
  self.stats_store.WriteStats(
      process_id="pid1",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(90))
  self.stats_store.WriteStats(
      process_id="pid2",
      timestamp=rdfvalue.RDFDatetime.FromSecondsSinceEpoch(90))

  stats_data = self.stats_store.MultiReadStats(process_ids=["pid1", "pid2"])

  query = stats_store.StatsStoreDataQuery(stats_data)
  ts = query.In("pid.*").In(_SINGLE_DIM_COUNTER).TakeValue().Normalize(
      rdfvalue.Duration("30s"),
      0,
      rdfvalue.Duration("2m"),
      mode=timeseries.NORMALIZE_MODE_COUNTER).AggregateViaSum().ts

  # We expect 2 time series in the query:
  # 1970-01-01 00:00:00    1
  # 1970-01-01 00:00:30    1
  # 1970-01-01 00:01:00    1
  # 1970-01-01 00:01:30    3
  #
  # and:
  # 1970-01-01 00:00:00    2
  # 1970-01-01 00:00:30    2
  # 1970-01-01 00:01:00    2
  # 1970-01-01 00:01:30    3
  #
  # Therefore we expect the sum to look like:
  # 1970-01-01 00:00:00    3
  # 1970-01-01 00:00:30    3
  # 1970-01-01 00:01:00    3
  # 1970-01-01 00:01:30    6
  self.assertAlmostEqual(ts.data[0][0], 3)
  self.assertAlmostEqual(ts.data[1][0], 3)
  self.assertAlmostEqual(ts.data[2][0], 3)
  self.assertAlmostEqual(ts.data[3][0], 6)
  self.assertListEqual([t for _, t in ts.data],
                       [0.0 * 1e6, 30.0 * 1e6, 60.0 * 1e6, 90.0 * 1e6])
def testFetchedValuesCanBeLimitedByName(self):
  stats_collector_instance.Get().SetGaugeValue("int_gauge", 4242)
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=42)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=43)

  stats_history = self.stats_store.ReadStats(
      process_id=self.process_id, metric_name="counter")

  self.assertEqual(stats_history["counter"], [(1, 42), (2, 43)])
  self.assertTrue("int_gauge" not in stats_history)
def testFetchedValuesCanBeLimitedByTimeRange(self):
  stats_collector_instance.Get().SetGaugeValue("int_gauge", 4242)
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=42)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=43)

  stats_history = self.stats_store.ReadStats(
      process_id=self.process_id, timestamp=(0, 42))

  self.assertEqual(stats_history["counter"], [(1, 42)])
  self.assertEqual(stats_history["int_gauge"], [(4242, 42)])
def testValuesAreFetchedCorrectly(self):
  stats_collector_instance.Get().SetGaugeValue("int_gauge", 4242)
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=42)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=43)

  stats_history = self.stats_store.ReadStats(
      process_id=self.process_id, timestamp=self.stats_store.ALL_TIMESTAMPS)

  self.assertEqual(stats_history["counter"], [(1, 42), (2, 43)])
  self.assertEqual(stats_history["int_gauge"], [(4242, 42), (4242, 43)])
def testMultiReadStatsLimitsResultsByTimeRange(self):
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id="pid1", timestamp=42)
  self.stats_store.WriteStats(process_id="pid2", timestamp=42)
  self.stats_store.WriteStats(process_id="pid2", timestamp=44)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id="pid1", timestamp=44)

  results = self.stats_store.MultiReadStats(timestamp=(43, 100))
  self.assertEqual(sorted(iterkeys(results)), ["pid1", "pid2"])
  self.assertEqual(results["pid1"]["counter"], [(2, 44)])
  self.assertEqual(results["pid2"]["counter"], [(1, 44)])
def testMultiReadStatsWorksCorrectly(self):
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id="pid1", timestamp=42)
  self.stats_store.WriteStats(process_id="pid2", timestamp=42)
  self.stats_store.WriteStats(process_id="pid2", timestamp=43)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id="pid1", timestamp=43)

  results = self.stats_store.MultiReadStats()
  self.assertEqual(sorted(iterkeys(results)), ["pid1", "pid2"])
  self.assertEqual(results["pid1"]["counter"], [(1, 42), (2, 43)])
  self.assertEqual(results["pid2"]["counter"], [(1, 42), (1, 43)])
def do_GET(self):  # pylint: disable=g-bad-name
  """Serve the server pem with GET requests."""
  if self.path.startswith("/server.pem"):
    stats_collector_instance.Get().IncrementCounter(
        "frontend_http_requests", fields=["cert", "http"])
    self.ServerPem()
  elif self.path.startswith(self.rekall_profile_path):
    stats_collector_instance.Get().IncrementCounter(
        "frontend_http_requests", fields=["rekall", "http"])
    self.ServeRekallProfile(self.path)
  elif self.path.startswith(self.static_content_path):
    stats_collector_instance.Get().IncrementCounter(
        "frontend_http_requests", fields=["static", "http"])
    self.ServeStatic(self.path[len(self.static_content_path):])
def do_GET(self):  # pylint: disable=g-bad-name
  """Serve the server pem with GET requests."""
  self._IncrementActiveCount()
  try:
    if self.path.startswith("/server.pem"):
      stats_collector_instance.Get().IncrementCounter(
          "frontend_http_requests", fields=["cert", "http"])
      self.ServerPem()
    elif self.path.startswith(self.static_content_path):
      stats_collector_instance.Get().IncrementCounter(
          "frontend_http_requests", fields=["static", "http"])
      self.ServeStatic(self.path[len(self.static_content_path):])
  finally:
    self._DecrementActiveCount()
def testDeleteStatsInTimeRangeWorksCorrectly(self):
  stats_collector_instance.Get().SetGaugeValue("int_gauge", 4242)
  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=42)

  stats_collector_instance.Get().IncrementCounter("counter")
  self.stats_store.WriteStats(process_id=self.process_id, timestamp=44)

  self.stats_store.DeleteStats(process_id=self.process_id, timestamp=(0, 43))

  stats_history = self.stats_store.ReadStats(process_id=self.process_id)

  self.assertEqual(stats_history["counter"], [(2, 44)])
  self.assertEqual(stats_history["int_gauge"], [(4242, 44)])
def testKillOldFlows(self):
  with test_lib.FakeTime(0):
    cron_manager = aff4_cronjobs.GetCronManager()
    cron_args = rdf_cronjobs.CreateCronJobArgs(
        flow_name="FakeCronJob",
        frequency="1w",
        lifetime=FakeCronJob.lifetime)
    job_id = cron_manager.CreateJob(cron_args=cron_args, token=self.token)

    cron_manager.RunOnce(token=self.token)

    cron_job = cron_manager.ReadJob(job_id, token=self.token)
    self.assertTrue(cron_job.IsRunning())
    self.assertFalse(cron_job.KillOldFlows())

  prev_timeout_value = stats_collector_instance.Get().GetMetricValue(
      "cron_job_timeout", fields=[job_id])
  prev_latency_value = stats_collector_instance.Get().GetMetricValue(
      "cron_job_latency", fields=[job_id])

  # Fast forward one day.
  with test_lib.FakeTime(24 * 60 * 60 + 1):
    flow_urn = cron_job.Get(cron_job.Schema.CURRENT_FLOW_URN)

    cron_manager.RunOnce(token=self.token)
    cron_job = cron_manager.ReadJob(job_id, token=self.token)
    self.assertFalse(cron_job.IsRunning())

    # Check the termination log.
    log_collection = flow.GRRFlow.LogCollectionForFID(flow_urn)
    for line in log_collection:
      if line.urn == flow_urn:
        self.assertTrue("lifetime exceeded" in str(line.log_message))

    # Check that timeout counter got updated.
    current_timeout_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_timeout", fields=[job_id])
    self.assertEqual(current_timeout_value - prev_timeout_value, 1)

    # Check that latency stat got updated.
    current_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])
    self.assertEqual(
        current_latency_value.count - prev_latency_value.count, 1)
    self.assertEqual(current_latency_value.sum - prev_latency_value.sum,
                     24 * 60 * 60 + 1)
def testRunningJobs(self):
  event = threading.Event()
  waiting_func = functools.partial(WaitForEvent, event)

  with mock.patch.object(standard.RunHunt, "Run", wraps=waiting_func):
    cron_manager = cronjobs.CronManager()
    create_flow_args = rdf_cronjobs.CreateCronJobArgs(
        frequency="1w", lifetime="1d")

    job_id = cron_manager.CreateJob(cron_args=create_flow_args)

    prev_timeout_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_timeout", fields=[job_id])
    prev_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])

    cron_manager.RunOnce(token=self.token)

    cron_job = cron_manager.ReadJob(job_id, token=self.token)
    self.assertTrue(cron_manager.JobIsRunning(cron_job))
    runs = cron_manager.ReadJobRuns(job_id)
    self.assertLen(runs, 1)
    run = runs[0]

    self.assertEqual(cron_job.current_run_id, run.run_id)
    self.assertEqual(run.status, "RUNNING")

    event.set()
    cron_manager._GetThreadPool().Join()

    cron_job = cron_manager.ReadJob(job_id, token=self.token)
    self.assertFalse(cron_manager.JobIsRunning(cron_job))
    runs = cron_manager.ReadJobRuns(job_id)
    self.assertLen(runs, 1)
    run = runs[0]

    self.assertFalse(cron_job.current_run_id)
    self.assertEqual(run.status, "FINISHED")

    # Check that the timeout counter was not updated.
    current_timeout_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_timeout", fields=[job_id])
    self.assertEqual(current_timeout_value, prev_timeout_value)

    # Check that the latency stat got updated.
    current_latency_value = stats_collector_instance.Get().GetMetricValue(
        "cron_job_latency", fields=[job_id])
    self.assertEqual(current_latency_value.count - prev_latency_value.count,
                     1)