def search():
    slo = ServiceSLOMetrics()
    slo_results = slo.get_slometrics_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)
    slo_gauge = Gauge('serviceslometrics',
                      labelnames=('cluster', 'namespace', 'service',
                                  'slitype', 'slodoc', 'name', 'type'),
                      documentation=('ServiceSLOMetrics by cluster, '
                                     'service, namespace, slitype, '
                                     'slodoc, name, type'),
                      registry=registry)

    for result in slo_results:
        slo_gauge.labels(cluster=result.Cluster.name,
                         namespace=result.Namespace.name,
                         service=result.Service.name,
                         slitype=result.SLIType.name,
                         slodoc=result.SLODoc.name,
                         name=result.ServiceSLO.name,
                         type='slo_value').set(result.ServiceSLO.value)
        slo_gauge.labels(cluster=result.Cluster.name,
                         namespace=result.Namespace.name,
                         service=result.Service.name,
                         slitype=result.SLIType.name,
                         slodoc=result.SLODoc.name,
                         name=result.ServiceSLO.name,
                         type='slo_target').set(result.ServiceSLO.target)

    headers = {'Content-type': 'text/plain'}
    return Response(generate_latest(registry=registry), 200, headers)
def test_namespace(self):
    collector = ProcessCollector(proc=self.test_proc,
                                 pid=lambda: 26231,
                                 registry=self.registry,
                                 namespace='n')
    collector._ticks = 100

    self.assertEqual(
        17.21, self.registry.get_sample_value('n_process_cpu_seconds_total'))
    self.assertEqual(
        56274944.0,
        self.registry.get_sample_value('n_process_virtual_memory_bytes'))
    self.assertEqual(
        8114176,
        self.registry.get_sample_value('n_process_resident_memory_bytes'))
    self.assertEqual(
        1418184099.75,
        self.registry.get_sample_value('n_process_start_time_seconds'))
    self.assertEqual(2048.0,
                     self.registry.get_sample_value('n_process_max_fds'))
    self.assertEqual(5.0,
                     self.registry.get_sample_value('n_process_open_fds'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_cpu_seconds_total'))
def test_working_584(self):
    collector = ProcessCollector(proc=self.test_proc,
                                 pid=lambda: "584\n",
                                 registry=self.registry)
    collector._ticks = 100

    self.assertEqual(
        0.0, self.registry.get_sample_value('process_cpu_seconds_total'))
    self.assertEqual(
        10395648.0,
        self.registry.get_sample_value('process_virtual_memory_bytes'))
    self.assertEqual(
        634880,
        self.registry.get_sample_value('process_resident_memory_bytes'))
    self.assertEqual(
        1418291667.75,
        self.registry.get_sample_value('process_start_time_seconds'))
    self.assertEqual(None, self.registry.get_sample_value('process_max_fds'))
    self.assertEqual(None, self.registry.get_sample_value('process_open_fds'))
def test_working_584(self):
    collector = ProcessCollector(proc=self.test_proc,
                                 pid=lambda: "584\n",
                                 registry=self.registry)
    collector._ticks = 100
    collector._pagesize = 4096

    self.assertEqual(
        0.0, self.registry.get_sample_value('process_cpu_seconds_total'))
    self.assertEqual(
        10395648.0,
        self.registry.get_sample_value('process_virtual_memory_bytes'))
    self.assertEqual(
        634880,
        self.registry.get_sample_value('process_resident_memory_bytes'))
    self.assertEqual(
        1418291667.75,
        self.registry.get_sample_value('process_start_time_seconds'))
    self.assertEqual(None, self.registry.get_sample_value('process_max_fds'))
    self.assertEqual(None, self.registry.get_sample_value('process_open_fds'))
def test_working_fake_pid(self):
    collector = ProcessCollector(proc=self.test_proc,
                                 pid=lambda: 123,
                                 registry=self.registry)
    collector._ticks = 100

    self.assertEqual(
        None, self.registry.get_sample_value('process_cpu_seconds_total'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_virtual_memory_bytes'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_resident_memory_bytes'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_start_time_seconds'))
    self.assertEqual(None, self.registry.get_sample_value('process_max_fds'))
    self.assertEqual(None, self.registry.get_sample_value('process_open_fds'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_fake_namespace'))
def runDumper():
    logger = initiateLogger()
    explorer_dumper = explorerDumperInit(logger)
    metrics_registry = explorer_dumper.getMetricsRegistry()
    process_collector = ProcessCollector(registry=metrics_registry)
    while True:
        process_collector.collect()
        logger.info("Preparing to run dumper")
        explorer_dumper.dump()
        logger.info('Dump completed. Restarting in %s seconds.', POLL_TIME)
        sleep(POLL_TIME)
def test_working_fake_pid(self):
    collector = ProcessCollector(proc=self.test_proc,
                                 pid=lambda: 123,
                                 registry=self.registry)
    collector._ticks = 100
    collector._pagesize = 4096

    self.assertEqual(
        None, self.registry.get_sample_value('process_cpu_seconds_total'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_virtual_memory_bytes'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_resident_memory_bytes'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_start_time_seconds'))
    self.assertEqual(None, self.registry.get_sample_value('process_max_fds'))
    self.assertEqual(None, self.registry.get_sample_value('process_open_fds'))
    self.assertEqual(
        None, self.registry.get_sample_value('process_fake_namespace'))
def search():
    imv = ImageManifestVuln()
    imv_results = imv.get_vulnerabilities_summary()
    dpv = DeploymentValidationData()
    dpv_results = dpv.get_deploymentvalidation_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)
    imv_counter = Counter('imagemanifestvuln',
                          labelnames=('cluster', 'namespace', 'severity'),
                          documentation='Vulnerabilities total per severity',
                          registry=registry)
    dv_counter = Counter('deploymentvalidation',
                         labelnames=('cluster', 'namespace', 'validation',
                                     'status'),
                         documentation='Validations by validation type',
                         registry=registry)

    for result in imv_results:
        imv_counter.labels(cluster=result.Cluster.name,
                           namespace=result.Namespace.name,
                           severity=result.Severity.name).inc(result.Count)

    for result in dpv_results:
        dv_counter.labels(cluster=result.Cluster.name,
                          namespace=result.Namespace.name,
                          validation=result.Validation.name,
                          status=result.Validation.status).inc(result.Count)

    headers = {'Content-type': 'text/plain'}
    return Response(generate_latest(registry=registry), 200, headers)
def main():
    """Main program.

    Parse arguments, start webserver to serve /metrics.
    """
    args = ARGS.parse_args()
    if args.level >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.level == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.level == 0:
        logging.basicConfig(level=logging.WARNING)

    LOG.debug("Starting HTTP server")
    httpd_thread = start_http_server(args.port, addr=args.addr)

    LOG.debug("Registering StatsPluginCollector")
    REGISTRY.register(
        StatsPluginCollector(
            args.endpoint,
            args.metrics_file,
            max_retries=args.max_retries,
            ssl_verify=args.sslverification,
        ))

    if args.procstats:
        LOG.debug("Registering ProcessCollector")
        ProcessCollector(pid=lambda: get_ts_pid(args.pidfile),
                         namespace="trafficserver")

    LOG.info("Listening on :{port}".format(port=args.port))

    # Wait for the webserver
    httpd_thread.join()
def main():
    """Main program.

    Parse arguments, start webserver to serve /metrics.
    """
    args = ARGS.parse_args()
    if args.level >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.level == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.level == 0:
        logging.basicConfig(level=logging.WARNING)

    LOG.debug('Starting HTTP server')
    httpd_thread = start_http_server(args.port, addr=args.addr)

    LOG.debug('Registering StatsPluginCollector')
    REGISTRY.register(StatsPluginCollector(args.endpoint))

    if not args.no_procstats:
        LOG.debug('Registering ProcessCollector')
        REGISTRY.register(
            ProcessCollector(pid=lambda: get_ts_pid(args.pidfile),
                             namespace='trafficserver'))

    LOG.info('Listening on :{port}'.format(port=args.port))

    # Wait for the webserver
    httpd_thread.join()
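Both main() variants above read flags from an ARGS parser defined elsewhere in their modules. A plausible argparse sketch for the second variant follows; every flag name is inferred from the attributes read above, and all defaults and help texts are assumptions, not the projects' actual parser.

import argparse

# Hypothetical reconstruction of the ARGS parser the second main() expects.
ARGS = argparse.ArgumentParser(description='Traffic Server metrics exporter')
ARGS.add_argument('--port', type=int, default=8888,
                  help='port to serve /metrics on (assumed default)')
ARGS.add_argument('--addr', default='',
                  help='address to bind the HTTP server to')
ARGS.add_argument('--endpoint',
                  help='stats-over-http endpoint to scrape')
ARGS.add_argument('--pidfile',
                  help='file holding the Traffic Server PID')
ARGS.add_argument('--no-procstats', action='store_true',
                  help='skip registering the trafficserver ProcessCollector')
ARGS.add_argument('-v', dest='level', action='count', default=0,
                  help='increase log verbosity (-v for INFO, -vv for DEBUG)')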
def search():
    imv = ImageManifestVuln()
    imv_results = imv.get_vulnerabilities_summary()
    dpv = DeploymentValidationData()
    dpv_results = dpv.get_deploymentvalidation_summary()
    slo = ServiceSLOMetrics()
    slo_results = slo.get_slometrics_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)
    imv_counter = Counter('imagemanifestvuln',
                          labelnames=('cluster', 'namespace', 'severity'),
                          documentation='Vulnerabilities total per severity',
                          registry=registry)
    dv_counter = Counter('deploymentvalidation',
                         labelnames=('cluster', 'namespace', 'validation',
                                     'status'),
                         documentation='Validations by validation type',
                         registry=registry)
    slo_gauge = Gauge('serviceslometrics',
                      labelnames=('cluster', 'namespace', 'service',
                                  'slitype', 'name', 'type'),
                      documentation=('ServiceSLOMetrics by cluster, '
                                     'service, namespace, slitype, '
                                     'name, type'),
                      registry=registry)

    for result in imv_results:
        imv_counter.labels(cluster=result.Cluster.name,
                           namespace=result.Namespace.name,
                           severity=result.Severity.name).inc(result.Count)

    for result in dpv_results:
        dv_counter.labels(cluster=result.Cluster.name,
                          namespace=result.Namespace.name,
                          validation=result.Validation.name,
                          status=result.Validation.status).inc(result.Count)

    for result in slo_results:
        slo_gauge.labels(cluster=result.Cluster.name,
                         namespace=result.Namespace.name,
                         service=result.Service.name,
                         slitype=result.SLIType.name,
                         name=result.ServiceSLO.name,
                         type='slo_value').set(result.ServiceSLO.value)
        slo_gauge.labels(cluster=result.Cluster.name,
                         namespace=result.Namespace.name,
                         service=result.Service.name,
                         slitype=result.SLIType.name,
                         name=result.ServiceSLO.name,
                         type='slo_target').set(result.ServiceSLO.target)

    headers = {'Content-type': 'text/plain'}
    return Response(generate_latest(registry=registry), 200, headers)
def __init__(self, prometheus, live: bool, project_name: str, registry=None):
    self.project_name = project_name
    self.prometheus = prometheus
    self.namespace = "csv_live" if live else "csv_hist"
    self.label_values = {self.project_name}
    self.registry = CognitePrometheus.registry if registry is None else registry

    self.info = Info("host", "Host info",
                     namespace=self.namespace,
                     registry=CognitePrometheus.registry)
    self.info.info({
        "hostname": socket.gethostname(),
        "fqdn": socket.getfqdn(),
    })

    self.process = ProcessCollector(namespace=self.namespace,
                                    registry=CognitePrometheus.registry)
    self.platform = PlatformCollector(registry=CognitePrometheus.registry)

    self.created_time_series_counter = self._create_metric(
        Counter,
        "created_time_series_total",
        "Number of time series created since the extractor started running",
    )
    self.all_data_points_counter = self._create_metric(
        Counter,
        "posted_data_points_total",
        "Number of datapoints posted since the extractor started running",
    )
    self.count_of_time_series_gauge = self._create_metric(
        Gauge,
        "posted_time_series_count",
        "Number of timeseries that had valid datapoints in the current file",
    )
    self.processing_failed_counter = self._create_metric(
        Counter,
        "failed_processing_files",
        "Number of files where processing failed since the extractor started running",
    )
    self.available_csv_files_gauge = self._create_metric(
        Gauge,
        "available_csv_files",
        "Number of csv files in the folder that could be processed by the extractor",
    )
    self.unprocessed_files_gauge = self._create_metric(
        Gauge,
        "unprocessed_files",
        "Number of csv files that remain to be processed in this batch",
    )
    self.successfully_processed_files_gauge = self._create_metric(
        Gauge,
        "successfully_processed_files",
        "Number of csv files that have been successfully processed in this batch",
    )
def metrics():
    registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(registry)
    ProcessCollector(registry=registry)
    PlatformCollector(registry=registry)

    i = Info('build_info', "The build information", registry=registry)
    i.info({"version": "1.2.3"})

    data = generate_latest(registry)
    return Response(data, mimetype=CONTENT_TYPE_LATEST)
def __getattr__(self, item):
    """
    Safe class attribute initialization.

    We do it here (instead of in __init__()) so we don't overwrite
    the attributes when a new instance is created.
    """
    if item == 'registry':
        # This will create the self.registry attribute, which
        # contains an instance of the CollectorRegistry.
        setattr(self, item, CollectorRegistry())
        # Adding a ProcessCollector to the registry. The
        # ProcessCollector does not have to be an attribute,
        # since it's never manipulated directly.
        ProcessCollector(registry=self.registry)
    elif item == 'histogram':
        # Adding a Histogram to the registry and also making
        # the Histogram available as an attribute so we can
        # call its observe()
        setattr(
            self, item,
            Histogram(name='request_latency_seconds',
                      labelnames=('cache', 'status', 'method'),
                      documentation='request latency histogram',
                      registry=self.registry))
    elif item == 'counter':
        # Adding a Counter to the registry and also making
        # the Counter available as an attribute so we can
        # call its inc()
        setattr(
            self, item,
            Counter(name='http_request',
                    documentation='total requests',
                    registry=self.registry))
    elif item == 'gauge_cache_size':
        setattr(
            self, item,
            Gauge(name='github_mirror_cache_size',
                  documentation='cache size in bytes',
                  registry=self.registry))
    elif item == 'gauge_cached_objects':
        setattr(
            self, item,
            Gauge(name='github_mirror_cached_objects',
                  documentation='number of cached objects',
                  registry=self.registry))
    else:
        raise AttributeError(f"object has no attribute '{item}'")
    return getattr(self, item)
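A short sketch of how this lazy pattern behaves in practice, assuming the __getattr__ above lives on a class called Metrics (the class name is hypothetical): the first access builds and registers the collector via setattr(), and later accesses find the cached instance attribute, so __getattr__ is never called again for that name.

m = Metrics()
m.counter.inc()            # first access creates the Counter, then increments it
m.histogram.labels(cache='hit', status='200', method='GET').observe(0.12)
assert m.counter is m.counter   # subsequent lookups hit the cached attribute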
def application(env, start_response):
    try:
        for namespace, search_terms in _PROCESSES.items():
            found_processes = _find_processes_matching(search_terms)

            # Many flaws in this; it assumes a single master and
            # optionally multiple child workers.
            master_pids = set()
            for process in found_processes.values():
                if process.ppid() in found_processes.keys():
                    master_pids.add(process.ppid())
            master_pids = sorted(master_pids)
            child_pids = sorted({
                pid for pid in found_processes.keys()
                if pid not in master_pids
            })

            for master_pid in master_pids:
                # Annoying that we cannot seem to use labels easily (without
                # making them ourselves). The process collector is probably
                # too limited for what we would want.
                pid_namespace = '{}_master'.format(namespace)
                try:
                    ProcessCollector(namespace=pid_namespace,
                                     pid=lambda: master_pid).collect()
                except Exception as ex:
                    print('Unable to collect stats for {}, PID {}: {}'.format(
                        pid_namespace, master_pid, ex))

            for index, child_pid in enumerate(child_pids):
                pid_namespace = '{}_worker_{}'.format(namespace, index)
                try:
                    ProcessCollector(namespace=pid_namespace,
                                     pid=lambda: child_pid).collect()
                except Exception as ex:
                    print('Unable to collect stats for {}, PID {}: {}'.format(
                        pid_namespace, child_pid, ex))

        start_response('200 OK', [('Content-Type', 'text/html')])
        return [generate_latest()]
    except Exception as ex:
        print(ex)
        traceback.print_tb(sys.exc_info()[2])
        start_response('500 INTERNAL SERVER ERROR',
                       [('Content-Type', 'text/html')])
        return [str(ex).encode()]
def export_my_metrics(backend_connected=False):
    """Return self-metrics as a Prometheus export."""
    backend_up_value = '1.0' if backend_connected else '0.0'
    registry = CollectorRegistry()
    g = Gauge(
        my_name + '_threads',
        'Number of threads performing moksha.monitoring.socket connections.',
        registry=registry)
    g.set(worker_threads_count())
    ProcessCollector(namespace=my_name, registry=registry)
    return generate_latest(registry).decode() + (
        '# HELP ' + my_name + '_up Show that we\'re connected to the backend!\n'
        '# TYPE ' + my_name + '_up untyped\n'
        '' + my_name + '_up ' + backend_up_value + '\n')
def search():
    imv = ImageManifestVuln()
    results = imv.get_vulnerabilities_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)
    counter = Counter('imagemanifestvuln',
                      labelnames=('cluster', 'namespace', 'severity'),
                      documentation='Vulnerabilities total per severity',
                      registry=registry)

    for result in results:
        counter.labels(cluster=result.Cluster.name,
                       namespace=result.Namespace.name,
                       severity=result.Severity.name).inc(result.Count)

    headers = {'Content-type': 'text/plain'}
    return Response(generate_latest(registry=registry), 200, headers)
def search():
    dpv = DeploymentValidationData()
    dpv_results = dpv.get_deploymentvalidation_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)
    dv_counter = Counter('deploymentvalidation',
                         labelnames=('cluster', 'namespace', 'validation',
                                     'status'),
                         documentation='Validations by validation type',
                         registry=registry)

    for result in dpv_results:
        dv_counter.labels(cluster=result.Cluster.name,
                          namespace=result.Namespace.name,
                          validation=result.Validation.name,
                          status=result.Validation.status).inc(result.Count)

    headers = {'Content-type': 'text/plain'}
    return Response(generate_latest(registry=registry), 200, headers)
import os
import tempfile

from prometheus_client import CollectorRegistry
from prometheus_client import ProcessCollector
from prometheus_client import multiprocess
from prometheus_client import Counter
from prometheus_client import generate_latest

if not os.environ.get('prometheus_multiproc_dir'):
    dir_name = tempfile.mkdtemp(prefix='mts-prometheus-multiproc-')
    os.environ.setdefault('prometheus_multiproc_dir', dir_name)

registry = CollectorRegistry()
ProcessCollector(registry=registry)
multiprocess.MultiProcessCollector(registry)

failed_tag_build_requests_counter = Counter(
    'failed_tag_build_requests',
    'The number of failed tagBuild API calls.',
    registry=registry)

matched_module_builds_counter = Counter(
    'matched_module_builds',
    'The number of module builds which are matched by rule(s) to be tagged.',
    registry=registry)

messaging_tx_failed_counter = Counter(
    'messaging_tx_failed',
    'The number of errors that occurred while sending messages to the bus.',
    registry=registry)
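When a multiprocess setup like the one above runs under a preforking server such as gunicorn, prometheus_client expects each dead worker's metric files to be cleaned up. A minimal sketch of that hook, assuming a gunicorn config file (the file name is conventional, not taken from this project):

# gunicorn.conf.py (assumed file name) -- remove a worker's metric files
# when it exits, as prometheus_client's multiprocess mode expects.
from prometheus_client import multiprocess

def child_exit(server, worker):
    multiprocess.mark_process_dead(worker.pid)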
def _configure_registry(self, include_process_stats: bool = False):
    """Configure the MetricRegistry."""
    if include_process_stats:
        self.registry.register_additional_collector(
            ProcessCollector(registry=None))
def setup_monitoring(
    app: web.Application,
    app_name: str,
    *,
    enter_middleware_cb: Optional[EnterMiddlewareCB] = None,
    exit_middleware_cb: Optional[ExitMiddlewareCB] = None,
    **app_info_kwargs,
):
    # app-scope registry
    target_info = {"application_name": app_name}
    target_info.update(app_info_kwargs)
    app[kCOLLECTOR_REGISTRY] = reg = CollectorRegistry(
        auto_describe=False, target_info=target_info
    )
    # automatically collects process metrics, see [https://github.com/prometheus/client_python]
    app[kPROCESS_COLLECTOR] = ProcessCollector(registry=reg)
    # automatically collects python_info metrics, see [https://github.com/prometheus/client_python]
    app[kPLATFORM_COLLECTOR] = PlatformCollector(registry=reg)
    # automatically collects python garbage collector metrics, see
    # [https://github.com/prometheus/client_python]; prefixed with python_gc_
    app[kGC_COLLECTOR] = GCCollector(registry=reg)

    # Total number of requests processed
    app[kREQUEST_COUNT] = Counter(
        name="http_requests",
        documentation="Total requests count",
        labelnames=["app_name", "method", "endpoint", "http_status"],
        registry=reg,
    )

    app[kINFLIGHTREQUESTS] = Gauge(
        name="http_in_flight_requests",
        documentation="Number of requests in process",
        labelnames=["app_name", "method", "endpoint"],
        registry=reg,
    )

    app[kRESPONSELATENCY] = Summary(
        name="http_request_latency_seconds",
        documentation="Time processing a request",
        labelnames=["app_name", "method", "endpoint"],
        registry=reg,
    )

    # WARNING: ensure the ERROR middleware sits above this one
    #
    # non-API request/response (e.g. /metrics, /x/* ...)
    #    |
    #    |    API request/response (/v0/*)
    #    |     |
    #    v     |
    # ===== monitoring-middleware =====
    # == rest-error-middleware ====   |
    # == ...                    ==    |
    # == rest-envelope-middleware ==  v
    #
    # Ensures this is the first layer, but cannot guarantee the order in
    # which setup is applied.
    app.middlewares.insert(
        0,
        middleware_factory(
            app_name,
            enter_middleware_cb=enter_middleware_cb,
            exit_middleware_cb=exit_middleware_cb,
        ),
    )

    app.router.add_get("/metrics", metrics_handler)

    return True
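A minimal usage sketch for the helper above, assuming a plain aiohttp application (the handler and app name are hypothetical); extra keyword arguments end up in the registry's target_info:

from aiohttp import web

async def handle_root(request: web.Request) -> web.Response:
    return web.Response(text="ok")

app = web.Application()
app.router.add_get("/", handle_root)
setup_monitoring(app, "demo-service", version="0.1.0")  # version lands in target_info
web.run_app(app, port=8080)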
"""
Metric examples:

Counter   - A cumulative metric whose value can only increase or be reset on restart.
Gauge     - A metric that represents a single numerical value that can arbitrarily go up and down.
Histogram - Counts metrics and places them in configurable buckets; also provides sums (request durations and sizes).
Summary   - Similar to a histogram, but over a sliding time window.
"""
import time

from prometheus_client import (Counter, Enum, Gauge, Info, ProcessCollector,
                               start_http_server)

c = Counter('total_failures', 'Number of failures due to network errors')
g = Gauge('in_process_requests', 'Number of requests currently in process')
i = Info('my_app_info', 'The application version info')
e = Enum('my_app_state', 'The current state of the application',
         states=['started', 'stopped'])
p = ProcessCollector(namespace='mydaemon',
                     pid=lambda: open('/var/run/daemon.pid').read())


def main():
    c.inc(5)


if __name__ == '__main__':
    start_http_server(8001)
    main()
    time.sleep(5)
    main()
    time.sleep(5)
    main()
    time.sleep(5)
    main()
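The docstring above also describes Histogram and Summary, which this snippet never exercises. A small sketch of both (metric names and bucket boundaries are illustrative, not from the original):

from prometheus_client import Histogram, Summary

request_latency = Histogram('request_latency_seconds',
                            'Request latency in seconds',
                            buckets=[0.1, 0.5, 1.0, 5.0])
payload_size = Summary('payload_size_bytes', 'Size of request payloads')

request_latency.observe(0.42)   # falls into the 0.5 bucket
payload_size.observe(512)       # contributes to the _count and _sum series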
import random

from http.server import BaseHTTPRequestHandler

from prometheus_client import (CollectorRegistry, Counter, Enum, GCCollector,
                               Histogram, Info, PlatformCollector,
                               ProcessCollector, Summary)
from prometheus_client.utils import INF

c = Counter('requests_total', 'Total number of requests', registry=None)
s = Summary('requests_time', '', registry=None)
info = Info('author_info', 'author information', registry=None)
info.info({'name': 'jeffrey4l', 'email': '*****@*****.**'})
h = Histogram('random_integer', 'Request size (bytes)',
              buckets=[0, 2, 4, 6, 8, INF],
              registry=None)
e = Enum('task_state', 'Description of enum',
         states=['starting', 'running', 'stopped'])
platform_collector = PlatformCollector(registry=None)
p_collector = ProcessCollector(registry=None)
collectors = [c, s, info, h, e, platform_collector, p_collector]

global_registry = CollectorRegistry()
gc_collector = GCCollector(registry=global_registry)


class MyHandler(BaseHTTPRequestHandler):

    @s.time()
    def do_GET(self):
        c.inc()
        e.state(random.choice(['starting', 'running', 'stopped']))
        h.observe(random.randint(1, 11))


registry = CollectorRegistry(auto_describe=True)