def main(): """Main program. Parse arguments, start webserver to serve /metrics. """ args = ARGS.parse_args() if args.level >= 2: logging.basicConfig(level=logging.DEBUG) elif args.level == 1: logging.basicConfig(level=logging.INFO) elif args.level == 0: logging.basicConfig(level=logging.WARNING) LOG.debug('Starting HTTP server') httpd_thread = start_http_server(args.port, addr=args.addr) LOG.debug('Registering StatsPluginCollector') REGISTRY.register(StatsPluginCollector(args.endpoint)) if not args.no_procstats: LOG.debug('Registering ProcessCollector') REGISTRY.register(ProcessCollector( pid=lambda: get_ts_pid(args.pidfile), namespace='trafficserver')) LOG.info('Listening on :{port}'.format(port=args.port)) # Wait for the webserver httpd_thread.join()
def test_enabled(self):
    self.assertEqual(1, REGISTRY.get_sample_value('python_gc_enabled'))
    try:
        gc.disable()
        self.assertEqual(0, REGISTRY.get_sample_value('python_gc_enabled'))
    finally:
        gc.enable()
def _assert_task_states(self, states, cnt):
    for state in states:
        assert REGISTRY.get_sample_value(
            'celery_tasks', labels=dict(state=state)) == cnt
        task_by_name_label = dict(state=state, name=self.task)
        assert REGISTRY.get_sample_value(
            'celery_tasks_by_name', labels=task_by_name_label) == cnt
def test_debug(self):
    self.assertEqual(0, REGISTRY.get_sample_value('python_gc_debug'))
    try:
        gc.set_debug(gc.DEBUG_STATS)
        self.assertEqual(gc.DEBUG_STATS, REGISTRY.get_sample_value(
            'python_gc_debug'))
    finally:
        gc.set_debug(0)
def ttl_watchdog_unregister_old_metrics(now):
    for (name, last_update) in list(GAUGES_LAST_UPDATE.items()):
        if now - last_update > GAUGES_TTL:
            REGISTRY.unregister(GAUGES[name])
            del GAUGES[name]
            del GAUGES_LAST_UPDATE[name]
            for (other_name, label_values) in list(GAUGES_LABELS_LAST_UPDATE.keys()):
                if name == other_name:
                    del GAUGES_LABELS_LAST_UPDATE[(name, label_values)]
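# The watchdog above presumes a companion update path that stamps each gauge
# (and each label set) with the time it was last touched. A minimal sketch of
# that assumed bookkeeping follows, reusing the snippet's global names;
# update_gauge and its signature are hypothetical, not from the source.
import time

from prometheus_client import Gauge

GAUGES = {}
GAUGES_LAST_UPDATE = {}
GAUGES_LABELS_LAST_UPDATE = {}
GAUGES_TTL = 300  # seconds; assumed value


def update_gauge(name, label_names, label_values, value):
    """Set a gauge and mark both the metric and this label set as fresh."""
    now = time.time()
    if name not in GAUGES:
        # Gauge auto-registers itself with the default REGISTRY, which is
        # what lets the watchdog unregister it once it goes stale.
        GAUGES[name] = Gauge(name, name, label_names)
    if label_values:
        GAUGES[name].labels(*label_values).set(value)
        GAUGES_LABELS_LAST_UPDATE[(name, tuple(label_values))] = now
    else:
        GAUGES[name].set(value)
    GAUGES_LAST_UPDATE[name] = now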
def main(): """ Symbols list contain a list of pairs which describes stock symbols as used by Google API. Each element should be 'EXCHANGE:SYMBOL' examples: [ 'NASDAQ:GOOG', 'NASDAQ:CSCO', 'NYSE:IBM', 'BCBA:YPFD' ] """ start_http_server(int(sys.argv[1])) REGISTRY.register(QuoteCollector()) while True: time.sleep(1)
def test_rootLoggerExports(self):
    logging.error('There was an error.')
    self.assertEqual(
        1, REGISTRY.get_sample_value('python_logging_messages_total',
                                     labels={'logger': 'test_levels',
                                             'level': 'ERROR'}))
def test_prometheus_metrics_gauge():
    metrics = PrometheusMetricsFactory(namespace='test')
    gauge = metrics.create_gauge(name='jaeger:test_gauge', tags={'result': 'ok'})
    gauge(1)
    after = REGISTRY.get_sample_value('test_jaeger:test_gauge', {'result': 'ok'})
    assert 1 == after
def test_setLevel(self):
    logger = logging.getLogger('test_setLevel')
    logger.setLevel(logging.CRITICAL)
    logger.debug('debug message')
    logger.info('info message')
    logger.warning('warn message')
    logger.error('error message')
    logger.critical('critical message')
    for level in ('DEBUG', 'INFO', 'WARNING', 'ERROR'):
        self.assertEqual(
            None, REGISTRY.get_sample_value('python_logging_messages_total',
                                            labels={'logger': 'test_setLevel',
                                                    'level': level}))
    self.assertEqual(
        1, REGISTRY.get_sample_value('python_logging_messages_total',
                                     labels={'logger': 'test_setLevel',
                                             'level': 'CRITICAL'}))
def prometheus_exporter():
    registry = REGISTRY
    encoder, content_type = exposition.choose_encoder(
        request.headers.get('Accept'))
    if 'name[]' in request.args:
        # restricted_registry() expects a list of metric names, so use
        # getlist() rather than get(), which would return a single string.
        registry = REGISTRY.restricted_registry(
            request.args.getlist('name[]'))
    body = encoder(registry)
    return Response(body, content_type=content_type)
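# A quick way to exercise the view above with Flask's test client. The
# /metrics route and app wiring here are assumptions for illustration, not
# part of the snippet.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/metrics', 'metrics', prometheus_exporter)

with app.test_client() as client:
    # Full exposition, in the format negotiated via the Accept header.
    full = client.get('/metrics')
    # Restricted to the named metric families only.
    subset = client.get('/metrics?name[]=python_gc_objects_collected_total')
    assert len(subset.data) <= len(full.data)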
def main(args):
    port = int(args.port)
    with open(args.metrics_file_path) as metrics_file:
        metrics_yaml = metrics_file.read()
    metrics = parse_aws_metrics(metrics_yaml)
    collector = AwsMetricsCollector(metrics,
                                    boto3.Session(region_name=args.region))
    REGISTRY.register(collector)
    start_http_server(port)
    print("Serving at port: %s" % port)
    while True:
        try:
            collector.update()
            time.sleep(args.period_seconds)
        except KeyboardInterrupt:
            print("Caught SIGTERM - stopping...")
            break
    print("Done.")
def test_request_latency_count(self):
    method, path = 'TEST', 'PATH'
    before = REGISTRY.get_sample_value(
        'http_requests_latency_seconds_count',
        {'method': method, 'path': path}) or 0
    trigger_metrics(method, path, wait=True)
    after = REGISTRY.get_sample_value(
        'http_requests_latency_seconds_count',
        {'method': method, 'path': path})
    assert after is not None
    assert after - before == 1
def _app():
    class Collector:
        def __init__(self):
            self.metrics = []

        def set_metrics(self, metrics):
            self.metrics = metrics

        def collect(self):
            return self.metrics

    app = vadvisor.app.rest.app
    app.collector = Collector()
    app.metricStore = MetricStore()
    REGISTRY.register(LibvirtCollector(collector=app.collector))
    return app
def main(args):
    port = int(args.port)
    with open(args.metrics_file_path) as metrics_file:
        metrics_yaml = metrics_file.read()
    metrics = parse_aws_metrics(metrics_yaml)
    collector = AwsMetricsCollector(metrics, boto3.client("sts"),
                                    args.assume_role_arn,
                                    "prometheusAssumeRole",
                                    args.duration_seconds)
    REGISTRY.register(collector)
    start_http_server(port)
    print("Serving at port: %s" % port)
    while True:
        try:
            print("Starting the collection again : ", datetime.datetime.now())
            collector.update()
            time.sleep(args.period_seconds)
        except KeyboardInterrupt:
            print("Caught SIGTERM - stopping...")
            break
    print("Done.")
def main(): """Main entry point""" port = int(os.getenv("PORT", "9877")) configFile = os.getenv("CONFIG", "config.yaml") if os.getenv('KUBERNETES_SERVICE_HOST'): config.load_incluster_config() else: config.load_kube_config() with open(configFile) as f: resources = yaml.load(f, Loader=yaml.FullLoader) start_http_server(port, registry=REGISTRY) REGISTRY.register(CustomCollector(resources)) while True: time.sleep(1)
def test_assign_report_slice_new(self):
    """Test the assign report slice function with only a new report slice."""
    self.report_slice.state = ReportSlice.NEW
    self.report_slice.save()
    self.processor.report_or_slice = None
    self.processor.assign_object()
    self.assertEqual(self.processor.report_or_slice, self.report_slice)
    queued_slices = REGISTRY.get_sample_value('queued_report_slices')
    self.assertEqual(queued_slices, 1)
def make_rest_app(libvirtConnection):
    # Start the libvirt event broker
    broker = LibvirtEventBroker()
    Greenlet(broker.run).start()
    app.eventBroker = broker

    # Attach the event store to the broker
    app.eventStore = EventStore()

    def store_events():
        q = queue.Queue()
        broker.subscribe(q)
        while True:
            app.eventStore.put(q.get())

    Greenlet(store_events).start()

    # Create the metric collector
    app.conn = libvirtConnection
    app.collector = Collector(app.conn)
    # Register prometheus metrics
    REGISTRY.register(LibvirtCollector(app.collector))
    # For statsd debugging
    app.statsd = StatsdCollector(app.collector)

    # Collect metrics every second and store them in the metric store
    app.metricStore = MetricStore()

    def store_metrics():
        while True:
            try:
                app.metricStore.put(app.collector.collect())
            except Exception as e:
                logging.error(e)
                sleep(5)
            sleep(1)

    Greenlet(store_metrics).start()

    # Add gzip support
    mime_types = ['application/json', 'text/plain']
    return gzip(mime_types=mime_types, compress_level=9)(app)
def create_app() -> "app": app = Flask(__name__) # Unregister all collectors. collectors = list(REGISTRY._collector_to_names.keys()) print(f"before unregister collectors={collectors}") for collector in collectors: REGISTRY.unregister(collector) print( f"after unregister collectors={list(REGISTRY._collector_to_names.keys())}" ) # Import default collectors. from prometheus_client import (gc_collector, platform_collector, process_collector) # Re-register default collectors. process_collector.ProcessCollector() platform_collector.PlatformCollector() gc_collector.GCCollector() @app.route("/") def home(): return "Hello World!" @app.route("/path/<page_name>") def other_page(page_name): return page_name @app.route("/to/exclude") def exclude(): return "Exclude me!" @app.route("/server_error") def server_error(): raise Exception("Test") return "will ever get here" @app.route("/ignored") @Instrumentator.do_not_track() def ignored(): return "HALLO" return app
def main():
    opts = parse_opts(sys.argv[1:])
    init_logger(opts.log_level)
    scheduler = Scheduler()

    def sigterm_handler(signum, frame):
        if scheduler and signal.SIGTERM == signum:
            scheduler.shutdown()

    signal.signal(signal.SIGTERM, sigterm_handler)
    sonarqube_client = SonarQubeClient(opts.url, opts.user_token,
                                       **{"verify": opts.ignore_ssl})
    sonar_collector = SonarQubeCollector(sonarqube_client)
    REGISTRY.register(sonar_collector)
    scheduler.schedule(sonar_collector, int(opts.interval))
    scheduler.start()
    start_http_server(int(opts.port))
    sys.exit(scheduler.wait())
def test_set_zero_on_queue_length_when_a_channel_layer_error_occurs_during_queue_read(self):
    instance = QueueLengthMonitoringThread(app=self.app, queue_list=['noqueue'])
    instance.measure_queues_length()
    sample = REGISTRY.get_sample_value('celery_queue_length',
                                       {'queue_name': 'noqueue'})
    self.assertEqual(0.0, sample)
def test_info_gauge():
    dct = {
        "foo": "bar",
        "hello": "friend",
    }
    telemetry.info(dct, "test_info_gauge")
    assert (REGISTRY.get_sample_value(
        f"{s.PROMETHEUS_NAMESPACE}_test_info_gauge", dct) == 1)
def test_latency(icmp_mocked_host):
    """
    Test if ping succeeds and generates the right metrics.
    """
    clean_prometheus_registry()
    reload(monitor)

    before_up = REGISTRY.get_sample_value("internet_monitor_up")
    before_ping_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_total")
    before_ping_packet_loss_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_packet_loss_total")
    before_ping_jitter_seconds = REGISTRY.get_sample_value(
        "internet_monitor_ping_jitter_seconds")
    before_ping_latency_seconds = REGISTRY.get_sample_value(
        "internet_monitor_ping_latency_seconds")

    assert before_up == 0
    assert before_ping_total == 0
    assert before_ping_packet_loss_total == 0
    assert before_ping_jitter_seconds == 0

    with patch("im.monitor.ping") as mock_ping:
        mock_ping.return_value = icmp_mocked_host
        monitor.latency("1.1.1.1")

    after_up = REGISTRY.get_sample_value("internet_monitor_up")
    after_ping_total = REGISTRY.get_sample_value("internet_monitor_ping_total")
    after_ping_packet_loss_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_packet_loss_total")
    after_ping_jitter_seconds = REGISTRY.get_sample_value(
        "internet_monitor_ping_jitter_seconds")
    after_ping_latency_seconds = REGISTRY.get_sample_value(
        "internet_monitor_ping_latency_seconds_sum")

    assert after_up == 1
    assert after_ping_total == 1
    assert after_ping_packet_loss_total == icmp_mocked_host.packet_loss
    assert (after_ping_jitter_seconds ==
            (icmp_mocked_host.max_rtt - icmp_mocked_host.min_rtt) / 1000)
    assert after_ping_latency_seconds == icmp_mocked_host.avg_rtt / 1000
def test_prometheus_metrics_counter():
    metrics = PrometheusMetricsFactory(namespace='test')
    counter1 = metrics.create_counter(name='jaeger:test_counter',
                                      tags={'result': 'ok'})
    counter1(1)
    counter2 = metrics.create_counter(name='jaeger:test_counter',
                                      tags={'result': 'ok'})
    counter2(1)
    after = REGISTRY.get_sample_value('test_jaeger:test_counter',
                                      {'result': 'ok'})
    assert 2 == after
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config.file", dest="config_file",
                        default="aci.yml")
    parser.add_argument("--web.listen-port", dest="web_listen_port",
                        type=int, default=DEFAULT_PORT)
    parser.add_argument("--web.listen-address", dest="web_listen_address",
                        type=str, default="")
    args = parser.parse_args()

    config = load_config(args.config_file)
    aci_collector = AciCollector(config)
    REGISTRY.register(aci_collector)

    if hasattr(signal, 'SIGHUP'):
        sighup_handler = get_sighup_handler(aci_collector, args.config_file)
        signal.signal(signal.SIGHUP, sighup_handler)

    start_http_server(args.web_listen_port, args.web_listen_address)
    while True:
        time.sleep(9001)
def test_add_probability_filter_passed_no_nodes_metric(prometheus_noop_scenario):
    """
    Ensures that add_probability_filter_passed_no_nodes_metric is called
    when the filter decides to pass no nodes based on a probability.
    """
    assert prometheus_noop_scenario.name == "test scenario"
    random.seed(6)  # make the tests deterministic
    candidates = [dummy_object()]
    before = REGISTRY.get_sample_value(
        PROBABILITY_FILTER_NOT_PASSED_METRIC_NAME)
    assert before == 0
    criterion = {"probabilityPassAll": 0.00000001}
    prometheus_noop_scenario.filter_probability(candidates, criterion)
    after = REGISTRY.get_sample_value(
        PROBABILITY_FILTER_NOT_PASSED_METRIC_NAME)
    assert after == 1
def test_can_measure_queue_length(self):
    celery_app = get_celery_app(queue='realqueue')
    sample_task = SampleTask()
    sample_task.app = celery_app
    monitoring_thread_instance = QueueLengthMonitoringThread(
        celery_app, queue_list=['realqueue'])

    sample_task.delay()
    monitoring_thread_instance.measure_queues_length()
    sample = REGISTRY.get_sample_value('celery_queue_length',
                                       {'queue_name': 'realqueue'})

    self.assertEqual(1.0, sample)
def main(port, config_file=None):
    """
    Main function. Parse the config, create AWS connections to read data and
    start a web server for Prometheus.

    :param port: The http port the server will listen on for incoming requests
    :type port: int
    :param config_file: The path of the config file, optional. If None, look
        for the config in the script folder
    :type config_file: str|None
    :return: The exit code
    :rtype: int
    """
    try:
        if not port:
            port = DEFAULT_PORT
        if not config_file:
            config_file = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "config.yml")
        if not os.path.exists(config_file):
            raise ExporterError("Unable to load config file '" +
                                str(config_file) + "'")
        with open(config_file, "r") as cfg_fh:
            cfg_content = cfg_fh.read()
        collector = BkpCollector(yaml.safe_load(cfg_content))
        REGISTRY.register(collector)
        start_http_server(port)
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nExiting, please wait...")
        return 0
    except SystemExit:
        raise
    except ExporterError as error:
        sys.stderr.write(str(error))
        sys.stderr.write("\n")
        sys.stderr.flush()
        return 1
def test_workers_count(self):
    assert REGISTRY.get_sample_value('celery_workers') == 0
    with patch.object(self.app.control, 'ping') as mock_ping:
        w = WorkerMonitoringThread(app=self.app)

        mock_ping.return_value = []
        w.update_workers_count()
        assert REGISTRY.get_sample_value('celery_workers') == 0

        mock_ping.return_value = [0]  # 1 worker
        w.update_workers_count()
        assert REGISTRY.get_sample_value('celery_workers') == 1

        mock_ping.return_value = [0, 0]  # 2 workers
        w.update_workers_count()
        assert REGISTRY.get_sample_value('celery_workers') == 2

        mock_ping.return_value = []
        w.update_workers_count()
        assert REGISTRY.get_sample_value('celery_workers') == 0
def start_webserver(conf):
    token_manager = TokenManager(conf.get('tableau_user'),
                                 conf.get('tableau_password'),
                                 conf['site'],
                                 conf['server_host'],
                                 conf['api_version'],
                                 token_name=conf.get('tableau_token_name'),
                                 token_secret=conf.get('tableau_token_secret'))
    REGISTRY.register(
        TableauMetricsCollector(token_manager,
                                verify_ssl=conf.get('verify_ssl', False)))
    # Start up the server to expose the metrics.
    root = Resource()
    root.putChild(b'metrics', MetricsResource())
    factory = Site(root)
    logger.info('Starting webserver on {}'.format(conf['exporter_port']))
    reactor.listenTCP(conf['exporter_port'], factory)
    reactor.run()
def __init__(self, port=8000, counters=None):
    """Start the http server for scraping.

    The port you open should be the one scraped by Prometheus,
    e.g. port 8000 could be scraped with:

        scrape_configs:
          - job_name: 'local'
            scrape_interval: 5s
            static_configs:
              - targets: ['localhost:8000']

    The counters dict is used to keep track of counters and is handed
    to Prometheus when scraped.
    """
    start_http_server(port)
    # Keep data for scrape_interval * scrape_amount
    # (scrape_interval is found in /etc/prometheus/prometheus.yml;
    # at the time of writing it is 5).
    self.scrape_amount = 60
    self.scrape_count = self.scrape_amount // 2
    self.data = [[] for _ in range(self.scrape_amount)]
    # Avoid a mutable default argument, which would be shared across instances.
    self.counters = counters if counters is not None else {}
    REGISTRY.register(self)
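# A hedged usage sketch for the constructor above. The enclosing class is not
# named in the snippet, so ScrapeServer is a stand-in, and the role of the
# counters dict is inferred from the docstring.
counters = {'jobs_processed': 0, 'jobs_failed': 0}  # hypothetical counters
server = ScrapeServer(port=8000, counters=counters)

# Prometheus, using the scrape_config shown in the docstring, now pulls
# localhost:8000 every 5 seconds; since the instance registered itself with
# REGISTRY, mutating the dict is enough for the next scrape to see it.
counters['jobs_processed'] += 1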
def test_download_speed(http_mocked_response):
    """
    Test if download succeeds and calculates the appropriate size and
    successful job execution.
    """
    clean_prometheus_registry()
    reload(monitor)

    before_download_duration_seconds_sum = REGISTRY.get_sample_value(
        "internet_monitor_download_duration_seconds_sum")
    before_download_size_bytes = REGISTRY.get_sample_value(
        "internet_monitor_download_size_bytes")
    before_download_total = REGISTRY.get_sample_value(
        "internet_monitor_download_total")

    assert before_download_duration_seconds_sum == 0
    assert before_download_size_bytes == 0
    assert before_download_total == 0

    with patch("im.monitor.requests") as mocked_requests:
        mocked_requests.get.return_value = http_mocked_response
        monitor.download_speed("http://localhost")

    after_download_duration_seconds_sum = REGISTRY.get_sample_value(
        "internet_monitor_download_duration_seconds_sum")
    after_download_size_bytes = REGISTRY.get_sample_value(
        "internet_monitor_download_size_bytes")
    after_download_total = REGISTRY.get_sample_value(
        "internet_monitor_download_total")

    assert isinstance(after_download_duration_seconds_sum, float)
    assert after_download_size_bytes == len(http_mocked_response.content)
    assert after_download_total == 1
def test_upload_speed_failures(http_mocked_response):
    """
    Test if upload fails and failure metrics are updated accordingly.
    """
    clean_prometheus_registry()
    reload(monitor)

    before_upload_duration_seconds_sum = REGISTRY.get_sample_value(
        "internet_monitor_upload_duration_seconds_sum")
    before_upload_failures_total = REGISTRY.get_sample_value(
        "internet_monitor_upload_failures_total")
    before_upload_total = REGISTRY.get_sample_value(
        "internet_monitor_upload_total")

    assert before_upload_duration_seconds_sum == 0
    assert before_upload_failures_total == 0
    assert before_upload_total == 0

    with patch("im.monitor.requests") as mocked_requests:
        mocked_requests.post.side_effect = Exception("boom!")
        monitor.upload_speed("http://localhost")

    after_upload_duration_seconds_sum = REGISTRY.get_sample_value(
        "internet_monitor_upload_duration_seconds_sum")
    after_upload_failures_total = REGISTRY.get_sample_value(
        "internet_monitor_upload_failures_total")
    after_upload_total = REGISTRY.get_sample_value(
        "internet_monitor_upload_total")

    assert isinstance(after_upload_duration_seconds_sum, float)
    assert after_upload_failures_total == 1
    assert after_upload_total == 1
def test_latency_failure(icmp_mocked_host):
    """
    Test if ping fails and generates the right metrics.
    """
    clean_prometheus_registry()
    reload(monitor)

    before_up = REGISTRY.get_sample_value("internet_monitor_up")
    before_ping_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_total")
    before_ping_failures_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_failures_total")

    assert before_up == 0
    assert before_ping_total == 0
    assert before_ping_failures_total == 0

    with patch("im.monitor.ping") as mock_ping:
        mock_ping.side_effect = Exception("boom!")
        monitor.latency("1.1.1.1")

    after_up = REGISTRY.get_sample_value("internet_monitor_up")
    after_ping_total = REGISTRY.get_sample_value("internet_monitor_ping_total")
    after_ping_failures_total = REGISTRY.get_sample_value(
        "internet_monitor_ping_failures_total")

    assert after_up == 0
    assert after_ping_total == 1
    assert after_ping_failures_total == 1
def test_converge_complete(self):
    """
    At the end of a convergence iteration, ``_CONVERGE_COMPLETE`` is
    updated to the current time.
    """
    interval = 45
    reactor = MemoryReactorClock()
    deploy_config = DeploymentConfiguration(
        domain=u"s4.example.com",
        kubernetes_namespace=u"testing",
        subscription_manager_endpoint=URL.from_text(u"http://localhost:8000"),
        s3_access_key_id=u"access key id",
        s3_secret_key=u"secret key",
        introducer_image=u"introducer:abcdefgh",
        storageserver_image=u"storageserver:abcdefgh",
    )

    state_path = FilePath(self.mktemp().decode("ascii"))
    state_path.makedirs()
    subscription_client = memory_client(
        state_path,
        deploy_config.domain,
    )
    k8s_client = KubeClient(k8s=memory_kubernetes().client())
    aws_region = FakeAWSServiceRegion(
        access_key=deploy_config.s3_access_key_id,
        secret_key=deploy_config.s3_secret_key,
    )
    d = aws_region.get_route53_client().create_hosted_zone(
        u"foo", deploy_config.domain,
    )
    self.successResultOf(d)

    service = _convergence_service(
        reactor,
        interval,
        deploy_config,
        subscription_client,
        k8s_client,
        aws_region,
    )
    service.startService()
    reactor.advance(interval)
    last_completed = next(
        metric.samples[-1][-1]
        for metric in REGISTRY.collect()
        if metric.name == u"s4_last_convergence_succeeded"
    )
    self.assertThat(reactor.seconds(), Equals(last_completed))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', '--listen', metavar='ADDRESS',
                        help='Listen on this address', default=':8000')
    parser.add_argument('-u', '--uri', default='/',
                        help='The URI to check for POSTs coming from Druid')
    parser.add_argument('-d', '--debug', action='store_true',
                        help='Enable debug logging')
    parser.add_argument('-e', '--encoding', default='utf-8',
                        help='Encoding of the Druid POST JSON data.')
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    address, port = args.listen.split(':', 1)

    log.info('Starting druid_exporter on %s:%s', address, port)

    druid_collector = collector.DruidCollector()
    REGISTRY.register(druid_collector)
    prometheus_app = make_wsgi_app()
    druid_wsgi_app = DruidWSGIApp(args.uri, druid_collector,
                                  prometheus_app, args.encoding)

    httpd = make_server(address, int(port), druid_wsgi_app)
    httpd.serve_forever()
def __init__(self, rdc_ip_port, field_ids, update_freq, max_keep_age,
             max_keep_samples, gpu_indexes, rdc_unauth,
             enable_plugin_monitoring):
    group_name = "rdc_prometheus_plugin_group"
    field_group_name = "rdc_prometheus_plugin_fieldgroup"
    if rdc_unauth:
        RdcReader.__init__(self,
                           ip_port=rdc_ip_port,
                           field_ids=field_ids,
                           update_freq=update_freq,
                           max_keep_age=max_keep_age,
                           max_keep_samples=max_keep_samples,
                           gpu_indexes=gpu_indexes,
                           field_group_name=field_group_name,
                           gpu_group_name=group_name,
                           root_ca=None)
    else:
        RdcReader.__init__(self,
                           ip_port=rdc_ip_port,
                           field_ids=field_ids,
                           update_freq=update_freq,
                           max_keep_age=max_keep_age,
                           max_keep_samples=max_keep_samples,
                           gpu_indexes=gpu_indexes,
                           field_group_name=field_group_name,
                           gpu_group_name=group_name)

    # Suppress internal metrics from prometheus_client
    if not enable_plugin_monitoring:
        REGISTRY.unregister(PROCESS_COLLECTOR)
        REGISTRY.unregister(PLATFORM_COLLECTOR)

    # Create the gauges
    self.gauges = {}
    for fid in self.field_ids:
        field_name = self.rdc_util.field_id_string(fid).lower()
        self.gauges[fid] = Gauge(field_name, field_name,
                                 labelnames=['gpu_index'])
def start_metrics_server():
    """
    Starts an http server on the port specified in the configuration file
    and exposes Prometheus metrics on it. Also removes the GC_COLLECTOR
    metrics because they are not really needed.
    """
    # Remove garbage collection metrics
    REGISTRY.unregister(GC_COLLECTOR)

    # Gather configuration
    config = Configuration().values
    ip = config.service.ip
    metrics_port = config.service.metrics_port

    # Start server
    start_wsgi_server(metrics_port)

    # Log
    Logger() \
        .event(category="runnable", action="run metrics") \
        .server(ip=ip, port=metrics_port) \
        .out(severity=Severity.INFO)
def test_request_latency_sum(self):
    method, path = 'TEST', 'PATH'
    before = REGISTRY.get_sample_value(
        'http_requests_latency_seconds_sum',
        {'method': method, 'path': path}) or 0
    trigger_metrics(method, path, wait=True)
    after = REGISTRY.get_sample_value(
        'http_requests_latency_seconds_sum',
        {'method': method, 'path': path})
    assert after is not None
    diff = after - before
    # Check the difference is roughly in the ballpark of what we expect.
    assert 2 <= diff <= 3
def test_combined_size_no_labels():
    app = create_app()
    Instrumentator().add(
        metrics.combined_size(
            should_include_handler=False,
            should_include_method=False,
            should_include_status=False,
        )).instrument(app)
    client = TestClient(app)

    client.get("/")

    assert REGISTRY.get_sample_value("http_combined_size_bytes_sum", {}) == 14
def cli(**settings): """Kayako metrics exporter for Prometheus""" if settings['version']: click.echo('Version %s' % kayako_exporter.__version__) return if not settings['url']: click.echo('Please provide Kayako API URL') return if not settings['login']: click.echo('Please provide Kayako username') return if not settings['password']: click.echo('Please provide Kayako account password') return if settings['verbose']: handler = logging.StreamHandler() logger.addHandler(handler) logger.setLevel(logging.DEBUG) handler.setFormatter(logging.Formatter('[%(asctime)s] %(message)s', "%Y-%m-%d %H:%M:%S")) click.echo('Exporter for {base_url}, user: {login}, password: ***'.format( base_url=settings['url'].rstrip('/'), login=settings['login'], password=settings['password'] )) REGISTRY.register(KayakoCollector( base_url=settings['url'].rstrip('/'), login=settings['login'], password=settings['password'], department_ids=settings['department_id'], )) httpd = HTTPServer(('', int(settings['port'])), MetricsHandler) click.echo('Exporting Kayako metrics on http://0.0.0.0:{}'.format(settings['port'])) httpd.serve_forever()
def getMetricVector(self, metric_name):
    """Returns the values for all labels of a given metric.

    The result is returned as a list of (labels, value) tuples, where
    `labels` is a dict.

    This is quite a hack since it relies on the internal representation
    of the prometheus_client, and it should probably be provided as a
    function there instead.
    """
    all_metrics = REGISTRY.collect()
    output = []
    for metric in all_metrics:
        for n, l, value in metric._samples:
            if n == metric_name:
                output.append((l, value))
    return output
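# For context, a sketch of how a test method might consume the helper above;
# the metric name is illustrative only, not taken from the source.
for labels, value in self.getMetricVector('python_gc_threshold'):
    print(labels.get('generation'), value)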
def test_thresholds(self):
    self.assertIsNotNone(REGISTRY.get_sample_value(
        'python_gc_threshold', labels={'generation': '0'}))
    self.assertIsNotNone(REGISTRY.get_sample_value(
        'python_gc_threshold', labels={'generation': '1'}))
    self.assertIsNotNone(REGISTRY.get_sample_value(
        'python_gc_threshold', labels={'generation': '2'}))

    original_thresholds = gc.get_threshold()
    try:
        gc.disable()
        gc.set_threshold(42, 43, 44)
        self.assertEqual(42, REGISTRY.get_sample_value(
            'python_gc_threshold', labels={'generation': '0'}))
        self.assertEqual(43, REGISTRY.get_sample_value(
            'python_gc_threshold', labels={'generation': '1'}))
        self.assertEqual(44, REGISTRY.get_sample_value(
            'python_gc_threshold', labels={'generation': '2'}))
    finally:
        gc.set_threshold(*original_thresholds)
        gc.enable()
def test_tasks_events(self):
    task_uuid = uuid()
    hostname = 'myhost'
    local_received = time()
    latency_before_started = 123.45
    runtime = 234.5

    m = MonitorThread(app=self.app)

    self._assert_task_states(celery.states.ALL_STATES, 0)
    assert REGISTRY.get_sample_value('celery_task_latency_count') == 0
    assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0

    m._process_event(Event(
        'task-received', uuid=task_uuid, name=self.task,
        args='()', kwargs='{}', retries=0, eta=None, hostname=hostname,
        clock=0, local_received=local_received))
    self._assert_all_states({celery.states.RECEIVED})

    m._process_event(Event(
        'task-started', uuid=task_uuid, hostname=hostname,
        clock=1, name=self.task,
        local_received=local_received + latency_before_started))
    self._assert_all_states({celery.states.STARTED})

    m._process_event(Event(
        'task-succeeded', uuid=task_uuid, result='42',
        runtime=runtime, hostname=hostname, clock=2,
        local_received=local_received + latency_before_started + runtime))
    self._assert_all_states({celery.states.SUCCESS})

    assert REGISTRY.get_sample_value('celery_task_latency_count') == 1
    self.assertAlmostEqual(
        REGISTRY.get_sample_value('celery_task_latency_sum'),
        latency_before_started)
    assert REGISTRY.get_sample_value(
        'celery_tasks_runtime_seconds_count',
        labels=dict(name=self.task)) == 1
    assert REGISTRY.get_sample_value(
        'celery_tasks_runtime_seconds_sum',
        labels=dict(name=self.task)) == 234.5
def test_collection_time(self):
    current_total = REGISTRY.get_sample_value(
        'python_gc_collection_process_time_total_s')
    gc.collect()
    self.assertLess(current_total, REGISTRY.get_sample_value(
        'python_gc_collection_process_time_total_s'))
def test_initial_metric_values(self):
    self._assert_task_states(celery.states.ALL_STATES, 0)
    assert REGISTRY.get_sample_value('celery_workers') == 0
    assert REGISTRY.get_sample_value('celery_task_latency_count') == 0
    assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0
def getMetric(self, metric_name, **labels):
    return REGISTRY.get_sample_value(metric_name, labels=labels)
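# And the matching single-sample lookup, again with illustrative names:
# labels are passed as keyword arguments and forwarded as the labels dict.
value = self.getMetric('celery_queue_length', queue_name='realqueue')
assert value == 1.0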
def cli(**settings): """Zabbix metrics exporter for Prometheus Use config file to map zabbix metrics names/labels into prometheus. Config below transfroms this: local.metric[uwsgi,workers,myapp,busy] = 8 local.metric[uwsgi,workers,myapp,idle] = 6 into familiar Prometheus gauges: uwsgi_workers{instance="host1",app="myapp",status="busy"} 8 uwsgi_workers{instance="host1",app="myapp",status="idle"} 6 YAML: \b metrics: - key: 'local.metric[uwsgi,workers,*,*]' name: 'uwsgi_workers' labels: app: $1 status: $2 reject: - 'total' """ if settings['version']: click.echo('Version %s' % zabbix_exporter.__version__) return if not validate_settings(settings): return if settings['config']: exporter_config = yaml.safe_load(open(settings['config'])) else: exporter_config = {} base_logger = logging.getLogger('zabbix_exporter') handler = logging.StreamHandler() base_logger.addHandler(handler) base_logger.setLevel(logging.ERROR) handler.setFormatter(logging.Formatter('[%(asctime)s] %(message)s', "%Y-%m-%d %H:%M:%S")) if settings['verbose']: base_logger.setLevel(logging.DEBUG) collector = ZabbixCollector( base_url=settings['url'].rstrip('/'), login=settings['login'], password=settings['password'], verify_tls=settings['verify_tls'], timeout=settings['timeout'], **exporter_config ) if settings['dump_metrics']: return dump_metrics(collector) REGISTRY.register(collector) httpd = HTTPServer(('', int(settings['port'])), MetricsHandler) click.echo('Exporter for {base_url}, user: {login}, password: ***'.format( base_url=settings['url'].rstrip('/'), login=settings['login'], password=settings['password'] )) if settings['return_server']: return httpd click.echo('Exporting Zabbix metrics on http://0.0.0.0:{}'.format(settings['port'])) httpd.serve_forever()
def test_prometheus_metrics_counter_without_tags():
    metrics = PrometheusMetricsFactory()
    counter = metrics.create_counter(name='jaeger:test_counter_no_tags')
    counter(1)
    after = REGISTRY.get_sample_value('jaeger:test_counter_no_tags')
    assert 1 == after
# DagRun metrics (fragment from the collect() generator of the
# MetricsCollector registered below)
dag_duration = GaugeMetricFamily(
    'airflow_dag_run_duration',
    'Duration of currently running dag_runs in seconds',
    labels=['dag_id', 'run_id']
)
driver = Session.bind.driver
for dag in get_dag_duration_info():
    if driver in ('mysqldb', 'pysqlite'):
        dag_duration.add_metric([dag.dag_id, dag.run_id], dag.duration)
    else:
        dag_duration.add_metric([dag.dag_id, dag.run_id],
                                dag.duration.seconds)
yield dag_duration


REGISTRY.register(MetricsCollector())


class Metrics(BaseView):
    @expose('/')
    def index(self):
        return Response(generate_latest(), mimetype='text/plain')


ADMIN_VIEW = Metrics(category="Prometheus exporter", name="metrics")


class AirflowPrometheusPlugins(AirflowPlugin):
    '''Plugin for showing metrics.'''
    name = "airflow_prometheus_plugin"
    operators = []
def register_slaves_metrics_collector(cls, get_slaves: Callable[[], List['app.master.slave.Slave']]):
    if not cls._slaves_collector_is_registered:
        REGISTRY.register(SlavesCollector(get_slaves))
        cls._slaves_collector_is_registered = True
def test_prometheus_metrics_gauge_without_tags():
    metrics = PrometheusMetricsFactory()
    gauge = metrics.create_gauge(name='jaeger:test_gauge_no_tags')
    gauge(1)
    after = REGISTRY.get_sample_value('jaeger:test_gauge_no_tags')
    assert 1 == after