def __init__(self, name, desc, service, buckets=None, client=None, valueType=None):
    self.service = service
    self.name = name
    if self.service == "prometheus":
        if buckets:
            self.h = prom.Histogram(name, desc, buckets=buckets)
        else:
            self.h = prom.Histogram(name, desc)
    else:  # Stackdriver
        self.client = monitoring_v3.MetricServiceClient()
        # NOTE: project_path() expects a GCP project id; the original passed
        # the metric name here.
        self.project_name = self.client.project_path(name)
        descriptor = monitoring_v3.types.MetricDescriptor()
        # The original referenced an undefined `metric_type`; the metric name
        # is the closest in-scope value.
        descriptor.type = 'custom.googleapis.com/{}'.format(name)
        # Cumulative metric kind
        descriptor.metric_kind = (
            monitoring_v3.enums.MetricDescriptor.MetricKind.CUMULATIVE)
        # Distribution value type (a switch for other value types may come later)
        descriptor.value_type = (
            monitoring_v3.enums.MetricDescriptor.ValueType.DISTRIBUTION)
        descriptor.description = desc
        # Create the metric descriptor and report success
        descriptor = self.client.create_metric_descriptor(self.project_name,
                                                          descriptor)
        print('StackDriver Histogram Created {}.'.format(descriptor.name))
def _stats(**kwargs):
    if not metrics:  # first call, set up metrics
        labels_no_status = sorted(kwargs.keys()) + ['endpoint', 'method']
        labels = labels_no_status + ['status']
        metrics['latency'] = prom.Histogram(
            'http_request_latency_{}'.format(endpoint),
            LATENCY_HELP,
            labels,
            buckets=LATENCY_BUCKETS,
        )
        metrics['size'] = prom.Histogram(
            'http_response_size_{}'.format(endpoint),
            SIZE_HELP,
            labels,
            buckets=SIZE_BUCKETS,
        )
        metrics['concurrent'] = prom.Gauge(
            'http_request_concurrency_{}'.format(endpoint),
            CONCURRENT_HELP,
            labels_no_status,
        )
    request_store.metrics = metrics
    request_store.endpoint = endpoint
    request_store.method = request.method
    request_store.labels = {k: str(v) for k, v in kwargs.items()}
    generic_concurrent.labels(endpoint=endpoint, method=request.method).inc()
    metrics['concurrent'].labels(endpoint=endpoint, method=request.method,
                                 **request_store.labels).inc()
    request_store.start_time = monotonic()
    return fn(**kwargs)
def initialize(name, host, port):
    queue_latency = prometheus_client.Histogram('queue_latency', 'queue latency',
                                                ['app', 'queue'])
    queue_counter = prometheus_client.Counter('queue_counter', 'queue counter',
                                              ['app', 'queue'])
    dequeue_latency = prometheus_client.Histogram('dequeue_latency',
                                                  'dequeue latency',
                                                  ['app', 'queue'])
    dequeue_counter = prometheus_client.Counter('dequeue_counter',
                                                'dequeue counter',
                                                ['app', 'queue'])
    for _ in range(10):
        try:
            redis_conn = redis.Redis(host, port, decode_responses=True)
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)

    def push(queue, data):
        value = json.dumps(data)
        with queue_latency.labels(app=name, queue=queue).time():
            redis_conn.rpush(queue, value)
        queue_counter.labels(app=name, queue=queue).inc()

    def pop(queue, function):
        while True:
            try:
                _, value = redis_conn.blpop(queue)
                dequeue_counter.labels(app=name, queue=queue).inc()
                try:
                    with dequeue_latency.labels(app=name, queue=queue).time():
                        function(json.loads(value))
                except Exception as e:
                    print('ERROR:', e)
                    # NOTE: `value` is already JSON-encoded, so push() will
                    # encode it a second time on the dead-letter queue.
                    push(queue + '.dead', value)
            except Exception as e:
                print('ERROR:', e)

    return push, pop
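# A hedged usage sketch for the queue helpers above; the queue name and the
# handler below are illustrative assumptions, not part of the original:
push, pop = initialize('worker', 'localhost', 6379)
push('jobs', {'id': 1})
pop('jobs', lambda job: print('got', job))  # blocks; failed jobs go to 'jobs.dead'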
def __init__(self, prefix, description, labels):
    super().__init__(prefix, description, labels)
    self.timeout = prometheus_client.Histogram(
        '{}_timeout_duration_seconds'.format(self.full_prefix),
        'Configured timeout (in seconds) to {}'.format(description),
        labels,
        registry=REGISTRY)
    self.timeout_usage = prometheus_client.Histogram(
        '{}_timeout_usage_ratio'.format(self.full_prefix),
        'Ratio of duration to timeout to {}'.format(description),
        labels,
        registry=REGISTRY)
def test_record_call_latency(self):
    registry = prometheus_client.CollectorRegistry()
    metric = prometheus_client.Histogram(
        'histo', 'A histogram', ['foo', 'bar'], registry=registry)
    prometheus_metrics = PrometheusMetrics(
        registry=registry, metrics={'histo': metric})
    label_call_args = []

    def get_labels(*args, **kwargs):
        label_call_args.append((args, kwargs))
        return {'foo': 'FOO', 'bar': 'BAR'}

    @prometheus_metrics.record_call_latency('histo', get_labels=get_labels)
    @inlineCallbacks
    def func(param1, param2=None):
        yield
        returnValue(param1)

    obj = object()
    result = yield func(obj, param2='baz')
    self.assertIs(result, obj)
    # the get_labels function is called with the same args as the function
    self.assertEqual(label_call_args, [((obj,), {'param2': 'baz'})])
    self.assertIn('histo_count{bar="BAR",foo="FOO"} 1.0',
                  prometheus_metrics.generate_latest().decode('ascii'))
def __init__(self):
    self.main_counter = pc.Counter(
        "main_counter", "total requests to your redirect page")
    self.redirect_time = pc.Histogram(
        "redirect_time", "a histogram of the redirect time")
    self.users_counter = pc.Counter(
        "users_counter", "a counter of the users in our platform",
        ["ip", "browser", "platform", "language"])
def csets_filter_worker(args, config, db):
    class FilterAmqp(messagebus.Amqp):
        def on_message(self, payload, message):
            logger.info('Filter: {}'.format(payload))
            start = time.time()
            if cset_filter(self.config, self.db, payload):
                amqp.send(payload, schema_name='cset', schema_version=1,
                          routing_key='analysis_cset.osmtracker')
                m_events.labels('analysis', 'in').inc()
                m_events.labels('filter', 'out').inc()
            elapsed = time.time() - start
            m_filter_time.observe(elapsed)
            logger.info('Filtering of cid {} took {:.2f}s'.format(
                payload['cid'], elapsed))
            message.ack()

    amqp = FilterAmqp(args.amqp_url, AMQP_EXCHANGE_TOPIC, 'topic',
                      AMQP_QUEUES, [AMQP_FILTER_QUEUE])
    amqp.config = config
    amqp.db = db
    if args.metrics:
        m_events = prometheus_client.Counter('osmtracker_events',
                                             'Number of events', EVENT_LABELS)
        m_filter_time = prometheus_client.Histogram(
            'osmtracker_changeset_filter_processing_time_seconds',
            'Changeset filtering time (seconds)')
    logger.debug('Starting filter worker')
    amqp.run()
def init(app):
    # application version info
    app.version = None
    with open('VERSION') as f:
        app.version = f.read().rstrip()
    # prometheus metrics
    app.post_read_db_seconds = prometheus_client.Histogram(
        'post_read_db_seconds', 'Request DB time')
    app.post_count = prometheus_client.Counter('post_count',
                                               'A counter of new posts')
    app.post_create_db_seconds = prometheus_client.Histogram(
        'post_create_db_seconds', 'Create post DB time')
    # database client connection
    app.db = MongoClient(POST_DATABASE_HOST,
                         int(POST_DATABASE_PORT)).users_post.posts
def __init__(self, prefix, description, labels): """ :param prefix: prefix to use for each metric name :param description: description of action to use in metric description :param labels: label names to define for each metric """ self.full_prefix = '{}_{}'.format(self.__class__._PREFIX, prefix) self.progress = prometheus_client.Gauge( '{}_attempt_inprogress'.format(self.full_prefix), 'In progress attempts to {}'.format(description), labels, registry=REGISTRY, multiprocess_mode='livesum') self.attempt_total = prometheus_client.Counter( '{}_attempt_total'.format(self.full_prefix), 'Total attempts to {}'.format(description), labels, registry=REGISTRY) self.failure_total = prometheus_client.Counter( '{}_failure_total'.format(self.full_prefix), 'Total failures to {}'.format(description), labels, registry=REGISTRY) self.duration = prometheus_client.Histogram( '{}_duration_seconds'.format(self.full_prefix), 'Seconds to {}'.format(description), labels, registry=REGISTRY)
def after_process_boot(self, broker):
    os.environ["prometheus_multiproc_dir"] = DB_PATH

    # This import MUST happen at runtime, after process boot and
    # after the env variable has been set up.
    import prometheus_client as prom

    self.logger.debug("Setting up metrics...")
    registry = prom.CollectorRegistry()
    self.total_messages = prom.Counter(
        "dramatiq_messages_total",
        "The total number of messages processed.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.total_errored_messages = prom.Counter(
        "dramatiq_message_errors_total",
        "The total number of errored messages.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.total_retried_messages = prom.Counter(
        "dramatiq_message_retries_total",
        "The total number of retried messages.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.total_rejected_messages = prom.Counter(
        "dramatiq_message_rejects_total",
        "The total number of dead-lettered messages.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.total_revived_messages = prom.Counter(
        "dramatiq_message_revives_total",
        "The total number of messages revived from dead workers.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.inprogress_messages = prom.Gauge(
        "dramatiq_messages_inprogress",
        "The number of messages in progress.",
        ["queue_name", "actor_name"],
        registry=registry,
        multiprocess_mode="livesum",
    )
    self.inprogress_delayed_messages = prom.Gauge(
        "dramatiq_delayed_messages_inprogress",
        "The number of delayed messages in memory.",
        ["queue_name", "actor_name"],
        registry=registry,
    )
    self.message_durations = prom.Histogram(
        "dramatiq_message_duration_milliseconds",
        "The time spent processing messages.",
        ["queue_name", "actor_name"],
        buckets=(5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500,
                 5000, 7500, 10000, 30000, 60000, 600000, 900000,
                 float("inf")),
        registry=registry,
    )
def initialize(name, host, port):
    db_latency = prometheus_client.Histogram('db_latency', 'db latency',
                                             ['app', 'query'])
    db_counter = prometheus_client.Counter('db_counter', 'db counter',
                                           ['app', 'query'])
    for _ in range(10):
        try:
            postgres = psycopg2.connect(host=host, port=port, dbname='postgres',
                                        user='******', password='******')
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)

    def execute(query, values=None):
        with db_latency.labels(app=name, query=query).time():
            cursor = postgres.cursor()
            cursor.execute(query, values)
            data = cursor.fetchall() if cursor.description else []
            postgres.commit()
            cursor.close()
        db_counter.labels(app=name, query=query).inc()
        return data

    # NOTE: this call must come after the definition of execute(); the
    # original invoked it earlier, which would raise a NameError.
    execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";')

    return execute
def initialize(name):
    app = flask.Flask(name)
    request_latency = prometheus_client.Histogram(
        'request_latency', 'request latency',
        ['app', 'method', 'path', 'status'])
    request_counter = prometheus_client.Counter(
        'request_counter', 'request counter',
        ['app', 'method', 'path', 'status'])

    @app.route('/health')
    def health():
        return 'OK'

    @app.route('/version')
    def version():
        return '0.1.0'

    @app.before_request
    def before_request():
        flask.request.start_time = time.time()

    @app.after_request
    def after_request(response):
        latency = time.time() - flask.request.start_time
        request_latency.labels(app=name, method=flask.request.method,
                               path=flask.request.path,
                               status=response.status_code).observe(latency)
        request_counter.labels(app=name, method=flask.request.method,
                               path=flask.request.path,
                               status=response.status_code).inc()
        return response

    return app
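# A minimal wiring sketch for the factory above; the app name and port
# numbers are illustrative assumptions, not part of the original:
import prometheus_client

app = initialize('myapp')
prometheus_client.start_http_server(8000)  # serves /metrics on :8000
app.run(host='0.0.0.0', port=8080)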
def consumer(name, instance, host, port):
    consumer_latency = prometheus_client.Histogram(
        'consumer_latency', 'consumer latency', ['app', 'instance', 'topic'])
    consumer_counter = prometheus_client.Counter(
        'consumer_counter', 'consumer counter', ['app', 'instance', 'topic'])
    for _ in range(100):
        try:
            consumer = kafka.KafkaConsumer(
                bootstrap_servers='{}:{}'.format(host, port),
                group_id=name,
                auto_offset_reset='earliest',
                enable_auto_commit=False)
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)

    def consume(topic, function):
        consumer.subscribe([topic])
        for message in consumer:
            consumer_counter.labels(app=name, instance=instance,
                                    topic=topic).inc()
            with consumer_latency.labels(app=name, instance=instance,
                                         topic=topic).time():
                function(json.loads(message.value.decode('utf-8')))
            consumer.commit()

    return consume
def setup_status(app) -> prometheus_client.CollectorRegistry:
    """Add /status to serve Prometheus-driven runtime metrics."""
    registry = prometheus_client.CollectorRegistry(auto_describe=True)
    app["request_count"] = prometheus_client.Counter(
        "requests_total", "Total Request Count",
        ["app_name", "method", "endpoint", "http_status"],
        registry=registry,
    )
    app["request_latency"] = prometheus_client.Histogram(
        "request_latency_seconds", "Request latency",
        ["app_name", "endpoint"],
        registry=registry,
    )
    app["request_in_progress"] = prometheus_client.Gauge(
        "requests_in_progress_total", "Requests in progress",
        ["app_name", "endpoint", "method"],
        registry=registry,
    )
    prometheus_client.Info("server", "API server version", registry=registry).info({
        "version": metadata.__version__,
        "commit": getattr(metadata, "__commit__", "null"),
        "build_date": getattr(metadata, "__date__", "null"),
    })
    app.middlewares.insert(0, instrument)
    # passing StatusRenderer(registry) without __call__ triggers a spurious
    # DeprecationWarning
    # FIXME(vmarkovtsev): https://github.com/aio-libs/aiohttp/issues/4519
    app.router.add_get("/status", StatusRenderer(registry).__call__)
    return registry
def initialize(name, host, port):
    db_latency = prometheus_client.Histogram('db_latency', 'db latency',
                                             ['app', 'query'])
    db_counter = prometheus_client.Counter('db_counter', 'db counter',
                                           ['app', 'query'])
    for _ in range(100):
        try:
            session = cassandra.cluster.Cluster(
                [host],
                load_balancing_policy=cassandra.policies.RoundRobinPolicy(),
                port=port).connect()
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS hjalp WITH replication = "
        "{ 'class': 'SimpleStrategy', 'replication_factor': '2' }")
    session.set_keyspace('hjalp')

    def execute(query, values=tuple()):
        with db_latency.labels(app=name, query=query).time():
            rows = session.execute(session.prepare(query).bind(values))
            data = [r._asdict() for r in rows]
        db_counter.labels(app=name, query=query).inc()
        return data

    return execute
def __init__(self, prefix, description, labels):
    super().__init__(prefix, description, labels)
    self.concurrency = prometheus_client.Histogram(
        '{}_concurrency_count'.format(self.full_prefix),
        'Count of charts being handled concurrently',
        labels,
        registry=REGISTRY)
class Metrics(object):
    RequestCounter = prom.Counter('http_requests_total',
                                  'Total number of HTTP requests.',
                                  ['method', 'scheme'])
    ResponseCounter = prom.Counter('http_responses_total',
                                   'Total number of HTTP responses.',
                                   ['status'])
    LatencyHistogram = prom.Histogram('http_latency_seconds',
                                      'Overall HTTP transaction latency.')
    RequestSizeHistogram = prom.Histogram(
        'http_requests_body_bytes',
        'Breakdown of HTTP requests by content length.',
        buckets=powers_of(5, 11))
    ResponseSizeHistogram = prom.Histogram(
        'http_responses_body_bytes',
        'Breakdown of HTTP responses by content length.',
        buckets=powers_of(5, 11))
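# `powers_of` is not shown in this listing; a plausible implementation,
# producing base**0 .. base**(count-1) as ascending bucket upper bounds:
def powers_of(base, count):
    return tuple(base ** i for i in range(count))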
def _create_metric(self):
    """Creates a registry and records a new Histogram metric."""
    self.registry = prometheus_client.CollectorRegistry()
    metric_name = 'kuryr_cni_request_duration_seconds'
    metric_description = 'The duration of CNI requests'
    buckets = (5, 10, 15, 20, 25, 30, 40, 50, 60, _INF)
    self.cni_requests_duration = prometheus_client.Histogram(
        metric_name, metric_description,
        labelnames={'command', 'error'},
        buckets=buckets,
        registry=self.registry)
def get_prometheus_histogram(self):
    histogram = getattr(prometheus.REGISTRY, '_command_executor_histogram',
                        None)
    if not histogram:
        histogram = prometheus.Histogram(
            'cds_ce_execution_duration_seconds',
            'How many times CE actions (upload, prepare env and execute) got '
            'executed and how long it took to complete for each CBA python '
            'script.',
            ['step', 'blueprint_name', 'blueprint_version', 'script_name'])
        prometheus.REGISTRY._command_executor_histogram = histogram
    return histogram
async def test_lock_metric():
    lock = asyncio.Lock()
    metric = prometheus_client.Histogram(
        "test_metric",
        "Test",
    )
    assert metric.collect()[0].samples[0].value == 0
    async with metrics.watch_lock(metric, lock):
        assert lock.locked()
    assert not lock.locked()
    assert metric.collect()[0].samples[0].value == 1
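# `metrics.watch_lock` itself is not shown in this listing; a minimal sketch
# of a helper compatible with the test above, assuming it observes how long
# acquiring the lock took (so the first bucket's count goes from 0 to 1):
import contextlib
import time

@contextlib.asynccontextmanager
async def watch_lock(metric, lock):
    start = time.monotonic()
    async with lock:  # wait for and hold the lock around the body
        metric.observe(time.monotonic() - start)
        yield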
def write_mode(parser):
    # Start up the server to expose the metrics.
    pclient.start_http_server(int(parser.listen_address))
    # Generate some requests.
    s = []
    for i in range(int(parser.metric_count)):
        s.append(pclient.Gauge('random_metric_%s' % i, 'Random value metric'))
        s.append(pclient.Histogram('random_metric_histogram_%s' % i,
                                   'Random metric histogram'))
    while True:
        load_memory(s)
        time.sleep(float(parser.message_frequency) * 0.001)
def metrics_factory(registry):
    return {
        'plain_gauge': prometheus_client.Gauge('plain_gauge', 'Simple gauge',
                                               registry=registry),
        'instance_gauge': prometheus_client.Gauge('instance_gauge',
                                                  'Gauge with custom label',
                                                  ['instance'],
                                                  registry=registry),
        'service_gauge': prometheus_client.Gauge('service_gauge',
                                                 'Gauge with "service" label',
                                                 ['service'],
                                                 registry=registry),
        'counter': prometheus_client.Counter('counter', 'Simple counter',
                                             registry=registry),
        'summary': prometheus_client.Summary('summary', 'Simple summary',
                                             registry=registry),
        'histogram': prometheus_client.Histogram('histogram',
                                                 'Histogram with custom and '
                                                 '"service" labels',
                                                 ['instance', 'service'],
                                                 registry=registry),
    }
def init_prometheus_client(REGISTRY):
    prome_dict = {}
    prome_dict['image_counter'] = prometheus_client.Counter(
        'image_counter', 'all inferred images counter by component',
        ['component', 'pred_class'], registry=REGISTRY)
    # prome_dict['model_health'] = prometheus_client.Enum(
    #     'model_health', 'model_health return post status code',
    #     ['model_name'], states=['healthy', 'error'], registry=REGISTRY)
    prome_dict['model_health'] = prometheus_client.Gauge(
        'model_health', 'model_health return post status code',
        ['model_name'], registry=REGISTRY)
    prome_dict['sec_perimg_his'] = prometheus_client.Histogram(
        'sec_perimg_Histogram', 'Histogram of time taken per request',
        buckets=create_bucket_tuple(0.6, 1e-3), registry=REGISTRY)
    # Tracks per-request speed on GPU/CPU through gRPC/RESTful
    prome_dict['sec_perimg_gau'] = prometheus_client.Gauge(
        'sec_perimg_Gauge', 'Gauge of time taken per request',
        registry=REGISTRY)
    # How many images were processed (gateway efficiency)
    prome_dict['total_res_img_counter'] = prometheus_client.Counter(
        'total_res_img_counter', 'Total requested/responded images',
        registry=REGISTRY)
    # How many requests arrived (gateway efficiency)
    prome_dict['req_counter'] = prometheus_client.Counter(
        'req_counter', 'Count of requests', registry=REGISTRY)
    # How long every request took (gateway efficiency)
    prome_dict['pro_time_counter'] = prometheus_client.Counter(
        'pro_time_counter', 'Count of time taken every request',
        registry=REGISTRY)
    # How many requests hit components that are not online
    prome_dict['no_infer_img_counter'] = prometheus_client.Counter(
        'no_infer_img_counter', 'Count of total non-inferred images',
        registry=REGISTRY)
    # How many images were inferred in total
    prome_dict['total_inferred_img_counter'] = prometheus_client.Counter(
        'total_inferred_img_counter',
        'Count of total inferred images ignoring kinds of images',
        registry=REGISTRY)
    # For dividing other metrics such as predicts_duration_secs
    prome_dict['inferred_img_counter'] = prometheus_client.Counter(
        'inferred_img_counter',
        'Count of inferred images with final outcome by model_name',
        ['model_name'], registry=REGISTRY)
    # For dividing other metrics such as confidence_sum
    prome_dict['predicts_img_counter'] = prometheus_client.Counter(
        'predicts_img_counter',
        'Count of inferred images with predictions by model_name & pred_class',
        ['model_name', 'pred_class'], registry=REGISTRY)
    prome_dict['outline_img_counter'] = prometheus_client.Counter(
        'outline_img_counter',
        'outline image counter by model name judged by checkpoints',
        ['model_name'], registry=REGISTRY)
    prome_dict['predicts_duration_secs'] = prometheus_client.Counter(
        'predicts_duration_secs',
        'predicts_duration_secs by each online model',
        ['model_name'], registry=REGISTRY)
    prome_dict['confidence_sum'] = prometheus_client.Counter(
        'confidence_sum', 'sum of confidences by each online model',
        ['model_name', 'pred_class'], registry=REGISTRY)
    prome_dict['inference_version'] = prometheus_client.Gauge(
        'inference_version', 'current inference version by each online model',
        ['model_name'], registry=REGISTRY)
    return prome_dict
def _create_metrics(self): """Creates a registry and records metrics""" self.registry = prometheus_client.CollectorRegistry() self.quota_free_count = prometheus_client.Gauge( 'kuryr_quota_free_count', 'Amount of quota available' ' for the network resource', labelnames={'resource'}, registry=self.registry) self.port_quota_per_subnet = prometheus_client.Gauge( 'kuryr_port_quota_per_subnet', 'Amount of ports available' ' on Subnet', labelnames={'subnet_id', 'subnet_name'}, registry=self.registry) self.lbs_members_count = prometheus_client.Gauge( 'kuryr_critical_lb_members_count', 'Amount of members per ' 'critical Load Balancer pool', labelnames={'lb_name', 'lb_pool_name'}, registry=self.registry) self.lbs_state = prometheus_client.Enum('kuryr_critical_lb_state', 'Critical Load Balancer State', labelnames={'lb_name'}, states=[ 'ERROR', 'ACTIVE', 'DELETED', 'PENDING_CREATE', 'PENDING_UPDATE', 'PENDING_DELETE' ], registry=self.registry) buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF) self.pod_creation_latency = prometheus_client.Histogram( 'kuryr_pod_creation_latency', 'Time taken for a pod to have' ' Kuryr annotations set', buckets=buckets, registry=self.registry) self.load_balancer_readiness = prometheus_client.Counter( 'kuryr_load_balancer_readiness', 'This counter is increased when ' 'Kuryr notices that an Octavia load balancer is stuck in an ' 'unexpected state', registry=self.registry) self.port_readiness = prometheus_client.Counter( 'kuryr_port_readiness', 'This counter is increased when Kuryr ' 'times out waiting for Neutron to move port to ACTIVE', registry=self.registry)
def __init__(self):
    logger.debug("Initialising metric interceptor")
    self.registry = prometheus.CollectorRegistry()
    self.c = prometheus.Counter(
        "server_request_counter",
        "Number of times this API has been called",
        registry=self.registry,
        labelnames=['grpc_type', 'grpc_service', 'grpc_method'])
    self.g = prometheus.Gauge(
        'server_last_call_time',
        'Last time this API was called',
        registry=self.registry,
        labelnames=['grpc_type', 'grpc_service', 'grpc_method'])
    self.h = prometheus.Histogram(
        'server_request_latency',
        'Amount of time for a request to be processed',
        registry=self.registry,
        labelnames=['grpc_type', 'grpc_service', 'grpc_method'])
def get_prometheus_histogram(self):
    registry = self.get_prometheus_registry()
    if not registry or not prometheus_client:
        return
    # We have to hide a reference to the histogram on the registry
    # object, because its collectors must be singletons for a given
    # registry but register at creation time.
    hist = getattr(registry, '_openstacksdk_histogram', None)
    if not hist:
        hist = prometheus_client.Histogram(
            'openstack_http_response_time',
            'Time taken for an http response to an OpenStack service',
            labelnames=[
                'method', 'endpoint', 'service_type', 'status_code'
            ],
            registry=registry,
        )
        registry._openstacksdk_histogram = hist
    return hist
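# Why the cached reference above matters: registering the same metric name
# twice on one registry raises ValueError, so the getattr check avoids a
# second registration. A small standalone demonstration:
import prometheus_client

registry = prometheus_client.CollectorRegistry()
prometheus_client.Histogram('demo_seconds', 'demo', registry=registry)
try:
    prometheus_client.Histogram('demo_seconds', 'demo', registry=registry)
except ValueError as e:
    print('duplicate registration rejected:', e)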
def backends_worker(args, config, db):
    class BackendAmqp(messagebus.Amqp):
        def on_message(self, payload, message):
            key = message.delivery_info['routing_key']
            logger.info('Run backends, key: {}'.format(key))
            if key == AMQP_NEW_GENERATION_KEY:
                m_events.labels('new_generation', 'out').inc()
            elif key == AMQP_NEW_POINTER_KEY:
                m_events.labels('new_pointer', 'out').inc()
            else:
                logger.error('Unknown key {}'.format(key))
            start = time.time()
            run_backends(args, config, db, key)
            elapsed = time.time() - start
            logger.info('Running all backends (key {}) took {:.2f}s'.format(
                key, elapsed))
            m_backend_time.observe(elapsed)
            message.ack()

    # Will create initial versions
    run_backends(args, config, db, AMQP_NEW_GENERATION_KEY)
    # Routing key not needed on a broadcast (fanout) exchange
    queue = [(socket.gethostname(), '', True)]
    amqp = BackendAmqp(args.amqp_url, AMQP_EXCHANGE_FANOUT, 'fanout', queue,
                       queue)
    amqp.config = config
    amqp.db = db
    if args.metrics:
        m_backend_time = prometheus_client.Histogram(
            'osmtracker_backend_processing_time_seconds',
            'Backend refresh time (seconds)',
            buckets=(.1, .5, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 25.0,
                     30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 80.0, 100.0,
                     130.0, 160.0, float("inf")))
        m_events = prometheus_client.Counter('osmtracker_events',
                                             'Number of events', EVENT_LABELS)
    logger.debug(
        'Starting backend worker, queue/routing-key: {}'.format(queue))
    amqp.run()
def __init__(self, metadata, registry):
    """Instantiates a new _Metric.

    Args:
      metadata: An rdf_stats.MetricMetadata instance describing this _Metric.
      registry: A prometheus_client.Registry instance.

    Raises:
      ValueError: metadata contains an unknown metric_type.
    """
    self.metadata = metadata
    self.fields = stats_utils.FieldDefinitionTuplesFromProtos(
        metadata.fields_defs)
    field_names = [name for name, _ in self.fields]

    if metadata.metric_type == rdf_stats.MetricMetadata.MetricType.COUNTER:
        self.metric = prometheus_client.Counter(
            metadata.varname,
            metadata.docstring,
            labelnames=field_names,
            registry=registry)
    elif metadata.metric_type == rdf_stats.MetricMetadata.MetricType.EVENT:
        bins = metadata.bins or [
            0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 4, 5, 6,
            7, 8, 9, 10, 15, 20, 50, 100
        ]
        self.metric = prometheus_client.Histogram(
            metadata.varname,
            metadata.docstring,
            labelnames=field_names,
            buckets=bins,
            registry=registry)
    elif metadata.metric_type == rdf_stats.MetricMetadata.MetricType.GAUGE:
        self.metric = prometheus_client.Gauge(
            metadata.varname,
            metadata.docstring,
            labelnames=field_names,
            registry=registry)
    else:
        raise ValueError(
            "Unknown metric type: {!r}".format(metadata.metric_type))
def _create_metrics(self): """Creates a registry and records a new Gauge metric""" self.registry = prometheus_client.CollectorRegistry() self.quota_free_count = prometheus_client.Gauge( 'kuryr_quota_free_count', 'Amount of quota available' ' for the network resource', labelnames={'resource'}, registry=self.registry) self.port_quota_per_subnet = prometheus_client.Gauge( 'kuryr_port_quota_per_subnet', 'Amount of ports available' ' on Subnet', labelnames={'subnet_id', 'subnet_name'}, registry=self.registry) buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF) self.pod_creation_latency = prometheus_client.Histogram( 'kuryr_pod_creation_latency', 'Time taken for a pod to have' ' Kuryr annotations set', buckets=buckets, registry=self.registry)
def producer(name, instance, host, port):
    producer_latency = prometheus_client.Histogram(
        'producer_latency', 'producer latency', ['app', 'instance', 'topic'])
    producer_counter = prometheus_client.Counter(
        'producer_counter', 'producer counter', ['app', 'instance', 'topic'])
    for _ in range(100):
        try:
            producer = kafka.KafkaProducer(
                bootstrap_servers='{}:{}'.format(host, port))
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)

    def produce(topic, key, data):
        with producer_latency.labels(app=name, instance=instance,
                                     topic=topic).time():
            producer.send(topic, key=key.encode('utf-8'),
                          value=json.dumps(data).encode('utf-8'))
        producer_counter.labels(app=name, instance=instance, topic=topic).inc()

    return produce