def process_valid_configs(data, prefix):
    """Serve disk-usage gauges over HTTP and refresh them forever.

    ``data`` is the parsed config dict: each ``valid_path_configs`` entry
    carries ``path``, ``users`` and ``desc``; ``port`` overrides HTTP_PORT.
    ``prefix`` namespaces the metric names.  This function never returns.
    """
    # setup infra
    totalGauge = PC.Gauge('{}_total_space_gigabytes'.format(prefix),
                          'Total bytes in the mount-point',
                          ['desc', 'path', 'users'])
    availGauge = PC.Gauge('{}_avail_space_gigabytes'.format(prefix),
                          'Total available space in the mount-point',
                          ['desc', 'path', 'users'])
    usedGauge = PC.Gauge('{}_used_to_total_ratio'.format(prefix),
                         'Ratio of used-up space in mount-point',
                         ['desc', 'path', 'users'])
    PC.start_http_server(int(data.get('port', HTTP_PORT)))
    while True:
        for entry in data.get('valid_path_configs', []):
            # users are sorted then joined so the label value is stable
            # across runs regardless of config ordering
            path, users, desc = entry['path'], ','.join(sorted(
                entry['users'])), entry['desc']
            total, used, avail = get_usage(path)
            # ratio defaults to 1 when total is 0 #to raise alarm
            usedRatio = used / total if total else 1
            totalGauge.labels(desc=desc, path=path, users=users).set(total)
            availGauge.labels(desc=desc, path=path, users=users).set(avail)
            usedGauge.labels(desc=desc, path=path, users=users).set(usedRatio)
            logger.info(
                "+ metric<path={}, total={}, avail={}, usedRatio={}>".format(
                    path, total, avail, usedRatio))
        # for
        logger.info("sleep {}".format(SLEEP_INTERVAL_SEC))
        time.sleep(SLEEP_INTERVAL_SEC)
def after_process_boot(self, broker): os.environ["prometheus_multiproc_dir"] = DB_PATH # This import MUST happen at runtime, after process boot and # after the env variable has been set up. import prometheus_client as prom self.logger.debug("Setting up metrics...") registry = prom.CollectorRegistry() self.total_messages = prom.Counter( "dramatiq_messages_total", "The total number of messages processed.", ["queue_name", "actor_name"], registry=registry, ) self.total_errored_messages = prom.Counter( "dramatiq_message_errors_total", "The total number of errored messages.", ["queue_name", "actor_name"], registry=registry, ) self.total_retried_messages = prom.Counter( "dramatiq_message_retries_total", "The total number of retried messages.", ["queue_name", "actor_name"], registry=registry, ) self.total_rejected_messages = prom.Counter( "dramatiq_message_rejects_total", "The total number of dead-lettered messages.", ["queue_name", "actor_name"], registry=registry, ) self.total_revived_messages = prom.Counter( "dramatiq_message_revives_total", "The total number of messages revived from dead workers.", ["queue_name", "actor_name"], registry=registry, ) self.inprogress_messages = prom.Gauge( "dramatiq_messages_inprogress", "The number of messages in progress.", ["queue_name", "actor_name"], registry=registry, multiprocess_mode="livesum", ) self.inprogress_delayed_messages = prom.Gauge( "dramatiq_delayed_messages_inprogress", "The number of delayed messages in memory.", ["queue_name", "actor_name"], registry=registry, ) self.message_durations = prom.Histogram( "dramatiq_message_duration_milliseconds", "The time spent processing messages.", ["queue_name", "actor_name"], buckets=(5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000, 30000, 60000, 600000, 900000, float("inf")), registry=registry, )
def _write_response(self, writer):
    """Write a plain HTTP/1.0 metrics response for the LDAP sync probe.

    Builds a throwaway registry so each scrape reflects the current probe
    only: ``ldapsync_up`` (1/0 connectivity) and, when the counter can be
    read, ``ldapsync_delay_seconds`` (replication delay).

    :param writer: stream-like object with a ``write(bytes)`` method.
    """
    registry = prometheus_client.CollectorRegistry()
    status_gauge = prometheus_client.Gauge(
        "ldapsync_up",
        "Connectivity to LDAP server",
        registry=registry,
    )
    try:
        ctr = self.read_counter()
    except Exception:
        # Counter unreadable: report the server as down and skip the delay
        # gauge entirely -- no meaningful value could be exported.
        # (The original bound the exception as `exc` but never used it.)
        status_gauge.set(0)
    else:
        status_gauge.set(1)
        delay_gauge = prometheus_client.Gauge(
            "ldapsync_delay_seconds",
            "Delay of LDAP replication in seconds",
            registry=registry,
        )
        # NOTE(review): naive UTC timestamp; assumes ctr is also a UTC
        # epoch value -- confirm against read_counter's source.
        now = datetime.utcnow().timestamp()
        delay_gauge.set(now - ctr)
    writer.write(b"HTTP/1.0 200 OK\r\n")
    writer.write("Content-Type: {}\r\n".format(
        prometheus_client.exposition.CONTENT_TYPE_LATEST).encode("utf-8"))
    writer.write(b"\r\n")
    writer.write(prometheus_client.exposition.generate_latest(registry))
def __init__(self, push_gateaway, nodename, jobname="Duty"):
    '''Set up the candidate-monitoring metrics for one node.

    Arguments
    ---------
    push_gateaway: str
        Push gateway address.  NOTE(review): "gateaway" is a typo but is
        part of the public signature, so it is kept as-is.
    nodename : str
        Name of node
    jobname : str default of "Duty"
        Not important
    '''
    self.push_gateaway = push_gateaway
    self.registry = pc.CollectorRegistry()
    self.nodename = nodename
    self.jobname = jobname
    ### Defining metrics
    # Are there heimdall|fredda candidates?
    self.heimdall_up = pc.Enum('cands_heimdall_up',
                               'Heimdall candidates present',
                               states=['yes', 'no'],
                               labelnames=['node', 'antenna'])
    self.fredda_up = pc.Enum('cands_fredda_up',
                             'Fredda candidates present',
                             states=['yes', 'no'],
                             labelnames=['node', 'antenna'])
    # How many candidates
    self.heimdall_n = pc.Gauge('cands_heimdall_num',
                               'Heimdall candidates number',
                               labelnames=['node', 'antenna'])
    self.fredda_n = pc.Gauge('cands_fredda_num',
                             'Fredda candidates number',
                             labelnames=['node', 'antenna'])
def __init__(self):
    """Create the feed-update metric collectors and per-feed lookup caches."""
    # feed PK -> (system_id, feed_id), populated as feeds are observed
    self._feed_pk_to_system_id_and_feed_id = {}
    # feed PK -> pair of floats; presumably timing data for the last
    # successful update (used to derive update latency) -- confirm against
    # the update-recording code path.
    self._feed_pk_to_successful_update_data: typing.Dict[int, typing.Tuple[
        float, float]] = {}
    self._num_updates = prometheus.Counter(
        PROMETHEUS_NUM_UPDATES,
        "Number of feed updates of a given feed, status and result",
        ["system_id", "feed_id", "status", "result"],
    )
    self._last_update = prometheus.Gauge(
        PROMETHEUS_LAST_UPDATE,
        "Time since the last update of a given feed, status and result",
        ["system_id", "feed_id", "status", "result"],
    )
    self._num_entities = prometheus.Gauge(
        PROMETHEUS_NUM_ENTITIES,
        "Number of entities of a given type present from a given feed",
        ["system_id", "feed_id", "entity_type"],
    )
    self._update_latency = prometheus.Gauge(
        PROMETHEUS_SUCCESSFUL_UPDATE_LATENCY,
        "Number of seconds between successful updates of a feed",
        ["system_id", "feed_id"],
    )
def init(self):
    """Read environment configuration and create the inference FPS gauges.

    Resolves queue/broker hosts, inference type, model name and the model
    directory (INFER_MODEL_PATH takes precedence over MODEL_DIR).
    """
    LOG.info("Host name: %s", socket.gethostname())
    LOG.info("Host ip: %s", socket.gethostbyname(socket.gethostname()))
    self.in_queue_host = self.get_env("INPUT_QUEUE_HOST", "127.0.0.1")
    self.out_broker_host = self.get_env("OUTPUT_BROKER_HOST", "127.0.0.1")
    LOG.info("Input queue host: %s", self.in_queue_host)
    LOG.info("Output broker host: %s", self.out_broker_host)
    self.infer_type = self.get_env("INFER_TYPE", "face")
    self.model_name = self.get_env("INFER_MODEL_NAME")
    # INFER_MODEL_PATH env gets higher priority than MODEL_DIR.
    path = self.get_env("INFER_MODEL_PATH")
    if path:
        # Reuse the value already read instead of hitting the env again.
        self.model_dir = path
    else:
        self.model_dir = self.get_env("MODEL_DIR")
    LOG.info("model dir: %s", self.model_dir)
    LOG.info("model name: %s", self.model_name)
    # NOTE(review): attribute names keep the historical "guage" spelling
    # since other code may read them; only the help-text typo is fixed.
    self._guage_infer_fps = prom.Gauge('ei_infer_fps',
                                       'Total inference FPS')
    self._guage_drop_fps = prom.Gauge('ei_drop_fps', 'Drop frames for infer')
    self._guage_scale_ratio = prom.Gauge(
        'ei_scale_ratio',
        'Scale ratio for inference, (ei_infer_fps+ei_drop_fps)/ei_infer_fps'
    )
def __init__(self, bot, prefix) -> None:
    """Create the clustered GearBot metrics and register them on the bot.

    :param bot: bot instance exposing ``metrics_reg`` (a collector registry).
    :param prefix: string prepended to every metric name.
    """
    self.command_counter = prom.Counter(
        f"{prefix}_commands_ran", "How many times commands were ran",
        ["command_name", "cluster"])
    self.user_message_raw_count = prom.Counter(
        f"{prefix}_user_message_raw_count",
        "Raw count of how many messages we have seen from users", ["cluster"])
    self.bot_message_raw_count = prom.Counter(
        f"{prefix}_bot_message_raw_count",
        "Raw count of how many messages we have seen from bots", ["cluster"])
    # Help text fixed: "has send" -> "has sent".
    self.own_message_raw_count = prom.Counter(
        f"{prefix}_own_message_raw_count",
        "Raw count of how many messages GearBot has sent", ["cluster"])
    self.bot_guilds = prom.Gauge(f"{prefix}_guilds",
                                 "How many guilds the bot is in", ["cluster"])
    self.bot_users = prom.Gauge(f"{prefix}_users",
                                "How many users the bot can see", ["cluster"])
    self.bot_users_unique = prom.Gauge(
        f"{prefix}_users_unique", "How many unique users the bot can see",
        ["cluster"])
    self.bot_event_counts = prom.Counter(f"{prefix}_event_counts",
                                         "How much each event occurred",
                                         ["event_name", "cluster"])
    self.bot_latency = prom.Gauge(f"{prefix}_latency", "Current bot latency",
                                  ["cluster"])
    self.uid_usage = prom.Counter(
        f"{prefix}_context_uid_usage",
        "Times uid was used from the context command", ["type", "cluster"])
    self.userinfo_usage = prom.Counter(
        f"{prefix}_context_userinfo_usage",
        "Times userinfo was used from the context command",
        ["type", "cluster"])
    # Help text fixed: "inf serach" -> "inf search".
    self.inf_search_usage = prom.Counter(
        f"{prefix}_context_inf_search_usage",
        "Times inf search was used from the context command",
        ["type", "cluster"])
    # Register every collector on the bot's registry (same order as before).
    for metric in (
            self.command_counter,
            self.user_message_raw_count,
            self.bot_message_raw_count,
            self.bot_guilds,
            self.bot_users,
            self.bot_users_unique,
            self.bot_event_counts,
            self.own_message_raw_count,
            self.bot_latency,
            self.uid_usage,
            self.userinfo_usage,
            self.inf_search_usage,
    ):
        bot.metrics_reg.register(metric)
def run(args):
    """Run the S3-to-local sync loop, exporting progress metrics on :8087.

    :param args: parsed CLI namespace with ``s3uri``, ``localpath``,
        ``exclude`` and ``interval`` attributes.  Loops until a termination
        signal sets the exit event.
    """
    s3uri = args.s3uri
    localpath = args.localpath
    excludes = args.exclude
    interval = args.interval
    i = pc.Info('s3insync_version',
                'Version and config information for the client')
    i.info({
        'version': s3insync.__version__,
        'aws_repo': s3uri,
        'localpath': localpath,
    })
    start_time = pc.Gauge('s3insync_start_time',
                          'Time the sync process was started')
    start_time.set_to_current_time()
    last_sync = pc.Gauge('s3insync_last_sync_time',
                         'Time the last sync completed')
    op_count = pc.Counter('s3insync_operations', 'Count of operations',
                          labelnames=('type', ))
    failed_op_count = pc.Counter('s3insync_failed_operations',
                                 'Count of failed operations',
                                 labelnames=('type', ))
    files_in_s3 = pc.Gauge(
        's3insync_files_in_s3',
        'Number of files in S3',
    )
    pc.start_http_server(8087)
    src = r.S3Repo('s3', s3uri)
    dest = r.LocalFSRepo('fs', localpath,
                         os.path.join(os.getenv('HOME'), ".s3insync"))
    dest.ensure_directories()
    sync = sd.SyncDecider(excludes)
    set_exit = setup_signals()
    while not set_exit.is_set():
        logger.debug("Starting sync")
        start = time.monotonic()
        try:
            success, failures = sync.execute_sync(src, dest)
            # 'total' is the overall S3 object count; the rest are per-op.
            files_in_s3.set(success.pop('total', 0))
            set_op_counts(success, op_count)
            set_op_counts(failures, failed_op_count)
            last_sync.set_to_current_time()
        except Exception:
            # Typo fixed in the log message ("excute" -> "execute").
            logger.exception("Failed to execute sync")
        duration = time.monotonic() - start
        logger.debug("Stopping sync after %g secs", duration)
        # NOTE(review): max(30, ...) enforces a 30s *minimum* wait even when
        # interval - duration is smaller; if the intent was "never wait a
        # negative time" this should be max(0, ...) -- confirm before changing.
        set_exit.wait(max(30, interval - duration))
def flask_app_factory():  #test_class, test_names=None, service_name=None:
    '''Build a Flask app exposing ESGF service-status gauges at /metrics/.

    Adapted from Phil's
    https://github.com/cedadev/ceda-unittest-prometheus-wrapper/blob/devel/ceda/unittest_prometheus_wrapper/flask_app.py

    Gauges cover: ESGF search (index node), OPeNDAP download (data node),
    the processing node, and an overall up/down service status.
    Stretch goals noted in the original: ECMWF performance metrics, random
    thread/catalogue testing, compute-job runtimes and resource
    utilisation, request durations for index/data nodes.
    '''
    app = Flask(__name__)

    # (dict key, metric name, metric help) for every monitored sub-service;
    # the keys are what FlaskPrometheusView expects.
    gauge_specs = (
        ('esgf_search', 'esgf_search', 'esgf search test'),
        ('opendap', 'opendap_search', 'opendap search test'),
        ('node', 'node_test', 'processing node test'),
        ('service', 'overall', 'up(1)/down(0) status of service'),
    )
    service_status_list = {
        key: pc.Gauge(metric_name, help_text)
        for key, metric_name, help_text in gauge_specs
    }

    flask_view = FlaskPrometheusView(service_status_list)
    app.add_url_rule('/metrics/', 'metrics', flask_view)
    return app
def add_exporter_metrics(metrics):
    """Refresh the exporter's self-monitoring gauges.

    Lazily creates (on first call) and then updates two gauges under
    ``metrics['exporter']``: current memory usage in MB and the number of
    tracked user metrics.

    :param metrics: nested dict with at least 'exporter' and 'users' keys.
    """
    if 'memory' not in metrics['exporter']:
        metrics['exporter']['memory'] = prometheus.Gauge(
            'mqtt_exporter_usage_memory_mb', 'Memory usage')
    metrics['exporter']['memory'].set(getCurrentMemoryUsage())
    if 'metrics_total' not in metrics['exporter']:
        metrics['exporter']['metrics_total'] = prometheus.Gauge(
            'mqtt_exporter_metrics_total', 'Total metrics')
    # len() of the dict directly -- no need to materialize .keys()
    metrics['exporter']['metrics_total'].set(len(metrics['users']))
class Metric(object):
    """A namespace for our runtime metrics"""
    # Total kafka records consumed by the msearch daemon.
    RECORDS_PROCESSED = prometheus_client.Counter(
        'mjolnir_msearch_records_total', 'Number of kafka records processed')
    # Current polling interval; a gauge because it can be tuned up or down.
    INTERVAL_VALUE = prometheus_client.Gauge(
        'mjolnir_msearch_interval_sec',
        'Seconds between polling elasticsearch for qps stats')
    # Exponential moving average of canary-index QPS as seen locally.
    EMA = prometheus_client.Gauge('mjolnir_msearch_ema_qps',
                                  'Local estimate of canary index qps')
    # Wall-clock time per kafka batch (Summary gives count + sum).
    PROCESS_BATCH = prometheus_client.Summary(
        'mjolnir_msearch_process_batch_seconds',
        'Time taken to process a batch of records from kafka')
def metrics_factory(registry):
    """Return a name->collector mapping with one of each collector flavour.

    Every collector is attached to *registry* rather than the global default,
    so tests can scrape them in isolation.
    """
    collectors = {}
    collectors['plain_gauge'] = prometheus_client.Gauge(
        'plain_gauge', 'Simple gauge', registry=registry)
    collectors['instance_gauge'] = prometheus_client.Gauge(
        'instance_gauge', 'Gauge with custom label', ['instance'],
        registry=registry)
    collectors['service_gauge'] = prometheus_client.Gauge(
        'service_gauge', 'Gauge with "service" label', ['service'],
        registry=registry)
    collectors['counter'] = prometheus_client.Counter(
        'counter', 'Simple counter', registry=registry)
    collectors['summary'] = prometheus_client.Summary(
        'summary', 'Simple summary', registry=registry)
    collectors['histogram'] = prometheus_client.Histogram(
        'histogram', 'Histogram with custom and "service" labels',
        ['instance', 'service'], registry=registry)
    return collectors
def init_prometheus_client(REGISTRY):
    """Create all inference-gateway collectors on REGISTRY.

    Returns a dict keyed by short metric aliases; callers look collectors up
    by these keys when recording inference activity.

    NOTE(review): 'sec_perimg_Histogram' / 'sec_perimg_Gauge' contain
    uppercase, which is unconventional for Prometheus metric names, but
    renaming would break existing dashboards -- left as-is.
    """
    prome_dict = {}
    prome_dict['image_counter'] = prometheus_client.Counter(
        'image_counter', 'all inferred images counter by component',
        ['component', 'pred_class'], registry=REGISTRY)
    # prome_dict['model_health'] = prometheus_client.Enum(
    #     'model_health', 'model_health return post status code', ['model_name'],
    #     states=['healthy', 'error'], registry=REGISTRY)
    prome_dict['model_health'] = prometheus_client.Gauge(
        'model_health', 'model_health return post status code', ['model_name'],
        registry=REGISTRY)
    # Per-image latency distribution; bucket layout built by helper. #loop
    prome_dict['sec_perimg_his'] = prometheus_client.Histogram(
        'sec_perimg_Histogram', 'Histogram of time taken quantity per request',
        buckets=create_bucket_tuple(0.6,1e-3),registry=REGISTRY)
    # For every speed on GPU/CPU through grpc/restful
    prome_dict['sec_perimg_gau'] = prometheus_client.Gauge(
        'sec_perimg_Gauge', 'Gauge of time taken quantity per request',
        registry=REGISTRY)
    # For knowing how many images processed -> gateway efficiency
    prome_dict['total_res_img_counter'] = prometheus_client.Counter(
        'total_res_img_counter', 'Total requested/responded images',
        registry=REGISTRY)
    # For knowing how many requests there are -> gateway efficiency
    prome_dict['req_counter'] = prometheus_client.Counter(
        'req_counter', 'Count of requests', registry=REGISTRY)
    # For knowing how long every request took -> gateway efficiency
    prome_dict['pro_time_counter'] = prometheus_client.Counter(
        'pro_time_counter', 'Count of time taken every request',
        registry=REGISTRY)
    # For knowing how many not-online components were requested
    prome_dict['no_infer_img_counter'] = prometheus_client.Counter(
        'no_infer_img_counter', 'Count of total no inferred images',
        registry=REGISTRY)
    # For knowing how many images were processed in total
    prome_dict['total_inferred_img_counter'] = prometheus_client.Counter(
        'total_inferred_img_counter',
        'Count of total inferred images ignoring kinds of images',
        registry=REGISTRY)
    # For dividing other metrics such as predicts_duration_secs
    prome_dict['inferred_img_counter'] = prometheus_client.Counter(
        'inferred_img_counter',
        'Count of inferred images with final outcome by model_name',
        ['model_name'], registry=REGISTRY)
    # For dividing other metrics such as confidence_sum
    prome_dict['predicts_img_counter'] = prometheus_client.Counter(
        'predicts_img_counter',
        'Count of inferred images with preditions by model_name & pred_class',
        ['model_name', 'pred_class'], registry=REGISTRY)
    prome_dict['outline_img_counter'] = prometheus_client.Counter(
        'outline_img_counter',
        'outline image counter by model name judged by checkpoints',
        ['model_name'], registry=REGISTRY)
    prome_dict['predicts_duration_secs'] = prometheus_client.Counter(
        'predicts_duration_secs',
        'predicts_duration_secs by each online model', ['model_name'],
        registry=REGISTRY)
    prome_dict['confidence_sum'] = prometheus_client.Counter(
        'confidence_sum', 'sum of confidences by each online model',
        ['model_name', 'pred_class'], registry=REGISTRY)
    prome_dict['inference_version'] = prometheus_client.Gauge(
        'inference_version', 'current inference version by each online model',
        ['model_name'], registry=REGISTRY)
    return prome_dict
def __init__(self, bot, prefix) -> None:
    """Create the single-process GearBot metrics and register them.

    Gauges for guild/user counts and latency are wired via set_function so
    they read live values from the bot at scrape time rather than being
    pushed to.
    """
    self.command_counter = prom.Counter(
        f"{prefix}_commands_ran", "How many times commands were ran", [
            "command_name",
        ])
    self.guild_messages = prom.Counter(
        f"{prefix}_messages_sent", "What messages have been sent and by who",
        ["guild_id"])
    self.user_message_raw_count = prom.Counter(
        f"{prefix}_user_message_raw_count",
        "Raw count of how many messages we have seen from users")
    self.bot_message_raw_count = prom.Counter(
        f"{prefix}_bot_message_raw_count",
        "Raw count of how many messages we have seen from bots")
    self.own_message_raw_count = prom.Counter(
        f"{prefix}_own_message_raw_count",
        "Raw count of how many messages GearBot has send")
    # Live-value gauges: sampled from the bot object at scrape time.
    self.bot_guilds = prom.Gauge(f"{prefix}_guilds",
                                 "How many guilds the bot is in")
    self.bot_guilds.set_function(lambda: len(bot.guilds))
    self.bot_users = prom.Gauge(f"{prefix}_users",
                                "How many users the bot can see")
    self.bot_users.set_function(
        lambda: sum(len(g.members) for g in bot.guilds))
    self.bot_users_unique = prom.Gauge(
        f"{prefix}_users_unique", "How many unique users the bot can see")
    self.bot_users_unique.set_function(lambda: len(bot.users))
    self.bot_event_counts = prom.Counter(f"{prefix}_event_counts",
                                         "How much each event occurred",
                                         ["event_name"])
    self.bot_latency = prom.Gauge(f"{prefix}_latency", "Current bot latency")
    self.bot_latency.set_function(lambda: bot.latency)
    # Register everything on the bot's registry.
    bot.metrics_reg.register(self.command_counter)
    bot.metrics_reg.register(self.guild_messages)
    bot.metrics_reg.register(self.user_message_raw_count)
    bot.metrics_reg.register(self.bot_message_raw_count)
    bot.metrics_reg.register(self.bot_guilds)
    bot.metrics_reg.register(self.bot_users)
    bot.metrics_reg.register(self.bot_users_unique)
    bot.metrics_reg.register(self.bot_event_counts)
    bot.metrics_reg.register(self.own_message_raw_count)
    bot.metrics_reg.register(self.bot_latency)
def main():
    """Poll a Mumble ICE interface and export per-server gauges forever.

    Parses CLI arguments, starts the metrics HTTP server, then loops over
    all booted virtual servers at the requested interval.  Returns 0 only
    if the loop ever exits (it normally runs until killed).
    """
    parser = argparse.ArgumentParser(
        description='Prometheus statistics for a Mumble ICE interface')
    parser.add_argument('-l', '--listen', help='Port to listen on',
                        default=9123, type=int)
    parser.add_argument('-H', '--host', help='Host of the Ice interface',
                        default='127.0.0.1')
    parser.add_argument('-p', '--port', help='Port of the Ice interface',
                        default=6502, type=int)
    parser.add_argument('-i', '--interval', help='Interval in seconds',
                        default=60, type=int)
    parser.add_argument('--secret', help='The read secret', default=None)
    parser.add_argument('-v', '--verbose', help='Verbose',
                        action='store_true')
    args = parser.parse_args()
    node.start_http_server(args.listen)
    gauges = {
        'users': node.Gauge('mumble_users_connected',
                            'Number of connected users',
                            ['ice_server', 'server_id']),
        'uptime': node.Gauge('mumble_uptime', 'Virtual uptime',
                             ['ice_server', 'server_id']),
        'chancount': node.Gauge('mumble_channels', 'Number of channels',
                                ['ice_server', 'server_id']),
        'bancount': node.Gauge('mumble_users_banned',
                               'Number of banned users',
                               ['ice_server', 'server_id']),
    }
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
    ice_server = '%s:%d' % (args.host, args.port)
    with ice_connect(args.host, args.port, args.secret) as meta:
        while True:
            logger.info('gathering info')
            t1 = time.time()
            for server in meta.getBootedServers():
                g_user = len(server.getUsers())
                g_uptime = server.getUptime()
                g_chancount = len(server.getChannels())
                g_bancount = len(server.getBans())
                logger.debug('mumble_users_connected: %d' % g_user)
                logger.debug('mumble_uptime: %d' % g_uptime)
                logger.debug('mumble_channels: %d' % g_chancount)
                logger.debug('mumble_users_banned: %d' % g_bancount)
                labels = {'server_id': server.id(), 'ice_server': ice_server}
                # BUG FIX: labels() must receive the label values as keyword
                # arguments; passing the dict positionally hands it over as a
                # single label value and raises ValueError for 2-label gauges.
                gauges['users'].labels(**labels).set(g_user)
                gauges['uptime'].labels(**labels).set(g_uptime)
                gauges['chancount'].labels(**labels).set(g_chancount)
                gauges['bancount'].labels(**labels).set(g_bancount)
            # Sleep only for whatever is left of the interval.
            time_to_wait = args.interval - (time.time() - t1)
            if time_to_wait > 0:
                time.sleep(time_to_wait)
    return 0
def __init__(self, bot: Life) -> None:
    """Set up Life bot stats collectors and start the periodic tasks."""
    self.bot = bot
    # Handle to this process for resource stats elsewhere.
    self.process = psutil.Process()
    self.ready = False
    # Full metric name becomes guild_counts (namespace 'guild'), distinct
    # from the life_counts gauge below despite the shared base name.
    self.guild_stats = prometheus_client.Gauge(
        'counts', documentation='Guild counts', namespace='guild',
        labelnames=['guild_id', 'count'])
    self.socket_responses = prometheus_client.Counter(
        'socket_responses', documentation='Socket responses',
        namespace='life', labelnames=['response'])
    self.socket_events = prometheus_client.Counter(
        'socket_events', documentation='Socket events', namespace='life',
        labelnames=['event'])
    self.counters = prometheus_client.Counter(
        'stats', documentation='Life stats', namespace='life',
        labelnames=['stat'])
    self.gauges = prometheus_client.Gauge(
        'counts', documentation='Life counts', namespace='life',
        labelnames=['count'])
    # Discord gateway opcode -> readable name, used when labelling
    # socket traffic.
    self.op_types = {
        0: 'DISPATCH',
        1: 'HEARTBEAT',
        2: 'IDENTIFY',
        3: 'PRESENCE',
        4: 'VOICE_STATE',
        5: 'VOICE_PING',
        6: 'RESUME',
        7: 'RECONNECT',
        8: 'REQUEST_MEMBERS',
        9: 'INVALIDATE_SESSION',
        10: 'HELLO',
        11: 'HEARTBEAT_ACK',
        12: 'GUILD_SYNC',
    }
    # Periodic collection loops; presumably tasks defined elsewhere on
    # this class -- confirm they are cancelled on unload.
    self.stats_five_minutes.start()
    self.stats_thirty_seconds.start()
def __init__(self, data_manager, config):
    """Start the metrics HTTP endpoint and create container-update collectors.

    :param data_manager: source of container state (stored for later use).
    :param config: provides prometheus_port and prometheus_addr.
    """
    self.config = config
    self.data_manager = data_manager
    # Exposition endpoint starts immediately on construction.
    self.http_server = prometheus_client.start_http_server(
        self.config.prometheus_port, addr=self.config.prometheus_addr)
    self.updated_containers_counter = prometheus_client.Counter(
        'containers_updated', 'Count of containers updated',
        ['socket', 'container'])
    self.monitored_containers_gauge = prometheus_client.Gauge(
        'containers_being_monitored', 'Gauge of containers being monitored',
        ['socket'])
    # NOTE(review): described as a "Count" but declared as a Gauge --
    # presumably so it can be set to an absolute total; confirm intent.
    self.updated_all_containers_gauge = prometheus_client.Gauge(
        'all_containers_updated', 'Count of total updated', ['socket'])
    self.logger = getLogger()
def _create_metrics(self):
    """Creates a registry and records metrics.

    All collectors are attached to a fresh private registry stored on
    ``self.registry``.  labelnames are passed as lists: the previous code
    used set literals, whose iteration order is undefined, so multi-label
    metrics could be created with nondeterministic label order.
    """
    self.registry = prometheus_client.CollectorRegistry()
    self.quota_free_count = prometheus_client.Gauge(
        'kuryr_quota_free_count', 'Amount of quota available'
        ' for the network resource', labelnames=['resource'],
        registry=self.registry)
    self.port_quota_per_subnet = prometheus_client.Gauge(
        'kuryr_port_quota_per_subnet', 'Amount of ports available'
        ' on Subnet', labelnames=['subnet_id', 'subnet_name'],
        registry=self.registry)
    self.lbs_members_count = prometheus_client.Gauge(
        'kuryr_critical_lb_members_count', 'Amount of members per '
        'critical Load Balancer pool',
        labelnames=['lb_name', 'lb_pool_name'],
        registry=self.registry)
    self.lbs_state = prometheus_client.Enum(
        'kuryr_critical_lb_state', 'Critical Load Balancer State',
        labelnames=['lb_name'],
        states=['ERROR', 'ACTIVE', 'DELETED', 'PENDING_CREATE',
                'PENDING_UPDATE', 'PENDING_DELETE'],
        registry=self.registry)
    # Pod-creation latency buckets: 10s steps up to 2 minutes, then +Inf.
    buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF)
    self.pod_creation_latency = prometheus_client.Histogram(
        'kuryr_pod_creation_latency', 'Time taken for a pod to have'
        ' Kuryr annotations set', buckets=buckets, registry=self.registry)
    self.load_balancer_readiness = prometheus_client.Counter(
        'kuryr_load_balancer_readiness', 'This counter is increased when '
        'Kuryr notices that an Octavia load balancer is stuck in an '
        'unexpected state', registry=self.registry)
    self.port_readiness = prometheus_client.Counter(
        'kuryr_port_readiness', 'This counter is increased when Kuryr '
        'times out waiting for Neutron to move port to ACTIVE',
        registry=self.registry)
def __init__( self, reporter_order=( 'hdf5_reporter', 'dashboard_reporter', ), ): self.reporter_order = reporter_order # counters self.cycle_counter = prom.Counter('wepy_cycle_idx', "") # gauges self.walker_size_g = prom.Gauge('wepy_walker_single_size_bytes', "") self.ensemble_size_g = prom.Gauge('wepy_walker_ensemble_size_bytes', "") self.runner_size_g = prom.Gauge('wepy_runner_size_bytes', "") self.resampler_size_g = prom.Gauge('wepy_resampler_size_bytes', "") self.bc_size_g = prom.Gauge('wepy_bc_size_bytes', "") self.mapper_size_g = prom.Gauge('wepy_mapper_size_bytes', "") self.sim_manager_size_g = prom.Gauge('wepy_sim_manager_size_bytes', "") self.reporter_size_g = prom.Gauge( 'wepy_reporters_size_bytes', "", ["name"], )
def setUp(self):
    """Build a private registry holding one plain and one labelled gauge,
    plus the test-case object under test."""
    self.registry = prometheus_client.CollectorRegistry()
    self.some_gauge = prometheus_client.Gauge(
        'some_gauge', 'Some gauge.', registry=self.registry)
    self.some_gauge.set(42)
    self.some_labelled_gauge = prometheus_client.Gauge(
        'some_labelled_gauge', 'Some labelled gauge.',
        ['labelred', 'labelblue'], registry=self.registry)
    # Populate every red/blue combination with a distinct value.
    samples = (
        ('pink', 'indigo', 1),
        ('pink', 'royal', 2),
        ('carmin', 'indigo', 3),
        ('carmin', 'royal', 4),
    )
    for red, blue, value in samples:
        self.some_labelled_gauge.labels(red, blue).set(value)
    self.t = SomeTestCase()
def _config(self, fields):
    """Build per-field Victron collectors.

    Returns ``(metrics, updated, blocks)`` where ``metrics`` maps field
    label -> collector (Info for strings, Enum + companion numeric Gauge
    for enums, unit-suffixed Gauge otherwise), ``updated`` is a
    last-block-received gauge and ``blocks`` a received-block counter.

    NOTE(review): the ``fields`` parameter is unused -- the loop reads
    ``defs.FIELDS`` instead; kept for signature compatibility.
    """
    metrics = {}
    labels = ['serial_number', 'product_id']
    for f in defs.FIELDS:
        label = f.label.replace('#', '')
        name = 'victron_%s' % label.lower()
        kind = f.kind()
        if isinstance(kind, pint.Quantity):
            unit = str(kind.units)
        else:
            unit = _UNITS.get(f.unit, f.unit)
        if unit == 'hour * watt':
            unit = 'wh'
        if kind == str:
            # reuse the shared labels list instead of repeating the literal
            metrics[f.label] = prometheus_client.Info(name, f.description,
                                                      labelnames=labels)
        elif _is_enum(kind):
            states = [x.name.lower() for x in kind]
            metrics[f.label] = prometheus_client.Enum(
                name, f.description, labelnames=labels, states=states)
            # numeric twin so the raw enum value can be graphed too
            metrics[f.label + '_value'] = prometheus_client.Gauge(
                name + '_value', f.description, labelnames=labels)
        else:
            metrics[f.label] = prometheus_client.Gauge(
                name, f.description, labelnames=labels, unit=unit)
    updated = prometheus_client.Gauge(
        'victron_updated',
        'Last time a block was received from the device', labelnames=labels)
    blocks = prometheus_client.Counter(
        'victron_blocks', 'Number of blocks received from the device',
        labelnames=labels)
    return metrics, updated, blocks
def setup_status(app) -> prometheus_client.CollectorRegistry:
    """Add /status to serve Prometheus-driven runtime metrics.

    Creates a private registry, stores the request collectors on the app
    for the instrumentation middleware to use, publishes server version
    info, and returns the registry.
    """
    registry = prometheus_client.CollectorRegistry(auto_describe=True)
    app["request_count"] = prometheus_client.Counter(
        "requests_total", "Total Request Count",
        ["app_name", "method", "endpoint", "http_status"],
        registry=registry,
    )
    app["request_latency"] = prometheus_client.Histogram(
        "request_latency_seconds", "Request latency",
        ["app_name", "endpoint"],
        registry=registry,
    )
    app["request_in_progress"] = prometheus_client.Gauge(
        "requests_in_progress_total", "Requests in progress",
        ["app_name", "endpoint", "method"],
        registry=registry,
    )
    # Build metadata exposed once as an Info metric.
    prometheus_client.Info("server", "API server version",
                           registry=registry).info({
        "version": metadata.__version__,
        "commit": getattr(metadata, "__commit__", "null"),
        "build_date": getattr(metadata, "__date__", "null"),
    })
    # Instrumentation middleware must run first to time the whole chain.
    app.middlewares.insert(0, instrument)
    # passing StatusRenderer(registry) without __call__ triggers a spurious
    # DeprecationWarning
    # FIXME(vmarkovtsev): https://github.com/aio-libs/aiohttp/issues/4519
    app.router.add_get("/status", StatusRenderer(registry).__call__)
    return registry
def setup_gauge(key, label):
    """Create a gauge named *key* whose value is read live from
    PROMETHEUS_METRICS[key] at scrape time."""
    global PROMETHEUS_METRICS
    gauge = prometheus_client.Gauge(key, label)
    # Lazy pull model: the exporter reads the dict when scraped instead of
    # the application pushing values in.
    gauge.set_function(lambda: PROMETHEUS_METRICS[key])
def __init__(self, servicetype, method, outparams):
    """Initialize a GaugeMetric with one gauge per output parameter."""
    super(ActionGauge, self).__init__(servicetype, method)
    # One (help-less) gauge per outparam, keyed by the parameter name.
    self.gauges = {
        outparam: prometheus_client.Gauge(
            self._generate_name(servicetype, outparam), "")
        for outparam in outparams
    }
def __init__(self, prefix, description, labels):
    """Create the attempt/failure/duration collector bundle for one action.

    :param prefix: prefix to use for each metric name
    :param description: description of action to use in metric description
    :param labels: label names to define for each metric
    """
    self.full_prefix = '{}_{}'.format(self.__class__._PREFIX, prefix)
    # livesum: aggregate in-progress counts across forked workers
    self.progress = prometheus_client.Gauge(
        '{}_attempt_inprogress'.format(self.full_prefix),
        'In progress attempts to {}'.format(description), labels,
        registry=REGISTRY, multiprocess_mode='livesum')
    self.attempt_total = prometheus_client.Counter(
        '{}_attempt_total'.format(self.full_prefix),
        'Total attempts to {}'.format(description), labels,
        registry=REGISTRY)
    self.failure_total = prometheus_client.Counter(
        '{}_failure_total'.format(self.full_prefix),
        'Total failures to {}'.format(description), labels,
        registry=REGISTRY)
    self.duration = prometheus_client.Histogram(
        '{}_duration_seconds'.format(self.full_prefix),
        'Seconds to {}'.format(description), labels, registry=REGISTRY)
def _stats(**kwargs):
    """Per-request wrapper: lazily builds per-endpoint collectors, then
    records concurrency and timing before delegating to the wrapped view.

    Closure over ``metrics``, ``endpoint``, ``fn``, ``request``,
    ``request_store`` and ``generic_concurrent`` from the enclosing scope
    (not visible in this chunk) -- confirm against the decorator body.
    """
    if not metrics:
        # first call, set up metrics
        # status is only known after the response, so the concurrency gauge
        # omits it while the histograms include it
        labels_no_status = sorted(kwargs.keys()) + ['endpoint', 'method']
        labels = labels_no_status + ['status']
        metrics['latency'] = prom.Histogram(
            'http_request_latency_{}'.format(endpoint),
            LATENCY_HELP,
            labels,
            buckets=LATENCY_BUCKETS,
        )
        metrics['size'] = prom.Histogram(
            'http_response_size_{}'.format(endpoint),
            SIZE_HELP,
            labels,
            buckets=SIZE_BUCKETS,
        )
        metrics['concurrent'] = prom.Gauge(
            'http_request_concurrency_{}'.format(endpoint),
            CONCURRENT_HELP,
            labels_no_status,
        )
    # Stash request-scoped data so the post-request hook can finish timing.
    request_store.metrics = metrics
    request_store.endpoint = endpoint
    request_store.method = request.method
    request_store.labels = {k: str(v) for k, v in kwargs.items()}
    generic_concurrent.labels(endpoint=endpoint,
                              method=request.method).inc()
    metrics['concurrent'].labels(endpoint=endpoint,
                                 method=request.method,
                                 **request_store.labels).inc()
    request_store.start_time = monotonic()
    return fn(**kwargs)
def main():
    """Export both SoC thermal zones as gauges on :9190, refreshing each
    second.  Never returns."""
    sysfs_template = "/sys/devices/virtual/thermal/thermal_zone{}/temp"
    # One gauge per zone, keyed by zone index.
    zone_gauges = {
        zone: prom.Gauge("thermal_zone_{}_celsius".format(zone),
                         "Thermal zone monitor (celsius)")
        for zone in (0, 1)
    }
    prom.start_http_server(9190)
    while True:
        for zone, gauge in zone_gauges.items():
            gauge.set(parse_temperature(sysfs_template.format(zone)))
        time.sleep(1)
def update_timestamp(self):
    """Record 'now' as the moment quota data was last refreshed."""
    gauge = self.metrics.get(TIMESTAMP_METRIC_NAME)
    if gauge is None:
        # Lazily create the gauge on the first successful update.
        gauge = prometheus_client.Gauge(
            TIMESTAMP_METRIC_NAME,
            "Date of last successful quotas data update as unix timestamp/epoch",
            registry=self.registry)
        self.metrics[TIMESTAMP_METRIC_NAME] = gauge
    gauge.set_to_current_time()
def main():
    """Teaching-template entry point: starts the exporter and leaves the
    metric-update loop body for the student to implement."""
    prometheus_client.start_http_server(PROMETHEUS_PORT)
    logging.info(
        f"Prometheus exporter started at http://127.0.0.1:{PROMETHEUS_PORT}")
    CPU_TIME = prometheus_client.Gauge(
        'cpu_time', 'Hold current process CPU consumption time',
        ['id', 'cmd'])
    for counter in range(10):
        """ Step 1.3. Update Prometheus metrics in a loop Complete the next steps in the loop: 1. For each process from the function implemented in step 1.2, update `CPU_TIME` `Gauge` metric labeled with: a) `id` formed by combining `proc['name']` and `proc['pid']` ("name_pid" format) b) `cmd` formed by joining `proc['cmdline']` list values with " " (space) separator with *sum* of `proc['cpu_times'].system` and `proc['cpu_times'].user` values 2. Sleep for UPDATE_PERIOD 3. Perform HTTP GET request to Prometheus endpoint and print its content to stdout. """
        for proc in processes():
            pass
            ### Block implemented by student
            # set labeled cpu times value in `CPU_TIME`
            ### Block implemented by student
        ### Block implemented by student
        # sleep for an `UPDATE_PERIOD`
        # log HTTP Get request result to Prometheus endpoint
        ### Block implemented by student
        # NOTE(review): the trailing triple-quote below appears to be a stray
        # remnant of template editing (it opens an unterminated string);
        # reproduced as-is from the original -- confirm and remove upstream.
        """
def get_metric(name, extra_labels):
    """Return the cached gauge for *name*, creating and caching it on
    first use with the base labels plus *extra_labels*' keys."""
    if name not in metrics:
        # First request for this metric: build and remember it.
        metrics[name] = prometheus_client.Gauge(
            prometheus_prefix + name, name,
            labels + list(extra_labels.keys()))
    return metrics[name]