def collect(self):
    """Expose every numeric key of ``self.r`` as a sample of the ``redis`` counter.

    Each key returned by ``self.r.keys('*')`` becomes one sample labelled
    with that key under ``id``. Keys whose value cannot be converted to a
    float are skipped.

    Yields:
        CounterMetricFamily: the single ``redis`` family.
    """
    family = CounterMetricFamily('redis', 'Help text', labels=['id'])
    for key in self.r.keys('*'):
        try:
            family.add_metric([key], float(self.r.get(key)))
        except (TypeError, ValueError):
            # ValueError: the stored value is not numeric.
            # TypeError: ``float(None)``; presumably ``get`` returns None
            # when the key vanished between ``keys`` and ``get`` -- confirm
            # against the client in use.
            continue
    yield family
class JobsetScrapeImporter:
    """Collects per-jobset counter samples from scraped reports."""

    def __init__(self):
        self.seconds = CounterMetricFamily(
            name="hydra_jobset_seconds_total",
            documentation="Total number of seconds the jobset has been building",
            labels=["name"],
        )
        self.shares_used = CounterMetricFamily(
            name="hydra_jobset_shares_used_total",
            documentation="Total shares the jobset has consumed",
            labels=["name"],
        )

    def load_jobset(self, name, report):
        """Add one sample per family for jobset ``name``.

        Reads ``seconds`` and ``shareUsed`` from ``report`` via
        ``destructive_read`` and then hands the report to
        ``debug_remaining_state``.
        """
        seconds = report.destructive_read("seconds")
        self.seconds.add_metric([name], seconds)
        shares = report.destructive_read("shareUsed")
        self.shares_used.add_metric([name], shares)
        debug_remaining_state(report)

    def metrics(self):
        """Yield the two families, seconds first."""
        for family in (self.seconds, self.shares_used):
            yield family
def collect(self):
    """Yield one metric family per measured node property.

    For every entry of ``GLOBAL_NODES['nodes']`` a sample labelled with the
    node's ``id`` is added to each family. Families are yielded in the
    order: WAN bandwidth, FF bandwidth, gateway ping, rx bytes, tx bytes.
    """
    # (family class, metric name, help text, node key, value converter)
    specs = (
        (GaugeMetricFamily, 'node_bw_wan_bps',
         'last tested wan downstream mb/s', 'downstream_mbps_wan', None),
        (GaugeMetricFamily, 'node_bw_ff_bps',
         'last tested ff downstream in mb/s', 'downstream_mbps_ff', None),
        (GaugeMetricFamily, 'node_gw_ping_ms',
         'last tested gateway ping in ms', 'gw_ping_ms', None),
        (CounterMetricFamily, 'node_rx_bytes',
         'received bytes', 'rx_bytes', int),
        (CounterMetricFamily, 'node_tx_bytes',
         'transmitted bytes', 'tx_bytes', int),
    )
    for family_cls, metric_name, help_text, node_key, convert in specs:
        family = family_cls(metric_name, help_text, labels=['nodeid'])
        for node in GLOBAL_NODES['nodes']:
            label_values = [node['id']]
            raw = node[node_key]
            # Only the byte counters are coerced to int; gauges pass through.
            family.add_metric(label_values,
                              raw if convert is None else convert(raw))
        yield family
def _collect_metric(self, metric_name, metric):
    '''Build and yield the metric family for one configured metric.

    ``metric`` supplies ``doc``, ``items`` and optionally ``type``
    (``'counter'`` or ``'gauge'``, default ``'gauge'``). An unknown type is
    logged as an error and treated as a gauge. The family is yielded only
    if at least one item produced a value; otherwise a debug message is
    logged and nothing is yielded.
    '''
    label_names = self._get_metric_label_names(metric)
    metric_type = metric.get('type', 'gauge')

    if metric_type == 'counter':
        family_cls = CounterMetricFamily
    else:
        if metric_type != 'gauge':
            logger.error(
                f"Invalid metric type definition '{metric_type}' for metric "
                f"'{metric_name}'. Using default type 'gauge'.")
        family_cls = GaugeMetricFamily
    met = family_cls(metric_name, metric['doc'], labels=label_names)

    # Calculate the metric labels and values
    has_data = False
    for item in metric['items']:
        result = self._collect_metric_item(item, label_names)
        if result is not None:
            (label_values, value) = result
            met.add_metric(label_values, value)
            has_data = True

    if not has_data:
        logger.debug(
            f"Dropping metric '{metric_name}', because no data was added.")
        return
    yield met
class MetricCollector(object):
    """Accumulates values per label set and renders them as a metric family.

    Values added for the same label values are summed; ``metric()`` then
    copies the accumulated totals into the underlying family.
    """

    def __init__(self, name, description, labels, mtype="GAUGE", id=None, filter=None):
        """Create the collector and its underlying family.

        Args:
            name: Collector name; also used as the metric id when ``id`` is
                not given.
            description: Help text passed to the family.
            labels: Label names passed to the family.
            mtype: ``"GAUGE"`` or ``"COUNTER"``.
            id: Optional metric name overriding ``name``.
            filter: Stored as ``self.filter``; not used inside this class.

        Raises:
            ValueError: if ``mtype`` is neither ``"GAUGE"`` nor ``"COUNTER"``.
        """
        self.name = name
        self.id = name if id is None else id
        self.description = description
        self.labels = labels
        self.filter = filter
        self.label_list = []
        self.value_list = []
        if mtype == "GAUGE":
            self.metric_family = GaugeMetricFamily(self.id, self.description, labels=self.labels)
        elif mtype == "COUNTER":
            self.metric_family = CounterMetricFamily(self.id, self.description, labels=self.labels)
        else:
            # Raising a plain string is itself a TypeError in Python 3;
            # raise a real exception carrying the intended message.
            raise ValueError("Unknown type %s" % (mtype,))

    def add(self, label_values, value):
        """Add ``value`` to the running total kept for ``label_values``."""
        for idx, known in enumerate(self.label_list):
            if known == label_values:
                self.value_list[idx] += value
                return
        self.label_list.append(label_values)
        self.value_list.append(value)

    def metric(self):
        """Copy every accumulated total into the family and return it.

        Each call adds all accumulated samples to the same family object
        again, so it is meant to be called once per collector instance.
        """
        for label_values, total in zip(self.label_list, self.value_list):
            self.metric_family.add_metric(label_values, total)
        return self.metric_family
class CpuCollector(object):
    """Keeps the latest ``cpu`` sample per resource and exposes them as a counter."""

    metric_name = 'cpu'
    # Class-level dict: shared by every instance that does not rebind it.
    metrics = {}

    def collect(self):
        """Yield the CPU counter family built from the stored samples.

        A stored sample is exported while ``last_pushed + 4 * epoch`` is
        still in the future; otherwise it is dropped from ``self.metrics``.
        Nothing is yielded when no samples are stored.
        """
        if len(self.metrics) > 0:
            self.c = CounterMetricFamily(
                self.metric_name.replace(".", "_"),
                'CPU cumulative nanoseconds',
                labels=[
                    'resource_id', 'project_id', 'user_id', 'counter_unit',
                    'display_name',
                ])
            # Iterate over a copy because stale entries are popped in the loop.
            for resource_key in self.metrics.copy():
                now = time.time()
                sample = self.metrics[resource_key]
                if sample['last_pushed'] + 4 * sample['epoch'] <= now:
                    self.metrics.pop(resource_key)
                    continue
                self.c.add_metric(
                    [
                        sample['resource_id'],
                        sample['project_id'],
                        sample['user_id'],
                        sample['counter_unit'],
                        sample['resource_metadata']['display_name'],
                    ],
                    sample['counter_volume'])
            yield self.c

    def update(self, msg):
        """Store ``msg`` as the latest sample for its resource.

        Messages whose ``counter_name`` differs from ``metric_name`` are
        ignored. ``epoch`` is set to the time since the previous sample of
        the same resource, or to the current timestamp for a first sample.
        """
        now = time.time()
        if msg['counter_name'] != self.metric_name:
            return
        resource_id = msg['resource_id']
        if resource_id in self.metrics:
            msg['epoch'] = now - self.metrics[resource_id]['last_pushed']
        else:
            msg['epoch'] = now
        msg['last_pushed'] = now
        self.metrics[resource_id] = msg
def test_help_escaping(self):
    """Parsed HELP text must equal the unescaped form of what was written."""
    template = """# TYPE a counter
# HELP a %s
a_total{foo="bar"} 1
# EOF
"""
    # (escaped text placed in the exposition, expected parsed documentation)
    cases = [
        ('foo', 'foo'),
        ('\\foo', '\\foo'),
        ('\\\\foo', '\\foo'),
        ('foo\\', 'foo\\'),
        ('foo\\\\', 'foo\\'),
        ('\\n', '\n'),
        ('\\\\n', '\\n'),
        ('\\\\\\n', '\\\n'),
        ('\\"', '"'),
        ('\\\\"', '\\"'),
        ('\\\\\\"', '\\"'),
    ]
    for escaped_val, unescaped_val in cases:
        parsed = list(text_string_to_metric_families(template % escaped_val))
        expected = CounterMetricFamily("a", unescaped_val, labels=["foo"])
        expected.add_metric(["bar"], 1)
        self.assertEqual([expected], list(parsed))
def collect(self):
    """Yield the per-license usage counter, then the per-client state set.

    For every entry of ``self._licenses`` the counter gets one sample with
    the number of checked-out licenses, and the state set gets one
    ``active`` sample per client holding that license.
    """
    # NOTE(review): both families are created with the same name
    # "active_licenses" -- confirm this is intended.
    usage = CounterMetricFamily(
        "active_licenses", "active license count", labels=("license", ))
    clients = StateSetMetricFamily(
        "active_licenses", "active license count",
        labels=("host", "port", "license", "user"))

    for lic in self._licenses:
        holders = lic.licenses
        usage.add_metric([lic.name], len(holders))
        for holder in holders:
            label_values = (holder.host, str(holder.port), lic.name,
                            holder.user)
            clients.add_metric(label_values, value={"active": True})

    yield usage
    yield clients
def test_parse_stats(self):
    """Collected families must contain the expected counter and samples."""
    collected = list(self.collector.collect(self.fake_data, self.fake_info))

    no_dma = CounterMetricFamily(
        "ethtool_rx_no_dma_resources_total",
        "rx_no_dma_resources",
        labels=("interface",),
    )
    no_dma.add_metric(["eth0"], 590843871.0)
    self.assertIn(no_dma, collected)

    queue_bytes = CounterMetricFamily(
        "ethtool_tx_queue_bytes_total",
        "tx_queue_bytes",
        labels=("interface", "queue"),
    )
    queue_bytes.add_metric(("eth0", "5"), 1467719549558.0)

    # NOTE(review): these checks only run if a family with exactly this
    # name was collected -- confirm the names match what collect() emits.
    for family in collected:
        family_name = family.name
        if family_name == "ethtool_tx_queue_bytes_total":
            self.assertIn(queue_bytes.samples[0], family.samples)
        if family_name == "ethtool_interface_speed":
            self.assertEqual(family.samples[0][2], 1048576000.0)
def test_label_escaping(self):
    """Parsed label values must equal the unescaped form of what was written."""
    template = """
# TYPE a counter
# HELP a help
a{foo="%s",bar="baz"} 1
"""
    # (escaped label value in the exposition, expected parsed label value)
    cases = [
        ('foo', 'foo'),
        ('\\foo', '\\foo'),
        ('\\\\foo', '\\foo'),
        ('foo\\\\', 'foo\\'),
        ('\\\\', '\\'),
        ('\\n', '\n'),
        ('\\\\n', '\\n'),
        ('\\\\\\n', '\\\n'),
        ('\\"', '"'),
        ('\\\\\\"', '\\"'),
    ]
    for escaped_val, unescaped_val in cases:
        parsed = list(text_string_to_metric_families(template % escaped_val))
        expected = CounterMetricFamily("a", "help", labels=["foo", "bar"])
        expected.add_metric([unescaped_val, "baz"], 1)
        self.assertEqualMetrics([expected], list(parsed))
def collect(self):
    """Refresh the torrent list and yield one family per computed metric.

    A failure to fetch the torrent list is logged and collection continues
    with whatever ``self.torrents`` already holds. Every entry returned by
    ``get_qbittorrent_metrics`` becomes a counter when its ``type`` is
    ``"counter"`` and a gauge otherwise.
    """
    try:
        self.torrents = self.client.torrents.info()
    except Exception as e:
        logger.error(f"Couldn't get server info: {e}")

    for entry in self.get_qbittorrent_metrics():
        name = entry["name"]
        value = entry["value"]
        help_text = entry.get("help", "")
        labels = entry.get("labels", {})
        is_counter = entry.get("type", "gauge") == "counter"

        family_cls = CounterMetricFamily if is_counter else GaugeMetricFamily
        prom_metric = family_cls(name, help_text, labels=labels.keys())
        prom_metric.add_metric(value=value, labels=labels.values())
        yield prom_metric
def collect(self):
    """Yield the CPU time counter with one sample per CPU statistic.

    Building the family is wrapped in the
    ``self.cpu_time_collector_run_time`` timer. Samples are labelled with
    the module-level ``host_name`` and the statistic name.
    """
    # (label value emitted, attribute read from psutil.cpu_times())
    # NOTE(review): 'oiwait' looks like a typo for 'iowait'; it is kept
    # because it is part of the emitted label values.
    statistics = (
        ('user', 'user'),
        ('system', 'system'),
        ('idle', 'idle'),
        ('oiwait', 'iowait'),
    )
    with self.cpu_time_collector_run_time.time():
        worker_stat_cpu_time = CounterMetricFamily(
            'solaris_exporter_cpu_time',
            'python psutil counters, CPU usage time.',
            labels=['host', 'statistic'])
        cpuinfo = psutil.cpu_times(percpu=False)
        for statistic, attribute in statistics:
            worker_stat_cpu_time.add_metric(
                [host_name, statistic], getattr(cpuinfo, attribute))
    yield worker_stat_cpu_time
def collect(self):
    """Yield the Quay validation, push-speed and pull-speed families.

    The validation family gets one sample per key of ``quay_cache``. The
    push and pull families get a single sample each, and only when the
    corresponding key is present in its cache; they are yielded either way.
    """
    release_validation_metric = CounterMetricFamily(
        'quay_validation', 'To get the validation status of quay release',
        labels=['release_validation_status'])
    # Iterating the mapping directly yields its keys on Python 2 and 3
    # alike; ``iterkeys()`` only exists on Python 2 dicts.
    # Presumably quay_cache is dict-like -- confirm.
    for quay_operation in quay_cache:
        release_validation_metric.add_metric(
            [quay_operation], quay_cache[quay_operation])
    yield release_validation_metric

    push_speed_metric = CounterMetricFamily(
        'push_speed', 'To get the push speed', labels=['push_speed'])
    if 'image_push_Speed' in quayPushSpeed_cache:
        push_speed_metric.add_metric(
            ['image_push_Speed'], quayPushSpeed_cache['image_push_Speed'])
    yield push_speed_metric

    pull_speed_metric = CounterMetricFamily(
        'pull_speed', 'To get the pull speed', labels=['pull_speed'])
    if 'image_pull_Speed' in quayPullSpeed_cache:
        pull_speed_metric.add_metric(
            ['image_pull_Speed'], quayPullSpeed_cache['image_pull_Speed'])
    yield pull_speed_metric
def collect(self):
    """Yield per-project vCPU, RAM and instance usage families.

    Every entry returned by ``get_usage()`` contributes one sample, labelled
    with its ``project_name``, to each of the three families. Families are
    yielded in the order: vCPU, RAM, instances.
    """
    vcpu = CounterMetricFamily('openstack_vcpu_usage', 'Help text',
                               labels=['project'])
    ram_mb = CounterMetricFamily('openstack_ram_mb_usage', 'Help text',
                                 labels=['project'])
    instances = CounterMetricFamily('openstack_instances_usage', 'Help text',
                                    labels=['project'])

    usages = get_usage()  # TODO!!!
    for usage in usages:
        project_name = usage['project_name']
        instances.add_metric([project_name], usage['server_usage_count'])
        # Exactly one label value per declared label name ('project').
        vcpu.add_metric([project_name], usage.get('total_vcpus_usage'))
        # Was ``project.name``: ``project`` is not defined in this function.
        ram_mb.add_metric([project_name], usage.get('total_memory_mb_usage'))

    yield vcpu
    yield ram_mb
    yield instances
def collect(self):
    """Yield Stellar base, payment and large-native-payment metrics.

    Order of yielded families: a summary family, one counter per entry of
    ``current_data``, a ``sum_payment``/``nb_payment`` pair per asset in
    ``current_payment_detail``, and finally one gauge carrying a sample per
    (from, to) pair of ``current_large_native_payment_detail``.
    """
    yield SummaryMetricFamily('summary', 'This is simple summary',
                              labels={'name': 'horizon.stellar.org'})

    log.info('current_data.items(): %s', current_data.items())
    for k, v in current_data.items():
        yield CounterMetricFamily(k, 'stellar base metric values',
                                  value=float(v))

    log.info('current_payment_detail.items(): %s',
             current_payment_detail.items())
    for asset, asset_data in current_payment_detail.items():
        summ = CounterMetricFamily('sum_payment',
                                   'stellar payment metric values',
                                   labels=['sum_payment'])
        # Label values must be a sequence with one entry per label name.
        # Passing the bare string paired the label with its first
        # character only.
        summ.add_metric([asset], asset_data['sum'])
        yield summ
        yield CounterMetricFamily('nb_payment',
                                  'stellar payment metric values',
                                  value=float(asset_data['nm']))

    metric = GaugeMetricFamily(
        'large_native_payment_detail',
        'large native stellar payment metric values',
        value=7)
    for from_addr, amount_by_dest in current_large_native_payment_detail.items():
        for to_addr, amount in amount_by_dest.items():
            metric.add_sample('sum_large_native_payment',
                              value=amount,
                              labels={
                                  'from_addr': from_addr,
                                  'to_addr': to_addr,
                              })
    yield metric
def test_timestamps(self):
    """Sample timestamps in several notations must parse to the expected values."""
    exposition = """# TYPE a counter
# HELP a help
a_total{foo="1"} 1 000
a_total{foo="2"} 1 0.0
a_total{foo="3"} 1 1.1
a_total{foo="4"} 1 12345678901234567890.1234567890
a_total{foo="5"} 1 1.5e3
# TYPE b counter
# HELP b help
b_total 2 1234567890
# EOF
"""
    families = text_string_to_metric_families(exposition)

    expected_a = CounterMetricFamily("a", "help", labels=["foo"])
    # (value of the ``foo`` label, expected timestamp)
    stamped = (
        ("1", Timestamp(0, 0)),
        ("2", Timestamp(0, 0)),
        ("3", Timestamp(1, 100000000)),
        ("4", Timestamp(12345678901234567890, 123456789)),
        ("5", 1500.0),
    )
    for foo, stamp in stamped:
        expected_a.add_metric([foo], 1, timestamp=stamp)

    expected_b = CounterMetricFamily("b", "help")
    expected_b.add_metric([], 2, timestamp=Timestamp(1234567890, 0))

    self.assertEqual([expected_a, expected_b], list(families))
class MachineTypeScrapeImporter:
    """Collects per-machine-type gauges and counters from scraped reports."""

    def __init__(self):
        label_names = ["machineType"]
        self.runnable = GaugeMetricFamily(
            name="hydra_machine_type_runnable",
            documentation="Number of currently runnable builds",
            labels=list(label_names))
        self.running = GaugeMetricFamily(
            name="hydra_machine_type_running",
            documentation="Number of currently running builds",
            labels=list(label_names))
        self.wait_time = CounterMetricFamily(
            name="hydra_machine_type_wait_time_total",
            documentation="Number of seconds spent waiting",
            labels=list(label_names))
        self.last_active = CounterMetricFamily(
            name="hydra_machine_type_last_active_total",
            documentation="Last time this machine type was active",
            labels=list(label_names))

    @staticmethod
    def _add_optional(family, name, report, key):
        """Add a sample for ``key``; a KeyError leaves the family untouched."""
        try:
            family.add_metric([name], report.destructive_read(key))
        except KeyError:
            pass

    def load_machine_type(self, name, report):
        """Add samples for machine type ``name`` taken from ``report``.

        ``runnable`` and ``running`` are required; ``waitTime`` and
        ``lastActive`` are skipped when reading them raises KeyError. The
        report is then passed to ``debug_remaining_state``.
        """
        self.runnable.add_metric([name], report.destructive_read("runnable"))
        self.running.add_metric([name], report.destructive_read("running"))
        self._add_optional(self.wait_time, name, report, "waitTime")
        self._add_optional(self.last_active, name, report, "lastActive")
        debug_remaining_state(report)

    def metrics(self):
        """Yield the four families: runnable, running, wait time, last active."""
        for family in (self.runnable, self.running, self.wait_time,
                       self.last_active):
            yield family
def dump_frequency(
    cls, metric_name: str, documentation: str, bin_to_count: Mapping[TBin, int]
) -> Metric:
    """Build a Prometheus counter family from per-bin counts.

    Each bin becomes one sample labelled under ``cls.BIN_LABEL``. Integer
    bin keys are converted to float before being turned into the label
    string, so ``1`` is labelled ``"1.0"``.

    :param metric_name: Name of the metric (must be the same for training and serving)
    :type metric_name: str
    :param documentation: Help text describing the metric (used for documentation)
    :type documentation: str
    :param bin_to_count: Counts of items in each bin.
    :type bin_to_count: Mapping[Union[str, float, int], int]
    :return: The converted Prometheus counter metric.
    :rtype: Metric
    """
    frequency = CounterMetricFamily(
        name=metric_name, documentation=documentation, labels=(cls.BIN_LABEL,)
    )
    for bin_key, count in bin_to_count.items():
        bin_label = str(float(bin_key)) if isinstance(bin_key, int) else str(bin_key)
        frequency.add_metric(labels=[bin_label], value=count)
    return frequency
class MachineTypeScrapeImporter:
    """Turns scraped machine-type reports into Prometheus metric families."""

    def __init__(self):
        self.runnable = GaugeMetricFamily(
            name="hydra_machine_type_runnable",
            documentation="Number of currently runnable builds",
            labels=["machineType"],
        )
        self.running = GaugeMetricFamily(
            name="hydra_machine_type_running",
            documentation="Number of currently running builds",
            labels=["machineType"],
        )
        self.wait_time = CounterMetricFamily(
            name="hydra_machine_type_wait_time_total",
            documentation="Number of seconds spent waiting",
            labels=["machineType"],
        )
        self.last_active = CounterMetricFamily(
            name="hydra_machine_type_last_active_total",
            documentation="Last time this machine type was active",
            labels=["machineType"],
        )

    def load_machine_type(self, name, report):
        """Record the values of one machine type.

        ``runnable`` and ``running`` are always read. ``waitTime`` and
        ``lastActive`` are optional: a KeyError while adding either one is
        ignored. Finally the report goes to ``debug_remaining_state``.
        """
        required = ((self.runnable, "runnable"), (self.running, "running"))
        for family, key in required:
            family.add_metric([name], report.destructive_read(key))

        optional = ((self.wait_time, "waitTime"),
                    (self.last_active, "lastActive"))
        for family, key in optional:
            try:
                family.add_metric([name], report.destructive_read(key))
            except KeyError:
                pass

        debug_remaining_state(report)

    def metrics(self):
        """Yield runnable, running, wait-time and last-active families in that order."""
        yield self.runnable
        yield self.running
        yield self.wait_time
        yield self.last_active
def test_commas(self):
    """Trailing or lone commas inside the label braces must be accepted."""
    exposition = """# TYPE a counter
# HELP a help
a{foo="bar",} 1
a{foo="baz", } 1
# TYPE b counter
# HELP b help
b{,} 2
# TYPE c counter
# HELP c help
c{ ,} 3
# TYPE d counter
# HELP d help
d{, } 4
"""
    families = text_string_to_metric_families(exposition)

    expected_a = CounterMetricFamily("a", "help", labels=["foo"])
    for foo in ("bar", "baz"):
        expected_a.add_metric([foo], 1)

    expected = [
        expected_a,
        CounterMetricFamily("b", "help", value=2),
        CounterMetricFamily("c", "help", value=3),
        CounterMetricFamily("d", "help", value=4),
    ]
    self.assertEqual(expected, list(families))
def get_histograms(self, metrics_object):
    """Return a count counter and a quantile gauge for every histogram.

    Entries named ``jenkins.node...builds`` are all exported as
    ``jenkins_node_builds`` with the middle of the entry name as the
    ``node`` label. Any other entry name is lower-cased with ``.``, ``-``,
    ``(`` and ``)`` replaced by ``_``.
    """
    # (value of the ``quantile`` label, field read from the histogram)
    quantile_fields = (
        ("0.5", "p50"),
        ("0.75", "p75"),
        ("0.95", "p95"),
        ("0.98", "p98"),
        ("0.99", "p99"),
        ("0.999", "p999"),
    )
    families = []
    for entry in metrics_object.keys():
        if entry.startswith('jenkins.node') and entry.endswith('builds'):
            name = "jenkins_node_builds"
            extra_names = ['node']
            # Strip the 13-character prefix and 7-character suffix.
            extra_values = [entry[13:-7]]
        else:
            name = re.sub(r'(\.|-|\(|\))', '_', entry).lower()
            extra_names = []
            extra_values = []

        # count
        count_family = CounterMetricFamily(
            name, f'metric import from {entry}', labels=extra_names)
        count_family.add_metric(
            extra_values, metrics_object.get(entry).get('count'))
        families.append(count_family)

        quantile_family = GaugeMetricFamily(
            name, '', labels=["quantile"] + extra_names)
        for quantile, field in quantile_fields:
            quantile_family.add_metric(
                [quantile] + extra_values,
                metrics_object.get(entry).get(field))
        families.append(quantile_family)
    return families
def _translate_to_prometheus(self, metric_record: MetricRecord):
    """Convert one metric record into a Prometheus metric family.

    Counters become a ``CounterMetricFamily`` and value recorders an
    ``UnknownMetricFamily``; both carry a single sample holding the
    aggregator checkpoint. Any other instrument type is logged as
    unsupported and ``None`` is returned.
    """
    label_keys = []
    label_values = []
    for label_tuple in metric_record.labels:
        label_keys.append(self._sanitize(label_tuple[0]))
        label_values.append(label_tuple[1])

    instrument = metric_record.instrument
    prefix = self._prefix + "_" if self._prefix != "" else ""
    metric_name = prefix + self._sanitize(instrument.name)

    if isinstance(instrument, Counter):
        family_cls = CounterMetricFamily
    # TODO: Add support for histograms when supported in OT
    elif isinstance(instrument, ValueRecorder):
        family_cls = UnknownMetricFamily
    else:
        logger.warning(
            "Unsupported metric type. %s", type(metric_record.instrument)
        )
        return None

    prometheus_metric = family_cls(
        name=metric_name,
        documentation=metric_record.instrument.description,
        labels=label_keys,
    )
    prometheus_metric.add_metric(
        labels=label_values, value=metric_record.aggregator.checkpoint
    )
    return prometheus_metric
def counter_generator(metrics):
    """Yield one counter family per metric name found in ``metrics``.

    ``group_metrics`` supplies, per name, the documentation, the label keys
    and a mapping from label values to value. Unlabelled metrics use the
    first value of that mapping; labelled ones get one sample per label
    value tuple, in sorted order.
    """
    grouped = group_metrics(metrics)
    for metric_name, spec in grouped.items():
        metric_doc, label_keys, value_dict = spec

        # No label keys, so we must have only a single value.
        if not label_keys:
            yield CounterMetricFamily(
                metric_name, metric_doc,
                value=list(value_dict.values())[0])
            continue

        # With label keys there may be several values, each with its own
        # label values.
        counter = CounterMetricFamily(
            metric_name, metric_doc, labels=label_keys)
        for label_values in sorted(value_dict.keys()):
            sample_value = value_dict[label_values]
            counter.add_metric(
                tuple(str(part) for part in label_values), sample_value)
        yield counter
def collect(self):
    """Yield the purchased-beer counter and the beer-stock gauge.

    Both families are labelled by brewery, style and abv. Their samples
    come from ``purchased_by_labels`` and ``stock_by_labels`` applied to
    ``self._store``.
    """
    label_names = ["brewery", "style", "abv"]
    beer_purchased = CounterMetricFamily(
        "beer_purchased",
        "The number of beers purchased",
        labels=label_names,
    )
    beer_stock = GaugeMetricFamily(
        "beer_stock",
        "The number of beers in stock",
        labels=label_names,
    )

    purchased = purchased_by_labels(label_names, self._store)
    stock = stock_by_labels(label_names, self._store)

    for family, rows in ((beer_purchased, purchased), (beer_stock, stock)):
        for label_values, count in rows:
            family.add_metric(label_values, count)

    yield beer_purchased
    yield beer_stock
def collect(self):
    """Yield a demo ``http_request_total`` counter with four fixed samples.

    Every sample has the value 1 and the namespace label ``"default"``.
    """
    http_request_total = CounterMetricFamily(
        'http_request_total',
        'demo metric name.',
        labels=["path", "code", "__meta_kubernetes_namespace"])

    # (path, status code) pairs of the hard-coded demo data
    demo_requests = (
        ("/", "400"),
        ("/detail", "200"),
        ("/me", "404"),
        ("/me/error", "502"),
    )
    for path, code in demo_requests:
        http_request_total.add_metric([path, code, "default"], 1)

    yield http_request_total
def collect(self):
    """Yield Deep Security computer, module and vulnerability families.

    The summary returned by ``ds.get_summary`` maps a type key to a
    mapping of dash-separated names to values; the ``timestamp`` key is
    skipped. The first name part selects the family; the remaining parts
    and the type key become label values.
    """
    ds_metrics = ds.get_summary(ds_api_check)

    base_labels = ['metric', 'type', 'platform', 'status']
    tm_ds_computers = CounterMetricFamily(
        'deep_security_computers', 'Deep Security Computer Metrics',
        labels=list(base_labels))
    tm_ds_modules = CounterMetricFamily(
        'deep_security_modules', 'Deep Security Modules Metrics',
        labels=list(base_labels))
    tm_ds_vulnerabilities = CounterMetricFamily(
        'deep_security_vulnerabilities',
        'Deep Security Vulnerabilities Metrics',
        labels=base_labels + ['mode'])

    for key, entries in ds_metrics.items():
        if key == 'timestamp':
            continue
        for k, val in entries.items():
            parts = k.split('-')

            # Third part is either 'all' or '<prefix>_<platform>'.
            platform_part = parts[2]
            if platform_part == 'all':
                os_platform = platform_part
            else:
                os_platform = platform_part.split('_')[1]

            category = parts[0]
            if category == 'computer':
                # computer-os_type-os_linux-12.0.0.563-off - value: 1
                tm_ds_computers.add_metric(
                    [parts[1], key, os_platform, parts[3]], int(val))
            elif category == 'module':
                # module-am_status-os_windows-on
                tm_ds_modules.add_metric(
                    [parts[1], key, os_platform, parts[3]], int(val))
            elif category == 'vulnerabilities':
                # vulnerabilities-ips-os_windows-inline-tap - value: 2
                tm_ds_vulnerabilities.add_metric(
                    [parts[1], key, os_platform, parts[3], parts[4]],
                    int(val))

    yield tm_ds_computers
    yield tm_ds_modules
    yield tm_ds_vulnerabilities
def collect(self):
    '''
    Yield an ``up`` gauge followed by temperature and humidity families.

    Sensors are numbered from 1 in the order returned by
    ``get_sensor_names``. A sensor's reading goes into the family matching
    its entry in ``get_sensor_measures``; sensors without a reading are
    skipped. A family is yielded only if at least one sensor is configured
    for that kind of measurement.
    '''
    sensors = get_sensor_names()
    sensor_measures = get_sensor_measures()

    yield GaugeMetricFamily('up', '1 if Rpi is up', value=1)

    temp_metric_fam = CounterMetricFamily(
        'sensor_temperature', 'Temperature measured by each sensor',
        labels=['sensor'])
    hum_metric_fam = CounterMetricFamily(
        'sensor_humidity', 'Humidity measured by each sensor',
        labels=['sensor'])

    for sensor_num, _sensor in enumerate(sensors, start=1):
        value = get_measurements(sensor_num)
        if value is None:
            continue
        measure = sensor_measures[sensor_num - 1]
        if measure == "humidity":
            hum_metric_fam.add_metric([str(sensor_num)], str(value))
        elif measure == "temperature":
            temp_metric_fam.add_metric([str(sensor_num)], str(value))

    if "temperature" in sensor_measures:
        yield temp_metric_fam
    if "humidity" in sensor_measures:
        yield hum_metric_fam
def collect(self):
    """Yield RDS enhanced-monitoring metrics for every configured database.

    For each log stream in ``config['db_resources']`` one event is fetched
    from the ``RDSOSMetrics`` log group and parsed as JSON. An uptime
    counter is yielded first, then one counter per metric configured for
    the instance's engine. All samples are labelled with the instance id
    and engine. A stream that returns no events is skipped with a warning,
    so the remaining databases are still collected.
    """
    config = self._config
    metrics = self._metrics

    for db_config in config['db_resources']:
        kwargs = {
            'logGroupName': 'RDSOSMetrics',
            'limit': 1,
            'logStreamName': db_config
        }
        response = self.client.get_log_events(**kwargs)

        events = response['events']
        if not events:
            # Previously this raised IndexError and aborted the scrape.
            logging.warning(
                "No log events returned for stream '%s'; skipping",
                db_config)
            continue

        message = json.loads(events[0]['message'])
        result_tree = Tree(message)
        instance_id = message['instanceID']
        engine = message['engine']

        # Parse uptime to a number and produce a metric
        logging.info(message['uptime'])
        uptime = self.uptime_to_num(message['uptime'])
        c = CounterMetricFamily('rds_enhanced_uptime',
                                'RDS uptime in seconds',
                                labels=['db', 'engine'])
        c.add_metric([instance_id, engine], uptime)
        yield c

        logging.info(instance_id)
        for metric_config in metrics['metrics'][engine]:
            metric_description = metric_config.get('description', '')
            metric_path = metric_config['path']
            value = result_tree.execute(metric_path)
            logging.info("metric_name: {}, value for '{}' : {}".format(
                metric_config['name'], metric_path, value))
            c = CounterMetricFamily(metric_config['name'],
                                    metric_description,
                                    labels=['db', 'engine'])
            c.add_metric([instance_id, engine], value)
            yield c
def collect(self):
    """Yield replica-set health plus connection, opcounter and network families.

    Replica-set status is queried on the first host of every shard; server
    status is queried on the config hosts and all shard hosts. Families
    are yielded in the order: health, connections, opcounters, network.
    """
    repl_health = GaugeMetricFamily(
        "mongodb_replset_member_health",
        "mongodb replset member health up(1)/down(0)",
        labels=["name", "set", "state"])
    mongodb_op_counters = CounterMetricFamily(
        "mongodb_op_counters", "mongodb opcounters",
        labels=["name", "type"])
    mongodb_connections = CounterMetricFamily(
        "mongodb_connections", "mongodb connections",
        labels=["name", "type"])
    mongodb_network = CounterMetricFamily(
        'mongodb_network', 'mongodb network', labels=["name", "type"])

    hosts = get_hots()
    repl = []
    # Copy the config host list: extending it in place would also grow
    # ``hosts["config"]`` itself.
    all_hosts = list(hosts["config"])
    for shard in hosts["shards"]:
        repl.append(shard["host"][0])
        all_hosts.extend(shard["host"])

    data = self.parallel(repl, "rs_status")
    for sub_data in data:
        for d in sub_data:
            repl_health.add_metric([d["name"], d["set"], d["state"]],
                                   int(d["health"]))
    yield repl_health

    server_status = self.parallel(all_hosts, "server_status")
    for s in server_status:
        for k, v in s["connections"].items():
            mongodb_connections.add_metric([s["name"], k], v)
        for k, v in s["opcounters"].items():
            mongodb_op_counters.add_metric([s["name"], k], v)
        for k, v in s["network"].items():
            mongodb_network.add_metric([s["name"], k], v)

    yield mongodb_connections
    yield mongodb_op_counters
    yield mongodb_network
def get_counters(self, metrics: dict):
    """Yield one unlabelled metric family per usable Rundeck metric entry.

    Only dict-valued groups of ``metrics`` are inspected. Entry names get
    ``-`` and ``.`` replaced by ``_`` and a ``rundeck_`` prefix when
    missing; names containing ``rate`` are skipped. What is emitted depends
    on the group key (``counters``, ``gauges``, ``meters`` or ``timers``).
    """
    for metric, metric_value in metrics.items():
        if not isinstance(metric_value, dict):
            continue

        for raw_name, counter_value in metric_value.items():
            counter_name = re.sub(r'[-.]', '_', raw_name)
            if 'rate' in counter_name.lower():
                continue
            if not counter_name.startswith('rundeck'):
                counter_name = 'rundeck_' + counter_name

            if metric == 'counters' and 'status' not in counter_name:
                count = counter_value['count']
                rundeck_counters = GaugeMetricFamily(
                    counter_name, 'Rundeck counters metrics')
                rundeck_counters.add_metric([], count)
                yield rundeck_counters

            elif metric == 'gauges':
                gauge_value = counter_value['value']
                if 'services' in counter_name:
                    family_cls = CounterMetricFamily
                else:
                    family_cls = GaugeMetricFamily
                rundeck_gauges = family_cls(
                    counter_name, 'Rundeck gauges metrics')
                # A missing gauge value is reported as 0.
                rundeck_gauges.add_metric(
                    [], gauge_value if gauge_value is not None else 0)
                yield rundeck_gauges

            elif metric in ('meters', 'timers'):
                for field, field_value in counter_value.items():
                    if field != 'count' or isinstance(field_value, str):
                        continue
                    rundeck_meters_timers = CounterMetricFamily(
                        counter_name, f"Rundeck {metric} metrics")
                    rundeck_meters_timers.add_metric([], field_value)
                    yield rundeck_meters_timers
def test_timestamps(self):
    """Timestamps in the text exposition must parse to the expected values."""
    exposition = """# TYPE a counter
# HELP a help
a{foo="bar"} 1\t000
# TYPE b counter
# HELP b help
b 2 1234567890
b 88 1234566000
"""
    families = text_string_to_metric_families(exposition)

    expected_a = CounterMetricFamily("a", "help", labels=["foo"])
    expected_a.add_metric(["bar"], 1, timestamp=0)

    expected_b = CounterMetricFamily("b", "help")
    for value, stamp in ((2, 1234567.89), (88, 1234566)):
        expected_b.add_metric([], value, timestamp=stamp)

    self.assertEqualMetrics([expected_a, expected_b], list(families))
def _translate_to_prometheus(self, export_record: ExportRecord):
    """Convert one export record into a Prometheus metric family.

    Counters map to ``CounterMetricFamily``. Value recorders map to
    ``SummaryMetricFamily`` when aggregated by a
    ``MinMaxSumCountAggregator`` and to ``UnknownMetricFamily`` otherwise.
    Other instrument types are logged as unsupported and ``None`` is
    returned.
    """
    label_keys = []
    label_values = []
    for label_tuple in export_record.labels:
        label_keys.append(self._sanitize(label_tuple[0]))
        label_values.append(label_tuple[1])

    instrument = export_record.instrument
    prefix = self._prefix + "_" if self._prefix != "" else ""
    metric_name = prefix + self._sanitize(instrument.name)
    description = getattr(export_record.instrument, "description", "")

    if isinstance(instrument, Counter):
        counter_family = CounterMetricFamily(
            name=metric_name, documentation=description, labels=label_keys
        )
        counter_family.add_metric(
            labels=label_values, value=export_record.aggregator.checkpoint
        )
        return counter_family

    # TODO: Add support for histograms when supported in OT
    if isinstance(instrument, ValueRecorder):
        value = export_record.aggregator.checkpoint
        if isinstance(export_record.aggregator, MinMaxSumCountAggregator):
            summary_family = SummaryMetricFamily(
                name=metric_name,
                documentation=description,
                labels=label_keys,
            )
            summary_family.add_metric(
                labels=label_values,
                count_value=value.count,
                sum_value=value.sum,
            )
            return summary_family

        unknown_family = UnknownMetricFamily(
            name=metric_name,
            documentation=description,
            labels=label_keys,
        )
        unknown_family.add_metric(labels=label_values, value=value)
        return unknown_family

    logger.warning(
        "Unsupported metric type. %s", type(export_record.instrument)
    )
    return None
def collect(self):
    """Yield the ``env_dashboard`` counter with one sample per bucket entry.

    Entries of ``self.thread.bucket`` are added in sorted order, each
    labelled with its key under ``id``.
    """
    family = CounterMetricFamily('env_dashboard', 'Help text', labels=['id'])
    # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
    # Presumably the bucket is a dict -- confirm.
    for key, value in sorted(self.thread.bucket.items()):
        family.add_metric([key], value)
    yield family
def trivial_counter(self, name, help, value):
    """Return an unlabelled counter ``hydra_<name>_total`` holding ``value``."""
    metric_name = f"hydra_{name}_total"
    family = CounterMetricFamily(metric_name, help)
    family.add_metric(labels=[], value=value)
    return family
class MachineScrapeImporter:
    """Collects per-machine gauges and counters from scraped reports."""

    def __init__(self):
        labels = ["host"]

        def gauge(name, documentation):
            return GaugeMetricFamily(name, documentation, labels=labels)

        def counter(name, documentation):
            return CounterMetricFamily(name, documentation, labels=labels)

        self.consective_failures = gauge(
            "hydra_machine_consecutive_failures",
            "Number of consecutive failed builds")
        self.current_jobs = gauge(
            "hydra_machine_current_jobs",
            "Number of current jobs")
        self.idle_since = gauge(
            "hydra_machine_idle_since",
            "When the current idle period started")
        self.disabled_until = gauge(
            "hydra_machine_disabled_until",
            "When the machine will be used again")
        self.enabled = gauge(
            "hydra_machine_enabled",
            "If the machine is enabled (1) or not (0)")
        self.last_failure = counter(
            "hydra_machine_last_failure",
            "timestamp of the last failure")
        self.number_steps_done = counter(
            "hydra_machine_steps_done_total",
            "Total count of the steps completed")
        self.total_step_build_time = counter(
            "hydra_machine_step_build_time_total",
            "Number of seconds spent building steps")
        self.total_step_time = counter(
            "hydra_machine_step_time_total",
            "Number of seconds spent on steps")

    def load_machine(self, name, report):
        """Add one sample per family for machine ``name``.

        Five report fields are consumed with ``unused_read`` and not
        exported. ``idleSince`` is skipped when reading it raises KeyError;
        the two step-time totals default to 0. The report is finally passed
        to ``debug_remaining_state``.
        """
        ignored_fields = (
            "mandatoryFeatures",
            "supportedFeatures",
            "systemTypes",
            "avgStepBuildTime",
            "avgStepTime",
        )
        for field in ignored_fields:
            report.unused_read(field)

        labels = [name]
        self.consective_failures.add_metric(
            labels, report.destructive_read("consecutiveFailures"))
        self.current_jobs.add_metric(
            labels, report.destructive_read("currentJobs"))
        try:
            self.idle_since.add_metric(
                labels, report.destructive_read("idleSince"))
        except KeyError:
            pass
        self.disabled_until.add_metric(
            labels, report.destructive_read("disabledUntil"))

        enabled_flag = 1 if report.destructive_read("enabled") else 0
        self.enabled.add_metric(labels, enabled_flag)

        self.last_failure.add_metric(
            labels, report.destructive_read("lastFailure"))
        self.number_steps_done.add_metric(
            labels, report.destructive_read("nrStepsDone"))
        self.total_step_build_time.add_metric(
            labels,
            report.destructive_read_default("totalStepBuildTime", default=0))
        self.total_step_time.add_metric(
            labels,
            report.destructive_read_default("totalStepTime", default=0))
        debug_remaining_state(report)

    def metrics(self):
        """Yield all nine families, gauges first, then counters."""
        families = (
            self.consective_failures,
            self.current_jobs,
            self.idle_since,
            self.disabled_until,
            self.enabled,
            self.last_failure,
            self.number_steps_done,
            self.total_step_build_time,
            self.total_step_time,
        )
        for family in families:
            yield family
def collect(self):
    """Fetch stats from every NetScaler and yield them as counters and gauges.

    Stats are first collected for each (nsip, entity) pair; a failed fetch
    is printed and leaves that pair without data. Then, per entity, one
    family is yielded for each configured counter followed by each
    configured gauge. Samples are labelled with the entity's configured
    labels plus ``nsip``. A value that cannot be added is printed and
    skipped.
    """
    # Collect metrics from NetScalers
    data = {}
    for nsip in self.nsips:
        data[nsip] = {}
        # cycle through metrics json to get required entities whose stats
        # need to be collected
        for entity in self.metrics.keys():
            print('>>> Collecting stats for: %s::%s' % (nsip, entity))
            try:
                data[nsip][entity] = collect_data(
                    nsip, entity, self.username, self.password, self.secure)
            except Exception as e:
                print('>>> Caught exception while collecting data: ' + str(e))

    # (config key, family class, message printed when a sample fails)
    kinds = (
        ('counters', CounterMetricFamily,
         '>>> Caught exception while adding counter %s to %s: %s'),
        ('gauges', GaugeMetricFamily,
         '>>> Caught exception while adding guage %s to %s: %s'),
    )

    # Provide collected stats to Prometheus as a counter/guage with desired labels
    for entity_name, entity in self.metrics.items():
        has_labels = 'labels' in entity.keys()
        label_names = [v[1] for v in entity['labels']] if has_labels else []
        label_names.append('nsip')

        for config_key, family_cls, failure_message in kinds:
            for ns_metric_name, prom_metric_name in entity.get(config_key, []):
                family = family_cls(prom_metric_name, ns_metric_name,
                                    labels=label_names)
                for nsip in self.nsips:
                    entity_stats = data[nsip].get(entity_name, [])
                    # A single stats object is handled like a one-item list.
                    if type(entity_stats) is not list:
                        entity_stats = [entity_stats]

                    for data_item in entity_stats:
                        if has_labels:
                            label_values = [data_item[v[0]]
                                            for v in entity['labels']]
                            label_values.append(nsip)
                        else:
                            label_values = [nsip]
                        try:
                            family.add_metric(
                                label_values,
                                float(data_item[ns_metric_name]))
                        except Exception as e:
                            print(failure_message
                                  % (ns_metric_name, entity_name, str(e)))
                yield family
def collect(self):
    """Yield PostgreSQL master metrics read from the module-level ``get_db``.

    Six families carry one sample labelled with the database name. Three
    replica families carry one sample per replica index in
    ``range(get_db['replic_status'])``, labelled with database name and
    replica IP. All samples are added before the first family is yielded.
    """
    # (key in get_db, metric name, help text) for the per-database families
    single_specs = (
        ('db_size', 'pg_master_data_size', 'size database'),
        ('max_connections', 'pg_master_max_connections', 'max_connections'),
        ('total_connections', 'pg_master_total_connections',
         'total_connections'),
        ('left_connections', 'pg_master_left_connections',
         'left_connections'),
        ('db_deadlocks', 'pg_master_db_deadlocks', 'db_deadlocks'),
        ('replic_status', 'pg_master_replic_status', 'replic_status'),
    )
    per_db = {}
    for key, metric_name, help_text in single_specs:
        family = CounterMetricFamily(metric_name, help_text,
                                     labels=['db_name'])
        family.add_metric([get_db['db_name']], get_db[key])
        per_db[key] = family

    # The per-replica families use the get_db key as both name suffix and help.
    replica_keys = ('replic_usesysid', 'replic_pid', 'replica_lags')
    per_replica = {}
    for key in replica_keys:
        per_replica[key] = CounterMetricFamily(
            'pg_master_' + key, key, labels=['db_name', 'replic_ip'])

    for x in range(get_db['replic_status']):
        for key in replica_keys:
            per_replica[key].add_metric(
                [get_db['db_name'], get_db['replic_ip'][x]], get_db[key][x])

    yield per_db['db_size']
    yield per_db['max_connections']
    yield per_db['total_connections']
    yield per_db['left_connections']
    yield per_db['db_deadlocks']
    yield per_replica['replica_lags']
    yield per_replica['replic_usesysid']
    yield per_replica['replic_pid']
    yield per_db['replic_status']