class SVCSCollector(object):
    """ 'svcs -x' checker """
    # timeout: how many seconds the collector is allowed to spend gathering data
    max_time_to_run = 4
    svcs_x_collector_timeouts = Counter('solaris_exporter_svcs_x_timeouts', 'timeouts')
    svcs_x_collector_errors = Counter('solaris_exporter_svcs_x_errors',
                                      'Number of times when collector ran with errors')
    svcs_x_collector_run_time = Gauge('solaris_exporter_svcs_x_processing',
                                      'Time spent processing request')

    def collect(self):
        with self.svcs_x_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command('/usr/bin/svcs -x',
                                                                         self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                svcs_x = GaugeMetricFamily("solaris_exporter_svcs_x_failed_services",
                                           'failed services counter in svcs -x',
                                           labels=['host'])
                svcs_fail = 0
                for line in lines:
                    if line.strip().startswith('svc:'):
                        svcs_fail += 1
                svcs_x.add_metric([host_name], float(svcs_fail))
                # yield only on success; on the error path svcs_x is never created
                yield svcs_x
            else:
                self.svcs_x_collector_errors.inc()
                if task_timeouted:
                    self.svcs_x_collector_timeouts.inc()
def test_duplicate_metrics_raises(self):
    registry = CollectorRegistry()
    Counter('c', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 'c', 'help', registry=registry)
    self.assertRaises(ValueError, Gauge, 'c', 'help', registry=registry)

    Gauge('g', 'help', registry=registry)
    self.assertRaises(ValueError, Gauge, 'g', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 'g', 'help', registry=registry)

    Summary('s', 'help', registry=registry)
    self.assertRaises(ValueError, Summary, 's', 'help', registry=registry)
    # We don't currently expose quantiles, but let's prevent future clashes anyway.
    self.assertRaises(ValueError, Gauge, 's', 'help', registry=registry)

    Histogram('h', 'help', registry=registry)
    self.assertRaises(ValueError, Histogram, 'h', 'help', registry=registry)
    # Clashes against various suffixes.
    self.assertRaises(ValueError, Summary, 'h', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 'h_count', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 'h_sum', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 'h_bucket', 'help', registry=registry)
    # The name of the histogram itself isn't taken.
    Counter('h', 'help', registry=registry)
class ZpoolCollector(object):
    """ 'zpool status' checker """
    # timeout: how many seconds the collector is allowed to spend gathering data
    max_time_to_run = 4
    zpool_collector_timeouts = Counter('solaris_exporter_zpool_timeouts', 'timeouts')
    zpool_collector_errors = Counter('solaris_exporter_zpool_errors',
                                     'Number of times when collector ran with errors')
    zpool_collector_run_time = Gauge('solaris_exporter_zpool_processing',
                                     'Time spent processing request')

    def collect(self):
        with self.zpool_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command('/usr/sbin/zpool status',
                                                                         self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                zpool = GaugeMetricFamily("solaris_exporter_zpool_faults",
                                          'faults in zpool status',
                                          labels=['host'])
                faults = 0
                for line in lines:
                    line = line.strip()
                    if any(s in line for s in ['FAILED', 'DEGRADED']):
                        faults += 1
                zpool.add_metric([host_name], float(faults))
                yield zpool
            else:
                self.zpool_collector_errors.inc()
                if task_timeouted:
                    self.zpool_collector_timeouts.inc()
class FmadmCollector(object):
    """ 'fmadm faulty' checker """
    # timeout: how many seconds the collector is allowed to spend gathering data
    max_time_to_run = 15
    fmadm_collector_timeouts = Counter('solaris_exporter_fmadm_timeouts', 'timeouts')
    fmadm_collector_errors = Counter('solaris_exporter_fmadm_errors',
                                     'Number of times when collector ran with errors')
    fmadm_collector_run_time = Gauge('solaris_exporter_fmadm_processing',
                                     'Time spent processing request')

    def collect(self):
        with self.fmadm_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/bin/pfexec /usr/sbin/fmadm faulty', self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                fmadm = GaugeMetricFamily("solaris_exporter_fmadm_faults",
                                          'faults in fmadm faulty',
                                          labels=['host'])
                faults = 0
                for line in lines:
                    if line.strip().startswith('TIME'):
                        faults += 1
                fmadm.add_metric([host_name], float(faults))
                yield fmadm
            else:
                self.fmadm_collector_errors.inc()
                if task_timeouted:
                    self.fmadm_collector_timeouts.inc()
def test_counter_adds(self):
    c1 = Counter('c2', 'help', registry=None)
    c2 = Counter('c2', 'help', registry=None)
    self.assertEqual(0, self.registry.get_sample_value('c2_total'))
    c1.inc(1)
    c2.inc(2)
    self.assertEqual(3, self.registry.get_sample_value('c2_total'))
def test_counter_adds(self):
    c1 = Counter('c', 'help', registry=None)
    values.ValueClass = MultiProcessValue(lambda: 456)
    c2 = Counter('c', 'help', registry=None)
    self.assertEqual(0, self.registry.get_sample_value('c_total'))
    c1.inc(1)
    c2.inc(2)
    self.assertEqual(3, self.registry.get_sample_value('c_total'))
def setUp(self):
    self.registry = CollectorRegistry()
    self.counter = Counter('c_total', 'help', labelnames=['l'], registry=self.registry)
    self.two_labels = Counter('two', 'help', labelnames=['a', 'b'], registry=self.registry)
class FCinfoCollector(object):
    """ FC links Multipath """
    # timeout: how many seconds the collector is allowed to spend gathering data
    max_time_to_run = 4
    fc_lun_collector_timeouts = Counter('solaris_exporter_fc_paths_timeouts', 'timeouts')
    fc_lun_collector_errors = Counter('solaris_exporter_fc_paths_errors',
                                      'Number of times when collector ran with errors')
    fc_lun_collector_run_time = Gauge('solaris_exporter_fc_paths_processing',
                                      'Time spent processing request')

    def collect(self):
        with self.fc_lun_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command('/usr/sbin/mpathadm list lu',
                                                                         self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                fc_lun = GaugeMetricFamily("solaris_exporter_fc_paths",
                                           '/usr/sbin/mpathadm list lu',
                                           labels=['device', 'stat', 'host'])
                fc_total_paths = {}
                fc_active_paths = {}
                # initialize so a 'Path Count' line seen before any device line
                # cannot reference an undefined name
                device = "unknown"
                for line in lines:
                    content = line.strip()
                    if '/dev/rdsk/' in content:
                        device = re.sub(r'/dev/rdsk/(.*)s2', r'\1', content)
                    elif 'Total Path Count' in content:
                        content = content.split(':')
                        fc_total_paths[device] = content[1]
                    elif 'Operational Path Count:' in content:
                        content = content.split(':')
                        fc_active_paths[device] = content[1]
                    else:
                        device = "unknown"
                for device in fc_total_paths.keys():
                    if device == "unknown":
                        continue
                    fc_lun.add_metric([device, 'active', host_name],
                                      float(fc_active_paths.get(device, 0)))
                    fc_lun.add_metric([device, 'total', host_name],
                                      float(fc_total_paths.get(device, 0)))
                yield fc_lun
            else:
                self.fc_lun_collector_errors.inc()
                if task_timeouted:
                    self.fc_lun_collector_timeouts.inc()
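A minimal wiring sketch, not taken from the snippets above, showing how custom collectors like these are typically exposed with prometheus_client: REGISTRY.register() and start_http_server() are the library's standard entry points for custom collectors, while the port number and the sleep loop are assumptions for illustration.

# Hypothetical exporter startup, assuming the collector classes above are importable.
import time

from prometheus_client import start_http_server
from prometheus_client.core import REGISTRY

if __name__ == '__main__':
    # Port 9100 is an assumed value; real exporters usually make it configurable.
    start_http_server(9100)
    for collector in (SVCSCollector(), ZpoolCollector(), FmadmCollector(), FCinfoCollector()):
        REGISTRY.register(collector)
    while True:
        time.sleep(60)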
def __init__(self, host: str, port: int = 6379, include_keyspace_metrics: bool = True,
             exposed_info_metrics=DEFAULT_EXPOSED_METRICS):
    self.host = host
    self.port = port
    self.include_keyspace_metrics = include_keyspace_metrics
    self.exposed_info_metrics = exposed_info_metrics
    self.scrape_failed = Counter(
        name="redis_exporter_failed_scrape",
        documentation="Redis exporter total failed scrapes")
    self.scrape_succeeded = Counter(
        name="redis_exporter_successful_scrape",
        documentation="Redis exporter total succeeded scrapes")
def test_serves_empty_metrics_if_not_processed(self):
    c1 = Counter('c', 'help', registry=None)
    # The cleanup/archiver task hasn't run yet, no metrics
    self.assertEqual(None, self.registry.get_sample_value('c_total'))
    c1.inc(1)
    # Still no metrics
    self.assertEqual(self.collector.collect(), [])
def test_count_exceptions_not_observable(self):
    counter = Counter('counter', 'help', labelnames=('label',), registry=self.registry)
    try:
        counter.count_exceptions()
    except ValueError as e:
        self.assertIn('missing label values', str(e))
    else:
        self.fail('count_exceptions() did not raise ValueError')
def test_collect(self):
    pid = 0
    core._ValueClass = core._MultiProcessValue(lambda: pid)
    labels = dict((i, i) for i in 'abcd')

    def add_label(key, value):
        l = labels.copy()
        l[key] = value
        return l

    c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
    g = Gauge('g', 'help', labelnames=labels.keys(), registry=None)
    h = Histogram('h', 'help', labelnames=labels.keys(), registry=None)

    c.labels(**labels).inc(1)
    g.labels(**labels).set(1)
    h.labels(**labels).observe(1)

    pid = 1

    c.labels(**labels).inc(1)
    g.labels(**labels).set(1)
    h.labels(**labels).observe(5)

    metrics = dict((m.name, m) for m in self.collector.collect())

    self.assertEqual(
        metrics['c'].samples, [Sample('c_total', labels, 2.0)]
    )
    metrics['g'].samples.sort(key=lambda x: x[1]['pid'])
    self.assertEqual(metrics['g'].samples, [
        Sample('g', add_label('pid', '0'), 1.0),
        Sample('g', add_label('pid', '1'), 1.0),
    ])

    metrics['h'].samples.sort(
        key=lambda x: (x[0], float(x[1].get('le', 0)))
    )
    expected_histogram = [
        Sample('h_bucket', add_label('le', '0.005'), 0.0),
        Sample('h_bucket', add_label('le', '0.01'), 0.0),
        Sample('h_bucket', add_label('le', '0.025'), 0.0),
        Sample('h_bucket', add_label('le', '0.05'), 0.0),
        Sample('h_bucket', add_label('le', '0.075'), 0.0),
        Sample('h_bucket', add_label('le', '0.1'), 0.0),
        Sample('h_bucket', add_label('le', '0.25'), 0.0),
        Sample('h_bucket', add_label('le', '0.5'), 0.0),
        Sample('h_bucket', add_label('le', '0.75'), 0.0),
        Sample('h_bucket', add_label('le', '1.0'), 1.0),
        Sample('h_bucket', add_label('le', '2.5'), 1.0),
        Sample('h_bucket', add_label('le', '5.0'), 2.0),
        Sample('h_bucket', add_label('le', '7.5'), 2.0),
        Sample('h_bucket', add_label('le', '10.0'), 2.0),
        Sample('h_bucket', add_label('le', '+Inf'), 2.0),
        Sample('h_count', labels, 2.0),
        Sample('h_sum', labels, 6.0),
    ]

    self.assertEqual(metrics['h'].samples, expected_histogram)
def test_restricted_registry(self):
    registry = CollectorRegistry()
    Counter('c_total', 'help', registry=registry)
    Summary('s', 'help', registry=registry).observe(7)

    m = Metric('s', 'help', 'summary')
    m.samples = [Sample('s_sum', {}, 7)]
    self.assertEqual([m], registry.restricted_registry(['s_sum']).collect())
def test_name_cleanup_before_unit_append(self):
    self.assertEqual(self.counter._name, 'c')
    self.c = Counter('c_total', 'help', unit="total", labelnames=['l'], registry=self.registry)
    self.assertEqual(self.c._name, 'c_total')
def test_namespace_subsystem(self):
    c1 = Counter('c', 'help', registry=None, namespace='ns', subsystem='ss')
    c1.inc(1)
    self.assertEqual(1, self.registry.get_sample_value('ns_ss_c_total'))
def test_namespace_subsystem_concatenated(self):
    c = Counter('c_total', 'help', namespace='a', subsystem='b', registry=self.registry)
    c.inc()
    self.assertEqual(1, self.registry.get_sample_value('a_b_c_total'))
def test_collect_waits_for_cleanup(self):
    values.ValueClass = MultiProcessValue(lambda: 0)
    labels = dict((i, i) for i in 'abcd')
    c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
    c.labels(**labels).inc(1)
    with self.assertRaises(EnvironmentError):
        with advisory_lock(LOCK_EX):
            self.collector.collect(blocking=False)
def test_aggregates_live_and_archived_metrics(self):
    pid = 456
    values.ValueClass = MultiProcessValue(lambda: pid)

    def files():
        fs = os.listdir(os.environ['prometheus_multiproc_dir'])
        fs.sort()
        return fs

    c1 = Counter('c1', 'c1', registry=None)
    c1.inc(1)
    self.assertIn('counter_456.db', files())

    archive_metrics()
    self.assertNotIn('counter_456.db', files())
    self.assertEqual(1, self.registry.get_sample_value('c1_total'))

    pid = 789
    values.ValueClass = MultiProcessValue(lambda: pid)
    c1 = Counter('c1', 'c1', registry=None)
    c1.inc(2)
    g1 = Gauge('g1', 'g1', registry=None, multiprocess_mode="liveall")
    g1.set(5)
    self.assertIn('counter_789.db', files())

    # Pretend that pid 789 is live
    archive_metrics(aggregate_only=True)

    # The live counter should be merged with the archived counter, and the
    # liveall gauge should be included
    self.assertIn('counter_789.db', files())
    self.assertIn('gauge_liveall_789.db', files())
    self.assertEqual(3, self.registry.get_sample_value('c1_total'))
    self.assertEqual(
        5, self.registry.get_sample_value('g1', labels={u'pid': u'789'}))

    # Now pid 789 is dead
    archive_metrics()

    # The formerly live counter's value should be archived, and the
    # liveall gauge should be removed completely
    self.assertNotIn('counter_789.db', files())
    self.assertNotIn('gauge_liveall_789.db', files())
    self.assertEqual(3, self.registry.get_sample_value('c1_total'))
    self.assertEqual(
        None, self.registry.get_sample_value('g1', labels={u'pid': u'789'}))
def test_initialization_detects_pid_change(self):
    pid = 0
    values.ValueClass = MultiProcessValue(lambda: pid)

    # cannot inspect the files cache directly, as it's a closure, so we
    # check for the actual files themselves
    def files():
        fs = os.listdir(os.environ['PROMETHEUS_MULTIPROC_DIR'])
        fs.sort()
        return fs

    c1 = Counter('c1', 'c1', registry=None)
    self.assertEqual(files(), ['counter_0.db'])
    c2 = Counter('c2', 'c2', registry=None)
    self.assertEqual(files(), ['counter_0.db'])
    pid = 1
    c3 = Counter('c3', 'c3', registry=None)
    self.assertEqual(files(), ['counter_0.db', 'counter_1.db'])
def test_serves_metrics(self):
    labels = dict((i, i) for i in 'abcd')
    c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
    c.labels(**labels).inc(1)
    self.assertEqual(None, self.registry.get_sample_value('c_total', labels))
    archive_metrics()
    self.assertEqual(self.collector.collect()[0].samples, [Sample('c_total', labels, 1.0)])
def test_counter_across_forks(self):
    self.pid = 0
    c1 = Counter('c', 'help', registry=None)
    self.assertEqual(0, self.registry.get_sample_value('c_total'))
    c1.inc(1)
    c1.inc(1)
    self.pid = 1
    c1.inc(1)
    self.assertEqual(3, self.registry.get_sample_value('c_total'))
    self.assertEqual(1, c1._value.get())
def test_unregister_works(self):
    registry = CollectorRegistry()
    s = Summary('s', 'help', registry=registry)
    self.assertRaises(ValueError, Counter, 's_count', 'help', registry=registry)
    registry.unregister(s)
    Counter('s_count', 'help', registry=registry)
def test_collect_doesnt_block_other_collects(self):
    values.ValueClass = MultiProcessValue(lambda: 0)
    labels = dict((i, i) for i in 'abcd')
    c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
    c.labels(**labels).inc(1)
    with advisory_lock(LOCK_SH):
        metrics = dict((m.name, m) for m in self.collector.collect(blocking=False))
    self.assertEqual(metrics['c'].samples, [Sample('c_total', labels, 1.0)])
def test_counter_across_forks(self):
    pid = 0
    values.ValueClass = MultiProcessValue(lambda: pid)
    c1 = Counter('c', 'help', registry=None)
    self.assertEqual(0, self.registry.get_sample_value('c_total'))
    c1.inc(1)
    c1.inc(1)
    pid = 1
    c1.inc(1)
    self.assertEqual(3, self.registry.get_sample_value('c_total'))
    self.assertEqual(1, c1._value.get())
def test_deprecation_warning(self):
    os.environ['prometheus_multiproc_dir'] = self.tempdir
    with warnings.catch_warnings(record=True) as w:
        values.ValueClass = get_value_class()
        registry = CollectorRegistry()
        collector = MultiProcessCollector(registry)
        Counter('c', 'help', registry=None)

        assert os.environ['PROMETHEUS_MULTIPROC_DIR'] == self.tempdir
        assert len(w) == 1
        assert issubclass(w[-1].category, DeprecationWarning)
        assert "PROMETHEUS_MULTIPROC_DIR" in str(w[-1].message)
def buckets_to_metrics(self, metric_name, buckets):
    # Converts a raw bucket metric into a sorted list of buckets
    unit = buckets['boundary_unit']
    description = 'libmedida metric type: ' + buckets['type']
    c = Counter(metric_name + '_count', description,
                self.label_names, registry=self.registry)
    s = Counter(metric_name + '_sum', description,
                self.label_names, registry=self.registry)
    g = Gauge(metric_name + '_bucket', description,
              self.label_names + ['le'], registry=self.registry)

    measurements = []
    for bucket in buckets['buckets']:
        measurements.append({
            'boundary': self.duration_to_seconds(bucket['boundary'], unit),
            'count': bucket['count'],
            'sum': bucket['sum']
        })

    count = 0
    for m in sorted(measurements, key=lambda i: i['boundary']):
        # Buckets from core contain only values from their respective ranges.
        # Prometheus expects "le" buckets to be cumulative, so we need some extra math.
        count += m['count']
        c.labels(*self.labels).inc(m['count'])
        s.labels(*self.labels).inc(self.duration_to_seconds(m['sum'], unit))
        # Treat buckets larger than 30d as infinity
        if float(m['boundary']) > 30 * 86400:
            g.labels(*self.labels + ['+Inf']).inc(count)
        else:
            g.labels(*self.labels + [m['boundary']]).inc(count)
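The cumulative-bucket conversion in buckets_to_metrics can be shown in isolation; a minimal sketch with made-up counts, assuming only that per-range counts must become the running totals Prometheus expects for "le" buckets.

# Illustration only: per-range bucket counts (invented values) converted into
# the cumulative counts used for "le" buckets, mirroring the running `count`
# variable in buckets_to_metrics above.
from itertools import accumulate

per_range_counts = {0.1: 4, 0.5: 2, 1.0: 0, 5.0: 7}  # boundary -> count within that range only
boundaries = sorted(per_range_counts)
cumulative = dict(zip(boundaries, accumulate(per_range_counts[b] for b in boundaries)))
assert cumulative == {0.1: 4, 0.5: 6, 1.0: 6, 5.0: 13}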
def test_counter_across_forks(self):
    pid = 0

    def get_pid():
        return pid

    core._ValueClass = core._MultiProcessValue(get_pid)
    c1 = Counter('c', 'help', registry=None)
    self.assertEqual(0, self.registry.get_sample_value('c'))
    c1.inc(1)
    c1.inc(1)
    pid = 1
    c1.inc(1)
    self.assertEqual(3, self.registry.get_sample_value('c'))
    self.assertEqual(1, c1._value.get())
def test_reset_registry_with_labels(self):
    registry = CollectorRegistry()

    gauge = Gauge('g', 'help', ['l'], registry=registry)
    gauge.labels('a').inc()
    self.assertEqual(1, registry.get_sample_value('g', {'l': 'a'}))

    counter = Counter('c_total', 'help', ['l'], registry=registry)
    counter.labels('a').inc()
    self.assertEqual(1, registry.get_sample_value('c_total', {'l': 'a'}))

    summary = Summary('s', 'help', ['l'], registry=registry)
    summary.labels('a').observe(10)
    self.assertEqual(1, registry.get_sample_value('s_count', {'l': 'a'}))
    self.assertEqual(10, registry.get_sample_value('s_sum', {'l': 'a'}))

    histogram = Histogram('h', 'help', ['l'], registry=registry)
    histogram.labels('a').observe(2)
    self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
    self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
    self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
    self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
    self.assertEqual(1, registry.get_sample_value('h_count', {'l': 'a'}))
    self.assertEqual(2, registry.get_sample_value('h_sum', {'l': 'a'}))

    registry.reset()

    self.assertEqual(0, registry.get_sample_value('g', {'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('c_total', {'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('s_count', {'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('s_sum', {'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_count', {'l': 'a'}))
    self.assertEqual(0, registry.get_sample_value('h_sum', {'l': 'a'}))
def __init__(self, indexer, logger=getLogger(), metrics_registry=CollectorRegistry()):
    self.__indexer = indexer
    self.__logger = logger
    self.__metrics_registry = metrics_registry

    # metrics
    self.__metrics_requests_total = Counter(
        '{0}_indexer_grpc_requests_total'.format(NAME),
        'The number of requests.',
        ['func'],
        registry=self.__metrics_registry
    )
    self.__metrics_requests_duration_seconds = Histogram(
        '{0}_indexer_grpc_requests_duration_seconds'.format(NAME),
        'The invocation duration in seconds.',
        ['func'],
        registry=self.__metrics_registry
    )
def test_wrapped_original_class(self):
    self.assertEqual(Counter.__wrapped__, Counter('foo', 'bar').__class__)