Example #1
0
class SVCSCollector(object):
    """
    'svcs -x' checker

    Counts services reported as faulty by 'svcs -x' and exposes the count
    as a gauge labelled with the host name.
    """
    # timeout: how many seconds the shell command is allowed to run
    max_time_to_run = 4
    svcs_x_collector_timeouts = Counter('solaris_exporter_svcs_x_timeouts',
                                        'timeouts')
    svcs_x_collector_errors = Counter(
        'solaris_exporter_svcs_x_errors',
        'Number of times when collector ran' + ' with errors')
    svcs_x_collector_run_time = Gauge('solaris_exporter_svcs_x_processing',
                                      'Time spent processing request')

    def collect(self):
        """Yield a GaugeMetricFamily with the failed-service count, or
        yield nothing when the command failed or timed out."""
        with self.svcs_x_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/bin/svcs -x', self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                svcs_x = GaugeMetricFamily(
                    "solaris_exporter_svcs_x_failed_services",
                    'failed services counter in svcs -x',
                    labels=['host'])
                # each faulty service is printed as a 'svc:/...' header line
                svcs_fail = 0
                for line in lines:
                    if line.strip().startswith('svc:'):
                        svcs_fail += 1
                svcs_x.add_metric([host_name], float(svcs_fail))
                # Yield inside the success branch only. The previous code
                # yielded unconditionally after the 'with' block, raising
                # NameError ('svcs_x' undefined) whenever the command
                # failed or timed out.
                yield svcs_x
            else:
                self.svcs_x_collector_errors.inc()
                if task_timeouted:
                    self.svcs_x_collector_timeouts.inc()
Example #2
0
    def test_duplicate_metrics_raises(self):
        """Registering a metric whose name (or any generated sample
        suffix) clashes with an already-registered one raises ValueError."""
        registry = CollectorRegistry()
        Counter('c', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'c', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'c', 'help', registry=registry)

        Gauge('g', 'help', registry=registry)
        self.assertRaises(ValueError, Gauge, 'g', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'g', 'help', registry=registry)

        Summary('s', 'help', registry=registry)
        self.assertRaises(ValueError, Summary, 's', 'help', registry=registry)
        # We don't currently expose quantiles, but let's prevent future
        # clashes anyway.
        self.assertRaises(ValueError, Gauge, 's', 'help', registry=registry)

        Histogram('h', 'help', registry=registry)
        self.assertRaises(ValueError, Histogram, 'h', 'help', registry=registry)
        # Clashes against various suffixes.
        self.assertRaises(ValueError, Summary, 'h', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'h_count', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'h_sum', 'help', registry=registry)
        self.assertRaises(ValueError, Counter, 'h_bucket', 'help', registry=registry)
        # The name of the histogram itself isn't taken.
        Counter('h', 'help', registry=registry)
Example #3
0
class ZpoolCollector(object):
    """
    'zpool status' checker

    Exposes how many lines of 'zpool status' output mention a FAILED or
    DEGRADED state, labelled with the host name.
    """
    # seconds the shell command is allowed to run before timing out
    max_time_to_run = 4
    zpool_collector_timeouts = Counter('solaris_exporter_zpool_timeouts',
                                       'timeouts')
    zpool_collector_errors = Counter(
        'solaris_exporter_zpool_errors',
        'Number of times when collector ran' + ' with errors')
    zpool_collector_run_time = Gauge('solaris_exporter_zpool_processing',
                                     'Time spent processing request')

    def collect(self):
        """Yield a gauge with the fault-line count on success; on failure
        or timeout only bump the error/timeout counters."""
        with self.zpool_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/sbin/zpool status', self.max_time_to_run)
            if not (task_return_code == 0 and task_timeouted is False):
                self.zpool_collector_errors.inc()
                if task_timeouted:
                    self.zpool_collector_timeouts.inc()
                return
            # count lines that report an unhealthy vdev/pool state
            bad_states = ['FAILED', 'DEGRADED']
            faults = sum(
                1 for raw_line in output.splitlines()
                if any(state in raw_line.strip() for state in bad_states))
            zpool = GaugeMetricFamily("solaris_exporter_zpool_faults",
                                      'faults in zpool status',
                                      labels=['host'])
            zpool.add_metric([host_name], float(faults))
            yield zpool
Example #4
0
class FmadmCollector(object):
    """
    'fmadm faulty' checker

    Counts fault records in 'fmadm faulty' output (each record begins
    with a 'TIME' header line) and exposes the count per host.
    """
    # seconds the shell command is allowed to run before timing out
    max_time_to_run = 15
    fmadm_collector_timeouts = Counter('solaris_exporter_fmadm_timeouts',
                                       'timeouts')
    fmadm_collector_errors = Counter(
        'solaris_exporter_fmadm_errors',
        'Number of times when collector ran' + ' with errors')
    fmadm_collector_run_time = Gauge('solaris_exporter_fmadm_processing',
                                     'Time spent processing request')

    def collect(self):
        """Yield a gauge with the fault-record count on success; on
        failure or timeout only bump the error/timeout counters."""
        with self.fmadm_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/bin/pfexec /usr/sbin/fmadm faulty', self.max_time_to_run)
            if not (task_return_code == 0 and task_timeouted is False):
                self.fmadm_collector_errors.inc()
                if task_timeouted:
                    self.fmadm_collector_timeouts.inc()
                return
            # one 'TIME ...' header per fault record
            faults = sum(1 for raw_line in output.splitlines()
                         if raw_line.strip().startswith('TIME'))
            fmadm = GaugeMetricFamily("solaris_exporter_fmadm_faults",
                                      'faults in fmadm faulty',
                                      labels=['host'])
            fmadm.add_metric([host_name], float(faults))
            yield fmadm
 def test_counter_adds(self):
     """Two Counter objects with the same name share storage: increments
     from both are summed when read back through the registry."""
     c1 = Counter('c2', 'help', registry=None)
     c2 = Counter('c2', 'help', registry=None)
     # NOTE(review): relies on a registry set up in the (unseen) fixture
     # that aggregates values written with registry=None -- confirm.
     self.assertEqual(0, self.registry.get_sample_value('c2_total'))
     c1.inc(1)
     c2.inc(2)
     self.assertEqual(3, self.registry.get_sample_value('c2_total'))
 def test_counter_adds(self):
     """Increments recorded under two different simulated pids are summed
     when read back through the multiprocess registry."""
     c1 = Counter('c', 'help', registry=None)
     # switch the value backend to a second simulated process (pid 456)
     values.ValueClass = MultiProcessValue(lambda: 456)
     c2 = Counter('c', 'help', registry=None)
     self.assertEqual(0, self.registry.get_sample_value('c_total'))
     c1.inc(1)
     c2.inc(2)
     self.assertEqual(3, self.registry.get_sample_value('c_total'))
Example #7
0
 def setUp(self):
     """Create a fresh registry plus one single-label and one two-label
     counter used by the tests."""
     self.registry = CollectorRegistry()
     self.counter = Counter('c_total',
                            'help',
                            labelnames=['l'],
                            registry=self.registry)
     self.two_labels = Counter('two',
                               'help',
                               labelnames=['a', 'b'],
                               registry=self.registry)
Example #8
0
class FCinfoCollector(object):
    """
    FC links Multipath

    Parses '/usr/sbin/mpathadm list lu' output and exposes total and
    operational ('active') path counts per multipath device.
    """
    # timeout: how many seconds the shell command is allowed to run
    max_time_to_run = 4
    fc_lun_collector_timeouts = Counter('solaris_exporter_fc_paths_timeouts',
                                        'timeouts')
    fc_lun_collector_errors = Counter(
        'solaris_exporter_fc_paths_errors',
        'Number of times when collector ran' + ' with errors')
    fc_lun_collector_run_time = Gauge('solaris_exporter_fc_paths_processing',
                                      'Time spent processing request')

    def collect(self):
        """Yield a gauge with 'active' and 'total' path counts per device,
        or yield nothing when the command failed or timed out."""
        with self.fc_lun_collector_run_time.time():
            output, task_return_code, task_timeouted = run_shell_command(
                '/usr/sbin/mpathadm list lu', self.max_time_to_run)
            if task_return_code == 0 and task_timeouted is False:
                lines = output.splitlines()
                fc_lun = GaugeMetricFamily("solaris_exporter_fc_paths",
                                           '/usr/sbin/mpathadm list lu',
                                           labels=['device', 'stat', 'host'])
                fc_total_paths = {}
                fc_active_paths = {}
                # Pre-seed 'device' so a malformed listing (a path-count
                # line appearing before any /dev/rdsk/ line) cannot raise
                # NameError on the first iteration.
                device = "unknown"
                for line in lines:
                    content = line.strip()
                    if '/dev/rdsk/' in content:
                        # '/dev/rdsk/<name>s2' -> '<name>'
                        device = re.sub(r'/dev/rdsk/(.*)s2', r'\1', content)
                    elif 'Total Path Count' in content:
                        content = content.split(':')
                        fc_total_paths[device] = content[1]
                    elif 'Operational Path Count:' in content:
                        content = content.split(':')
                        fc_active_paths[device] = content[1]
                    else:
                        # any other line ends the current device section
                        device = "unknown"
                for device in fc_total_paths.keys():
                    if device == "unknown":
                        continue
                    fc_lun.add_metric([device, 'active', host_name],
                                      float(fc_active_paths.get(device, 0)))
                    fc_lun.add_metric([device, 'total', host_name],
                                      float(fc_total_paths.get(device, 0)))
                yield fc_lun
            else:
                self.fc_lun_collector_errors.inc()
                if task_timeouted:
                    self.fc_lun_collector_timeouts.inc()
Example #9
0
 def __init__(self,
              host: str,
              port: int = 6379,
              include_keyspace_metrics: bool = True,
              exposed_info_metrics=DEFAULT_EXPOSED_METRICS):
     """Initialize the Redis exporter.

     Args:
         host: Redis server host name or address.
         port: Redis server port (default 6379).
         include_keyspace_metrics: whether keyspace metrics should be
             collected (consumed elsewhere in the class).
         exposed_info_metrics: collection of INFO fields to expose --
             presumably; confirm against DEFAULT_EXPOSED_METRICS.
     """
     self.host = host
     self.port = port
     self.include_keyspace_metrics = include_keyspace_metrics
     self.exposed_info_metrics = exposed_info_metrics
     # scrape-outcome counters (registered on the default registry)
     self.scrape_failed = Counter(
         name="redis_exporter_failed_scrape",
         documentation="Redis exporter total failed scrapes")
     self.scrape_succeeded = Counter(
         name="redis_exporter_successful_scrape",
         documentation="Redis exporter total succeeded scrapes")
 def test_serves_empty_metrics_if_not_processed(self):
     """Before the archiver/cleanup task runs, live increments are not
     visible and the collector serves no metrics at all."""
     c1 = Counter('c', 'help', registry=None)
     # The cleanup/archiver task hasn't run yet, no metrics
     self.assertEqual(None, self.registry.get_sample_value('c_total'))
     c1.inc(1)
     # Still no metrics
     self.assertEqual(self.collector.collect(), [])
Example #11
0
    def test_count_exceptions_not_observable(self):
        """count_exceptions() on a labelled metric without label values
        must raise ValueError mentioning the missing labels."""
        counter = Counter('counter', 'help', labelnames=('label',), registry=self.registry)

        # The original try/except silently passed when no exception was
        # raised at all; assertRaises makes the expectation explicit.
        with self.assertRaises(ValueError) as ctx:
            counter.count_exceptions()
        self.assertIn('missing label values', str(ctx.exception))
Example #12
0
    def test_collect(self):
        """Multiprocess collect(): counters are summed across pids into a
        single sample; gauges keep one sample per pid; histogram buckets
        are merged cumulatively."""
        pid = 0
        core._ValueClass = core._MultiProcessValue(lambda: pid)
        labels = dict((i, i) for i in 'abcd')

        def add_label(key, value):
            # copy of the base labels with one extra/overridden entry
            l = labels.copy()
            l[key] = value
            return l

        c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
        g = Gauge('g', 'help', labelnames=labels.keys(), registry=None)
        h = Histogram('h', 'help', labelnames=labels.keys(), registry=None)

        c.labels(**labels).inc(1)
        g.labels(**labels).set(1)
        h.labels(**labels).observe(1)

        # simulate a second process writing to the same metrics
        pid = 1

        c.labels(**labels).inc(1)
        g.labels(**labels).set(1)
        h.labels(**labels).observe(5)

        metrics = dict((m.name, m) for m in self.collector.collect())

        self.assertEqual(
            metrics['c'].samples, [Sample('c_total', labels, 2.0)]
        )
        # sort for a deterministic comparison (collect order is unspecified)
        metrics['g'].samples.sort(key=lambda x: x[1]['pid'])
        self.assertEqual(metrics['g'].samples, [
            Sample('g', add_label('pid', '0'), 1.0),
            Sample('g', add_label('pid', '1'), 1.0),
        ])

        metrics['h'].samples.sort(
            key=lambda x: (x[0], float(x[1].get('le', 0)))
        )
        # cumulative buckets: observe(1) lands in le=1.0, observe(5) in le=5.0
        expected_histogram = [
            Sample('h_bucket', add_label('le', '0.005'), 0.0),
            Sample('h_bucket', add_label('le', '0.01'), 0.0),
            Sample('h_bucket', add_label('le', '0.025'), 0.0),
            Sample('h_bucket', add_label('le', '0.05'), 0.0),
            Sample('h_bucket', add_label('le', '0.075'), 0.0),
            Sample('h_bucket', add_label('le', '0.1'), 0.0),
            Sample('h_bucket', add_label('le', '0.25'), 0.0),
            Sample('h_bucket', add_label('le', '0.5'), 0.0),
            Sample('h_bucket', add_label('le', '0.75'), 0.0),
            Sample('h_bucket', add_label('le', '1.0'), 1.0),
            Sample('h_bucket', add_label('le', '2.5'), 1.0),
            Sample('h_bucket', add_label('le', '5.0'), 2.0),
            Sample('h_bucket', add_label('le', '7.5'), 2.0),
            Sample('h_bucket', add_label('le', '10.0'), 2.0),
            Sample('h_bucket', add_label('le', '+Inf'), 2.0),
            Sample('h_count', labels, 2.0),
            Sample('h_sum', labels, 6.0),
        ]

        self.assertEqual(metrics['h'].samples, expected_histogram)
Example #13
0
    def test_restricted_registry(self):
        """restricted_registry() exposes only the named sample series."""
        registry = CollectorRegistry()
        Counter('c_total', 'help', registry=registry)
        Summary('s', 'help', registry=registry).observe(7)

        # only the 's_sum' sample of the summary should survive filtering
        m = Metric('s', 'help', 'summary')
        m.samples = [Sample('s_sum', {}, 7)]
        self.assertEqual([m], registry.restricted_registry(['s_sum']).collect())
Example #14
0
 def test_name_cleanup_before_unit_append(self):
     """The '_total' suffix is stripped from a counter name before the
     unit is appended, so 'c_total' + unit='total' yields 'c_total', not
     'c_total_total'."""
     self.assertEqual(self.counter._name, 'c')
     self.c = Counter('c_total',
                      'help',
                      unit="total",
                      labelnames=['l'],
                      registry=self.registry)
     self.assertEqual(self.c._name, 'c_total')
 def test_namespace_subsystem(self):
     """namespace and subsystem are prefixed onto the sample name."""
     c1 = Counter('c',
                  'help',
                  registry=None,
                  namespace='ns',
                  subsystem='ss')
     c1.inc(1)
     self.assertEqual(1, self.registry.get_sample_value('ns_ss_c_total'))
Example #16
0
 def test_namespace_subsystem_concatenated(self):
     """Prefixes apply even when the base name already ends in '_total'."""
     c = Counter('c_total',
                 'help',
                 namespace='a',
                 subsystem='b',
                 registry=self.registry)
     c.inc()
     self.assertEqual(1, self.registry.get_sample_value('a_b_c_total'))
 def test_collect_waits_for_cleanup(self):
     """A non-blocking collect must fail while the cleanup lock is held
     exclusively by someone else."""
     values.ValueClass = MultiProcessValue(lambda: 0)
     labels = dict((i, i) for i in 'abcd')
     c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
     c.labels(**labels).inc(1)
     # LOCK_EX simulates the archiver holding the lock exclusively
     with self.assertRaises(EnvironmentError):
         with advisory_lock(LOCK_EX):
             self.collector.collect(blocking=False)
    def test_aggregates_live_and_archived_metrics(self):
        """Archived counter values merge with live ones; 'liveall' gauges
        are served while their pid is live and removed once it is dead."""
        pid = 456
        values.ValueClass = MultiProcessValue(lambda: pid)

        def files():
            # NOTE(review): lowercase env var is the deprecated spelling --
            # presumably honoured by this fixture; confirm against setUp.
            fs = os.listdir(os.environ['prometheus_multiproc_dir'])
            fs.sort()
            return fs

        c1 = Counter('c1', 'c1', registry=None)
        c1.inc(1)
        self.assertIn('counter_456.db', files())

        # full archive pass: the per-pid file is folded into the archive
        archive_metrics()
        self.assertNotIn('counter_456.db', files())
        self.assertEqual(1, self.registry.get_sample_value('c1_total'))

        pid = 789
        values.ValueClass = MultiProcessValue(lambda: pid)
        c1 = Counter('c1', 'c1', registry=None)
        c1.inc(2)
        g1 = Gauge('g1', 'g1', registry=None, multiprocess_mode="liveall")
        g1.set(5)
        self.assertIn('counter_789.db', files())
        # Pretend that pid 789 is live
        archive_metrics(aggregate_only=True)

        # The live counter should be merged with the archived counter, and the
        # liveall gauge should be included
        self.assertIn('counter_789.db', files())
        self.assertIn('gauge_liveall_789.db', files())
        self.assertEqual(3, self.registry.get_sample_value('c1_total'))
        self.assertEqual(
            5, self.registry.get_sample_value('g1', labels={u'pid': u'789'}))
        # Now pid 789 is dead
        archive_metrics()

        # The formerly live counter's value should be archived, and the
        # liveall gauge should be removed completely
        self.assertNotIn('counter_789.db', files())
        self.assertNotIn('gauge_liveall_789.db', files())
        self.assertEqual(3, self.registry.get_sample_value('c1_total'))
        self.assertEqual(
            None, self.registry.get_sample_value('g1', labels={u'pid':
                                                               u'789'}))
    def test_initialization_detects_pid_change(self):
        """A new per-pid .db file appears as soon as a metric is created
        under a different pid; same-pid metrics reuse the existing file."""
        pid = 0
        values.ValueClass = MultiProcessValue(lambda: pid)

        # can not inspect the files cache directly, as it's a closure, so we
        # check for the actual files themselves
        def files():
            fs = os.listdir(os.environ['PROMETHEUS_MULTIPROC_DIR'])
            fs.sort()
            return fs

        c1 = Counter('c1', 'c1', registry=None)
        self.assertEqual(files(), ['counter_0.db'])
        c2 = Counter('c2', 'c2', registry=None)
        self.assertEqual(files(), ['counter_0.db'])
        pid = 1
        c3 = Counter('c3', 'c3', registry=None)
        self.assertEqual(files(), ['counter_0.db', 'counter_1.db'])
 def test_serves_metrics(self):
     """Metrics become visible through the collector only after
     archive_metrics() has folded the per-pid files into the archive."""
     labels = dict((i, i) for i in 'abcd')
     c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
     c.labels(**labels).inc(1)
     # not archived yet -> nothing readable
     self.assertEqual(None,
                      self.registry.get_sample_value('c_total', labels))
     archive_metrics()
     self.assertEqual(self.collector.collect()[0].samples,
                      [Sample('c_total', labels, 1.0)])
 def test_counter_across_forks(self):
     """The registry sums increments made under different simulated pids,
     while the local value object only sees the current pid's share."""
     self.pid = 0
     c1 = Counter('c', 'help', registry=None)
     self.assertEqual(0, self.registry.get_sample_value('c_total'))
     c1.inc(1)
     c1.inc(1)
     # simulate a fork: subsequent increments land in a new pid's file
     self.pid = 1
     c1.inc(1)
     self.assertEqual(3, self.registry.get_sample_value('c_total'))
     # the live value object tracks only the current pid (pid 1)
     self.assertEqual(1, c1._value.get())
Example #22
0
 def test_unregister_works(self):
     """Unregistering a collector frees its generated sample names for
     reuse by new metrics."""
     registry = CollectorRegistry()
     s = Summary('s', 'help', registry=registry)
     # 's_count' clashes with the summary's generated _count series
     self.assertRaises(ValueError,
                       Counter,
                       's_count',
                       'help',
                       registry=registry)
     registry.unregister(s)
     Counter('s_count', 'help', registry=registry)
    def test_collect_doesnt_block_other_collects(self):
        """A shared advisory lock must not stop a non-blocking collect."""
        values.ValueClass = MultiProcessValue(lambda: 0)
        labels = dict((i, i) for i in 'abcd')
        c = Counter('c', 'help', labelnames=labels.keys(), registry=None)
        c.labels(**labels).inc(1)

        # LOCK_SH simulates a concurrent reader holding the lock
        with advisory_lock(LOCK_SH):
            metrics = dict(
                (m.name, m) for m in self.collector.collect(blocking=False))
            self.assertEqual(metrics['c'].samples,
                             [Sample('c_total', labels, 1.0)])
 def test_counter_across_forks(self):
     """The registry sums increments across pids simulated via the
     MultiProcessValue factory closure, while the local value object only
     holds the current pid's increments."""
     pid = 0
     values.ValueClass = MultiProcessValue(lambda: pid)
     c1 = Counter('c', 'help', registry=None)
     self.assertEqual(0, self.registry.get_sample_value('c_total'))
     c1.inc(1)
     c1.inc(1)
     # simulate a fork: rebinding pid redirects writes to a new file
     pid = 1
     c1.inc(1)
     self.assertEqual(3, self.registry.get_sample_value('c_total'))
     # the live value object tracks only the current pid (pid 1)
     self.assertEqual(1, c1._value.get())
    def test_deprecation_warning(self):
        """The lowercase 'prometheus_multiproc_dir' env var still works
        but emits a DeprecationWarning naming the uppercase replacement."""
        os.environ['prometheus_multiproc_dir'] = self.tempdir
        with warnings.catch_warnings(record=True) as w:
            values.ValueClass = get_value_class()
            registry = CollectorRegistry()
            collector = MultiProcessCollector(registry)
            Counter('c', 'help', registry=None)

            # the uppercase variable gets populated from the legacy one
            assert os.environ['PROMETHEUS_MULTIPROC_DIR'] == self.tempdir
            assert len(w) == 1
            assert issubclass(w[-1].category, DeprecationWarning)
            assert "PROMETHEUS_MULTIPROC_DIR" in str(w[-1].message)
Example #26
0
    def buckets_to_metrics(self, metric_name, buckets):
        """Convert a raw libmedida bucket metric into histogram-style
        series: <name>_count and <name>_sum counters plus cumulative
        <name>_bucket gauges carrying an 'le' label."""
        # Converts raw bucket metric into sorted list of buckets
        unit = buckets['boundary_unit']
        description = 'libmedida metric type: ' + buckets['type']
        c = Counter(metric_name + '_count',
                    description,
                    self.label_names,
                    registry=self.registry)
        s = Counter(metric_name + '_sum',
                    description,
                    self.label_names,
                    registry=self.registry)
        g = Gauge(metric_name + '_bucket',
                  description,
                  self.label_names + ['le'],
                  registry=self.registry)

        # normalize boundaries to seconds so buckets sort numerically
        measurements = []
        for bucket in buckets['buckets']:
            measurements.append({
                'boundary':
                self.duration_to_seconds(bucket['boundary'], unit),
                'count':
                bucket['count'],
                'sum':
                bucket['sum']
            })
        count = 0
        for m in sorted(measurements, key=lambda i: i['boundary']):
            # Buckets from core contain only values from their respective ranges.
            # Prometheus expects "le" buckets to be cumulative so we need some extra math
            count += m['count']
            c.labels(*self.labels).inc(m['count'])
            s.labels(*self.labels).inc(self.duration_to_seconds(
                m['sum'], unit))
            # Treat buckets larger than 30d as infinity
            if float(m['boundary']) > 30 * 86400:
                g.labels(*self.labels + ['+Inf']).inc(count)
            else:
                g.labels(*self.labels + [m['boundary']]).inc(count)
    def test_counter_across_forks(self):
        """Increments from two simulated pids are summed in the registry
        (older core._MultiProcessValue API; note the unsuffixed 'c'
        sample name used by that version)."""
        pid = 0

        def get_pid():
            return pid

        core._ValueClass = core._MultiProcessValue(get_pid)
        c1 = Counter('c', 'help', registry=None)
        self.assertEqual(0, self.registry.get_sample_value('c'))
        c1.inc(1)
        c1.inc(1)
        # simulate a fork: rebinding pid redirects writes to a new file
        pid = 1
        c1.inc(1)
        self.assertEqual(3, self.registry.get_sample_value('c'))
        # the live value object tracks only the current pid (pid 1)
        self.assertEqual(1, c1._value.get())
Example #28
0
    def test_reset_registry_with_labels(self):
        """registry.reset() zeroes every sample of every labelled metric
        type (gauge, counter, summary, histogram) without unregistering."""
        registry = CollectorRegistry()

        gauge = Gauge('g', 'help', ['l'], registry=registry)
        gauge.labels('a').inc()
        self.assertEqual(1, registry.get_sample_value('g', {'l': 'a'}))

        counter = Counter('c_total', 'help', ['l'], registry=registry)
        counter.labels('a').inc()
        self.assertEqual(1, registry.get_sample_value('c_total', {'l': 'a'}))

        summary = Summary('s', 'help', ['l'], registry=registry)
        summary.labels('a').observe(10)
        self.assertEqual(1, registry.get_sample_value('s_count', {'l': 'a'}))
        self.assertEqual(10, registry.get_sample_value('s_sum', {'l': 'a'}))

        histogram = Histogram('h', 'help', ['l'], registry=registry)
        histogram.labels('a').observe(2)
        # observe(2) lands in the le=2.5 bucket and all larger ones
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
        self.assertEqual(1, registry.get_sample_value('h_count', {'l': 'a'}))
        self.assertEqual(2, registry.get_sample_value('h_sum', {'l': 'a'}))


        registry.reset()

        # after reset every sample reads 0, not None: metrics stay registered
        self.assertEqual(0, registry.get_sample_value('g', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('c_total', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('s_count', {'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('s_sum', {'l': 'a'}))

        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '1.0', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '2.5', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '5.0', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_bucket', {'le': '+Inf', 'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_count', {'l': 'a'}))
        self.assertEqual(0, registry.get_sample_value('h_sum', {'l': 'a'}))
Example #29
0
    def __init__(self, indexer, logger=getLogger(), metrics_registry=CollectorRegistry()):
        """Wrap an indexer with per-call gRPC request metrics.

        Args:
            indexer: backing indexer implementation (project type).
            logger: logger instance. NOTE(review): the default is
                evaluated once at definition time and shared by all
                instances that omit the argument -- confirm intended.
            metrics_registry: Prometheus registry for the metrics below;
                the same single-evaluation caveat applies to the default.
        """
        self.__indexer = indexer
        self.__logger = logger
        self.__metrics_registry = metrics_registry

        # metrics
        self.__metrics_requests_total = Counter(
            '{0}_indexer_grpc_requests_total'.format(NAME),
            'The number of requests.',
            [
                'func'
            ],
            registry=self.__metrics_registry
        )
        self.__metrics_requests_duration_seconds = Histogram(
            '{0}_indexer_grpc_requests_duration_seconds'.format(NAME),
            'The invocation duration in seconds.',
            [
                'func'
            ],
            registry=self.__metrics_registry
        )
Example #30
0
 def test_wrapped_original_class(self):
     """Counter.__wrapped__ exposes the original class that instances
     are actually built from."""
     self.assertEqual(Counter.__wrapped__, Counter('foo', 'bar').__class__)