Beispiel #1
1
def search():
    """Return the service SLO metrics as a plain-text Prometheus response.

    Every row of the SLO summary is published twice on the
    ``serviceslometrics`` gauge of a fresh registry: once with
    ``type='slo_value'`` and once with ``type='slo_target'``.
    """
    slo_results = ServiceSLOMetrics().get_slometrics_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)

    slo_gauge = Gauge(
        'serviceslometrics',
        labelnames=('cluster', 'namespace', 'service', 'slitype',
                    'slodoc', 'name', 'type'),
        documentation=("ServiceSLOMetrics by cluster,"
                       "service, namespace, slitype,"
                       "slodoc, name, type"),
        registry=registry,
    )

    # (value of the `type` label, attribute of ServiceSLO holding the sample)
    sample_kinds = (('slo_value', 'value'), ('slo_target', 'target'))

    for row in slo_results:
        shared_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'service': row.Service.name,
            'slitype': row.SLIType.name,
            'slodoc': row.SLODoc.name,
            'name': row.ServiceSLO.name,
        }
        for sample_type, attribute in sample_kinds:
            child = slo_gauge.labels(type=sample_type, **shared_labels)
            child.set(getattr(row.ServiceSLO, attribute))

    return Response(generate_latest(registry=registry), 200,
                    {'Content-type': 'text/plain'})
Beispiel #2
0
    def test_namespace(self):
        # A collector built with namespace='n' must expose every sample under
        # the 'n_' prefix and nothing under the bare 'process_' names.
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: 26231,
            registry=self.registry,
            namespace='n',
        )
        # Presumably pins the clock-tick rate so the CPU figure does not
        # depend on the host running the test.
        collector._ticks = 100

        expected_samples = (
            ('n_process_cpu_seconds_total', 17.21),
            ('n_process_virtual_memory_bytes', 56274944.0),
            ('n_process_resident_memory_bytes', 8114176),
            ('n_process_start_time_seconds', 1418184099.75),
            ('n_process_max_fds', 2048.0),
            ('n_process_open_fds', 5.0),
            # The un-namespaced name must not exist.
            ('process_cpu_seconds_total', None),
        )
        for sample_name, expected in expected_samples:
            self.assertEqual(expected,
                             self.registry.get_sample_value(sample_name))
    def test_working_584(self):
        # The pid callback returns a string with a trailing newline; the
        # collector is still expected to report the stats of process 584.
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: "584\n",
            registry=self.registry,
        )
        collector._ticks = 100

        expected_samples = [
            ('process_cpu_seconds_total', 0.0),
            ('process_virtual_memory_bytes', 10395648.0),
            ('process_resident_memory_bytes', 634880),
            ('process_start_time_seconds', 1418291667.75),
            # No file-descriptor samples are expected for this process.
            ('process_max_fds', None),
            ('process_open_fds', None),
        ]
        for sample_name, expected in expected_samples:
            self.assertEqual(expected, self.registry.get_sample_value(sample_name))
    def test_namespace(self):
        # With namespace='n' all samples carry the 'n_' prefix; the plain
        # 'process_*' name must be absent from the registry.
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: 26231,
            registry=self.registry,
            namespace='n',
        )
        collector._ticks = 100

        lookup = self.registry.get_sample_value
        namespaced = [
            (17.21, 'n_process_cpu_seconds_total'),
            (56274944.0, 'n_process_virtual_memory_bytes'),
            (8114176, 'n_process_resident_memory_bytes'),
            (1418184099.75, 'n_process_start_time_seconds'),
            (2048.0, 'n_process_max_fds'),
            (5.0, 'n_process_open_fds'),
        ]
        for expected, sample_name in namespaced:
            self.assertEqual(expected, lookup(sample_name))
        self.assertEqual(None, lookup('process_cpu_seconds_total'))
Beispiel #5
0
    def test_working_584(self):
        # The pid callback yields "584\n"; the trailing newline must not stop
        # the collector from reading the stats of process 584.
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: "584\n",
            registry=self.registry,
        )
        # Presumably fixes tick rate and page size so the expected numbers
        # are independent of the host running the test.
        collector._ticks = 100
        collector._pagesize = 4096

        lookup = self.registry.get_sample_value
        for expected, sample_name in (
                (0.0, 'process_cpu_seconds_total'),
                (10395648.0, 'process_virtual_memory_bytes'),
                (634880, 'process_resident_memory_bytes'),
                (1418291667.75, 'process_start_time_seconds'),
                (None, 'process_max_fds'),
                (None, 'process_open_fds'),
        ):
            self.assertEqual(expected, lookup(sample_name))
    def test_working_fake_pid(self):
        # For pid 123 the registry is expected to contain none of the process
        # samples (nor a made-up one).
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: 123,
            registry=self.registry,
        )
        collector._ticks = 100

        absent_samples = (
            'process_cpu_seconds_total',
            'process_virtual_memory_bytes',
            'process_resident_memory_bytes',
            'process_start_time_seconds',
            'process_max_fds',
            'process_open_fds',
            'process_fake_namespace',
        )
        for sample_name in absent_samples:
            self.assertEqual(None, self.registry.get_sample_value(sample_name))
Beispiel #7
0
def runDumper():
    """Run the explorer dumper in an endless loop.

    Each iteration collects process metrics, performs one dump and then
    sleeps for ``POLL_TIME`` seconds. The function never returns.
    """
    log = initiateLogger()

    dumper = explorerDumperInit(log)
    # Attach process metrics to the registry owned by the dumper.
    collector = ProcessCollector(registry=dumper.getMetricsRegistry())

    while True:
        collector.collect()
        log.info("Preparing to run dumper")
        dumper.dump()
        log.info('Dump completed.  Restarting in %s seconds.', POLL_TIME)
        sleep(POLL_TIME)
Beispiel #8
0
    def test_working_fake_pid(self):
        # With pid 123 no process sample (and no invented sample name) is
        # expected to show up in the registry.
        collector = ProcessCollector(
            proc=self.test_proc,
            pid=lambda: 123,
            registry=self.registry,
        )
        collector._ticks = 100
        collector._pagesize = 4096

        lookup = self.registry.get_sample_value
        for suffix in ('cpu_seconds_total',
                       'virtual_memory_bytes',
                       'resident_memory_bytes',
                       'start_time_seconds',
                       'max_fds',
                       'open_fds',
                       'fake_namespace'):
            self.assertEqual(None, lookup('process_' + suffix))
Beispiel #9
0
def search():
    """Return vulnerability and deployment-validation totals as Prometheus text.

    A fresh registry is filled with process metrics, an ``imagemanifestvuln``
    counter and a ``deploymentvalidation`` counter, then serialized into a
    ``text/plain`` response.
    """
    imv_results = ImageManifestVuln().get_vulnerabilities_summary()
    dpv_results = DeploymentValidationData().get_deploymentvalidation_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)

    imv_counter = Counter(
        'imagemanifestvuln',
        labelnames=('cluster', 'namespace', 'severity'),
        documentation='Vulnerabilities total per severity',
        registry=registry,
    )
    dv_counter = Counter(
        'deploymentvalidation',
        labelnames=('cluster', 'namespace', 'validation', 'status'),
        documentation='Validations by validation type',
        registry=registry,
    )

    for row in imv_results:
        vuln_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'severity': row.Severity.name,
        }
        imv_counter.labels(**vuln_labels).inc(row.Count)

    for row in dpv_results:
        validation_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'validation': row.Validation.name,
            'status': row.Validation.status,
        }
        dv_counter.labels(**validation_labels).inc(row.Count)

    payload = generate_latest(registry=registry)
    return Response(payload, 200, {'Content-type': 'text/plain'})
def main():
    """Entry point of the exporter.

    Parses the command line, configures logging, starts the HTTP server that
    serves /metrics, registers the collectors and then blocks on the server
    thread.
    """
    args = ARGS.parse_args()

    # Map the numeric verbosity to a logging level; negative values leave
    # logging unconfigured.
    if args.level >= 2:
        log_level = logging.DEBUG
    elif args.level == 1:
        log_level = logging.INFO
    elif args.level == 0:
        log_level = logging.WARNING
    else:
        log_level = None
    if log_level is not None:
        logging.basicConfig(level=log_level)

    LOG.debug("Starting HTTP server")
    httpd_thread = start_http_server(args.port, addr=args.addr)

    LOG.debug("Registering StatsPluginCollector")
    stats_collector = StatsPluginCollector(
        args.endpoint,
        args.metrics_file,
        max_retries=args.max_retries,
        ssl_verify=args.sslverification,
    )
    REGISTRY.register(stats_collector)

    if args.procstats:
        LOG.debug("Registering ProcessCollector")
        # The pid is looked up through the callback rather than fixed here.
        ProcessCollector(namespace="trafficserver",
                         pid=lambda: get_ts_pid(args.pidfile))

    LOG.info("Listening on :{port}".format(port=args.port))

    # Keep the process alive for as long as the webserver thread runs.
    httpd_thread.join()
Beispiel #11
0
def main():
    """Main program.

    Parse arguments, start webserver to serve /metrics, register the
    collectors and block until the webserver thread exits.
    """
    args = ARGS.parse_args()

    # Negative verbosity levels intentionally leave logging unconfigured.
    if args.level >= 2:
        logging.basicConfig(level=logging.DEBUG)
    elif args.level == 1:
        logging.basicConfig(level=logging.INFO)
    elif args.level == 0:
        logging.basicConfig(level=logging.WARNING)

    LOG.debug('Starting HTTP server')
    httpd_thread = start_http_server(args.port, addr=args.addr)

    LOG.debug('Registering StatsPluginCollector')
    REGISTRY.register(StatsPluginCollector(args.endpoint))

    if not args.no_procstats:
        LOG.debug('Registering ProcessCollector')
        # ProcessCollector registers itself with the default REGISTRY when it
        # is constructed. Passing it to REGISTRY.register() as well would
        # register the same timeseries twice, which prometheus_client rejects
        # with a ValueError.
        ProcessCollector(pid=lambda: get_ts_pid(args.pidfile),
                         namespace='trafficserver')

    LOG.info('Listening on :{port}'.format(port=args.port))

    # Wait for the webserver
    httpd_thread.join()
Beispiel #12
0
def search():
    """Return vulnerability, validation and SLO metrics as Prometheus text.

    The three summaries are fetched first; their rows are then published on
    two counters and one gauge in a fresh registry (which also carries
    process metrics) and the registry is serialized into a ``text/plain``
    response.
    """
    imv_results = ImageManifestVuln().get_vulnerabilities_summary()
    dpv_results = DeploymentValidationData().get_deploymentvalidation_summary()
    slo_results = ServiceSLOMetrics().get_slometrics_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)

    imv_counter = Counter(
        'imagemanifestvuln',
        labelnames=('cluster', 'namespace', 'severity'),
        documentation='Vulnerabilities total per severity',
        registry=registry,
    )
    dv_counter = Counter(
        'deploymentvalidation',
        labelnames=('cluster', 'namespace', 'validation', 'status'),
        documentation='Validations by validation type',
        registry=registry,
    )
    slo_gauge = Gauge(
        'serviceslometrics',
        labelnames=('cluster', 'namespace', 'service', 'slitype',
                    'name', 'type'),
        documentation=("ServiceSLOMetrics by cluster,"
                       "service, namespace, slitype,"
                       "name, type"),
        registry=registry,
    )

    for row in imv_results:
        vuln_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'severity': row.Severity.name,
        }
        imv_counter.labels(**vuln_labels).inc(row.Count)

    for row in dpv_results:
        validation_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'validation': row.Validation.name,
            'status': row.Validation.status,
        }
        dv_counter.labels(**validation_labels).inc(row.Count)

    # (value of the `type` label, attribute of ServiceSLO holding the sample)
    sample_kinds = (('slo_value', 'value'), ('slo_target', 'target'))
    for row in slo_results:
        slo_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'service': row.Service.name,
            'slitype': row.SLIType.name,
            'name': row.ServiceSLO.name,
        }
        for sample_type, attribute in sample_kinds:
            child = slo_gauge.labels(type=sample_type, **slo_labels)
            child.set(getattr(row.ServiceSLO, attribute))

    payload = generate_latest(registry=registry)
    return Response(payload, 200, {'Content-type': 'text/plain'})
    def __init__(self,
                 prometheus,
                 live: bool,
                 project_name: str,
                 registry=None):
        """Create the extractor's metrics.

        Args:
            prometheus: Stored as ``self.prometheus``; not used further here.
            live: Selects the metric namespace, ``"csv_live"`` when true and
                ``"csv_hist"`` otherwise.
            project_name: Stored as ``self.project_name`` and used as the
                single element of ``self.label_values``.
            registry: Registry the metrics are registered with. Defaults to
                ``CognitePrometheus.registry`` when ``None``.
        """
        self.project_name = project_name
        self.prometheus = prometheus
        self.namespace = "csv_live" if live else "csv_hist"
        self.label_values = {self.project_name}
        self.registry = CognitePrometheus.registry if registry is None else registry

        # Host, process and platform collectors go to the same registry as
        # the metrics below, so a caller-supplied registry is honoured
        # instead of being silently bypassed in favour of the global one.
        self.info = Info("host",
                         "Host info",
                         namespace=self.namespace,
                         registry=self.registry)
        self.info.info({
            "hostname": socket.gethostname(),
            "fqdn": socket.getfqdn()
        })
        self.process = ProcessCollector(namespace=self.namespace,
                                        registry=self.registry)
        self.platform = PlatformCollector(registry=self.registry)

        self.created_time_series_counter = self._create_metric(
            Counter, "created_time_series_total",
            "Number of time series created since the extractor started running"
        )

        self.all_data_points_counter = self._create_metric(
            Counter, "posted_data_points_total",
            "Number of datapoints posted since the extractor started running")

        self.count_of_time_series_gauge = self._create_metric(
            Gauge, "posted_time_series_count",
            "Number of timeseries that had valid datapoints in the current file"
        )

        self.processing_failed_counter = self._create_metric(
            Counter,
            "failed_processing_files",
            "Number of files where processing failed since the extractor started running",
        )

        self.available_csv_files_gauge = self._create_metric(
            Gauge, "available_csv_files",
            "Number of csv files in the folder that could be processed by the extractor"
        )

        self.unprocessed_files_gauge = self._create_metric(
            Gauge, "unprocessed_files",
            "Number of csv files that remains to be processed in this batch")

        self.successfully_processed_files_gauge = self._create_metric(
            Gauge,
            "successfully_processed_files",
            "Number of csv files that has been successfully processed in this batch",
        )
Beispiel #14
0
def metrics():
    """Build a per-request registry and return it in Prometheus text format.

    The registry combines the multiprocess collector, process and platform
    collectors, and a ``build_info`` Info metric.
    """
    registry = CollectorRegistry()
    multiprocess.MultiProcessCollector(registry)

    ProcessCollector(registry=registry)
    PlatformCollector(registry=registry)

    build_info = Info('build_info', "The build information", registry=registry)
    build_info.info({"version": "1,2,3"})

    return Response(generate_latest(registry), mimetype=CONTENT_TYPE_LATEST)
Beispiel #15
0
    def __getattr__(self, item):
        """
        Lazily create the metric objects on first access.

        ``__getattr__`` is only invoked when normal attribute lookup fails,
        so each supported attribute is created once, stored on the instance
        with ``setattr`` and found directly afterwards. This is done here
        (instead of in ``__init__``) so the objects are not overwritten when
        a new instance is created.

        Supported names: ``registry``, ``histogram``, ``counter``,
        ``gauge_cache_size`` and ``gauge_cached_objects``.

        Raises:
            AttributeError: for any other attribute name.
        """
        if item == 'registry':
            # This will create the self.registry attribute, which
            # contains an instance of the CollectorRegistry.
            setattr(self, item, CollectorRegistry())
            # Adding a ProcessCollector to the registry. The
            # ProcessCollector does not have to be an attribute,
            # since it's never manipulated directly.
            ProcessCollector(registry=self.registry)

        elif item == 'histogram':
            # Adding a Histogram to the registry and also making
            # the Histogram available as an attribute so we can
            # call its observe()
            setattr(
                self, item,
                Histogram(name='request_latency_seconds',
                          labelnames=('cache', 'status', 'method'),
                          documentation='request latency histogram',
                          registry=self.registry))
        elif item == 'counter':
            # Adding a Counter to the registry and also making
            # the Counter available as an attribute so we can
            # call its inc()
            setattr(
                self, item,
                Counter(name='http_request',
                        documentation='total requests',
                        registry=self.registry))

        elif item == 'gauge_cache_size':
            setattr(
                self, item,
                Gauge(name='github_mirror_cache_size',
                      documentation='cache size in bytes',
                      registry=self.registry))

        elif item == 'gauge_cached_objects':
            setattr(
                self, item,
                Gauge(name='github_mirror_cached_objects',
                      documentation='number of cached objects',
                      registry=self.registry))

        else:
            # Mirror the wording of Python's own AttributeError, with
            # balanced quotes around both the type and the attribute name.
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}")

        return getattr(self, item)
Beispiel #16
0
def _collect_process_stats(pid_namespace, pid):
    """Create a ProcessCollector for ``pid`` under ``pid_namespace`` and collect once.

    Failures are reported on stdout and otherwise ignored (best effort).
    """
    try:
        # `pid` is a parameter of this helper and is never rebound, so the
        # callback keeps returning this exact pid when the collector is
        # scraped later on.
        ProcessCollector(namespace=pid_namespace,
                         pid=lambda: pid).collect()
    except Exception as ex:
        # NOTE: `ex.message` only exists on Python 2 exceptions; this module
        # is Python 2 code.
        print('Unable to collect stats for {}, PID {}: {}'.format(
            pid_namespace, pid, ex.message))


def application(env, start_response):
    """WSGI application exposing process metrics for the configured processes.

    For every ``namespace -> search_terms`` entry in ``_PROCESSES`` the
    matching processes are split into masters (processes that are the parent
    of another match) and workers (all other matches). A ProcessCollector is
    created for each of them, and the default registry is returned in
    Prometheus text format.

    On any unexpected error the message and traceback are printed and a 500
    response carrying the error message is returned.
    """
    # NOTE(review): the collectors are created without an explicit registry
    # on every request, so they presumably accumulate in the default
    # registry. Confirm how repeated requests are meant to behave.
    try:
        for namespace, search_terms in _PROCESSES.items():
            found_processes = _find_processes_matching(search_terms)
            # Many flaws in this, assumes single master and optionally multiple child workers
            master_pid_set = set()
            for process in found_processes.values():
                if process.ppid() in found_processes.keys():
                    master_pid_set.add(process.ppid())
            master_pids = sorted(master_pid_set)
            child_pids = sorted({
                pid
                for pid in found_processes.keys() if pid not in master_pid_set
            })
            # Annoying we cannot seem to use labels easily (without making ourselves). The
            # process collector is probably too limited for what we would want.
            for master_pid in master_pids:
                _collect_process_stats('{}_master'.format(namespace),
                                       master_pid)
            for index, child_pid in enumerate(child_pids):
                _collect_process_stats(
                    '{}_worker_{}'.format(namespace, index), child_pid)
        start_response('200 OK', [('Content-Type', 'text/html')])
        return generate_latest()
    except Exception as ex:
        print(ex.message)
        traceback.print_tb(sys.exc_info()[2])
        start_response('500 INTERNAL SERVER ERROR',
                       [('Content-Type', 'text/html')])
        return ex.message
def export_my_metrics(backend_connected=False):
    """
    Return this service's own metrics as a Prometheus text export.

    The export holds a ``<my_name>_threads`` gauge and process metrics from a
    fresh registry, followed by a hand-written untyped ``<my_name>_up``
    sample that is ``1.0`` when ``backend_connected`` is true and ``0.0``
    otherwise.
    """
    if backend_connected:
        backend_up_value = '1.0'
    else:
        backend_up_value = '0.0'

    registry = CollectorRegistry()
    threads_gauge = Gauge(
        my_name + '_threads',
        'Number of threads performing moksha.monitoring.socket connections.',
        registry=registry)
    threads_gauge.set(worker_threads_count())
    ProcessCollector(namespace=my_name, registry=registry)

    exposition = generate_latest(registry).decode()
    # The "up" sample is appended by hand rather than through a metric object.
    up_lines = [
        '# HELP ' + my_name + '_up Show that we\'re connected to the backend!',
        '# TYPE ' + my_name + '_up untyped',
        my_name + '_up ' + backend_up_value,
    ]
    return exposition + '\n'.join(up_lines) + '\n'
Beispiel #18
0
def search():
    """Return the image-manifest vulnerability totals as Prometheus text.

    Each summary row increments the ``imagemanifestvuln`` counter of a fresh
    registry (which also carries process metrics) by the row's count.
    """
    results = ImageManifestVuln().get_vulnerabilities_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)

    counter = Counter(
        'imagemanifestvuln',
        labelnames=('cluster', 'namespace', 'severity'),
        documentation='Vulnerabilities total per severity',
        registry=registry,
    )

    for row in results:
        row_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'severity': row.Severity.name,
        }
        counter.labels(**row_labels).inc(row.Count)

    payload = generate_latest(registry=registry)
    return Response(payload, 200, {'Content-type': 'text/plain'})
def search():
    """Return the deployment-validation totals as Prometheus text.

    Each summary row increments the ``deploymentvalidation`` counter of a
    fresh registry (which also carries process metrics) by the row's count.
    """
    dpv_results = DeploymentValidationData().get_deploymentvalidation_summary()

    registry = CollectorRegistry()
    ProcessCollector(registry=registry)

    dv_counter = Counter(
        'deploymentvalidation',
        labelnames=('cluster', 'namespace', 'validation', 'status'),
        documentation='Validations by validation type',
        registry=registry,
    )

    for row in dpv_results:
        row_labels = {
            'cluster': row.Cluster.name,
            'namespace': row.Namespace.name,
            'validation': row.Validation.name,
            'status': row.Validation.status,
        }
        dv_counter.labels(**row_labels).inc(row.Count)

    payload = generate_latest(registry=registry)
    return Response(payload, 200, {'Content-type': 'text/plain'})
import os
import tempfile

from prometheus_client import CollectorRegistry
from prometheus_client import ProcessCollector
from prometheus_client import multiprocess
from prometheus_client import Counter
from prometheus_client import generate_latest

# Make sure 'prometheus_multiproc_dir' points at a directory: when the
# variable is unset or empty, fall back to a fresh temporary directory.
if not os.environ.get('prometheus_multiproc_dir'):
    dir_name = tempfile.mkdtemp(prefix='mts-prometheus-multiproc-')
    # Assign directly instead of using setdefault(): setdefault() leaves an
    # existing-but-empty value in place, so the directory created above
    # would never be used.
    os.environ['prometheus_multiproc_dir'] = dir_name

# Module-wide registry: process metrics plus the multiprocess collector.
registry = CollectorRegistry()
ProcessCollector(registry=registry)
multiprocess.MultiProcessCollector(registry=registry)

# Counts failed tagBuild API calls.
failed_tag_build_requests_counter = Counter(
    name='failed_tag_build_requests',
    documentation='The number of failed tagBuild API calls.',
    registry=registry,
)

# Counts module builds matched by at least one tagging rule.
matched_module_builds_counter = Counter(
    name='matched_module_builds',
    documentation='The number of module builds which are matched rule(s) to be tagged.',
    registry=registry,
)

messaging_tx_failed_counter = Counter(
    'messaging_tx_failed',
    'The number of errors occurred during sending message to bus.',
Beispiel #21
0
 def _configure_registry(self, include_process_stats: bool = False):
     """Optionally add process statistics to ``self.registry``.

     Nothing happens unless ``include_process_stats`` is true.
     """
     if not include_process_stats:
         return
     # registry=None keeps the collector out of any registry at construction
     # time; it is handed to self.registry explicitly instead.
     process_collector = ProcessCollector(registry=None)
     self.registry.register_additional_collector(process_collector)
Beispiel #22
0
def setup_monitoring(
    app: web.Application,
    app_name: str,
    *,
    enter_middleware_cb: Optional[EnterMiddlewareCB] = None,
    exit_middleware_cb: Optional[ExitMiddlewareCB] = None,
    **app_info_kwargs,
):
    """Attach Prometheus monitoring to ``app``.

    Creates an app-scoped registry with process, platform and GC collectors,
    stores the request metrics (count, in-flight, latency) in ``app``,
    inserts the monitoring middleware at the front of ``app.middlewares`` and
    adds a ``GET /metrics`` route.

    Args:
        app: Application to instrument; the created objects are stored in it
            under the ``k*`` keys.
        app_name: Goes into the registry's target info as
            ``application_name`` and is passed to the middleware factory.
        enter_middleware_cb: Forwarded to ``middleware_factory``.
        exit_middleware_cb: Forwarded to ``middleware_factory``.
        **app_info_kwargs: Extra entries merged into the registry's target
            info.

    Returns:
        Always ``True``.
    """
    # app-scope registry
    registry = CollectorRegistry(
        auto_describe=False,
        target_info={"application_name": app_name, **app_info_kwargs},
    )
    app[kCOLLECTOR_REGISTRY] = registry

    # Built-in collectors, see https://github.com/prometheus/client_python:
    #  - process metrics
    #  - python_info metrics
    #  - python garbage collector metrics (prefixed with python_gc_)
    app[kPROCESS_COLLECTOR] = ProcessCollector(registry=registry)
    app[kPLATFORM_COLLECTOR] = PlatformCollector(registry=registry)
    app[kGC_COLLECTOR] = GCCollector(registry=registry)

    request_labels = ["app_name", "method", "endpoint"]

    # Total number of requests processed
    app[kREQUEST_COUNT] = Counter(
        name="http_requests",
        documentation="Total requests count",
        labelnames=[*request_labels, "http_status"],
        registry=registry,
    )

    app[kINFLIGHTREQUESTS] = Gauge(
        name="http_in_flight_requests",
        documentation="Number of requests in process",
        labelnames=list(request_labels),
        registry=registry,
    )

    app[kRESPONSELATENCY] = Summary(
        name="http_request_latency_seconds",
        documentation="Time processing a request",
        labelnames=list(request_labels),
        registry=registry,
    )

    # WARNING: ensure ERROR middleware is over this one
    #
    # non-API request/response (e.g /metrics, /x/*  ...)
    #                                 |
    # API request/response (/v0/*)    |
    #       |                         |
    #       |                         |
    #       v                         |
    # ===== monitoring-middleware =====
    # == rest-error-middleware  ====  |
    # ==           ...            ==  |
    # == rest-envelope-middleware ==  v
    #
    #

    # Inserting at index 0 makes this the first layer now, but it cannot
    # guarantee the order in which other setup functions are applied.
    monitoring_middleware = middleware_factory(
        app_name,
        enter_middleware_cb=enter_middleware_cb,
        exit_middleware_cb=exit_middleware_cb,
    )
    app.middlewares.insert(0, monitoring_middleware)

    app.router.add_get("/metrics", metrics_handler)

    return True
Beispiel #23
0
Metric Examples:

Counter - A cumulative metric whose value can only increase or be reset on restart.
Gauge - A metric that represents a single numerical value that can arbitrarily go up and down.
Histogram - Counts observations in configurable buckets and also provides a sum of the observed values (e.g. request durations and sizes).
Summary - Similar to histogram, but over a sliding time window.

"""

def _read_daemon_pid(pid_file='/var/run/daemon.pid'):
    """Return the raw contents of ``pid_file``, closing the file afterwards."""
    with open(pid_file) as handle:
        return handle.read()


# Example metrics, one per metric type.
c = Counter('total_failures', 'Number of failures due to network errors')
g = Gauge('in_process_requests', 'Number of requests currently in process')
i = Info('my_app_info', 'The application version info')
e = Enum('my_app_state',
         'The current state of the application',
         states=['started', 'stopped'])
# The pid is read through a helper so the pid file is closed after each read
# instead of leaking one open file handle per call.
p = ProcessCollector(namespace='mydaemon', pid=_read_daemon_pid)


def main():
    """Increase the ``total_failures`` counter by five."""
    c.inc(amount=5)


if __name__ == '__main__':
    # Serve the metrics on port 8001, then bump the counter four times with a
    # five second pause between consecutive bumps.
    start_http_server(8001)
    main()
    for _ in range(3):
        time.sleep(5)
        main()
import random

# Metrics created with registry=None are not attached to any registry here;
# they are gathered in `collectors` below.
c = Counter(name='requests_total',
            documentation='Total number of requests',
            registry=None)
s = Summary(name='requests_time', documentation='', registry=None)
info = Info(name='author_info',
            documentation='author infomation',
            registry=None)
info.info({'name': 'jeffrey4l', 'email': '*****@*****.**'})

h = Histogram(name='random_integer',
              documentation='Request size (bytes)',
              buckets=[0, 2, 4, 6, 8, INF],
              registry=None)

# NOTE(review): unlike its siblings this Enum is created without
# registry=None, so it also lands in the default registry - confirm that
# this is intended.
e = Enum(name='task_state',
         documentation='Description of enum',
         states=['starting', 'running', 'stopped'])

platform_collector = PlatformCollector(registry=None)
p_collector = ProcessCollector(registry=None)

collectors = [c, s, info, h, e, platform_collector, p_collector]
global_registry = CollectorRegistry()

# The GC collector is the only one attached to `global_registry` directly.
gc_collector = GCCollector(registry=global_registry)


class MyHandler(BaseHTTPRequestHandler):

    @s.time()
    def do_GET(self):
        c.inc()
        e.state(random.choice(['starting', 'running', 'stopped']))
        h.observe(random.randint(1, 11))
        registry = CollectorRegistry(auto_describe=True)