Esempio n. 1
0
class PrometheusMiddleware(object):
    """Falcon middleware that records per-request count and latency metrics.

    All metrics are registered on the injected ``CollectorRegistry``; the
    ``on_get`` responder renders the registry in Prometheus text format.
    """

    def __init__(self, register: CollectorRegistry):
        self.registry = register
        # Total HTTP requests, partitioned by method/path/status.
        self.requests = Counter(
            'http_total_request',
            'Counter of total HTTP requests',
            ['method', 'path', 'status'],
            registry=self.registry,
        )
        # Latency distribution with the same label set.
        self.request_histogram = Histogram(
            'request_latency_seconds',
            'Histogram of request latency',
            ['method', 'path', 'status'],
            registry=self.registry,
        )

    def process_request(self, req: Request, resp: Response) -> None:
        # Stamp the request so latency can be derived in process_response.
        req.start_time = time.time()

    def process_response(self, req: Request, resp: Response, resource,
                         req_succeeded: bool) -> None:
        elapsed = time.time() - req.start_time
        label_values = {
            'method': req.method,
            'path': req.path,
            'status': resp.status,
        }
        self.requests.labels(**label_values).inc()
        self.request_histogram.labels(**label_values).observe(elapsed)

    def on_get(self, req: Request, resp: Response) -> None:
        # Expose registry contents in the Prometheus text exposition format.
        payload = generate_latest(self.registry)
        resp.content_type = 'text/plain; version=0.0.4; charset=utf-8'
        resp.body = payload.decode('utf-8')
Esempio n. 2
0
class Prometheus:
    """Container for the application's Prometheus metrics.

    Exposes context managers for tracking in-flight requests and request
    latency around arbitrary code blocks.
    """

    def __init__(self):
        self.request_count = Counter(
            'requests_total', 'Total Request Count',
            ['app_name', 'method', 'endpoint', 'http_status'])
        self.request_latency = Histogram('request_latency_seconds',
                                         'Request Latency',
                                         ['app_name', 'endpoint'])
        self.request_in_progress = Gauge('requests_in_progress_total',
                                         'Requests in progress',
                                         ['app_name', 'endpoint', 'method'])
        # NOTE(review): these look like label *names*; a Counter with 13
        # labels is unusual -- confirm intent with the author.
        self.graph_stats = Counter('graph_stats', 'Graph Stats', [
            'constraints_added', 'constraints_removed', 'contains_updates',
            'indexes_added', 'indexes_removed', 'labels_added',
            'labels_removed', 'nodes_created', 'nodes_deleted',
            'properties_set', 'relationships_created', 'relationships_deleted',
            'query_count'
        ])

    @contextmanager
    def in_flight(self, name, path, method):
        """Track a request as in-progress for the duration of the block.

        BUG FIX: the gauge is now decremented in a ``finally`` clause, so a
        raising block can no longer leave the gauge permanently inflated
        (previously an exception propagated into the generator at ``yield``
        and the ``dec()`` was skipped).
        """
        self.request_in_progress.labels(name, path, method).inc()
        try:
            yield
        finally:
            self.request_in_progress.labels(name, path, method).dec()

    @contextmanager
    def latency(self, name, path):
        """Observe the wall-clock duration of the wrapped block.

        BUG FIX: observation now happens in ``finally`` so latency is
        recorded even when the block raises.
        """
        start = time.time()
        try:
            yield
        finally:
            self.request_latency.labels(name, path).observe(time.time() - start)
Esempio n. 3
0
class InstrumentMiddleware:
    """WSGI middleware instrumenting a BentoService app with Prometheus
    request-duration, request-count and in-progress metrics."""

    def __init__(self, app, bento_service):
        self.app = app
        self.bento_service = bento_service

        from prometheus_client import Histogram, Counter, Gauge

        service_name = self.bento_service.name
        namespace = config('instrument').get('default_namespace')

        self.metrics_request_duration = Histogram(
            name=service_name + '_request_duration_seconds',
            documentation=service_name +
            " API HTTP request duration in seconds",
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
        )
        # BUG FIX: corrected "Totoal" -> "Total" in the exported HELP text.
        self.metrics_request_total = Counter(
            name=service_name + "_request_total",
            documentation='Total number of HTTP requests',
            namespace=namespace,
            labelnames=['endpoint', 'service_version', 'http_response_code'],
        )
        # BUG FIX: corrected "Totoal" -> "Total" in the exported HELP text.
        self.metrics_request_in_progress = Gauge(
            name=service_name + "_request_in_progress",
            documentation='Total number of HTTP requests in progress now',
            namespace=namespace,
            labelnames=['endpoint', 'service_version'],
        )

    def __call__(self, environ, start_response):
        req = Request(environ)
        endpoint = req.path
        start_time = default_timer()

        def start_response_wrapper(status, headers):
            """Intercept the WSGI status line to record count and duration."""
            ret = start_response(status, headers)
            # WSGI status is e.g. "200 OK"; the first token is the code.
            status_code = int(status.split()[0])

            # instrument request total count
            self.metrics_request_total.labels(
                endpoint=endpoint,
                service_version=self.bento_service.version,
                http_response_code=status_code,
            ).inc()

            # instrument request duration (clamped at 0 against clock skew)
            total_time = max(default_timer() - start_time, 0)
            self.metrics_request_duration.labels(
                endpoint=endpoint,
                service_version=self.bento_service.version,
                http_response_code=status_code,
            ).observe(total_time)

            return ret

        # Track the request as in-progress for the lifetime of the WSGI call.
        with self.metrics_request_in_progress.labels(
                endpoint=endpoint,
                service_version=self.bento_service.version).track_inprogress():
            return self.app(environ, start_response_wrapper)
Esempio n. 4
0
class PrometheusReporter(NullReporter):
    """Span reporter that feeds tracing spans into Prometheus metrics.

    HTTP server spans are delegated to the HTTP metric set; every other
    span is recorded as an operation-duration observation.
    """

    def __init__(self, namespace='', normalize=default_normalize):
        self.histograms = {}
        self.lock = Lock()
        self.namespace = namespace
        self.normalize = normalize or default_normalize
        self._http_metrics = HTTPMetrics(namespace=namespace,
                                         normalize=normalize)
        self._operation_metrics = Histogram(
            self.metric_name(METRICS_NAME_OPERATION),
            'Duration of operations in microsecond', ['name'])

    def report_span(self, span):
        kind = self.get_tag(span, 'span.kind')
        url = self.get_tag(span, 'http.url')
        method = self.get_tag(span, 'http.method')
        # Server-side HTTP spans get the richer HTTP metric treatment.
        if kind == 'server' and (url or method):
            self._http_metrics.record(span)
            return
        duration = span.end_time - span.start_time
        label = self.normalize(span.operation_name)
        self._operation_metrics.labels(label).observe(duration)

    def get_tag(self, span, key):
        """Return the string value of tag *key* on *span*, or '' if absent."""
        for tag in span.tags:
            if tag.key != key:
                continue
            return str(tag.value) if hasattr(tag, 'value') else ''
        return ''

    def metric_name(self, name):
        return metric_name(name, namespace=self.namespace)
Esempio n. 5
0
class ProtonPrometheus(object):
    """Falcon middleware collecting per-request count and latency metrics
    in its own registry, exposed via the ``on_get`` responder."""

    def __init__(self):
        super(ProtonPrometheus, self).__init__()
        self.registry = CollectorRegistry()
        self.requests = Counter(
            'http_total_request',
            'Counter of total HTTP requests',
            ['method', 'path', 'status'],
            registry=self.registry,
        )
        # Attribute name kept as-is ("historygram") for compatibility.
        self.request_historygram = Histogram(
            'request_latency_seconds',
            'Histogram of request latency',
            ['method', 'path', 'status'],
            registry=self.registry,
        )

    def process_request(self, req, resp):
        # Record the arrival time on the request context for later use.
        req.context.start_time = time.time()

    def process_response(self, req, resp, resource, req_succeeded):
        # Guard: process_request may not have run (e.g. routing errors).
        if 'start_time' not in req.context:
            return
        elapsed = time.time() - req.context.start_time
        label_values = dict(method=req.method, path=req.path,
                            status=resp.status)
        self.requests.labels(**label_values).inc()
        self.request_historygram.labels(**label_values).observe(elapsed)

    def on_get(self, req, resp):
        # Render the registry in the Prometheus text exposition format.
        output = generate_latest(self.registry)
        resp.content_type = 'text/plain; version=0.0.4; charset=utf-8'
        resp.body = str(output.decode('utf-8'))
Esempio n. 6
0
    def test_histogram(self):
        """Test that we can track histogram in Service303"""
        # Add a histogram with a 'result' label to the registry and record
        # one observation per label value.
        c = Histogram('process_max_fds',
                      'A summary', ['result'],
                      registry=self.registry,
                      buckets=[0, 2, float('inf')])
        c.labels('success').observe(1.23)
        c.labels('failure').observe(2.34)

        # Build the expected proto outputs: one Histogram message per label
        # value, with cumulative bucket counts for bounds [0, 2, +Inf].
        histogram1 = metrics_pb2.Histogram(sample_count=1, sample_sum=1.23)
        histogram1.bucket.add(upper_bound=0, cumulative_count=0)
        histogram1.bucket.add(upper_bound=2, cumulative_count=1)
        histogram1.bucket.add(upper_bound=float('inf'), cumulative_count=1)
        histogram2 = metrics_pb2.Histogram(sample_count=1, sample_sum=2.34)
        histogram2.bucket.add(upper_bound=0, cumulative_count=0)
        histogram2.bucket.add(upper_bound=2, cumulative_count=0)
        histogram2.bucket.add(upper_bound=float('inf'), cumulative_count=1)
        # timestamp_ms corresponds to the mocked time.time() of 1234 s below.
        metric1 = metrics_pb2.Metric(histogram=histogram1,
                                     timestamp_ms=1234000)
        metric2 = metrics_pb2.Metric(histogram=histogram2,
                                     timestamp_ms=1234000)
        family = metrics_pb2.MetricFamily(name=str(
            metricsd_pb2.process_max_fds),
                                          type=metrics_pb2.HISTOGRAM)
        metric1.label.add(name=str(metricsd_pb2.result), value='success')
        metric2.label.add(name=str(metricsd_pb2.result), value='failure')
        family.metric.extend([metric1, metric2])

        # Freeze time.time() so exported timestamps are deterministic, then
        # compare the exported metrics against the expected protos
        # (order-insensitive).
        with unittest.mock.patch('time.time') as mock_time:
            mock_time.side_effect = lambda: 1234
            self.assertCountEqual(
                list(metrics_export.get_metrics(self.registry))[0].metric,
                family.metric)
Esempio n. 7
0
class HTTPMetrics(object):
    """Prometheus metrics for HTTP server spans: request counts, latency,
    and per-class (2xx..5xx) status-code counts."""

    def __init__(self, namespace='', normalize=default_normalize):
        self.namespace = namespace
        self.normalize = normalize or default_normalize

        self.requests = Counter(
            self.metric_name(METRICS_NAME_HTTP_REQUESTS),
            'Counts the number of requests made distinguished by their endpoint and error status',
            ['endpoint', 'error'])

        self.latency = Histogram(
            self.metric_name(METRICS_NAME_HTTP_REQUEST_LATENCY),
            'Duration of HTTP requests in second distinguished by their endpoint and error status',
            ['endpoint', 'error'])

        self.status_codes = Counter(
            self.metric_name(METRICS_NAME_HTTP_STATUS_CODES),
            'Counts the responses distinguished by endpoint and status code bucket',
            ['endpoint', 'status_code'])

    def record(self, span):
        """Record count, latency and status-class metrics for *span*."""
        status_code = self.get_int_tag(span, 'http.status_code')
        # BUG FIX: use floor division. ``/`` is true division in Python 3,
        # which produced labels such as "2.04xx" instead of "2xx".
        sc = status_code // 100

        endpoint = self.normalize(span.operation_name)
        if not endpoint:
            endpoint = "other"

        # Anything other than a missing/"false" error tag counts as an error.
        error = self.get_tag(span, 'error')
        if not error or error.lower() == 'false':
            error = 'false'
        else:
            error = 'true'

        self.requests.labels(endpoint, error).inc(1)
        self.latency.labels(endpoint,
                            error).observe(span.end_time - span.start_time)
        if sc >= 2 and sc <= 5:
            self.status_codes.labels(endpoint, str(sc) + 'xx').inc(1)

    def get_int_tag(self, span, key):
        """Return the integer value of tag *key*, or 0 when absent/empty."""
        tg = self.get_tag(span, key)
        if not tg:
            return 0
        return int(tg)

    def get_tag(self, span, key):
        """Return the string value of tag *key* on *span*, or '' if absent."""
        for tag in span.tags:
            if tag.key == key:
                if hasattr(tag, 'value'):
                    return str(tag.value)
                break

        return ''

    def metric_name(self, name):
        return metric_name(name, namespace=self.namespace)
Esempio n. 8
0
class OperationMetricSet:
    """Collection of Prometheus metrics representing a logical operation"""

    requests: Counter
    requests_duration: Histogram
    exceptions: Counter
    requests_in_progress: Gauge

    def __init__(self, operation_name: str, labels: List[str]):
        prefix = f"pyncette_{operation_name}"
        self.requests = Counter(
            f"{prefix}_total",
            f"Total count of {operation_name} operations",
            labels,
        )
        self.requests_duration = Histogram(
            f"{prefix}_duration_seconds",
            f"Histogram of {operation_name} processing time",
            labels,
        )
        self.exceptions = Counter(
            f"{prefix}_failures_total",
            f"Total count of failed {operation_name} failures",
            [*labels, "exception_type"],
        )
        self.requests_in_progress = Gauge(
            f"{prefix}_in_progress",
            f"Gauge of {operation_name} operations currently being processed",
            labels,
        )

    @contextlib.asynccontextmanager
    async def measure(self, **labels: Dict[str, str]) -> AsyncIterator[None]:
        """An async context manager that measures the execution of the wrapped code"""
        # Resolve the labelled children up front; when no labels are given
        # the parent metrics are used directly.
        if labels:
            in_progress = self.requests_in_progress.labels(**labels)
            total = self.requests.labels(**labels)
            duration = self.requests_duration.labels(**labels)
        else:
            in_progress = self.requests_in_progress
            total = self.requests
            duration = self.requests_duration

        in_progress.inc()
        total.inc()

        before_time = time.perf_counter()
        try:
            yield
        except Exception as e:
            self.exceptions.labels(**labels, exception_type=type(e).__name__).inc()
            raise e from None
        finally:
            # Duration and in-progress bookkeeping run even on failure.
            duration.observe(time.perf_counter() - before_time)
            in_progress.dec()
Esempio n. 9
0
class PrometheusMixin(_Base):
    """Mixin for tornado.web.Application adding request count, latency and
    response-size Prometheus metrics, updated from ``log_request``."""

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        self.prometheus_registry = kwargs.get("prometheus_registry", REGISTRY)
        # Shared registration arguments for all three metrics.
        shared = dict(
            registry=self.prometheus_registry,
            namespace=_NAMESPACE,
            subsystem=_SUB_SYSTEM,
        )
        self._requests_total_counter = Counter(
            name="requests_total",
            documentation="Counter of HTTP requests.",
            labelnames=("handler", "method", "code"),
            **shared,
        )
        self._requests_duration_seconds_histogram = Histogram(
            name="request_duration_seconds",
            documentation="Histogram of latencies for HTTP requests.",
            buckets=(0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 3, 8, 20, 60),
            labelnames=("handler", "method"),
            **shared,
        )
        self._response_size_bytes_histogram = Histogram(
            name="response_size_bytes",
            documentation="Histogram of response size for HTTP requests.",
            buckets=(10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000),
            labelnames=("handler", "method"),
            **shared,
        )

    def log_request(self, handler: RequestHandler) -> None:
        super().log_request(handler)
        self._update_metrics(handler)

    def _update_metrics(self, handler: RequestHandler) -> None:
        method = handler.request.method
        handler_name = type(handler).__name__

        # Parse the response Content-Length, if present and numeric.
        raw_length = handler._headers.get("Content-Length")
        content_length: Optional[int] = None
        if isinstance(raw_length, str):
            try:
                content_length = int(raw_length)
            except ValueError:
                content_length = None

        self._requests_duration_seconds_histogram.labels(
            handler_name, method).observe(handler.request.request_time())
        self._requests_total_counter.labels(handler_name, method,
                                            handler.get_status()).inc()
        if content_length is not None:
            self._response_size_bytes_histogram.labels(
                handler_name, method).observe(content_length)
Esempio n. 10
0
class PrometheusExporter(object):
    """Singleton exporter for RBD prober metrics: response time, bandwidth
    and operation counts, all sharing the same label set."""

    __instance = None
    LABELS = [
        'name',
        'object_size',
        'type',
        'pool',
        'image',
        'status',
    ]
    NAMESPACE = 'rbd_prober'

    @staticmethod
    def getInstance():
        """Return the process-wide instance, creating it on first use."""
        if PrometheusExporter.__instance is None:
            PrometheusExporter()
        return PrometheusExporter.__instance

    def __init__(self, *args, **kwargs):
        if PrometheusExporter.__instance is not None:
            raise Exception("This class is a singleton!")
        PrometheusExporter.__instance = self

    def init_metrics(self, histogram_buckets):
        """Create the metric objects using *histogram_buckets* plus +Inf.

        BUG FIX: previously this appended INF to the caller's list in
        place, so the caller's bucket list grew by one INF on every call;
        a local copy is built instead.
        """
        buckets = [*histogram_buckets, INF]
        self.response_time = Histogram(
            name='response_time',
            documentation='Prober response time in seconds',
            labelnames=self.LABELS,
            namespace=self.NAMESPACE,
            buckets=buckets,
        )
        self.bandwidth = Counter(
            name='bandwidth',
            documentation='Bytes has be written or read from RBD',
            labelnames=self.LABELS,
            namespace=self.NAMESPACE,
        )
        self.prober_ops = Counter(
            name='ops',
            documentation='Total ops count',
            labelnames=self.LABELS,
            namespace=self.NAMESPACE,
        )

    def observe(self, response_time, bytes_size, label_values):
        """Record one probe result; response_time == -1 marks a failure.

        NOTE(review): on failure the raw -1 is still observed into the
        histogram -- confirm this is intentional.
        """
        if response_time != -1:
            label_values['status'] = 'success'
        else:
            label_values['status'] = 'fail'

        self.response_time.labels(**label_values).observe(response_time)
        self.bandwidth.labels(**label_values).inc(bytes_size)
        self.prober_ops.labels(**label_values).inc()
Esempio n. 11
0
class MonitorMiddleware(object):
    """Flask/WSGI middleware recording request, error and latency metrics
    labelled by method, endpoint, EC2 instance id and process id."""

    def __init__(self, flask_app, metric_url):
        self.metric_url = metric_url
        self.process_id = str(os.getpid())
        # EC2 instance-metadata endpoint; only reachable on EC2 hosts.
        self.instance_id = requests.get(
            'http://169.254.169.254/latest/meta-data/instance-id').text

        flask_app.add_url_rule(metric_url, view_func=metrics, methods=['GET'])

        self.wsgi_app = ProxyFix(flask_app.wsgi_app)

        label_names = ['method', 'endpoint', 'instance', 'process']
        self.req_counter = Counter(
            'recommend_requests_total', 'Total request counts', label_names)
        self.err_counter = Counter(
            'recommend_error_total', 'Total error counts', label_names)
        self.resp_latency = Histogram(
            'recommend_response_latency_millisecond',
            'Response latency (millisecond)',
            label_names,
            buckets=(10, 20, 30, 50, 80, 100, 200, 300, 500, 1000, 2000, 3000))

    def _label(self):
        """Label values for the current Flask request context."""
        return {
            'method': request.method,
            'endpoint': request.url_rule.rule,
            'instance': self.instance_id,
            'process': self.process_id,
        }

    def log_response(self, response):
        label = self._label()
        # Don't instrument scrapes of the metrics endpoint itself.
        if label['endpoint'] == self.metric_url:
            return

        time_used = int((time.time() - g.start_time) * 1000)
        logger.info(
            '{} {} {}'.format(response.status_code, label['endpoint'],
                              time_used))
        self.req_counter.labels(**label).inc()
        self.resp_latency.labels(**label).observe(time_used)

    def log_exception(self, e):
        logger.exception(e)
        self.err_counter.labels(**self._label()).inc()
Esempio n. 12
0
class MetricDecoration:
    """Singleton that wraps module-level functions with a Prometheus
    call-duration histogram labelled by ``<module>_<function>``."""

    __instance = None

    def __init__(self, modules, service_name, whitelist=None):
        if MetricDecoration.__instance:
            raise Exception("MetricDecoration instance exists: Singleton")
        else:
            MetricDecoration.__instance = self
            # Modules whose functions will be decorated.
            self.modules = modules
            self.H = Histogram(f"{service_name}_call_duration_seconds",
                               "API call duration (s)", ["call"])
            # Function names listed here are left undecorated.
            self.whitelist = whitelist or []

    def decorate_all_in_modules(self):
        """
        Decorate all non-whitelisted functions in the configured modules
        with the Prometheus timing decorator, replacing them in place.
        """
        for module_ in self.modules:
            for name in dir(module_):
                if name not in self.whitelist:
                    obj = getattr(module_, name)
                    if isinstance(obj, FunctionType):
                        # We only check functions that are defined in the module we
                        # specified. Some of the functions in the module may have been
                        # imported from other modules. These are ignored.
                        if obj.__module__ == module_.__name__:
                            logger.debug(f"Adding metrics to {module_}:{name}")
                            setattr(
                                module_, name,
                                self._prometheus_module_metric_decorator(obj))
                        else:
                            logger.debug(
                                f"No metrics on {module_}:{name} because it belongs to another "
                                f"module")
                    else:
                        logger.debug(
                            f"No metrics on {module_}:{name} because it is not a coroutine or "
                            f"function")

    def _prometheus_module_metric_decorator(self, f: FunctionType):
        """
        Wrap *f* so each call is timed into the ``call``-labelled histogram.

        The ``call`` label value is ``<module>_<function>``, derived once at
        decoration time.

        NOTE(review): the wrapper below is synchronous; if *f* is a
        coroutine function the timer would only measure coroutine creation,
        not its execution. (The previous docstring claimed async support.)

        :param f: The function for which to capture metrics
        """
        module_ = f.__module__.split(".")[-1]
        call_key = "{}_{}".format(module_, f.__name__)

        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            with self.H.labels(call=call_key).time():
                return f(*args, **kwargs)

        return wrapper
class MetricsMiddleware():
    """Falcon-style middleware tracking request totals and latency."""

    def __init__(self):
        label_names = ['method', 'path', 'status']
        self.requests = Counter(
            'http_total_request',
            'Counter of total HTTP requests',
            label_names)
        # Attribute name kept as-is ("historygram") for compatibility.
        self.request_historygram = Histogram(
            'request_latency_seconds',
            'Histogram of request latency',
            label_names)

    def process_request(self, req, resp):
        # Timestamp used by process_response to derive latency.
        req.start_time = time.time()

    def process_response(self, req, resp, resource, req_succeeded):
        elapsed = time.time() - req.start_time
        label_values = {
            'method': req.method,
            'path': req.path,
            'status': resp.status,
        }
        self.requests.labels(**label_values).inc()
        self.request_historygram.labels(**label_values).observe(elapsed)
Esempio n. 14
0
class DatabaseMonitoring:
    """Process-wide (singleton) holder of database request metrics."""

    _instance = None
    _init = False

    def __new__(cls, *args, **kwargs):
        # Reuse the single instance once it exists.
        if cls._instance is None:
            cls._instance = object.__new__(cls)
        return cls._instance

    @once
    def __init__(self):
        self.request_latency = Histogram(
            "enjoliver_db_request_duration_seconds",
            "Database request latency", ["caller"])
        self.request_count = Counter(
            "enjoliver_db_request_total",
            "Database request count", ["caller"])
        self.cockroach_retry_count = Counter(
            "enjoliver_cockroachdb_txn_retry_total",
            "CockroachDB transaction retry count", ['caller'])
        self.exception_count = Counter(
            "enjoliver_db_exception_total",
            "Counter of number error during session",
            ["caller", "exception"])

    @contextmanager
    def observe_transaction(self, caller: str):
        """
        Wrapper to call around transaction against a database
        :param caller:
        :return:
        """
        started_at = time.time()
        try:
            yield
        except Exception as e:
            # Count the failure by exception type, then let it propagate.
            self.exception_count.labels(caller, type(e).__name__).inc()
            raise
        finally:
            # Latency and count are recorded on success and failure alike.
            self.request_latency.labels(caller).observe(time.time() - started_at)
            self.request_count.labels(caller).inc()
Esempio n. 15
0
    class Application(web.Application):
        """Tornado application wiring up routes and Prometheus metrics.

        NOTE(review): depends on ``settings`` and the handler classes from
        the enclosing scope.
        """

        def __init__(self, *args, **kwargs):
            super().__init__([
                (r'/', HealthcheckHandler),
                (r'/code', BarcodeHandler),
                (r'/code.html', HTMLBarcodeHandler),
                (r'/metrics', MetricsHandler)],
                middlewares=[],
                *args, **settings, **kwargs)

            # Request count by method/handler/status.
            self.request_count = Counter(
                'requests_total',
                'Total requests count',
                ['method', 'endpoint', 'http_status']
            )
            # Redis call latency per handler; handlers stash the timing on
            # the request object (see log_request below).
            self.redis_request_time = Histogram(
                'redis_request_latency',
                'Redis request total time',
                ['endpoint']
            )
            # Unlabelled counter of omitted collisions.
            self.mongodb_collision_count = Counter(
                'collision_total',
                'Total collision omitted'
            )

        def log_request(self, handler):
            # Defer to the default access logging, then record metrics for
            # the completed request.
            super(Application, self).log_request(handler)

            self.request_count.labels(
                method=handler.request.method.lower(),
                endpoint=type(handler).__name__.lower(),
                http_status=int(handler.get_status())
            ).inc()

            # Only observe Redis latency when a handler recorded one.
            if hasattr(handler.request, 'redis_request_time'):
                self.redis_request_time.labels(
                    endpoint=type(handler).__name__.lower()
                ).observe(handler.request.redis_request_time)
Esempio n. 16
0
class PrometheusConfig:
    """Starts a metrics HTTP server and continuously samples bitcoin prices
    into a currency-labelled histogram. The constructor blocks forever."""

    def __init__(self):
        # Buckets at 5k intervals from 10k to 100k.
        bucket_bounds = list(range(10_000, 100_001, 5_000))
        self.bitcoin_cost = Histogram(
            name='bitcoin_cost',
            documentation='bitcoin cost over time',
            buckets=bucket_bounds,
            labelnames=['currency'],
        )
        start_http_server(8000)
        self.gather_metrics()

    def gather_metrics(self):
        """Poll the price API every 5 seconds, forever (blocking loop)."""
        while True:
            json_response = requests.get(API_URL).json()
            for currency, price in json_response.items():
                # Prices arrive as strings with thousands separators.
                numeric_price = float(price.replace(',', ''))
                self.bitcoin_cost.labels(currency).observe(numeric_price)
            time.sleep(5)
Esempio n. 17
0
# Memcache key/value used by the probe operations, and the raw histogram
# bucket configuration string from the CLI.
mk = args.memcachekey
mv = args.memcachevalue
b = args.buckets

# Convert the buckets config string to a float list.
# NOTE(review): splits on ', ' exactly -- "0.1,0.2" (no space after the
# comma) would fail to parse; confirm the documented CLI format.
buckets = [float(i) for i in b.split(', ')]

# metrics
REQUEST_TIME = Histogram('memcachemon_request_duration_seconds', 'Time in seconds a memcache '
                            'operation takes',  ['operation', 'memcache'],
                            buckets=buckets)

REQUEST_FAIL = Counter('memcachemon_request_failures', 'Counter for failed operations',  ['operation', 'memcache'])

# Pre-labelled children for the get/set probes ("mc" is the memcache
# address defined elsewhere in this script).
request_time_get = REQUEST_TIME.labels(operation="get", memcache=mc)
request_time_set = REQUEST_TIME.labels(operation="set", memcache=mc)

# Probe a get, timed by the decorator; failures are counted and logged,
# never raised. The fetched value is intentionally discarded.
@request_time_get.time()
def memc_get(key):
    try:
        client.get(key)
    except Exception as error:
        REQUEST_FAIL.labels(operation="get", memcache=mc).inc()
        logger.warning("Error on mc get: %s", error)

# set func with decorator
@request_time_set.time()
def memc_set(key, value):
    try:
Esempio n. 18
0
    class _MarshalService(cls):
        """Marshal service subclass instrumented with Prometheus metrics:
        micro-batch size, request duration, in-progress gauge, exception
        and total counters."""

        def __init__(self, *args, **kwargs):
            # Copy wrapper metadata (__name__, __doc__, ...) from the
            # wrapped class so this subclass masquerades as `cls`.
            for attr_name in functools.WRAPPER_ASSIGNMENTS:
                try:
                    setattr(self.__class__, attr_name, getattr(cls, attr_name))
                except AttributeError:
                    pass

            from prometheus_client import Counter, Gauge, Histogram

            super(_MarshalService, self).__init__(*args, **kwargs)
            namespace = config('instrument').get(
                'default_namespace')  # its own namespace?
            service_name = self.bento_service_metadata_pb.name

            self.metrics_request_batch_size = Histogram(
                name=service_name + '_mb_batch_size',
                # BUG FIX: added the missing separator space so the HELP
                # text no longer runs into the service name.
                documentation=service_name + " microbatch request batch size",
                namespace=namespace,
                labelnames=['endpoint'],
            )
            self.metrics_request_duration = Histogram(
                name=service_name + '_mb_requestmb_duration_seconds',
                # BUG FIX: added the missing separator space (matches the
                # non-batch middleware's HELP text).
                documentation=service_name +
                " API HTTP request duration in seconds",
                namespace=namespace,
                labelnames=['endpoint', 'http_response_code'],
            )
            self.metrics_request_in_progress = Gauge(
                name=service_name + "_mb_request_in_progress",
                documentation='Total number of HTTP requests in progress now',
                namespace=namespace,
                labelnames=['endpoint', 'http_method'],
            )
            self.metrics_request_exception = Counter(
                name=service_name + "_mb_request_exception",
                documentation='Total number of service exceptions',
                namespace=namespace,
                labelnames=['endpoint', 'exception_class'],
            )
            self.metrics_request_total = Counter(
                name=service_name + "_mb_request_total",
                # BUG FIX: HELP text was copy-pasted from the exception
                # counter ("Total number of service exceptions").
                documentation='Total number of HTTP requests',
                namespace=namespace,
                labelnames=['endpoint', 'http_response_code'],
            )

        async def request_dispatcher(self, request):
            """Dispatch *request* via the parent class, recording totals,
            duration, exceptions and the in-progress gauge around it."""
            func = super(_MarshalService, self).request_dispatcher
            api_name = request.match_info.get("name", "/")
            _metrics_request_in_progress = self.metrics_request_in_progress.labels(
                endpoint=api_name,
                http_method=request.method,
            )
            _metrics_request_in_progress.inc()
            time_st = time.time()
            try:
                resp = await func(request)
            except Exception as e:  # pylint: disable=broad-except
                # Count the failure and answer 500 instead of propagating.
                self.metrics_request_exception.labels(
                    endpoint=api_name,
                    exception_class=e.__class__.__name__).inc()
                logger.error(traceback.format_exc())
                resp = aiohttp.web.Response(status=500)
            self.metrics_request_total.labels(
                endpoint=api_name, http_response_code=resp.status).inc()
            self.metrics_request_duration.labels(
                endpoint=api_name,
                http_response_code=resp.status).observe(time.time() - time_st)
            _metrics_request_in_progress.dec()
            return resp

        async def _batch_handler_template(self, requests, api_name):
            """Record the batch size before delegating to the parent."""
            func = super(_MarshalService, self)._batch_handler_template
            self.metrics_request_batch_size.labels(endpoint=api_name).observe(
                len(requests))
            return await func(requests, api_name)
Esempio n. 19
0
class Prometheus(commands.Cog):
    """Collects prometheus metrics for the bot.

    Registers gauges/counters/histograms in the default registry and feeds
    them from discord.py event listeners and periodic background tasks.
    """
    def __init__(self, bot):
        self.bot = bot
        # Resident memory of the bot process, sampled by log_system_metrics.
        self.ram_gauge = Gauge(
            "miso_memory_usage_bytes",
            "Memory usage of the bot process in bytes.",
        )
        # Per-core CPU utilisation of the host, sampled by log_system_metrics.
        self.cpu_gauge = Gauge(
            "system_cpu_usage_percent",
            "CPU usage of the system in percent.",
            ["core"],
        )
        # Raw gateway event counts, incremented in on_socket_event_type.
        self.event_counter = Counter(
            "miso_gateway_events_total",
            "Total number of gateway events.",
            ["event_type"],
        )
        # Per-command end-to-end latency; bucket edges chosen for a chat bot
        # where sub-second responses are the norm.
        self.command_histogram = Histogram(
            "miso_command_response_time_seconds",
            "Command end-to-end response time in seconds.",
            ["command"],
            buckets=(0.1, 0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0),
        )
        # Websocket latency per shard, sampled by log_shard_latencies.
        self.shard_latency_summary = Summary(
            "miso_shard_latency_seconds",
            "Latency of a shard in seconds.",
            ["shard"],
        )
        # Cache sizes, sampled by log_cache_contents.
        self.guild_count = Gauge(
            "miso_cached_guild_count",
            "Total amount of guilds cached.",
        )
        self.member_count = Gauge(
            "miso_cached_member_count",
            "Total amount of members cached.",
        )

    async def cog_load(self):
        """Start the periodic sampling tasks when the cog is loaded."""
        self.log_system_metrics.start()
        self.log_shard_latencies.start()
        self.log_cache_contents.start()

    def cog_unload(self):
        """Cancel the periodic sampling tasks when the cog is unloaded."""
        self.log_system_metrics.cancel()
        self.log_shard_latencies.cancel()
        self.log_cache_contents.cancel()

    @commands.Cog.listener()
    async def on_socket_event_type(self, event_type):
        """Count every raw gateway event by its type."""
        self.event_counter.labels(event_type).inc()

    @tasks.loop(seconds=10)
    async def log_shard_latencies(self):
        """Sample the websocket latency of every shard."""
        for shard in self.bot.shards.values():
            self.shard_latency_summary.labels(shard.id).observe(shard.latency)

    @tasks.loop(minutes=1)
    async def log_cache_contents(self):
        """Record how many guilds and users are currently cached."""
        guild_count = len(self.bot.guilds)
        member_count = len(self.bot.users)
        self.guild_count.set(guild_count)
        self.member_count.set(member_count)

    @tasks.loop(seconds=10)
    async def log_system_metrics(self):
        """Record process RSS and per-core CPU usage."""
        ram = psutil.Process().memory_info().rss
        self.ram_gauge.set(ram)
        # interval=None returns usage since the previous call (non-blocking).
        for core, usage in enumerate(
                psutil.cpu_percent(interval=None, percpu=True)):
            self.cpu_gauge.labels(core).set(usage)

    @log_shard_latencies.before_loop
    @log_cache_contents.before_loop
    async def task_waiter(self):
        # Don't touch shard/cache state until the client is fully connected.
        await self.bot.wait_until_ready()

    @commands.Cog.listener()
    async def on_command_completion(self, ctx: commands.Context):
        """Observe the response time of a completed top-level command."""
        # Subcommands are skipped so each invocation is only timed once.
        if ctx.invoked_subcommand is None:
            # NOTE(review): assumes `time` is `time.time` imported as a bare
            # name, and that `ctx.timer` was set at invocation elsewhere —
            # neither is visible in this chunk; confirm against the imports.
            took = time() - ctx.timer
            command = str(ctx.command)
            self.command_histogram.labels(command).observe(took)
Esempio n. 20
0
class ServerSpawnStatus(Enum):
    """Outcome values for the 'status' label of SERVER_SPAWN_DURATION_SECONDS."""

    success = 'success'
    failure = 'failure'
    already_pending = 'already-pending'
    throttled = 'throttled'
    too_many_users = 'too-many-users'

    def __str__(self) -> str:
        # Render as the bare label value rather than 'ServerSpawnStatus.x',
        # so the member can be passed directly as a Prometheus label.
        return self.value

# Pre-create one child series per status so every label value is exported
# (as zero) before the first spawn is ever observed.
for s in ServerSpawnStatus:
    # Create empty metrics with the given status
    SERVER_SPAWN_DURATION_SECONDS.labels(status=s)


# Time taken to register a user's route with the proxy, partitioned by
# outcome via the 'status' label (see ProxyAddStatus below).
PROXY_ADD_DURATION_SECONDS = Histogram(
    'proxy_add_duration_seconds',
    'duration for adding user routes to proxy',
    ['status']
)

class ProxyAddStatus(Enum):
    """Outcome values for the 'status' label of PROXY_ADD_DURATION_SECONDS."""

    success = 'success'
    failure = 'failure'
Esempio n. 21
0
    got_request_exception, abort, request
from flask_talisman import Talisman, DENY
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST, Counter, Histogram

from .transformer import Transformer
from ..specs.factory import InvalidConfiguration
"""Web app that provides default values for fiaas config, an endpoint to transform between available fiaas config
versions and prometheus metrics."""

LOG = logging.getLogger(__name__)

# Blueprint serving the defaults/transform/metrics/healthz endpoints.
web = Blueprint("web", __name__, template_folder="templates")

# One shared latency histogram, partitioned by the 'page' label; the
# pre-bound children below give each endpoint its own series.
request_histogram = Histogram("web_request_latency",
                              "Request latency in seconds", ["page"])
defaults_histogram = request_histogram.labels("defaults")
defaults_versioned_histogram = request_histogram.labels("defaults_versioned")
frontpage_histogram = request_histogram.labels("frontpage")
metrics_histogram = request_histogram.labels("metrics")
transform_histogram = request_histogram.labels("transform")
healthz_histogram = request_histogram.labels("healthz")


@web.route("/")
@frontpage_histogram.time()
def frontpage():
    """Render the front page, recording the request latency."""
    return render_template("frontpage.html")


@web.route("/internal-backstage/prometheus")
@metrics_histogram.time()
Esempio n. 22
0
    def push_job_information(self):
        '''
        Process Bareos job data and send it to the prometheus pushgateway.

        Builds a throwaway registry with one series per job attribute
        (status, level, running time, files, bytes, throughput, type,
        client, priority), labelled by job name and job id, then pushes the
        registry to the configured gateway. Failed/fatal/cancelled jobs are
        skipped unless report_failed is set. Push errors are logged via the
        Bareos director API and never raised.
        '''
        registry = CollectorRegistry()

        # Running-time bucket boundaries: 6s up to 24h.
        TIME_BUCKETS = (6, 60, 600, 1800, 3600, 10800, 18000, 28800, 86400)

        bareos_job_status = Enum('bareos_job_status', 'Backup Status',
                                 states=self.job_status.values(),
                                 labelnames=['instance', 'jobid'], registry=registry)
        # see https://github.com/bareos/bareos/blob/master/core/src/include/job_level.h
        bareos_job_level = Enum('bareos_job_level', 'Backup Level',
                                states=self.job_levels.values(),
                                labelnames=['instance', 'jobid'], registry=registry)
        bareos_job_running_time = Histogram('bareos_job_running_time', 'Job running time',
                                            labelnames=['instance', 'jobid'], registry=registry,
                                            buckets=TIME_BUCKETS)
        bareos_job_files = Gauge('bareos_job_files', 'Backed up files',
                                 labelnames=['instance', 'jobid'], registry=registry)
        bareos_job_bytes = Gauge('bareos_job_bytes', 'Backed up bytes',
                                 labelnames=['instance', 'jobid'], registry=registry)
        # NOTE: metric name keeps the historical 'throughtput' misspelling;
        # renaming it would break existing dashboards/alerts.
        bareos_job_throughput = Gauge('bareos_job_throughtput', 'Backup throughtput',
                                      registry=registry, labelnames=['instance', 'jobid'])
        # see https://github.com/bareos/bareos/blob/master/core/src/include/job_types.h
        bareos_job_type = Enum('bareos_job_type', 'Job Type',
                               states=self.job_types.values(),
                               registry=registry, labelnames=['instance', 'jobid'])
        bareos_job_client = Info('bareos_job_client', 'Client',
                                 registry=registry, labelnames=['instance', 'jobid'])
        bareos_job_priority = Gauge('bareos_job_priority', 'Job Priority',
                                    registry=registry, labelnames=['instance', 'jobid'])

        # Strip the trailing timestamp components from the job name, e.g.
        # 'backup-foo.2021-01-01_02.03.04_05' -> 'backup-foo'.
        bareos_job_name = '_'.join(self.jobName.split('.')[:-3])
        bareos_job_id = self.jobId

        # Skip error ('E'), fatal ('f') and cancelled ('A') jobs unless
        # explicitly asked to report them.
        if self.jobStatus in ('E', 'f', 'A') and self.report_failed == False:
            return

        labels = dict(instance=bareos_job_name, jobid=bareos_job_id)
        bareos_job_status.labels(**labels).state(self.job_status[self.jobStatus])
        bareos_job_running_time.labels(**labels).observe(self.jobRunningTime)
        bareos_job_files.labels(**labels).set(self.jobFiles)
        bareos_job_bytes.labels(**labels).set(self.jobBytes)
        bareos_job_throughput.labels(**labels).set(self.throughput)
        bareos_job_priority.labels(**labels).set(self.Priority)
        bareos_job_level.labels(**labels).state(self.job_levels[self.jobLevel])
        # jobType arrives as an int; the lookup table is keyed by character.
        bareos_job_type.labels(**labels).state(self.job_types[chr(self.jobType)])
        bareos_job_client.labels(**labels).info({'client': self.jobClient})

        # use_tls may be a bool or the string 'yes' depending on config source.
        if self.use_tls in (True, 'yes'):
            gateway = "https://{}:{}".format(self.gateway_host, self.gateway_port)
        else:
            gateway = "{}:{}".format(self.gateway_host, self.gateway_port)

        bareosdir.DebugMessage(100, "Submitting metrics to {}\n".format(gateway))
        try:
            if self.use_basic_auth:
                push_to_gateway(gateway, job='bareos', registry=registry,
                                handler=self.authentication_handler)
            else:
                push_to_gateway(gateway, job='bareos', registry=registry)
        except Exception as excp:
            # Best effort: a monitoring failure must never fail the backup job.
            bareosdir.DebugMessage(100, "Error: Submitting metrics to pushgateway '{}' failed.\n".format(gateway))
            bareosdir.DebugMessage(100, "python error was: {}\n".format(excp))
            bareosdir.JobMessage(bareosdir.M_INFO, "Failed to submit metrics to pushgateway\n")
Esempio n. 23
0
    Possible values for 'status' label of SERVER_SPAWN_DURATION_SECONDS
    """

    success = 'success'
    failure = 'failure'
    already_pending = 'already-pending'
    throttled = 'throttled'
    too_many_users = 'too-many-users'

    def __str__(self):
        return self.value


# Pre-create one child series per status so every label value is exported
# (as zero) before the first spawn is ever observed.
for s in ServerSpawnStatus:
    # Create empty metrics with the given status
    SERVER_SPAWN_DURATION_SECONDS.labels(status=s)

# Time taken to register a user's route with the proxy, partitioned by
# outcome via the 'status' label (see ProxyAddStatus below).
PROXY_ADD_DURATION_SECONDS = Histogram(
    'proxy_add_duration_seconds', 'duration for adding user routes to proxy',
    ['status'])


class ProxyAddStatus(Enum):
    """
    Possible values for 'status' label of PROXY_ADD_DURATION_SECONDS
    """

    success = 'success'
    failure = 'failure'

    def __str__(self):
Esempio n. 24
0
class TestHistogram(unittest.TestCase):
    """Exercise Histogram observation, bucketing, labels and timers."""

    def setUp(self):
        self.registry = CollectorRegistry()
        self.histogram = Histogram('h', 'help', registry=self.registry)
        self.labels = Histogram('hl', 'help', ['l'], registry=self.registry)

    def _assert_state(self, buckets, count, total):
        """Check cumulative bucket counts, observation count and sum of 'h'."""
        for le, expected in zip(('1.0', '2.5', '5.0', '+Inf'), buckets):
            self.assertEqual(
                expected,
                self.registry.get_sample_value('h_bucket', {'le': le}))
        self.assertEqual(count, self.registry.get_sample_value('h_count'))
        self.assertEqual(total, self.registry.get_sample_value('h_sum'))

    def test_histogram(self):
        self._assert_state((0, 0, 0, 0), 0, 0)

        self.histogram.observe(2)
        self._assert_state((0, 1, 1, 1), 1, 2)

        self.histogram.observe(2.5)
        self._assert_state((0, 2, 2, 2), 2, 4.5)

        self.histogram.observe(float("inf"))
        self._assert_state((0, 2, 2, 3), 3, float("inf"))

    def test_setting_buckets(self):
        expected_bounds = [0.0, 1.0, 2.0, float("inf")]
        # An explicit +Inf bucket is appended if missing, kept if present.
        self.assertEqual(
            expected_bounds,
            Histogram('h', 'help', registry=None, buckets=[0, 1, 2])._upper_bounds)
        self.assertEqual(
            expected_bounds,
            Histogram('h', 'help', registry=None,
                      buckets=[0, 1, 2, float("inf")])._upper_bounds)

        # Empty, +Inf-only and unsorted bucket lists are all rejected.
        for bad_buckets in ([], [float("inf")], [3, 1]):
            self.assertRaises(ValueError, Histogram, 'h', 'help',
                              registry=None, buckets=bad_buckets)

    def test_labels(self):
        self.labels.labels('a').observe(2)
        for le, expected in (('1.0', 0), ('2.5', 1), ('5.0', 1), ('+Inf', 1)):
            self.assertEqual(
                expected,
                self.registry.get_sample_value('hl_bucket', {'le': le, 'l': 'a'}))
        self.assertEqual(1, self.registry.get_sample_value('hl_count', {'l': 'a'}))
        self.assertEqual(2, self.registry.get_sample_value('hl_sum', {'l': 'a'}))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('h_count'))
        self.assertEqual(0, self.registry.get_sample_value('h_bucket', {'le': '+Inf'}))

        @self.histogram.time()
        def timed():
            pass

        timed()
        self.assertEqual(1, self.registry.get_sample_value('h_count'))
        self.assertEqual(1, self.registry.get_sample_value('h_bucket', {'le': '+Inf'}))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('h_count'))
        self.assertEqual(0, self.registry.get_sample_value('h_bucket', {'le': '+Inf'}))
        with self.histogram.time():
            pass
        self.assertEqual(1, self.registry.get_sample_value('h_count'))
        self.assertEqual(1, self.registry.get_sample_value('h_bucket', {'le': '+Inf'}))
Esempio n. 25
0
class PrometheusMetrics(DependencyProvider):
    """
    Dependency provider which measures RPC, event handler and HTTP endpoint
    latency.

    On service start, a few default metrics are declared. These are:

    - ``<prefix>_http_requests_total``
    - ``<prefix>_http_request_latency_seconds``
    - ``<prefix>_rpc_requests_total``
    - ``<prefix>_rpc_request_latency_seconds``
    - ``<prefix>_events_total``
    - ``<prefix>_events_latency_seconds``

    where ``prefix`` is either derived from ``name`` attribute of the service
    class, or :ref:`configured manually <configuration>`.
    """

    def __init__(self):
        # Start timestamps of in-flight workers; weak keys so finished
        # workers don't accumulate if worker_result is never reached.
        self.worker_starts: MutableMapping[WorkerContext, float] = WeakKeyDictionary()

    def setup(self) -> None:
        """
        Configures the dependency provider and declares default metrics.
        """
        # read config from container, use service name as default prefix
        service_name = self.container.service_name
        config = self.container.config.get("PROMETHEUS", {})
        service_config = config.get(service_name, {})
        prefix = service_config.get("prefix", service_name)
        # initialize default metrics exposed for every service
        self.http_request_total_counter = Counter(
            f"{prefix}_http_requests_total",
            "Total number of HTTP requests",
            ["http_method", "endpoint", "status_code"],
        )
        self.http_request_latency_histogram = Histogram(
            f"{prefix}_http_request_latency_seconds",
            "HTTP request duration in seconds",
            ["http_method", "endpoint", "status_code"],
        )
        self.rpc_request_total_counter = Counter(
            f"{prefix}_rpc_requests_total",
            "Total number of RPC requests",
            ["method_name"],
        )
        self.rpc_request_latency_histogram = Histogram(
            f"{prefix}_rpc_request_latency_seconds",
            "RPC request duration in seconds",
            ["method_name"],
        )
        self.events_total_counter = Counter(
            f"{prefix}_events_total",
            "Total number of handled events",
            ["source_service", "event_type"],
        )
        self.events_latency_histogram = Histogram(
            f"{prefix}_events_latency_seconds",
            "Event handler duration in seconds",
            ["source_service", "event_type"],
        )

    def get_dependency(self, worker_ctx: WorkerContext) -> MetricsServer:
        """
        Returns an instance of
        :class:`~nameko_prometheus.dependencies.MetricsServer` to be injected
        into the worker.
        """
        return MetricsServer()

    def worker_setup(self, worker_ctx: WorkerContext) -> None:
        """
        Called before service worker starts.
        """
        self.worker_starts[worker_ctx] = time.perf_counter()

    def worker_result(
        self, worker_ctx: WorkerContext, result=None, exc_info=None
    ) -> None:
        """
        Called after service worker completes.

        At this point the default metrics such as worker latency are observed,
        regardless of whether the worker finished successfully or raised an
        exception.
        """
        # Keep the try narrow: previously it wrapped the whole method, so a
        # KeyError raised while recording metrics was silently swallowed and
        # mislogged as a missing worker context.
        try:
            start = self.worker_starts.pop(worker_ctx)
        except KeyError:
            logger.info("No worker_ctx in request start dictionary")
            return
        entrypoint = worker_ctx.entrypoint
        logger.debug(f"Got result from entrypoint: {entrypoint}")
        duration = time.perf_counter() - start
        if isinstance(entrypoint, HttpRequestHandler):
            http_method = entrypoint.method
            url = entrypoint.url
            # Derive the status code the entrypoint will actually respond
            # with, for both success and exception outcomes.
            if exc_info:
                _, exc, _ = exc_info
                status_code = entrypoint.response_from_exception(exc).status_code
            else:
                status_code = entrypoint.response_from_result(result).status_code
            logger.debug(f"Tracing HTTP request: {http_method} {url} {status_code}")
            self.http_request_total_counter.labels(
                http_method=http_method, endpoint=url, status_code=status_code
            ).inc()
            self.http_request_latency_histogram.labels(
                http_method=http_method, endpoint=url, status_code=status_code
            ).observe(duration)
        elif isinstance(entrypoint, Rpc):
            method_name = entrypoint.method_name
            logger.debug(f"Tracing RPC request: {method_name}")
            self.rpc_request_total_counter.labels(method_name=method_name).inc()
            self.rpc_request_latency_histogram.labels(
                method_name=method_name
            ).observe(duration)
        elif isinstance(entrypoint, EventHandler):
            source_service = entrypoint.source_service
            event_type = entrypoint.event_type
            logger.debug(f"Tracing event handler: {source_service} {event_type}")
            self.events_total_counter.labels(
                source_service=source_service, event_type=event_type
            ).inc()
            self.events_latency_histogram.labels(
                source_service=source_service, event_type=event_type
            ).observe(duration)
        else:
            logger.warning(
                f"Entrypoint {entrypoint} is not traceable by nameko_prometheus"
            )
Esempio n. 26
0
# Expose the metrics endpoint for scraping.
start_http_server(9116)

# Comma-separated list of hosts to ping, e.g. "host1,host2".
hosts = sys.argv[1]
hosts = hosts.split(',')

while True:
    # ping servers: 10 probes per host (-C 10), quiet summary output (-q),
    # targets read from stdin (-f -), no retries (-r 0).
    command = 'fping -A -C 10 -f - -i 10 -q -r 0'.split()
    p = subprocess.Popen(command,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    _, data = p.communicate(input='\n'.join(hosts).encode('utf-8'))
    # update histogram; fping summary lines look like:
    # 77.75.79.53      : 3.56 3.68 2.51 2.63 2.93 7.67 2.49 2.86 -
    for line in data.decode('utf-8').splitlines():
        if not line:
            continue
        line = line.strip()
        pinged_host, pings = line.rsplit(':', 1)
        pinged_host = pinged_host.strip()
        pings = pings.strip()
        for value in pings.split():
            if not value:
                continue
            if value == '-':
                # '-' marks a lost probe; record a huge sentinel latency so
                # losses land in the top bucket. (`h` is the Histogram
                # defined earlier in this file.)
                value = 99999999999999999
            # fping reports milliseconds; convert to seconds.
            h.labels(target=pinged_host).observe(float(value) / 1000)
    # sleep before the next probe round
    time.sleep(10)
Esempio n. 27
0
"""
Copyright 2022 The Magma Authors.

This source code is licensed under the BSD-style license found in the
LICENSE file in the root directory of this source tree.

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from prometheus_client import Histogram

# Metrics for current configuration controller status
# Metrics for current configuration controller status: one latency
# histogram, partitioned by RPC handler via the 'name' label.
GRPC_REQUEST_PROCESSING_TIME = Histogram(
    'dp_rc_grpc_request_processing_seconds',
    'Time spent processing a GRPC request',
    ('name',),
)

# Pre-bound child timers, one per RPC handler name.
GET_CBSD_STATE_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('get_cbsd_state')
GET_DB_STATE_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('get_database_state')
DELETE_CBSD_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('delete_cbsd')
ACKNOWLEDGE_UPDATE_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('acknowledge_cbsd_update')
INSERT_TO_DB_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('insert_requests_to_db')
STORE_AVAILABLE_FREQUENCIES_PROCESSING_TIME = GRPC_REQUEST_PROCESSING_TIME.labels('store_available_frequencies_in_db')
Esempio n. 28
0
    "shadowsocks network transmit bytes",
    labelnames=[
        "ss_node",
    ],
)
NETWORK_TRANSMIT_BYTES = NETWORK_TRANSMIT_BYTES.labels(ss_node=NODE_HOST_NAME)


# Time spent encrypting payloads, pre-bound to this node's hostname.
ENCRYPT_DATA_TIME = Histogram(
    "encrypt_data_time_seconds",
    "shadowsocks encrypt data time seconds",
    labelnames=["ss_node"],
).labels(ss_node=NODE_HOST_NAME)


# Time spent decrypting payloads, pre-bound to this node's hostname.
DECRYPT_DATA_TIME = Histogram(
    "decrypt_data_time_seconds",
    "shadowsocks decrypt data time seconds",
    labelnames=["ss_node"],
).labels(ss_node=NODE_HOST_NAME)


FIND_ACCESS_USER_TIME = Histogram(
    "find_access_user_time_seconds",
    "time to find access user",
Esempio n. 29
0
        random.seed()
        client_id = random.random()

        histogram_reg = CollectorRegistry()
        counter_reg = CollectorRegistry()

        c = Counter('coinbase_http_response_total',
                    'HTTP responses counted by status_code',
                    ['client', 'method', 'code', 'message'],
                    registry=counter_reg)
        req_time = Histogram('coinbase_request_seconds',
                             'Time spent processing request',
                             ['client', 'method'],
                             registry=histogram_reg)

        get_accounts_time = req_time.labels(client=client_id,
                                            method='get_accounts')
        update_user_time = req_time.labels(client=client_id,
                                           method='update_current_user')
        request_money_time = req_time.labels(client=client_id,
                                             method='request_money')

        @update_user_time.time()
        def update_user_request():
            try:
                client.update_current_user(name=''.join(
                    random.choice(string.ascii_lowercase) for i in range(10)))
                c.labels(client=client_id,
                         method='update_current_user',
                         code="200",
                         message='ok').inc()
            except CoinbaseError as E:
Esempio n. 30
0
class LyraMetrics:
    """Standard class for pushing model metrics to a Prometheus pushgateway.

    Keeps a private registry holding one Counter, one Gauge and one
    Histogram, each labelled by model name, model version and metric name.
    Every update pushes the whole registry to the configured gateway.
    """

    pushgateway_host: str = LyraMetricsConsts.PUSH_GATEWAY_HOST_DEFAULT.value
    model_name: str = None
    model_version: str = None
    _metrics_registry: CollectorRegistry = None
    _metrics_counter: Counter = None
    _metrics_gauge: Gauge = None
    _metrics_histogram: Histogram = None

    def __init__(self,
                 host: str,
                 model_name: str,
                 model_version: str = "undefined"):
        """Validate inputs and register the three standard metrics.

        Raises:
            ValueError: if *host* is not a string, cannot be resolved, or
                *model_name* is empty.
        """
        if not isinstance(host, str):
            raise ValueError("push gateway host isn't a string value")

        if not is_host_defined(host):
            raise ValueError(
                f"lyra metrics host is not defined, set {LyraMetricsConsts.METRICS_HOST_ENV.value} variable"
            )
        # Covers None and "" in one truthiness check.
        if not model_name:
            raise ValueError("lyra metrics model name is not set")

        self.pushgateway_host = get_host(host)
        self._metrics_registry = CollectorRegistry()
        self.model_name = model_name
        self.model_version = model_version

        # Resolve the namespace once; assumed stable for the process
        # lifetime. All three metrics share the same label set.
        namespace = get_namespace() if get_namespace() is not None else ""
        labelnames = (
            LyraMetricsConsts.LABEL_MODEL_NAME.value,
            LyraMetricsConsts.LABEL_MODEL_VERSION.value,
            LyraMetricsConsts.LABEL_METRIC_NAME.value,
        )

        self._metrics_counter = Counter(
            name="lyra_metrics_counter",
            documentation=
            "A counter is a cumulative metric that represents a single monotonically increasing counter whose value can only increase or be reset to zero on restart. For example, you can use a counter to represent the number of requests served, tasks completed, or errors. Do not use a counter to expose a value that can decrease. For example, do not use a counter for the number of currently running processes; instead use a gauge",
            namespace=namespace,
            labelnames=labelnames,
            registry=self._metrics_registry,
        )

        self._metrics_gauge = Gauge(
            name="lyra_metrics_gauge",
            documentation=
            "A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. Gauges are typically used for measured values like temperatures or current memory usage, but also counts that can go up and down, like the number of concurrent requests",
            namespace=namespace,
            labelnames=labelnames,
            registry=self._metrics_registry,
        )

        self._metrics_histogram = Histogram(
            name="lyra_metrics_histogram",
            documentation=
            "A histogram samples observations (usually things like request durations or response sizes) and counts them in configurable buckets. It also provides a sum of all observed values",
            namespace=namespace,
            labelnames=labelnames,
            registry=self._metrics_registry,
        )

    @property
    def registry(self) -> Optional[CollectorRegistry]:
        """Return the private metrics registry."""
        return self._metrics_registry

    def _labels(self, metric, metric_name: str):
        """Bind the standard label values for *metric*."""
        return metric.labels(
            metric_name=metric_name,
            model_name=self.model_name,
            model_version=self.model_version,
        )

    def _push(self, job_suffix: str) -> None:
        """Push the whole registry to the gateway under the prefixed job."""
        push_to_gateway(
            self.pushgateway_host,
            job=f"{LyraMetricsConsts.JOB_PREFIX.value}_{job_suffix}",
            registry=self._metrics_registry,
        )

    def counter(self, metric_name: str, value: Any):
        """Increment the counter metric by *value* and push to the gateway."""
        self._labels(self._metrics_counter, metric_name).inc(value)
        self._push("counter")

    def set(self, metric_name: str, value: Any):
        """Set the gauge metric to int(*value*) and push to the gateway."""
        self._labels(self._metrics_gauge, metric_name).set(int(value))
        self._push("gauge")

    def observe(self, metric_name: str, value: Any):
        """Observe *value* on the histogram metric and push to the gateway."""
        self._labels(self._metrics_histogram, metric_name).observe(value)
        self._push("histogram")
Esempio n. 31
0
File: web.py Progetto: xavileon/mast
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST, Histogram
from werkzeug.exceptions import UnprocessableEntity

from .application_generator import ApplicationGenerator
from .common import make_safe_name
from .configmap_generator import ConfigMapGenerator
from .deployer import Deployer
from .models import ApplicationConfiguration
from .models import Release
from .status import status

# Blueprint serving the status/generate/deploy/metrics/health endpoints.
web = Blueprint("web", __name__)

# One shared latency histogram partitioned by the 'page' label; the
# pre-bound children below give each endpoint its own series.
request_histogram = Histogram("web_request_latency",
                              "Request latency in seconds", ["page"])
status_histogram = request_histogram.labels("status")
generate_application_histogram = request_histogram.labels(
    "generate_paasbetaapplication")
generate_configmap_histogram = request_histogram.labels("generate_configmap")
deploy_histogram = request_histogram.labels("deploy")
metrics_histogram = request_histogram.labels("metrics")
health_histogram = request_histogram.labels("health")

# Maps deployment status values to Bootstrap CSS context classes for the UI.
BOOTSTRAP_STATUS = dict(UNKNOWN="warning",
                        SUCCESS="success",
                        RUNNING="info",
                        FAILED="danger")


@web.route("/health", methods=["GET"])
@health_histogram.time()
Esempio n. 32
0
from prometheus_client import start_http_server, Histogram
import random
import time

# Execution-time histogram, partitioned by function name.
function_exec = Histogram('function_exec_time',
                          'Time spend processing a function', ['func_name'])


def func1():
    """Simulate work: ~2% of calls take 2s, the rest 0.2s."""
    is_slow = random.random() < 0.02
    time.sleep(2 if is_slow else 0.2)


def func2():
    """Simulate work: ~50% of calls take 0.6s, the rest 0.4s."""
    delay = 0.6 if random.random() < 0.5 else 0.4
    time.sleep(delay)


# Expose the metrics endpoint, then exercise the two simulated workloads
# forever, timing each call into the shared histogram.
start_http_server(9100)
while True:
    start_time1 = time.time()
    func1()
    function_exec.labels(func_name='func1').observe(time.time() - start_time1)
    start_time2 = time.time()
    func2()
    function_exec.labels(func_name='func2').observe(time.time() - start_time2)