Code example #1
import subprocess
import dill as pickle
from tornado.options import define, options
from prometheus_client import CollectorRegistry, generate_latest, start_http_server, Summary, Counter, Histogram, Gauge

define('PIO_MODEL_STORE_HOME', default='', help='path to model_store', type=str)
define('PIO_MODEL_TYPE', default='', help='prediction model type', type=str)
define('PIO_MODEL_NAMESPACE', default='', help='prediction model namespace', type=str)
define('PIO_MODEL_NAME', default='', help='prediction model name', type=str)
define('PIO_MODEL_VERSION', default='', help='prediction model version', type=str)
define('PIO_MODEL_SERVER_PORT', default='9876', help='tornado http server listen port', type=int)
define('PIO_MODEL_SERVER_PROMETHEUS_PORT', default=8080, help='port to run the prometheus http metrics server on', type=int)

MODEL_MODULE_NAME = 'pio_bundle'
# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds', 'Model Server: Time spent processing request')
REQUEST_TIME.observe(1.0)    # Observe 1.0 (seconds in this case)
REQUESTS_IN_PROGRESS = Gauge('inprogress_requests', 'model server: requests currently in progress')
REQUESTS_COUNT = Counter('http_requests_total',
                         'model server: total http request count since the last time the process was restarted',
                         ['method', 'model_type', 'model_namespace', 'model_name', 'model_version'])
EX_COUNT = Counter('exceptions_total', 'model server: total http request count since the last time the process was restarted')
REQUEST_LATENCY = Histogram('http_request_processing_seconds', 'model server: time in seconds spent processing requests.')
REQUEST_LATENCY_BUCKETS = Histogram('http_request_duration_microseconds',
                                    'model server: time in microseconds spent processing requests.',
                                    ['method', 'model_type', 'model_namespace', 'model_name', 'model_version'])
REGISTRY = CollectorRegistry()
REGISTRY.register(REQUEST_TIME)
REGISTRY.register(REQUESTS_IN_PROGRESS)
REGISTRY.register(REQUESTS_COUNT)
REGISTRY.register(EX_COUNT)
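
The excerpt above registers four of its metrics on a dedicated CollectorRegistry but is cut off before anything exposes them. A minimal sketch of serving that registry on the Prometheus port defined earlier (the wiring is an assumption, not taken from the original file):

from prometheus_client import start_http_server
from tornado.options import options, parse_command_line

parse_command_line()
# Serve only the metrics registered on the custom REGISTRY above
# (hypothetical wiring; the original module may expose metrics differently).
start_http_server(options.PIO_MODEL_SERVER_PROMETHEUS_PORT, registry=REGISTRY)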
Code example #2
File: monitor.py Project: global/internet-monitor
from prometheus_client import Counter, Gauge, Histogram, Summary
from prometheus_client.utils import INF

# ping metrics
PING_REQUESTS = Counter(
    "internet_monitor_ping_total", "Total ping requests made to 1.1.1.1"
)
PING_FAILURES = Counter(
    "internet_monitor_ping_failures_total", "Total ping requests failed made to 1.1.1.1"
)
PING_PACKET_LOSS = Counter(
    "internet_monitor_ping_packet_loss_total",
    "Number of packets lost while checking latency",
)
PING_JITTER = Gauge("internet_monitor_ping_jitter_seconds", "ICMP Jitter")
UP = Gauge("internet_monitor_up", "Internet is up or down")
PING_LATENCY = Summary(
    "internet_monitor_ping_latency_seconds", "Ping latency to 1.1.1.1"
)

# download metrics
DOWNLOAD_DURATION = Histogram(
    "internet_monitor_download_duration_seconds",
    "Download latency",
    buckets=(1, 2, 5, 7, 10, 15, 20, 50, 100, INF),
)
DOWNLOAD_REQUEST_SIZE = Gauge(
    "internet_monitor_download_size_bytes", "Bytes downloaded"
)
DOWNLOAD_REQUESTS = Counter(
    "internet_monitor_download_total", "Number of times the download job runs"
)
DOWNLOAD_FAILURES = Counter(
Code example #3
import logging

from prometheus_client import CollectorRegistry, Counter, Summary

logging.basicConfig(
    format='%(name)s:%(levelname)s %(module)s:%(lineno)d:  %(message)s',
    level="DEBUG")

#VNF_NAME = os.environ.get('VNF_NAME', 'test')
VNF_NAME = 'client4'
#pushgateway = 'localhost:9091'
PUSHGATEWAY = '172.17.0.1:9091'

# Prometheus export data
# helper variables to calculate the metrics
VCDN_REGISTRY = CollectorRegistry()

PROM_FILESIZE = Summary('filesize',
                        'requested file sizes', ['vnf_name'],
                        registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)

PROM_PROCESSED_CACHED_REQS = Counter(
    'processed_cached_reqs',
    'cached requests', ['vnf_name'],
    registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_PROCESSED_NON_CACHED_REQS = Counter(
    'processed_non_cached_reqs',
    'non-cached requests', ['vnf_name'],
    registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)

PROM_FALSE_USERS = Counter('false_users',
                           'false requests', ['vnf_name'],
                           registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_CACHED_USERS = Counter('cached_users',
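
This excerpt is cut off mid-definition, but it already sets a PUSHGATEWAY address and collects its metrics in a dedicated VCDN_REGISTRY, which suggests a Pushgateway workflow. A hedged sketch of the push step (the job name and observed value are assumptions):

from prometheus_client import push_to_gateway

# Record a sample and push the whole vCDN registry to the configured Pushgateway
# (hypothetical; the original excerpt ends before any push happens).
PROM_FILESIZE.observe(1024)
push_to_gateway(PUSHGATEWAY, job='vcdn-' + VNF_NAME, registry=VCDN_REGISTRY)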
Code example #4
File: app.py Project: sophwats/nachlass
from flask import Flask, request
from werkzeug.wsgi import DispatcherMiddleware
from prometheus_client import Counter, Summary

import base64
from pickle import load as cPload
from pickle import loads as cPloads
import numpy
import cloudpickle
import sys
import os
import pandas as pd

app = Flask(__name__)

METRICS_PREFIX = os.getenv("S2I_APP_METRICS_PREFIX", "pipeline")

PREDICTION_TIME = Summary('%s_processing_seconds' % METRICS_PREFIX, 'Time spent processing predictions')
PREDICTIONS = Counter('%s_predictions_total' % METRICS_PREFIX, 'Total predictions for a given label', ['value'])
app.model = None

@app.route('/')
def index():
  return "Make a prediction by POSTing to /predict"

@app.route('/predict', methods=['POST'])
@PREDICTION_TIME.time()
def predict():
    import json
    if 'json_args' in request.form:
      args = pd.read_json(request.form['json_args'])
      if len(args.columns) == 1 and len(args.values) > 1:
          # convert to series
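
The excerpt imports DispatcherMiddleware but is truncated before showing how metrics are exposed. A common way to mount prometheus_client's WSGI app next to the Flask app defined above, sketched here as an assumption (the /metrics path is not taken from the project):

from prometheus_client import make_wsgi_app

# Hypothetical wiring: serve the default registry under /metrics alongside the Flask app.
app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {"/metrics": make_wsgi_app()})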
Code example #5
from prometheus_client import Summary

LATENCY = Summary(
    'http_requests_latency_seconds',
    'HTTP request latency.',
    labelnames=['path'])

foo = LATENCY.labels('/foo')


@foo.time()
def foo_handler(params):
    pass
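
The decorator form shown above times the whole handler. The same labelled Summary can also be timed with a context manager when only part of a function should be measured; a small sketch not taken from the original snippet:

def bar_handler(params):
    # Time only the expensive portion of the handler.
    with LATENCY.labels('/bar').time():
        pass  # expensive work would go here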
Code example #6
import logging

from prometheus_client import Summary

from nautobot.dcim.models import Device

from .choices import OnboardingFailChoices
from .choices import OnboardingStatusChoices
from .exceptions import OnboardException
from .helpers import onboarding_task_fqdn_to_ip
from .metrics import onboardingtask_results_counter
from .models import OnboardingDevice
from .models import OnboardingTask
from .onboard import OnboardingManager

logger = logging.getLogger("rq.worker")

REQUEST_TIME = Summary("onboardingtask_processing_seconds",
                       "Time spent processing onboarding request")


@REQUEST_TIME.time()
@job("default")
def onboard_device(task_id, credentials):  # pylint: disable=too-many-statements
    """Process a single OnboardingTask instance."""
    username = credentials.username
    password = credentials.password
    secret = credentials.secret

    ot = OnboardingTask.objects.get(id=task_id)

    # Rewrite FQDN to IP for Onboarding Task
    onboarding_task_fqdn_to_ip(ot)
Code example #7
 def test_summary(self):
     s = Summary('ss', 'A summary', ['a', 'b'], registry=self.registry)
     s.labels('c', 'd').observe(17)
     self.assertEqual(
         b'# HELP ss A summary\n# TYPE ss summary\nss_count{a="c",b="d"} 1.0\nss_sum{a="c",b="d"} 17.0\n',
         generate_latest(self.registry))
Code example #8
    def export_defaults(self,
                        buckets=None,
                        group_by='path',
                        latency_as_histogram=True,
                        prefix='flask',
                        app=None,
                        **kwargs):
        """
        Export the default metrics:
            - HTTP request latencies
            - HTTP request exceptions
            - Number of HTTP requests

        :param buckets: the time buckets for request latencies
            (will use the default when `None`)
        :param group_by: group default HTTP metrics by
            this request property, like `path`, `endpoint`, `rule`, etc.
            (defaults to `path`)
        :param latency_as_histogram: export request latencies
            as a Histogram, otherwise use a Summary instead
            (defaults to `True` to export as a Histogram)
        :param prefix: prefix to start the default metrics names with
            or `NO_PREFIX` (to skip prefix)
        :param app: the Flask application
        """

        if app is None:
            app = self.app or current_app

        if not prefix:
            prefix = self._defaults_prefix or 'flask'

        if kwargs.get('group_by_endpoint') is True:
            warnings.warn(
                'The `group_by_endpoint` argument of '
                '`PrometheusMetrics.export_defaults` is deprecated since 0.4.0, '
                'please use the new `group_by` argument.', DeprecationWarning)

            duration_group = 'endpoint'

        elif group_by:
            duration_group = group_by

        else:
            duration_group = 'path'

        if callable(duration_group):
            duration_group_name = duration_group.__name__

        else:
            duration_group_name = duration_group

        if prefix == NO_PREFIX:
            prefix = ""
        else:
            prefix = prefix + "_"

        try:
            self.info('%sexporter_info' % prefix,
                      'Information about the Prometheus Flask exporter',
                      version=self.version)
        except ValueError:
            return  # looks like we have already exported the default metrics

        labels = self._get_combined_labels(None)

        if latency_as_histogram:
            # use the default buckets from prometheus_client if not given here
            buckets_as_kwargs = {}
            if buckets is not None:
                buckets_as_kwargs['buckets'] = buckets

            request_duration_metric = Histogram(
                '%shttp_request_duration_seconds' % prefix,
                'Flask HTTP request duration in seconds',
                ('method', duration_group_name, 'status') + labels.keys(),
                registry=self.registry,
                **buckets_as_kwargs)

        else:
            # export as Summary instead
            request_duration_metric = Summary(
                '%shttp_request_duration_seconds' % prefix,
                'Flask HTTP request duration in seconds',
                ('method', duration_group_name, 'status') + labels.keys(),
                registry=self.registry)

        counter_labels = ('method', 'status') + labels.keys()
        request_total_metric = Counter('%shttp_request_total' % prefix,
                                       'Total number of HTTP requests',
                                       counter_labels,
                                       registry=self.registry)

        request_exceptions_metric = Counter(
            '%shttp_request_exceptions_total' % prefix,
            'Total number of HTTP requests which resulted in an exception',
            counter_labels,
            registry=self.registry)

        def before_request():
            request.prom_start_time = default_timer()

        def after_request(response):
            if hasattr(request, 'prom_do_not_track') or hasattr(
                    request, 'prom_exclude_all'):
                return response

            if self.excluded_paths:
                if any(
                        pattern.match(request.path)
                        for pattern in self.excluded_paths):
                    return response

            if hasattr(request, 'prom_start_time'):
                total_time = max(default_timer() - request.prom_start_time, 0)

                if callable(duration_group):
                    group = duration_group(request)
                else:
                    group = getattr(request, duration_group)

                request_duration_labels = {
                    'method': request.method,
                    'status': _to_status_code(response.status_code),
                    duration_group_name: group
                }
                request_duration_labels.update(labels.values_for(response))

                request_duration_metric.labels(
                    **request_duration_labels).observe(total_time)

            request_total_metric.labels(method=request.method,
                                        status=_to_status_code(
                                            response.status_code),
                                        **labels.values_for(response)).inc()

            return response

        def teardown_request(exception=None):
            if not exception or hasattr(request,
                                        'prom_do_not_track') or hasattr(
                                            request, 'prom_exclude_all'):
                return

            if self.excluded_paths:
                if any(
                        pattern.match(request.path)
                        for pattern in self.excluded_paths):
                    return

            response = make_response('Exception: %s' % exception, 500)

            if callable(duration_group):
                group = duration_group(request)
            else:
                group = getattr(request, duration_group)

            request_exceptions_metric.labels(
                method=request.method,
                status=500,
                **labels.values_for(response)).inc()

            if hasattr(request, 'prom_start_time'):
                total_time = max(default_timer() - request.prom_start_time, 0)

                request_duration_labels = {
                    'method': request.method,
                    'status': 500,
                    duration_group_name: group
                }
                request_duration_labels.update(labels.values_for(response))

                request_duration_metric.labels(
                    **request_duration_labels).observe(total_time)

            request_total_metric.labels(method=request.method,
                                        status=500,
                                        **labels.values_for(response)).inc()

            return

        app.before_request(before_request)
        app.after_request(after_request)
        app.teardown_request(teardown_request)
Code example #9
#!/usr/bin/python

import re
import time
import requests
import argparse
from pprint import pprint

import os
from sys import exit
from prometheus_client import start_http_server, Summary
from prometheus_client.core import GaugeMetricFamily, REGISTRY

DEBUG = int(os.environ.get('DEBUG', '0'))

COLLECTION_TIME = Summary('jenkins_collector_collect_seconds',
                          'Time spent to collect metrics from Jenkins')


class JenkinsCollector(object):
    # The build statuses we want to export metrics about.
    statuses = [
        "lastBuild", "lastCompletedBuild", "lastFailedBuild",
        "lastStableBuild", "lastSuccessfulBuild", "lastUnstableBuild",
        "lastUnsuccessfulBuild"
    ]

    def __init__(self, target, user, password, insecure):
        self._target = target.rstrip("/")
        self._user = user
        self._password = password
        self._insecure = insecure
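
The excerpt stops inside the constructor, so the registration step is not shown. The usual prometheus_client pattern for a custom collector like this one is roughly the following (URL, credentials and port are placeholders):

# Register the custom collector with the default REGISTRY imported above and expose it.
REGISTRY.register(JenkinsCollector("http://jenkins:8080", "user", "password", insecure=False))
start_http_server(9118)
while True:
    time.sleep(1)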
Code example #10
patch_psycopg()

import os
import json
import subprocess
import time

from flask import Flask, jsonify, request, current_app, redirect, url_for
from prometheus_client import Summary

from . import db, jobs, metrics, cron
from .metrics import REGISTRY

SCHEDULER_REQUEST_SUMMARY = Summary(
    "scheduler_request_latency_seconds",
    "Flask Request Latency",
    ["method", "endpoint"],
    registry=REGISTRY,
)


def before_request():
    request.start_time = time.time()


def after_request(response):
    request_latency = time.time() - request.start_time
    SCHEDULER_REQUEST_SUMMARY.labels(request.method,
                                     request.url_rule).observe(request_latency)

    return response
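
The before_request/after_request functions above are plain callables; the excerpt ends before they are attached to the application. A hedged sketch of the missing wiring (the app object is assumed):

app = Flask(__name__)

# Hypothetical wiring: hook the latency-recording callbacks into the Flask request lifecycle.
app.before_request(before_request)
app.after_request(after_request)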
Code example #11
File: app.py Project: obsessionsys/demo-app
#!/usr/bin/python
# -*- coding: utf-8 -*-

from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from prometheus_client import Summary, Counter, Gauge, Info, start_http_server
from os import curdir, sep
import time

PORT_NUMBER = 8080
BIND_IP = '0.0.0.0'
EXPORTER_PORT_NUMBER = 8000
EXPORTER_BIND_IP = '0.0.0.0'

counter_failures = Counter('demo_counter_failures',
                           'Quantity of fault requests')
counter_summary = Summary('demo_request_counter', 'Summary of ok requests')
counter_gauge = Gauge('demo_request_gauge', 'Gauge of requests')
application_info = Info('demo_build_version', 'Application info')


# This class handles any incoming request from the browser
class myHandler(BaseHTTPRequestHandler):

    # Handler for the GET requests
    def do_GET(self):
        if self.path == "/":
            self.path = "./index.html"

        if self.path == "/about":
            self.path = "./about.html"
Code example #12
from prometheus_client import start_http_server, Info, Counter, Enum, Gauge, Summary  # noqa

total = Counter('pup_advisor_total', 'The total amount of uploads')
valid = Counter('pup_advisor_valid', 'The total amount of valid uploads')
invalid = Counter('pup_advisor_invalid',
                  'The total amount of invalid uploads')
inventory_post_success = Counter(
    'pup_advisor_inventory_post_success',
    'The total amount of successful inventory posts')
inventory_post_failure = Counter('pup_advsior_inventory_post_failure',
                                 'The total amount of failed inventory posts')

validation_time = Summary('validation_processing_seconds',
                          'Time spent validating archive')
inventory_post_time = Summary('inventory_post_processing_seconds',
                              'Time spent posting to inventory')
handle_file_time = Summary('pup_handle_file_time',
                           'Time spent executing handle_file')
extract_facts_time = Summary('pup_extract_facts_time',
                             'Time spent extracting facts')
payload_size = Summary('pup_payload_size',
                       'Size in bytes of processed payloads')

upload_service_version = Info('upload_service_version',
                              'Build commit and date')

produce_queue_size = Gauge("pup_produce_queue_size",
                           "Size of the produce queue")
system_profile_queue_size = Gauge("pup_system_profile_queue_size",
                                  "Size of the system profile queue")
current_archives_size = Gauge("pup_current_archives_size",
Code example #13
import re

from urllib.parse import urlparse

from ...config import config

from prometheus_client import Summary
from prometheus_client import Histogram
from prometheus_async.aio import time

REQ_TIME = Summary("external_to_internal_req_time",
                   "time spent with external_to_internal endpoint")
REQ_HISTOGRAM_TIME = Histogram("external_to_internal_req_histogram",
                               "Histogram for external_to_internal endpoint")


@time(REQ_TIME)
@time(REQ_HISTOGRAM_TIME)
async def translate(external_to_internal_spec, repo_provider):

    external_url = external_to_internal_spec["external_url"]

    internal_url = await translate_external_to_internal(external_url)

    result = {"external_url": external_url, "internal_url": internal_url}

    return result


async def translate_external_to_internal(external_git_url):
    """ Logic from original maitai code to do this: found in GitUrlParser.java#generateInternalGitRepoName """
Code example #14
import logging
import os
import re
from xml.etree import ElementTree

import requests
from prometheus_client import Summary
from prometheus_client.core import CounterMetricFamily, GaugeMetricFamily

# module-level logger used by the exporter below
log = logging.getLogger(__name__)


class PrometheusBlazeGraphExporter(object):
    scrape_duration = Summary(
        'blazegraph_scrape_duration_seconds', 'Blazegraph exporter scrape duration')

    def __init__(self, blazegraph_base_url, blazegraph_data_folder):

        self.url = blazegraph_base_url
        self.data_folder = blazegraph_data_folder
        self.counters = []
        self.sparql_endpoint = '{base_url}/namespace/wdq/sparql'.format(base_url=self.url)

    def query_to_metric(self, qname):
        return qname.replace(' ', '_').replace('/', '.').lstrip('.')

    def get_counter(self, cnt_name):
        # Not sure why we need depth but some counters don't work without it
        url = '{base_url}/counters?depth=10&path={cnt_name}'.format(
            base_url=self.url, cnt_name=cnt_name
        )

        try:
            header = {'Accept': 'application/xml'}
            response = requests.get(url, headers=header)
        except requests.exceptions.RequestException:
            log.exception('Error sending request')
            return None

        el = ElementTree.fromstring(response.content)
        last_name = cnt_name.split('/')[-1]

        for cnt in el.getiterator('c'):
            if cnt.attrib['name'] == last_name:
                return cnt.attrib['value']
        return None

    def fetch_allocators(self):
        allocators = 0
        try:
            url = "{base_url}/status?dumpJournal".format(base_url=self.url)
            response = requests.get(url).text
            split_info = response.split('AllocatorSize')
            for alloc_line in split_info[1].splitlines():
                # empty line finishes the table
                if len(alloc_line.strip()) == 0:
                    break
                parts = re.split(r"\s+", alloc_line)

                # second value must be a digit
                if not parts[1].isdigit():
                    continue
                allocators += int(parts[1])
            return allocators
        except requests.exceptions.RequestException:
            log.exception('Error fetching allocator data')
            return None

    @scrape_duration.time()
    def collect(self):
        blazegraph_metrics = {
            '/Query Engine/queryStartCount': CounterMetricFamily(
                'blazegraph_queries_start',
                'Number of queries that have started since the start of the application.'
            ),
            '/Query Engine/queryDoneCount': CounterMetricFamily(
                'blazegraph_queries_done',
                'Number of queries completed since the start of the application.'
            ),
            '/Query Engine/queryErrorCount': CounterMetricFamily(
                'blazegraph_queries_error',
                'Number of queries in error since the start of the application.'
            ),
            '/Query Engine/queriesPerSecond': GaugeMetricFamily(
                'blazegraph_queries_per_second',
                'Number of queries per second (rolling average).'
            ),
            '/Query Engine/operatorActiveCount': GaugeMetricFamily(
                'blazegraph_operator_active_count',
                'Number of active blazegraph operators'
            ),
            '/Query Engine/runningQueriesCount': GaugeMetricFamily(
                'blazegraph_running_queries_count',
                'Number of running queries'
            ),
            '/Query Engine/GeoSpatial/geoSpatialSearchRequests': GaugeMetricFamily(
                'blazegraph_geospatial_search_requets',
                'Number of geospatial search requests since the start of the application.'),

            '/Journal/bytesReadPerSec': GaugeMetricFamily(
                'blazegraph_journal_bytes_read_per_second',
                ''
            ),
            '/Journal/bytesWrittenPerSec': GaugeMetricFamily(
                'blazegraph_journal_bytes_written_per_second',
                ''
            ),
            '/Journal/extent': GaugeMetricFamily(
                'blazegraph_journal_extent',
                ''
            ),
            '/Journal/commitCount': CounterMetricFamily(
                'blazegraph_journal_commit_count',
                ''
            ),
            '/Journal/commit/totalCommitSecs': GaugeMetricFamily(
                'blazegraph_journal_total_commit_seconds',
                'Total time spent in commit.'
            ),
            '/Journal/commit/flushWriteSetSecs': GaugeMetricFamily(
                'blazegraph_journal_flush_write_set_seconds',
                ''
            ),
            '/Journal/Concurrency Manager/Read Service/Average Active Count': GaugeMetricFamily(
                'blazegraph_journal_concurrency_read_average_active_count',
                'Average Number of Read Active Threads'
            ),
            '/JVM/Memory/DirectBufferPool/default/bytesUsed': GaugeMetricFamily(
                'blazegraph_jvm_memory_direct_buffer_pool_default_bytes_used',
                ''
            ),
            '/JVM/Memory/Runtime Free Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_free_memory',
                'Current amount of free memory in the JVM.'
            ),
            '/JVM/Memory/Runtime Max Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_max_memory',
                'Max amount of memory the JVM can allocate.'
            ),
            '/JVM/Memory/Runtime Total Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_total_memory',
                'Total amount of memory allocated to the JVM.'
            ),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Collection Count':
                CounterMetricFamily(
                    'blazegraph_jvm_memory_gc_g1_old_collecton_count',
                    'Number of old GC since JVM start.'
                ),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Cumulative Collection Time':
                GaugeMetricFamily(
                    'blazegraph_jvm_memory_gc_g1_old_cumulative_collection_time',
                    'Total time spent in old GC (seconds).'
                ),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Collection Count':
                CounterMetricFamily(
                    'blazegraph_jvm_memory_gc_g1_young_collection_count',
                    'Number of young GC since JVM start.'
                ),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Cumulative Collection Time':
                GaugeMetricFamily(
                    'blazegraph_jvm_memory_gc_g1_young_cumulative_collection_time',
                    'Total time spent in young GC (seconds).'
                ),
        }

        for metric_name, metric_family in blazegraph_metrics.items():
            if metric_name is None:
                log.warning('Unknown metric %r', metric_name)
            else:
                metric_value = self.get_counter(metric_name)

                try:
                    value = float(metric_value)
                except (ValueError, TypeError):
                    value = float('nan')

                metric_family.add_metric([], value)

        alloc_metric = GaugeMetricFamily(
            'blazegraph_allocators',
            'Number of used FixedAllocators in Blazegraph'
        )
        alloc_free_metric = GaugeMetricFamily(
            'blazegraph_free_allocators',
            'Number of free FixedAllocators in Blazegraph'
        )

        allocs = self.fetch_allocators()
        if allocs:
            alloc_metric.add_metric([], allocs)
            alloc_free_metric.add_metric([], 256 * 1024 - allocs)
        else:
            alloc_metric.add_metric([], float('nan'))
            alloc_free_metric.add_metric([], float('nan'))

        yield alloc_metric
        yield alloc_free_metric

        # Added for BBP blazegraph disk monitoring
        disk_total_metric = GaugeMetricFamily(
            'blazegraph_total_disk',
            'Blazegraph total disk space'
        )
        disk_free_metric = GaugeMetricFamily(
            'blazegraph_free_disk',
            'Blazegraph free disk space'
        )
        statvfs = os.statvfs(self.data_folder)

        disk_total_metric.add_metric([], statvfs.f_frsize * statvfs.f_blocks)  # Size of filesystem in bytes
        disk_free_metric.add_metric([], statvfs.f_frsize * statvfs.f_bfree)  # Actual number of free bytes

        yield disk_total_metric
        yield disk_free_metric

        for metric in blazegraph_metrics.values():
            yield metric
Code example #15
File: endpoint.py Project: project-ncl/repour
import base64
import hashlib
import os
import traceback

from prometheus_client import Counter, Histogram, Summary

ERROR_VALIDATION_JSON_COUNTER = Counter(
    "error_validation_json_counter",
    "User sent bad JSON requests that failed validation",
)
ERROR_RESPONSE_400_COUNTER = Counter(
    "error_response_400_counter", "App returned 400 status"
)
ERROR_RESPONSE_500_COUNTER = Counter(
    "error_response_500_counter", "App returned 500 status"
)
ERROR_CALLBACK_COUNTER = Counter(
    "error_callback_counter", "Errors calling callback url"
)

REQ_TIME = Summary("callback_time", "time spent with calling callback")
REQ_HISTOGRAM_TIME = Histogram("callback_histogram", "Histogram for calling callback")


def create_log_context_id():
    return "repour-" + base64.b32encode(os.urandom(20)).decode("ascii").lower()


def create_callback_id():
    return base64.b32encode(os.urandom(30)).decode("ascii")


def create_traceback_id():
    tb = traceback.format_exc()
    h = hashlib.md5()
    h.update(tb.encode("utf-8"))
Code example #16
File: prometheus.py Project: 20c/vaping
from prometheus_client import Counter, Gauge, Summary, start_http_server

import vaping
import vaping.plugins

min_latency = Summary(
    "minimum_latency_milliseconds", "Minimum latency in milliseconds.", ["host"]
)  # NOQA
max_latency = Summary(
    "maximum_latency_milliseconds", "Maximum latency in milliseconds.", ["host"]
)  # NOQA
avg_latency = Summary(
    "average_latency_milliseconds", "Average latency in milliseconds.", ["host"]
)  # NOQA
sent_packets = Counter(
    "number_of_packets_sent", "Number of pings sent to host.", ["host"]
)  # NOQA
packet_loss = Gauge("packet_loss", "% packet loss to host (0-100)", ["host"])  # NOQA


@vaping.plugin.register("prometheus")
class Prometheus(vaping.plugins.EmitBase):
    def init(self):
        self.log.debug("init prometheus plugin")
        port = self.pluginmgr_config.get("port", 9099)
        start_http_server(port)

    def emit(self, data):
        raw_data = data.get("data")

        self.log.debug("data: " + str(raw_data))
Code example #17
File: __init__.py Project: filipnavara/grapi
        print("PROFILE_DIR '{}' is invalid, not enabling profiling".format(
            PROFILE_DIR))
        PROFILE_DIR = None
"""
Master Fleet Runner

Instantiates the specified number of Bjoern WSGI server processes,
each taking orders on their own unix socket and passing requests to
the respective WSGI app (rest, notify or metrics).

"""

# metrics
if PROMETHEUS:
    REQUEST_TIME = Summary('kopano_mfr_request_processing_seconds',
                           'Time spent processing request',
                           ['method', 'endpoint'])
    EXCEPTION_COUNT = Counter('kopano_mfr_total_unhandled_exceptions',
                              'Total number of unhandled exceptions')
    MEMORY_GAUGE = Gauge('kopano_mfr_virtual_memory_bytes',
                         'Virtual memory size in bytes', ['worker'])
    CPUTIME_GAUGE = Gauge('kopano_mfr_cpu_seconds_total',
                          'Total user and system CPU time spent in seconds',
                          ['worker'])


def error_handler(ex, req, resp, params, with_metrics):
    if not isinstance(ex, (falcon.HTTPError, falcon.HTTPStatus)):
        if with_metrics:
            if PROMETHEUS:
                EXCEPTION_COUNT.inc()
Code example #18
# URL of the GitLab instance, defaults to hosted GitLab
URL = str(os.environ.get('URL', 'https://gitlab.com'))

# Secret token for the app to authenticate itself
TOKEN = str(os.environ.get('TOKEN'))

# Login to GitLab
gl = gitlab.Gitlab(URL, TOKEN)
gl.auth()

# Initialize Prometheus instrumentation
projects_total = Gauge('gitlab_projects_total', 'Number of projects')
builds_total = Gauge('gitlab_builds_total', 'Number of builds',
                     ['project_id', 'project_name'])
build_duration_seconds = Summary(
    'gitlab_build_duration_seconds', 'Seconds the build took to run',
    ['project_id', 'project_name', 'stage', 'status', 'ref'])
open_issues_total = Gauge('gitlab_open_issues_total', 'Number of open issues',
                          ['project_id', 'project_name'])
pipeline_duration_seconds = Summary(
    'gitlab_pipeline_duration_seconds', 'Seconds the pipeline took to run',
    ['project_id', 'project_name', 'status', 'ref'])


def get_projects():
    try:
        projects = gl.projects.list(all=True)
        log.debug("Projects: {}".format(projects))
        return projects
    except gitlab.exceptions.GitlabListError:
        log.warn("Projects could not be retrieved")
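
build_duration_seconds above is declared with five label names but the excerpt never observes it. A hedged sketch of what one observation might look like (the project and build objects and their attributes are assumptions):

# Hypothetical observation for one completed build of a project.
build_duration_seconds.labels(
    project_id=project.id,
    project_name=project.name,
    stage=build.stage,
    status=build.status,
    ref=build.ref,
).observe(build.duration)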
Code example #19
import time
import flask
import json
import requests
from prometheus_client import Summary
from friendbot import app, utils, messages

signing_secret = app.config["FRIENDBOT_SIGNING_SECRET"]
cache = app.config["REDIS_CACHE"]

length_summary = Summary("friendbot_request_time",
                         "Length of Friendbot Requests")


@app.route("/action", methods=["POST"])
def action_endpoint():
    start_time = time.time()
    if signing_secret:
        valid, valid_err = utils.validate_request(flask.request,
                                                  signing_secret)
        if not valid:
            app.logger.error(valid_err)
            return ("", 400)
    data = json.loads(flask.request.form["payload"])
    button_text = data["actions"][0]["text"]["text"]
    error = False
    if button_text == "Send":
        user_id = data["user"]["id"]
        real_name = cache.hget("users", user_id).decode("utf-8")
        payload = messages.send_message(data["actions"][0]["value"], real_name)
    elif button_text == "Shuffle":
Code example #20
import logging
import os
import time

from prometheus_client import start_http_server, Summary, Gauge
from celery import Celery
from celery.events.receiver import EventReceiver
from celery.utils.objects import FallbackContext
import amqp.exceptions
import httpx

logging.basicConfig(level=logging.INFO)

app = Celery("overseer", broker=os.environ["BROKER_URL"])

# Monitoring metrics (these are updated by `Receiver` and `Collector` below)

event_processing = Summary("overseer_event_processing",
                           "Summary of event processing duration")

queue_length = Gauge("overseer_queue_length", "Number of jobs in the queue.",
                     ["queue"])

workers_count = Gauge("overseer_workers_count",
                      "Number of workers listening to the queue.", ["queue"])

queue_length_worker_ratio = Gauge(
    "overseer_queue_length_worker_ratio",
    "Ratio of the number of jobs to the number of workers for each queue.",
    ["queue"],
)

# Setup API client
api = httpx.Client(
Code example #21
                           ['account_number'])
FAILED_TO_VALIDATE = Gauge('failed_validation',
                           'Reports that could not be validated',
                           ['account_number'])
INVALID_REPORTS = Gauge('invalid_reports', 'Reports containing invalid syntax',
                        ['account_number'])
TIME_RETRIES = Gauge(
    'time_retries',
    'The total number of retries based on time for all reports',
    ['account_number'])
COMMIT_RETRIES = Gauge(
    'commit_retries',
    'The total number of retries based on commit for all reports',
    ['account_number'])
REPORT_PROCESSING_LATENCY = Summary(
    'report_processing_latency',
    'The time in seconds that it takes to process a report')
VALIDATION_LATENCY = Summary('validation_latency',
                             'The time it takes to validate a report')


# pylint: disable=broad-except, too-many-lines, too-many-public-methods
class AbstractProcessor(ABC):  # pylint: disable=too-many-instance-attributes
    """Class for processing saved reports that have been uploaded."""

    # pylint: disable=too-many-arguments
    def __init__(self, pre_delegate, state_functions, state_metrics,
                 async_states, object_prefix, object_class, object_serializer):
        """Create an abstract processor."""
        self.report_or_slice = None
        self.object_class = object_class
Code example #22
import time

from prometheus_client import (
    CollectorRegistry,
    Counter,
    Gauge,
    Summary,
    push_to_gateway,
)

# A separate registry is used, as the default registry may contain other
# metrics such as those from the Process Collector.
REGISTRY = CollectorRegistry()

JOB_DURATION = Summary('my_batch_job_duration_seconds',
                       'Duration of my batch job in seconds.',
                       registry=REGISTRY)
RESULT = Gauge('my_batch_job_result', 'This is a test', registry=REGISTRY)
LAST_SUCCESS = Gauge('my_batch_job_last_success_unixtime',
                     'Last time my batch job succeeded, in unixtime.',
                     registry=REGISTRY)


@JOB_DURATION.time()
def main():
    if len(sys.argv) < 2:
        print("Usage: {} <Prometheus Pushgateway:port>".format(sys.argv[0]))
        return
    # Simulate some work
    time.sleep(random.random())
    # Update metrics
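
The excerpt ends just before the metrics are updated and pushed. Given the imports at the top, the rest of main() presumably finishes with a push_to_gateway call along these lines (the job name and result value are assumptions):

    # Update the gauges and push the job's registry to the Pushgateway named on the command line.
    RESULT.set(42)  # placeholder value
    LAST_SUCCESS.set_to_current_time()
    push_to_gateway(sys.argv[1], job='my_batch_job', registry=REGISTRY)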
Code example #23
import logging
import os
import subprocess
from multiprocessing import Process, Manager
from prometheus_client import start_http_server, Summary
from prometheus_client.core import GaugeMetricFamily, REGISTRY

try:
    IPS = os.getenv('TARGET_IPS').split(',')
except AttributeError:
    raise Exception("Mandatory `TARGET_IPS` environment variable is not set")

IPMI_USER = os.getenv('IPMI_USER', 'ADMIN')
IPMI_PASSWD = os.getenv('IPMI_PASSWD', 'ADMIN')

REQURED = ["CPU1 Temp", "System Temp", "FAN1"]
# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds',
                       'Time spent processing request')


def _run_cmd(ip, raw):
    logging.info("Collecting from target %s", ip)
    proc = subprocess.Popen(
        ["ipmitool", "-H", ip, "-U", IPMI_USER, "-P", IPMI_PASSWD, "sdr"],
        stdout=subprocess.PIPE)
    out = proc.communicate()[0]
    raw += [x.rstrip() for x in out.split('|')]


class IpmiCollector(object):
    @REQUEST_TIME.time()
    def collect(self):
        sys_metrics = {
Code example #24
import vaping
import vaping.plugins
from builtins import str
from prometheus_client import start_http_server, Summary, Counter, Gauge

min_latency = Summary('minimum_latency_milliseconds',
                      'Minimum latency in milliseconds.', ['host'])  # NOQA
max_latency = Summary('maximum_latency_milliseconds',
                      'Maximum latency in milliseconds.', ['host'])  # NOQA
avg_latency = Summary('average_latency_milliseconds',
                      'Average latency in milliseconds.', ['host'])  # NOQA
sent_packets = Counter('number_of_packets_sent',
                       'Number of pings sent to host.', ['host'])  # NOQA
packet_loss = Gauge('packet_loss', '% packet loss to host (0-100)',
                    ['host'])  # NOQA


@vaping.plugin.register('prometheus')
class Prometheus(vaping.plugins.EmitBase):
    def init(self):
        self.log.debug('init prometheus plugin')
        port = self.pluginmgr_config.get('port', 9099)
        start_http_server(port)

    def emit(self, data):
        raw_data = data.get('data')
        self.log.debug("data: " + str(raw_data))
        for host_data in raw_data:
            host_name = host_data.get('host')
            min_latency.labels(host_name).observe(host_data.get('min'))
            max_latency.labels(host_name).observe(host_data.get('max'))
Code example #25
File: rest.py Project: teddybearz/argo
concurrent_reqs = 0
MAX_CONCURRENT_REQS = 100

MINION_MANAGER_HOSTNAME = "http://minion-manager.kube-system"
MINION_MANAGER_PORT = "6000"

kubectl = KubernetesApiClient(use_proxy=True)
cluster_name_id = os.getenv("AX_CLUSTER_NAME_ID", None)
asg_manager = AXUserASGManager(os.getenv("AX_CLUSTER_NAME_ID"),
                               AXClusterConfig().get_region())

# Need a lock to serialize cluster config operation
cfg_lock = RLock()

axmon_api_latency_stats = Summary("axmon_api_latency",
                                  "Latency for axmon REST APIs",
                                  ["method", "endpoint", "status"])
axmon_api_concurrent_reqs = Gauge("axmon_api_concurrent_reqs",
                                  "Concurrent requests in axmon")


def before_request():
    request.start_time = time.time()
    global concurrent_reqs, MAX_CONCURRENT_REQS, concurrent_reqs_lock
    with concurrent_reqs_lock:
        axmon_api_concurrent_reqs.set(concurrent_reqs)
        # Disabling concurrent request logic for now due to findings in AA-3167
        #if concurrent_reqs >= MAX_CONCURRENT_REQS:
        #    return ax_make_response(
        #        original_jsonify(result="too many concurrent requests (max {})".format(MAX_CONCURRENT_REQS)), 429
        #    )
Code example #26
File: info.py Project: michalovjan/repour
# flake8: noqa
import os
import sys

import repour
from aiohttp import web
from prometheus_async.aio import time
from prometheus_client import Histogram, Summary

from ... import exception
from repour.lib.scm import git

REQ_TIME = Summary("info_req_time", "time spent with info endpoint")
REQ_HISTOGRAM_TIME = Histogram("info_req_histogram", "Histogram for info endpoint")


@time(REQ_TIME)
@time(REQ_HISTOGRAM_TIME)
async def handle_request(request):
    version = repour.__version__
    path_name = os.path.dirname(sys.argv[0])
    try:
        git_sha = await git.rev_parse(path_name)
    except exception.CommandError:
        git_sha = "Unknown"

    html_text = """
    <h1>Repour Information</h1>
    <ul>
        <li><strong>Repour Version</strong> {}</li>
        <li><strong>Commit Hash</strong> {}</li>
Code example #27
from prometheus_client import Summary, REGISTRY
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily
from threading import Event
from resotolib.args import ArgumentParser
from signal import signal, SIGTERM, SIGINT
from yaml import load

try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

shutdown_event = Event()

metrics_update_metrics = Summary(
    "resotometrics_update_metrics_seconds",
    "Time it took the update_metrics() function",
)


def handler(sig, frame) -> None:
    log.info("Shutting down")
    shutdown_event.set()


def main() -> None:
    setup_logger("resotometrics")

    signal(SIGINT, handler)
    signal(SIGTERM, handler)

    arg_parser = ArgumentParser(description="resoto metrics exporter",
Code example #28
from django.db import transaction
from django.shortcuts import get_object_or_404, redirect, render
from django.utils import timezone
from django.views.decorators.http import require_POST

from ....utils.helpers import is_entirely_digit
from ....utils.locking import lock_on
from ....utils.serialization import safe_json
from ...auth.decorators import deny_restricted, eighth_admin_required
from ..exceptions import SignupException
from ..models import EighthActivity, EighthBlock, EighthScheduledActivity, EighthSignup, EighthWaitlist
from ..serializers import EighthBlockDetailSerializer

logger = logging.getLogger(__name__)

eighth_signup_visits = Summary("intranet_eighth_signup_visits",
                               "Visits to the eighth signup view")
eighth_signup_submits = Summary(
    "intranet_eighth_signup_submits",
    "Number of eighth period signups performed from the eighth signup view")


@login_required
@deny_restricted
def eighth_signup_view(request, block_id=None):
    start_time = time.time()

    if block_id is None and "block" in request.GET:
        block_ids = request.GET.getlist("block")
        if len(block_ids) > 1:
            return redirect("/eighth/signup/multi?{}".format(
                request.META["QUERY_STRING"]))
Code example #29
#!/usr/bin/python

from prometheus_client import Counter, Gauge, Summary, Histogram, start_http_server

# requires the prometheus_client package (pip install prometheus-client)

if __name__ == '__main__':
    c = Counter('cc', 'A counter')
    c.inc()

    g = Gauge('gg', 'A gauge')
    g.set(17)

    s = Summary('ss', 'A summary', ['a', 'b'])
    s.labels('c', 'd').observe(17)

    h = Histogram('hh', 'A histogram')
    h.observe(.6)

    start_http_server(8000)
    import time

    while True:
        time.sleep(1)
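
Besides scraping port 8000, the metrics created in this demo can be inspected in-process with generate_latest(), which renders the default registry in the same text format that code example #7 asserts on (ss_count and ss_sum lines for the Summary):

from prometheus_client import generate_latest

# Print the current text exposition of the default registry.
print(generate_latest().decode("utf-8"))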
Code example #30
logging.basicConfig(
    level=logging.DEBUG,
    format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s")
log = logging.getLogger()
if os.getenv('REWE_BDP_STAGE') is not None:
    loghandler = logging.StreamHandler()
    loghandler.setFormatter(LogstashFormatter())
    log.handlers = []
    log.addHandler(loghandler)

MODELS = os.getenv("languages", "").split()

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds',
                       'Time spent processing request',
                       labelnames=['method', 'endpoint'])

_models = {}


def get_model(model_name):
    if model_name not in _models:
        _models[model_name] = spacy.load(model_name)
    return _models[model_name]


def get_dep_types(model):
    '''List the available dep labels in the model.'''
    labels = []
    for label_id in model.parser.moves.freqs[DEP]:
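
The REQUEST_TIME Summary in this last excerpt is declared with method and endpoint label names, but the excerpt is cut off before it is used. A hedged sketch of timing one request with those labels (the handler and label values are assumptions):

def parse_endpoint(text, model_name):
    # Hypothetical handler: time the parse under the labelled Summary.
    with REQUEST_TIME.labels(method="POST", endpoint="/parse").time():
        return get_model(model_name)(text)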