Example #1
class Metric:
    _INVALID_RECORDS = prometheus_client.Counter(
        'mjolnir_swift_invalid_records_total',
        "Number of requests that could not be processed",
        ['reason']
    )
    FAIL_VALIDATE = _INVALID_RECORDS.labels(reason="validate")
    FAIL_NO_CONFIG = _INVALID_RECORDS.labels(reason="no_config")

    PROCESS_MESSAGE = prometheus_client.Summary(
        'mjolnir_swift_process_message_seconds',
        'Time taken to process individual kafka messages')

    BULK_IMPORT = prometheus_client.Summary(
        'mjolnir_swift_import_file_seconds',
        'Time taken to import a file into elasticsearch'
    )

    _BULK_ACTION_RESULT = prometheus_client.Counter(
        'mjolnir_swift_action_total',
        'Number of bulk action responses per result type', ['result'])
    ACTION_RESULTS = {
        'updated': _BULK_ACTION_RESULT.labels(result='updated'),
        'created': _BULK_ACTION_RESULT.labels(result='created'),
        'noop': _BULK_ACTION_RESULT.labels(result='noop'),
    }
    OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown')
    MISSING = _BULK_ACTION_RESULT.labels(result='missing')
    FAILED = _BULK_ACTION_RESULT.labels(result='failed')
    TIMEOUT = _BULK_ACTION_RESULT.labels(result='timeout')
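A hypothetical consumer of the namespace above, timing each message and counting validation failures; the message handling itself is a stand-in:

def process(message):
    # Records elapsed seconds in the PROCESS_MESSAGE summary.
    with Metric.PROCESS_MESSAGE.time():
        if not message:  # stand-in for real validation
            Metric.FAIL_VALIDATE.inc()  # bumps the reason="validate" child
            return
        Metric.ACTION_RESULTS['updated'].inc()  # illustrative result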
Example #2
 def __init__(self, blocking_pool, timeout, namespace, k8s_api):
     self.blocking_pool = blocking_pool
     self.timeout = timeout
     self.namespace = namespace
     self._delete_pod = self._wrap_k8s_delete(k8s_api.delete_namespaced_pod)
     self._delete_pvc = self._wrap_k8s_delete(
         k8s_api.delete_namespaced_persistent_volume_claim)
     self._create_pod = self._wrap_k8s(
         k8s_api.create_namespaced_pod,
         pc.Summary('batch_create_pod_seconds',
                    'Batch k8s create pod latency in seconds'))
     self._create_pvc = self._wrap_k8s(
         k8s_api.create_namespaced_persistent_volume_claim,
         pc.Summary('batch_create_pvc_seconds',
                    'Batch k8s create pvc latency in seconds'))
     self._read_pod_log = self._wrap_k8s(k8s_api.read_namespaced_pod_log)
     self._read_pod_status = self._wrap_k8s(
         k8s_api.read_namespaced_pod_status)
     self._list_pods = self._wrap_k8s(k8s_api.list_namespaced_pod)
     self._list_pvcs = self._wrap_k8s(
         k8s_api.list_namespaced_persistent_volume_claim)
     self._get_pod = self._wrap_k8s(k8s_api.read_namespaced_pod)
     self._get_pvc = self._wrap_k8s(
         k8s_api.read_namespaced_persistent_volume_claim)
     self._read_secret = self._wrap_k8s(
         k8s_api.read_namespaced_secret,
         pc.Summary('batch_read_secret',
                    'Batch k8s read secret latency in seconds'))
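The _wrap_k8s and _wrap_k8s_delete bodies are not part of the excerpt. A plausible minimal sketch of _wrap_k8s, assuming it merely times the call when a Summary is supplied; the real wrapper presumably also dispatches the blocking call through self.blocking_pool with self.timeout:

 def _wrap_k8s(self, fn, summary=None):
     def wrapped(*args, **kwargs):
         if summary is not None:
             with summary.time():
                 return fn(*args, **kwargs)
         return fn(*args, **kwargs)
     return wrapped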
Example #3
    def __init__(self, client, **kwargs):
        super().__init__(client, **kwargs)
        self.min_keyword_length = 3
        self.max_query_length = 1024
        self.max_page_size = 100
        self.max_keywords = 5
        self._helper_funcs = {
            "nusers": (
                self._base_query_nusers,
                self._key_nusers,
            ),
            "address": (
                self._base_query_address,
                self._key_address,
            )
        }

        try:
            import prometheus_client
        except ImportError:
            self._response_duration_metric = None
        else:
            self._response_duration_metric = prometheus_client.Summary(
                "muclumbus_xmpp_search_response_duration_seconds",
                "Time it takes to answer a search response", ["phase"])
Example #4
class Metric(object):
    """A Namespace for our metrics"""
    # Metrics we record in prometheus
    _INVALID_RECORDS = prometheus_client.Counter(
        'mjolnir_bulk_invalid_records_total',
        "Number of requests that could not be processed", ['reason'])
    FAIL_VALIDATE = _INVALID_RECORDS.labels(reason='fail_validate')
    MISSING_INDEX = _INVALID_RECORDS.labels(reason='missing_index')
    SUBMIT_BATCH = prometheus_client.Summary(
        'mjolnir_bulk_submit_batch_seconds',
        'Time taken to submit a batch from kafka to elasticsearch')
    RECORDS_PROCESSED = prometheus_client.Counter(
        'mjolnir_bulk_records_total',
        'Number of kafka records processed')
    _BULK_ACTION_RESULT = prometheus_client.Counter(
        'mjolnir_bulk_action_total',
        'Number of bulk action responses per result type', ['result'])
    ACTION_RESULTS = {
        'updated': _BULK_ACTION_RESULT.labels(result='updated'),
        'created': _BULK_ACTION_RESULT.labels(result='created'),
        'noop': _BULK_ACTION_RESULT.labels(result='noop'),
    }
    OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown')
    MISSING = _BULK_ACTION_RESULT.labels(result='missing')
    FAILED = _BULK_ACTION_RESULT.labels(result='failed')
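A hypothetical consumer of the ACTION_RESULTS mapping, falling back to the catch-all child for result types the dict does not know:

def record_bulk_result(result):
    Metric.ACTION_RESULTS.get(result, Metric.OK_UNKNOWN).inc()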
Example #5
 def get_metrics(cls):
     if not cls.inited:
         # Register your metrics here
         cls.REQUEST_TIME = prometheus_client.Summary(
             "some_summary", "Time spent in processing request"
         )
         cls.inited = True
     return cls
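The inited guard matters because registering the same metric name twice in the default registry raises ValueError. A usage sketch; the enclosing class name Metrics is assumed, since the excerpt does not show it:

import time

metrics = Metrics.get_metrics()
with metrics.REQUEST_TIME.time():  # records the duration in "some_summary"
    time.sleep(0.01)               # stand-in for real work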
Example #6
    def __init__(self, client, **kwargs):
        super().__init__(client, **kwargs)
        self._disco_svc = self.dependencies[aioxmpp.DiscoClient]
        self.expire_after = timedelta(days=7)
        self.non_muc_rescan_delay = timedelta(hours=6)
        self.address_blocklist = frozenset()
        self._worker_pool._timeout = timedelta(seconds=45)

        try:
            import prometheus_client
        except ImportError:
            self._domain_scanned_metric = None
            self._disco_info_duration_metric = None
            self._version_duration_metric = None
            self._disco_items_duration_metric = None
            self._pass_duration_metric = None
            self._last_pass_end_metric = None
            self._update_duration_metric = None
        else:
            self._domain_scanned_metric = prometheus_client.Summary(
                "muclumbus_scanner_domain_scan_duration",
                "Total number of scan operations executed",
                ["type"],
            )
            self._disco_info_duration_metric = prometheus_client.Summary(
                "muclumbus_scanner_disco_info_duration_seconds",
                "Duration of info requests", ["result"])
            self._update_duration_metric = prometheus_client.Summary(
                "muclumbus_scanner_update_duration_seconds",
                "Duration of database updates", ["operation"])
            self._version_duration_metric = prometheus_client.Summary(
                "muclumbus_scanner_version_duration_seconds",
                "Duration of software version requests", ["result"])
            self._disco_items_duration_metric = prometheus_client.Summary(
                "muclumbus_scanner_disco_items_duration_seconds",
                "Duration of items requests",
                ["type", "result"],
            )
            self._pass_duration_metric = prometheus_client.Gauge(
                "muclumbus_scanner_pass_duration_seconds",
                "Duration of the last pass in seconds")
            self._last_pass_end_metric = prometheus_client.Gauge(
                "muclumbus_scanner_last_pass_end_seconds",
                "Timestamp of the last pass")
Example #7
    def __init__(self, client, **kwargs):
        super().__init__(client, **kwargs)
        self._disco_svc = self.dependencies[aioxmpp.DiscoClient]
        self._vcard_client = self.dependencies[aioxmpp.vcard.VCardService]
        self.expire_after = timedelta(days=2)
        self.avatar_whitelist = frozenset()
        self.address_blocklist = frozenset()

        try:
            import prometheus_client
        except ImportError:
            self._room_scanned_metric = None
            self._disco_info_duration_metric = None
            self._avatar_fetch_duration_metric = None
            self._avatar_proc_duration_metric = None
            self._pass_duration_metric = None
            self._last_pass_end_metric = None
            self._update_duration_metric = None
        else:
            self._room_scanned_metric = prometheus_client.Summary(
                "muclumbus_watcher_room_scan_duration",
                "Total number of scan operations executed",
            )
            self._disco_info_duration_metric = prometheus_client.Summary(
                "muclumbus_watcher_disco_info_duration_seconds",
                "Duration of info requests", ["result"])
            self._update_duration_metric = prometheus_client.Summary(
                "muclumbus_watcher_update_duration_seconds",
                "Duration of database updates",
            )
            self._avatar_proc_duration_metric = prometheus_client.Summary(
                "muclumbus_watcher_avatar_proc_duration_seconds",
                "Duration of avatar processing",
            )
            self._avatar_fetch_duration_metric = prometheus_client.Summary(
                "muclumbus_watcher_avatar_fetch_duration_seconds",
                "Duration of avatar requests", ["result"])
            self._pass_duration_metric = prometheus_client.Gauge(
                "muclumbus_watcher_pass_duration_seconds",
                "Duration of the last pass in seconds")
            self._last_pass_end_metric = prometheus_client.Gauge(
                "muclumbus_watcher_last_pass_end_seconds",
                "Timestamp of the last pass")
Example #8
class Metric(object):
    """A namespace for our runtime metrics"""
    RECORDS_PROCESSED = prometheus_client.Counter(
        'mjolnir_msearch_records_total', 'Number of kafka records processed')
    INTERVAL_VALUE = prometheus_client.Gauge(
        'mjolnir_msearch_interval_sec',
        'Seconds between polling elasticsearch for qps stats')
    EMA = prometheus_client.Gauge('mjolnir_msearch_ema_qps',
                                  'Local estimate of canary index qps')
    PROCESS_BATCH = prometheus_client.Summary(
        'mjolnir_msearch_process_batch_seconds',
        'Time taken to process a batch of records from kafka')
Example #9
 def metrics_factory(registry):
     return {
         'plain_gauge': prometheus_client.Gauge('plain_gauge', 'Simple gauge', registry=registry),
         'instance_gauge': prometheus_client.Gauge('instance_gauge', 'Gauge with custom label',
                                                   ['instance'], registry=registry),
         'service_gauge': prometheus_client.Gauge('service_gauge', 'Gauge with "service" label',
                                                  ['service'], registry=registry),
         'counter': prometheus_client.Counter('counter', 'Simple counter', registry=registry),
         'summary': prometheus_client.Summary('summary', 'Simple summary', registry=registry),
         'histogram': prometheus_client.Histogram('histogram', 'Histogram with custom and "service" '
                                                  'labels', ['instance', 'service'],
                                                  registry=registry)
     }
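Because every metric is created with registry=registry, the factory can serve an isolated CollectorRegistry per test or per component; a usage sketch:

import prometheus_client

registry = prometheus_client.CollectorRegistry()
metrics = metrics_factory(registry)
metrics['counter'].inc()
metrics['service_gauge'].labels(service='api').set(1)
print(prometheus_client.generate_latest(registry).decode())  # text exposition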
Example #10
    def __init__(self, client, **kwargs):
        super().__init__(client, **kwargs)
        self._pubsub = self.dependencies[aioxmpp.PubSubClient]
        self._disco = self.dependencies[aioxmpp.DiscoClient]
        self.publish_target = None
        self._state_future.add_done_callback(self._initialise)
        self._event_buffer = {}
        self._first_enqueued = None
        self._enqueued_callback = None
        self._worker_pool = worker_pool.WorkerPool(
            self.WORKER_POOL_SIZE,
            self._handle_item,
            max_queue_size=self.WORKER_POOL_SIZE * 128,
            delay=(self.MIN_PROCESS_INTERVAL *
                   self.WORKER_POOL_SIZE).total_seconds(),
            logger=self.logger,
        )
        self.node_config = aioxmpp.forms.Data(aioxmpp.forms.DataType.SUBMIT)
        self.node_config.fields.append(
            aioxmpp.forms.Field(
                type_=aioxmpp.forms.FieldType.HIDDEN,
                var="FORM_TYPE",
                values=["http://jabber.org/protocol/pubsub#node_config"]))
        self.node_config.fields.append(
            aioxmpp.forms.Field(var="pubsub#access_model", values=["open"]))
        self.node_config.fields.append(
            aioxmpp.forms.Field(var="pubsub#max_items", values=["16777216"]))
        self.node_config.fields.append(
            aioxmpp.forms.Field(var="pubsub#persist_items", values=["1"]))

        try:
            import prometheus_client
        except ImportError:
            self._initial_sync_duration_metric = None
            self._update_duration_metric = None
            self._lost_update_metric = None
        else:
            self._initial_sync_duration_metric = prometheus_client.Gauge(
                "muclumbus_mirror_server_initial_sync_duration_seconds",
                "Duration of various initial sync phases",
                ["phase"],
            )
            self._update_duration_metric = prometheus_client.Summary(
                "muclumbus_mirror_server_update_duration_seconds",
                "Duration of the updates",
                ["operation"],
            )
            self._lost_update_metric = prometheus_client.Counter(
                "muclumbus_mirror_server_lost_update_count",
                "Number of updates lost due to business",
            )
Example #11
    def __init__(self, p_logger, p_config):

        self._logger = p_logger
        self._config = p_config
        self._gauge_monitored_users = prometheus_client.Gauge(
            self._config.prefix + "monitored_users",
            "number of monitored users")
        self._gauge_active_users = prometheus_client.Gauge(
            self._config.prefix + "active_users", "number of active users",
            ['username'])
        self._gauge_configured_users = prometheus_client.Gauge(
            self._config.prefix + "configured_users",
            "number of configured users")
        self._gauge_monitored_hosts = prometheus_client.Gauge(
            self._config.prefix + "monitored_hosts",
            "number of monitored hosts", ['hostname'])
        self._gauge_monitored_devices = prometheus_client.Gauge(
            self._config.prefix + "monitored_devices",
            "number of monitored devices")
        self._gauge_active_devices = prometheus_client.Gauge(
            self._config.prefix + "active_devices", "number of active devices",
            ['devicename'])
        self._gauge_device_response_time = prometheus_client.Gauge(
            self._config.prefix + "device_response_time",
            "response time of device [ms]", ['devicename'])
        self._gauge_device_moving_average_response_time = \
            prometheus_client.Gauge(self._config.prefix + "device_moving_average_response_time",
                                    "moving average of response time of device [ms]",
                                    ['devicename'])
        self._counter_forced_logouts = prometheus_client.Counter(
            self._config.prefix + "forced_logouts", "number of forced logouts",
            ['username'])

        self._summary_http_requests = prometheus_client.Summary(
            self._config.prefix + "http_requests",
            "request duration [ms] and count", ['service', 'hostname'])

        self._info_system = prometheus_client.Info(
            self._config.prefix + "system", "system information")
        self._info_system.info({
            "version":
            settings.settings['version'],
            "revision":
            settings.extended_settings['debian_package_revision']
        })

        self._gauge_uptime = prometheus_client.Gauge(
            self._config.prefix + "uptime", "uptime in seconds")
        self._start_time = time.time()
        self._gauge_uptime.set_function(lambda: time.time() - self._start_time)
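A sketch of how the per-user gauges above might be fed; the method name and its input mapping are hypothetical:

    def update_user_stats(self, p_active_sessions):
        # p_active_sessions: hypothetical mapping of username -> session count
        self._gauge_monitored_users.set(len(p_active_sessions))
        for username, count in p_active_sessions.items():
            self._gauge_active_users.labels(username=username).set(count)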
Example #12
    def __init__(self, registry=None):
        super().__init__()
        self.registry = registry or prometheus_client.REGISTRY
        self._response_time_metric = prometheus_client.Summary(
            "muclumbus_http_response_seconds",
            "Monotonic time passed for processing a reqeust",
            ["endpoint", "http_status"])
        self._existence_metric = prometheus_client.Gauge(
            "muclumbus_http_endpoint_flag",
            "Existence of an endpoint in the code",
            ["endpoint"],
        )
        # self.registry.register(self)

        self.handle_metrics = self.observe("metrics", self.handle_metrics)
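The observe helper is not shown in the excerpt; a minimal sketch consistent with the two metrics it feeds, assuming aiohttp-style async handlers and an imported time module:

    def observe(self, endpoint, handler):
        self._existence_metric.labels(endpoint=endpoint).set(1)

        async def wrapped(*args, **kwargs):
            t0 = time.monotonic()  # matches the "monotonic time" help text
            response = await handler(*args, **kwargs)
            self._response_time_metric.labels(
                endpoint=endpoint,
                http_status=str(getattr(response, "status", 200)),
            ).observe(time.monotonic() - t0)
            return response

        return wrapped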
Example #13
class InstrumentedHandler(web.RequestHandler):
    duration_metric = prometheus_client.Summary(
        'http_request_duration_microseconds',
        'The HTTP request latencies in microseconds.', ['handler'])
    total_metric = prometheus_client.Counter(
        'http_requests_total', 'Total number of HTTP requests made.',
        ['code', 'handler', 'method'])

    def on_finish(self):
        super(InstrumentedHandler, self).on_finish()
        handler = type(self).__name__
        self.duration_metric.labels(handler).observe(
            self.request.request_time() * 1e6)
        self.total_metric.labels(self.get_status(), handler,
                                 self.request.method.lower()).inc()
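Concrete handlers inherit the instrumentation simply by subclassing; a hypothetical wiring:

from tornado import web

class PingHandler(InstrumentedHandler):
    def get(self):
        self.write('pong')

app = web.Application([(r'/ping', PingHandler)])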
Example #14
    def __init__(self, config):
        # type: (AirflowFetchingConfiguration) -> WebFetcher
        super(WebFetcher, self).__init__(config)
        self.env = "Airflow"
        self.base_url = config.base_url
        self.endpoint_url = config.url
        self.api_mode = config.api_mode
        self.rbac_username = config.rbac_username
        self.rbac_password = config.rbac_password
        self.client = requests.session()
        self.is_logged_in = False

        if WebFetcher.prometheus_af_response_time_metrics is None:
            WebFetcher.prometheus_af_response_time_metrics = prometheus_client.Summary(
                "af_monitor_export_response_time",
                "Airflow export plugin response time",
                ["airflow_instance"],
            )
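A hypothetical observation site for the class-level summary; the attribute is shared across WebFetcher instances, which is why __init__ guards against re-creating it:

    def _record_response_time(self, seconds):
        # Label by instance so several Airflow deployments share one exporter.
        WebFetcher.prometheus_af_response_time_metrics.labels(
            airflow_instance=self.base_url).observe(seconds)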
Example #15
def main(
    target_db_id: int = 1,
    successful_writes_target: int = 10000,
):
    def cursor_generator():
        # just round robin, disregarding the config
        for db_settings in itertools.cycle(db_config.DATABASES):
            print('switching DB settings to:', db_settings)
            yield common.new_cursor(db_settings.host, db_settings.port,
                                    db_settings.user)

    sql = f"""
    UPDATE {common.TABLE_NAME}
    SET value = value + 1
    WHERE ID={target_db_id};
    """
    print('Starting writes...', flush=True)
    cursor_gen = cursor_generator()
    cursor = next(cursor_gen)

    successes = 0
    success_metric = prometheus_client.Counter('successful_writes',
                                               'How many passed DB writes.')
    write_times = prometheus_client.Summary(
        'write_times', 'How long successful writes took.')

    while successes != successful_writes_target:
        if successes % 200 == 0:
            print(successes, 'writes completed')
        try:
            query_start = perf_counter()

            cursor.execute(sql)

            successes += 1
            success_metric.inc()
            write_times.observe(perf_counter() - query_start)
        except psycopg2.Error:
            print('PG Error!', flush=True)
            time.sleep(3)
            cursor = next(cursor_gen)
    print('Done', flush=True)
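The loop records metrics but never exposes them, so presumably an exposition server is started elsewhere; one line before the loop would make them scrapeable (the port is arbitrary):

prometheus_client.start_http_server(8000)  # serves /metrics on :8000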
Example #16
    def before_worker_boot(self, broker, worker):
        self.logger.debug("Setting up metrics...")
        if self.registry is None:
            self.registry = prom.CollectorRegistry()
        self.worker_busy = prom.Gauge(
            "remoulade_worker_busy",
            "1 if the worker is processing a message, 0 if not",
            registry=self.registry)
        self.total_errored_messages = prom.Counter(
            "remoulade_message_errors_total",
            "The total number of errored messages.",
            ["queue_name", "actor_name"],
            registry=self.registry,
        )
        self.total_retried_messages = prom.Counter(
            "remoulade_message_retries_total",
            "The total number of retried messages.",
            ["queue_name", "actor_name"],
            registry=self.registry,
        )
        self.total_rejected_messages = prom.Counter(
            "remoulade_message_rejects_total",
            "The total number of dead-lettered messages.",
            ["queue_name", "actor_name"],
            registry=self.registry,
        )
        self.message_durations = prom.Summary(
            "remoulade_message_duration_milliseconds",
            "The time spent processing messages.",
            ["queue_name", "actor_name"],
            registry=self.registry,
        )
        for actor in broker.actors.values():
            self._init_labels(actor)

        self.logger.debug("Starting exposition server...")
        prom.start_http_server(addr=self.http_host,
                               port=self.http_port,
                               registry=self.registry)
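_init_labels is not shown; a plausible sketch that pre-creates the label children so each actor's series exist at zero before its first message (assumes actors expose queue_name and actor_name):

    def _init_labels(self, actor):
        labels = (actor.queue_name, actor.actor_name)
        self.total_errored_messages.labels(*labels)
        self.total_retried_messages.labels(*labels)
        self.total_rejected_messages.labels(*labels)
        self.message_durations.labels(*labels)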
Example #17
 def __init__(self, client, **kwargs):
     super().__init__(client, **kwargs)
     try:
         import prometheus_client
     except ImportError:
         self._last_update_metric = None
         self._initial_sync_duration_metric = None
         self._update_duration_metric = None
     else:
         self._initial_sync_duration_metric = prometheus_client.Gauge(
             "muclumbus_mirror_client_initial_sync_duration_seconds",
             "Duration of various initial sync phases",
             ["phase"],
         )
         self._last_update_metric = prometheus_client.Gauge(
             "muclumbus_mirror_client_last_update_seconds",
             "Timestamp of the last update received")
         self._update_duration_metric = prometheus_client.Summary(
             "muclumbus_mirror_client_update_duration_seconds",
             "Duration of the updates",
             ["operation"],
         )
Example #18
def main():
    args = parse_args()
    configure_logging(args.verbose)
    logger.debug("Arguments: {args}".format(args=args))

    if not args.prometheus_disable:
        logger.debug(
            "Starting prometheus exporter on {port}".format(port=args.prometheus_port)
        )
        prometheus.start_http_server(args.prometheus_port)

    kclient = KubernetesClient(args.kubeconfig, args.incluster_base_path)
    memguardian = MemGuardian(kclient)

    run_time_summary = prometheus.Summary(
        "memguardian_loop",
        "Loop execution time",
    )
    exceptions_count = prometheus.Counter(
        "memguardian_error",
        "Errors in the main loop",
    )
    while True:
        logger.debug("Running MemGuardian")
        try:
            with exceptions_count.count_exceptions():
                with run_time_summary.time():
                    memguardian.run()
        except Exception:
            logger.exception("Unknown problem in the run loop.")
        if not args.daemon:
            break
        logger.debug(
            "MemGuardian finished. Sleeping for {delay}".format(delay=args.delay)
        )
        time.sleep(args.delay)
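Both count_exceptions() and time() also work as decorators, so the nested with blocks could be expressed once at definition time; an equivalent sketch:

@exceptions_count.count_exceptions()
@run_time_summary.time()
def run_once():
    memguardian.run()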
Example #19
VALIDATOR_CONTAINER_NAME = os.environ.get('VALIDATOR_CONTAINER_NAME',
                                          'validator')
# for testnet, https://testnet-api.helium.wtf/v1
API_BASE_URL = os.environ.get('API_BASE_URL', 'https://api.helium.io/v1')

# Gather the ledger penalties for all validators, instead of just "this" one. In this
# mode every validator from `miner validator ledger` with a penalty >0.0 is included.
# The >0 constraint just keeps the data and traffic smaller.
ALL_PENALTIES = os.environ.get('ALL_PENALTIES', 0)

# use the RPC calls where available. This means you have your RPC port open.
# Once all of the exec calls are replaced we can enable this by default.
ENABLE_RPC = os.environ.get('ENABLE_RPC', 0)

# prometheus exporter types: Gauge, Counter, Summary, Histogram, Info and Enum
SCRAPE_TIME = prometheus_client.Summary('validator_scrape_time',
                                        'Time spent collecting miner data')
VALIDATOR_DISK_USAGE = prometheus_client.Gauge(
    'validator_disk_usage_bytes', 'Disk used by validator directory/volume',
    ['validator_name'])
SYSTEM_USAGE = prometheus_client.Gauge('system_usage',
                                       'Hold current system resource usage',
                                       ['resource_type', 'validator_name'])
CHAIN_STATS = prometheus_client.Gauge('chain_stats',
                                      'Stats about the global chain',
                                      ['resource_type'])
VAL = prometheus_client.Gauge('validator_height',
                              "Height of the validator's blockchain",
                              ['resource_type', 'validator_name'])
INCON = prometheus_client.Gauge('validator_inconsensus',
                                'Is validator currently in consensus group',
                                ['validator_name'])
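A sketch of how SCRAPE_TIME is presumably applied; the collection function and the value it sets are placeholders:

@SCRAPE_TIME.time()
def collect_stats():
    VALIDATOR_DISK_USAGE.labels(validator_name='validator-0').set(0)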
Example #20
    records = db.select_and_fetchall('''
SELECT batches.id, batches_n_jobs_in_complete_states.n_failed
FROM batches
LEFT JOIN batches_n_jobs_in_complete_states
  ON batches.id = batches_n_jobs_in_complete_states.id
WHERE state = 'running' AND cancel_after_n_failures IS NOT NULL AND n_failed >= cancel_after_n_failures
''')
    async for batch in records:
        await _cancel_batch(app, batch['id'])


USER_CORES = pc.Gauge('batch_user_cores', 'Batch user cores',
                      ['state', 'user', 'inst_coll'])
USER_JOBS = pc.Gauge('batch_user_jobs', 'Batch user jobs',
                     ['state', 'user', 'inst_coll'])
FREE_CORES = pc.Summary('batch_free_cores', 'Batch instance free cores',
                        ['inst_coll'])
UTILIZATION = pc.Summary('batch_utilization', 'Batch utilization rates',
                         ['inst_coll'])
COST_PER_HOUR = pc.Summary('batch_cost_per_hour', 'Batch cost ($/hr)',
                           ['measure', 'inst_coll'])
INSTANCES = pc.Gauge('batch_instances', 'Batch instances',
                     ['inst_coll', 'state'])

StateUserInstCollLabels = namedtuple('StateUserInstCollLabels',
                                     ['state', 'user', 'inst_coll'])
InstCollLabels = namedtuple('InstCollLabels', ['inst_coll'])
CostPerHourLabels = namedtuple('CostPerHourLabels', ['measure', 'inst_coll'])
InstanceLabels = namedtuple('InstanceLabels', ['inst_coll', 'state'])


async def monitor_user_resources(app):
Example #21
from functools import wraps
import prometheus_client as pc  # type: ignore
from prometheus_async.aio import time as prom_async_time  # type: ignore

REQUEST_TIME = pc.Summary('http_request_latency_seconds',
                          'Endpoint latency in seconds', ['endpoint', 'verb'])
REQUEST_COUNT = pc.Counter('http_request_count', 'Number of HTTP requests',
                           ['endpoint', 'verb', 'status'])


def monitor_endpoint(handler):
    @wraps(handler)
    async def wrapped(request, *args, **kwargs):
        # Use the path template given to @route.<METHOD>, not the fully resolved one
        endpoint = request.match_info.route.resource.canonical
        verb = request.method
        response = await prom_async_time(
            REQUEST_TIME.labels(endpoint=endpoint, verb=verb),
            handler(request, *args, **kwargs))
        REQUEST_COUNT.labels(endpoint=endpoint,
                             verb=verb,
                             status=response.status).inc()
        return response

    return wrapped
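A hypothetical aiohttp route using the decorator (the snippet's use of request.match_info.route.resource.canonical implies aiohttp):

from aiohttp import web

@monitor_endpoint
async def healthz(request):
    return web.Response(text='ok')

app = web.Application()
app.add_routes([web.get('/healthz', healthz)])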
Example #22
#!/usr/bin/env python
"""Simple Python application which exposes metrics to Prometheus
and InfluxDB to populate pre-configured dashboards in Grafana.
"""
import logging
import math
import time
import random
import requests
import threading
import prometheus_client as prometheus
from flask import Flask

app = Flask(__name__)
REQUEST_TIME = prometheus.Summary('request_processing_seconds', 'Time spent processing request')
APP_Gauge = prometheus.Gauge('flask_gauge', 'Flask input changing gauge')


def main():
    """Entry point to example app."""
    setup_logging(logging.DEBUG)
    logging.info('Starting Flask Server')
    threading.Thread(target=start_flask).start()
    logging.info('Starting Prometheus Server')
    start_prometheus()
    logging.info('Starting metrics generation to InfluxDB')
    start_metrics()


def setup_logging(level=logging.INFO):
    """Setup logging to output to standard out."""
Example #23
recoveryTime = prom.Gauge(  # hypothetical name; the assignment's opening line is missing from the excerpt
    'active_users_red_to_green_seconds',
    'Time in seconds for /active-users to change from 500 to 200',
    registry=registry)
respTime = prom.Gauge('active_users_response_time_seconds',
                      'Response time for /active-users',
                      registry=registry)

errCount = prom.Counter('active_users_errors',
                        'Error count for /active-users',
                        registry=registry)
tokCount = prom.Counter('token_refresh',
                        'Number of token refresh requests',
                        registry=registry)

TOKEN_CHECK = prom.Summary('runtime_token_status_seconds',
                           'Time spent processing /token-status',
                           registry=registry)
TOKEN_GEN = prom.Summary('runtime_token_seconds',
                         'Time spent processing /token',
                         registry=registry)
ACTIVE_USERS = prom.Summary(
    'runtime_active_users_seconds',
    'Time spent processing /active-users inclusive of token check/refresh',
    registry=registry)

# Global - bad implementation
TOKEN = ''


def update_last_recovery_time(red, green):
    delta = green - red
Example #24
"""


import re
import json
import time
import logging
import datetime
import threading
import prometheus_client

from agent import utils

# pylint: disable-msg=no-value-for-parameter
DB_GET_SUMMARY_METRIC = \
    prometheus_client.Summary("database_get_processing_seconds",
                              "Time spent handling Database Get Call")
# pylint: disable-msg=no-value-for-parameter
DB_UPDATE_SUMMARY_METRIC = \
    prometheus_client.Summary("database_update_processing_seconds",
                              "Time spent handling Database Update Call")
# pylint: disable-msg=no-value-for-parameter
DB_INSERT_SUMMARY_METRIC = \
    prometheus_client.Summary("database_insert_processing_seconds",
                              "Time spent handling Database Insert Call")
# pylint: disable-msg=no-value-for-parameter
DB_DELETE_SUMMARY_METRIC = \
    prometheus_client.Summary("database_delete_processing_seconds",
                              "Time spent handling Database Delete Call")
# pylint: disable-msg=no-value-for-parameter
DB_FIND_PARAMS_SUMMARY_METRIC = \
    prometheus_client.Summary("database_find_params_processing_seconds",
                              "Time spent handling Database Find Params Call")
Example #25
# import uvloop

from ..batch import Batch, Job
from ..log_store import LogStore
from ..database import BatchDatabase, JobsBuilder
from ..datetime_json import JSON_ENCODER
from ..batch_configuration import POD_VOLUME_SIZE, INSTANCE_ID

from . import schemas

# uvloop.install()

log = logging.getLogger('batch.front_end')

REQUEST_TIME = pc.Summary('batch2_request_latency_seconds',
                          'Batch request latency in seconds',
                          ['endpoint', 'verb'])
REQUEST_TIME_GET_JOB = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id', verb="GET")
REQUEST_TIME_GET_JOB_LOG = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/log', verb="GET")
REQUEST_TIME_GET_POD_STATUS = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/job_id/pod_status',
    verb="GET")
REQUEST_TIME_GET_BATCHES = REQUEST_TIME.labels(endpoint='/api/v1alpha/batches',
                                               verb="GET")
REQUEST_TIME_POST_CREATE_JOBS = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/batch_id/jobs/create', verb="POST")
REQUEST_TIME_POST_CREATE_BATCH = REQUEST_TIME.labels(
    endpoint='/api/v1alpha/batches/create', verb='POST')
REQUEST_TIME_POST_GET_BATCH = REQUEST_TIME.labels(
Example #26
from biggraphite import settings as bg_settings
from biggraphite import graphite_utils
from biggraphite import metric as bg_metric


# Ignore D102: Missing docstring in public method: Most of them come from upstream module.
# pylama:ignore=D102

WRITE_TIME = prometheus_client.Histogram(
    "bg_write_latency_seconds",
    "write latency in seconds",
    buckets=(0.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5),
)

CREATE_TIME = prometheus_client.Summary(
    "bg_create_latency_seconds", "create latency in seconds"
)

EXISTS_TIME = prometheus_client.Summary(
    "bg_exists_latency_seconds", "create latency in seconds"
)

CREATES = prometheus_client.Counter(
    "bg_creates", "metric creations"
)

CREATES_ENQUEUED = prometheus_client.Counter(
    "bg_creates_enqueued", "metrics scheduled for creation"
)

CREATES_DEQUEUED = prometheus_client.Counter(
Example #27
import random
import time

import prometheus_client as prom

req_summary = prom.Summary('namespace_streamingservice1',
                           'Time spent processing a request')


@req_summary.time()
def process(event, counter1, counter2):
    try:
        time.sleep(random.random())
    except Exception as e:
        print(str(e))


if __name__ == '__main__':
    counter1 = prom.Counter('namespace_ingress', 'Counter for ingress')
    counter2 = prom.Counter('namespace_egress', 'Counter for egress')
    prom.start_http_server(8000)  # expose metrics on port 8000
    event = None  # placeholder: the original snippet never defines 'event'
    while True:
        process(event, counter1, counter2)

# from prometheus_client import CollectorRegistry, Gauge, push_to_gateway, Counter
# from prometheus_client.exposition import basic_auth_handler
# import random
# import time

# def my_auth_handler(url, method, timeout, headers, data):
#     username = '******'
#     password = '******'
#     return basic_auth_handler(url, method, timeout, headers, data, username, password)
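A runnable sketch of the pushgateway flow the commented fragment gestures at; the gateway address and job name are placeholders:

from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

registry = CollectorRegistry()
last_success = Gauge('job_last_success_unixtime',
                     'Last time the batch job succeeded', registry=registry)
last_success.set_to_current_time()
push_to_gateway('localhost:9091', job='batch_job', registry=registry)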
Example #28
import common

# create logger
logging.config.fileConfig('logging.conf')
logger = logging.getLogger('api_monitor')

# prometheus initialization
registry = prom.CollectorRegistry()
green = prom.Gauge('service_last_green_timestamp',
                   'Latest 200 status for /status endpoint',
                   registry=registry)
red = prom.Gauge('service_last_red_timestamp',
                 'Latest 40x/50x status for /status endpoint',
                 registry=registry)
REQUEST_TIME = prom.Summary('runtime_status_seconds',
                            'Time spent processing /status',
                            registry=registry)


@REQUEST_TIME.time()
def is_service_ok():
    url = 'https://lackadaisical-tip.glitch.me/status'
    status = common.is_http_ok(url)
    if status:
        green.set_to_current_time()
    else:
        red.set_to_current_time()
    prom.push_to_gateway('localhost:9091', job='api_status', registry=registry)
Example #29
import prometheus_client as prom
import random
import time
from threading import Thread

from flask import Flask, request
from flask_prometheus import monitor

req_summary = prom.Summary('python_my_req_example', 'Time spent processing a request')


@req_summary.time()
def process_request(t):
    time.sleep(t)


app = Flask("pyProm")


@app.route('/', methods=["GET", "POST"])
def hi():
    if request.method == "GET":
        return "OK", 200, None

    return "Bad Request", 400, None


counter = prom.Counter('python_my_counter', 'This is my counter')
gauge = prom.Gauge('python_my_gauge', 'This is my gauge')
histogram = prom.Histogram('python_my_histogram', 'This is my histogram')
summary = prom.Summary('python_my_summary', 'This is my summary')
Example #30
SERVER_STATUS_CODE = prometheus_client.Counter(  # hypothetical name; the opening line is missing from the excerpt
        'rpc_server_status_code', 'Server side status code', ['service_name', 'method', 'status_code', 'server_type'])
SERVER_IN_PROGRESS = prometheus_client.Gauge("rpc_server_inprogress_requests", "", ["service_name", "server_type"], multiprocess_mode='livesum')
SERVER_FREE_WORKERS = prometheus_client.Gauge("rpc_server_free_workers", "", ["service_name", "server_type"], multiprocess_mode='livesum')

CLIENT_IN_PROGRESS_REQUESTS = prometheus_client.Gauge(
    "rpc_client_in_progress_requests",
    "Client side in progress requests",
    ["client_service_name", "server_service_name", "method_name"],
    multiprocess_mode='livesum'
)

CLIENT_ATTEMPT_DEADLINE = prometheus_client.Histogram(
        'rpc_deadline_histogram', 'Deadline set for each attempt', [
            'caller_service_name', 'service_name', 'config', 'method'])
CLIENT_ATTEMPT_LATENCY = prometheus_client.Summary(
        'rpc_attempt_latency_summary', 'Latency for each attempt', [
            'caller_service_name', 'service_name', 'config', 'method', 'status_code'])
CLIENT_LATENCY = prometheus_client.Histogram(
        'rpc_latency_histogram', 'Overall latency after all retries', [
            'caller_service_name', 'service_name', 'method'])

CLIENT_RETRIES = prometheus_client.Summary(
        'rpc_retries_summary', 'Number of retries', [
            'caller_service_name', 'service_name', 'method', 'status_code'])

JOB_LATENCY = prometheus_client.Histogram(
        'rpc_server_job_latency', 'Overall latency of async RabbitMQ calls', [
        'caller_service_name', 'service_name', 'method', 'status_code'])

JOB_RETRIES = prometheus_client.Summary('rpc_server_job_retries', 'number of retries to execute a job', [
        'caller_service_name', 'service_name', 'method', 'status_code' ])
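The multiprocess_mode='livesum' gauges above only aggregate correctly under the library's multiprocess mode; a minimal exposition sketch, assuming PROMETHEUS_MULTIPROC_DIR is set before prometheus_client is imported:

from prometheus_client import CollectorRegistry, start_http_server, multiprocess

registry = CollectorRegistry()
multiprocess.MultiProcessCollector(registry)  # merges per-process sample files
start_http_server(9100, registry=registry)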