import subprocess

import dill as pickle
from tornado.options import define, options
from prometheus_client import (CollectorRegistry, generate_latest, start_http_server,
                               Summary, Counter, Histogram, Gauge)

define('PIO_MODEL_STORE_HOME', default='', help='path to model_store', type=str)
define('PIO_MODEL_TYPE', default='', help='prediction model type', type=str)
define('PIO_MODEL_NAMESPACE', default='', help='prediction model namespace', type=str)
define('PIO_MODEL_NAME', default='', help='prediction model name', type=str)
define('PIO_MODEL_VERSION', default='', help='prediction model version', type=str)
define('PIO_MODEL_SERVER_PORT', default='9876', help='tornado http server listen port', type=int)
define('PIO_MODEL_SERVER_PROMETHEUS_PORT', default=8080, help='port to run the prometheus http metrics server on', type=int)

MODEL_MODULE_NAME = 'pio_bundle'

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds', 'Model Server: Time spent processing request')
REQUEST_TIME.observe(1.0)  # Observe 1.0 (seconds in this case)

REQUESTS_IN_PROGRESS = Gauge('inprogress_requests', 'model server: requests currently in progress')
REQUESTS_COUNT = Counter(
    'http_requests_total',
    'model server: total http request count since the last time the process was restarted',
    ['method', 'model_type', 'model_namespace', 'model_name', 'model_version'])
EX_COUNT = Counter('exceptions_total',
                   'model server: total exception count since the last time the process was restarted')
REQUEST_LATENCY = Histogram('http_request_processing_seconds',
                            'model server: time in seconds spent processing requests.')
REQUEST_LATENCY_BUCKETS = Histogram(
    'http_request_duration_microseconds',
    'model server: time in microseconds spent processing requests.',
    ['method', 'model_type', 'model_namespace', 'model_name', 'model_version'])

REGISTRY = CollectorRegistry()
REGISTRY.register(REQUEST_TIME)
REGISTRY.register(REQUESTS_IN_PROGRESS)
REGISTRY.register(REQUESTS_COUNT)
REGISTRY.register(EX_COUNT)
# ping metrics
from prometheus_client import Counter, Gauge, Histogram, Summary
from prometheus_client.utils import INF

PING_REQUESTS = Counter(
    "internet_monitor_ping_total", "Total ping requests made to 1.1.1.1"
)
PING_FAILURES = Counter(
    "internet_monitor_ping_failures_total", "Total failed ping requests to 1.1.1.1"
)
PING_PACKET_LOSS = Counter(
    "internet_monitor_ping_packet_loss_total",
    "Number of packets lost while checking latency",
)
PING_JITTER = Gauge("internet_monitor_ping_jitter_seconds", "ICMP jitter")
UP = Gauge("internet_monitor_up", "Internet is up or down")
PING_LATENCY = Summary(
    "internet_monitor_ping_latency_seconds", "Ping latency to 1.1.1.1"
)

# download metrics
DOWNLOAD_DURATION = Histogram(
    "internet_monitor_download_duration_seconds",
    "Download latency",
    buckets=(1, 2, 5, 7, 10, 15, 20, 50, 100, INF),
)
DOWNLOAD_REQUEST_SIZE = Gauge(
    "internet_monitor_download_size_bytes", "Bytes downloaded"
)
DOWNLOAD_REQUESTS = Counter(
    "internet_monitor_download_total", "Number of times the download job runs"
)
DOWNLOAD_FAILURES = Counter(
    # name and help assumed, following the failure-counter pattern above
    # (the definition is truncated in the source)
    "internet_monitor_download_failures_total", "Number of failed download jobs"
)
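# The block above only declares the metrics. A minimal sketch of a ping loop
# that would update them; `run_ping` is a hypothetical helper returning the
# round-trip time in seconds, or None on failure.
import time


def ping_loop():
    while True:
        PING_REQUESTS.inc()
        latency = run_ping("1.1.1.1")  # hypothetical helper, not part of the snippet above
        if latency is None:
            PING_FAILURES.inc()
            UP.set(0)
        else:
            PING_LATENCY.observe(latency)
            UP.set(1)
        time.sleep(30)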
import logging

from prometheus_client import CollectorRegistry, Counter, Summary

logging.basicConfig(
    format='%(name)s:%(levelname)s %(module)s:%(lineno)d: %(message)s',
    level="DEBUG")

# VNF_NAME = os.environ.get('VNF_NAME', 'test')
VNF_NAME = 'client4'
# pushgateway = 'localhost:9091'
PUSHGATEWAY = '172.17.0.1:9091'

# Prometheus export data
# helper variables to calculate the metrics
VCDN_REGISTRY = CollectorRegistry()
PROM_FILESIZE = Summary('filesize', 'requested file sizes',
                        ['vnf_name'], registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_PROCESSED_CACHED_REQS = Counter(
    'processed_cached_reqs', 'cached requests',
    ['vnf_name'], registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_PROCESSED_NON_CACHED_REQS = Counter(
    'processed_non_cached_reqs', 'non-cached requests',
    ['vnf_name'], registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_FALSE_USERS = Counter('false_users', 'false requests',
                           ['vnf_name'], registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
PROM_CACHED_USERS = Counter('cached_users', 'cached users',  # (help text assumed; definition truncated in source)
                            ['vnf_name'], registry=VCDN_REGISTRY).labels(vnf_name=VNF_NAME)
import base64
import os
import sys
from pickle import load as cPload
from pickle import loads as cPloads

import cloudpickle
import numpy
import pandas as pd
from flask import Flask, request
from prometheus_client import Counter, Summary
from werkzeug.wsgi import DispatcherMiddleware

app = Flask(__name__)

METRICS_PREFIX = os.getenv("S2I_APP_METRICS_PREFIX", "pipeline")
PREDICTION_TIME = Summary('%s_processing_seconds' % METRICS_PREFIX,
                          'Time spent processing predictions')
PREDICTIONS = Counter('%s_predictions_total' % METRICS_PREFIX,
                      'Total predictions for a given label', ['value'])

app.model = None


@app.route('/')
def index():
    return "Make a prediction by POSTing to /predict"


@app.route('/predict', methods=['POST'])
@PREDICTION_TIME.time()
def predict():
    import json
    if 'json_args' in request.form:
        args = pd.read_json(request.form['json_args'])
        if len(args.columns) == 1 and len(args.values) > 1:
            # convert to series
from prometheus_client import Summary

LATENCY = Summary(
    'http_requests_latency_seconds',
    'HTTP request latency.',
    labelnames=['path'])

foo = LATENCY.labels('/foo')


@foo.time()
def foo_handler(params):
    pass
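# A usage sketch, assuming the default registry: one call to the decorated
# handler records a single observation under the path="/foo" label, which
# then shows up in the text exposition.
from prometheus_client import generate_latest

foo_handler(None)

# The exposition now contains lines such as:
#   http_requests_latency_seconds_count{path="/foo"} 1.0
print(generate_latest().decode())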
import logging

from prometheus_client import Summary

from nautobot.dcim.models import Device

from .choices import OnboardingFailChoices
from .choices import OnboardingStatusChoices
from .exceptions import OnboardException
from .helpers import onboarding_task_fqdn_to_ip
from .metrics import onboardingtask_results_counter
from .models import OnboardingDevice
from .models import OnboardingTask
from .onboard import OnboardingManager

logger = logging.getLogger("rq.worker")

REQUEST_TIME = Summary("onboardingtask_processing_seconds", "Time spent processing onboarding request")


@REQUEST_TIME.time()
@job("default")  # the `job` decorator is imported earlier in the source, typically `from django_rq import job`
def onboard_device(task_id, credentials):  # pylint: disable=too-many-statements
    """Process a single OnboardingTask instance."""
    username = credentials.username
    password = credentials.password
    secret = credentials.secret

    ot = OnboardingTask.objects.get(id=task_id)

    # Rewrite FQDN to IP for Onboarding Task
    onboarding_task_fqdn_to_ip(ot)
def test_summary(self):
    s = Summary('ss', 'A summary', ['a', 'b'], registry=self.registry)
    s.labels('c', 'd').observe(17)
    self.assertEqual(
        b'# HELP ss A summary\n'
        b'# TYPE ss summary\n'
        b'ss_count{a="c",b="d"} 1.0\n'
        b'ss_sum{a="c",b="d"} 17.0\n',
        generate_latest(self.registry))
def export_defaults(self, buckets=None, group_by='path',
                    latency_as_histogram=True,
                    prefix='flask', app=None, **kwargs):
    """
    Export the default metrics:
        - HTTP request latencies
        - HTTP request exceptions
        - Number of HTTP requests

    :param buckets: the time buckets for request latencies
        (will use the default when `None`)
    :param group_by: group default HTTP metrics by this request property,
        like `path`, `endpoint`, `rule`, etc. (defaults to `path`)
    :param latency_as_histogram: export request latencies as a Histogram,
        otherwise use a Summary instead
        (defaults to `True` to export as a Histogram)
    :param prefix: prefix to start the default metrics names with
        or `NO_PREFIX` (to skip prefix)
    :param app: the Flask application
    """

    if app is None:
        app = self.app or current_app

    if not prefix:
        prefix = self._defaults_prefix or 'flask'

    if kwargs.get('group_by_endpoint') is True:
        warnings.warn(
            'The `group_by_endpoint` argument of '
            '`PrometheusMetrics.export_defaults` is deprecated since 0.4.0, '
            'please use the new `group_by` argument.', DeprecationWarning)
        duration_group = 'endpoint'
    elif group_by:
        duration_group = group_by
    else:
        duration_group = 'path'

    if callable(duration_group):
        duration_group_name = duration_group.__name__
    else:
        duration_group_name = duration_group

    if prefix == NO_PREFIX:
        prefix = ""
    else:
        prefix = prefix + "_"

    try:
        self.info('%sexporter_info' % prefix,
                  'Information about the Prometheus Flask exporter',
                  version=self.version)
    except ValueError:
        return  # looks like we have already exported the default metrics

    labels = self._get_combined_labels(None)

    if latency_as_histogram:
        # use the default buckets from prometheus_client if not given here
        buckets_as_kwargs = {}
        if buckets is not None:
            buckets_as_kwargs['buckets'] = buckets

        request_duration_metric = Histogram(
            '%shttp_request_duration_seconds' % prefix,
            'Flask HTTP request duration in seconds',
            ('method', duration_group_name, 'status') + labels.keys(),
            registry=self.registry,
            **buckets_as_kwargs)
    else:
        # export as Summary instead
        request_duration_metric = Summary(
            '%shttp_request_duration_seconds' % prefix,
            'Flask HTTP request duration in seconds',
            ('method', duration_group_name, 'status') + labels.keys(),
            registry=self.registry)

    counter_labels = ('method', 'status') + labels.keys()
    request_total_metric = Counter(
        '%shttp_request_total' % prefix,
        'Total number of HTTP requests',
        counter_labels,
        registry=self.registry)

    request_exceptions_metric = Counter(
        '%shttp_request_exceptions_total' % prefix,
        'Total number of HTTP requests which resulted in an exception',
        counter_labels,
        registry=self.registry)

    def before_request():
        request.prom_start_time = default_timer()

    def after_request(response):
        if hasattr(request, 'prom_do_not_track') or hasattr(request, 'prom_exclude_all'):
            return response

        if self.excluded_paths:
            if any(pattern.match(request.path) for pattern in self.excluded_paths):
                return response

        if hasattr(request, 'prom_start_time'):
            total_time = max(default_timer() - request.prom_start_time, 0)

            if callable(duration_group):
                group = duration_group(request)
            else:
                group = getattr(request, duration_group)

            request_duration_labels = {
                'method': request.method,
                'status': _to_status_code(response.status_code),
                duration_group_name: group
            }
            request_duration_labels.update(labels.values_for(response))

            request_duration_metric.labels(**request_duration_labels).observe(total_time)

        request_total_metric.labels(
            method=request.method,
            status=_to_status_code(response.status_code),
            **labels.values_for(response)).inc()

        return response
    def teardown_request(exception=None):
        if not exception or hasattr(request, 'prom_do_not_track') or hasattr(request, 'prom_exclude_all'):
            return

        if self.excluded_paths:
            if any(pattern.match(request.path) for pattern in self.excluded_paths):
                return

        response = make_response('Exception: %s' % exception, 500)

        if callable(duration_group):
            group = duration_group(request)
        else:
            group = getattr(request, duration_group)

        request_exceptions_metric.labels(
            method=request.method, status=500,
            **labels.values_for(response)).inc()

        if hasattr(request, 'prom_start_time'):
            total_time = max(default_timer() - request.prom_start_time, 0)

            request_duration_labels = {
                'method': request.method,
                'status': 500,
                duration_group_name: group
            }
            request_duration_labels.update(labels.values_for(response))

            request_duration_metric.labels(**request_duration_labels).observe(total_time)

        request_total_metric.labels(
            method=request.method, status=500,
            **labels.values_for(response)).inc()
        return

    app.before_request(before_request)
    app.after_request(after_request)
    app.teardown_request(teardown_request)
#!/usr/bin/python

import argparse
import os
import re
import time
from pprint import pprint
from sys import exit

import requests
from prometheus_client import start_http_server, Summary
from prometheus_client.core import GaugeMetricFamily, REGISTRY

DEBUG = int(os.environ.get('DEBUG', '0'))

COLLECTION_TIME = Summary('jenkins_collector_collect_seconds',
                          'Time spent to collect metrics from Jenkins')


class JenkinsCollector(object):
    # The build statuses we want to export about.
    statuses = ["lastBuild", "lastCompletedBuild", "lastFailedBuild",
                "lastStableBuild", "lastSuccessfulBuild", "lastUnstableBuild",
                "lastUnsuccessfulBuild"]

    def __init__(self, target, user, password, insecure):
        self._target = target.rstrip("/")
        self._user = user
        self._password = password
        self._insecure = insecure
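# A sketch of how a collector like this is typically wired up; the Jenkins URL,
# credentials, and port below are placeholders, not part of the snippet above.
if __name__ == '__main__':
    REGISTRY.register(JenkinsCollector(
        'https://jenkins.example.org', 'user', 'api-token', insecure=False))
    start_http_server(9118)  # arbitrary example port
    while True:
        time.sleep(1)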
patch_psycopg()

import json
import os
import subprocess
import time

from flask import Flask, jsonify, request, current_app, redirect, url_for
from prometheus_client import Summary

from . import db, jobs, metrics, cron
from .metrics import REGISTRY

SCHEDULER_REQUEST_SUMMARY = Summary(
    "scheduler_request_latency_seconds",
    "Flask Request Latency",
    ["method", "endpoint"],
    registry=REGISTRY,
)


def before_request():
    request.start_time = time.time()


def after_request(response):
    request_latency = time.time() - request.start_time
    SCHEDULER_REQUEST_SUMMARY.labels(request.method, request.url_rule).observe(request_latency)
    return response
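# The snippet defines the hooks but does not show them being attached. A
# minimal sketch of the wiring, assuming the Flask app is created in this
# module (the actual source presumably does this elsewhere):
app = Flask(__name__)
app.before_request(before_request)
app.after_request(after_request)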
#!/usr/bin/python
# -*- coding: utf-8 -*-

from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from prometheus_client import Summary, Counter, Gauge, Info, start_http_server
from os import curdir, sep
import time

PORT_NUMBER = 8080
BIND_IP = '0.0.0.0'
EXPORTER_PORT_NUMBER = 8000
EXPORTER_BIND_IP = '0.0.0.0'

counter_failures = Counter('demo_counter_failures', 'Quantity of failed requests')
counter_summary = Summary('demo_request_counter', 'Summary of ok requests')
counter_gauge = Gauge('demo_request_gauge', 'Gauge of requests')
application_info = Info('demo_build_version', 'Application info')


# This class handles any incoming request from the browser
class myHandler(BaseHTTPRequestHandler):

    # Handler for the GET requests
    def do_GET(self):
        if self.path == "/":
            self.path = "./index.html"
        if self.path == "/about":
            self.path = "./about.html"
from prometheus_client import start_http_server, Info, Counter, Enum, Gauge, Summary  # noqa

total = Counter('pup_advisor_total', 'The total amount of uploads')
valid = Counter('pup_advisor_valid', 'The total amount of valid uploads')
invalid = Counter('pup_advisor_invalid', 'The total amount of uploads that failed validation')
inventory_post_success = Counter(
    'pup_advisor_inventory_post_success', 'The total amount of successful inventory posts')
inventory_post_failure = Counter(
    'pup_advisor_inventory_post_failure', 'The total amount of failed inventory posts')
validation_time = Summary('validation_processing_seconds', 'Time spent validating archive')
inventory_post_time = Summary('inventory_post_processing_seconds', 'Time spent posting to inventory')
handle_file_time = Summary('pup_handle_file_time', 'Time spent executing handle_file')
extract_facts_time = Summary('pup_extract_facts_time', 'Time spent extracting facts')
payload_size = Summary('pup_payload_size', 'Size in bytes of processed payloads')
upload_service_version = Info('upload_service_version', 'Build commit and date')
produce_queue_size = Gauge("pup_produce_queue_size", "Size of the produce queue")
system_profile_queue_size = Gauge("pup_system_profile_queue_size", "Size of the system profile queue")
current_archives_size = Gauge("pup_current_archives_size",
                              "Size of the current archives")  # (help text assumed; definition truncated in source)
import re
from urllib.parse import urlparse

from prometheus_async.aio import time
from prometheus_client import Histogram, Summary

from ...config import config

REQ_TIME = Summary("external_to_internal_req_time", "time spent with external_to_internal endpoint")
REQ_HISTOGRAM_TIME = Histogram("external_to_internal_req_histogram",
                               "Histogram for external_to_internal endpoint")


@time(REQ_TIME)
@time(REQ_HISTOGRAM_TIME)
async def translate(external_to_internal_spec, repo_provider):
    external_url = external_to_internal_spec["external_url"]
    internal_url = await translate_external_to_internal(external_url)
    result = {"external_url": external_url, "internal_url": internal_url}
    return result


async def translate_external_to_internal(external_git_url):
    """
    Logic from original maitai code to do this: found in
    GitUrlParser.java#generateInternalGitRepoName
    """
import logging
import os
import re
from xml.etree import ElementTree

import requests
from prometheus_client import Summary
from prometheus_client.core import CounterMetricFamily, GaugeMetricFamily

log = logging.getLogger(__name__)


class PrometheusBlazeGraphExporter(object):

    scrape_duration = Summary(
        'blazegraph_scrape_duration_seconds', 'Blazegraph exporter scrape duration')

    def __init__(self, blazegraph_base_url, blazegraph_data_folder):
        self.url = blazegraph_base_url
        self.data_folder = blazegraph_data_folder
        self.counters = []
        self.sparql_endpoint = '{base_url}/namespace/wdq/sparql'.format(base_url=self.url)

    def query_to_metric(self, qname):
        return qname.replace(' ', '_').replace('/', '.').lstrip('.')

    def get_counter(self, cnt_name):
        # Not sure why we need depth but some counters don't work without it
        url = '{base_url}/counters?depth=10&path={cnt_name}'.format(
            base_url=self.url, cnt_name=cnt_name)
        try:
            header = {'Accept': 'application/xml'}
            response = requests.get(url, headers=header)
        except requests.exceptions.RequestException:
            log.exception('Error sending request')
            return None

        el = ElementTree.fromstring(response.content)
        last_name = cnt_name.split('/')[-1]
        for cnt in el.getiterator('c'):
            if cnt.attrib['name'] == last_name:
                return cnt.attrib['value']
        return None

    def fetch_allocators(self):
        allocators = 0
        try:
            url = "{base_url}/status?dumpJournal".format(base_url=self.url)
            response = requests.get(url).text
            split_info = response.split('AllocatorSize')
            for alloc_line in split_info[1].splitlines():
                # empty line finishes the table
                if len(alloc_line.strip()) == 0:
                    break
                parts = re.split(r"\s+", alloc_line)
                # second value must be a digit
                if not parts[1].isdigit():
                    continue
                allocators += int(parts[1])
            return allocators
        except requests.exceptions.RequestException:
            log.exception('Error fetching allocator data')
            return None

    @scrape_duration.time()
    def collect(self):
        blazegraph_metrics = {
            '/Query Engine/queryStartCount': CounterMetricFamily(
                'blazegraph_queries_start',
                'Number of queries that have started since the start of the application.'),
            '/Query Engine/queryDoneCount': CounterMetricFamily(
                'blazegraph_queries_done',
                'Number of queries completed since the start of the application.'),
            '/Query Engine/queryErrorCount': CounterMetricFamily(
                'blazegraph_queries_error',
                'Number of queries in error since the start of the application.'),
            '/Query Engine/queriesPerSecond': GaugeMetricFamily(
                'blazegraph_queries_per_second',
                'Number of queries per second (rolling average).'),
            '/Query Engine/operatorActiveCount': GaugeMetricFamily(
                'blazegraph_operator_active_count',
                'Number of active blazegraph operators'),
            '/Query Engine/runningQueriesCount': GaugeMetricFamily(
                'blazegraph_running_queries_count',
                'Number of running queries'),
            '/Query Engine/GeoSpatial/geoSpatialSearchRequests': GaugeMetricFamily(
                'blazegraph_geospatial_search_requests',
                'Number of geospatial search requests since the start of the application.'),
            '/Journal/bytesReadPerSec': GaugeMetricFamily(
                'blazegraph_journal_bytes_read_per_second', ''),
            '/Journal/bytesWrittenPerSec': GaugeMetricFamily(
                'blazegraph_journal_bytes_written_per_second', ''),
            '/Journal/extent': GaugeMetricFamily(
                'blazegraph_journal_extent', ''),
            '/Journal/commitCount': CounterMetricFamily(
                'blazegraph_journal_commit_count', ''),
            '/Journal/commit/totalCommitSecs': GaugeMetricFamily(
                'blazegraph_journal_total_commit_seconds',
                'Total time spent in commit.'
            ),
            '/Journal/commit/flushWriteSetSecs': GaugeMetricFamily(
                'blazegraph_journal_flush_write_set_seconds', ''),
            '/Journal/Concurrency Manager/Read Service/Average Active Count': GaugeMetricFamily(
                'blazegraph_journal_concurrency_read_average_active_count',
                'Average Number of Read Active Threads'),
            '/JVM/Memory/DirectBufferPool/default/bytesUsed': GaugeMetricFamily(
                'blazegraph_jvm_memory_direct_buffer_pool_default_bytes_used', ''),
            '/JVM/Memory/Runtime Free Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_free_memory',
                'Current amount of free memory in the JVM.'),
            '/JVM/Memory/Runtime Max Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_max_memory',
                'Max amount of memory the JVM can allocate.'),
            '/JVM/Memory/Runtime Total Memory': GaugeMetricFamily(
                'blazegraph_jvm_memory_runtime_total_memory',
                'Total amount of memory allocated to the JVM.'),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Collection Count': CounterMetricFamily(
                'blazegraph_jvm_memory_gc_g1_old_collection_count',
                'Number of old GC since JVM start.'),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Cumulative Collection Time': GaugeMetricFamily(
                'blazegraph_jvm_memory_gc_g1_old_cumulative_collection_time',
                'Total time spent in old GC (seconds).'),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Collection Count': CounterMetricFamily(
                'blazegraph_jvm_memory_gc_g1_young_collection_count',
                'Number of young GC since JVM start.'),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Cumulative Collection Time': GaugeMetricFamily(
                'blazegraph_jvm_memory_gc_g1_young_cumulative_collection_time',
                'Total time spent in young GC (seconds).'),
        }

        for metric_name, metric_family in blazegraph_metrics.items():
            metric_value = self.get_counter(metric_name)
            if metric_value is None:
                log.warning('Could not fetch metric %r', metric_name)
            try:
                value = float(metric_value)
            except (ValueError, TypeError):
                value = float('nan')
            metric_family.add_metric([], value)

        alloc_metric = GaugeMetricFamily(
            'blazegraph_allocators',
            'Number of used FixedAllocators in Blazegraph')
        alloc_free_metric = GaugeMetricFamily(
            'blazegraph_free_allocators',
            'Number of free FixedAllocators in Blazegraph')

        allocs = self.fetch_allocators()
        if allocs is not None:
            alloc_metric.add_metric([], allocs)
            alloc_free_metric.add_metric([], 256 * 1024 - allocs)
        else:
            alloc_metric.add_metric([], float('nan'))
            alloc_free_metric.add_metric([], float('nan'))

        yield alloc_metric
        yield alloc_free_metric

        # Added for BBP blazegraph disk monitoring
        disk_total_metric = GaugeMetricFamily(
            'blazegraph_total_disk', 'Blazegraph total disk space')
        disk_free_metric = GaugeMetricFamily(
            'blazegraph_free_disk', 'Blazegraph free disk space')

        statvfs = os.statvfs(self.data_folder)
        disk_total_metric.add_metric([], statvfs.f_frsize * statvfs.f_blocks)  # size of filesystem in bytes
        disk_free_metric.add_metric([], statvfs.f_frsize * statvfs.f_bfree)  # actual number of free bytes

        yield disk_total_metric
        yield disk_free_metric

        for metric in blazegraph_metrics.values():
            yield metric
import base64
import hashlib
import os
import traceback

from prometheus_client import Counter, Histogram, Summary

ERROR_VALIDATION_JSON_COUNTER = Counter(
    "error_validation_json_counter",
    "User sent bad JSON requests that failed validation",
)
ERROR_RESPONSE_400_COUNTER = Counter(
    "error_response_400_counter", "App returned 400 status"
)
ERROR_RESPONSE_500_COUNTER = Counter(
    "error_response_500_counter", "App returned 500 status"
)
ERROR_CALLBACK_COUNTER = Counter(
    "error_callback_counter", "Errors calling callback url"
)
REQ_TIME = Summary("callback_time", "time spent with calling callback")
REQ_HISTOGRAM_TIME = Histogram("callback_histogram", "Histogram for calling callback")


def create_log_context_id():
    return "repour-" + base64.b32encode(os.urandom(20)).decode("ascii").lower()


def create_callback_id():
    return base64.b32encode(os.urandom(30)).decode("ascii")


def create_traceback_id():
    tb = traceback.format_exc()
    h = hashlib.md5()
    h.update(tb.encode("utf-8"))
    return h.hexdigest()  # (return statement assumed; function truncated in source)
from prometheus_client import Counter, Gauge, Summary, start_http_server

import vaping
import vaping.plugins

min_latency = Summary(
    "minimum_latency_milliseconds", "Minimum latency in milliseconds.", ["host"]
)  # NOQA
max_latency = Summary(
    "maximum_latency_milliseconds", "Maximum latency in milliseconds.", ["host"]
)  # NOQA
avg_latency = Summary(
    "average_latency_milliseconds", "Average latency in milliseconds.", ["host"]
)  # NOQA
sent_packets = Counter(
    "number_of_packets_sent", "Number of pings sent to host.", ["host"]
)  # NOQA
packet_loss = Gauge("packet_loss", "% packet loss to host (0-100)", ["host"])  # NOQA


@vaping.plugin.register("prometheus")
class Prometheus(vaping.plugins.EmitBase):
    def init(self):
        self.log.debug("init prometheus plugin")
        port = self.pluginmgr_config.get("port", 9099)
        start_http_server(port)

    def emit(self, data):
        raw_data = data.get("data")
        self.log.debug("data: " + str(raw_data))
print("PROFILE_DIR '{}' is invalid, not enabling profiling".format( PROFILE_DIR)) PROFILE_DIR = None """ Master Fleet Runner Instantiates the specified number of Bjoern WSGI server processes, each taking orders on their own unix socket and passing requests to the respective WSGI app (rest, notify or metrics). """ # metrics if PROMETHEUS: REQUEST_TIME = Summary('kopano_mfr_request_processing_seconds', 'Time spent processing request', ['method', 'endpoint']) EXCEPTION_COUNT = Counter('kopano_mfr_total_unhandled_exceptions', 'Total number of unhandled exceptions') MEMORY_GAUGE = Gauge('kopano_mfr_virtual_memory_bytes', 'Virtual memory size in bytes', ['worker']) CPUTIME_GAUGE = Gauge('kopano_mfr_cpu_seconds_total', 'Total user and system CPU time spent in seconds', ['worker']) def error_handler(ex, req, resp, params, with_metrics): if not isinstance(ex, (falcon.HTTPError, falcon.HTTPStatus)): if with_metrics: if PROMETHEUS: EXCEPTION_COUNT.inc()
# URL of the GitLab instance, defaults to hosted GitLab
URL = str(os.environ.get('URL', 'https://gitlab.com'))
# Secret token for the app to authenticate itself
TOKEN = str(os.environ.get('TOKEN'))

# Login to GitLab
gl = gitlab.Gitlab(URL, TOKEN)
gl.auth()

# Initialize Prometheus instrumentation
projects_total = Gauge('gitlab_projects_total', 'Number of projects')
builds_total = Gauge('gitlab_builds_total', 'Number of builds',
                     ['project_id', 'project_name'])
build_duration_seconds = Summary(
    'gitlab_build_duration_seconds', 'Seconds the build took to run',
    ['project_id', 'project_name', 'stage', 'status', 'ref'])
open_issues_total = Gauge('gitlab_open_issues_total', 'Number of open issues',
                          ['project_id', 'project_name'])
pipeline_duration_seconds = Summary(
    'gitlab_pipeline_duration_seconds', 'Seconds the pipeline took to run',
    ['project_id', 'project_name', 'status', 'ref'])


def get_projects():
    try:
        projects = gl.projects.list(all=True)
        log.debug("Projects: {}".format(projects))
        return projects
    except gitlab.exceptions.GitlabListError:
        log.warning("Projects could not be retrieved")
import time
import json

import flask
import requests
from prometheus_client import Summary

from friendbot import app, utils, messages

signing_secret = app.config["FRIENDBOT_SIGNING_SECRET"]
cache = app.config["REDIS_CACHE"]

length_summary = Summary("friendbot_request_time", "Length of Friendbot Requests")


@app.route("/action", methods=["POST"])
def action_endpoint():
    start_time = time.time()
    if signing_secret:
        valid, valid_err = utils.validate_request(flask.request, signing_secret)
        if not valid:
            app.logger.error(valid_err)
            return ("", 400)
    data = json.loads(flask.request.form["payload"])
    button_text = data["actions"][0]["text"]["text"]
    error = False
    if button_text == "Send":
        user_id = data["user"]["id"]
        real_name = cache.hget("users", user_id).decode("utf-8")
        payload = messages.send_message(data["actions"][0]["value"], real_name)
    elif button_text == "Shuffle":
import logging
import os
import time

import amqp.exceptions
import httpx
from celery import Celery
from celery.events.receiver import EventReceiver
from celery.utils.objects import FallbackContext
from prometheus_client import start_http_server, Summary, Gauge

logging.basicConfig(level=logging.INFO)

app = Celery("overseer", broker=os.environ["BROKER_URL"])

# Monitoring metrics (these are updated by `Receiver` and `Collector` below)
event_processing = Summary("overseer_event_processing",
                           "Summary of event processing duration")
queue_length = Gauge("overseer_queue_length",
                     "Number of jobs in the queue.", ["queue"])
workers_count = Gauge("overseer_workers_count",
                      "Number of workers listening to the queue.", ["queue"])
queue_length_worker_ratio = Gauge(
    "overseer_queue_length_worker_ratio",
    "Ratio of the number of jobs to the number of workers for each queue.",
    ["queue"],
)

# Setup API client
api = httpx.Client(
    ['account_number'])
FAILED_TO_VALIDATE = Gauge('failed_validation',
                           'Reports that could not be validated',
                           ['account_number'])
INVALID_REPORTS = Gauge('invalid_reports',
                        'Reports containing invalid syntax',
                        ['account_number'])
TIME_RETRIES = Gauge(
    'time_retries',
    'The total number of retries based on time for all reports',
    ['account_number'])
COMMIT_RETRIES = Gauge(
    'commit_retries',
    'The total number of retries based on commit for all reports',
    ['account_number'])
REPORT_PROCESSING_LATENCY = Summary(
    'report_processing_latency',
    'The time in seconds that it takes to process a report')
VALIDATION_LATENCY = Summary('validation_latency',
                             'The time it takes to validate a report')


# pylint: disable=broad-except, too-many-lines, too-many-public-methods
class AbstractProcessor(ABC):  # pylint: disable=too-many-instance-attributes
    """Class for processing saved reports that have been uploaded."""

    # pylint: disable=too-many-arguments
    def __init__(self, pre_delegate, state_functions, state_metrics,
                 async_states, object_prefix, object_class, object_serializer):
        """Create an abstract processor."""
        self.report_or_slice = None
        self.object_class = object_class
import random
import sys
import time

from prometheus_client import (
    CollectorRegistry,
    Counter,
    Gauge,
    Summary,
    push_to_gateway,
)

# A separate registry is used, as the default registry may contain other
# metrics such as those from the Process Collector.
REGISTRY = CollectorRegistry()

JOB_DURATION = Summary('my_batch_job_duration_seconds',
                       'Duration of my batch job in seconds.',
                       registry=REGISTRY)
RESULT = Gauge('my_batch_job_result', 'This is a test', registry=REGISTRY)
LAST_SUCCESS = Gauge('my_batch_job_last_success_unixtime',
                     'Last time my batch job succeeded, in unixtime.',
                     registry=REGISTRY)


@JOB_DURATION.time()
def main():
    if len(sys.argv) < 2:
        print("Usage: {} <Prometheus Pushgateway:port>".format(sys.argv[0]))
        return
    # Simulate some work
    time.sleep(random.random())
    # Update metrics and push them to the gateway
    # (the lines below this comment are assumed; the snippet is truncated here)
    RESULT.set(random.random())
    LAST_SUCCESS.set_to_current_time()
    push_to_gateway(sys.argv[1], job='my_batch_job', registry=REGISTRY)
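# Usage sketch: run the script against a Pushgateway, then inspect the pushed
# metrics under the gateway's /metrics endpoint (the address is an example):
#   python batch_job.py localhost:9091
if __name__ == '__main__':
    main()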
import logging
import os
import subprocess
from multiprocessing import Process, Manager

from prometheus_client import start_http_server, Summary
from prometheus_client.core import GaugeMetricFamily, REGISTRY

try:
    IPS = os.getenv('TARGET_IPS').split(',')
except AttributeError:
    raise Exception("Mandatory `TARGET_IPS` environment variable is not set")

IPMI_USER = os.getenv('IPMI_USER', 'ADMIN')
IPMI_PASSWD = os.getenv('IPMI_PASSWD', 'ADMIN')
REQUIRED = ["CPU1 Temp", "System Temp", "FAN1"]

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds', 'Time spent processing request')


def _run_cmd(ip, raw):
    logging.info("Collecting from target %s", ip)
    proc = subprocess.Popen(
        ["ipmitool", "-H", ip, "-U", IPMI_USER, "-P", IPMI_PASSWD, "sdr"],
        stdout=subprocess.PIPE)
    out = proc.communicate()[0]
    raw += [x.rstrip() for x in out.split('|')]


class IpmiCollector(object):

    @REQUEST_TIME.time()
    def collect(self):
        sys_metrics = {
import vaping
import vaping.plugins

from builtins import str
from prometheus_client import start_http_server, Summary, Counter, Gauge

min_latency = Summary('minimum_latency_milliseconds',
                      'Minimum latency in milliseconds.', ['host'])  # NOQA
max_latency = Summary('maximum_latency_milliseconds',
                      'Maximum latency in milliseconds.', ['host'])  # NOQA
avg_latency = Summary('average_latency_milliseconds',
                      'Average latency in milliseconds.', ['host'])  # NOQA
sent_packets = Counter('number_of_packets_sent',
                       'Number of pings sent to host.', ['host'])  # NOQA
packet_loss = Gauge('packet_loss', '% packet loss to host (0-100)', ['host'])  # NOQA


@vaping.plugin.register('prometheus')
class Prometheus(vaping.plugins.EmitBase):
    def init(self):
        self.log.debug('init prometheus plugin')
        port = self.pluginmgr_config.get('port', 9099)
        start_http_server(port)

    def emit(self, data):
        raw_data = data.get('data')
        self.log.debug("data: " + str(raw_data))
        for host_data in raw_data:
            host_name = host_data.get('host')
            min_latency.labels(host_name).observe(host_data.get('min'))
            max_latency.labels(host_name).observe(host_data.get('max'))
concurrent_reqs = 0
MAX_CONCURRENT_REQS = 100
MINION_MANAGER_HOSTNAME = "http://minion-manager.kube-system"
MINION_MANAGER_PORT = "6000"

kubectl = KubernetesApiClient(use_proxy=True)

cluster_name_id = os.getenv("AX_CLUSTER_NAME_ID", None)
asg_manager = AXUserASGManager(os.getenv("AX_CLUSTER_NAME_ID"),
                               AXClusterConfig().get_region())

# Need a lock to serialize cluster config operation
cfg_lock = RLock()

axmon_api_latency_stats = Summary("axmon_api_latency",
                                  "Latency for axmon REST APIs",
                                  ["method", "endpoint", "status"])
axmon_api_concurrent_reqs = Gauge("axmon_api_concurrent_reqs",
                                  "Concurrent requests in axmon")


def before_request():
    request.start_time = time.time()
    global concurrent_reqs, MAX_CONCURRENT_REQS, concurrent_reqs_lock
    with concurrent_reqs_lock:
        axmon_api_concurrent_reqs.set(concurrent_reqs)
        # Disabling concurrent request logic for now due to findings in AA-3167
        # if concurrent_reqs >= MAX_CONCURRENT_REQS:
        #     return ax_make_response(
        #         original_jsonify(result="too many concurrent requests (max {})".format(MAX_CONCURRENT_REQS)), 429
        #     )
# flake8: noqa
import os
import sys

import repour
from aiohttp import web
from prometheus_async.aio import time
from prometheus_client import Histogram, Summary

from ... import exception
from repour.lib.scm import git

REQ_TIME = Summary("info_req_time", "time spent with info endpoint")
REQ_HISTOGRAM_TIME = Histogram("info_req_histogram", "Histogram for info endpoint")


@time(REQ_TIME)
@time(REQ_HISTOGRAM_TIME)
async def handle_request(request):
    version = repour.__version__
    path_name = os.path.dirname(sys.argv[0])

    try:
        git_sha = await git.rev_parse(path_name)
    except exception.CommandError:
        git_sha = "Unknown"

    html_text = """
    <h1>Repour Information</h1>
    <ul>
        <li><strong>Repour Version</strong> {}</li>
        <li><strong>Commit Hash</strong> {}</li>
from prometheus_client import Summary, REGISTRY
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily
from threading import Event
from resotolib.args import ArgumentParser
from signal import signal, SIGTERM, SIGINT
from yaml import load
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

shutdown_event = Event()

metrics_update_metrics = Summary(
    "resotometrics_update_metrics_seconds",
    "Time it took the update_metrics() function",
)


def handler(sig, frame) -> None:
    log.info("Shutting down")
    shutdown_event.set()


def main() -> None:
    setup_logger("resotometrics")

    signal(SIGINT, handler)
    signal(SIGTERM, handler)

    arg_parser = ArgumentParser(description="resoto metrics exporter",
import logging
import time

from django.contrib.auth.decorators import login_required
from django.db import transaction
from django.shortcuts import get_object_or_404, redirect, render
from django.utils import timezone
from django.views.decorators.http import require_POST
from prometheus_client import Summary

from ....utils.helpers import is_entirely_digit
from ....utils.locking import lock_on
from ....utils.serialization import safe_json
from ...auth.decorators import deny_restricted, eighth_admin_required
from ..exceptions import SignupException
from ..models import EighthActivity, EighthBlock, EighthScheduledActivity, EighthSignup, EighthWaitlist
from ..serializers import EighthBlockDetailSerializer

logger = logging.getLogger(__name__)

eighth_signup_visits = Summary("intranet_eighth_signup_visits", "Visits to the eighth signup view")
eighth_signup_submits = Summary(
    "intranet_eighth_signup_submits",
    "Number of eighth period signups performed from the eighth signup view")


@login_required
@deny_restricted
def eighth_signup_view(request, block_id=None):
    start_time = time.time()
    if block_id is None and "block" in request.GET:
        block_ids = request.GET.getlist("block")
        if len(block_ids) > 1:
            return redirect("/eighth/signup/multi?{}".format(request.META["QUERY_STRING"]))
#!/usr/bin/python
import time

# requires the prometheus_client package (pip install prometheus_client)
from prometheus_client import Counter, Gauge, Summary, Histogram, start_http_server

if __name__ == '__main__':
    c = Counter('cc', 'A counter')
    c.inc()

    g = Gauge('gg', 'A gauge')
    g.set(17)

    s = Summary('ss', 'A summary', ['a', 'b'])
    s.labels('c', 'd').observe(17)

    h = Histogram('hh', 'A histogram')
    h.observe(.6)

    start_http_server(8000)
    while True:
        time.sleep(1)
import logging
import os

import spacy
from logstash_formatter import LogstashFormatter  # (import assumed; only the class name appears in the snippet)
from prometheus_client import Summary
from spacy.symbols import DEP

logging.basicConfig(
    level=logging.DEBUG,
    format="[%(asctime)s] [%(process)d] [%(levelname)s] %(message)s")
log = logging.getLogger()

if os.getenv('REWE_BDP_STAGE') is not None:
    loghandler = logging.StreamHandler()
    loghandler.setFormatter(LogstashFormatter())
    log.handlers = []
    log.addHandler(loghandler)

MODELS = os.getenv("languages", "").split()

# Create a metric to track time spent and requests made.
REQUEST_TIME = Summary('request_processing_seconds',
                       'Time spent processing request',
                       labelnames=['method', 'endpoint'])

_models = {}


def get_model(model_name):
    if model_name not in _models:
        _models[model_name] = spacy.load(model_name)
    return _models[model_name]


def get_dep_types(model):
    '''List the available dep labels in the model.'''
    labels = []
    for label_id in model.parser.moves.freqs[DEP]: