Example #1
    def __init__(self):
        self.main_counter = pc.Counter(
            "main_counter", "total requests to your redirect page")
        self.redirect_time = pc.Histogram(
            "redirect_time", "a histogram of the redirect time")
        self.users_counter = pc.Counter(
            "users_counter", "a counter of the users in our platform",
            ["ip", "browser", "platform", "language"])
Example #2
class Metric(object):
    """A Namespace for our metrics"""
    # Metrics we record in prometheus
    _INVALID_RECORDS = prometheus_client.Counter(
        'mjolnir_bulk_invalid_records_total',
        "Number of requests that could not be processed", ['reason'])
    FAIL_VALIDATE = _INVALID_RECORDS.labels(reason='fail_validate')
    MISSING_INDEX = _INVALID_RECORDS.labels(reason='missing_index')
    SUBMIT_BATCH = prometheus_client.Summary(
        'mjolnir_bulk_submit_batch_seconds',
        'Time taken to submit a batch from kafka to elasticsearch')
    RECORDS_PROCESSED = prometheus_client.Counter(
        'mjolnir_bulk_records_total',
        'Number of kafka records processed')
    _BULK_ACTION_RESULT = prometheus_client.Counter(
        'mjolnir_bulk_action_total',
        'Number of bulk action responses per result type', ['result'])
    ACTION_RESULTS = {
        'updated': _BULK_ACTION_RESULT.labels(result='updated'),
        'created': _BULK_ACTION_RESULT.labels(result='created'),
        'noop': _BULK_ACTION_RESULT.labels(result='noop'),
    }
    OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown')
    MISSING = _BULK_ACTION_RESULT.labels(result='missing')
    FAILED = _BULK_ACTION_RESULT.labels(result='failed')
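Binding .labels() children to class attributes up front, as above, means call sites never deal with label strings. A sketch of how these attributes are typically exercised (the sleep is a stand-in for the real batch submit; the calls are standard prometheus_client API):

import time

Metric.FAIL_VALIDATE.inc()            # one more invalid record

with Metric.SUBMIT_BATCH.time():      # Summary observes elapsed seconds
    time.sleep(0.01)                  # stand-in for the Kafka-to-ES submit

result = 'updated'                    # e.g. taken from the bulk response
Metric.ACTION_RESULTS.get(result, Metric.OK_UNKNOWN).inc()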
Example #3
class Metric:
    _INVALID_RECORDS = prometheus_client.Counter(
        'mjolnir_swift_invalid_records_total',
        "Number of requests that could not be processed",
        ['reason']
    )
    FAIL_VALIDATE = _INVALID_RECORDS.labels(reason="validate")
    FAIL_NO_CONFIG = _INVALID_RECORDS.labels(reason="no_config")

    PROCESS_MESSAGE = prometheus_client.Summary(
        'mjolnir_swift_process_message_seconds',
        'Time taken to process individual kafka messages')

    BULK_IMPORT = prometheus_client.Summary(
        'mjolnir_swift_import_file_seconds',
        'Time taken to import a file into elasticsearch'
    )

    _BULK_ACTION_RESULT = prometheus_client.Counter(
        'mjolnir_swift_action_total',
        'Number of bulk action responses per result type', ['result'])
    ACTION_RESULTS = {
        'updated': _BULK_ACTION_RESULT.labels(result='updated'),
        'created': _BULK_ACTION_RESULT.labels(result='created'),
        'noop': _BULK_ACTION_RESULT.labels(result='noop'),
    }
    OK_UNKNOWN = _BULK_ACTION_RESULT.labels(result='ok_unknown')
    MISSING = _BULK_ACTION_RESULT.labels(result='missing')
    FAILED = _BULK_ACTION_RESULT.labels(result='failed')
    TIMEOUT = _BULK_ACTION_RESULT.labels(result='timeout')
Example #4
    def after_process_boot(self, broker):
        os.environ["prometheus_multiproc_dir"] = DB_PATH

        # This import MUST happen at runtime, after process boot and
        # after the env variable has been set up.
        import prometheus_client as prom

        self.logger.debug("Setting up metrics...")
        registry = prom.CollectorRegistry()
        self.total_messages = prom.Counter(
            "dramatiq_messages_total",
            "The total number of messages processed.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.total_errored_messages = prom.Counter(
            "dramatiq_message_errors_total",
            "The total number of errored messages.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.total_retried_messages = prom.Counter(
            "dramatiq_message_retries_total",
            "The total number of retried messages.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.total_rejected_messages = prom.Counter(
            "dramatiq_message_rejects_total",
            "The total number of dead-lettered messages.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.total_revived_messages = prom.Counter(
            "dramatiq_message_revives_total",
            "The total number of messages revived from dead workers.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.inprogress_messages = prom.Gauge(
            "dramatiq_messages_inprogress",
            "The number of messages in progress.",
            ["queue_name", "actor_name"],
            registry=registry,
            multiprocess_mode="livesum",
        )
        self.inprogress_delayed_messages = prom.Gauge(
            "dramatiq_delayed_messages_inprogress",
            "The number of delayed messages in memory.",
            ["queue_name", "actor_name"],
            registry=registry,
        )
        self.message_durations = prom.Histogram(
            "dramatiq_message_duration_milliseconds",
            "The time spent processing messages.",
            ["queue_name", "actor_name"],
            buckets=(5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000,
                     7500, 10000, 30000, 60000, 600000, 900000, float("inf")),
            registry=registry,
        )
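The per-process registry above is only half of multiprocess mode: a scrape endpoint has to aggregate the samples every worker writes under prometheus_multiproc_dir. A minimal exposition sketch under that assumption (MultiProcessCollector is part of prometheus_client; the directory and port are arbitrary, and newer releases prefer the upper-case PROMETHEUS_MULTIPROC_DIR spelling):

import os
os.makedirs("/tmp/dramatiq-prom", exist_ok=True)
os.environ["prometheus_multiproc_dir"] = "/tmp/dramatiq-prom"  # set before import

import prometheus_client as prom
from prometheus_client import multiprocess

registry = prom.CollectorRegistry()
multiprocess.MultiProcessCollector(registry)     # merges every worker's .db files
prom.start_http_server(9191, registry=registry)  # serve the aggregated metrics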
Example #5
def initialize(name, host, port):
    queue_latency = prometheus_client.Histogram('queue_latency', 'queue latency', ['app', 'queue'])
    queue_counter = prometheus_client.Counter('queue_counter', 'queue counter', ['app', 'queue'])
    dequeue_latency = prometheus_client.Histogram('dequeue_latency', 'dequeue latency', ['app', 'queue'])
    dequeue_counter = prometheus_client.Counter('dequeue_counter', 'dequeue counter', ['app', 'queue'])
    for _ in range(10):
        try:
            redis_conn = redis.Redis(host, port, decode_responses=True)
            break
        except Exception as e:
            print('ERROR:', e)
            time.sleep(1)
    def push(queue, data):
        value = json.dumps(data)
        with queue_latency.labels(app=name, queue=queue).time():
            redis_conn.rpush(queue, value)
        queue_counter.labels(app=name, queue=queue).inc()
    def pop(queue, function):
        while True:
            try:
                _, value = redis_conn.blpop(queue)
                dequeue_counter.labels(app=name, queue=queue).inc()
                try:
                    with dequeue_latency.labels(app=name, queue=queue).time():
                        function(json.loads(value))
                except Exception as e:
                    print('ERROR:', e)
                    push(queue + '.dead', value)
            except Exception as e:
                print('ERROR:', e)
    return push, pop
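A usage sketch for the helpers returned above (the app name, address, queue name and handler are all illustrative; assumes a reachable Redis):

push, pop = initialize('myapp', 'localhost', 6379)

push('jobs', {'id': 1, 'task': 'resize'})  # enqueued, timed and counted

def handle(data):                          # illustrative consumer callback
    print('got', data)

pop('jobs', handle)  # blocks forever; failed payloads go to 'jobs.dead'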
Example #6
    def __init__(self):
        self.registry = prometheus_client.CollectorRegistry()

        self._video_processed = prometheus_client.Counter(
            "video_processed",
            "Video processed count",
            namespace="youtube",
            subsystem="video",
            registry=self.registry,
        )
        self._emails_found = prometheus_client.Counter(
            "emails_found",
            "Emails found",
            namespace="youtube",
            subsystem="video",
            registry=self.registry,
        )
        self._unique_emails_found = prometheus_client.Counter(
            "unique_emails_found",
            "Unique emails found",
            namespace="youtube",
            subsystem="video",
            registry=self.registry,
        )
Example #7
    def __init__(self, prefix, description, labels):
        """
        :param prefix: prefix to use for each metric name
        :param description: description of action to use in metric description
        :param labels: label names to define for each metric
        """
        self.full_prefix = '{}_{}'.format(self.__class__._PREFIX, prefix)
        self.progress = prometheus_client.Gauge(
            '{}_attempt_inprogress'.format(self.full_prefix),
            'In progress attempts to {}'.format(description),
            labels,
            registry=REGISTRY,
            multiprocess_mode='livesum')
        self.attempt_total = prometheus_client.Counter(
            '{}_attempt_total'.format(self.full_prefix),
            'Total attempts to {}'.format(description),
            labels,
            registry=REGISTRY)
        self.failure_total = prometheus_client.Counter(
            '{}_failure_total'.format(self.full_prefix),
            'Total failures to {}'.format(description),
            labels,
            registry=REGISTRY)
        self.duration = prometheus_client.Histogram(
            '{}_duration_seconds'.format(self.full_prefix),
            'Seconds to {}'.format(description),
            labels,
            registry=REGISTRY)
Example #8
def run(args):
    s3uri = args.s3uri
    localpath = args.localpath
    excludes = args.exclude
    interval = args.interval

    i = pc.Info('s3insync_version',
                'Version and config information for the client')
    i.info({
        'version': s3insync.__version__,
        'aws_repo': s3uri,
        'localpath': localpath,
    })
    start_time = pc.Gauge('s3insync_start_time',
                          'Time the sync process was started')
    start_time.set_to_current_time()

    last_sync = pc.Gauge('s3insync_last_sync_time',
                         'Time the last sync completed')
    op_count = pc.Counter('s3insync_operations',
                          'Count of operations',
                          labelnames=('type', ))
    failed_op_count = pc.Counter('s3insync_failed_operations',
                                 'Count of failed operations',
                                 labelnames=('type', ))
    files_in_s3 = pc.Gauge(
        's3insync_files_in_s3',
        'Number of files in S3',
    )

    pc.start_http_server(8087)
    src = r.S3Repo('s3', s3uri)
    dest = r.LocalFSRepo('fs', localpath,
                         os.path.join(os.getenv('HOME'), ".s3insync"))
    dest.ensure_directories()

    sync = sd.SyncDecider(excludes)

    set_exit = setup_signals()

    while not set_exit.is_set():
        logger.debug("Starting sync")
        start = time.monotonic()

        try:
            success, failures = sync.execute_sync(src, dest)
            files_in_s3.set(success.pop('total', 0))
            set_op_counts(success, op_count)
            set_op_counts(failures, failed_op_count)
            last_sync.set_to_current_time()
        except Exception:
            logger.exception("Failed to excute sync")

        duration = time.monotonic() - start
        logger.debug("Stopping sync after %g secs", duration)

        set_exit.wait(max(30, interval - duration))
Example #9
    def __init__(self):
        self.metrics = {
            'notifications': prometheus_client.Counter(
                'prometheus_webhook_snmp_notifications',
                'Number of processed Prometheus Alertmanager notifications.'),
            'traps': prometheus_client.Counter(
                'prometheus_webhook_snmp_traps',
                'Number of sent SNMP traps.')
        }
Example #10
    def __init__(self, bot: Life) -> None:
        self.bot = bot

        self.process = psutil.Process()
        self.ready = False

        self.guild_stats = prometheus_client.Gauge(
            'counts',
            documentation='Guild counts',
            namespace='guild',
            labelnames=['guild_id', 'count'])

        self.socket_responses = prometheus_client.Counter(
            'socket_responses',
            documentation='Socket responses',
            namespace='life',
            labelnames=['response'])
        self.socket_events = prometheus_client.Counter(
            'socket_events',
            documentation='Socket events',
            namespace='life',
            labelnames=['event'])

        self.counters = prometheus_client.Counter('stats',
                                                  documentation='Life stats',
                                                  namespace='life',
                                                  labelnames=['stat'])
        self.gauges = prometheus_client.Gauge('counts',
                                              documentation='Life counts',
                                              namespace='life',
                                              labelnames=['count'])

        self.op_types = {
            0: 'DISPATCH',
            1: 'HEARTBEAT',
            2: 'IDENTIFY',
            3: 'PRESENCE',
            4: 'VOICE_STATE',
            5: 'VOICE_PING',
            6: 'RESUME',
            7: 'RECONNECT',
            8: 'REQUEST_MEMBERS',
            9: 'INVALIDATE_SESSION',
            10: 'HELLO',
            11: 'HEARTBEAT_ACK',
            12: 'GUILD_SYNC',
        }

        self.stats_five_minutes.start()
        self.stats_thirty_seconds.start()
Example #11
    def _setup_prom_data(self):
        self.middleware = PromScrapeMiddleware(self)
        util_bot.bot.middleware.append(self.middleware)
        self.messages_sent = prom.Counter('messages_sent',
                                          'Messages sent by channel',
                                          ['channel'])
        self.messages_received = prom.Counter('messages_received',
                                              'Messages received by channel',
                                              ['channel'])
        self.commands_executed = prom.Counter('commands_executed',
                                              'Commands executed by name',
                                              ['command'])
        self.hastebins_created = prom.Counter('hastebins_created',
                                              'Hastebins created')
Example #12
    def __init__(self, client):
        self.prometheus_port = client.prometheus_port
        self.run_rule = client.run_rule
        self.writeback = client.writeback

        client.run_rule = self.metrics_run_rule
        client.writeback = self.metrics_writeback

        # initialize prometheus metrics to be exposed
        self.prom_scrapes = prometheus_client.Counter(
            'elastalert_scrapes', 'Number of scrapes for rule', ['rule_name'])
        self.prom_hits = prometheus_client.Counter('elastalert_hits',
                                                   'Number of hits for rule',
                                                   ['rule_name'])
        self.prom_matches = prometheus_client.Counter(
            'elastalert_matches', 'Number of matches for rule', ['rule_name'])
        self.prom_time_taken = prometheus_client.Counter(
            'elastalert_time_taken', 'Time taken to evaluate rule',
            ['rule_name'])
        self.prom_alerts_sent = prometheus_client.Counter(
            'elastalert_alerts_sent', 'Number of alerts sent for rule',
            ['rule_name'])
        self.prom_alerts_not_sent = prometheus_client.Counter(
            'elastalert_alerts_not_sent', 'Number of alerts not sent',
            ['rule_name'])
        self.prom_errors = prometheus_client.Counter(
            'elastalert_errors', 'Number of errors for rule')
        self.prom_alerts_silenced = prometheus_client.Counter(
            'elastalert_alerts_silenced', 'Number of silenced alerts',
            ['rule_name'])
Example #13
    def _create_metrics(self):
        """Creates a registry and records metrics"""
        self.registry = prometheus_client.CollectorRegistry()
        self.quota_free_count = prometheus_client.Gauge(
            'kuryr_quota_free_count', 'Amount of quota available'
            ' for the network resource',
            labelnames={'resource'},
            registry=self.registry)

        self.port_quota_per_subnet = prometheus_client.Gauge(
            'kuryr_port_quota_per_subnet', 'Amount of ports available'
            ' on Subnet',
            labelnames={'subnet_id', 'subnet_name'},
            registry=self.registry)

        self.lbs_members_count = prometheus_client.Gauge(
            'kuryr_critical_lb_members_count', 'Amount of members per '
            'critical Load Balancer pool',
            labelnames={'lb_name', 'lb_pool_name'},
            registry=self.registry)

        self.lbs_state = prometheus_client.Enum('kuryr_critical_lb_state',
                                                'Critical Load Balancer State',
                                                labelnames={'lb_name'},
                                                states=[
                                                    'ERROR', 'ACTIVE',
                                                    'DELETED',
                                                    'PENDING_CREATE',
                                                    'PENDING_UPDATE',
                                                    'PENDING_DELETE'
                                                ],
                                                registry=self.registry)

        buckets = (10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, _INF)
        self.pod_creation_latency = prometheus_client.Histogram(
            'kuryr_pod_creation_latency', 'Time taken for a pod to have'
            ' Kuryr annotations set',
            buckets=buckets,
            registry=self.registry)

        self.load_balancer_readiness = prometheus_client.Counter(
            'kuryr_load_balancer_readiness', 'This counter is increased when '
            'Kuryr notices that an Octavia load balancer is stuck in an '
            'unexpected state',
            registry=self.registry)

        self.port_readiness = prometheus_client.Counter(
            'kuryr_port_readiness', 'This counter is increased when Kuryr '
            'times out waiting for Neutron to move port to ACTIVE',
            registry=self.registry)
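Because these metrics live in a private CollectorRegistry rather than the default one, that registry has to be handed to whatever serves the scrape endpoint. A hedged sketch of two standard options (start_http_server's registry argument and generate_latest are both part of prometheus_client; names, values and the port are illustrative):

import prometheus_client

registry = prometheus_client.CollectorRegistry()
free_count = prometheus_client.Gauge(
    'kuryr_quota_free_count',
    'Amount of quota available for the network resource',
    labelnames=['resource'], registry=registry)
free_count.labels(resource='ports').set(42)

# Option 1: serve this registry on its own port.
prometheus_client.start_http_server(9100, registry=registry)

# Option 2: render it on demand inside an existing HTTP handler.
payload = prometheus_client.generate_latest(registry)  # text exposition format, as bytes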
Example #14
    def __init__(self, bot, prefix) -> None:
        self.command_counter = prom.Counter(
            f"{prefix}_commands_ran", "How many times commands were ran",
            ["command_name", "cluster"])

        self.user_message_raw_count = prom.Counter(
            f"{prefix}_user_message_raw_count",
            "Raw count of how many messages we have seen from users",
            ["cluster"])
        self.bot_message_raw_count = prom.Counter(
            f"{prefix}_bot_message_raw_count",
            "Raw count of how many messages we have seen from bots",
            ["cluster"])
        self.own_message_raw_count = prom.Counter(
            f"{prefix}_own_message_raw_count",
            "Raw count of how many messages GearBot has send", ["cluster"])

        self.bot_guilds = prom.Gauge(f"{prefix}_guilds",
                                     "How many guilds the bot is in",
                                     ["cluster"])

        self.bot_users = prom.Gauge(f"{prefix}_users",
                                    "How many users the bot can see",
                                    ["cluster"])
        self.bot_users_unique = prom.Gauge(
            f"{prefix}_users_unique", "How many unique users the bot can see",
            ["cluster"])
        self.bot_event_counts = prom.Counter(f"{prefix}_event_counts",
                                             "How much each event occurred",
                                             ["event_name", "cluster"])

        self.bot_latency = prom.Gauge(f"{prefix}_latency",
                                      "Current bot latency", ["cluster"])

        self.uid_usage = prom.Counter(
            f"{prefix}_context_uid_usage",
            "Times uid was used from the context command", ["type", "cluster"])
        self.userinfo_usage = prom.Counter(
            f"{prefix}_context_userinfo_usage",
            "Times userinfo was used from the context command",
            ["type", "cluster"])
        self.inf_search_usage = prom.Counter(
            f"{prefix}_context_inf_search_usage",
            "Times inf serach was used from the context command",
            ["type", "cluster"])

        bot.metrics_reg.register(self.command_counter)
        bot.metrics_reg.register(self.user_message_raw_count)
        bot.metrics_reg.register(self.bot_message_raw_count)
        bot.metrics_reg.register(self.bot_guilds)
        bot.metrics_reg.register(self.bot_users)
        bot.metrics_reg.register(self.bot_users_unique)
        bot.metrics_reg.register(self.bot_event_counts)
        bot.metrics_reg.register(self.own_message_raw_count)
        bot.metrics_reg.register(self.bot_latency)
        bot.metrics_reg.register(self.uid_usage)
        bot.metrics_reg.register(self.userinfo_usage)
        bot.metrics_reg.register(self.inf_search_usage)
Example #15
def csets_filter_worker(args, config, db):
    # Create the metrics unconditionally and up front: on_message closes over
    # them, so defining them only when args.metrics was set would raise a
    # NameError on the first message. Registering them is harmless even when
    # the metrics endpoint is disabled.
    m_events = prometheus_client.Counter('osmtracker_events',
                                         'Number of events', EVENT_LABELS)
    m_filter_time = prometheus_client.Histogram(
        'osmtracker_changeset_filter_processing_time_seconds',
        'Changeset filtering time (seconds)')

    class FilterAmqp(messagebus.Amqp):
        def on_message(self, payload, message):
            logger.info('Filter: {}'.format(payload))
            start = time.time()
            if cset_filter(self.config, self.db, payload):
                amqp.send(payload,
                          schema_name='cset',
                          schema_version=1,
                          routing_key='analysis_cset.osmtracker')
                m_events.labels('analysis', 'in').inc()
            m_events.labels('filter', 'out').inc()
            elapsed = time.time() - start
            m_filter_time.observe(elapsed)
            logger.info('Filtering of cid {} took {:.2f}s'.format(
                payload['cid'], elapsed))
            message.ack()

    amqp = FilterAmqp(args.amqp_url, AMQP_EXCHANGE_TOPIC, 'topic', AMQP_QUEUES,
                      [AMQP_FILTER_QUEUE])
    amqp.config = config
    amqp.db = db

    logger.debug('Starting filter worker')
    amqp.run()
Example #16
    def __init__(self, kubeconfig, token_path):
        token_path = token_path or "/var/run/secrets/kubernetes.io/serviceaccount"
        if kubeconfig and os.path.exists(kubeconfig):
            logger.debug("Using configuration from kubeconfig %s" % kubeconfig)
            kubernetes.config.load_kube_config(config_file=kubeconfig)
        elif os.path.exists(token_path):
            logger.debug("Using configuration from token in %s" % token_path)
            loader = kubernetes.config.incluster_config.InClusterConfigLoader(
                os.path.join(token_path, "token"), os.path.join(token_path, "ca.crt"),
            )
            loader.load_and_set()
        else:
            raise Exception("No kubeconfig or token found")

        self.v1 = kubernetes.client.CoreV1Api()
        self.appsv1 = kubernetes.client.AppsV1Api()
        self.client = kubernetes.client.ApiClient()
        self.custom = kubernetes.client.CustomObjectsApi()

        self.deleted_total = prometheus.Counter(
            "memguardian_deleted_pod_total",
            "Total deleted pods from start.",
            ["namespace", "owner"],
        )
Example #17
def setup_status(app) -> prometheus_client.CollectorRegistry:
    """Add /status to serve Prometheus-driven runtime metrics."""
    registry = prometheus_client.CollectorRegistry(auto_describe=True)
    app["request_count"] = prometheus_client.Counter(
        "requests_total",
        "Total Request Count",
        ["app_name", "method", "endpoint", "http_status"],
        registry=registry,
    )
    app["request_latency"] = prometheus_client.Histogram(
        "request_latency_seconds",
        "Request latency",
        ["app_name", "endpoint"],
        registry=registry,
    )
    app["request_in_progress"] = prometheus_client.Gauge(
        "requests_in_progress_total",
        "Requests in progress",
        ["app_name", "endpoint", "method"],
        registry=registry,
    )
    prometheus_client.Info("server", "API server version",
                           registry=registry).info({
                               "version":
                               metadata.__version__,
                               "commit":
                               getattr(metadata, "__commit__", "null"),
                               "build_date":
                               getattr(metadata, "__date__", "null"),
                           })
    app.middlewares.insert(0, instrument)
    # passing StatusRenderer(registry) without __call__ triggers a spurious DeprecationWarning
    # FIXME(vmarkovtsev): https://github.com/aio-libs/aiohttp/issues/4519
    app.router.add_get("/status", StatusRenderer(registry).__call__)
    return registry
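The StatusRenderer used above is not shown here; a minimal sketch of what such a handler could look like, assuming it only needs to render the registry in the text exposition format (generate_latest and CONTENT_TYPE_LATEST are real prometheus_client exports; this class body is illustrative, not the project's actual implementation):

from aiohttp import web
import prometheus_client
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest

class StatusRenderer:
    """Illustrative /status handler rendering a given registry."""

    def __init__(self, registry: prometheus_client.CollectorRegistry) -> None:
        self._registry = registry

    async def __call__(self, request: web.Request) -> web.Response:
        body = generate_latest(self._registry)  # bytes in text exposition format
        return web.Response(body=body,
                            headers={"Content-Type": CONTENT_TYPE_LATEST})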
Example #18
    def __init__(
            self,
            reporter_order=(
                'hdf5_reporter',
                'dashboard_reporter',
            ),
    ):

        self.reporter_order = reporter_order

        # counters
        self.cycle_counter = prom.Counter('wepy_cycle_idx', "")

        # gauges
        self.walker_size_g = prom.Gauge('wepy_walker_single_size_bytes', "")
        self.ensemble_size_g = prom.Gauge('wepy_walker_ensemble_size_bytes',
                                          "")

        self.runner_size_g = prom.Gauge('wepy_runner_size_bytes', "")
        self.resampler_size_g = prom.Gauge('wepy_resampler_size_bytes', "")
        self.bc_size_g = prom.Gauge('wepy_bc_size_bytes', "")
        self.mapper_size_g = prom.Gauge('wepy_mapper_size_bytes', "")

        self.sim_manager_size_g = prom.Gauge('wepy_sim_manager_size_bytes', "")

        self.reporter_size_g = prom.Gauge(
            'wepy_reporters_size_bytes',
            "",
            ["name"],
        )
Example #19
def consumer(name, instance, host, port):
    consumer_latency = prometheus_client.Histogram(
        'consumer_latency', 'consumer latency', ['app', 'instance', 'topic'])
    consumer_counter = prometheus_client.Counter('consumer_counter',
                                                 'consumer counter',
                                                 ['app', 'instance', 'topic'])
    for _ in range(100):
        try:
            consumer = kafka.KafkaConsumer(bootstrap_servers='{}:{}'.format(
                host, port),
                                           group_id=name,
                                           auto_offset_reset='earliest',
                                           enable_auto_commit=False)
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)

    def consume(topic, function):
        consumer.subscribe([topic])
        for message in consumer:
            consumer_counter.labels(app=name, instance=instance,
                                    topic=topic).inc()
            with consumer_latency.labels(app=name,
                                         instance=instance,
                                         topic=topic).time():
                function(json.loads(message.value.decode('utf-8')))
            consumer.commit()

    return consume
Example #20
def initialize(name):
    app = flask.Flask(name)
    request_latency = prometheus_client.Histogram(
        'request_latency', 'request latency',
        ['app', 'method', 'path', 'status'])
    request_counter = prometheus_client.Counter(
        'request_counter', 'request counter',
        ['app', 'method', 'path', 'status'])

    @app.route('/health')
    def health():
        return 'OK'

    @app.route('/version')
    def version():
        return '0.1.0'

    @app.before_request
    def before_request():
        flask.request.start_time = time.time()

    @app.after_request
    def after_request(response):
        latency = time.time() - flask.request.start_time
        request_latency.labels(app=name,
                               method=flask.request.method,
                               path=flask.request.path,
                               status=response.status_code).observe(latency)
        request_counter.labels(app=name,
                               method=flask.request.method,
                               path=flask.request.path,
                               status=response.status_code).inc()
        return response

    return app
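The app above records request metrics but never exposes them; one common pattern is to mount prometheus_client's WSGI app under /metrics (a sketch using make_wsgi_app and Werkzeug's DispatcherMiddleware, both real APIs; 'myapp' is an arbitrary name):

import prometheus_client
from werkzeug.middleware.dispatcher import DispatcherMiddleware

app = initialize('myapp')
# Serve the default registry at /metrics alongside the Flask routes.
app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
    '/metrics': prometheus_client.make_wsgi_app(),
})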
Example #21
def initialize(name, host, port):
    db_latency = prometheus_client.Histogram('db_latency', 'db latency',
                                             ['app', 'query'])
    db_counter = prometheus_client.Counter('db_counter', 'db counter',
                                           ['app', 'query'])
    for _ in range(10):
        try:
            postgres = psycopg2.connect(host=host,
                                        port=port,
                                        dbname='postgres',
                                        user='******',
                                        password='******')
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)
    execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";')

    def execute(query, values=None):
        with db_latency.labels(app=name, query=query).time():
            cursor = postgres.cursor()
            cursor.execute(query, values)
            data = cursor.fetchall() if cursor.description else []
            postgres.commit()
            cursor.close()
        db_counter.labels(app=name, query=query).inc()
        return data

    return execute
Example #22
    def __init__(self):
        self._feed_pk_to_system_id_and_feed_id = {}
        self._feed_pk_to_successful_update_data: typing.Dict[
            int, typing.Tuple[float, float]] = {}

        self._num_updates = prometheus.Counter(
            PROMETHEUS_NUM_UPDATES,
            "Number of feed updates of a given feed, status and result",
            ["system_id", "feed_id", "status", "result"],
        )
        self._last_update = prometheus.Gauge(
            PROMETHEUS_LAST_UPDATE,
            "Time since the last update of a given feed, status and result",
            ["system_id", "feed_id", "status", "result"],
        )
        self._num_entities = prometheus.Gauge(
            PROMETHEUS_NUM_ENTITIES,
            "Number of entities of a given type present from a given feed",
            ["system_id", "feed_id", "entity_type"],
        )
        self._update_latency = prometheus.Gauge(
            PROMETHEUS_SUCCESSFUL_UPDATE_LATENCY,
            "Number of seconds between successful updates of a feed",
            ["system_id", "feed_id"],
        )
Example #23
def initialize(name, host, port):
    db_latency = prometheus_client.Histogram('db_latency', 'db latency',
                                             ['app', 'query'])
    db_counter = prometheus_client.Counter('db_counter', 'db counter',
                                           ['app', 'query'])
    for _ in range(100):
        try:
            session = cassandra.cluster.Cluster(
                [host],
                load_balancing_policy=cassandra.policies.RoundRobinPolicy(),
                port=port).connect()
            break
        except Exception as e:
            print('ERROR', e)
            time.sleep(1)
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS hjalp WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '2' }"
    )
    session.set_keyspace('hjalp')

    def execute(query, values=tuple()):
        with db_latency.labels(app=name, query=query).time():
            rows = session.execute(session.prepare(query).bind(values))
            data = [r._asdict() for r in rows]
        db_counter.labels(app=name, query=query).inc()
        return data

    return execute
Example #24
def install_stacksampler(interval=0.005):
    """Samples the stack every INTERVAL seconds of user time.

    We could use user+sys time but that leads to interrupting syscalls,
    which may affect performance, and we care mostly about user time anyway.
    """
    # Note we only start each next timer once the previous timer signal has been processed.
    # There are two reasons for this:
    # 1. Avoid handling a signal while already handling a signal, however unlikely,
    #    as this could lead to a deadlock due to locking inside prometheus_client.
    # 2. Avoid biasing the results by effectively not including the time taken to do the actual
    #    stack sampling.

    flamegraph = prom.Counter(
        "flamegraph",
        "Approx time consumed by each unique stack trace seen by sampling the stack",
        ["stack"])
    # HACK: It's possible to deadlock if we handle a signal during a prometheus collect
    # operation that locks our flamegraph metric. We then try to take the lock when recording the
    # metric, but can't.
    # As a hacky work around, we replace the lock with a dummy lock that doesn't actually lock anything.
    # This is reasonably safe. We know that only one copy of sample() will ever run at once,
    # and nothing else but sample() and collect() will touch the metric, leaving two possibilities:
    # 1. Multiple collects happen at once: Safe. They only do read operations.
    # 2. A sample during a collect: Safe. The collect only does a copy inside the locked part,
    #    so it just means it'll either get a copy with the new label set, or without it.
    # This presumes the implementation doesn't change to make that different, however.
    flamegraph._lock = gevent.lock.DummySemaphore()
    # There is also a lock we need to bypass on the actual counter values themselves.
    # Since they get created dynamically, this means we need to replace the lock function
    # that is used to create them.
    # This unfortunately means we go without locking for all metrics, not just this one,
    # however this is safe because we are using gevent, not threading. The lock is only
    # used to make incrementing/decrementing the counter thread-safe, which is not a concern
    # under gevent since there are no switch points under the lock.
    import prometheus_client.values
    prometheus_client.values.Lock = gevent.lock.DummySemaphore

    def sample(signum, frame):
        stack = []
        while frame is not None:
            stack.append(frame)
            frame = frame.f_back
        # format each frame as FUNCTION(MODULE)
        stack = ";".join("{}({})".format(frame.f_code.co_name,
                                         frame.f_globals.get('__name__'))
                         for frame in stack[::-1])
        # increase counter by interval, so final units are in seconds
        flamegraph.labels(stack).inc(interval)
        # schedule the next signal
        signal.setitimer(signal.ITIMER_VIRTUAL, interval)

    def cancel():
        signal.setitimer(signal.ITIMER_VIRTUAL, 0)

    atexit.register(cancel)

    signal.signal(signal.SIGVTALRM, sample)
    # deliver the first signal in INTERVAL seconds
    signal.setitimer(signal.ITIMER_VIRTUAL, interval)
Example #25
class Metrics(object):
    RequestCounter = prom.Counter('http_requests_total',
                                  'Total number of HTTP requests.',
                                  ['method', 'scheme'])
    ResponseCounter = prom.Counter('http_responses_total',
                                   'Total number of HTTP responses.',
                                   ['status'])
    LatencyHistogram = prom.Histogram('http_latency_seconds',
                                      'Overall HTTP transaction latency.')
    RequestSizeHistogram = prom.Histogram(
        'http_requests_body_bytes',
        'Breakdown of HTTP requests by content length.',
        buckets=powers_of(5, 11))
    ResponseSizeHistogram = prom.Histogram(
        'http_responses_body_bytes',
        'Breakdown of HTTP responses by content length.',
        buckets=powers_of(5, 11))
Example #26
def getCounter(name, description, labels):
    if name in counters:
        counter = counters[name]
    else:
        print("Creating Counter: {}".format(name))
        counter = prometheus_client.Counter(name, description, labels)
        counters[name] = counter
    return counter
Example #27
    def _recreate_metrics(self, registry):
        self._failure_counter = prometheus_client.Counter(
            'transmission_failures',
            'Number of failed transmissions',
            registry=registry)
        self._transmission_gauge = prometheus_client.Gauge(
            'zone_transmissions',
            'Transmissions during the last job',
            registry=registry)
Example #28
async def setup(
    port: int,
    consul_host: Optional[str] = "127.0.0.1",
    use_IPs=False,
):
    app = web.Application()

    app['consul_host'] = consul_host
    app['cfg'] = {
        "port": port,
        "use_ips": use_IPs,
    }

    jinja_env = Environment(loader=FileSystemLoader('templates'),
                            autoescape=select_autoescape(['html', 'xml']))

    app['jinja_env'] = jinja_env

    if util.is_rpi3():
        low_voltage_observed = prometheus_client.Gauge(
            "rpi_low_voltage_observed",
            "Raspberry PI low voltage observed over observation window", [])

        asyncio.ensure_future(
            monitor_voltage(lambda x: low_voltage_observed.set(x)))

    if util.is_rpi3():
        journald_logged = prometheus_client.Counter(
            "journald_logged", "Message was logged to journald", [])

        asyncio.ensure_future(
            run_journalctl(
                lambda x: journald_logged.inc(),
                lambda x: None,
            ))

        asyncio.ensure_future(
            read_temperature.monitor_temperatures(TEMPERATURE))

    if util.is_rpi3():
        asyncio.create_task(update_time())

    app.add_routes([
        web.get('/', handle),
        web.get('/a', host_handler),
        web.get('/static/{name}.js', static_text_handler("js")),
        web.get('/health', health.health_check),
        web.get('/metrics', handle_metrics),
        web.get('/stop', stop),
        web.get('/start', start),
        web.get('/restart', restart),
        web.get('/restart-host', restart_host),
        web.get('/shutdown-host', shutdown_host),
        web.get('/time', gettime),
    ])

    return app
Example #29
    def get_prometheus_counter(self):
        counter = getattr(prometheus.REGISTRY, '_command_executor_counter',
                          None)
        if not counter:
            counter = prometheus.Counter(
                'cds_ce_execution_error_total',
                'How many times CE actions (upload, prepare env and execute) got executed and failed for each CBA python script',
                ['step', 'blueprint_name', 'blueprint_version', 'script_name'])
            prometheus.REGISTRY._command_executor_counter = counter
        return counter
Example #30
    def test_prometheus(self, request):
        """Increment prometheus metric for testing"""
        if not pkg_is_installed('prometheus-client'):
            return Response('Not Supported', status=501)

        if not hasattr(self, 'test_counter'):
            import prometheus_client
            self.test_counter = prometheus_client.Counter('test', 'test')
        self.test_counter.inc()
        return Response('Incremented test counter')