def inc_counter(self, key, amount=1):
    """Increment counter."""
    prometheus_counter = Gauge(  # pylint: disable=no-value-for-parameter
        key
    )
    prometheus_counter.inc(amount)
def publish_comments(self, comment, link):
    """Publish the comment on the customer case."""
    comment_endpoint = f'https://api.access.redhat.com/rs/cases/{self.ticket}/comments'
    payload = {
        "label": "Solution by the bot",
        "text": comment,
        "uri": link,
        "draft": False,
        "caseNumber": str(self.ticket),
        "public": False
    }
    comment_response = requests.post(comment_endpoint, json=payload,
                                     auth=(self.rhn_username, self.rhn_password))
    if comment_response.status_code in (200, 201):
        print('comment to customer cases was successfully published')
        return True
    print('comment to customer cases was NOT successfully published')
    metric_name = (self.job + '-publish-comment').replace('-', '_')
    job_comment_metric = Gauge(metric_name, 'Error of comment publish on customer case',
                               registry=prometheus_registry)
    job_comment_metric.inc()
    return False
def main():
    g = Gauge('bcr_gauge_example', 'Testing how Prometheus Gauge works')
    start_http_server(8000)
    while True:
        g.inc(3)
        time.sleep(5)
        g.dec(2)
def test_nolabels(self):
    gauge = Gauge('g', 'help', registry=self.registry)
    gauge.inc()
    self.gb.push()
    self.t.join()
    self.assertEqual(b'g 1.0 1434898897\n', self.data)
def ensure_backups(args):
    push_job_started_metric(args.prom_push_gateway_endpoint, job_ensure_backups)
    start_time = time.time()

    # ensure-backups job specific metrics
    bigtable_backup_job_num_tables_backed_up = Gauge(
        'bigtable_backup_job_num_tables_backed_up',
        'Number of table backups found during last run', registry=registry)
    bigtable_backup_job_num_backup_ups = Gauge(
        'bigtable_backup_job_num_backup_ups',
        'Sum of number of backups per table found during last run', registry=registry)

    # Read all the existing backups
    popen = subprocess.Popen(['bigtable-backup', 'list-backups', '-ojson',
                              '--backup-path', args.destination_path],
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    popen.wait()

    # Build and push metrics related to existing backups
    backups = json.loads(popen.stdout.readline())

    if (args.duration is None and args.period_from is None) or \
            (args.duration is not None and args.period_from is not None):
        raise ValueError("Exactly one of --duration or --period-from must be set")

    bigtable_backup_job_num_tables_backed_up.set(len(backups))
    for __, timestamps in backups.items():
        bigtable_backup_job_num_backup_ups.inc(len(timestamps))
    push_metrics(args.prom_push_gateway_endpoint, job_ensure_backups)

    if args.period_from is None:
        period_from = datetime.utcnow() - timedelta(days=args.duration)
        args.period_from = valid_date(period_from.strftime("%Y-%m-%d"))
        args.period_to = valid_date(datetime.utcnow().strftime("%Y-%m-%d"))

    oldest_table_number = int(args.period_from.timestamp() / args.periodic_table_duration)
    newest_table_number = int(args.period_to.timestamp() / args.periodic_table_duration)
    active_table_number = time.time() / args.periodic_table_duration

    print("Checking that the right backups exist")
    while oldest_table_number <= newest_table_number:
        table_id = args.bigtable_table_id_prefix + str(oldest_table_number)
        oldest_table_number += 1

        if table_id not in backups:
            print("backup for {} not found".format(table_id))
            create_backup(table_id, args)
            bigtable_backup_job_backups_created.inc(1)

    print("Checking whether all the backups are created after their period is over "
          "and deleting old unwanted backups")
    for table_id, timestamps in backups.items():
        table_number = int(table_id.rsplit("_", 1)[-1])
        last_timestamp_from_table_number = find_last_timestamp_from_table_number(
            table_number, args.periodic_table_duration)

        # Check whether a backup was created after the last timestamp of the table's period.
        if last_timestamp_from_table_number > timestamps[-1]:
            create_backup(table_id, args)

        # Retain only the most recent backup for non-active tables.
        if table_number != active_table_number and len(timestamps) > 1:
            for timestamp in timestamps[:-1]:
                delete_backup(table_id, str(timestamp), args)

    push_job_finished_metric(args.prom_push_gateway_endpoint, job_ensure_backups,
                             int(time.time() - start_time))
def send_gauge(cls, metrics_name, help_info, value, inc=None):
    # Note: Gauge() registers the metric in the default registry, so calling
    # this twice with the same metrics_name raises a duplicated-timeseries
    # ValueError.
    g = Gauge(metrics_name, help_info)
    if inc is None:
        g.set(value)
    elif inc:
        g.inc(value)
    else:
        g.dec(value)
def increment(self, stat, by=1):
    self.stats[stat] += by
    # Update the associated Prometheus gauge.
    if stat not in self.prom_gauges:
        gauge = Gauge(sanitize_name("felix_" + self.name + " " + stat),
                      "%s: %s" % (self.name, stat))
        self.prom_gauges[stat] = gauge
    else:
        gauge = self.prom_gauges[stat]
    gauge.inc(by)
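# `sanitize_name` is used above but not defined in this excerpt. Prometheus
# metric names must match [a-zA-Z_:][a-zA-Z0-9_:]*, so a minimal sketch of
# such a helper (an assumption, not the project's actual implementation)
# could be:
import re

def sanitize_name(name):
    # Collapse anything that is not a valid metric-name character into '_'.
    return re.sub(r'[^a-zA-Z0-9_:]', '_', name)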
class OperationMetricSet:
    """Collection of Prometheus metrics representing a logical operation"""

    requests: Counter
    requests_duration: Histogram
    exceptions: Counter
    requests_in_progress: Gauge

    def __init__(self, operation_name: str, labels: List[str]):
        self.requests = Counter(
            f"pyncette_{operation_name}_total",
            f"Total count of {operation_name} operations",
            labels,
        )
        self.requests_duration = Histogram(
            f"pyncette_{operation_name}_duration_seconds",
            f"Histogram of {operation_name} processing time",
            labels,
        )
        self.exceptions = Counter(
            f"pyncette_{operation_name}_failures_total",
            f"Total count of {operation_name} failures",
            [*labels, "exception_type"],
        )
        self.requests_in_progress = Gauge(
            f"pyncette_{operation_name}_in_progress",
            f"Gauge of {operation_name} operations currently being processed",
            labels,
        )

    @contextlib.asynccontextmanager
    async def measure(self, **labels: Dict[str, str]) -> AsyncIterator[None]:
        """An async context manager that measures the execution of the wrapped code"""
        if labels:
            self.requests_in_progress.labels(**labels).inc()
            self.requests.labels(**labels).inc()
        else:
            self.requests_in_progress.inc()
            self.requests.inc()

        before_time = time.perf_counter()
        try:
            yield
        except Exception as e:
            self.exceptions.labels(**labels, exception_type=type(e).__name__).inc()
            raise e from None
        finally:
            if labels:
                self.requests_duration.labels(**labels).observe(
                    time.perf_counter() - before_time
                )
                self.requests_in_progress.labels(**labels).dec()
            else:
                self.requests_duration.observe(time.perf_counter() - before_time)
                self.requests_in_progress.dec()
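# A minimal usage sketch for OperationMetricSet.measure, assuming an asyncio
# entry point; the operation and label names here are illustrative only.
import asyncio

async def _measure_demo():
    metrics = OperationMetricSet("poll_task", ["task_name"])
    async with metrics.measure(task_name="heartbeat"):
        await asyncio.sleep(0.01)  # the measured work

# asyncio.run(_measure_demo())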
class TestGauge(unittest.TestCase):
    def setUp(self):
        self.registry = CollectorRegistry()
        self.gauge = Gauge('g', 'help', registry=self.registry)

    def test_gauge(self):
        self.assertEqual(0, self.registry.get_sample_value('g'))
        self.gauge.inc()
        self.assertEqual(1, self.registry.get_sample_value('g'))
        self.gauge.dec(3)
        self.assertEqual(-2, self.registry.get_sample_value('g'))
        self.gauge.set(9)
        self.assertEqual(9, self.registry.get_sample_value('g'))

    def test_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('g'))

        @self.gauge.track_inprogress()
        def f():
            self.assertEqual(1, self.registry.get_sample_value('g'))

        f()
        self.assertEqual(0, self.registry.get_sample_value('g'))

    def test_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('g'))
        with self.gauge.track_inprogress():
            self.assertEqual(1, self.registry.get_sample_value('g'))
        self.assertEqual(0, self.registry.get_sample_value('g'))

    def test_gauge_function(self):
        x = {}
        self.gauge.set_function(lambda: len(x))
        self.assertEqual(0, self.registry.get_sample_value('g'))
        self.gauge.inc()
        self.assertEqual(0, self.registry.get_sample_value('g'))
        x['a'] = None
        self.assertEqual(1, self.registry.get_sample_value('g'))

    def test_time_function_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('g'))

        @self.gauge.time()
        def f():
            time.sleep(.001)

        f()
        self.assertNotEqual(0, self.registry.get_sample_value('g'))

    def test_time_block_decorator(self):
        self.assertEqual(0, self.registry.get_sample_value('g'))
        with self.gauge.time():
            time.sleep(.001)
        self.assertNotEqual(0, self.registry.get_sample_value('g'))
class TelemetryClient(object):
    @Inject
    def __init__(self, environment: SystemEnvironmentProperties):
        self.endpoint = environment.get("PROMETHEUS_GATEWAY_ENDPOINT")
        self.registry = CollectorRegistry()
        self.get_request_counter = Counter("invertpdf_get_request_count",
                                           "Number of successful GET requests",
                                           registry=self.registry)
        self.post_request_counter = Counter("invertpdf_post_request_count",
                                            "Number of successful POST requests",
                                            registry=self.registry)
        self.duration_histogram = Histogram(
            "invertpdf_request_duration_ms", "Request duration",
            registry=self.registry,
            buckets=[0, 50, 100, 200, 500, 1000, 2000, 5000, 10000,
                     30000, 60000, 1800000, 3600000])
        self.failure_counter = Counter("invertpdf_failed_requests",
                                       "Number of failed requests",
                                       registry=self.registry)
        self.requests_in_progress = Gauge("invertpdf_requests_in_progress",
                                          "Number of pending requests",
                                          registry=self.registry)
        self.free_disk = Gauge("invertpdf_free_disk_space",
                               "Free disk space on tmpfs",
                               registry=self.registry)
        self.logger = logging.getLogger(self.__class__.__name__)

    def track_request(self, method: str, duration: int):
        self.logger.info(f"Request took {duration}ms.")
        self.duration_histogram.observe(duration)
        if method == "GET":
            self.get_request_counter.inc()
        elif method == "POST":
            self.post_request_counter.inc()

    def track_failure(self, method: str, duration: int):
        self.failure_counter.inc()
        self.duration_histogram.observe(duration)

    def track_start(self):
        self.requests_in_progress.inc()

    def track_end(self):
        self.requests_in_progress.dec()

    def submit(self):
        push_to_gateway(self.endpoint, "invertpdf", self.registry)
def update_stats(name):
    metric = metrics.get(name, None)
    value = int(request.args.get("value", 1))
    if metric is None:
        metric = Gauge(name, name)
        metrics[name] = metric
    if request.method == "DELETE":
        metric.dec(value)
    elif request.method == "POST":
        metric.inc(value)
    elif request.method == "PATCH":
        metric.set(value)
    return ""
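# A hedged sketch of how the view above might be wired into a Flask app; the
# route, the app object and the `metrics` dict are assumptions, not part of
# the original snippet.
from flask import Flask

app = Flask(__name__)
metrics = {}

app.add_url_rule("/stats/<name>", view_func=update_stats,
                 methods=["GET", "POST", "PATCH", "DELETE"])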
def execute_citellus(self, sosreport_dir):
    """Run Citellus on the customer ticket sos-report."""
    if self.check_sosreports(sosreport_dir):
        os.system(f'python3 citellus/citellus.py {sosreport_dir}')
        _LOGGER.info('Citellus execution on the sosreport completed successfully')
        return True
    _LOGGER.error('Unable to provide sosreport to Citellus for execution')
    metric_name = (self.job + '-sosreport-error').replace('-', '_')
    job_comment_metric = Gauge(metric_name, 'unable to send sosreport to citellus',
                               registry=prometheus_registry)
    job_comment_metric.inc()
    return False
def get_solutions(self, sosreport_dir):
    """Gather the solutions from the access.redhat.com solutions API."""
    with open(f'{sosreport_dir}/citellus.json') as f:
        report = json.load(f)
    seen_kbase_ids = []
    solution_data = []
    for hash_key, plugin in report['results'].items():
        if plugin.get('result').get('rc') == 20:
            if plugin.get('kb') and self.redhat_solutions in plugin.get('kb'):
                kbase_id = re.search(r'\d+$', plugin.get('kb')).group(0)
                if kbase_id not in seen_kbase_ids:
                    seen_kbase_ids.append(kbase_id)
                    url = 'https://api.access.redhat.com/rs/solutions/' + kbase_id
                    response = requests.get(url, auth=(self.rhn_username, self.rhn_password))
                    if response.status_code == 200:
                        solution = ''
                        try:
                            tree = ET.fromstring(response.text)
                            resolution = tree.find('{http://www.redhat.com/gss/strata}resolution')
                            solution = resolution.find('{http://www.redhat.com/gss/strata}text').text
                        except Exception as e:
                            _LOGGER.error('xml parsing of the solution failed: %s', e)
                            metric_name = (self.job + '-solution-xml-parse-'
                                           + str(kbase_id)).replace('-', '_')
                            job_comment_metric = Gauge(metric_name, 'solution xml parsing failed',
                                                       registry=prometheus_registry)
                            job_comment_metric.inc()
                        if solution:
                            plugin['result']['solution'] = solution
                    else:
                        _LOGGER.error('Request to solution api failed!')
                        metric_name = (self.job + '-solution-request-'
                                       + str(kbase_id)).replace('-', '_')
                        job_comment_metric = Gauge(metric_name,
                                                   'solution request failed due to authentication',
                                                   registry=prometheus_registry)
                        job_comment_metric.inc()
                    solution_data.append(plugin)
    solution_data = sorted(solution_data, key=lambda val: val['priority'], reverse=True)
    return solution_data
def main(self):
    """Execute the SBR OpenStack bot."""
    job_name = (self.job + '-job-exec-time').replace('-', '_')
    job_metric_time = Gauge(job_name, 'Runtime of application job execution',
                            registry=prometheus_registry)
    try:
        with job_metric_time.time():
            solutions = list()
            complete = False
            remote_host, remote_port, remote_dir = self.get_ticket_config()
            if remote_host and remote_port and remote_dir:
                self.ssh_copy_attachments(remote_host, remote_port, remote_dir)
            if os.path.isdir(self.path):
                sosreports = self.get_all_sosreports()
                print("List of extracted sosreports: ", sosreports)
                for sosreport in sosreports:
                    execution_path = f"{self.path}/{sosreport}"
                    self.execute_citellus(execution_path)
                    solution_data = self.get_solutions(execution_path)
                    solutions.append(solution_data)
                    comment, link = self.generate_comments(solution_data)
                    print("Comment:", comment)
                    complete = True
                    if comment:
                        complete = self.publish_comments(comment, link)
            if complete:
                print('Script successfully completed')
            else:
                metric_name = (self.job + '-application-failed').replace('-', '_')
                job_comment_metric = Gauge(metric_name, 'Script unable to process the ticket',
                                           registry=prometheus_registry)
                job_comment_metric.inc()
                _LOGGER.info('Script unable to process the ticket')
                print('Script Failed!')
    except Exception as e:
        print("Script Failed!")
        traceback.print_exc()
    self.pushgateway(self.job)
def get_all_sosreports(self):
    """Extract the sosreports based on their compression type."""
    print('Extracting the compressed file!')
    sosreports = list()
    metric_count = 0
    for sosreport in os.listdir(self.path):
        try:
            if sosreport.startswith("."):
                continue
            if tarfile.is_tarfile(f'{self.path}/{sosreport}'):
                print("sosreport is compressed as a tar file")
                sosreport_tar_obj = tarfile.open(f'{self.path}/{sosreport}')
                sosreport_tar_obj.extractall(path=self.path)
                os.chmod(f'{self.path}/{sosreport_tar_obj.getnames()[0]}', 0o755)
                os.remove(f'{self.path}/{sosreport}')
                sosreports.append(sosreport_tar_obj.getnames()[0])
                _LOGGER.info('Extracted the tar compressed sosreport from attachments')
            elif zipfile.is_zipfile(f'{self.path}/{sosreport}'):
                print("sosreport is compressed as a zip file")
                sosreport_zip_obj = zipfile.ZipFile(f'{self.path}/{sosreport}')
                sosreport_zip_obj.extractall(path=f'{self.path}')
                os.chmod(f'{self.path}/{sosreport_zip_obj.namelist()[0]}', 0o755)
                os.remove(f'{self.path}/{sosreport}')
                sosreports.append(sosreport_zip_obj.namelist()[0])
                _LOGGER.info('Extracted the zipped sosreport from attachments')
            else:
                print("failed sosreport extraction! compression type is not tar or zip! file:",
                      sosreport)
        except Exception:
            print('Error occurred in file: ', sosreport)
            metric_name = (self.job + '-sosreport-extract-'
                           + str(metric_count)).replace('-', '_')
            metric_count += 1
            job_comment_metric = Gauge(metric_name, 'unable to extract sosreport',
                                       registry=prometheus_registry)
            job_comment_metric.inc()
    return sosreports
def ssh_copy_attachments(self, remote_host, remote_port, remote_directory):
    """Copy the ticket attachments from the storage server to the /cases/<ticket> directory."""
    try:
        print("Ticket attachment Directory: ", remote_directory)
        escape_known_host = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
        scp_command = (f"sshpass -f {self.rh_pwd_dir} scp {escape_known_host} -r "
                       f"-P {remote_port} {self.user}@{remote_host}:{remote_directory} "
                       f"/cases/{self.ticket}")
        scp_process = subprocess.run(scp_command.split(' '))
        if scp_process.returncode == 0:
            _LOGGER.info('Successfully fetched the attachments')
        elif scp_process.returncode == 1:
            metric_name = (self.job + '-scp-error').replace('-', '_')
            job_comment_metric = Gauge(metric_name,
                                       'unable to scp ticket attachments as the file is not found',
                                       registry=prometheus_registry)
            job_comment_metric.inc(1)
            _LOGGER.error('Unable to fetch attachments as the file is not found')
        elif scp_process.returncode == 5:
            metric_name = (self.job + '-scp-error').replace('-', '_')
            job_comment_metric = Gauge(metric_name,
                                       'unable to scp ticket attachments due to authentication failure',
                                       registry=prometheus_registry)
            job_comment_metric.inc(5)
            _LOGGER.error('Unable to fetch attachments due to authentication')
        else:
            _LOGGER.error(f'Unable to fetch attachments due to error code {scp_process.returncode}')
            raise Exception('scp failed! Unable to fetch attachments.')
    except Exception:
        metric_name = (self.job + '-scp-error').replace('-', '_')
        job_comment_metric = Gauge(metric_name, 'unable to scp ticket attachments',
                                   registry=prometheus_registry)
        job_comment_metric.inc(2)
        _LOGGER.error('scp failed! Fetching attachments is not possible.')
        raise Exception('scp failed! Fetching attachments is not possible.')
    return True
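# The handlers above create a fresh, dynamically named Gauge for every error
# site. A sketch of the labelled-metric alternative that prometheus_client
# supports: one gauge registered once, with the failure stage as a label
# (the metric and label names here are illustrative, not from the original
# code).
bot_errors = Gauge('sbr_bot_errors_total', 'Errors raised by the bot, by stage',
                   ['stage'], registry=prometheus_registry)
bot_errors.labels(stage='scp').inc()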
from prometheus_client import Counter
from prometheus_client import Gauge

c = Counter('my_failures', 'Description of counter')
print(c)
c.inc()       # Increment by 1
print(c)
c.inc(1.6)    # Increment by given value
print(c)
print(c.collect())

g = Gauge("my_gauge", "my description of gauge")
g.set(1)
g.inc(1)
g.dec(2)


# track_inprogress(): increments the gauge on entry, decrements it on exit
@g.track_inprogress()
def f():
    pass
class RequestHandler:
    """
    Class that handles the requests arriving to the gateway and the result extracted
    from the requests future.

    :param metrics_registry: optional metrics registry for prometheus used if we need
        to expose metrics from the executor or from the data request handler
    :param runtime_name: optional runtime_name that will be registered during monitoring
    """

    def __init__(
        self,
        metrics_registry: Optional['CollectorRegistry'] = None,
        runtime_name: Optional[str] = None,
    ):
        self._request_init_time = {} if metrics_registry else None
        self._executor_endpoint_mapping = None

        if metrics_registry:
            with ImportExtensions(
                required=True,
                help_text='You need to install the `prometheus_client` to use the monitoring functionality of jina',
            ):
                from prometheus_client import Gauge, Summary

            self._receiving_request_metrics = Summary(
                'receiving_request_seconds',
                'Time spent processing request',
                registry=metrics_registry,
                namespace='jina',
                labelnames=('runtime_name',),
            ).labels(runtime_name)
            self._pending_requests_metrics = Gauge(
                'number_of_pending_requests',
                'Number of pending requests',
                registry=metrics_registry,
                namespace='jina',
                labelnames=('runtime_name',),
            ).labels(runtime_name)
        else:
            self._receiving_request_metrics = None
            self._pending_requests_metrics = None

    def handle_request(
        self, graph: 'TopologyGraph', connection_pool: 'GrpcConnectionPool'
    ) -> Callable[['Request'], 'asyncio.Future']:
        """
        Function that handles the requests arriving to the gateway. This will be passed to the streamer.

        :param graph: The TopologyGraph of the Flow.
        :param connection_pool: The connection pool to be used to send messages to specific nodes of the graph
        :return: Return a Function that given a Request will return a Future from where to extract the response
        """

        async def gather_endpoints(request_graph):
            nodes = request_graph.all_nodes
            try:
                tasks_to_get_endpoints = [
                    node.get_endpoints(connection_pool) for node in nodes
                ]
                endpoints = await asyncio.gather(*tasks_to_get_endpoints)
            except InternalNetworkError as err:
                err_code = err.code()
                if err_code == grpc.StatusCode.UNAVAILABLE:
                    err._details = (
                        err.details()
                        + f' |Gateway: Communication error with deployment at address(es) {err.dest_addr}. Head or worker(s) may be down.'
                    )
                    raise err
                else:
                    raise

            self._executor_endpoint_mapping = {}
            for node, (endp, _) in zip(nodes, endpoints):
                self._executor_endpoint_mapping[node.name] = endp.endpoints

        def _handle_request(request: 'Request') -> 'asyncio.Future':
            if self._receiving_request_metrics:
                self._request_init_time[request.request_id] = time.time()
            if self._pending_requests_metrics:
                self._pending_requests_metrics.inc()
            # important that the gateway needs to have an instance of the graph per request
            request_graph = copy.deepcopy(graph)
            if graph.has_filter_conditions:
                request_doc_ids = request.data.docs[
                    :, 'id'
                ]  # used to maintain order of docs that are filtered by executors
            tasks_to_respond = []
            tasks_to_ignore = []
            endpoint = request.header.exec_endpoint
            r = request.routes.add()
            r.executor = 'gateway'
            r.start_time.GetCurrentTime()
            # If the request is targeting a specific deployment, we can send directly
            # to the deployment instead of querying the graph
            for origin_node in request_graph.origin_nodes:
                leaf_tasks = origin_node.get_leaf_tasks(
                    connection_pool,
                    request,
                    None,
                    endpoint=endpoint,
                    executor_endpoint_mapping=self._executor_endpoint_mapping,
                    target_executor_pattern=request.header.target_executor,
                )
                # Every origin node returns a set of tasks that are the ones corresponding
                # to the leafs of each of their subtrees that unwrap all the previous tasks.
                # It starts like a chain of waiting for tasks from previous nodes
                tasks_to_respond.extend([task for ret, task in leaf_tasks if ret])
                tasks_to_ignore.extend([task for ret, task in leaf_tasks if not ret])

            def _sort_response_docs(response):
                # sort response docs according to their order in the initial request
                def sort_by_request_order(doc):
                    if doc.id in request_doc_ids:
                        return request_doc_ids.index(doc.id)
                    else:
                        return len(request_doc_ids)  # put new/unknown docs at the end

                sorted_docs = sorted(response.data.docs, key=sort_by_request_order)
                response.data.docs = DocumentArray(sorted_docs)

            async def _process_results_at_end_gateway(
                tasks: List[asyncio.Task], request_graph: TopologyGraph
            ) -> asyncio.Future:
                if self._executor_endpoint_mapping is None:
                    await asyncio.gather(gather_endpoints(request_graph))

                partial_responses = await asyncio.gather(*tasks)
                partial_responses, metadatas = zip(*partial_responses)
                filtered_partial_responses = list(
                    filter(lambda x: x is not None, partial_responses)
                )

                response = filtered_partial_responses[0]
                request_graph.add_routes(response)

                if graph.has_filter_conditions:
                    _sort_response_docs(response)

                return response

            # In case of empty topologies
            if not tasks_to_respond:
                r.end_time.GetCurrentTime()
                future = asyncio.Future()
                future.set_result((request, {}))
                tasks_to_respond.append(future)
            return asyncio.ensure_future(
                _process_results_at_end_gateway(tasks_to_respond, request_graph)
            )

        return _handle_request

    def handle_result(self) -> Callable[['Request'], 'asyncio.Future']:
        """
        Function that handles the result when extracted from the request future

        :return: Return a Function that returns a request to be returned to the client
        """

        def _handle_result(result: 'Request'):
            """
            Function that handles the result when extracted from the request future

            :param result: The result returned to the gateway. It extracts the request
                to be returned to the client
            :return: Returns a request to be returned to the client
            """
            for route in result.routes:
                if route.executor == 'gateway':
                    route.end_time.GetCurrentTime()

            if self._receiving_request_metrics:
                init_time = self._request_init_time.pop(
                    result.request_id
                )  # need to pop otherwise it stays in memory forever
                self._receiving_request_metrics.observe(time.time() - init_time)
            if self._pending_requests_metrics:
                self._pending_requests_metrics.dec()
            return result

        return _handle_result
c = Counter('static_increment_counter', 'Counter that increments by 50 every 2 seconds')
g = Gauge('static_increment_gauge', 'Gauge that increments by 50 every 2 seconds')
c_rand = Counter('random_increment_counter', 'Counter that increments in a random fashion every 15 seconds')
g_rand = Gauge('random_increment_gauge', 'Gauge that increments in a random fashion every 15 seconds')

run = 0
random.seed()
while True:
    log.info("*******RUN %i **********", run)
    # Increment the counter and gauge by the same value at each run
    c.inc(50)
    g.inc(50)
    rand = random.randint(1, 1000)
    c_rand.inc(rand)
    g_rand.inc(rand)
    log.info("Incremented Rand by {0}".format(rand))
    # Scrape endpoint to get metric values logged to stdout
    # (the URL credentials were redacted in the original)
    metrics = requests.get("http://*****:*****@example.com")
    log.info(metrics.text)
    time.sleep(15)
    run += 1
class BroadcastWebsocketStats():
    def __init__(self, local_hostname, remote_hostname):
        self._local_hostname = local_hostname
        self._remote_hostname = remote_hostname
        self._registry = CollectorRegistry()

        # TODO: More robust replacement
        self.name = self.safe_name(self._local_hostname)
        self.remote_name = self.safe_name(self._remote_hostname)

        self._messages_received_total = Counter(
            f'awx_{self.remote_name}_messages_received_total',
            'Number of messages received, to be forwarded, by the broadcast websocket system',
            registry=self._registry)
        self._messages_received = Gauge(
            f'awx_{self.remote_name}_messages_received',
            'Number of forwarded messages received by the broadcast websocket system, '
            'for the duration of the current connection',
            registry=self._registry)
        self._connection = Enum(
            f'awx_{self.remote_name}_connection',
            'Websocket broadcast connection',
            states=['disconnected', 'connected'],
            registry=self._registry)
        self._connection_start = Gauge(
            f'awx_{self.remote_name}_connection_start',
            'Time the connection was established',
            registry=self._registry)
        self._messages_received_per_minute = Gauge(
            f'awx_{self.remote_name}_messages_received_per_minute',
            'Messages received per minute',
            registry=self._registry)
        self._internal_messages_received_per_minute = FixedSlidingWindow()

    def safe_name(self, s):
        # Replace all non alpha-numeric characters with _
        return re.sub('[^0-9a-zA-Z]+', '_', s)

    def unregister(self):
        self._registry.unregister(f'awx_{self.remote_name}_messages_received')
        self._registry.unregister(f'awx_{self.remote_name}_connection')

    def record_message_received(self):
        self._internal_messages_received_per_minute.record()
        self._messages_received.inc()
        self._messages_received_total.inc()

    def record_connection_established(self):
        self._connection.state('connected')
        self._connection_start.set_to_current_time()
        self._messages_received.set(0)

    def record_connection_lost(self):
        self._connection.state('disconnected')

    def get_connection_duration(self):
        return (datetime.datetime.now() - self._connection_established_ts).total_seconds()

    def render(self):
        msgs_per_min = self._internal_messages_received_per_minute.render()
        self._messages_received_per_minute.set(msgs_per_min)

    def serialize(self):
        self.render()
        registry_data = generate_latest(self._registry).decode('UTF-8')
        return registry_data
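# `FixedSlidingWindow` is referenced above but not defined in this excerpt. A
# minimal sketch of a one-minute sliding window (an assumption, not AWX's
# actual implementation):
import time

class FixedSlidingWindow:
    def __init__(self, window_seconds=60):
        self.window_seconds = window_seconds
        self._timestamps = []

    def record(self):
        # Remember when a message arrived.
        self._timestamps.append(time.time())

    def render(self):
        # Drop entries older than the window and return the remaining count.
        cutoff = time.time() - self.window_seconds
        self._timestamps = [t for t in self._timestamps if t >= cutoff]
        return len(self._timestamps)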
# Test code for exporting metrics to a localhost push gateway
from prometheus_client import (CollectorRegistry, Gauge, push_to_gateway,
                               Summary, Histogram, Counter, start_http_server)
import os

registry = CollectorRegistry()
_PACKAGES_NEW = Gauge('packages_added', 'Packages newly added', registry=registry)

# Implement a logic for checking condition to increase gauge count
for i in range(10):
    # some logic for checking if any new packages have been added
    packages_added = True
    if packages_added:
        _PACKAGES_NEW.inc()

push_gateway = os.getenv('PROMETHEUS_PUSH_GATEWAY', 'pushgateway:9091')
if push_gateway:
    try:
        push_to_gateway(push_gateway, job='package-releases', registry=registry)
    except Exception as e:
        print('An error occurred pushing the metrics: {}'.format(str(e)))
# Create a metric to track time spent and requests made.
counter = Counter('sobi3ch_counter', 'Description of a counter')
gauge = Gauge('sobi3ch_gauge', 'Description of gauge')
gauge.set(50)
SUMMARY = Summary('sobi3ch_summary_request_processing_seconds', 'Time spent processing request')
histogram = Histogram('sobi3ch_histogram_request_latency_seconds', 'Description of histogram')


# Decorate function with metric.
@SUMMARY.time()
def process_request(t):
    """A dummy function that takes some time."""
    time.sleep(t)


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(8000)
    # Generate some requests.
    while True:
        r = random.random()
        process_request(r)
        if r > 0.8:
            counter.inc()
        if r < 0.5:
            gauge.inc()  # Increment by 1
        else:
            gauge.dec()
        histogram.observe(4.7)  # Observe 4.7 (seconds in this case)
class PrometheusMonitor(Monitor):
    """
    Prometheus Faust Sensor.

    This sensor records statistics using prometheus_client and exposes them
    using the aiohttp server running under /metrics by default.

    Usage:
        import faust
        from faust.sensors.prometheus import PrometheusMonitor

        app = faust.App('example', broker='kafka://')
        app.monitor = PrometheusMonitor(app, pattern='/metrics')
    """

    ERROR = 'error'
    COMPLETED = 'completed'
    KEYS_RETRIEVED = 'keys_retrieved'
    KEYS_UPDATED = 'keys_updated'
    KEYS_DELETED = 'keys_deleted'

    def __init__(self, app: AppT, pattern: str = '/metrics',
                 **kwargs: Any) -> None:
        self.app = app
        self.pattern = pattern

        if prometheus_client is None:
            raise ImproperlyConfigured(
                'prometheus_client requires `pip install prometheus_client`.')

        self._initialize_metrics()
        self.expose_metrics()
        super().__init__(**kwargs)

    def _initialize_metrics(self) -> None:
        """Initialize Prometheus metrics."""
        # On message received
        self.messages_received = Counter('messages_received', 'Total messages received')
        self.active_messages = Gauge('active_messages', 'Total active messages')
        self.messages_received_per_topics = Counter(
            'messages_received_per_topic', 'Messages received per topic', ['topic'])
        self.messages_received_per_topics_partition = Gauge(
            'messages_received_per_topics_partition',
            'Messages received per topic/partition', ['topic', 'partition'])
        self.events_runtime_latency = Histogram('events_runtime_ms', 'Events runtime in ms')

        # On Event Stream in
        self.total_events = Counter('total_events', 'Total events received')
        self.total_active_events = Gauge('total_active_events', 'Total active events')
        self.total_events_per_stream = Counter(
            'total_events_per_stream', 'Events received per Stream', ['stream'])

        # On table changes get/set/del keys
        self.table_operations = Counter(
            'table_operations', 'Total table operations', ['table', 'operation'])

        # On message send
        self.topic_messages_sent = Counter(
            'topic_messages_sent', 'Total messages sent per topic', ['topic'])
        self.total_sent_messages = Counter('total_sent_messages', 'Total messages sent')
        self.producer_send_latency = Histogram(
            'producer_send_latency', 'Producer send latency in ms')
        self.total_error_messages_sent = Counter(
            'total_error_messages_sent', 'Total error messages sent')
        self.producer_error_send_latency = Histogram(
            'producer_error_send_latency', 'Producer error send latency in ms')

        # Assignment
        self.assignment_operations = Counter(
            'assignment_operations',
            'Total assignment operations (completed/error)', ['operation'])
        self.assign_latency = Histogram('assign_latency', 'Assignment latency in ms')

        # Rebalances
        self.total_rebalances = Gauge('total_rebalances', 'Total rebalances')
        self.total_rebalances_recovering = Gauge(
            'total_rebalances_recovering', 'Total rebalances recovering')
        self.revalance_done_consumer_latency = Histogram(
            'revalance_done_consumer_latency',
            'Consumer replying that rebalance is done to broker in ms')
        self.revalance_done_latency = Histogram(
            'revalance_done_latency', 'Rebalance finished latency in ms')

        # Count Metrics by name
        self.count_metrics_by_name = Gauge(
            'metrics_by_name', 'Total metrics by name', ['metric'])

        # Web
        self.http_status_codes = Counter(
            'http_status_codes', 'Total http_status code', ['status_code'])
        self.http_latency = Histogram('http_latency', 'Http response latency in ms')

        # Topic/Partition Offsets
        self.topic_partition_end_offset = Gauge(
            'topic_partition_end_offset',
            'Offset ends per topic/partition', ['topic', 'partition'])
        self.topic_partition_offset_commited = Gauge(
            'topic_partition_offset_commited',
            'Offset committed per topic/partition', ['topic', 'partition'])
        self.consumer_commit_latency = Histogram(
            'consumer_commit_latency', 'Consumer commit latency in ms')

    def on_message_in(self, tp: TP, offset: int, message: Message) -> None:
        """Call before message is delegated to streams."""
        super().on_message_in(tp, offset, message)

        self.messages_received.inc()
        self.active_messages.inc()
        self.messages_received_per_topics.labels(topic=tp.topic).inc()
        self.messages_received_per_topics_partition.labels(
            topic=tp.topic, partition=tp.partition).set(offset)

    def on_stream_event_in(self, tp: TP, offset: int, stream: StreamT,
                           event: EventT) -> typing.Optional[typing.Dict]:
        """Call when stream starts processing an event."""
        state = super().on_stream_event_in(tp, offset, stream, event)
        self.total_events.inc()
        self.total_active_events.inc()
        self.total_events_per_stream.labels(
            stream=f'stream.{self._stream_label(stream)}.events').inc()
        return state

    def _stream_label(self, stream: StreamT) -> str:
        return self._normalize(
            stream.shortlabel.lstrip('Stream:'),
        ).strip('_').lower()

    def on_stream_event_out(self, tp: TP, offset: int, stream: StreamT,
                            event: EventT, state: typing.Dict = None) -> None:
        """Call when stream is done processing an event."""
        super().on_stream_event_out(tp, offset, stream, event, state)
        self.total_active_events.dec()
        self.events_runtime_latency.observe(
            self.secs_to_ms(self.events_runtime[-1]))

    def on_message_out(self, tp: TP, offset: int, message: Message) -> None:
        """Call when message is fully acknowledged and can be committed."""
        super().on_message_out(tp, offset, message)
        self.active_messages.dec()

    def on_table_get(self, table: CollectionT, key: typing.Any) -> None:
        """Call when value in table is retrieved."""
        super().on_table_get(table, key)
        self.table_operations.labels(table=f'table.{table.name}',
                                     operation=self.KEYS_RETRIEVED).inc()

    def on_table_set(self, table: CollectionT, key: typing.Any,
                     value: typing.Any) -> None:
        """Call when new value for key in table is set."""
        super().on_table_set(table, key, value)
        self.table_operations.labels(table=f'table.{table.name}',
                                     operation=self.KEYS_UPDATED).inc()

    def on_table_del(self, table: CollectionT, key: typing.Any) -> None:
        """Call when key in a table is deleted."""
        super().on_table_del(table, key)
        self.table_operations.labels(table=f'table.{table.name}',
                                     operation=self.KEYS_DELETED).inc()

    def on_commit_completed(self, consumer: ConsumerT,
                            state: typing.Any) -> None:
        """Call when consumer commit offset operation completed."""
        super().on_commit_completed(consumer, state)
        self.consumer_commit_latency.observe(
            self.ms_since(typing.cast(float, state)))

    def on_send_initiated(self, producer: ProducerT, topic: str,
                          message: PendingMessage, keysize: int,
                          valsize: int) -> typing.Any:
        """Call when message added to producer buffer."""
        self.topic_messages_sent.labels(topic=f'topic.{topic}').inc()
        return super().on_send_initiated(
            producer, topic, message, keysize, valsize)

    def on_send_completed(self, producer: ProducerT, state: typing.Any,
                          metadata: RecordMetadata) -> None:
        """Call when producer finished sending message."""
        super().on_send_completed(producer, state, metadata)
        self.total_sent_messages.inc()
        self.producer_send_latency.observe(
            self.ms_since(typing.cast(float, state)))

    def on_send_error(self, producer: ProducerT, exc: BaseException,
                      state: typing.Any) -> None:
        """Call when producer was unable to publish message."""
        super().on_send_error(producer, exc, state)
        self.total_error_messages_sent.inc()
        self.producer_error_send_latency.observe(
            self.ms_since(typing.cast(float, state)))

    def on_assignment_error(self, assignor: PartitionAssignorT,
                            state: typing.Dict, exc: BaseException) -> None:
        """Partition assignor did not complete assignment due to error."""
        super().on_assignment_error(assignor, state, exc)
        self.assignment_operations.labels(operation=self.ERROR).inc()
        self.assign_latency.observe(self.ms_since(state['time_start']))

    def on_assignment_completed(self, assignor: PartitionAssignorT,
                                state: typing.Dict) -> None:
        """Partition assignor completed assignment."""
        super().on_assignment_completed(assignor, state)
        self.assignment_operations.labels(operation=self.COMPLETED).inc()
        self.assign_latency.observe(self.ms_since(state['time_start']))

    def on_rebalance_start(self, app: AppT) -> typing.Dict:
        """Cluster rebalance in progress."""
        state = super().on_rebalance_start(app)
        self.total_rebalances.inc()
        return state

    def on_rebalance_return(self, app: AppT, state: typing.Dict) -> None:
        """Consumer replied assignment is done to broker."""
        super().on_rebalance_return(app, state)
        self.total_rebalances.dec()
        self.total_rebalances_recovering.inc()
        self.revalance_done_consumer_latency.observe(
            self.ms_since(state['time_return']))

    def on_rebalance_end(self, app: AppT, state: typing.Dict) -> None:
        """Cluster rebalance fully completed (including recovery)."""
        super().on_rebalance_end(app, state)
        self.total_rebalances_recovering.dec()
        self.revalance_done_latency.observe(self.ms_since(state['time_end']))

    def count(self, metric_name: str, count: int = 1) -> None:
        """Count metric by name."""
        super().count(metric_name, count=count)
        self.count_metrics_by_name.labels(metric=metric_name).inc(count)

    def on_tp_commit(self, tp_offsets: TPOffsetMapping) -> None:
        """Call when offset in topic partition is committed."""
        super().on_tp_commit(tp_offsets)
        for tp, offset in tp_offsets.items():
            self.topic_partition_offset_commited.labels(
                topic=tp.topic, partition=tp.partition).set(offset)

    def track_tp_end_offset(self, tp: TP, offset: int) -> None:
        """Track new topic partition end offset for monitoring lags."""
        super().track_tp_end_offset(tp, offset)
        self.topic_partition_end_offset.labels(
            topic=tp.topic, partition=tp.partition).set(offset)

    def on_web_request_end(self, app: AppT, request: web.Request,
                           response: typing.Optional[web.Response],
                           state: typing.Dict, *,
                           view: web.View = None) -> None:
        """Web server finished working on request."""
        super().on_web_request_end(app, request, response, state, view=view)
        status_code = int(state['status_code'])
        self.http_status_codes.labels(status_code=status_code).inc()
        self.http_latency.observe(self.ms_since(state['time_end']))

    def expose_metrics(self) -> None:
        """Expose Prometheus metrics using the current aiohttp application."""

        @self.app.page(self.pattern)
        async def metrics_handler(self: _web.View,
                                  request: _web.Request) -> _web.Response:
            headers = {
                'Content-Type': 'text/plain; version=0.0.4; charset=utf-8',
            }

            return cast(_web.Response,
                        Response(body=generate_latest(REGISTRY),
                                 headers=headers, status=200))
# Assumed reconstruction: the definitions of `nodes_total` and `online` fall
# outside this excerpt; they are sketched here from how they are used below.
nodes_total = Gauge('total', 'total nodes', namespace='gluon',
                    registry=registry)
online = Gauge('online', 'node is online', ['nodeid', 'hostname', 'fw'],
               namespace='gluon', registry=registry)
nodes_online = Gauge('total_online', 'total online nodes', namespace='gluon',
                     registry=registry)
clients_total = Gauge('clients_total', 'clients total', namespace='gluon',
                      registry=registry)
traffic_total = Gauge('traffic_total', 'traffic total', ['type'],
                      namespace='gluon', registry=registry)

for node in data:
    nodes_total.inc()
    nid = node['nodeid']
    d = node['last_response']
    hostname = d['nodeinfo']['hostname']

    # default labels
    deflbl = {
        'nodeid': nid,
        'hostname': hostname,
        'fw': d['nodeinfo']['software']['firmware']['release']
    }

    # check node status
    if node['status'] != 'Up':
        online.labels(**deflbl).set(0)
# Assumed reconstruction: the opening of the `jokes` list falls outside this
# excerpt; a first entry is sketched here so the fragment parses.
jokes = [{
    'id': gen_new_uuid(),
    'content': 'This is a joke',
    'reactions': 0
}, {
    'id': gen_new_uuid(),
    'content': 'This is a second joke',
    'reactions': 0
}]
number_jokes_counter.inc()
number_jokes_counter.inc()

channel_members = [{
    'id': gen_new_uuid(),
    'name': 'Horgix'
}, {
    'id': gen_new_uuid(),
    'name': 'Frédéric'
}]
number_channel_members_gauge.inc()
number_channel_members_gauge.inc()


@app.route('/')
def main():
    pass  # requests tracked by default


@app.route('/jokes')
def get_jokes():
    return api_response_from_dict(jokes)


@app.route('/add_joke')
def add_joke():
class Prometheus(service.BuildbotService):
    '''
    This service exposes buildbot metrics to Prometheus.

    Metrics state is initialised at service start and is (mostly) retained
    through a reconfiguration.

    Instance attributes holding a Prometheus metrics item are prefixed with
    a symbol indicating the kind of metric they are. For example:

    - Counters: c_<attr_label>
    - Gauges: g_<attr_label>
    - Histogram: h_<attr_label>
    - Summary: s_<attr_label>
    '''

    name = "Prometheus"
    namespace = 'buildbot'

    def __init__(self, port=9101, **kwargs):
        service.BuildbotService.__init__(self, **kwargs)
        self.port = port
        self.server = None
        self.consumers = []
        self.registry = None
        self.create_metrics()

    @defer.inlineCallbacks
    def reconfigService(self, builders=None, **kwargs):
        '''
        Accumulated metrics are maintained through a reconfigure.
        '''
        log.msg("Reconfiguring Prometheus reporter")
        yield service.BuildbotService.reconfigService(self)
        self.registerConsumers()

    @defer.inlineCallbacks
    def startService(self):
        log.msg("Starting Prometheus reporter")
        yield service.BuildbotService.startService(self)
        root = Resource()
        root.putChild(b'metrics', MetricsResource(registry=self.registry))
        self.server = reactor.listenTCP(self.port, Site(root))
        log.msg("Prometheus service starting on {}".format(self.server.port))

    @defer.inlineCallbacks
    def stopService(self):
        log.msg("Stopping Prometheus reporter")
        yield self.server.stopListening()
        yield service.BuildbotService.stopService(self)
        self.removeConsumers()

    def create_metrics(self):
        '''
        Create the Prometheus metrics that will be exposed.
        '''
        log.msg("Creating Prometheus metrics")
        self.registry = CollectorRegistry()

        # build metrics
        builds_labels = ['builder_id', 'worker_id']
        self.g_builds_duration = Gauge(
            'builds_duration_seconds',
            'Number of seconds spent performing builds',
            labelnames=builds_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_builds_success = Counter(
            'builds_success', 'Number of builds reporting success',
            labelnames=builds_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_builds_failure = Counter(
            'builds_failure', 'Number of builds reporting failure',
            labelnames=builds_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_builds_error = Counter(
            'builds_error', 'Number of builds reporting error',
            labelnames=builds_labels, namespace=self.namespace,
            registry=self.registry)

        # builders metrics
        builders_labels = ['builder_id', 'builder_name']
        self.g_builders_running_total = Gauge(
            'builders_running_total', 'Total number of builders running',
            namespace=self.namespace, registry=self.registry)
        self.g_builders_running = Gauge(
            'builders_running', 'Number of builders running',
            labelnames=builders_labels, namespace=self.namespace,
            registry=self.registry)

        # buildsets metrics
        buildsets_labels = ['buildset_id']
        self.g_buildsets_duration = Gauge(
            'buildsets_duration_seconds',
            'Number of seconds spent performing buildsets',
            labelnames=buildsets_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_buildsets_success = Counter(
            'buildsets_success', 'Number of buildsets reporting success',
            labelnames=buildsets_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_buildsets_failure = Counter(
            'buildsets_failure', 'Number of buildsets reporting failure',
            labelnames=buildsets_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_buildsets_error = Counter(
            'buildsets_error', 'Number of buildsets reporting error',
            labelnames=buildsets_labels, namespace=self.namespace,
            registry=self.registry)

        # build requests metrics
        build_requests_labels = ['builder_id']
        self.g_build_requests_duration = Gauge(
            'build_requests_duration_seconds',
            'Number of seconds spent performing build requests',
            labelnames=build_requests_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_build_requests_success = Counter(
            'build_requests_success',
            'Number of build requests reporting success',
            labelnames=build_requests_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_build_requests_failure = Counter(
            'build_requests_failure',
            'Number of build requests reporting failure',
            labelnames=build_requests_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_build_requests_error = Counter(
            'build_requests_error',
            'Number of build requests reporting error',
            labelnames=build_requests_labels, namespace=self.namespace,
            registry=self.registry)

        # steps metrics
        steps_labels = ['step_number', 'step_name', 'builder_id', 'worker_id']
        self.g_steps_duration = Gauge(
            'steps_duration_seconds',
            'Number of seconds spent performing build steps',
            labelnames=steps_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_steps_success = Counter(
            'steps_success', 'Number of steps reporting success',
            labelnames=steps_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_steps_failure = Counter(
            'steps_failure', 'Number of steps reporting failure',
            labelnames=steps_labels, namespace=self.namespace,
            registry=self.registry)
        self.c_steps_error = Counter(
            'steps_error', 'Number of steps reporting error',
            labelnames=steps_labels, namespace=self.namespace,
            registry=self.registry)

        # workers metrics
        workers_labels = ['worker_id', 'worker_name']
        self.g_workers_running_total = Gauge(
            'workers_running_total', 'Total number of workers running',
            namespace=self.namespace, registry=self.registry)
        self.g_workers_running = Gauge(
            'workers_running', 'Number of workers running',
            labelnames=workers_labels, namespace=self.namespace,
            registry=self.registry)

    @defer.inlineCallbacks
    def registerConsumers(self):
        self.removeConsumers()
        startConsuming = self.master.mq.startConsuming
        handlers = (
            (('builds', None, None), self.buildsConsumer),
            (('builders', None, None), self.buildersConsumer),
            (('buildsets', None, None), self.buildSetsConsumer),
            (('buildrequests', None, None), self.buildRequestsConsumer),
            (('steps', None, None), self.stepsConsumer),
            (('workers', None, None), self.workersConsumer),
        )
        for routingKey, handler in handlers:
            consumer = yield startConsuming(handler, routingKey)
            self.consumers.append(consumer)

    @defer.inlineCallbacks
    def removeConsumers(self):
        for consumer in self.consumers:
            yield consumer.stopConsuming()
        self.consumers = []

    # @defer.inlineCallbacks
    def buildsConsumer(self, key, msg):
        '''
        This method is responsible for updating build related metrics.
        There are four build metrics:

        - buildbot_builds_duration_seconds,
        - buildbot_builds_success,
        - buildbot_builds_failure,
        - buildbot_builds_error

        buildbot_builds_duration_seconds is a gauge metric used to track the
        duration of individual builds by making use of Prometheus multi
        dimensional labels. As builds complete, an instance of this metric
        is created by passing builder_id and worker_id labels and then
        setting the value. This allows visualisation tools to query and
        filter metrics for specific builder combinations. Similarly, the
        other counter metrics record success, failure and error states for
        each build.
        '''
        action = key[2]
        labels = dict(builder_id=msg['builderid'], worker_id=msg['workerid'])
        # build_info = yield self.master.data.get(("builds", msg['buildid']))
        if action == 'finished':
            assert msg['complete']
            build_started = msg['started_at']
            build_finished = msg['complete_at']
            build_duration = build_finished - build_started
            duration_seconds = build_duration
            self.g_builds_duration.labels(**labels).set(duration_seconds)

            build_status = resolve_results_status(msg['results'])
            if build_status == 'success':
                self.c_builds_success.labels(**labels).inc()
            elif build_status == 'failure':
                self.c_builds_failure.labels(**labels).inc()
            elif build_status == 'error':
                self.c_builds_error.labels(**labels).inc()

    def buildersConsumer(self, key, msg):
        '''
        The Buildmaster runs a collection of Builders, each of which handles
        a single type of build (e.g. full versus quick), on one or more
        workers. Builders serve as a kind of queue for a particular type of
        build. Each Builder gets a separate column in the waterfall display.
        In general, each Builder runs independently.

        Each builder is a long-lived object which controls a sequence of
        Builds. Each Builder is created when the config file is first
        parsed, and lives forever (or rather until it is removed from the
        config file). It mediates the connections to the workers that do
        all the work, and is responsible for creating the Build objects -
        Builds.

        This method is responsible for updating builder related metrics.
        There are two builder metrics ``buildbot_builders_running_total``
        and ``buildbot_builders_running``.

        ``buildbot_builders_running_total`` is a gauge metric used to track
        the total number of running builders. As builders start the metric
        is increased and as they stop the metric is decreased. No extra
        labels are used with this metric.

        ``buildbot_builders_running`` is a gauge metric used to track the
        running state of individual builders by making use of Prometheus
        multi dimensional labels. As builders start, an instance of this
        metric is created by passing ``builder_id`` and ``builder_name``
        labels and then incremented. When the builder stops the same gauge
        metric is decreased. This means that a gauge value of 1 indicates
        started while a gauge value of 0 indicates stopped.
        '''
        action = key[2]
        labels = dict(builder_id=msg['builderid'], builder_name=msg['name'])
        if action == 'started':
            self.g_builders_running_total.inc()
            self.g_builders_running.labels(**labels).inc()
        elif action == 'stopped':
            self.g_builders_running_total.dec()
            self.g_builders_running.labels(**labels).dec()

    # @defer.inlineCallbacks
    def buildSetsConsumer(self, key, msg):
        '''
        A BuildSet is the name given to a set of Builds that all
        compile/test the same version of the tree on multiple Builders. In
        general, all these component Builds will perform the same sequence
        of Steps, using the same source code, but on different platforms or
        against a different set of libraries.

        Each scheduler creates and submits BuildSet objects to the
        BuildMaster. The buildmaster is responsible for turning the
        BuildSet into a set of BuildRequest objects and queueing them on
        the appropriate Builders.

        This method is responsible for updating build set related metrics.
        There are four build set metrics:

        - buildbot_buildsets_duration_seconds,
        - buildbot_buildsets_success,
        - buildbot_buildsets_failure,
        - buildbot_buildsets_error

        buildbot_buildsets_duration_seconds is a gauge metric used to track
        the duration of individual build sets by making use of Prometheus
        multi dimensional labels. As build sets complete, an instance of
        this metric is created by passing buildset_id labels and then
        setting the value. This allows visualisation tools to query and
        filter metrics for specific builder combinations. Similarly, the
        other counter metrics record success, failure and error states for
        each build set.
        '''
        action = key[2]
        # TODO: substitute bsid for something more useful. bsid is just
        # a number that increments. A better choice would be something
        # like the repo, project, etc
        labels = dict(buildset_id=msg['bsid'])
        # buildset_info = yield self.master.data.get(("buildsets", msg['bsid']))
        if action == 'complete':
            assert msg['complete']
            buildset_started = msg['submitted_at']
            buildset_finished = msg['complete_at']
            buildset_duration = buildset_finished - buildset_started
            duration_seconds = buildset_duration
            self.g_buildsets_duration.labels(**labels).set(duration_seconds)

            bs_success = resolve_results_status(msg['results'])
            if bs_success == 'success':
                self.c_buildsets_success.labels(**labels).inc()
            elif bs_success == 'failure':
                self.c_buildsets_failure.labels(**labels).inc()
            elif bs_success == 'error':
                self.c_buildsets_error.labels(**labels).inc()

    def buildRequestsConsumer(self, key, msg):
        '''
        A BuildRequest is a request to build a specific set of source code
        on a single Builder. Each Builder runs the BuildRequest as soon as
        it can (i.e. when an associated worker becomes free).

        This method is responsible for updating build request related
        metrics. There are four build request metrics:

        - buildbot_build_requests_duration_seconds
        - buildbot_build_requests_success
        - buildbot_build_requests_failure
        - buildbot_build_requests_error

        buildbot_build_requests_duration_seconds is a gauge metric used to
        track the duration of individual build requests by making use of
        Prometheus multi dimensional labels. As build requests complete, an
        instance of this metric is created by passing builder_id labels and
        then setting the value. This allows visualisation tools to query
        and filter metrics for specific builder combinations. Similarly,
        the other counter metrics record success, failure and error states
        for each build request.
        '''
        action = key[2]
        labels = dict(builder_id=msg['builderid'])
        if action == 'complete':
            assert msg['complete']
            br_started = msg['submitted_at']
            br_finished = msg['complete_at']
            br_duration = br_finished - br_started
            duration_seconds = br_duration
            self.g_build_requests_duration.labels(
                **labels).set(duration_seconds)

            br_success = resolve_results_status(msg['results'])
            if br_success == 'success':
                self.c_build_requests_success.labels(**labels).inc()
            elif br_success == 'failure':
                self.c_build_requests_failure.labels(**labels).inc()
            elif br_success == 'error':
                self.c_build_requests_error.labels(**labels).inc()

    @defer.inlineCallbacks
    def stepsConsumer(self, key, msg):
        '''
        This method is responsible for updating step related metrics. There
        are four steps metrics:

        - buildbot_steps_duration_seconds,
        - buildbot_steps_success
        - buildbot_steps_failure
        - buildbot_steps_error

        buildbot_steps_duration_seconds is a gauge metric used to track the
        duration of individual steps by making use of Prometheus multi
        dimensional labels. As steps complete, an instance of this metric
        is created by passing step_number, step_name, builder_id and
        worker_id labels and then setting the value. This allows
        visualisation tools to query and filter metrics for specific step,
        builder and worker combinations. Similarly, the other counter
        metrics record success, failure and error states for each step.
        '''
        action = key[2]
        build_info = yield self.master.data.get(("builds", msg['buildid']))
        labels = dict(
            step_number=msg['number'],
            step_name=msg['name'],
            builder_id=build_info['builderid'],
            worker_id=build_info['workerid'])
        if action == 'finished':
            assert msg['complete']
            step_started = msg['started_at']
            step_finished = msg['complete_at']
            step_duration = step_finished - step_started
            duration_seconds = step_duration
            self.g_steps_duration.labels(**labels).set(duration_seconds)

            step_success = resolve_results_status(msg['results'])
            if step_success == 'success':
                self.c_steps_success.labels(**labels).inc()
            elif step_success == 'failure':
                self.c_steps_failure.labels(**labels).inc()
            elif step_success == 'error':
                self.c_steps_error.labels(**labels).inc()

    def workersConsumer(self, key, msg):
        '''
        This method is responsible for updating worker related metrics.
        There are two worker metrics ``buildbot_workers_running_total`` and
        ``buildbot_workers_running``.

        ``buildbot_workers_running_total`` is a gauge metric used to track
        the total number of running workers. As workers connect the metric
        is increased and as they disconnect the metric is decreased. No
        extra labels are used with this metric.

        ``buildbot_workers_running`` is a gauge metric used to track the
        running state of individual workers by making use of Prometheus
        multi dimensional labels. As workers connect, an instance of this
        metric is created by passing ``worker_id`` and ``worker_name``
        labels and then incremented. When the worker disconnects the same
        gauge metric is decreased. This means that a gauge value of 1
        indicates connected while a gauge value of 0 indicates
        disconnected.
        '''
        action = key[2]
        labels = dict(worker_id=msg['workerid'], worker_name=msg['name'])
        if action == 'connected':
            self.g_workers_running_total.inc()
            self.g_workers_running.labels(**labels).inc()
        elif action == 'disconnected':
            self.g_workers_running_total.dec()
            self.g_workers_running.labels(**labels).dec()
def process_request(t):
    """A dummy function that takes some time."""
    time.sleep(t)


if __name__ == '__main__':
    # Start up the server to expose the metrics.
    start_http_server(8111)

    # examples for counter/gauge/summary/histogram
    c = Counter('myfake_failures_total', 'Description of counter')
    g = Gauge('myfake_inprogress_requests', 'Description of gauge')
    s = Summary('myfake_summary_request_latency_seconds', 'Description of summary')
    h = Histogram('myfake_histogram_request_latency_seconds', 'Description of histogram')

    while True:
        # counter example
        c.inc()                    # Increment by 1
        # c.inc(random.random())   # Increment by given value

        # gauge example
        g.inc()        # Increment by 1
        # g.dec(10)    # Decrement by given value
        # g.set(4.2)   # Set to a given value

        # summary example
        s.observe(1.1)   # Observe 1.1 (seconds in this case)

        # Generate some requests.
        process_request(random.random())

        # histogram example
        h.observe(4.7)   # Observe 4.7 (seconds in this case)