Esempio n. 1
0
    def collect(self):
        """Yield the in-flight gauge, then one gauge per sub-metric.

        Invoked by the prometheus client when it reads metrics.

        Note: may be called by a separate thread, which is why every access
        to the registrations goes through ``self._lock``.
        """
        total_gauge = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels)

        # Snapshot the keys under the lock so that we never iterate over the
        # registrations while they are being mutated.
        with self._lock:
            registered_keys = list(self._registrations)

        per_key_metrics = {}
        for key in registered_keys:
            # Copy the callbacks for this key so they can be invoked without
            # holding the lock.
            with self._lock:
                pending = set(self._registrations[key])

            total_gauge.add_metric(key, len(pending))

            # Each callback fills in the same fresh metrics object for this key.
            collected = self._metrics_class()
            per_key_metrics[key] = collected
            for update in pending:
                update(collected)

        yield total_gauge

        for sub_name in self.sub_metrics:
            sub_gauge = GaugeMetricFamily("_".join([self.name, sub_name]), "", labels=self.labels)
            for key, collected in six.iteritems(per_key_metrics):
                sub_gauge.add_metric(key, getattr(collected, sub_name))
            yield sub_gauge
 def collect(self):
     """Yield one gauge family per configured app and queue attribute.

     For every attribute in ``self.APP_ATTIBUTES`` a family labelled by app
     ``id`` is produced from ``self.client.list_apps``; afterwards the same
     is done for ``self.QUEUE_ATTRIBUTES`` using ``self.client.list_queue``.
     Entries whose value resolves to ``None`` are left out of the family.
     """
     apps = self.client.list_apps(embed_task_stats=True)
     for app_attribute in self.APP_ATTIBUTES:
         metric_family = GaugeMetricFamily(
             self.get_metric_key(app_attribute, 'apps'),
             documentation='from v2/apps?embed=apps.taskStats value of %s' % app_attribute,
             labels=["id"])
         for app in apps:
             value = self.get_metric_value(app_attribute, app)
             if value is None:
                 continue
             metric_family.add_metric([app.id], value)
         yield metric_family

     # The queue is only fetched once all app families have been consumed.
     queue = self.client.list_queue()
     for queue_attribute in self.QUEUE_ATTRIBUTES:
         metric_family = GaugeMetricFamily(
             self.get_metric_key(queue_attribute, 'queue'),
             documentation='from v2/queue value of %s' % queue_attribute,
             labels=["id"])
         for queue_item in queue:
             value = self.get_metric_value(queue_attribute, queue_item)
             if value is None:
                 continue
             metric_family.add_metric([queue_item.app.id], value)
         yield metric_family
    def collect(self):
        """Yield the in-flight background process gauge and the static counters.

        The per-description process count is reported first; afterwards every
        metric returned by the module-level background process counters is
        passed through unchanged.
        """
        in_flight_gauge = GaugeMetricFamily(
            "synapse_background_process_in_flight_count",
            "Number of background processes in flight",
            labels=["name"],
        )

        # Take a snapshot under the lock: both the dict and the process
        # collections it holds can change from underneath us.
        with _bg_metrics_lock:
            snapshot = {
                desc: list(procs)
                for desc, procs in six.iteritems(_background_processes)
            }

        for desc, procs in six.iteritems(snapshot):
            in_flight_gauge.add_metric((desc,), len(procs))
            for proc in procs:
                proc.update_metrics()

        yield in_flight_gauge

        # Now run collect() over each of the static Counters and yield each
        # metric they return.
        static_counters = (
            _background_process_ru_utime,
            _background_process_ru_stime,
            _background_process_db_txn_count,
            _background_process_db_txn_duration,
            _background_process_db_sched_duration,
        )
        for counter in static_counters:
            for family in counter.collect():
                yield family
Esempio n. 4
0
 def collect(self):
     """Yield a gauge with the seconds elapsed since ``last_ticked``."""
     gauge = GaugeMetricFamily(
         "python_twisted_reactor_last_seen",
         "Seconds since the Twisted reactor was last seen",
     )
     seconds_since_tick = time.time() - last_ticked
     gauge.add_metric([], seconds_since_tick)
     yield gauge
Esempio n. 5
0
 def collect(self):
     """Yield the ticket-count gauge for the configured departments.

     A session id is obtained on first use and cached on the instance.
     Only entries whose ``department_id`` is listed in
     ``self.department_ids`` contribute a sample.
     """
     logger.debug('Polling...')
     if not self.session_id:
         self.session_id = get_session_id(self.base_url, self.login, self.password)

     counts = get_tickes_count(self.base_url, self.session_id)
     gauge = GaugeMetricFamily(
         'support_tickets_total', 'Number of tickets', labels=['project', 'status'])
     for entry in counts:
         if entry['department_id'] not in self.department_ids:
             continue
         label_values = [entry['department'], entry['name']]
         gauge.add_metric(label_values, entry['count'])
     yield gauge
Esempio n. 6
0
    def collect(self):
        """Yield user and system CPU time gauges read from /proc/self/stat.

        Nothing is yielded when ``HAVE_PROC_SELF_STAT`` is false.
        """
        if not HAVE_PROC_SELF_STAT:
            return

        # Read the file in one go and close it straight away, so that the
        # handle is not kept open while the generator is suspended at a yield.
        with open("/proc/self/stat") as stat_file:
            line = stat_file.read()

        # Split after the ") " that closes the command name; the remaining
        # fields are space separated.
        # NOTE(review): indices 11 and 12 should be utime/stime per proc(5) -
        # confirm against the target kernel.
        raw_stats = line.split(") ", 1)[1].split(" ")

        user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
        user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
        yield user

        # Named ``system`` rather than ``sys`` to avoid shadowing the module.
        system = GaugeMetricFamily("process_cpu_system_seconds_total", "")
        system.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
        yield system
    def collect(self):
        """Yield one single-sample gauge per region of every service.

        The second element of the data returned by ``self._request_data()``
        is walked as nested mappings: region section -> category -> service
        -> region -> status key. The status key is translated through
        ``STATUSES``. Once the generator has been fully consumed, the time
        spent is recorded in ``COLLECTION_TIME``.
        """
        started_at = time.time()

        # Request data from Azure Status
        payload = self._request_data()
        sections = payload[1]

        for region_section in sections:
            categories = sections[region_section]
            for category in categories:
                services = categories[category]
                for service in services:
                    regions = services[service]
                    # The name depends only on category and service.
                    metric_name = "azure_status_{}_{}_status".format(category, service).replace(".", "_")
                    for region in regions:
                        gauge = GaugeMetricFamily(
                            metric_name,
                            'Azure Status for {}'.format(metric_name),
                            labels=["region"])
                        gauge.add_metric([region], STATUSES[regions[region]])
                        yield gauge

        elapsed = time.time() - started_at
        COLLECTION_TIME.observe(elapsed)
Esempio n. 8
0
    def collect(self) -> Iterator[GaugeMetricFamily]:
        """Yield a gauge with the number of active, idle and dead slaves.

        A slave is:

        * ``active`` when it is alive and has a current build,
        * ``idle`` when it is alive without a current build,
        * ``dead`` when it is not alive and was not deliberately shut down.

        Slaves that are not alive but were put in shutdown mode on purpose
        are not counted at all.
        """
        active, idle, dead = 0, 0, 0
        for slave in self._get_slaves():
            # Query liveness exactly once per slave so that the classification
            # below is based on a single consistent answer.
            if slave.is_alive(use_cached=True):
                if slave.current_build_id is not None:
                    active += 1
                else:
                    idle += 1
            elif not slave.is_shutdown():
                # Not alive and not gracefully shut down: count it as dead.
                dead += 1

        slaves_gauge = GaugeMetricFamily('slaves', 'Total number of slaves', labels=['state'])
        slaves_gauge.add_metric(['active'], active)
        slaves_gauge.add_metric(['idle'], idle)
        slaves_gauge.add_metric(['dead'], dead)
        yield slaves_gauge
Esempio n. 9
0
    def collect(self):
        """Yield a single gauge populated from ``self.caller()``.

        When the callback returns a dict, each item becomes one sample keyed
        by its label values; any other return value becomes one unlabelled
        sample. If the callback raises, the error is logged and the gauge is
        yielded without samples.
        """
        gauge = GaugeMetricFamily(self.name, self.desc, labels=self.labels)

        try:
            result = self.caller()
        except Exception:
            logger.exception(
                "Exception running callback for LaterGauge(%s)",
                self.name,
            )
        else:
            if not isinstance(result, dict):
                gauge.add_metric([], result)
            else:
                for label_values, value in six.iteritems(result):
                    gauge.add_metric(label_values, value)

        yield gauge
class MachineTypeScrapeImporter:
    """Accumulates per-machine-type metric families from status reports."""

    def __init__(self):
        machine_type_label = "machineType"
        self.runnable = GaugeMetricFamily(
            "hydra_machine_type_runnable",
            "Number of currently runnable builds",
            labels=[machine_type_label])
        self.running = GaugeMetricFamily(
            "hydra_machine_type_running",
            "Number of currently running builds",
            labels=[machine_type_label])
        self.wait_time = CounterMetricFamily(
            "hydra_machine_type_wait_time_total",
            "Number of seconds spent waiting",
            labels=[machine_type_label])
        self.last_active = CounterMetricFamily(
            "hydra_machine_type_last_active_total",
            "Last time this machine type was active",
            labels=[machine_type_label])

    def load_machine_type(self, name, report):
        """Add the samples for machine type ``name`` taken from ``report``.

        ``runnable`` and ``running`` are always read; ``waitTime`` and
        ``lastActive`` are skipped when reading them raises ``KeyError``.
        """
        label_values = [name]
        self.runnable.add_metric(label_values, report.destructive_read("runnable"))
        self.running.add_metric(label_values, report.destructive_read("running"))

        optional_fields = (
            (self.wait_time, "waitTime"),
            (self.last_active, "lastActive"),
        )
        for family, field in optional_fields:
            try:
                family.add_metric([name], report.destructive_read(field))
            except KeyError:
                # This field is not present for every machine type.
                pass

        debug_remaining_state(report)

    def metrics(self):
        """Yield the four metric families in a fixed order."""
        for family in (self.runnable, self.running, self.wait_time, self.last_active):
            yield family
Esempio n. 11
0
    def collect(self):
        """Yield the ``sonar_metrics`` gauge built from the resources endpoint.

        The ``ncloc`` and ``coverage`` measures are requested for every
        resource; each measure becomes one sample labelled with the resource
        name and the measure key. When the request does not succeed an error
        is printed and nothing is yielded.
        """
        req_string = self.rest_url + '/resources?metrics=ncloc,coverage'

        # Use the session as a context manager so its connections are
        # released as soon as the response has been received.
        with requests.Session() as session:
            session.trust_env = False
            session.auth = (self.sonar_user, self.sonar_password)
            # NOTE(review): certificate verification is disabled here; confirm
            # this is intended for the target deployment.
            session.verify = False
            res = session.get(req_string)

        # METRIC: detailed test results
        c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics',
                              labels=['name', 'key'])
        if not res:
            # Single-argument print calls behave the same on Python 2 and 3.
            print("Error fetching from " + req_string)
            print(res)
            return

        for result in res.json():
            for msr in result['msr']:
                c.add_metric([result['name'], msr['key']], msr['val'])

        yield c
Esempio n. 12
0
    def collect(self):
        """Yield the ``sonar_metrics`` gauge for the configured measures.

        The measures named in ``self.metrics`` are requested for every
        resource; each measure becomes one sample labelled with the resource
        name and the measure key. Nothing is yielded when the request does
        not succeed or when the response lacks an expected key; both cases
        are logged as errors.
        """
        req_string = self.rest_url + '/resources?metrics=' + self.metrics

        # Use the session as a context manager so its connections are
        # released as soon as the response has been received.
        with requests.Session() as session:
            session.trust_env = False
            session.auth = (self.sonar_user, self.sonar_password)
            # NOTE(review): certificate verification is disabled here; confirm
            # this is intended for the target deployment.
            session.verify = False
            res = session.get(req_string)

        if not res:
            logging.error("Error fetching from %s", req_string)
            logging.error(res)
            return

        # METRIC: detailed test results
        c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics', labels=['name', 'key'])
        try:
            # Only the parsing is guarded; the yield stays outside the try.
            for result in res.json():
                for msr in result['msr']:
                    c.add_metric([result['name'], msr['key']], msr['val'])
        except KeyError:
            logging.error("Could not retrieve metrics from: %s", self.metrics)
            logging.error("Check argument sonar_metrics")
            return

        yield c
Esempio n. 13
0
 def convert_meter_metric(cls, marathon_key, marathon_metric):
     """Build a ``<key>_rate`` gauge from a meter metric.

     The returned family carries one sample per rate window (``1m``, ``5m``,
     ``15m`` and ``mean``), distinguished by the ``window`` label.
     """
     rate_key = '%s_rate' % cls.convert_metric_key(marathon_key)
     gauge = GaugeMetricFamily(
         name=rate_key,
         documentation='from %s' % marathon_key,
         labels=('window',))

     # (window label value, key of the rate inside the meter metric)
     windows = (
         ('1m', 'm1_rate'),
         ('5m', 'm5_rate'),
         ('15m', 'm15_rate'),
         ('mean', 'mean_rate'),
     )
     for window, rate_field in windows:
         gauge.add_metric((window,), marathon_metric[rate_field])
     return gauge
    def collect(self):
        '''Yield the task status, dag status and dag run duration gauges.'''

        # Task metrics
        task_info = get_task_state_info()
        t_state = GaugeMetricFamily(
            'airflow_task_status',
            'Shows the number of task starts with this status',
            labels=['dag_id', 'task_id', 'owner', 'status']
        )
        for task in task_info:
            # Tasks without a state are reported with the status 'none'.
            t_state.add_metric([task.dag_id, task.task_id, task.owners, task.state or 'none'], task.value)
        yield t_state

        # Dag Metrics
        dag_info = get_dag_state_info()
        d_state = GaugeMetricFamily(
            'airflow_dag_status',
            'Shows the number of dag starts with this status',
            labels=['dag_id', 'owner', 'status']
        )
        for dag in dag_info:
            d_state.add_metric([dag.dag_id, dag.owners, dag.state], dag.count)
        yield d_state

        # DagRun metrics
        dag_duration = GaugeMetricFamily(
            'airflow_dag_run_duration',
            'Duration of currently running dag_runs in seconds',
            labels=['dag_id', 'run_id']
        )
        # For these drivers the duration is passed through unchanged.
        # NOTE(review): presumably it is already a number of seconds - confirm.
        duration_is_numeric = Session.bind.driver in ('mysqldb', 'pysqlite')
        for dag in get_dag_duration_info():
            if duration_is_numeric:
                duration = dag.duration
            else:
                # total_seconds() covers the whole interval; the ``seconds``
                # attribute alone drops whole days, so runs longer than 24h
                # would be under-reported.
                duration = dag.duration.total_seconds()
            dag_duration.add_metric([dag.dag_id, dag.run_id], duration)
        yield dag_duration
class ArraySpaceMetrics():
    """
    Base class for FlashArray Prometheus array space metrics
    """
    def __init__(self, fa):
        self.fa = fa
        self.data_reduction = GaugeMetricFamily(
            'purefa_array_space_datareduction_ratio',
            'FlashArray overall data reduction',
            labels=['dimension'],
            unit='ratio')
        self.capacity = GaugeMetricFamily('purefa_array_space_capacity_bytes',
                                          'FlashArray overall space capacity',
                                          labels=['dimension'])
        self.provisioned = GaugeMetricFamily(
            'purefa_array_space_provisioned_bytes',
            'FlashArray overall provisioned space',
            labels=['dimension'])
        self.used = GaugeMetricFamily('purefa_array_space_used_bytes',
                                      'FlashArray overall used space',
                                      labels=['dimension'])

    def _add_samples(self, family, mapping):
        """
        Add one sample to ``family`` for every key of ``mapping``.
        The mapped entry is passed as the label values and the array element
        looked up under the key is the sample value, with 0 standing in for
        a missing (None) element.
        """
        for key in mapping:
            # Look the element up once so the None check and the reported
            # value always refer to the same reading.
            value = self.fa.get_array_elem(key)
            family.add_metric(mapping[key], 0 if value is None else value)

    def _data_reduction(self):
        """
        Create metrics of gauge type for array data reduction.
        Metrics values can be iterated over.
        """
        self._add_samples(self.data_reduction, mappings.array_drr_mapping)

    def _capacity(self):
        """
        Create metrics of gauge type for array capacity indicators.
        Metrics values can be iterated over.
        """
        self._add_samples(self.capacity, mappings.array_capacity_mapping)

    def _provisioned(self):
        """
        Create metrics of gauge type for array provisioned space indicators.
        Metrics values can be iterated over.
        """
        self._add_samples(self.provisioned, mappings.array_provisioned_mapping)

    def _used(self):
        """
        Create metrics of gauge type for array used space indicators.
        Metrics values can be iterated over.
        """
        self._add_samples(self.used, mappings.array_used_mapping)

    def get_metrics(self):
        """
        Populate all four metric families, then yield them in the order
        data reduction, capacity, provisioned, used.
        """
        self._data_reduction()
        self._capacity()
        self._provisioned()
        self._used()
        yield self.data_reduction
        yield self.capacity
        yield self.provisioned
        yield self.used
    def collect(self):
        """Yield queue manager metrics followed by the collection time.

        For every entry of ``QueueManagersStatus`` four gauges (CPU usage,
        memory used, file system used, file system allocated) and one info
        metric are yielded. A final gauge reports the seconds elapsed since
        the method started. Nothing is yielded when the REST call returns an
        empty string.
        """
        started = time.time()

        # Perform REST API call to fetch data
        payload = call_rest_api('/mgmt/status/default/QueueManagersStatus',
                                self.ip, self.port, self.session, self.timeout)
        if payload == '':
            return

        # (metric name, help text, status field, multiplier or None for the
        # raw value). Sizes are scaled by 1000000 because, per the original
        # note, they are in MB and not MiB.
        gauge_specs = (
            ('mqa_queue_manager_cpu_usage',
             'The instantaneous CPU usage by the queue manager as a percentage of the CPU load',
             'CpuUsage', None),
            ('mqa_queue_manager_memory_bytes_used',
             'The amount of memory in bytes that is currently in use by the queue manager',
             'UsedMemory', 1000000),
            ('mqa_queue_manager_fs_bytes_used',
             'The amount of file system in bytes that is currently in use by the queue manager',
             'UsedFs', 1000000),
            ('mqa_queue_manager_fs_bytes_allocated',
             'The amount of file system in bytes allocated for the queue manager',
             'TotalFs', 1000000),
        )

        def or_unknown(text):
            # Empty role/status strings are reported as 'Unknown'.
            return 'Unknown' if text == '' else text

        # Update Prometheus metrics
        for qm in payload['QueueManagersStatus']:
            label_values = [self.appliance, qm['Name'], qm['Status']]

            for metric_name, help_text, field, scale in gauge_specs:
                gauge = GaugeMetricFamily(
                    metric_name, help_text,
                    labels=['appliance', 'qm', 'status'])
                raw = qm[field]
                gauge.add_metric(label_values,
                                 raw if scale is None else raw * scale)
                yield gauge

            info = InfoMetricFamily('mqa_queue_manager',
                                    'MQ Appliance queue manager information')
            info.add_metric(
                [
                    'appliance', 'qm', 'status', 'haRole', 'haStatus',
                    'drRole', 'drStatus'
                ], {
                    'appliance': self.appliance,
                    'qm': qm['Name'],
                    'status': qm['Status'],
                    'haRole': or_unknown(qm['HaRole']),
                    'haStatus': or_unknown(qm['HaStatus']),
                    'drRole': or_unknown(qm['DrRole']),
                    'drStatus': or_unknown(qm['DrStatus']),
                })
            yield info

        elapsed = GaugeMetricFamily(
            'mqa_exporter_queue_managers_elapsed_time_seconds',
            'Exporter eleapsed time to collect queue managers metrics',
            labels=['appliance'])
        elapsed.add_metric([self.appliance], time.time() - started)
        yield elapsed
Esempio n. 17
0
 def test_gauge_labels(self):
     """A labelled gauge sample can be read back from the registry."""
     family = GaugeMetricFamily('g', 'help', labels=['a'])
     family.add_metric(['b'], 2)
     self.custom_collector(family)
     sample = self.registry.get_sample_value('g', {'a': 'b'})
     self.assertEqual(2, sample)
Esempio n. 18
0
    def collect(self, name: str) -> Generator:
        '''
        Yields the metric families for all readings held by this object.

        Readings that are ``None`` are left out. The families of the
        household and grid readings, and of the power switch when it is
        available, are appended at the end.

        :param name: Name of the inverter, used as the ``inverter`` label
        '''

        identity = (self.serial_number, self.parameter_file,
                    self.control_software_version)
        if all(item is not None for item in identity):
            yield InfoMetricFamily(
                'rctmon_inverter', 'Information about the inverter', {
                    'inverter': name,
                    'serial_number': self.serial_number,
                    'parameter_file': self.parameter_file,
                    'control_software_version': self.control_software_version
                })

        # Generators
        if self.have_generator_a or self.have_generator_b:
            voltage = GaugeMetricFamily('rctmon_generator_voltage',
                                        'Solar generator voltage',
                                        labels=['inverter', 'generator'],
                                        unit='volt')
            power = GaugeMetricFamily('rctmon_generator_power',
                                      'Solar generator power',
                                      labels=['inverter', 'generator'],
                                      unit='watt')
            mpp_target = GaugeMetricFamily(
                'rctmon_generator_mpp_target_voltage',
                'Target voltage of MPP tracker',
                labels=['inverter', 'generator'],
                unit='volt')
            mpp_step = GaugeMetricFamily(
                'rctmon_generator_mpp_search_step',
                'MPP search step',
                labels=['inverter', 'generator'],
                unit='volt')

            def add_generator(readings: SolarGeneratorReadings,
                              generator: str) -> None:
                # Pair every family with the reading that feeds it.
                samples = (
                    (voltage, readings.voltage),
                    (power, readings.power),
                    (mpp_target, readings.mpp_target_voltage),
                    (mpp_step, readings.mpp_search_step),
                )
                for family, value in samples:
                    if value is not None:
                        family.add_metric([name, generator], value)

            if self.have_generator_a:
                add_generator(self.solar_generator_a, 'a')
            if self.have_generator_b:
                add_generator(self.solar_generator_b, 'b')

            for family in (voltage, power, mpp_target, mpp_step):
                yield family

        temp = GaugeMetricFamily('rctmon_temperature',
                                 'Temperature values in °C',
                                 labels=['inverter', 'sensor'])
        temperature_sensors = (
            ('heatsink', self.temperature_heatsink),  # db.temp1
            ('heatsink_battery_actuator',
             self.temperature_heatsink_batt),  # db.temp2
            ('core', self.temperature_core),  # db.core_temp
        )
        for sensor, reading in temperature_sensors:
            if reading is not None:
                temp.add_metric([name, sensor], reading)
        yield temp

        status = self.inverter_status
        if status is not None:
            ivs = GaugeMetricFamily('rctmon_inverter_status',
                                    'Status of the inverter',
                                    labels=['inverter'])
            ivs.add_metric([name], status)
            yield ivs

        grid_separated = self.inverter_grid_separated
        if grid_separated is not None:
            igs = GaugeMetricFamily('rctmon_inverter_grid_separated',
                                    'Status of the island mode',
                                    labels=['inverter'])
            igs.add_metric([name], grid_separated)
            yield igs

        faults = GaugeMetricFamily('rctmon_inverter_faults',
                                   'Fault registers',
                                   labels=['inverter', 'register'])
        fault_registers = (self.fault0, self.fault1, self.fault2, self.fault3)
        for register, fault in enumerate(fault_registers):
            if fault is not None:
                faults.add_metric([name, str(register)], fault)
        yield faults

        yield from self.household.collect(name)
        yield from self.grid.collect(name)

        if self.power_switch_available:
            yield from self.power_switch_readings.collect(name)
Esempio n. 19
0
    def collect(self):
        """Yield the per-semaphore metric families of the semaphore extension.

        Every semaphore listed in the extension's ``max_leases`` contributes
        one sample, labelled with its name, to each of the six families.
        """
        from prometheus_client.core import CounterMetricFamily, GaugeMetricFamily

        extension = self.server.extensions["semaphores"]

        max_leases_family = GaugeMetricFamily(
            "semaphore_max_leases",
            "Maximum leases allowed per semaphore, this will be constant for each semaphore during its lifetime.",
            labels=["name"],
        )
        active_leases_family = GaugeMetricFamily(
            "semaphore_active_leases",
            "Amount of currently active leases per semaphore.",
            labels=["name"],
        )
        pending_leases_family = GaugeMetricFamily(
            "semaphore_pending_leases",
            "Amount of currently pending leases per semaphore.",
            labels=["name"],
        )
        acquire_total_family = CounterMetricFamily(
            "semaphore_acquire_total",
            "Total number of leases acquired per semaphore.",
            labels=["name"],
        )
        release_total_family = CounterMetricFamily(
            "semaphore_release_total",
            "Total number of leases released per semaphore.\n"
            "Note: if a semaphore is closed while there are still leases active, this count will not equal "
            "`semaphore_acquired_total` after execution.",
            labels=["name"],
        )
        average_pending_family = GaugeMetricFamily(
            "semaphore_average_pending_lease_time",
            "Exponential moving average of the time it took to acquire a lease per semaphore.\n"
            "Note: this only includes time spent on scheduler side, it does not include time spent on communication.\n"
            "Note: this average is calculated based on order of leases instead of time of lease acquisition.",
            labels=["name"],
            unit="s",
        )

        stats = extension.metrics
        for sem_name, limit in extension.max_leases.items():
            # Pair each family with the value it reports for this semaphore.
            samples = (
                (max_leases_family, limit),
                (active_leases_family, len(extension.leases[sem_name])),
                (pending_leases_family, stats["pending"][sem_name]),
                (acquire_total_family, stats["acquire_total"][sem_name]),
                (release_total_family, stats["release_total"][sem_name]),
                (average_pending_family,
                 stats["average_pending_lease_time"][sem_name]),
            )
            for family, value in samples:
                family.add_metric([sem_name], value)

        for family in (
            max_leases_family,
            active_leases_family,
            pending_leases_family,
            acquire_total_family,
            release_total_family,
            average_pending_family,
        ):
            yield family
Esempio n. 20
0
    def collect(self):
        """Yield clock, temperature, fan, power and memory gauges of the device.

        Each gauge holds a single sample labelled with ``self.labels``. Any
        exception raised along the way is logged as a warning and ends the
        collection; gauges already yielded are unaffected.
        """

        def build(suffix, description, value):
            # One single-sample family named and described with the
            # configured prefixes.
            family = GaugeMetricFamily(self.prefix + suffix,
                                       self.prefix_s + description,
                                       labels=self.labels.keys())
            family.add_metric(self.labels.values(), value)
            return family

        try:
            log.debug('Querying for clocks information...')
            graphics_clock_mhz = nvmlDeviceGetClockInfo(
                self.device, NVML_CLOCK_GRAPHICS)
            # Clocks are queried in MHz and exported in Hz.
            yield build('clock_gpu_hz', "GPU clock",
                        graphics_clock_mhz * 1000000)
            mem_clock_mhz = nvmlDeviceGetClockInfo(self.device, NVML_CLOCK_MEM)
            yield build('clock_mem_hz', "MEM clock", mem_clock_mhz * 1000000)

            log.debug('Querying for temperature information...')
            gpu_temperature_c = nvmlDeviceGetTemperature(
                self.device, NVML_TEMPERATURE_GPU)
            yield build('gpu_temperature_c', "GPU temperature",
                        gpu_temperature_c)

            log.debug('Querying for fan information...')
            yield build('fan_speed_percent', "fan speed",
                        nvmlDeviceGetFanSpeed(self.device))

            log.debug('Querying for power information...')
            # The raw reading is divided by 1000 to obtain the exported value.
            power_usage_w = nvmlDeviceGetPowerUsage(self.device) / 1000.0
            yield build('power_draw_watt', "power draw", power_usage_w)
            yield build('power_state', "power state",
                        nvmlDeviceGetPowerState(self.device))

            log.debug('Querying for memory information...')
            mem_info = nvmlDeviceGetMemoryInfo(self.device)
            yield build('memory_total_bytes', "total memory", mem_info.total)
            yield build('memory_used_bytes', "used memory", mem_info.used)

            log.info('collected power:%.1fW temp:%dc gpu:%dMHz mem:%dMHz',
                     power_usage_w, gpu_temperature_c, graphics_clock_mhz,
                     mem_clock_mhz)
        except Exception as e:
            log.warning(e, exc_info=True)
  def collect(self):
    """Yield Prometheus metric families built from the stored telegram.

    Nothing is yielded while ``self.telegram`` is ``None``. The per-phase
    instantaneous gauges and the short power failure counter are skipped when
    reading one of their fields raises ``KeyError``; every other field is
    read unconditionally, so a ``KeyError`` there propagates to the caller.
    """
    # Work on one snapshot so every family comes from the same telegram.
    telegram = self.telegram
    # Identity test instead of ``== None``, which would defer to a custom
    # ``__eq__`` on the telegram object.
    if telegram is None:
      return

    def reading(ref):
      """Return the value stored in the telegram under ``ref``."""
      return telegram[ref].value

    def per_phase(family_cls, name, documentation, ref_l1, ref_l2, ref_l3):
      """Build a family with one sample per phase label (L1, L2, L3)."""
      family = family_cls(name, documentation, labels=['phase'])
      for phase, ref in (('L1', ref_l1), ('L2', ref_l2), ('L3', ref_l3)):
        family.add_metric([phase], reading(ref))
      return family

    def per_tariff(name, documentation, refs):
      """Build a counter with one sample per 1-based tariff index."""
      family = CounterMetricFamily(name, documentation, labels=['tariff'])
      for tariff, ref in enumerate(refs, start=1):
        family.add_metric(['%d' % tariff], reading(ref))
      return family

    yield GaugeMetricFamily('current_electricity_usage_kw', 'Current electricity usage by client in kW.',
      reading(obiref.CURRENT_ELECTRICITY_USAGE))

    yield GaugeMetricFamily('current_electricity_delivery_kw', 'Current electricity delivery by client in kW.',
      reading(obiref.CURRENT_ELECTRICITY_DELIVERY))

    # The try bodies below only cover building a family; the yield happens in
    # the else branch so the handler guards nothing but the telegram lookups.
    try:
      voltage = per_phase(GaugeMetricFamily, 'instantaneous_voltage_v', 'Instantaneous voltage per phase in Volt.',
        obiref.INSTANTANEOUS_VOLTAGE_L1, obiref.INSTANTANEOUS_VOLTAGE_L2, obiref.INSTANTANEOUS_VOLTAGE_L3)
    except KeyError:
      # Not all meters provide this data
      pass
    else:
      yield voltage

    try:
      current = per_phase(GaugeMetricFamily, 'instantaneous_current_c', 'Instantaneous current per phase in Ampere.',
        obiref.INSTANTANEOUS_CURRENT_L1, obiref.INSTANTANEOUS_CURRENT_L2, obiref.INSTANTANEOUS_CURRENT_L3)
    except KeyError:
      # Not all meters provide this data
      pass
    else:
      yield current

    try:
      power_positive = per_phase(GaugeMetricFamily, 'instantaneous_power_positive_kw', 'Instantaneous positive power per phase in kW.',
        obiref.INSTANTANEOUS_ACTIVE_POWER_L1_POSITIVE, obiref.INSTANTANEOUS_ACTIVE_POWER_L2_POSITIVE, obiref.INSTANTANEOUS_ACTIVE_POWER_L3_POSITIVE)
    except KeyError:
      # Not all meters provide this data
      pass
    else:
      yield power_positive

    try:
      power_negative = per_phase(GaugeMetricFamily, 'instantaneous_power_negative_kw', 'Instantaneous negative power per phase in kW.',
        obiref.INSTANTANEOUS_ACTIVE_POWER_L1_NEGATIVE, obiref.INSTANTANEOUS_ACTIVE_POWER_L2_NEGATIVE, obiref.INSTANTANEOUS_ACTIVE_POWER_L3_NEGATIVE)
    except KeyError:
      # Not all meters provide this data
      pass
    else:
      yield power_negative

    yield GaugeMetricFamily('current_electricity_tariff', 'Current electricity tariff active.',
      int(reading(obiref.ELECTRICITY_ACTIVE_TARIFF)))

    yield per_tariff('electricity_used_kwh', 'Electricity used by client in kWh.',
      obiref.ELECTRICITY_USED_TARIFF_ALL)

    yield per_tariff('electricity_delivered_kwh', 'Electricity delivered by client in kWh.',
      obiref.ELECTRICITY_DELIVERED_TARIFF_ALL)

    yield per_phase(CounterMetricFamily, 'voltage_sag_count', 'Number of voltage sags.',
      obiref.VOLTAGE_SAG_L1_COUNT, obiref.VOLTAGE_SAG_L2_COUNT, obiref.VOLTAGE_SAG_L3_COUNT)

    yield per_phase(CounterMetricFamily, 'voltage_swell_count', 'Number of voltage swells.',
      obiref.VOLTAGE_SWELL_L1_COUNT, obiref.VOLTAGE_SWELL_L2_COUNT, obiref.VOLTAGE_SWELL_L3_COUNT)

    yield CounterMetricFamily('long_power_failure_count', 'Number of power long failures in any phase.',
        reading(obiref.LONG_POWER_FAILURE_COUNT))

    try:
      short_failures = CounterMetricFamily('short_power_failure_count', 'Number of power short failures in any phase.',
          reading(obiref.SHORT_POWER_FAILURE_COUNT))
    except KeyError:
      # Not all meters provide this data
      pass
    else:
      yield short_failures

    yield CounterMetricFamily('gas_used_m3', 'Gas delivered to client in m3.',
        reading(obiref.HOURLY_GAS_METER_READING))
Esempio n. 22
0
    def collect(self):
        """Yield one gauge family per site statistic, covering every wiki in ``indic_wikipedia``.

        For each language code the MediaWiki ``siteinfo`` statistics are
        fetched and one sample per statistic is added, labelled with the
        language code and the name looked up in ``all_lang``. The families
        are yielded only after every language has been processed; any
        exception is logged with its traceback and ends the collection.
        """
        # Statistic keys in the API response; each is exported as "wiki_<key>".
        stat_names = ("pages", "articles", "edits", "images", "users",
                      "activeusers", "admins")
        # Seconds to wait on each request. Without a timeout a single
        # unresponsive wiki would block the scrape indefinitely.
        request_timeout_s = 10

        try:
            families = [
                GaugeMetricFamily("wiki_" + stat,
                                  "wiki_" + stat,
                                  labels=["lang_code", "language"])
                for stat in stat_names
            ]

            params = {
                "action": "query",
                "meta": "siteinfo",
                "formatversion": "2",
                "format": "json",
                "siprop": "statistics",
            }

            for lang in indic_wikipedia:
                logger.info("Language: %s", lang)

                url = "https://" + lang + ".wikipedia.org/w/api.php"
                r = requests.get(url=url, params=params,
                                 timeout=request_timeout_s)

                label_values = [lang, all_lang[lang]["name"]]
                statistics = r.json()["query"]["statistics"]
                for stat, family in zip(stat_names, families):
                    family.add_metric(label_values, statistics[stat])

            for family in families:
                yield family

        except Exception:
            logger.error(traceback.format_exc())
    def collect(self):
        """Yield the Airflow task, DAG, scheduler and XCom gauge families.

        The families are produced lazily and in a fixed order; the query
        helper feeding a family is only called once the generator has
        advanced to that family.
        """
        # ---- Tasks: instances per status ----
        task_rows = get_task_state_info()
        task_status = GaugeMetricFamily(
            "airflow_task_status",
            "Shows the number of task instances with particular status",
            labels=["dag_id", "task_id", "owner", "status"],
        )
        for row in task_rows:
            # A missing state is exported as the literal label "none".
            label_values = [row.dag_id, row.task_id, row.owners,
                            row.state or "none"]
            task_status.add_metric(label_values, row.value)
        yield task_status

        # ---- Tasks: run duration ----
        duration_family = GaugeMetricFamily(
            "airflow_task_duration",
            "Duration of successful tasks in seconds",
            labels=["task_id", "dag_id", "execution_date"],
        )
        for row in get_task_duration_info():
            seconds = (row.end_date - row.start_date).total_seconds()
            label_values = [
                row.task_id,
                row.dag_id,
                row.execution_date.strftime("%Y-%m-%d-%H-%M"),
            ]
            duration_family.add_metric(label_values, seconds)
        yield duration_family

        # ---- Tasks: failure count ----
        failure_family = GaugeMetricFamily(
            "airflow_task_fail_count",
            "Count of failed tasks",
            labels=["dag_id", "task_id"],
        )
        for row in get_task_failure_counts():
            failure_family.add_metric([row.dag_id, row.task_id], row.count)
        yield failure_family

        # ---- DAGs: runs per status ----
        dag_rows = get_dag_state_info()
        dag_status = GaugeMetricFamily(
            "airflow_dag_status",
            "Shows the number of dag starts with this status",
            labels=["dag_id", "owner", "status"],
        )
        for row in dag_rows:
            dag_status.add_metric([row.dag_id, row.owners, row.state],
                                  row.count)
        yield dag_status

        # ---- DAGs: run duration ----
        dag_run_duration = GaugeMetricFamily(
            "airflow_dag_run_duration",
            "Duration of successful dag_runs in seconds",
            labels=["dag_id"],
        )
        for row in get_dag_duration_info():
            seconds = (row.end_date - row.start_date).total_seconds()
            dag_run_duration.add_metric([row.dag_id], seconds)
        yield dag_run_duration

        # ---- Scheduler: delay between execution date and DAG start ----
        dag_delay = GaugeMetricFamily(
            "airflow_dag_scheduler_delay",
            "Airflow DAG scheduling delay",
            labels=["dag_id"],
        )
        for row in get_dag_scheduler_delay():
            seconds = (row.start_date - row.execution_date).total_seconds()
            dag_delay.add_metric([row.dag_id], seconds)
        yield dag_delay

        # ---- XCom: configured keys pulled out of stored XCom values ----
        xcom_family = GaugeMetricFamily(
            "airflow_xcom_parameter",
            "Airflow Xcom Parameter",
            labels=["dag_id", "task_id"],
        )
        for entry in load_xcom_config().get("xcom_params", []):
            for param in get_xcom_params(entry["task_id"]):
                extracted = extract_xcom_parameter(param.value)
                wanted_key = entry["key"]
                if wanted_key not in extracted:
                    continue
                xcom_family.add_metric([param.dag_id, param.task_id],
                                       extracted[wanted_key])
        yield xcom_family

        # ---- Scheduler: delay between queueing and task start ----
        task_delay = GaugeMetricFamily(
            "airflow_task_scheduler_delay",
            "Airflow Task scheduling delay",
            labels=["queue"],
        )
        for row in get_task_scheduler_delay():
            seconds = (row.start_date - row.queued_dttm).total_seconds()
            task_delay.add_metric([row.queue], seconds)
        yield task_delay

        # ---- Scheduler: number of queued tasks (no labels) ----
        queued_family = GaugeMetricFamily(
            "airflow_num_queued_tasks",
            "Airflow Number of Queued Tasks",
        )
        queued_family.add_metric([], get_num_queued_tasks())
        yield queued_family
Esempio n. 24
0
    def collect(self):
        """Yield a ``python_gc_counts`` gauge with one sample per entry of ``gc.get_count()``.

        Each sample is labelled ``gen`` with the entry's position as a string.
        """
        gc_counts = GaugeMetricFamily(
            "python_gc_counts",
            "GC object counts",
            labels=["gen"],
        )
        for generation, count in enumerate(gc.get_count()):
            gen_label = str(generation)
            gc_counts.add_metric([gen_label], count)

        yield gc_counts
Esempio n. 25
0
    def collect(self):
        """Yield Bamboo test-result, agent-state and favourite-build metric families.

        Three groups are produced, in this order:

        * ``bamboo_test_results`` - one sample per test of the latest result of
          every job in ``self.bamboo_test_jobs`` (1 when successful, else 0);
        * ``bamboo_build_state`` - agent state counts from the dashboard
          summary, yielded only when that request succeeds;
        * ``build_results``, ``test_counts``, ``buildNumber`` and
          ``buildDurationInSeconds`` - taken from the newest result of each
          favourite plan.

        Failed requests are reported on stdout and the affected samples are
        left out.
        """
        session = requests.Session()
        session.trust_env = False
        session.auth = (self.bamboo_user, self.bamboo_password)
        # NOTE(review): certificate verification is switched off while basic
        # auth credentials are sent - confirm this is acceptable for the
        # deployment before relying on it.
        session.verify = False

        # METRIC: detailed test results
        test_results = GaugeMetricFamily('bamboo_test_results', 'Bamboo Test Results', labels=['name', 'job', 'className', 'methodName'])
        for job in self.bamboo_test_jobs:
            res = session.get(self.web_url + '/rest/api/latest/result/' + job + '/latest.json?expand=testResults.allTests')
            if res:
                # Decode the body once and reuse it for every test entry.
                results = res.json()
                for test_result in results['testResults']['allTests']['testResult']:
                    test_results.add_metric(
                        [results['plan']['name'], job, test_result['className'], test_result['methodName']],
                        test_result['status'] == 'successful')
            else:
                # Single-argument parenthesised print behaves the same on
                # Python 2 and Python 3.
                print("error fetching test results")
                print(res)
        yield test_results

        # METRIC: bamboo agent state
        build_state = GaugeMetricFamily('bamboo_build_state', 'Bamboo Build Dashboard', labels=['state', 'host'])
        res = session.get(self.web_url + '/build/admin/ajax/getDashboardSummary.action')
        if res:
            dashboard_summary = res.json()
            for host, values in self.tally_agent_info(dashboard_summary).items():
                for state, state_count in values.items():
                    build_state.add_metric([state, host], state_count)
            yield build_state
        else:
            print(res)

        # Collect results tagged as favourite, keeping one result per plan.
        newest_by_plan = {}
        r = session.get(
            self.web_url + '/rest/api/latest/result.json?favourite&expand=results.result.buildDurationInSeconds')
        if r.ok:
            # NOTE: this may return multiple results for the same plan - need to use highest build number
            for result in r.json()['results']['result']:
                key = result['plan']['key']
                # Skip only when the stored result is newer than this one; the
                # previous '<' test did the opposite and kept the oldest build.
                if key in newest_by_plan and newest_by_plan[key]['number'] > result['number']:
                    continue
                newest_by_plan[key] = result
        else:
            print(r)

        # METRIC: build status (favourites)
        build_fields = ['buildNumber', 'buildDurationInSeconds']
        test_count_fields = ['failedTestCount', 'skippedTestCount', 'quarantinedTestCount', 'successfulTestCount']

        status_metric = GaugeMetricFamily('build_results', 'Status of flagged plans', labels=['name', 'state'])
        test_metric = GaugeMetricFamily('test_counts', 'Test result counts', labels=['shortName', 'countType'])
        build_metrics = {field: GaugeMetricFamily(field, field, labels=['shortName']) for field in build_fields}
        for result in newest_by_plan.values():
            short_name = result['plan']['shortName']
            status_metric.add_metric([short_name, result['state']], result['successful'])
            for field in test_count_fields:
                test_metric.add_metric([short_name, field], result[field])
            for field, metric in build_metrics.items():
                metric.add_metric([short_name], result[field])

        yield status_metric
        yield test_metric
        for metric in build_metrics.values():
            yield metric
 def trivial_gauge(self, name, help, value):
     """Return a gauge family named ``hydra_<name>`` holding one unlabelled sample.

     ``help`` becomes the family's documentation string and ``value`` the
     value of its only sample.
     """
     metric_name = "hydra_{}".format(name)
     gauge = GaugeMetricFamily(metric_name, help)
     gauge.add_metric([], value)
     return gauge
class MachineScrapeImporter:
    """Accumulate per-machine samples into ``hydra_machine_*`` metric families.

    Every family carries a single ``host`` label. ``load_machine`` adds one
    sample per family for a machine and ``metrics`` yields the families.
    """

    def __init__(self):
        """Create the empty gauge and counter families."""
        host_label = ["host"]

        def gauge(metric, documentation):
            return GaugeMetricFamily(metric, documentation, labels=host_label)

        def counter(metric, documentation):
            return CounterMetricFamily(metric, documentation, labels=host_label)

        self.consective_failures = gauge(
            "hydra_machine_consecutive_failures",
            "Number of consecutive failed builds")
        self.current_jobs = gauge(
            "hydra_machine_current_jobs",
            "Number of current jobs")
        self.idle_since = gauge(
            "hydra_machine_idle_since",
            "When the current idle period started")
        self.disabled_until = gauge(
            "hydra_machine_disabled_until",
            "When the machine will be used again")
        self.enabled = gauge(
            "hydra_machine_enabled",
            "If the machine is enabled (1) or not (0)")
        # NOTE(review): documented as a timestamp yet exported as a counter
        # family - confirm this is intended.
        self.last_failure = counter(
            "hydra_machine_last_failure",
            "timestamp of the last failure")
        self.number_steps_done = counter(
            "hydra_machine_steps_done_total",
            "Total count of the steps completed")
        self.total_step_build_time = counter(
            "hydra_machine_step_build_time_total",
            "Number of seconds spent building steps")
        self.total_step_time = counter(
            "hydra_machine_step_time_total",
            "Number of seconds spent on steps")

    def load_machine(self, name, report):
        """Add one sample per family for machine ``name`` taken from ``report``.

        Fields that are not exported are passed to ``report.unused_read``;
        exported ones are fetched with ``report.destructive_read`` (or its
        ``_default`` variant). A ``KeyError`` while adding ``idleSince`` is
        ignored; the other reads are not guarded. Finally the report is
        handed to ``debug_remaining_state``.
        """
        for ignored_key in ("mandatoryFeatures", "supportedFeatures",
                            "systemTypes", "avgStepBuildTime", "avgStepTime"):
            report.unused_read(ignored_key)

        host = [name]
        self.consective_failures.add_metric(
            host, report.destructive_read("consecutiveFailures"))
        self.current_jobs.add_metric(
            host, report.destructive_read("currentJobs"))
        try:
            self.idle_since.add_metric(
                host, report.destructive_read("idleSince"))
        except KeyError:
            pass
        self.disabled_until.add_metric(
            host, report.destructive_read("disabledUntil"))
        # Exported as 1 when the reported flag is truthy, otherwise 0.
        self.enabled.add_metric(
            host, int(bool(report.destructive_read("enabled"))))
        self.last_failure.add_metric(
            host, report.destructive_read("lastFailure"))
        self.number_steps_done.add_metric(
            host, report.destructive_read("nrStepsDone"))
        self.total_step_build_time.add_metric(
            host,
            report.destructive_read_default("totalStepBuildTime", default=0))
        self.total_step_time.add_metric(
            host,
            report.destructive_read_default("totalStepTime", default=0))
        debug_remaining_state(report)

    def metrics(self):
        """Yield every metric family in the order they were created."""
        for attribute in ("consective_failures", "current_jobs", "idle_since",
                          "disabled_until", "enabled", "last_failure",
                          "number_steps_done", "total_step_build_time",
                          "total_step_time"):
            yield getattr(self, attribute)
Esempio n. 28
0
    def parse_for_prom(self):
        """Yield Prometheus metric families built from ``self._metrics``.

        The ``general``, ``fan``, ``thermal`` and ``power`` sections are read
        in that order. Every family name starts with ``self.prefix`` and
        status-like values are converted with ``self._cast``.
        """
        general = self._metrics['general']

        # General info as a single info family.
        general_info = InfoMetricFamily(self.prefix + '_general', '', labels=[])
        general_info.add_metric([], general)
        yield general_info

        # The same general fields again, one gauge each, so they carry a value.
        for suffix, field in (('_general_status', 'power_state'),
                              ('_general_health', 'health'),
                              ('_general_state', 'state')):
            general_gauge = GaugeMetricFamily(self.prefix + suffix,
                                              '',
                                              labels=[field])
            general_gauge.add_metric([general[field]],
                                     self._cast(general[field]))
            yield general_gauge

        # Fan metrics.
        fans = self._metrics['fan']
        redundancy = GaugeMetricFamily(self.prefix + '_fan_redundancy',
                                       '',
                                       labels=['health', 'state'])
        redundancy_labels = [fans['redundancy_health'],
                             fans['redundancy_state']]
        redundancy_value = int(
            self._cast(fans['redundancy_health']) +
            self._cast(fans['redundancy_state']))
        redundancy.add_metric(redundancy_labels, redundancy_value)
        yield redundancy

        fan_rpm = GaugeMetricFamily(self.prefix + '_fan_rpm',
                                    '',
                                    labels=['name', 'low_limit'])
        fan_status = GaugeMetricFamily(self.prefix + '_fan',
                                       '',
                                       labels=['name', 'health', 'state'])
        for fan in fans['list']:
            fan_rpm.add_metric([fan['name'], str(fan['low_limit'])],
                               self._cast(fan['rpm']))
            status_labels = [fan['name'], fan['health'], fan['state']]
            status_value = int(
                self._cast(fan['health']) + self._cast(fan['state']))
            fan_status.add_metric(status_labels, status_value)
        yield fan_rpm
        yield fan_status

        # Thermal metrics.
        thermal = self._metrics['thermal']
        thermal_location = GaugeMetricFamily(self.prefix + '_thermal_location',
                                             '',
                                             labels=['name', 'limit'])
        for location in thermal['location']:
            thermal_location.add_metric(
                [location['name'], str(location['limit'])],
                location['degres'])
        yield thermal_location

        # Power metrics.
        power = self._metrics['power']
        power_status = GaugeMetricFamily(self.prefix + '_power',
                                         '',
                                         labels=['health', 'state'])
        power_labels = [power['health'], power['state']]
        power_value = int(
            self._cast(power['state']) + self._cast(power['health']))
        power_status.add_metric(power_labels, power_value)
        yield power_status

        consumption = GaugeMetricFamily(self.prefix + '_power_comsumption',
                                        'watt consumption',
                                        labels=['type', 'unit', 'limit'])
        for kind in ('average', 'maxconsumed', 'minconsumed'):
            consumption.add_metric([kind, 'watt', str(power['limit'])],
                                   int(power[kind]))
        yield consumption

        # Power supply metrics.
        supplies = GaugeMetricFamily(
            self.prefix + '_power_supply',
            '',
            labels=['name', 'capacity', 'health', 'state'])
        for supply in power['powersupplies']:
            supply_labels = [
                supply['name'],
                str(supply['power_capacity']),
                supply['health'],
                supply['state'],
            ]
            supply_value = int(
                self._cast(supply['state']) + self._cast(supply['health']))
            supplies.add_metric(supply_labels, supply_value)

        yield supplies
    def collect(self):
        """Yield the ``vrops_vm_properties`` gauge and ``vrops_vm`` info families.

        For every target returned by ``self.get_vms_by_target()`` the
        properties configured under ``self.property_yaml[self.name]`` are
        fetched in three groups (``number_metrics``, ``enum_metrics``,
        ``info_metrics``). Number and enum values are added to the gauge
        family, info values to the info family. Targets without a token are
        skipped, as are result entries that carry no ``data`` or refer to a
        VM missing from ``self.vms``.
        """
        g = GaugeMetricFamily('vrops_vm_properties',
                              'testtest',
                              labels=[
                                  'vccluster', 'datacenter', 'virtualmachine',
                                  'hostsystem', 'propkey'
                              ])
        i = InfoMetricFamily(
            'vrops_vm',
            'testtest',
            labels=['vccluster', 'datacenter', 'virtualmachine', 'hostsystem'])
        # An unset DEBUG variable counts as "off" instead of raising KeyError.
        if os.environ.get('DEBUG', '0') >= '1':
            print(self.name, 'starts with collecting the metrics')

        def vm_labels(vm_id):
            """Return the four label values shared by both families."""
            vm = self.vms[vm_id]
            return [
                vm['cluster'], vm['datacenter'], vm['name'],
                vm['parent_host_name']
            ]

        def usable_entries(values):
            """Yield (vm_id, entry) for entries with data for a known VM."""
            for entry in values or ():
                if 'data' not in entry:
                    continue
                vm_id = entry['resourceId']
                if vm_id in self.vms:
                    yield vm_id, entry

        for target in self.get_vms_by_target():
            token = self.get_target_tokens()[target]

            if not token:
                print("skipping", target, "in", self.name, ", no token")
                # The message announces a skip, so actually skip: without a
                # token none of the requests below can be authorised.
                continue

            uuids = self.target_vms[target]
            properties = self.property_yaml[self.name]

            for property_pair in properties.get('number_metrics') or ():
                property_label = property_pair['label']
                propkey = property_pair['property']
                values = Resources.get_latest_number_properties_multiple(
                    target, token, uuids, propkey)
                for vm_id, entry in usable_entries(values):
                    g.add_metric(labels=vm_labels(vm_id) + [property_label],
                                 value=entry['data'])

            for property_pair in properties.get('enum_metrics') or ():
                property_label = property_pair['label']
                propkey = property_pair['property']
                expected_state = property_pair['expected']
                values = Resources.get_latest_enum_properties_multiple(
                    target, token, uuids, propkey, expected_state)
                for vm_id, entry in usable_entries(values):
                    # The latest state is folded into the propkey label value.
                    propkey_label = property_label + ": " + entry['latest_state']
                    g.add_metric(labels=vm_labels(vm_id) + [propkey_label],
                                 value=entry['data'])

            for property_pair in properties.get('info_metrics') or ():
                property_label = property_pair['label']
                propkey = property_pair['property']
                values = Resources.get_latest_info_properties_multiple(
                    target, token, uuids, propkey)
                for vm_id, entry in usable_entries(values):
                    i.add_metric(labels=vm_labels(vm_id),
                                 value={property_label: entry['data']})

        # self.post_metrics(g.name)
        # self.post_metrics(i.name + '_info')
        yield g
        yield i
Esempio n. 30
0
    def collect(self):
        # bearerbox server status
        metric = GaugeMetricFamily('bearerbox_up',
                                   'Could the bearerbox server be reached')

        response = self.parse_kannel_status()

        if response is None:
            metric.add_sample('bearerbox_up', value=0, labels={})
            yield metric
            return []

        metric.add_sample('bearerbox_up', value=1, labels={})
        yield metric

        # Version info
        version = bearerbox_version(response['gateway']['version'])
        metric = GaugeMetricFamily('bearerbox_build_info',
                                   'Kannel bearerbox version info')
        metric.add_sample('bearerbox_build_info',
                          value=1,
                          labels={'version': version})
        yield metric

        # Gauge for the bearerbox uptime, in seconds
        uptime = uptime_to_secs(response['gateway']['status'])
        metric = GaugeMetricFamily('bearerbox_uptime_seconds',
                                   'Current uptime in seconds (*)')
        metric.add_sample('bearerbox_uptime_seconds', value=uptime, labels={})
        yield metric

        # WDP, SMS & DLR metrics
        message_type = ['sms', 'dlr']
        if self._collect_wdp is True:
            message_type = ['wdp'] + message_type

        for type in message_type:
            for k, v in response['gateway'][type].items():
                if isinstance(v, dict):
                    for k2, v2 in v.items():
                        metric_name = 'bearerbox_{0}_{1}_{2}'.format(
                            type, k, k2)
                        if k2 == 'total':
                            metric_help = 'Total number of {0} {1}'.format(
                                type.upper(), k)
                            metric = CounterMetricFamily(
                                metric_name, metric_help)
                        else:
                            metric_help = 'Number of {0} {1} in queue'.format(
                                k, type.upper())
                            metric = GaugeMetricFamily(metric_name,
                                                       metric_help)

                        metric.add_sample(metric_name,
                                          value=int(v2),
                                          labels={})
                        yield metric

                elif k not in ['inbound', 'outbound']:
                    metric_name = 'bearerbox_{0}_{1}'.format(type, k)
                    metric_value = v
                    metric_labels = {}

                    if type == 'sms' and k == 'storesize':
                        metric_help = 'Number of SMS in storesize'
                    elif type == 'dlr':
                        if k == 'queued':
                            metric_help = 'Number of DLRs in queue'
                        elif k == 'storage':
                            metric_help = 'DLR storage type info'
                            metric_value = 1
                            metric_labels = {'storage': v}

                    metric = GaugeMetricFamily(metric_name, metric_help)
                    metric.add_sample(metric_name,
                                      value=int(metric_value),
                                      labels=metric_labels)
                    yield metric

        # Box metrics
        box_connections = {b: 0 for b in self._box_connections}
        box_details = {}
        metric_box_connections = GaugeMetricFamily(
            'bearerbox_box_connections', 'Number of box connections')
        metric_box_queue = GaugeMetricFamily(
            'bearerbox_box_queue', 'Number of messages in box queue')

        if self._collect_box_uptime is True:
            metric_box_uptime = GaugeMetricFamily(
                'bearerbox_box_uptime_seconds', 'Box uptime in seconds (*)')
        if response['gateway']['boxes'] != '':
            # when there's only one box connected on the gateway
            # xmltodict returns an OrderedDict instead of a list of OrderedDicts
            if not isinstance(response['gateway']['boxes']['box'], list):
                response['gateway']['boxes']['box'] = [
                    response['gateway']['boxes']['box']
                ]

            for box in response['gateway']['boxes']['box']:
                if box['type'] in box_connections.keys():
                    box_connections[box['type']] += 1
                else:
                    box_connections[box['type']] = 1

                # some type of boxes (e.g wapbox) don't have IDs.
                if 'id' not in box.keys():
                    box['id'] = ""

                tuplkey = (box['type'], box['id'], box['IP'])

                # some type of boxs (e.g wapbox) don't have queues.
                if 'queue' in box.keys():
                    if tuplkey in box_details.keys():
                        box_details[tuplkey]['queue'] += int(box['queue'])
                    else:
                        box_details[tuplkey] = {}
                        box_details[tuplkey]['queue'] = int(box['queue'])

                # collect box uptime metrics
                # In case of multiple boxes with same type, id and host.
                # Only the uptime of the first occurence will be exposed
                # in order to avoid duplicates.
                if self._collect_box_uptime is True:
                    if tuplkey in box_details.keys():
                        if 'uptime' not in box_details[tuplkey].keys():
                            box_details[tuplkey]['uptime'] = uptime_to_secs(
                                box['status'])
                    else:
                        box_details[tuplkey] = {}
                        box_details[tuplkey]['uptime'] = uptime_to_secs(
                            box['status'])

        for key, value in box_connections.items():
            metric_box_connections.add_sample('bearerbox_box_connections',
                                              value=value,
                                              labels={'type': key})
        yield metric_box_connections

        for key, value in box_details.items():
            box_labels = {'type': key[0], 'id': key[1], 'ipaddr': key[2]}
            if 'queue' in value.keys():
                metric_box_queue.add_sample('bearerbox_box_queue',
                                            value=value['queue'],
                                            labels=box_labels)
            if self._collect_box_uptime is True:
                metric_box_uptime.add_sample('bearerbox_box_uptime_seconds',
                                             value=value['uptime'],
                                             labels=box_labels)

        yield metric_box_queue
        if self._collect_box_uptime is True:
            yield metric_box_uptime

        # SMSC metrics
        metric = GaugeMetricFamily('bearerbox_smsc_connections',
                                   'Number of SMSC connections')
        metric.add_sample('bearerbox_smsc_connections',
                          value=int(response['gateway']['smscs']['count']),
                          labels={})
        yield metric

        if self._filter_smsc is False:
            metric_failed = CounterMetricFamily(
                'bearerbox_smsc_failed_messages_total',
                'Total number of SMSC failed messages',
                labels=["smsc_id"])
            metric_queued = GaugeMetricFamily('bearerbox_smsc_queued_messages',
                                              'Number of SMSC queued messages',
                                              labels=["smsc_id"])
            metric_sms_received = CounterMetricFamily(
                'bearerbox_smsc_received_sms_total',
                'Total number of received SMS by SMSC',
                labels=["smsc_id"])
            metric_sms_sent = CounterMetricFamily(
                'bearerbox_smsc_sent_sms_total',
                'Total number of SMS sent to SMSC',
                labels=["smsc_id"])
            metric_dlr_received = CounterMetricFamily(
                'bearerbox_smsc_received_dlr_total',
                'Total number of DLRs received by SMSC',
                labels=["smsc_id"])
            metric_dlr_sent = CounterMetricFamily(
                'bearerbox_smsc_sent_dlr_total',
                'Total number of DLRs sent to SMSC',
                labels=["smsc_id"])

            # Group SMSCs by smsc-id
            smsc_stats_by_id = OrderedDict()

            # when there's only one smsc connection on the gateway
            # xmltodict returns an OrderedDict instead of a list of OrderedDicts
            if not isinstance(response['gateway']['smscs']['smsc'], list):
                response['gateway']['smscs']['smsc'] = [
                    response['gateway']['smscs']['smsc']
                ]

            for smsc in response['gateway']['smscs']['smsc']:
                smscid = smsc['id']
                if smscid in smsc_stats_by_id:
                    smsc_stats_by_id[smscid]['failed'] += int(smsc['failed'])
                    smsc_stats_by_id[smscid]['queued'] += int(smsc['queued'])
                    smsc_stats_by_id[smscid]['sms']['received'] += int(
                        smsc['sms']['received'])
                    smsc_stats_by_id[smscid]['sms']['sent'] += int(
                        smsc['sms']['sent'])
                    smsc_stats_by_id[smscid]['dlr']['received'] += int(
                        smsc['dlr']['received'])
                    smsc_stats_by_id[smscid]['dlr']['sent'] += int(
                        smsc['dlr']['sent'])
                else:
                    smsc_stats_by_id[smscid] = OrderedDict()
                    smsc_stats_by_id[smscid]['failed'] = int(smsc['failed'])
                    smsc_stats_by_id[smscid]['queued'] = int(smsc['queued'])
                    smsc_stats_by_id[smscid]['sms'] = OrderedDict()
                    smsc_stats_by_id[smscid]['sms']['received'] = int(
                        smsc['sms']['received'])
                    smsc_stats_by_id[smscid]['sms']['sent'] = int(
                        smsc['sms']['sent'])
                    smsc_stats_by_id[smscid]['dlr'] = OrderedDict()
                    smsc_stats_by_id[smscid]['dlr']['received'] = int(
                        smsc['dlr']['received'])
                    smsc_stats_by_id[smscid]['dlr']['sent'] = int(
                        smsc['dlr']['sent'])

            for smsc in smsc_stats_by_id:
                metric_failed.add_metric([smsc],
                                         smsc_stats_by_id[smsc]['failed'])
                metric_queued.add_metric([smsc],
                                         smsc_stats_by_id[smsc]['queued'])
                metric_sms_received.add_metric(
                    [smsc], smsc_stats_by_id[smsc]['sms']['received'])
                metric_sms_sent.add_metric(
                    [smsc], smsc_stats_by_id[smsc]['sms']['sent'])
                metric_dlr_received.add_metric(
                    [smsc], smsc_stats_by_id[smsc]['dlr']['received'])
                metric_dlr_sent.add_metric(
                    [smsc], smsc_stats_by_id[smsc]['dlr']['sent'])

            yield metric_failed
            yield metric_queued
            yield metric_sms_received
            yield metric_sms_sent
            yield metric_dlr_received
            yield metric_dlr_sent
Esempio n. 31
0
    def collect(self, name: str) -> Generator:
        '''
        Yields the power switch info metric and the per-phase gauges.

        ``name`` is used as the value of the ``inverter`` label on every
        sample. The info metric is only emitted when both the software and
        bootloader versions are set. Each gauge family is always yielded,
        but a phase only gets a sample when its reading is not ``None``.
        '''
        versions_known = not (self.software_version is None
                              or self.bootloader_version is None)
        if versions_known:
            info_value = {
                'inverter': name,
                'software_version': str(self.software_version),
                'bootloader_version': str(self.bootloader_version)
            }
            yield InfoMetricFamily('rctmon_powerswitch',
                                   'Information about the Power Switch',
                                   info_value)

        # (metric name, help text, unit, attribute prefix on self)
        family_specs = (
            ('rctmon_grid_voltage', 'Grid voltage by phase', 'volt',
             'grid_voltage_'),
            ('rctmon_grid_frequency', 'Grid frequency by phase', 'hertz',
             'grid_frequency_'),
            ('rctmon_powerstorage_frequency',
             'Power Storage frequency by phase', 'hertz',
             'power_storage_frequency_'),
        )

        for metric_name, documentation, unit, attr_prefix in family_specs:
            family = GaugeMetricFamily(metric_name,
                                       documentation,
                                       labels=['inverter', 'phase'],
                                       unit=unit)
            for phase in ('l1', 'l2', 'l3'):
                reading = getattr(self, attr_prefix + phase)
                if reading is not None:
                    family.add_metric([name, phase], reading)
            yield family
Esempio n. 32
0
    def collect(self):
        """Scrape stellar-core and yield Prometheus metric families.

        Two HTTP endpoints are queried on every call: ``args.uri`` for the
        libmedida metrics and ``args.info_uri`` for the node info document.
        Every family carries the labels returned by ``self.get_labels()``.

        libmedida types are mapped as follows:
          * ``timer``   -> Summary (with 0.75 / 0.99 quantile samples), in seconds
          * ``counter`` -> Gauge
          * ``meter``   -> Counter
        Other metric types are ignored.

        If the info document lacks any key listed in ``self.info_keys`` a
        warning is printed and no info-derived metrics are yielded.
        """
        # Function-scope import so the block does not depend on the file's
        # import header.
        import calendar

        # TODO handle missing labels, probably return 500?
        labels = self.get_labels()
        # Materialise once: the same names/values are reused by every family.
        label_names = list(labels.keys())
        label_values = list(labels.values())
        # Work on copies so that other metrics do not get the quantile label
        labels_p75 = labels.copy()
        labels_p75.update({'quantile': '0.75'})
        labels_p99 = labels.copy()
        labels_p99.update({'quantile': '0.99'})

        def gauge(metric, documentation, value):
            # Build a single-sample gauge family carrying the common labels.
            family = GaugeMetricFamily(metric, documentation, labels=label_names)
            family.add_metric(label_values, value)
            return family

        response = requests.get(args.uri)
        metrics = response.json()['metrics']
        # iterate over all metrics
        for k, metric in metrics.items():
            # Raw string: '\.' and '\s' are regex escapes, not string escapes.
            metric_name = 'stellar_core_' + re.sub(r'\.|-|\s', '_', k).lower()
            metric_type = metric['type']

            if metric_type == 'timer':
                # we have a timer, expose as a Prometheus Summary
                # we convert stellar-core time units to seconds, as per Prometheus best practices
                metric_name = metric_name + '_seconds'
                unit = metric['duration_unit']
                if 'sum' in metric:
                    # use libmedida sum value
                    total_duration = metric['sum']
                else:
                    # compute sum value
                    total_duration = metric['mean'] * metric['count']
                summary = SummaryMetricFamily(
                    metric_name, 'libmedida metric type: ' + metric_type,
                    labels=label_names)
                summary.add_metric(
                    label_values, count_value=metric['count'],
                    sum_value=duration_to_seconds(total_duration, unit))
                # add stellar-core calculated quantiles to our summary
                summary.add_sample(
                    metric_name, labels=labels_p75,
                    value=duration_to_seconds(metric['75%'], unit))
                summary.add_sample(
                    metric_name, labels=labels_p99,
                    value=duration_to_seconds(metric['99%'], unit))
                yield summary
            elif metric_type == 'counter':
                # we have a counter, this is a Prometheus Gauge
                yield gauge(metric_name,
                            'libmedida metric type: ' + metric_type,
                            metric['count'])
            elif metric_type == 'meter':
                # we have a meter, this is a Prometheus Counter
                c = CounterMetricFamily(
                    metric_name, 'libmedida metric type: ' + metric_type,
                    labels=label_names)
                c.add_metric(label_values, metric['count'])
                yield c

        # Export metrics from the info endpoint
        response = requests.get(args.info_uri)
        info = response.json()['info']
        if not all(i in info for i in self.info_keys):
            print('WARNING: info endpoint did not return all required fields')
            return

        # Ledger metrics
        for core_name, prom_name in self.ledger_metrics.items():
            yield gauge('stellar_core_ledger_{}'.format(prom_name),
                        'Stellar core ledger metric name: {}'.format(core_name),
                        info['ledger'][core_name])

        # Quorum metrics are reported under dynamic name for example:
        # "quorum" : {
        #   "758110" : {
        #     "agree" : 3,
        # dict.values() is a non-indexable view on Python 3, so materialise
        # it before taking the first entry.
        tmp = list(info['quorum'].values())[0]
        for metric in self.quorum_metrics:
            yield gauge('stellar_core_quorum_{}'.format(metric),
                        'Stellar core quorum metric: {}'.format(metric),
                        tmp[metric])

        # Peers metrics
        yield gauge('stellar_core_peers_authenticated_count',
                    'Stellar core authenticated_count count',
                    info['peers']['authenticated_count'])
        yield gauge('stellar_core_peers_pending_count',
                    'Stellar core pending_count count',
                    info['peers']['pending_count'])

        yield gauge('stellar_core_protocol_version',
                    'Stellar core protocol_version',
                    info['protocol_version'])

        yield gauge('stellar_core_synced', 'Stellar core sync status',
                    1 if info['state'] == 'Synced!' else 0)

        # The timestamp ends in 'Z' (UTC). strftime('%s') is a non-portable
        # platform extension and would interpret the naive datetime in the
        # host's local timezone; calendar.timegm treats the fields as UTC.
        date = datetime.strptime(info['startedOn'], "%Y-%m-%dT%H:%M:%SZ")
        yield gauge('stellar_core_started_on',
                    'Stellar core start time in epoch',
                    calendar.timegm(date.timetuple()))
class ClientsPerformanceMetrics():
    """
    Base class for FlashBlade Prometheus clients performance metrics.

    Four gauge families (latency, IOPS, operation size, throughput) are
    created up front, each labelled by ``name``, ``port`` and ``dimension``.
    The client performance records are fetched once, at construction time,
    via ``fb.get_clients_performance()``.
    """

    # (dimension label, attribute read from each performance record)
    _LATENCY_FIELDS = (('read', 'usec_per_read_op'),
                       ('write', 'usec_per_write_op'),
                       ('other', 'usec_per_other_op'))
    _IOPS_FIELDS = (('read', 'reads_per_sec'),
                    ('write', 'writes_per_sec'),
                    ('other', 'others_per_sec'))
    _OPS_SIZE_FIELDS = (('per_op', 'bytes_per_op'),
                        ('read', 'bytes_per_read'),
                        ('write', 'bytes_per_write'))
    _THROUGHPUT_FIELDS = (('read', 'read_bytes_per_sec'),
                          ('write', 'write_bytes_per_sec'))

    def __init__(self, fb):
        self.fb = fb
        family_specs = (
            ('latency', 'purefb_client_performance_latency_usec',
             'FlashBlade latency'),
            ('iops', 'purefb_client_performance_iops',
             'FlashBlade IOPS'),
            ('ops_size', 'purefb_client_performance_opns_bytes',
             'FlashBlade client average bytes per operations'),
            ('throughput', 'purefb_client_performance_throughput_bytes',
             'FlashBlade client_throughput'),
        )
        for attribute, metric_name, documentation in family_specs:
            family = GaugeMetricFamily(metric_name,
                                       documentation,
                                       labels=['name', 'port', 'dimension'])
            setattr(self, attribute, family)
        self.clients_performance = fb.get_clients_performance()

    def _fill(self, family, fields):
        """
        Add one sample per (record, dimension) pair to ``family``.

        Each record name is split on ':' into the ``name`` and ``port``
        label values.
        """
        for record in self.clients_performance:
            client, port = record.name.split(':')
            for dimension, attribute in fields:
                family.add_metric([client, port, dimension],
                                  getattr(record, attribute))

    def _latency(self):
        """
        Create metrics of gauge type for client latency metrics.
        """
        self._fill(self.latency, self._LATENCY_FIELDS)

    def _iops(self):
        """
        Create metrics of gauge type for client iops metrics.
        """
        self._fill(self.iops, self._IOPS_FIELDS)

    def _ops_size(self):
        """
        Create metrics of gauge type for client operations size metrics.
        """
        self._fill(self.ops_size, self._OPS_SIZE_FIELDS)

    def _throughput(self):
        """
        Create metrics of gauge type for client throughput metrics.
        """
        self._fill(self.throughput, self._THROUGHPUT_FIELDS)

    def get_metrics(self):
        """
        Populate all four families, then yield them in the order latency,
        iops, ops_size, throughput.
        """
        for populate in (self._latency, self._iops, self._ops_size,
                         self._throughput):
            populate()
        for family in (self.latency, self.iops, self.ops_size,
                       self.throughput):
            yield family
Esempio n. 34
0
    def collect(self):
        """Read the attached sensors and yield Prometheus gauge families.

        The SDS011 is queried once after waiting ``self._sleep`` seconds and
        its first two readings are exposed as ``airfilter_dust`` with the
        ``pm`` label set to ``2.5`` and ``10`` respectively.

        When ``self._ccs811 == 'true'`` the HDC1000 humidity/temperature
        values and the CCS811 status, error flag and eCO2/TVOC readings are
        exposed as well. The eCO2/TVOC gauges report 0 whenever no new
        sample is ready or the reading is not valid.
        """
        sds011 = SDS011(self._sds011, use_query_mode=True)
        sds011.sleep(sleep=False)
        try:
            time.sleep(int(self._sleep))
            sds011s = tuple(sds011.query())
        finally:
            # Always issue the closing sleep() call, even when the wait or
            # the query raises, so a failed scrape does not skip it.
            sds011.sleep()

        # Both particle sizes belong to ONE metric family: the exposition
        # format allows only a single HELP/TYPE block per metric name, so
        # yielding two families named 'airfilter_dust' produced invalid
        # output. The samples themselves (name + labels) are unchanged.
        dust = GaugeMetricFamily('airfilter_dust',
                                 'dust reading by particle size (pm label)',
                                 labels=['sensor', 'pm'])
        dust.add_metric(['sds011', '2.5'], sds011s[0])
        dust.add_metric(['sds011', '10'], sds011s[1])
        yield dust

        if self._ccs811 != 'true':
            return

        ccs811 = CCS811_RPi()

        configuration = 0b100000
        ccs811.configureSensor(configuration)

        hdc1000 = SDL_Pi_HDC1000()
        hdc1000.turnHeaterOff()
        hdc1000.setTemperatureResolution(
            HDC1000_CONFIG_TEMPERATURE_RESOLUTION_14BIT)
        hdc1000.setHumidityResolution(
            HDC1000_CONFIG_HUMIDITY_RESOLUTION_14BIT)

        humidity = hdc1000.readHumidity()
        temperature = hdc1000.readTemperature()

        # Feed the environmental readings back into the CCS811.
        ccs811.setCompensation(temperature, humidity)

        humid = GaugeMetricFamily('airfilter_humidity',
                                  'humidity reading',
                                  labels=['sensor'])
        humid.add_metric(['ccs811'], humidity)
        yield humid

        temp = GaugeMetricFamily('airfilter_temperature',
                                 'temperature reading',
                                 labels=['sensor'])
        temp.add_metric(['ccs811'], temperature)
        yield temp

        statusbyte = ccs811.readStatus()
        status = GaugeMetricFamily('airfilter_statusbyte',
                                   'statusbyte',
                                   labels=['sensor', 'statusbyte'])
        status.add_metric(['ccs811', bin(statusbyte)], 1)
        yield status

        failure = GaugeMetricFamily('airfilter_error',
                                    '1 if error on sensor',
                                    labels=['sensor'])
        failure.add_metric(['ccs811'],
                           1 if ccs811.checkError(statusbyte) else 0)
        yield failure

        # Only read the algorithm result when the status byte reports that
        # new data is ready; otherwise every gauge below reports 0.
        data_ready = bool(ccs811.checkDataReady(statusbyte))
        result = ccs811.readAlg() if data_ready else None
        valid = bool(result)

        samples = GaugeMetricFamily('airfilter_samples',
                                    '0 if no new samples',
                                    labels=['sensor'])
        samples.add_metric(['ccs811'], 1 if data_ready else 0)
        yield samples

        res = GaugeMetricFamily('airfilter_result',
                                '1 if valid result',
                                labels=['sensor'])
        res.add_metric(['ccs811'], 1 if valid else 0)
        yield res

        eco2 = GaugeMetricFamily('airfilter_eco2',
                                 'eco2 reading',
                                 labels=['sensor', 'unit'])
        eco2.add_metric(['ccs811', 'ppm'], result['eCO2'] if valid else 0)
        yield eco2

        tvoc = GaugeMetricFamily('airfilter_tvoc',
                                 'tvoc reading',
                                 labels=['sensor', 'unit'])
        tvoc.add_metric(['ccs811', 'ppb'], result['TVOC'] if valid else 0)
        yield tvoc
 def collect_cpu_usage(self, environments_instances_health):
     """Build the per-instance CPU utilisation gauge.

     ``environments_instances_health`` is iterated as
     ``(environment, instances_health)`` pairs; pairs whose second element
     equals the string ``"None"`` are skipped. For each instance one sample
     per CPU state is added, taken from
     ``instance_health['System']['CPUUtilization']`` or 0 when that key is
     absent. The elapsed wall-clock time is recorded on
     ``self.metric_collector_duration`` under the label ``cpu_usage``.

     Returns the populated ``GaugeMetricFamily``.
     """
     started_at = time.time()
     cpu_states = ('User', 'Nice', 'System', 'Idle', 'IOWait', 'IRQ',
                   'SoftIRQ')
     instance_cpu_usage = GaugeMetricFamily(
         self.metric_prefix + 'enhanced_cpu_usage_percent',
         'CPU utilization per instance and state',
         labels=['environment_name', 'instance_id', 'state'])

     for environment, instances_health in environments_instances_health:
         if instances_health == "None":
             continue
         for instance_health in instances_health:
             for state in cpu_states:
                 sample_labels = [
                     environment, instance_health['InstanceId'], state
                 ]
                 system = instance_health['System']
                 if 'CPUUtilization' in system:
                     usage = system['CPUUtilization'][state]
                 else:
                     usage = 0
                 instance_cpu_usage.add_metric(sample_labels, usage)

     finished_at = time.time()
     self.metric_collector_duration.add_metric(['cpu_usage'],
                                               finished_at - started_at)
     return instance_cpu_usage
Esempio n. 36
0
    def _collect(self):
        # type: () -> Generator[GaugeMetricFamily]
        """Query the crawl jobs and yield their Heritrix3 gauge families.

        ``self.run_api_requests()`` supplies one dict per job with the keys
        ``job_name``, ``id``, ``deployment`` and ``state``. Counters are read
        from ``state['details']['job']``; any missing report or counter
        defaults to 0.0. A job whose report cannot be parsed is logged and
        does not abort the collection; samples added for that job before
        the failure are kept.

        Six families are always yielded, in this order: downloaded URIs,
        known URIs, URI counters, byte counters, queue counters and thread
        counters.
        """
        # `basestring` only exists on Python 2. Referencing it directly on
        # Python 3 raised an uncaught NameError and aborted the whole scrape.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str

        status_labels = ["jobname", "deployment", "status", "id"]  # No hyphens in label names please!
        kind_labels = ["jobname", "deployment", "id", "kind"]  # No hyphens in label names please!

        m_uri_down = GaugeMetricFamily(
            'heritrix3_crawl_job_uris_downloaded_total',
            'Total URIs downloaded by a Heritrix3 crawl job',
            labels=list(status_labels))

        m_uri_known = GaugeMetricFamily(
            'heritrix3_crawl_job_uris_known_total',
            'Total URIs discovered by a Heritrix3 crawl job',
            labels=list(status_labels))

        m_uris = GaugeMetricFamily(
            'heritrix3_crawl_job_uris_total',
            'URI counters from a Heritrix3 crawl job, labeled by kind',
            labels=list(kind_labels))

        m_bytes = GaugeMetricFamily(
            'heritrix3_crawl_job_bytes_total',
            'Byte counters from a Heritrix3 crawl job, labeled by kind',
            labels=list(kind_labels))

        m_qs = GaugeMetricFamily(
            'heritrix3_crawl_job_queues_total',
            'Queue counters from a Heritrix3 crawl job, labeled by kind',
            labels=list(kind_labels))

        m_ts = GaugeMetricFamily(
            'heritrix3_crawl_job_threads_total',
            'Thread counters from a Heritrix3 crawl job, labeled by kind',
            labels=list(kind_labels))

        # (family, kind label, report section, counter key)
        simple_counters = (
            # URIs:
            (m_uris, 'novel', 'sizeTotalsReport', 'novelCount'),
            (m_uris, 'deduplicated', 'sizeTotalsReport', 'dupByHashCount'),
            (m_uris, 'deepest-queue-depth', 'loadReport', 'deepestQueueDepth'),
            (m_uris, 'average-queue-depth', 'loadReport', 'averageQueueDepth'),
            # Bytes:
            (m_bytes, 'novel', 'sizeTotalsReport', 'novel'),
            (m_bytes, 'deduplicated', 'sizeTotalsReport', 'dupByHash'),
            (m_bytes, 'warc-novel-content', 'sizeTotalsReport', 'warcNovelContentBytes'),
            # Queues:
            (m_qs, 'total', 'frontierReport', 'totalQueues'),
            (m_qs, 'in-process', 'frontierReport', 'inProcessQueues'),
            (m_qs, 'ready', 'frontierReport', 'readyQueues'),
            (m_qs, 'snoozed', 'frontierReport', 'snoozedQueues'),
            (m_qs, 'active', 'frontierReport', 'activeQueues'),
            (m_qs, 'inactive', 'frontierReport', 'inactiveQueues'),
            (m_qs, 'ineligible', 'frontierReport', 'ineligibleQueues'),
            (m_qs, 'retired', 'frontierReport', 'retiredQueues'),
            (m_qs, 'exhausted', 'frontierReport', 'exhaustedQueues'),
            # Threads:
            (m_ts, 'total', 'loadReport', 'totalThreads'),
            (m_ts, 'busy', 'loadReport', 'busyThreads'),
            (m_ts, 'toe-count', 'threadReport', 'toeCount'),
        )

        # (threadReport key, kind-label prefix, warning format)
        counted_lists = (
            ('steps', 'step-%s', "Could not handle step value: %s"),
            ('processors', 'processor-%s', "Could not handle processor value: '%s'"),
        )

        def counter(report, section, key):
            # Missing sections/keys fall back to 0.0.
            return report.get(section, {}).get(key, 0.0)

        for job in self.run_api_requests():
            # Allow debugging:
            logger.debug("Input:\n%s", json.dumps(job, indent=2))

            # Get hold of the state and flags etc
            name = job['job_name']
            job_id = job['id']
            deployment = job['deployment']
            state = job['state'] or {}
            # .get(): an empty state used to raise KeyError here, outside the
            # try block below, which aborted the whole scrape.
            status = state.get('status') or None

            # Get the URI metrics
            try:
                ji = state.get('details', {}).get('job', {})
                docs_total = counter(ji, 'uriTotalsReport', 'downloadedUriCount')
                known_total = counter(ji, 'uriTotalsReport', 'totalUriCount')
                m_uri_down.add_metric([name, deployment, status, job_id], docs_total)
                m_uri_known.add_metric([name, deployment, status, job_id], known_total)
                # New-style metrics:
                m_uris.add_metric([name, deployment, job_id, 'downloaded'], docs_total)
                m_uris.add_metric([name, deployment, job_id, 'queued'], known_total)

                for family, kind, section, key in simple_counters:
                    family.add_metric([name, deployment, job_id, kind],
                                      counter(ji, section, key))

                # Congestion ratio can be literal 'null':
                congestion = counter(ji, 'loadReport', 'congestionRatio')
                if congestion is not None:
                    m_ts.add_metric([name, deployment, job_id, 'congestion-ratio'], congestion)

                # Thread steps / processors (could be an array or just one
                # entry). Each entry has the form "<count> <label>".
                for key, kind_format, warning in counted_lists:
                    entries = ji.get('threadReport', {}).get(key, {}).get('value', [])
                    if isinstance(entries, string_types):
                        entries = [entries]
                    for entry in entries:
                        splut = re.split(' ', entry, maxsplit=1)
                        if len(splut) == 2:
                            count, label = splut
                            kind = kind_format % label.lower()
                            m_ts.add_metric([name, deployment, job_id, kind], float(count))
                        else:
                            logger.warning(warning, entry)

            except (AttributeError, KeyError, TypeError, ValueError):
                # AttributeError covers a JSON null where a report dict was
                # expected (None has no .get).
                logger.exception("Exception while parsing metrics!")
                logger.info("Printing raw JSON in case there's an underlying issue: %s",
                            json.dumps(job)[:1024])

        # And return the metrics:
        yield m_uri_down
        yield m_uri_known
        yield m_uris
        yield m_bytes
        yield m_qs
        yield m_ts
 def collect_health_status(self, environments_health):
     """Build the enhanced-health gauge family for the given environments.

     ``environments_health`` is iterated as ``(environment, health)`` pairs.
     A pair is skipped when ``health`` equals the string ``"None"`` or does
     not contain ``'ApplicationMetrics'``. For every remaining pair one
     sample per known status is added: 1 for the status equal to
     ``health['HealthStatus']`` and 0 for each of the others.

     The elapsed wall-clock time is recorded on
     ``self.metric_collector_duration`` under the ``health_status`` label.

     Returns the populated ``GaugeMetricFamily``.
     """
     started_at = time.time()
     family = GaugeMetricFamily(
         self.metric_prefix + 'enhanced_health_status',
         'The health status of the environment',
         labels=['environment_name', 'color', 'health_status'])

     # (color, status) label pairs, listed in the order samples are emitted.
     color_by_status = (
         ('Green', 'Ok'),
         ('Yellow', 'Warning'),
         ('Red', 'Degraded'),
         ('Red', 'Severe'),
         ('Green', 'Info'),
         ('Grey', 'Pending'),
         ('Grey', 'Unknown'),
         ('Grey', 'Suspended'),
     )

     for env_name, env_health in environments_health:
         if env_health == "None" or 'ApplicationMetrics' not in env_health:
             continue
         reported = env_health['HealthStatus']
         for color, status in color_by_status:
             family.add_metric([env_name, color, status],
                               1 if reported == status else 0)

     finished_at = time.time()
     self.metric_collector_duration.add_metric(['health_status'],
                                               finished_at - started_at)
     return family
    def collect(self):
        """Yield Tor daemon metrics for one Prometheus scrape.

        Calls ``self.reconnect()`` first and then queries the controller in
        ``self.tor`` for traffic, version, liveness, reachability, rate,
        identity, connection/uptime, flag and accounting information.

        Several sections are best-effort: when the controller (or the local
        process lookup) raises one of the exceptions caught below, the
        corresponding metrics are simply not emitted for this scrape.
        """
        self.reconnect()

        yield GaugeMetricFamily("tor_written_bytes",
                                "Tor written data counter",
                                value=int(
                                    self.tor.get_info("traffic/written")))
        yield GaugeMetricFamily("tor_read_bytes",
                                "Tor received data counter",
                                value=int(self.tor.get_info("traffic/read")))

        version = GaugeMetricFamily("tor_version",
                                    "Tor version as a label",
                                    labels=["version"])
        # Ask this collector's own controller for the version, like every
        # other query in this method, instead of relying on a name from
        # outside the method.
        version.add_metric([str(self.tor.get_version())], 1)
        yield version

        version_status = GaugeMetricFamily(
            "tor_version_status",
            "Tor version status {new, old, unrecommended, recommended, new in series, obsolete, unknown} as a label",
            labels=["version_status"])
        version_status.add_metric(
            [self.tor.get_info("status/version/current")], 1)
        yield version_status

        yield GaugeMetricFamily(
            "tor_network_liveness",
            "Indicates whether tor believes that the network is currently reachable",
            value=int(self.tor.get_info("network-liveness") == "up"))

        reachable = GaugeMetricFamily(
            "tor_reachable",
            "Indicates whether our OR/Dir port is reachable",
            labels=["port"])
        # The reply is a whitespace-separated list of "<port>=<0|1>" entries.
        for entry in self.tor.get_info(
                "status/reachability-succeeded").split():
            k, v = entry.split("=")
            reachable.add_metric([k], int(v))
        yield reachable

        yield GaugeMetricFamily(
            "tor_circuit_established",
            "Indicates whether Tor is capable of establishing circuits",
            value=int(self.tor.get_info("status/circuit-established")))
        # For some reason, 0 actually means that Tor is active, keep it that way
        yield GaugeMetricFamily(
            "tor_dormant",
            "Indicates whether Tor is currently active and building circuits (note that 0 corresponds to Tor being active)",
            value=int(self.tor.get_info("dormant")))

        effective_rate = self.tor.get_effective_rate(None)
        effective_burst_rate = self.tor.get_effective_rate(None, burst=True)
        # Both rates are only reported together.
        if effective_rate is not None and effective_burst_rate is not None:
            yield GaugeMetricFamily("tor_effective_rate",
                                    "Shows Tor effective rate",
                                    value=int(effective_rate))
            yield GaugeMetricFamily("tor_effective_burst_rate",
                                    "Shows Tor effective burst rate",
                                    value=int(effective_burst_rate))

        try:
            fingerprint_value = self.tor.get_info("fingerprint")
            fingerprint = GaugeMetricFamily("tor_fingerprint",
                                            "Tor fingerprint as a label",
                                            labels=["fingerprint"])
            fingerprint.add_metric([fingerprint_value], 1)
            yield fingerprint
        except (stem.ProtocolError, stem.OperationFailed):
            # happens when not running in server mode
            pass

        nickname = GaugeMetricFamily("tor_nickname",
                                     "Tor nickname as a label",
                                     labels=["nickname"])
        nickname.add_metric([self.tor.get_conf("Nickname", "Unnamed")], 1)
        yield nickname

        # Connection counting
        # This won't work/will return wrong results if we are not running on
        # the same box as the Tor daemon is.
        # DisableDebuggerAttachment has to be set to 0
        # TODO: Count individual OUT/DIR/Control connections, see arm sources
        # for reference
        try:
            tor_pid = self.tor.get_pid()
            connections = stem.util.connection.get_connections(
                process_pid=tor_pid)
            yield GaugeMetricFamily(
                "tor_connection_count",
                "Amount of connections the Tor daemon has open",
                value=len(connections))
            # Let's hope this does not break when there is NTP sync or
            # something
            uptime = time.time() - stem.util.system.start_time(tor_pid)
            yield GaugeMetricFamily("tor_uptime",
                                    "Tor daemon uptime",
                                    value=uptime)
        except (OSError, IOError):
            # This happens if the PID does not exists (on another machine).
            pass

        try:
            has_flags = self.tor.get_network_status().flags
        except (stem.DescriptorUnavailable, stem.ControllerError):
            # DescriptorUnavailable: the tor daemon fails with this for a few
            # minutes after startup (before figuring out its own flags?).
            # ControllerError: happens when the daemon is not running in
            # server mode.
            has_flags = []
        flags = GaugeMetricFamily("tor_flags",
                                  "Has a Tor flag",
                                  labels=["flag"])
        for flag in [
                "Authority", "BadExit", "Exit", "Fast", "Guard", "HSDir",
                "NoEdConsensus", "Stable", "Running", "Valid", "V2Dir"
        ]:
            flags.add_metric([flag], int(flag in has_flags))
        yield flags

        # (metric name, help text, attribute of the accounting stats object)
        accounting_metrics = (
            ("tor_accounting_read_bytes",
             "Tor accounting read bytes", "read_bytes"),
            ("tor_accounting_left_read_bytes",
             "Tor accounting read bytes left", "read_bytes_left"),
            ("tor_accounting_read_limit_bytes",
             "Tor accounting read bytes limit", "read_limit"),
            ("tor_accounting_write_bytes",
             "Tor accounting write bytes", "written_bytes"),
            ("tor_accounting_left_write_bytes",
             "Tor accounting write bytes left", "write_bytes_left"),
            ("tor_accounting_write_limit_bytes",
             "Tor accounting write bytes limit", "write_limit"),
        )
        try:
            accs = self.tor.get_accounting_stats()
            for metric_name, help_text, attribute in accounting_metrics:
                yield GaugeMetricFamily(metric_name,
                                        help_text,
                                        value=getattr(accs, attribute))
        except stem.ControllerError:
            # happens when accounting isn't enabled
            pass
def scrape():
    """Rebuild every koji metric family and publish them in a single update.

    Sets the module-level ``START`` to the timestamp of midnight of the
    current UTC date, queries koji for recent/open/waiting tasks, hosts and
    task load, fills one metric family per measurement and finally stores
    all families in ``metrics`` with one ``update`` call.
    """
    global START
    # NOTE(review): the combined datetime is naive, so .timestamp()
    # interprets it in the local timezone -- confirm this is intended.
    utc_today = datetime.utcnow().date()
    START = datetime.combine(utc_today, datetime.min.time()).timestamp()

    def fill(family, pairs):
        """Add every ``(value, labels)`` pair to *family* and return it."""
        for value, labels in pairs:
            family.add_metric(labels, value)
        return family

    def fill_durations(family, calculate):
        """Add the duration histogram computed with *calculate* to *family*."""
        for buckets, duration_sum, labels in koji_task_duration_seconds(
            tasks, calculate
        ):
            family.add_metric(labels, buckets, sum_value=duration_sum)
        return family

    tasks = retrieve_recent_koji_tasks()

    # --- task counters -----------------------------------------------------
    total_family = fill(
        CounterMetricFamily(
            'koji_tasks_total', 'Count of all koji tasks', labels=TASK_LABELS
        ),
        koji_tasks_total(tasks),
    )
    errors_family = fill(
        CounterMetricFamily(
            'koji_task_errors_total', 'Count of all koji task errors', labels=TASK_LABELS
        ),
        koji_tasks_total(only(tasks, states=error_states)),
    )
    completions_family = fill(
        CounterMetricFamily(
            'koji_task_completions_total', 'Count of all koji task completed', labels=TASK_LABELS
        ),
        koji_tasks_total(only(tasks, states=completed_states)),
    )

    # --- task gauges -------------------------------------------------------
    in_progress_family = fill(
        GaugeMetricFamily(
            'koji_in_progress_tasks',
            'Count of all in-progress koji tasks',
            labels=TASK_LABELS,
        ),
        koji_tasks_total(retrieve_open_koji_tasks()),
    )
    waiting_family = fill(
        GaugeMetricFamily(
            'koji_waiting_tasks',
            'Count of all waiting, unscheduled koji tasks',
            labels=TASK_LABELS,
        ),
        koji_tasks_total(retrieve_waiting_koji_tasks()),
    )

    # --- task duration histograms -----------------------------------------
    duration_family = fill_durations(
        HistogramMetricFamily(
            'koji_task_duration_seconds',
            'Histogram of koji task durations',
            labels=TASK_LABELS,
        ),
        calculate_overall_duration,
    )
    waiting_duration_family = fill_durations(
        HistogramMetricFamily(
            'koji_task_waiting_duration_seconds',
            'Histogram of koji tasks durations while waiting',
            labels=TASK_LABELS,
        ),
        calculate_waiting_duration,
    )
    in_progress_duration_family = fill_durations(
        HistogramMetricFamily(
            'koji_task_in_progress_duration_seconds',
            'Histogram of koji task durations while in-progress',
            labels=TASK_LABELS,
        ),
        calculate_in_progress_duration,
    )

    # --- hosts -------------------------------------------------------------
    hosts_count_family = GaugeMetricFamily(
        'koji_enabled_hosts_count',
        'Count of all koji hosts by channel',
        labels=HOST_LABELS,
    )
    hosts_capacity_family = GaugeMetricFamily(
        'koji_enabled_hosts_capacity',
        'Reported capacity of all koji hosts by channel',
        labels=HOST_LABELS,
    )
    hosts_last_update_family = GaugeMetricFamily(
        'koji_hosts_last_update',
        'Gauge of last update from host',
        labels=BUILDER_LABELS,
    )

    hosts = retrieve_hosts_by_channel()

    # Each first element is a VirtualCall object produced by the Koji API's
    # MultiCallSession; the actual value is read from its ``result``.
    for virtual_call, labels in koji_hosts_last_update(hosts):
        hosts_last_update_family.add_metric(labels, virtual_call.result)
    fill(hosts_count_family, koji_enabled_hosts_count(hosts))
    fill(hosts_capacity_family, koji_enabled_hosts_capacity(hosts))

    # --- task load ---------------------------------------------------------
    task_load_family = fill(
        GaugeMetricFamily(
            'koji_task_load',
            'Task load of all koji builders by channel',
            labels=HOST_LABELS,
        ),
        koji_task_load(retrieve_task_load_by_channel()),
    )

    # Publish everything with a single update call to avoid a race with the
    # Expositor reading a half-refreshed set of families.
    metrics.update(
        {
            'koji_tasks_total': total_family,
            'koji_task_errors_total': errors_family,
            'koji_task_completions_total': completions_family,
            'koji_in_progress_tasks': in_progress_family,
            'koji_waiting_tasks': waiting_family,
            'koji_task_duration_seconds': duration_family,
            'koji_task_waiting_duration_seconds': waiting_duration_family,
            'koji_task_in_progress_duration_seconds': in_progress_duration_family,
            'koji_enabled_hosts_count': hosts_count_family,
            'koji_enabled_hosts_capacity': hosts_capacity_family,
            'koji_task_load': task_load_family,
            'koji_hosts_last_update': hosts_last_update_family,
        }
    )
Esempio n. 40
0
    def collect(self):
        """Yield a gauge family with the number of Slurm nodes per state.

        Runs ``sinfo`` through the shell once for each of the states
        ``down``, ``drain``, ``idle`` and ``alloc`` (lines containing ``*``
        are filtered out by the pipeline), then once more for the total node
        count. The difference between the total and the four known states is
        reported as state ``unk``.

        Output that cannot be parsed as a single number is counted as 0.0.
        If a command cannot be started (``OSError``), the failure is printed
        to ``sys.stderr`` and the affected sample is omitted; the ``unk``
        sample is also omitted in that case because it cannot be deduced.
        """
        # Local import so the block does not depend on a file-level import.
        import sys

        gauge_nodes_states_total = GaugeMetricFamily('slurm_nodes_state_total', 'Slurm nodes states, total per state. From sinfo.', labels=['state'])

        def count_nodes(command):
            """Run *command* in a shell and parse its output as a float.

            Returns ``None`` when the command could not be executed and 0.0
            when its output is not a single number.
            """
            try:
                stdout, _ = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True).communicate()
            except OSError as e:
                print("Execution failed:", e, file=sys.stderr)
                return None
            try:
                return float(stdout)
            except ValueError:
                return 0.0

        # Node counts per known state; a state is missing from this dict
        # when its command could not be run.
        counts = {}
        for state in ("down", "drain", "idle", "alloc"):
            nb_nodes_state = count_nodes(
                "sinfo --format='%T %D' | grep " + state
                + " | grep -v '*' |awk -F ' ' '{print $2}'")
            if nb_nodes_state is None:
                continue
            counts[state] = nb_nodes_state
            print("Slurm Exporter. nb_nodes_" + state + ": " + str(nb_nodes_state))
            gauge_nodes_states_total.add_metric([state], nb_nodes_state)

        # Deduce remaining nodes, and assume they are unknown state
        nb_nodes = count_nodes("sinfo --format=%D | grep -v NODES")
        if nb_nodes is not None and len(counts) == 4:
            nb_nodes_unk = nb_nodes - (counts["alloc"] + counts["idle"] + counts["drain"] + counts["down"])
            print("Slurm Exporter. nb_nodes_unk: " + str(nb_nodes_unk))
            gauge_nodes_states_total.add_metric(["unk"], nb_nodes_unk)

        yield gauge_nodes_states_total
Esempio n. 41
0
    def collect(self):
        """Yield PyPy GC time and memory metrics read from ``gc.get_stats``.

        Emits one counter family (``pypy_gc_time_seconds_total``) followed by
        one gauge family (``pypy_memory_bytes``) labelled by state, class and
        kind.
        """
        # gc.get_stats() returns a pretty-printer object: its __str__() renders
        # a table and its public fields hold pre-formatted strings such as
        # '4.5MB'. The private ``_s`` attribute carries the same fields as
        # actual integers, which is what gets exported here.
        pretty_stats = gc.get_stats(memory_pressure=False)  # type: ignore
        raw = pretty_stats._s  # type: ignore

        # The field names only loosely follow the pretty-printed table:
        # >>>> gc.get_stats(False)
        # Total memory consumed:
        #     GC used:            8.7MB (peak: 39.0MB)        # s.total_gc_memory, s.peak_memory
        #        in arenas:            3.0MB                  # s.total_arena_memory
        #        rawmalloced:          1.7MB                  # s.total_rawmalloced_memory
        #        nursery:              4.0MB                  # s.nursery_size
        #     raw assembler used: 31.0kB                      # s.jit_backend_used
        #     -----------------------------
        #     Total:              8.8MB                       # stats.memory_used_sum
        #
        #     Total memory allocated:
        #     GC allocated:            38.7MB (peak: 41.1MB)  # s.total_allocated_memory, s.peak_allocated_memory
        #        in arenas:            30.9MB                 # s.peak_arena_memory
        #        rawmalloced:          4.1MB                  # s.peak_rawmalloced_memory
        #        nursery:              4.0MB                  # s.nursery_size
        #     raw assembler allocated: 1.0MB                  # s.jit_backend_allocated
        #     -----------------------------
        #     Total:                   39.7MB                 # stats.memory_allocated_sum
        #
        #     Total time spent in GC:  0.073                  # s.total_gc_time

        gc_time_family = CounterMetricFamily(
            "pypy_gc_time_seconds_total",
            "Total time spent in PyPy GC",
            labels=[],
        )
        # presumably total_gc_time is in milliseconds, hence the division to
        # match the "seconds" metric name -- TODO confirm
        gc_time_family.add_metric([], raw.total_gc_time / 1000)
        yield gc_time_family

        memory_family = GaugeMetricFamily(
            "pypy_memory_bytes",
            "Memory tracked by PyPy allocator",
            labels=["state", "class", "kind"],
        )
        # (state, class, kind, field of the raw stats object)
        memory_samples = (
            # memory used by JIT assembler
            ("used", "", "jit", "jit_backend_used"),
            ("allocated", "", "jit", "jit_backend_allocated"),
            # memory used by GCed objects
            ("used", "", "arenas", "total_arena_memory"),
            ("allocated", "", "arenas", "peak_arena_memory"),
            ("used", "", "rawmalloced", "total_rawmalloced_memory"),
            ("allocated", "", "rawmalloced", "peak_rawmalloced_memory"),
            ("used", "", "nursery", "nursery_size"),
            ("allocated", "", "nursery", "nursery_size"),
            # totals
            ("used", "totals", "gc", "total_gc_memory"),
            ("allocated", "totals", "gc", "total_allocated_memory"),
            ("used", "totals", "gc_peak", "peak_memory"),
            ("allocated", "totals", "gc_peak", "peak_allocated_memory"),
        )
        for state, class_, kind, field in memory_samples:
            memory_family.add_metric([state, class_, kind],
                                     getattr(raw, field))
        yield memory_family
Esempio n. 42
0
    def collect(self, name: str) -> Generator:
        """Yield the grid gauge families for the inverter called *name*.

        Four families are produced, in this order: power per phase, voltage
        per phase, phase-to-phase voltage per measurement, and frequency.
        A sample is only added when the corresponding attribute on ``self``
        is not ``None``; a family is yielded even when it has no samples.
        """
        # No-op check kept as in the original: power_total is read here but
        # is not exported by this method.
        if self.power_total is not None:
            pass

        def build(metric_name, help_text, label_names, unit, sources):
            """Create a gauge family and add one sample per non-None source.

            *sources* holds ``(extra_label_values, attribute_name)`` pairs;
            the inverter name is always the first label value.
            """
            family = GaugeMetricFamily(metric_name,
                                       help_text,
                                       labels=label_names,
                                       unit=unit)
            for extra_labels, attribute in sources:
                reading = getattr(self, attribute)
                if reading is not None:
                    family.add_metric([name] + extra_labels, reading)
            return family

        yield build('rctmon_grid_power',
                    'Power to or from the grid by phase',
                    ['inverter', 'phase'],
                    'watt',
                    ((['l1'], 'power_l1'),
                     (['l2'], 'power_l2'),
                     (['l3'], 'power_l3')))

        yield build('rctmon_grid_voltage',
                    'Grid voltage by phase',
                    ['inverter', 'phase'],
                    'volt',
                    ((['l1'], 'voltage_l1'),
                     (['l2'], 'voltage_l2'),
                     (['l3'], 'voltage_l3')))

        yield build('rctmon_grid_voltage_phase_to_phase',
                    'Grid voltage phase to phase',
                    ['inverter', 'measurement'],
                    'volt',
                    ((['1'], 'phase_to_phase_voltage_1'),
                     (['2'], 'phase_to_phase_voltage_2'),
                     (['3'], 'phase_to_phase_voltage_3')))

        yield build('rctmon_grid_frequency',
                    'Grid frequency',
                    ['inverter'],
                    'hertz',
                    (([], 'frequency'),))
Esempio n. 43
0
class ArraySpaceMetrics():
    """
    Base class for FlashBlade Prometheus array space metrics.

    The array space information is fetched from ``fb`` once, when the object
    is constructed; ``get_metrics`` then fills and yields the three gauge
    families created in ``__init__``.
    """
    def __init__(self, fb):
        """
        Create the gauge families and read the array space data from ``fb``.

        ``fb`` must provide ``get_array_space()`` returning an object with
        ``space`` and ``capacity`` attributes.
        """
        self.fb = fb
        self.capacity = GaugeMetricFamily('purefb_array_capacity_bytes',
                                          'FlashBlade total capacity in bytes',
                                          labels=[])
        self.space = GaugeMetricFamily('purefb_array_space_bytes',
                                       'FlashBlade used space in bytes',
                                       labels=['dimension'])
        self.reduction = GaugeMetricFamily('purefb_array_space_data_reduction',
                                           'FlashBlade overall data reduction',
                                           labels=[])
        # Query the array once so that space and capacity come from the same
        # response instead of two separate calls.
        array = fb.get_array_space()
        self.array_space = array.space
        self.array_capacity = array.capacity

    def _space(self):
        """
        Create metrics of gauge type for array space indicators.

        Does nothing when no space information is available.
        """
        if self.array_space is None:
            return
        self.space.add_metric(['unique'], self.array_space.unique)
        self.space.add_metric(['virtual'], self.array_space.virtual)
        self.space.add_metric(['total_physical'],
                              self.array_space.total_physical)
        self.space.add_metric(['snapshots'], self.array_space.snapshots)

    def _capacity(self):
        """
        Create metrics of gauge type for array capacity indicator.

        Does nothing when no capacity information is available.
        """
        if self.array_capacity is None:
            return
        self.capacity.add_metric([], self.array_capacity)

    def _reduction(self):
        """
        Create metrics of gauge type for array data reduction indicator.

        Does nothing when no space information is available.
        """
        if self.array_space is None:
            return
        self.reduction.add_metric([], self.array_space.data_reduction)

    def get_metrics(self):
        """
        Fill the capacity, space and reduction families and yield them in
        that order.
        """
        self._capacity()
        self._space()
        self._reduction()
        yield self.capacity
        yield self.space
        yield self.reduction
Esempio n. 44
0
    def collect(self):
        """Return the metric families built from the helper daemon's data.

        The data is fetched through ``self._get_connected_socket()`` and
        ``self._recv_smart_info()``. If that raises ``OSError`` the error is
        logged and only ``smart_system_error`` (value 1) is returned.

        Otherwise the result is a list holding ``smart_system_error``
        (value 0), ``smart_access_error`` (one sample per port),
        ``smart_metric_error`` (one sample, value 1, per port that had a
        configured attribute missing from the device db) and one family per
        attribute metric that could be resolved.
        """
        self.logger.debug("starting collection ... ")

        helper_failure = GaugeMetricFamily(
            "smart_system_error",
            "flag indicating that there is a problem with the helper daemon",
        )

        try:
            data = self._recv_smart_info(self._get_connected_socket())
        except OSError:
            self.logger.error(
                "failed to get data from helper daemon at %s",
                self.socket_path,
                exc_info=True,
            )
            helper_failure.add_metric([], 1)
            return [helper_failure]

        helper_failure.add_metric([], 0)

        access_errors = GaugeMetricFamily(
            "smart_access_error",
            "flag indicating that there is a problem accessing the device",
            labels=["port"],
        )

        conversion_warnings = GaugeMetricFamily(
            "smart_metric_error",
            "flag indicating that there is a problem converting metrics "
            "from the device",
            labels=["port"],
        )

        # Attribute metric families created so far, keyed by metric name.
        families_by_name = {}

        def family_for(model, attr_id, attr_name):
            """Return the family for an attribute, creating it on first use.

            Returns ``None`` when the attribute map has no metric for it.
            """
            try:
                metric_name, metric_type = (
                    self.attrmap.get_metric_for_attribute(
                        model, attr_id, attr_name
                    )
                )
            except KeyError:
                return None

            if metric_name in families_by_name:
                return families_by_name[metric_name]

            family_class = {
                attrmeta.MetricType.GAUGE: GaugeMetricFamily,
                attrmeta.MetricType.COUNTER: CounterMetricFamily,
            }[metric_type]

            created = family_class(
                "smart_{}".format(metric_name),
                "S.M.A.R.T. metric based on attribute {}".format(attr_name),
                labels=self.labels,
            )
            families_by_name[metric_name] = created
            return created

        for devinfo in data:
            port = devinfo["port"]

            # Devices that could not be accessed only get the error flag.
            if devinfo["error"]:
                access_errors.add_metric([port], 1.)
                continue

            access_errors.add_metric([port], 0.)
            has_warnings = False

            device = devinfo["model"]
            family = devinfo["family"]
            serial = devinfo["serial"]

            self.logger.debug("device %r", device)

            for attrinfo in devinfo["attrs"]:
                self.logger.debug("smart attribute %r", attrinfo)

                id_ = int(attrinfo["ID#"])
                name = attrinfo["Name"]

                try:
                    value_key, _, _ = self.devicedb.get_info_for_attr(
                        device, id_
                    )
                except KeyError as exc:
                    # The device db does not know this attribute. Only warn
                    # when it was explicitly configured in the attribute map.
                    try:
                        self.attrmap.get_metric_for_attribute(
                            device, id_, name
                        )
                    except KeyError:
                        configured = False
                    else:
                        configured = True

                    if configured:
                        self.logger.warning(
                            "explicitly configured attribute #%d (%s) on"
                            " device %r is missing in devicedb -- cannot"
                            " generate metric! (lookup failed for %s)",
                            id_,
                            name,
                            device,
                            exc,
                        )
                        has_warnings = True
                    else:
                        self.logger.debug(
                            "omitting unconfigured attribute which is missing"
                            " in device db: #%d (%s) on device %r"
                            " (lookup failed for %s)",
                            id_,
                            name,
                            device,
                            exc,
                        )
                    continue

                metric = family_for(device, id_, name)
                if metric is None:
                    continue

                self.logger.debug(
                    "registering %s of #%d on metric %s",
                    value_key,
                    id_,
                    metric,
                )

                # value_key selects which field of the attribute record is
                # exported as the sample value.
                metric.add_metric(
                    [port, device, family, serial],
                    float(attrinfo[value_key])
                )

            # A sample is only emitted for ports that produced a warning.
            if has_warnings:
                conversion_warnings.add_metric([port], 1)

        return [
            helper_failure,
            access_errors,
            conversion_warnings,
            *families_by_name.values(),
        ]
Esempio n. 45
0
    def collect(self):
        """Fetch stats from the ADC and yield them as Prometheus families.

        The stats of every entity configured in ``self.metrics`` are fetched
        once through ``collect_data``.  A failed fetch is logged and that
        entity then contributes no samples.  Afterwards, entity by entity,
        one ``CounterMetricFamily`` is yielded per configured counter,
        followed by one ``GaugeMetricFamily`` per configured gauge.  Every
        sample carries the configured labels plus a trailing ``nsip`` label.

        Yields:
            CounterMetricFamily / GaugeMetricFamily objects.  A family is
            yielded even when no sample could be added to it.
        """
        nsip = self.nsip

        data = {}
        for entity in self.metrics:
            logger.info('Collecting metric %s for %s', entity, nsip)
            try:
                data[entity] = collect_data(nsip, entity, self.username,
                                            self.password, self.protocol,
                                            self.nitro_timeout)
            except Exception as e:
                logger.warning('Could not collect metric: %s', e)

        # (config section, family class, word for warnings, word for errors)
        family_kinds = (
            ('counters', CounterMetricFamily, 'Counter', 'counter'),
            ('gauges', GaugeMetricFamily, 'Gauge', 'gauge'),
        )

        # Handed to update_lbvs_label() and cleared after the first prefix
        # mismatch; presumably it keeps that helper from logging the same
        # mismatch repeatedly -- confirm against update_lbvs_label.
        log_prefix_match = True

        for entity_name, entity in self.metrics.items():
            has_labels = 'labels' in entity
            if has_labels:
                # Each label entry is indexed as [0] = key in the collected
                # data item, [1] = Prometheus label name.
                label_keys = [pair[0] for pair in entity['labels']]
                label_names = [pair[1] for pair in entity['labels']]
            else:
                label_keys = []
                label_names = []
            label_names.append('nsip')

            for section, family_cls, kind_title, kind in family_kinds:
                for ns_metric_name, prom_metric_name in entity.get(section, []):
                    family = family_cls(prom_metric_name,
                                        ns_metric_name,
                                        labels=label_names)

                    # collect_data() may hand back a single item instead of
                    # a list; normalise so the loop below handles both.
                    entity_stats = data.get(entity_name, [])
                    if not isinstance(entity_stats, list):
                        entity_stats = [entity_stats]

                    for data_item in entity_stats:
                        if not data_item:
                            continue

                        if ns_metric_name not in data_item:
                            logger.warning(
                                '%s stats for %s not enabled in adc  %s, so could not add to %s',
                                kind_title, ns_metric_name, nsip, entity_name)
                            # Stop at the first item lacking the stat.
                            break

                        if has_labels:
                            label_values = [data_item[key]
                                            for key in label_keys]

                            # k8s_ingress_lbvs samples are only exported when
                            # running inside Kubernetes and only for items
                            # accepted by update_lbvs_label().
                            if entity_name == "k8s_ingress_lbvs":
                                if os.environ.get(
                                        'KUBERNETES_SERVICE_HOST') is None:
                                    continue
                                prefix_match = update_lbvs_label(
                                    self.k8s_cic_prefix, label_values,
                                    ns_metric_name, log_prefix_match)
                                if not prefix_match:
                                    log_prefix_match = False
                                    continue
                            label_values.append(nsip)
                        else:
                            label_values = [nsip]

                        try:
                            family.add_metric(
                                label_values,
                                float(data_item[ns_metric_name]))
                        except Exception as e:
                            logger.error(
                                'Caught exception while adding %s %s to %s: %s',
                                kind, ns_metric_name, entity_name, e)

                    yield family
Esempio n. 46
0
 def collect(self):
     """Yield one ``tcp_syn_stats`` gauge family, one sample per statistic.

     Samples are told apart by their ``type`` label.  Values are read from
     attributes of ``self``, except ``threshold`` which is obtained from
     ``self.getThreshold()``.

     The ``median_high`` and ``median_low`` label values were previously
     exported misspelled (``media_high`` / ``meadian_low``); queries written
     against the old spellings must be updated.
     """
     family = GaugeMetricFamily("tcp_syn_stats",
                                'tcp syn statistics',
                                labels=['type'])
     samples = (
         ('mean', self.mean),
         ('median', self.median),
         ('median_high', self.median_high),
         ('median_low', self.median_low),
         ('variance', self.variance),
         ('threshold', self.getThreshold()),
         ('max', self.max),
         ('sum', self.sum),
     )
     for stat_type, value in samples:
         family.add_metric([stat_type], value)
     yield family
Esempio n. 47
0
    async def collect(self):
        """
        Gather per-channel RTP metrics over the ESL connection.

        Sends ``api show calls as json`` and, for every row of the reply,
        ``api uuid_set_media_stats`` followed by ``api uuid_dump <uuid> json``.
        Each dumped channel variable that has a gauge family below is added
        to it, labelled with the channel uuid.  The variables listed in
        ``millisecond_metrics`` are divided by 1000 first (their families are
        named in seconds).  An ``rtp_channel_info`` sample with value 1 is
        added per channel.

        Returns an iterator over all gauge families followed by the channel
        info family.
        """

        # (channel variable, metric name, help text)
        metric_specs = (
            ('variable_rtp_audio_in_raw_bytes',
             'rtp_audio_in_raw_bytes_total',
             'Total number of bytes received via this channel.'),
            ('variable_rtp_audio_out_raw_bytes',
             'rtp_audio_out_raw_bytes_total',
             'Total number of bytes sent via this channel.'),
            ('variable_rtp_audio_in_media_bytes',
             'rtp_audio_in_media_bytes_total',
             'Total number of media bytes received via this channel.'),
            ('variable_rtp_audio_out_media_bytes',
             'rtp_audio_out_media_bytes_total',
             'Total number of media bytes sent via this channel.'),
            ('variable_rtp_audio_in_packet_count',
             'rtp_audio_in_packets_total',
             'Total number of packets received via this channel.'),
            ('variable_rtp_audio_out_packet_count',
             'rtp_audio_out_packets_total',
             'Total number of packets sent via this channel.'),
            ('variable_rtp_audio_in_media_packet_count',
             'rtp_audio_in_media_packets_total',
             'Total number of media packets received via this channel.'),
            ('variable_rtp_audio_out_media_packet_count',
             'rtp_audio_out_media_packets_total',
             'Total number of media packets sent via this channel.'),
            ('variable_rtp_audio_in_skip_packet_count',
             'rtp_audio_in_skip_packets_total',
             'Total number of inbound packets discarded by this channel.'),
            ('variable_rtp_audio_out_skip_packet_count',
             'rtp_audio_out_skip_packets_total',
             'Total number of outbound packets discarded by this channel.'),
            ('variable_rtp_audio_in_jitter_packet_count',
             'rtp_audio_in_jitter_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_in_dtmf_packet_count',
             'rtp_audio_in_dtmf_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_out_dtmf_packet_count',
             'rtp_audio_out_dtmf_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_in_cng_packet_count',
             'rtp_audio_in_cng_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_out_cng_packet_count',
             'rtp_audio_out_cng_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_in_flush_packet_count',
             'rtp_audio_in_flush_packets_total',
             'Total number of ? packets in this channel.'),
            ('variable_rtp_audio_in_largest_jb_size',
             'rtp_audio_in_jitter_buffer_bytes_max',
             'Largest jitterbuffer size in this channel.'),
            ('variable_rtp_audio_in_jitter_min_variance',
             'rtp_audio_in_jitter_seconds_min',
             'Minimal jitter in seconds.'),
            ('variable_rtp_audio_in_jitter_max_variance',
             'rtp_audio_in_jitter_seconds_max',
             'Maximum jitter in seconds.'),
            ('variable_rtp_audio_in_jitter_loss_rate',
             'rtp_audio_in_jitter_loss_rate',
             'Ratio of lost packets due to inbound jitter.'),
            ('variable_rtp_audio_in_jitter_burst_rate',
             'rtp_audio_in_jitter_burst_rate',
             'Ratio of packet bursts due to inbound jitter.'),
            ('variable_rtp_audio_in_mean_interval',
             'rtp_audio_in_mean_interval_seconds',
             'Mean interval in seconds of inbound packets'),
            ('variable_rtp_audio_in_flaw_total',
             'rtp_audio_in_flaw_total',
             'Total number of flaws detected in the channel'),
            ('variable_rtp_audio_in_quality_percentage',
             'rtp_audio_in_quality_percent',
             'Audio quality in percent'),
            ('variable_rtp_audio_in_mos',
             'rtp_audio_in_quality_mos',
             'Audio quality as Mean Opinion Score, (between 1 and 5)'),
            ('variable_rtp_audio_rtcp_octet_count',
             'rtcp_audio_bytes_total',
             'Total number of rtcp bytes in this channel.'),
            ('variable_rtp_audio_rtcp_packet_count',
             'rtcp_audio_packets_total',
             'Total number of rtcp packets in this channel.'),
        )

        # One gauge family per channel variable, in the order listed above.
        channel_metrics = {
            variable: GaugeMetricFamily(metric_name, help_text, labels=['id'])
            for variable, metric_name, help_text in metric_specs
        }

        channel_info_metric = GaugeMetricFamily(
            'rtp_channel_info',
            'FreeSWITCH RTP channel info',
            labels=['id', 'name', 'user_agent'])

        # Variables whose value is scaled by 1/1000 before being exported.
        millisecond_metrics = (
            'variable_rtp_audio_in_jitter_min_variance',
            'variable_rtp_audio_in_jitter_max_variance',
            'variable_rtp_audio_in_mean_interval',
        )

        _, calls_reply = await self._esl.send('api show calls as json')
        calls = json.loads(calls_reply).get('rows', [])

        for call in calls:
            uuid = call['uuid']

            await self._esl.send(f'api uuid_set_media_stats {uuid}')
            _, dump_reply = await self._esl.send(f'api uuid_dump {uuid} json')
            channelvars = json.loads(dump_reply)

            id_label = [uuid]
            for variable, raw_value in channelvars.items():
                value = raw_value
                if variable in millisecond_metrics:
                    value = float(raw_value) / 1000.
                family = channel_metrics.get(variable)
                if family is not None:
                    family.add_metric(id_label, value)

            user_agent = channelvars.get('variable_sip_user_agent', 'Unknown')
            channel_info_metric.add_metric([uuid, call['name'], user_agent], 1)

        return itertools.chain(
            channel_metrics.values(),
            [channel_info_metric])
Esempio n. 48
0
    def collect(self):
        """Yield the metronome2 hub and client session metric families.

        The module-level ``hub_sessions`` and ``client_sessions`` mappings
        are each iterated while holding their respective lock.  For every
        session id (``sid``) one sample per family is added, stamped with the
        session's ``timestamp`` entry:

        * "required" families always get a sample, whatever the value;
        * "optional" families only get one when the session entry is not
          ``None``;
        * the time-window families get one sample per element of
          ``receive_time_windows``, labelled with the element's index.

        Yields:
            All hub families, then all client families, in a fixed order.
        """
        hub_received_messages = CounterMetricFamily(
            'metronome2_hub_received_messages',
            'Messages received by the metronome hub',
            labels=['sid'])
        hub_holes_created = CounterMetricFamily(
            'metronome2_hub_holes_created',
            'Holes created within session',
            labels=['sid'])
        hub_holes_closed = CounterMetricFamily(
            'metronome2_hub_holes_closed',
            'Holes closed within session',
            labels=['sid'])
        hub_holes_timed_out = CounterMetricFamily(
            'metronome2_hub_holes_timed_out',
            'Holes timed out within session',
            labels=['sid'])
        hub_holes_current = GaugeMetricFamily(
            'metronome2_hub_holes_current',
            'Current holes within session',
            labels=['sid'])
        hub_payload_bytes = CounterMetricFamily(
            'metronome2_hub_received_bytes',
            'Payload bytes received by the hub',
            labels=['sid'])
        hub_intermessage_gap_mavg_seconds = GaugeMetricFamily(
            'metronome2_hub_intermessage_gap_mavg',
            'Moving average of intermessage gap',
            labels=['sid'])
        hub_receive_time_window_messages = GaugeMetricFamily(
            'metronome2_hub_receive_time_window_messages',
            'Messages received by time window',
            labels=['sid', 'window'])

        client_unexpected_increments = CounterMetricFamily(
            'metronome2_client_seq_unexpected_increment',
            'Unexpected sequence number increments',
            labels=['sid'])
        client_unexpected_decrements = CounterMetricFamily(
            'metronome2_client_seq_unexpected_decrement',
            'Unexpected sequence number decrements',
            labels=['sid'])
        client_sent_messages = CounterMetricFamily(
            'metronome2_client_sent_messages',
            'Messages sent by the metronome client',
            labels=['sid'])
        client_received_messages = CounterMetricFamily(
            'metronome2_client_received_messages',
            'Messages received by the metronome client',
            labels=['sid'])
        client_timely_received_messages = CounterMetricFamily(
            'metronome2_client_timely_received_messages',
            'Timely messages received by the metronome client',
            labels=['sid'])
        client_lost_messages = CounterMetricFamily(
            'metronome2_client_lost_messages', 'Messages lost', labels=['sid'])
        client_inflight_messages = GaugeMetricFamily(
            'metronome2_client_inflight_messages',
            'Current messages in-flight',
            labels=['sid'])
        client_rtt_worst_seconds = GaugeMetricFamily(
            'metronome2_client_rtt_worst',
            'Worst RTT seen by client',
            labels=['sid'])
        # Help text previously read "Worst RTT ..." (copy-paste slip).
        client_rtt_best_seconds = GaugeMetricFamily(
            'metronome2_client_rtt_best',
            'Best RTT seen by client',
            labels=['sid'])
        client_rtt_mavg_seconds = GaugeMetricFamily(
            'metronome2_client_rtt_mavg',
            'Moving average of RTT',
            labels=['sid'])
        client_payload_bytes = CounterMetricFamily(
            'metronome2_client_received_bytes',
            'Payload bytes received by the client',
            labels=['sid'])
        client_intermessage_gap_mavg_seconds = GaugeMetricFamily(
            'metronome2_client_intermessage_gap_mavg',
            'Moving average of intermessage gap',
            labels=['sid'])
        client_receive_time_window_messages = GaugeMetricFamily(
            'metronome2_client_receive_time_window_messages',
            'Messages received by time window',
            labels=['sid', 'window'])

        # (session_info key, family) pairs; see the docstring for the
        # difference between required and optional.
        hub_required = (
            ('received_messages', hub_received_messages),
            ('holes_created', hub_holes_created),
            ('holes_closed', hub_holes_closed),
            ('holes_timed_out', hub_holes_timed_out),
            ('holes_current', hub_holes_current),
            ('received_bytes', hub_payload_bytes),
        )
        hub_optional = (
            ('intermessage_gap_mavg', hub_intermessage_gap_mavg_seconds),
        )
        client_required = (
            ('seq_unexpected_increment', client_unexpected_increments),
            ('seq_unexpected_decrement', client_unexpected_decrements),
            ('sent_messages', client_sent_messages),
            ('received_messages', client_received_messages),
            ('timely_received_messages', client_timely_received_messages),
            ('lost_messages', client_lost_messages),
            ('inflight_messages', client_inflight_messages),
        )
        client_optional = (
            ('rtt_worst', client_rtt_worst_seconds),
            ('rtt_best', client_rtt_best_seconds),
            ('rtt_mavg', client_rtt_mavg_seconds),
            ('received_bytes', client_payload_bytes),
            ('intermessage_gap_mavg', client_intermessage_gap_mavg_seconds),
        )

        def add_session(sid, session_info, required, optional, window_family):
            # Add all samples of one session to the given families.
            timestamp = session_info.get('timestamp')
            for key, family in required:
                family.add_metric([sid], session_info.get(key),
                                  timestamp=timestamp)
            for key, family in optional:
                value = session_info.get(key)
                if value is not None:
                    family.add_metric([sid], value, timestamp=timestamp)
            windows = session_info.get('receive_time_windows')
            if windows is not None:
                for index, window in enumerate(windows):
                    window_family.add_metric([sid, str(index)], window,
                                             timestamp=timestamp)

        # The session mappings are module globals; they are only read here,
        # so no ``global`` declaration is needed.
        with hub_sessions_lock:
            for sid, session_info in hub_sessions.items():
                add_session(sid, session_info, hub_required, hub_optional,
                            hub_receive_time_window_messages)

        with client_sessions_lock:
            for sid, session_info in client_sessions.items():
                add_session(sid, session_info, client_required,
                            client_optional,
                            client_receive_time_window_messages)

        families = (
            hub_received_messages,
            hub_holes_created,
            hub_holes_closed,
            hub_holes_timed_out,
            hub_holes_current,
            hub_payload_bytes,
            hub_intermessage_gap_mavg_seconds,
            hub_receive_time_window_messages,
            client_unexpected_increments,
            client_unexpected_decrements,
            client_sent_messages,
            client_received_messages,
            client_timely_received_messages,
            client_lost_messages,
            client_inflight_messages,
            client_rtt_worst_seconds,
            client_rtt_best_seconds,
            client_rtt_mavg_seconds,
            client_payload_bytes,
            client_intermessage_gap_mavg_seconds,
            client_receive_time_window_messages,
        )
        for family in families:
            yield family
    def collect(self):
        """Collect stats from every NetScaler and yield Prometheus families.

        For each address in ``self.nsips`` and each entity configured in
        ``self.metrics`` the stats are fetched once through ``collect_data``;
        a failed fetch is printed and leaves that entity without data.
        Afterwards, entity by entity, one ``CounterMetricFamily`` is yielded
        per configured counter, followed by one ``GaugeMetricFamily`` per
        configured gauge.  Each family holds the samples of all NetScalers,
        labelled with the configured labels plus a trailing ``nsip`` label.

        A data item whose labels or value cannot be read is reported and
        skipped; it does not abort the scrape.

        Yields:
            CounterMetricFamily / GaugeMetricFamily objects.
        """
        # Collect metrics from NetScalers
        data = {}
        for nsip in self.nsips:
            data[nsip] = {}
            # Walk the metrics config to find the entities to fetch.
            for entity in self.metrics:
                print('>>> Collecting stats for: %s::%s' % (nsip, entity))
                try:
                    data[nsip][entity] = collect_data(nsip, entity, self.username, self.password, self.secure)
                except Exception as e:
                    print('>>> Caught exception while collecting data: ' + str(e))

        # (config section, family class, word used in the failure message).
        # The "guage" spelling is kept exactly as it has always been printed.
        family_kinds = (
            ('counters', CounterMetricFamily, 'counter'),
            ('gauges', GaugeMetricFamily, 'guage'),
        )

        # Provide collected stats to Prometheus as a counter/gauge with the
        # desired labels.
        for entity_name, entity in self.metrics.items():
            if 'labels' in entity:
                # Each label entry is indexed as [0] = key in the collected
                # data item, [1] = Prometheus label name.
                label_keys = [pair[0] for pair in entity['labels']]
                label_names = [pair[1] for pair in entity['labels']]
            else:
                label_keys = []
                label_names = []
            label_names.append('nsip')

            for section, family_cls, kind in family_kinds:
                for ns_metric_name, prom_metric_name in entity.get(section, []):
                    family = family_cls(prom_metric_name, ns_metric_name, labels=label_names)
                    for nsip in self.nsips:
                        # collect_data() may hand back a single item instead
                        # of a list; normalise so both are handled alike.
                        entity_stats = data[nsip].get(entity_name, [])
                        if not isinstance(entity_stats, list):
                            entity_stats = [entity_stats]

                        for data_item in entity_stats:
                            # Label lookup lives inside the try so that an
                            # item missing a label key is reported and
                            # skipped like an item with a bad value.
                            try:
                                label_values = [data_item[key] for key in label_keys]
                                label_values.append(nsip)
                                family.add_metric(label_values, float(data_item[ns_metric_name]))
                            except Exception as e:
                                print('>>> Caught exception while adding %s %s to %s: %s' % (kind, ns_metric_name, entity_name, str(e)))
                    yield family