def collect(self): """Called by prometheus client when it reads metrics. Note: may be called by a separate thread. """ in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels) metrics_by_key = {} # We copy so that we don't mutate the list while iterating with self._lock: keys = list(self._registrations) for key in keys: with self._lock: callbacks = set(self._registrations[key]) in_flight.add_metric(key, len(callbacks)) metrics = self._metrics_class() metrics_by_key[key] = metrics for callback in callbacks: callback(metrics) yield in_flight for name in self.sub_metrics: gauge = GaugeMetricFamily("_".join([self.name, name]), "", labels=self.labels) for key, metrics in six.iteritems(metrics_by_key): gauge.add_metric(key, getattr(metrics, name)) yield gauge
def collect(self): result_dict = {} apps = self.client.list_apps(embed_task_stats=True) for app_attribute in self.APP_ATTIBUTES: metric_family = GaugeMetricFamily( self.get_metric_key(app_attribute, 'apps'), documentation='from v2/apps?embed=apps.taskStats value of %s' % app_attribute, labels=["id"]) for app in apps: labels = [app.id] value = self.get_metric_value(app_attribute, app) if value is None: continue metric_family.add_metric(labels, value) yield metric_family queue = self.client.list_queue() for queue_attribute in self.QUEUE_ATTRIBUTES: metric_family = GaugeMetricFamily( self.get_metric_key(queue_attribute, 'queue'), documentation='from v2/queue value of %s' % queue_attribute, labels=["id"]) for queue_item in queue: labels = [queue_item.app.id] value = self.get_metric_value(queue_attribute, queue_item) if value is None: continue metric_family.add_metric(labels, value) yield metric_family
def collect(self):
    background_process_in_flight_count = GaugeMetricFamily(
        "synapse_background_process_in_flight_count",
        "Number of background processes in flight",
        labels=["name"],
    )

    # We copy the dict so that it doesn't change from underneath us.
    # We also copy the process lists as that can also change
    with _bg_metrics_lock:
        _background_processes_copy = {
            k: list(v) for k, v in six.iteritems(_background_processes)
        }

    for desc, processes in six.iteritems(_background_processes_copy):
        background_process_in_flight_count.add_metric((desc,), len(processes))
        for process in processes:
            process.update_metrics()

    yield background_process_in_flight_count

    # now we need to run collect() over each of the static Counters, and
    # yield each metric they return.
    for m in (
        _background_process_ru_utime,
        _background_process_ru_stime,
        _background_process_db_txn_count,
        _background_process_db_txn_duration,
        _background_process_db_sched_duration,
    ):
        for r in m.collect():
            yield r
def collect(self): cm = GaugeMetricFamily( "python_twisted_reactor_last_seen", "Seconds since the Twisted reactor was last seen", ) cm.add_metric([], time.time() - last_ticked) yield cm
def collect(self):
    logger.debug('Polling...')
    if not self.session_id:
        self.session_id = get_session_id(self.base_url, self.login, self.password)
    tickets_count = get_tickes_count(self.base_url, self.session_id)

    support_tickets_total = GaugeMetricFamily(
        'support_tickets_total', 'Number of tickets',
        labels=['project', 'status'])

    for status_data in tickets_count:
        if status_data['department_id'] in self.department_ids:
            support_tickets_total.add_metric(
                [status_data['department'], status_data['name']],
                status_data['count'])

    yield support_tickets_total
def collect(self):
    if not HAVE_PROC_SELF_STAT:
        return

    with open("/proc/self/stat") as s:
        line = s.read()
        raw_stats = line.split(") ", 1)[1].split(" ")

        user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
        user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
        yield user

        sys = GaugeMetricFamily("process_cpu_system_seconds_total", "")
        sys.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
        yield sys
def collect(self):
    start = time.time()

    # Request data from Azure Status
    status = self._request_data()

    for region_section in status[1]:
        for category in status[1][region_section]:
            for service in status[1][region_section][category]:
                for region in status[1][region_section][category][service]:
                    metric_name = "azure_status_{}_{}_status".format(category, service).replace(".", "_")
                    metric = GaugeMetricFamily(metric_name,
                                               'Azure Status for {}'.format(metric_name),
                                               labels=["region"])
                    metric.add_metric([region],
                                      STATUSES[status[1][region_section][category][service][region]])
                    yield metric

    duration = time.time() - start
    COLLECTION_TIME.observe(duration)
def collect(self) -> Iterator[GaugeMetricFamily]:
    active, idle, dead = 0, 0, 0
    for slave in self._get_slaves():
        if slave.is_alive(use_cached=True) and slave.current_build_id is not None:
            active += 1
        elif slave.is_alive(use_cached=True) and slave.current_build_id is None:
            idle += 1
        elif not slave.is_alive(use_cached=True) and not slave.is_shutdown():
            # Slave is not alive and was not deliberately put in shutdown mode. Count it as dead.
            dead += 1
        else:
            # If not slave.is_alive() and slave.is_shutdown() = True then we have deliberately
            # and gracefully killed the slave. We do not want to categorize such a slave as 'dead'
            pass

    slaves_gauge = GaugeMetricFamily('slaves', 'Total number of slaves', labels=['state'])
    slaves_gauge.add_metric(['active'], active)
    slaves_gauge.add_metric(['idle'], idle)
    slaves_gauge.add_metric(['dead'], dead)
    yield slaves_gauge
def collect(self):
    g = GaugeMetricFamily(self.name, self.desc, labels=self.labels)

    try:
        calls = self.caller()
    except Exception:
        logger.exception(
            "Exception running callback for LaterGauge(%s)", self.name,
        )
        yield g
        return

    if isinstance(calls, dict):
        for k, v in six.iteritems(calls):
            g.add_metric(k, v)
    else:
        g.add_metric([], calls)

    yield g
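# The collect() above reads its value from a callback at scrape time. Its class
# definition is not shown in this example, so the following is only an assumed, minimal
# version of the same pattern (constructor and attribute names are invented, not the
# original API): the callback may return either a plain number or a dict mapping
# label-value tuples to numbers.
import logging

from prometheus_client.core import GaugeMetricFamily

logger = logging.getLogger(__name__)


class CallbackGauge:
    def __init__(self, name, desc, labels, caller):
        self.name = name
        self.desc = desc
        self.labels = labels
        self.caller = caller

    def collect(self):
        g = GaugeMetricFamily(self.name, self.desc, labels=self.labels)
        try:
            calls = self.caller()
        except Exception:
            logger.exception("callback for %s failed", self.name)
            yield g
            return
        if isinstance(calls, dict):
            for label_values, value in calls.items():
                g.add_metric(label_values, value)
        else:
            g.add_metric([], calls)
        yield g


# e.g. CallbackGauge("queue_depth", "Items queued", ["queue"],
#                    lambda: {("inbound",): 3, ("outbound",): 0})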
class MachineTypeScrapeImporter: def __init__(self): self.runnable = GaugeMetricFamily( "hydra_machine_type_runnable", "Number of currently runnable builds", labels=["machineType"]) self.running = GaugeMetricFamily( "hydra_machine_type_running", "Number of currently running builds", labels=["machineType"]) self.wait_time = CounterMetricFamily( "hydra_machine_type_wait_time_total", "Number of seconds spent waiting", labels=["machineType"]) self.last_active = CounterMetricFamily( "hydra_machine_type_last_active_total", "Last time this machine type was active", labels=["machineType"]) def load_machine_type(self, name, report): self.runnable.add_metric([name], report.destructive_read("runnable")) self.running.add_metric([name], report.destructive_read("running")) try: self.wait_time.add_metric([name], report.destructive_read("waitTime")) except KeyError: pass try: self.last_active.add_metric([name], report.destructive_read("lastActive")) except KeyError: pass debug_remaining_state(report) def metrics(self): yield self.runnable yield self.running yield self.wait_time yield self.last_active
def collect(self):
    session = requests.Session()
    session.trust_env = False
    session.auth = (self.sonar_user, self.sonar_password)
    session.verify = False

    req_string = self.rest_url + '/resources?metrics=ncloc,coverage'
    res = session.get(req_string)

    # METRIC: detailed test results
    c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics', labels=['name', 'key'])
    if res:
        results = res.json()
        # pp = pprint.PrettyPrinter()
        for result in results:
            # pp.pprint(result)
            for msr in result['msr']:
                c.add_metric([result['name'], msr['key']], msr['val'])
        yield c
    else:
        print("Error fetching from " + req_string)
        print(res)
def collect(self): session = requests.Session() session.trust_env = False session.auth = (self.sonar_user, self.sonar_password) session.verify = False req_string = self.rest_url + '/resources?metrics=' + self.metrics res = session.get(req_string) # METRIC: detailed test results c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics', labels=['name', 'key']) try: if res: results = res.json() for result in results: for msr in result['msr']: c.add_metric([result['name'], msr['key']], msr['val']) yield c else: logging.error("Error fetching from " + req_string) logging.error(res) except KeyError: logging.error("Could not retrieve metrics from: " + self.metrics) logging.error("Check argument sonar_metrics")
def convert_meter_metric(cls, marathon_key, marathon_metric):
    metric_key = cls.convert_metric_key(marathon_key)
    metric_key = '%s_rate' % metric_key

    g = GaugeMetricFamily(
        name=metric_key,
        documentation='from %s' % marathon_key,
        labels=('window',))

    g.add_metric(('1m',), marathon_metric['m1_rate'])
    g.add_metric(('5m',), marathon_metric['m5_rate'])
    g.add_metric(('15m',), marathon_metric['m15_rate'])
    g.add_metric(('mean',), marathon_metric['mean_rate'])

    return g
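# Standalone illustration of what the converter above produces for a hypothetical
# Marathon/Dropwizard meter. The metric name and rate values below are made up for the
# example; the real code derives the name via convert_metric_key(), which is not shown
# here. The resulting family carries one sample per rate window via the 'window' label.
from prometheus_client.core import GaugeMetricFamily

meter = {'m1_rate': 0.8, 'm5_rate': 0.5, 'm15_rate': 0.3, 'mean_rate': 0.4}

g = GaugeMetricFamily(name='marathon_app_count_rate',
                      documentation='from service.mesosphere.marathon.app.count',
                      labels=('window',))
for window, key in (('1m', 'm1_rate'), ('5m', 'm5_rate'),
                    ('15m', 'm15_rate'), ('mean', 'mean_rate')):
    g.add_metric((window,), meter[key])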
def collect(self): '''collect metrics''' # Task metrics task_info = get_task_state_info() t_state = GaugeMetricFamily( 'airflow_task_status', 'Shows the number of task starts with this status', labels=['dag_id', 'task_id', 'owner', 'status'] ) for task in task_info: t_state.add_metric([task.dag_id, task.task_id, task.owners, task.state or 'none'], task.value) yield t_state # Dag Metrics dag_info = get_dag_state_info() d_state = GaugeMetricFamily( 'airflow_dag_status', 'Shows the number of dag starts with this status', labels=['dag_id', 'owner', 'status'] ) for dag in dag_info: d_state.add_metric([dag.dag_id, dag.owners, dag.state], dag.count) yield d_state # DagRun metrics dag_duration = GaugeMetricFamily( 'airflow_dag_run_duration', 'Duration of currently running dag_runs in seconds', labels=['dag_id', 'run_id'] ) driver = Session.bind.driver for dag in get_dag_duration_info(): if driver == 'mysqldb' or driver == 'pysqlite': dag_duration.add_metric([dag.dag_id, dag.run_id], dag.duration) else: dag_duration.add_metric([dag.dag_id, dag.run_id], dag.duration.seconds) yield dag_duration
class ArraySpaceMetrics(): """ Base class for FlashArray Prometheus array space metrics """ def __init__(self, fa): self.fa = fa self.data_reduction = GaugeMetricFamily( 'purefa_array_space_datareduction_ratio', 'FlashArray overall data reduction', labels=['dimension'], unit='ratio') self.capacity = GaugeMetricFamily('purefa_array_space_capacity_bytes', 'FlashArray overall space capacity', labels=['dimension']) self.provisioned = GaugeMetricFamily( 'purefa_array_space_provisioned_bytes', 'FlashArray overall provisioned space', labels=['dimension']) self.used = GaugeMetricFamily('purefa_array_space_used_bytes', 'FlashArray overall used space', labels=['dimension']) def _data_reduction(self): """ Create metrics of gauge type for array data reduction. Metrics values can be iterated over. """ for k in mappings.array_drr_mapping: self.data_reduction.add_metric( mappings.array_drr_mapping[k], self.fa.get_array_elem(k) if self.fa.get_array_elem(k) is not None else 0) def _capacity(self): """ Create metrics of gauge type for array capacity indicators. Metrics values can be iterated over. """ for k in mappings.array_capacity_mapping: self.capacity.add_metric( mappings.array_capacity_mapping[k], self.fa.get_array_elem(k) if self.fa.get_array_elem(k) is not None else 0) def _provisioned(self): """ Create metrics of gauge type for array provisioned space indicators. Metrics values can be iterated over. """ for k in mappings.array_provisioned_mapping: self.provisioned.add_metric( mappings.array_provisioned_mapping[k], self.fa.get_array_elem(k) if self.fa.get_array_elem(k) is not None else 0) def _used(self): """ Create metrics of gauge type for array used space indicators. Metrics values can be iterated over. """ for k in mappings.array_used_mapping: self.used.add_metric( mappings.array_used_mapping[k], self.fa.get_array_elem(k) if self.fa.get_array_elem(k) is not None else 0) def get_metrics(self): self._data_reduction() self._capacity() self._provisioned() self._used() yield self.data_reduction yield self.capacity yield self.provisioned yield self.used
def collect(self): start = time.time() # Perform REST API call to fetch data data = call_rest_api('/mgmt/status/default/QueueManagersStatus', self.ip, self.port, self.session, self.timeout) if data == '': return # Update Prometheus metrics for qm in data['QueueManagersStatus']: g = GaugeMetricFamily( 'mqa_queue_manager_cpu_usage', 'The instantaneous CPU usage by the queue manager as a percentage of the CPU load', labels=['appliance', 'qm', 'status']) g.add_metric([self.appliance, qm['Name'], qm['Status']], qm['CpuUsage']) yield g g = GaugeMetricFamily( 'mqa_queue_manager_memory_bytes_used', 'The amount of memory in bytes that is currently in use by the queue manager', labels=['appliance', 'qm', 'status']) # Memory in MB not MiB #g.add_metric([self.appliance, qm['Name']], qm['UsedMemory'] * 1048576) g.add_metric([self.appliance, qm['Name'], qm['Status']], qm['UsedMemory'] * 1000000) yield g g = GaugeMetricFamily( 'mqa_queue_manager_fs_bytes_used', 'The amount of file system in bytes that is currently in use by the queue manager', labels=['appliance', 'qm', 'status']) # Memory in MB not MiB #g.add_metric([self.appliance, qm['Name']], qm['UsedFs'] * 1048576) g.add_metric([self.appliance, qm['Name'], qm['Status']], qm['UsedFs'] * 1000000) yield g g = GaugeMetricFamily( 'mqa_queue_manager_fs_bytes_allocated', 'The amount of file system in bytes allocated for the queue manager', labels=['appliance', 'qm', 'status']) # Memory in MB not MiB #g.add_metric([self.appliance, qm['Name']], qm['TotalFs'] * 1048576) g.add_metric([self.appliance, qm['Name'], qm['Status']], qm['TotalFs'] * 1000000) yield g i = InfoMetricFamily('mqa_queue_manager', 'MQ Appliance queue manager information') i.add_metric( [ 'appliance', 'qm', 'status', 'haRole', 'haStatus', 'drRole', 'drStatus' ], { 'appliance': self.appliance, 'qm': qm['Name'], 'status': qm['Status'], 'haRole': 'Unknown' if qm['HaRole'] == '' else qm['HaRole'], 'haStatus': 'Unknown' if qm['HaStatus'] == '' else qm['HaStatus'], 'drRole': 'Unknown' if qm['DrRole'] == '' else qm['DrRole'], 'drStatus': 'Unknown' if qm['DrStatus'] == '' else qm['DrStatus'] }) yield i g = GaugeMetricFamily( 'mqa_exporter_queue_managers_elapsed_time_seconds', 'Exporter eleapsed time to collect queue managers metrics', labels=['appliance']) g.add_metric([self.appliance], time.time() - start) yield g
def test_gauge_labels(self):
    cmf = GaugeMetricFamily('g', 'help', labels=['a'])
    cmf.add_metric(['b'], 2)
    self.custom_collector(cmf)
    self.assertEqual(2, self.registry.get_sample_value('g', {'a': 'b'}))
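# A self-contained variant of the test above, assuming nothing about the surrounding
# test class: a throwaway collector that yields the prepared metric family is registered
# against a fresh CollectorRegistry, and the sample is read back by name and labels.
from prometheus_client.core import CollectorRegistry, GaugeMetricFamily


def test_gauge_labels_standalone():
    cmf = GaugeMetricFamily('g', 'help', labels=['a'])
    cmf.add_metric(['b'], 2)

    class OneShotCollector:
        def collect(self):
            yield cmf

    registry = CollectorRegistry()
    registry.register(OneShotCollector())
    assert registry.get_sample_value('g', {'a': 'b'}) == 2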
def collect(self, name: str) -> Generator: ''' Yields metrics for all managed readings. :param name: Name of the inverter' ''' if self.serial_number is not None and self.parameter_file is not None and \ self.control_software_version is not None: yield InfoMetricFamily( 'rctmon_inverter', 'Information about the inverter', { 'inverter': name, 'serial_number': self.serial_number, 'parameter_file': self.parameter_file, 'control_software_version': self.control_software_version }) # Generators if self.have_generator_a or self.have_generator_b: gen_voltage = GaugeMetricFamily('rctmon_generator_voltage', 'Solar generator voltage', labels=['inverter', 'generator'], unit='volt') gen_power = GaugeMetricFamily('rctmon_generator_power', 'Solar generator power', labels=['inverter', 'generator'], unit='watt') gen_mpp_tgt_volts = GaugeMetricFamily( 'rctmon_generator_mpp_target_voltage', 'Target voltage of MPP ' 'tracker', labels=['inverter', 'generator'], unit='volt') gen_mpp_search_stp = GaugeMetricFamily( 'rctmon_generator_mpp_search_step', 'MPP search step', labels=['inverter', 'generator'], unit='volt') def collect_gen(gen: SolarGeneratorReadings, name: str, gen_name: str) -> None: if gen.voltage is not None: gen_voltage.add_metric([name, gen_name], gen.voltage) if gen.power is not None: gen_power.add_metric([name, gen_name], gen.power) if gen.mpp_target_voltage is not None: gen_mpp_tgt_volts.add_metric([name, gen_name], gen.mpp_target_voltage) if gen.mpp_search_step is not None: gen_mpp_search_stp.add_metric([name, gen_name], gen.mpp_search_step) if self.have_generator_a: collect_gen(self.solar_generator_a, name, 'a') if self.have_generator_b: collect_gen(self.solar_generator_b, name, 'b') yield gen_voltage yield gen_power yield gen_mpp_tgt_volts yield gen_mpp_search_stp temp = GaugeMetricFamily('rctmon_temperature', 'Temperature values in °C', labels=['inverter', 'sensor']) if self.temperature_heatsink is not None: # db.temp1 temp.add_metric([name, 'heatsink'], self.temperature_heatsink) if self.temperature_heatsink_batt is not None: # db.temp2 temp.add_metric([name, 'heatsink_battery_actuator'], self.temperature_heatsink_batt) if self.temperature_core is not None: # db.core_temp temp.add_metric([name, 'core'], self.temperature_core) yield temp if self.inverter_status is not None: ivs = GaugeMetricFamily('rctmon_inverter_status', 'Status of the inverter', labels=['inverter']) ivs.add_metric([name], self.inverter_status) yield ivs if self.inverter_grid_separated is not None: igs = GaugeMetricFamily('rctmon_inverter_grid_separated', 'Status of the island mode', labels=['inverter']) igs.add_metric([name], self.inverter_grid_separated) yield igs faults = GaugeMetricFamily('rctmon_inverter_faults', 'Fault registers', labels=['inverter', 'register']) if self.fault0 is not None: faults.add_metric([name, '0'], self.fault0) if self.fault1 is not None: faults.add_metric([name, '1'], self.fault1) if self.fault2 is not None: faults.add_metric([name, '2'], self.fault2) if self.fault3 is not None: faults.add_metric([name, '3'], self.fault3) yield faults yield from self.household.collect(name) yield from self.grid.collect(name) if self.power_switch_available: yield from self.power_switch_readings.collect(name)
def collect(self): from prometheus_client.core import CounterMetricFamily, GaugeMetricFamily sem_ext = self.server.extensions["semaphores"] semaphore_max_leases_family = GaugeMetricFamily( "semaphore_max_leases", "Maximum leases allowed per semaphore, this will be constant for each semaphore during its lifetime.", labels=["name"], ) semaphore_active_leases_family = GaugeMetricFamily( "semaphore_active_leases", "Amount of currently active leases per semaphore.", labels=["name"], ) semaphore_pending_leases = GaugeMetricFamily( "semaphore_pending_leases", "Amount of currently pending leases per semaphore.", labels=["name"], ) semaphore_acquire_total = CounterMetricFamily( "semaphore_acquire_total", "Total number of leases acquired per semaphore.", labels=["name"], ) semaphore_release_total = CounterMetricFamily( "semaphore_release_total", "Total number of leases released per semaphore.\n" "Note: if a semaphore is closed while there are still leases active, this count will not equal " "`semaphore_acquired_total` after execution.", labels=["name"], ) semaphore_average_pending_lease_time = GaugeMetricFamily( "semaphore_average_pending_lease_time", "Exponential moving average of the time it took to acquire a lease per semaphore.\n" "Note: this only includes time spent on scheduler side, " "it does" " not include time spent on communication.\n" "Note: this average is calculated based on order of leases instead of time of lease acquisition.", labels=["name"], unit="s", ) for semaphore_name, semaphore_max_leases in sem_ext.max_leases.items(): semaphore_max_leases_family.add_metric([semaphore_name], semaphore_max_leases) semaphore_active_leases_family.add_metric( [semaphore_name], len(sem_ext.leases[semaphore_name])) semaphore_pending_leases.add_metric( [semaphore_name], sem_ext.metrics["pending"][semaphore_name]) semaphore_acquire_total.add_metric( [semaphore_name], sem_ext.metrics["acquire_total"][semaphore_name]) semaphore_release_total.add_metric( [semaphore_name], sem_ext.metrics["release_total"][semaphore_name]) semaphore_average_pending_lease_time.add_metric( [semaphore_name], sem_ext.metrics["average_pending_lease_time"][semaphore_name], ) yield semaphore_max_leases_family yield semaphore_active_leases_family yield semaphore_pending_leases yield semaphore_acquire_total yield semaphore_release_total yield semaphore_average_pending_lease_time
def collect(self): try: log.debug('Querying for clocks information...') graphics_clock_mhz = nvmlDeviceGetClockInfo( self.device, NVML_CLOCK_GRAPHICS) metric = GaugeMetricFamily(self.prefix + 'clock_gpu_hz', self.prefix_s + "GPU clock", labels=self.labels.keys()) metric.add_metric(self.labels.values(), graphics_clock_mhz * 1000000) yield metric mem_clock_mhz = nvmlDeviceGetClockInfo(self.device, NVML_CLOCK_MEM) metric = GaugeMetricFamily(self.prefix + 'clock_mem_hz', self.prefix_s + "MEM clock", labels=self.labels.keys()) metric.add_metric(self.labels.values(), mem_clock_mhz * 1000000) yield metric log.debug('Querying for temperature information...') gpu_temperature_c = nvmlDeviceGetTemperature( self.device, NVML_TEMPERATURE_GPU) metric = GaugeMetricFamily(self.prefix + 'gpu_temperature_c', self.prefix_s + "GPU temperature", labels=self.labels.keys()) metric.add_metric(self.labels.values(), gpu_temperature_c) yield metric log.debug('Querying for fan information...') metric = GaugeMetricFamily(self.prefix + 'fan_speed_percent', self.prefix_s + "fan speed", labels=self.labels.keys()) metric.add_metric(self.labels.values(), nvmlDeviceGetFanSpeed(self.device)) yield metric log.debug('Querying for power information...') power_usage_w = nvmlDeviceGetPowerUsage(self.device) / 1000.0 metric = GaugeMetricFamily(self.prefix + 'power_draw_watt', self.prefix_s + "power draw", labels=self.labels.keys()) metric.add_metric(self.labels.values(), power_usage_w) yield metric metric = GaugeMetricFamily(self.prefix + 'power_state', self.prefix_s + "power state", labels=self.labels.keys()) metric.add_metric(self.labels.values(), nvmlDeviceGetPowerState(self.device)) yield metric log.debug('Querying for memory information...') mem_info = nvmlDeviceGetMemoryInfo(self.device) metric = GaugeMetricFamily(self.prefix + 'memory_total_bytes', self.prefix_s + "total memory", labels=self.labels.keys()) metric.add_metric(self.labels.values(), mem_info.total) yield metric metric = GaugeMetricFamily(self.prefix + 'memory_used_bytes', self.prefix_s + "used memory", labels=self.labels.keys()) metric.add_metric(self.labels.values(), mem_info.used) yield metric log.info('collected power:%.1fW temp:%dc gpu:%dMHz mem:%dMHz', power_usage_w, gpu_temperature_c, graphics_clock_mhz, mem_clock_mhz) except Exception as e: log.warning(e, exc_info=True)
def collect(self): if self.telegram == None: return yield GaugeMetricFamily('current_electricity_usage_kw', 'Current electricity usage by client in kW.', self.telegram[obiref.CURRENT_ELECTRICITY_USAGE].value) yield GaugeMetricFamily('current_electricity_delivery_kw', 'Current electricity delivery by client in kW.', self.telegram[obiref.CURRENT_ELECTRICITY_DELIVERY].value) try: instantaneous_voltage_v = GaugeMetricFamily('instantaneous_voltage_v', 'Instantaneous voltage per phase in Volt.', labels=['phase']) instantaneous_voltage_v.add_metric(['L1'], self.telegram[obiref.INSTANTANEOUS_VOLTAGE_L1].value) instantaneous_voltage_v.add_metric(['L2'], self.telegram[obiref.INSTANTANEOUS_VOLTAGE_L2].value) instantaneous_voltage_v.add_metric(['L3'], self.telegram[obiref.INSTANTANEOUS_VOLTAGE_L3].value) yield instantaneous_voltage_v except KeyError: # Not all meters provide this data pass try: instantaneous_current_c = GaugeMetricFamily('instantaneous_current_c', 'Instantaneous current per phase in Ampere.', labels=['phase']) instantaneous_current_c.add_metric(['L1'], self.telegram[obiref.INSTANTANEOUS_CURRENT_L1].value) instantaneous_current_c.add_metric(['L2'], self.telegram[obiref.INSTANTANEOUS_CURRENT_L2].value) instantaneous_current_c.add_metric(['L3'], self.telegram[obiref.INSTANTANEOUS_CURRENT_L3].value) yield instantaneous_current_c except KeyError: # Not all meters provide this data pass try: instantaneous_power_positive_kw = GaugeMetricFamily('instantaneous_power_positive_kw', 'Instantaneous positive power per phase in kW.', labels=['phase']) instantaneous_power_positive_kw.add_metric(['L1'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L1_POSITIVE].value) instantaneous_power_positive_kw.add_metric(['L2'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L2_POSITIVE].value) instantaneous_power_positive_kw.add_metric(['L3'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L3_POSITIVE].value) yield instantaneous_power_positive_kw except KeyError: # Not all meters provide this data pass try: instantaneous_power_negative_kw = GaugeMetricFamily('instantaneous_power_negative_kw', 'Instantaneous negative power per phase in kW.', labels=['phase']) instantaneous_power_negative_kw.add_metric(['L1'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L1_NEGATIVE].value) instantaneous_power_negative_kw.add_metric(['L2'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L2_NEGATIVE].value) instantaneous_power_negative_kw.add_metric(['L3'], self.telegram[obiref.INSTANTANEOUS_ACTIVE_POWER_L3_NEGATIVE].value) yield instantaneous_power_negative_kw except KeyError: # Not all meters provide this data pass yield GaugeMetricFamily('current_electricity_tariff', 'Current electricity tariff active.', int(self.telegram[obiref.ELECTRICITY_ACTIVE_TARIFF].value)) electricity_used_kwh = CounterMetricFamily('electricity_used_kwh', 'Electricity used by client in kWh.', labels=['tariff']) for index, field in enumerate(obiref.ELECTRICITY_USED_TARIFF_ALL): electricity_used_kwh.add_metric(['%d' % (index + 1)], self.telegram[field].value) yield electricity_used_kwh electricity_delivered_kwh = CounterMetricFamily('electricity_delivered_kwh', 'Electricity delivered by client in kWh.', labels=['tariff']) for index, field in enumerate(obiref.ELECTRICITY_DELIVERED_TARIFF_ALL): electricity_delivered_kwh.add_metric(['%d' % (index + 1)], self.telegram[field].value) yield electricity_delivered_kwh voltage_sag_count = CounterMetricFamily('voltage_sag_count', 'Number of voltage sags.', labels=['phase']) 
voltage_sag_count.add_metric(['L1'], self.telegram[obiref.VOLTAGE_SAG_L1_COUNT].value) voltage_sag_count.add_metric(['L2'], self.telegram[obiref.VOLTAGE_SAG_L2_COUNT].value) voltage_sag_count.add_metric(['L3'], self.telegram[obiref.VOLTAGE_SAG_L3_COUNT].value) yield voltage_sag_count voltage_swell_count = CounterMetricFamily('voltage_swell_count', 'Number of voltage swells.', labels=['phase']) voltage_swell_count.add_metric(['L1'], self.telegram[obiref.VOLTAGE_SWELL_L1_COUNT].value) voltage_swell_count.add_metric(['L2'], self.telegram[obiref.VOLTAGE_SWELL_L2_COUNT].value) voltage_swell_count.add_metric(['L3'], self.telegram[obiref.VOLTAGE_SWELL_L3_COUNT].value) yield voltage_swell_count yield CounterMetricFamily('long_power_failure_count', 'Number of power long failures in any phase.', self.telegram[obiref.LONG_POWER_FAILURE_COUNT].value) try: yield CounterMetricFamily('short_power_failure_count', 'Number of power short failures in any phase.', self.telegram[obiref.SHORT_POWER_FAILURE_COUNT].value) except KeyError: # Not all meters provide this data pass yield CounterMetricFamily('gas_used_m3', 'Gas delivered to client in m3.', self.telegram[obiref.HOURLY_GAS_METER_READING].value)
def collect(self): try: wiki_pages = GaugeMetricFamily("wiki_pages", "wiki_pages", labels=["lang_code", "language"]) wiki_articles = GaugeMetricFamily( "wiki_articles", "wiki_articles", labels=["lang_code", "language"], ) wiki_edits = GaugeMetricFamily("wiki_edits", "wiki_edits", labels=["lang_code", "language"]) wiki_images = GaugeMetricFamily("wiki_images", "wiki_images", labels=["lang_code", "language"]) wiki_users = GaugeMetricFamily("wiki_users", "wiki_users", labels=["lang_code", "language"]) wiki_activeusers = GaugeMetricFamily( "wiki_activeusers", "wiki_activeusers", labels=["lang_code", "language"], ) wiki_admins = GaugeMetricFamily("wiki_admins", "wiki_admins", labels=["lang_code", "language"]) for lang in indic_wikipedia: logger.info("Language: %s", lang) url = "https://" + lang + ".wikipedia.org/w/api.php" PARAMS = { "action": "query", "meta": "siteinfo", "formatversion": "2", "format": "json", "siprop": "statistics", } r = requests.get(url=url, params=PARAMS) data = r.json() wiki_pages.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["pages"], ) wiki_articles.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["articles"], ) wiki_edits.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["edits"], ) wiki_images.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["images"], ) wiki_users.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["users"], ) wiki_activeusers.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["activeusers"], ) wiki_admins.add_metric( [lang, all_lang[lang]["name"]], data["query"]["statistics"]["admins"], ) yield wiki_pages yield wiki_articles yield wiki_edits yield wiki_images yield wiki_users yield wiki_activeusers yield wiki_admins except Exception: logger.error(traceback.format_exc())
def collect(self): """Collect metrics.""" # Task metrics task_info = get_task_state_info() t_state = GaugeMetricFamily( "airflow_task_status", "Shows the number of task instances with particular status", labels=["dag_id", "task_id", "owner", "status"], ) for task in task_info: t_state.add_metric( [task.dag_id, task.task_id, task.owners, task.state or "none"], task.value, ) yield t_state task_duration = GaugeMetricFamily( "airflow_task_duration", "Duration of successful tasks in seconds", labels=["task_id", "dag_id", "execution_date"], ) for task in get_task_duration_info(): task_duration_value = (task.end_date - task.start_date).total_seconds() task_duration.add_metric( [ task.task_id, task.dag_id, task.execution_date.strftime("%Y-%m-%d-%H-%M") ], task_duration_value, ) yield task_duration task_failure_count = GaugeMetricFamily( "airflow_task_fail_count", "Count of failed tasks", labels=["dag_id", "task_id"], ) for task in get_task_failure_counts(): task_failure_count.add_metric([task.dag_id, task.task_id], task.count) yield task_failure_count # Dag Metrics dag_info = get_dag_state_info() d_state = GaugeMetricFamily( "airflow_dag_status", "Shows the number of dag starts with this status", labels=["dag_id", "owner", "status"], ) for dag in dag_info: d_state.add_metric([dag.dag_id, dag.owners, dag.state], dag.count) yield d_state dag_duration = GaugeMetricFamily( "airflow_dag_run_duration", "Duration of successful dag_runs in seconds", labels=["dag_id"], ) for dag in get_dag_duration_info(): dag_duration_value = (dag.end_date - dag.start_date).total_seconds() dag_duration.add_metric([dag.dag_id], dag_duration_value) yield dag_duration # Scheduler Metrics dag_scheduler_delay = GaugeMetricFamily( "airflow_dag_scheduler_delay", "Airflow DAG scheduling delay", labels=["dag_id"], ) for dag in get_dag_scheduler_delay(): dag_scheduling_delay_value = (dag.start_date - dag.execution_date).total_seconds() dag_scheduler_delay.add_metric([dag.dag_id], dag_scheduling_delay_value) yield dag_scheduler_delay # XCOM parameters xcom_params = GaugeMetricFamily( "airflow_xcom_parameter", "Airflow Xcom Parameter", labels=["dag_id", "task_id"], ) xcom_config = load_xcom_config() for tasks in xcom_config.get("xcom_params", []): for param in get_xcom_params(tasks["task_id"]): xcom_value = extract_xcom_parameter(param.value) if tasks["key"] in xcom_value: xcom_params.add_metric([param.dag_id, param.task_id], xcom_value[tasks["key"]]) yield xcom_params task_scheduler_delay = GaugeMetricFamily( "airflow_task_scheduler_delay", "Airflow Task scheduling delay", labels=["queue"], ) for task in get_task_scheduler_delay(): task_scheduling_delay_value = (task.start_date - task.queued_dttm).total_seconds() task_scheduler_delay.add_metric([task.queue], task_scheduling_delay_value) yield task_scheduler_delay num_queued_tasks_metric = GaugeMetricFamily( "airflow_num_queued_tasks", "Airflow Number of Queued Tasks", ) num_queued_tasks = get_num_queued_tasks() num_queued_tasks_metric.add_metric([], num_queued_tasks) yield num_queued_tasks_metric
def collect(self): cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"]) for n, m in enumerate(gc.get_count()): cm.add_metric([str(n)], m) yield cm
def collect(self): session = requests.Session() session.trust_env = False session.auth = (self.bamboo_user, self.bamboo_password) session.verify = False # METRIC: detailed test results c = GaugeMetricFamily('bamboo_test_results', 'Bamboo Test Results', labels=['name', 'job', 'className', 'methodName']) for job in self.bamboo_test_jobs: res = session.get(self.web_url + '/rest/api/latest/result/' + job + '/latest.json?expand=testResults.allTests') if res: results = res.json() for testResult in res.json()['testResults']['allTests']['testResult']: c.add_metric([results['plan']['name'], job, testResult['className'], testResult['methodName']], testResult['status'] == 'successful') else: print "error fetching test results" print res yield c # METRIC: bamboo agent state c = GaugeMetricFamily('bamboo_build_state', 'Bamboo Build Dashboard', labels=['state', 'host']) res = session.get(self.web_url + '/build/admin/ajax/getDashboardSummary.action') if res: dashboard_summary = res.json() for host, values in self.tally_agent_info(dashboard_summary).iteritems(): for state, state_count in values.iteritems(): c.add_metric([state, host], state_count) yield c else: print res # Collect results tagged d = {} r = session.get( self.web_url + '/rest/api/latest/result.json?favourite&expand=results.result.buildDurationInSeconds') if r.ok: # NOTE: this may return multiple results for the same plan - need to use highest build number results = r.json() for result in results['results']['result']: key = result['plan']['key'] if key in d and d[key]['number'] < result['number']: continue # don't overwrite with older build d[key] = result else: print r # METRIC: build status (favourites) METRICS = ['buildNumber', 'buildDurationInSeconds'] TEST_METRICS = ['failedTestCount', 'skippedTestCount', 'quarantinedTestCount', 'successfulTestCount'] statusMetric = GaugeMetricFamily('build_results', 'Status of flagged plans', labels=['name', 'state']) testMetric = GaugeMetricFamily('test_counts', 'Test result counts', labels=['shortName', 'countType']) metrics = {x: GaugeMetricFamily(x, x, labels=['shortName']) for x in METRICS} for key, result in d.iteritems(): statusMetric.add_metric([result['plan']['shortName'], result['state']], result['successful']) for name in TEST_METRICS: testMetric.add_metric([result['plan']['shortName'], name], result[name]) for name, metric in metrics.iteritems(): metric.add_metric([result['plan']['shortName']], result[name]) yield statusMetric yield testMetric for metric in metrics.itervalues(): yield metric
def trivial_gauge(self, name, help, value):
    c = GaugeMetricFamily(f"hydra_{name}", help)
    c.add_metric([], value)
    return c
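# Hypothetical use of the helper above inside a collect() method (the metric name and
# value are invented for the example): each call builds one unlabeled gauge with the
# "hydra_" prefix applied to its name.
#
#   def collect(self):
#       yield self.trivial_gauge("queue_runner_up", "Whether the queue runner responded", 1)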
class MachineScrapeImporter: def __init__(self): labels = [ "host" ] self.consective_failures = GaugeMetricFamily( "hydra_machine_consecutive_failures", "Number of consecutive failed builds", labels=labels) self.current_jobs = GaugeMetricFamily( "hydra_machine_current_jobs", "Number of current jobs", labels=labels) self.idle_since = GaugeMetricFamily( "hydra_machine_idle_since", "When the current idle period started", labels=labels) self.disabled_until = GaugeMetricFamily( "hydra_machine_disabled_until", "When the machine will be used again", labels=labels) self.enabled = GaugeMetricFamily( "hydra_machine_enabled", "If the machine is enabled (1) or not (0)", labels=labels) self.last_failure = CounterMetricFamily( "hydra_machine_last_failure", "timestamp of the last failure", labels=labels) self.number_steps_done = CounterMetricFamily( "hydra_machine_steps_done_total", "Total count of the steps completed", labels=labels) self.total_step_build_time = CounterMetricFamily( "hydra_machine_step_build_time_total", "Number of seconds spent building steps", labels=labels) self.total_step_time = CounterMetricFamily( "hydra_machine_step_time_total", "Number of seconds spent on steps", labels=labels) def load_machine(self, name, report): report.unused_read("mandatoryFeatures") report.unused_read("supportedFeatures") report.unused_read("systemTypes") report.unused_read("avgStepBuildTime") report.unused_read("avgStepTime") labels = [name] self.consective_failures.add_metric( labels, report.destructive_read("consecutiveFailures") ) self.current_jobs.add_metric( labels, report.destructive_read("currentJobs") ) try: self.idle_since.add_metric( labels, report.destructive_read("idleSince") ) except KeyError: pass self.disabled_until.add_metric( labels, report.destructive_read("disabledUntil") ) self.enabled.add_metric( labels, 1 if report.destructive_read("enabled") else 0 ) self.last_failure.add_metric( labels, report.destructive_read("lastFailure") ) self.number_steps_done.add_metric( labels, report.destructive_read("nrStepsDone") ) self.total_step_build_time.add_metric( labels, report.destructive_read_default("totalStepBuildTime", default=0) ) self.total_step_time.add_metric( labels, report.destructive_read_default("totalStepTime", default=0) ) debug_remaining_state(report) def metrics(self): yield self.consective_failures yield self.current_jobs yield self.idle_since yield self.disabled_until yield self.enabled yield self.last_failure yield self.number_steps_done yield self.total_step_build_time yield self.total_step_time
def parse_for_prom(self): general = self._metrics['general'] """ return general info """ info = InfoMetricFamily(self.prefix + '_general', '', labels=[]) info.add_metric([], general) yield info """ separate general info for value """ gauge = GaugeMetricFamily(self.prefix + '_general_status', '', labels=['power_state']) gauge.add_metric([general['power_state']], self._cast(general['power_state'])) yield gauge gauge = GaugeMetricFamily(self.prefix + '_general_health', '', labels=['health']) gauge.add_metric([general['health']], self._cast(general['health'])) yield gauge gauge = GaugeMetricFamily(self.prefix + '_general_state', '', labels=['state']) gauge.add_metric([general['state']], self._cast(general['state'])) yield gauge fans = self._metrics['fan'] """ return thermal metrics """ gauge = GaugeMetricFamily(self.prefix + '_fan_redundancy', '', labels=['health', 'state']) gauge.add_metric([fans['redundancy_health'], fans['redundancy_state']], int( self._cast(fans['redundancy_health']) + self._cast(fans['redundancy_state']))) yield gauge gauge = GaugeMetricFamily(self.prefix + '_fan_rpm', '', labels=['name', 'low_limit']) gauge_status = GaugeMetricFamily(self.prefix + '_fan', '', labels=['name', 'health', 'state']) for fan in fans['list']: gauge.add_metric([fan['name'], str(fan['low_limit'])], self._cast(fan['rpm'])) gauge_status.add_metric( [fan['name'], fan['health'], fan['state']], int(self._cast(fan['health']) + self._cast(fan['state']))) yield gauge yield gauge_status thermal = self._metrics['thermal'] """ return thermal metrics """ gauge = GaugeMetricFamily(self.prefix + '_thermal_location', '', labels=['name', 'limit']) for location in thermal['location']: gauge.add_metric( [location['name'], str(location['limit'])], location['degres']) yield gauge """ return power metrics """ power = self._metrics['power'] gauge = GaugeMetricFamily(self.prefix + '_power', '', labels=['health', 'state']) gauge.add_metric( [power['health'], power['state']], int(self._cast(power['state']) + self._cast(power['health']))) yield gauge gauge = GaugeMetricFamily(self.prefix + '_power_comsumption', 'watt consumption', labels=['type', 'unit', 'limit']) gauge.add_metric( ['average', 'watt', str(power['limit'])], int(power['average'])) gauge.add_metric( ['maxconsumed', 'watt', str(power['limit'])], int(power['maxconsumed'])) gauge.add_metric( ['minconsumed', 'watt', str(power['limit'])], int(power['minconsumed'])) yield gauge """ return power supply metrics """ gauge = GaugeMetricFamily( self.prefix + '_power_supply', '', labels=['name', 'capacity', 'health', 'state']) for powersupply in power['powersupplies']: gauge.add_metric([ powersupply['name'], str(powersupply['power_capacity']), powersupply['health'], powersupply['state'] ], int( self._cast(powersupply['state']) + self._cast(powersupply['health']))) yield gauge
def collect(self): g = GaugeMetricFamily('vrops_vm_properties', 'testtest', labels=[ 'vccluster', 'datacenter', 'virtualmachine', 'hostsystem', 'propkey' ]) i = InfoMetricFamily( 'vrops_vm', 'testtest', labels=['vccluster', 'datacenter', 'virtualmachine', 'hostsystem']) if os.environ['DEBUG'] >= '1': print(self.name, 'starts with collecting the metrics') for target in self.get_vms_by_target(): token = self.get_target_tokens() token = token[target] if not token: print("skipping", target, "in", self.name, ", no token") uuids = self.target_vms[target] if 'number_metrics' in self.property_yaml[self.name]: for property_pair in self.property_yaml[ self.name]['number_metrics']: property_label = property_pair['label'] propkey = property_pair['property'] values = Resources.get_latest_number_properties_multiple( target, token, uuids, propkey) if not values: continue for value_entry in values: if 'data' not in value_entry: continue data = value_entry['data'] vm_id = value_entry['resourceId'] if vm_id not in self.vms: continue g.add_metric(labels=[ self.vms[vm_id]['cluster'], self.vms[vm_id]['datacenter'], self.vms[vm_id]['name'], self.vms[vm_id]['parent_host_name'], property_label ], value=data) if 'enum_metrics' in self.property_yaml[self.name]: for property_pair in self.property_yaml[ self.name]['enum_metrics']: property_label = property_pair['label'] propkey = property_pair['property'] expected_state = property_pair['expected'] values = Resources.get_latest_enum_properties_multiple( target, token, uuids, propkey, expected_state) if not values: continue for value_entry in values: if 'data' not in value_entry: continue data = value_entry['data'] vm_id = value_entry['resourceId'] latest_state = value_entry['latest_state'] if vm_id not in self.vms: continue g.add_metric(labels=[ self.vms[vm_id]['cluster'], self.vms[vm_id]['datacenter'], self.vms[vm_id]['name'], self.vms[vm_id]['parent_host_name'], property_label + ": " + latest_state ], value=data) if 'info_metrics' in self.property_yaml[self.name]: for property_pair in self.property_yaml[ self.name]['info_metrics']: property_label = property_pair['label'] propkey = property_pair['property'] values = Resources.get_latest_info_properties_multiple( target, token, uuids, propkey) if not values: continue for value_entry in values: if 'data' not in value_entry: continue vm_id = value_entry['resourceId'] info_value = value_entry['data'] if vm_id not in self.vms: continue i.add_metric(labels=[ self.vms[vm_id]['cluster'], self.vms[vm_id]['datacenter'], self.vms[vm_id]['name'], self.vms[vm_id]['parent_host_name'] ], value={property_label: info_value}) # self.post_metrics(g.name) # self.post_metrics(i.name + '_info') yield g yield i
def collect(self): # bearerbox server status metric = GaugeMetricFamily('bearerbox_up', 'Could the bearerbox server be reached') response = self.parse_kannel_status() if response is None: metric.add_sample('bearerbox_up', value=0, labels={}) yield metric return [] metric.add_sample('bearerbox_up', value=1, labels={}) yield metric # Version info version = bearerbox_version(response['gateway']['version']) metric = GaugeMetricFamily('bearerbox_build_info', 'Kannel bearerbox version info') metric.add_sample('bearerbox_build_info', value=1, labels={'version': version}) yield metric # Gauge for the bearerbox uptime, in seconds uptime = uptime_to_secs(response['gateway']['status']) metric = GaugeMetricFamily('bearerbox_uptime_seconds', 'Current uptime in seconds (*)') metric.add_sample('bearerbox_uptime_seconds', value=uptime, labels={}) yield metric # WDP, SMS & DLR metrics message_type = ['sms', 'dlr'] if self._collect_wdp is True: message_type = ['wdp'] + message_type for type in message_type: for k, v in response['gateway'][type].items(): if isinstance(v, dict): for k2, v2 in v.items(): metric_name = 'bearerbox_{0}_{1}_{2}'.format( type, k, k2) if k2 == 'total': metric_help = 'Total number of {0} {1}'.format( type.upper(), k) metric = CounterMetricFamily( metric_name, metric_help) else: metric_help = 'Number of {0} {1} in queue'.format( k, type.upper()) metric = GaugeMetricFamily(metric_name, metric_help) metric.add_sample(metric_name, value=int(v2), labels={}) yield metric elif k not in ['inbound', 'outbound']: metric_name = 'bearerbox_{0}_{1}'.format(type, k) metric_value = v metric_labels = {} if type == 'sms' and k == 'storesize': metric_help = 'Number of SMS in storesize' elif type == 'dlr': if k == 'queued': metric_help = 'Number of DLRs in queue' elif k == 'storage': metric_help = 'DLR storage type info' metric_value = 1 metric_labels = {'storage': v} metric = GaugeMetricFamily(metric_name, metric_help) metric.add_sample(metric_name, value=int(metric_value), labels=metric_labels) yield metric # Box metrics box_connections = {b: 0 for b in self._box_connections} box_details = {} metric_box_connections = GaugeMetricFamily( 'bearerbox_box_connections', 'Number of box connections') metric_box_queue = GaugeMetricFamily( 'bearerbox_box_queue', 'Number of messages in box queue') if self._collect_box_uptime is True: metric_box_uptime = GaugeMetricFamily( 'bearerbox_box_uptime_seconds', 'Box uptime in seconds (*)') if response['gateway']['boxes'] != '': # when there's only one box connected on the gateway # xmltodict returns an OrderedDict instead of a list of OrderedDicts if not isinstance(response['gateway']['boxes']['box'], list): response['gateway']['boxes']['box'] = [ response['gateway']['boxes']['box'] ] for box in response['gateway']['boxes']['box']: if box['type'] in box_connections.keys(): box_connections[box['type']] += 1 else: box_connections[box['type']] = 1 # some type of boxes (e.g wapbox) don't have IDs. if 'id' not in box.keys(): box['id'] = "" tuplkey = (box['type'], box['id'], box['IP']) # some type of boxs (e.g wapbox) don't have queues. if 'queue' in box.keys(): if tuplkey in box_details.keys(): box_details[tuplkey]['queue'] += int(box['queue']) else: box_details[tuplkey] = {} box_details[tuplkey]['queue'] = int(box['queue']) # collect box uptime metrics # In case of multiple boxes with same type, id and host. # Only the uptime of the first occurence will be exposed # in order to avoid duplicates. 
if self._collect_box_uptime is True: if tuplkey in box_details.keys(): if 'uptime' not in box_details[tuplkey].keys(): box_details[tuplkey]['uptime'] = uptime_to_secs( box['status']) else: box_details[tuplkey] = {} box_details[tuplkey]['uptime'] = uptime_to_secs( box['status']) for key, value in box_connections.items(): metric_box_connections.add_sample('bearerbox_box_connections', value=value, labels={'type': key}) yield metric_box_connections for key, value in box_details.items(): box_labels = {'type': key[0], 'id': key[1], 'ipaddr': key[2]} if 'queue' in value.keys(): metric_box_queue.add_sample('bearerbox_box_queue', value=value['queue'], labels=box_labels) if self._collect_box_uptime is True: metric_box_uptime.add_sample('bearerbox_box_uptime_seconds', value=value['uptime'], labels=box_labels) yield metric_box_queue if self._collect_box_uptime is True: yield metric_box_uptime # SMSC metrics metric = GaugeMetricFamily('bearerbox_smsc_connections', 'Number of SMSC connections') metric.add_sample('bearerbox_smsc_connections', value=int(response['gateway']['smscs']['count']), labels={}) yield metric if self._filter_smsc is False: metric_failed = CounterMetricFamily( 'bearerbox_smsc_failed_messages_total', 'Total number of SMSC failed messages', labels=["smsc_id"]) metric_queued = GaugeMetricFamily('bearerbox_smsc_queued_messages', 'Number of SMSC queued messages', labels=["smsc_id"]) metric_sms_received = CounterMetricFamily( 'bearerbox_smsc_received_sms_total', 'Total number of received SMS by SMSC', labels=["smsc_id"]) metric_sms_sent = CounterMetricFamily( 'bearerbox_smsc_sent_sms_total', 'Total number of SMS sent to SMSC', labels=["smsc_id"]) metric_dlr_received = CounterMetricFamily( 'bearerbox_smsc_received_dlr_total', 'Total number of DLRs received by SMSC', labels=["smsc_id"]) metric_dlr_sent = CounterMetricFamily( 'bearerbox_smsc_sent_dlr_total', 'Total number of DLRs sent to SMSC', labels=["smsc_id"]) # Group SMSCs by smsc-id smsc_stats_by_id = OrderedDict() # when there's only one smsc connection on the gateway # xmltodict returns an OrderedDict instead of a list of OrderedDicts if not isinstance(response['gateway']['smscs']['smsc'], list): response['gateway']['smscs']['smsc'] = [ response['gateway']['smscs']['smsc'] ] for smsc in response['gateway']['smscs']['smsc']: smscid = smsc['id'] if smscid in smsc_stats_by_id: smsc_stats_by_id[smscid]['failed'] += int(smsc['failed']) smsc_stats_by_id[smscid]['queued'] += int(smsc['queued']) smsc_stats_by_id[smscid]['sms']['received'] += int( smsc['sms']['received']) smsc_stats_by_id[smscid]['sms']['sent'] += int( smsc['sms']['sent']) smsc_stats_by_id[smscid]['dlr']['received'] += int( smsc['dlr']['received']) smsc_stats_by_id[smscid]['dlr']['sent'] += int( smsc['dlr']['sent']) else: smsc_stats_by_id[smscid] = OrderedDict() smsc_stats_by_id[smscid]['failed'] = int(smsc['failed']) smsc_stats_by_id[smscid]['queued'] = int(smsc['queued']) smsc_stats_by_id[smscid]['sms'] = OrderedDict() smsc_stats_by_id[smscid]['sms']['received'] = int( smsc['sms']['received']) smsc_stats_by_id[smscid]['sms']['sent'] = int( smsc['sms']['sent']) smsc_stats_by_id[smscid]['dlr'] = OrderedDict() smsc_stats_by_id[smscid]['dlr']['received'] = int( smsc['dlr']['received']) smsc_stats_by_id[smscid]['dlr']['sent'] = int( smsc['dlr']['sent']) for smsc in smsc_stats_by_id: metric_failed.add_metric([smsc], smsc_stats_by_id[smsc]['failed']) metric_queued.add_metric([smsc], smsc_stats_by_id[smsc]['queued']) metric_sms_received.add_metric( [smsc], 
smsc_stats_by_id[smsc]['sms']['received']) metric_sms_sent.add_metric( [smsc], smsc_stats_by_id[smsc]['sms']['sent']) metric_dlr_received.add_metric( [smsc], smsc_stats_by_id[smsc]['dlr']['received']) metric_dlr_sent.add_metric( [smsc], smsc_stats_by_id[smsc]['dlr']['sent']) yield metric_failed yield metric_queued yield metric_sms_received yield metric_sms_sent yield metric_dlr_received yield metric_dlr_sent
def collect(self, name: str) -> Generator: ''' Yields metrics for the grid ''' if self.software_version is not None and self.bootloader_version is not None: yield InfoMetricFamily( 'rctmon_powerswitch', 'Information about the Power Switch', { 'inverter': name, 'software_version': str(self.software_version), 'bootloader_version': str(self.bootloader_version) }) grid_voltage = GaugeMetricFamily('rctmon_grid_voltage', 'Grid voltage by phase', labels=['inverter', 'phase'], unit='volt') if self.grid_voltage_l1 is not None: grid_voltage.add_metric([name, 'l1'], self.grid_voltage_l1) if self.grid_voltage_l2 is not None: grid_voltage.add_metric([name, 'l2'], self.grid_voltage_l2) if self.grid_voltage_l3 is not None: grid_voltage.add_metric([name, 'l3'], self.grid_voltage_l3) yield grid_voltage grid_frequency = GaugeMetricFamily('rctmon_grid_frequency', 'Grid frequency by phase', labels=['inverter', 'phase'], unit='hertz') if self.grid_frequency_l1 is not None: grid_frequency.add_metric([name, 'l1'], self.grid_frequency_l1) if self.grid_frequency_l2 is not None: grid_frequency.add_metric([name, 'l2'], self.grid_frequency_l2) if self.grid_frequency_l3 is not None: grid_frequency.add_metric([name, 'l3'], self.grid_frequency_l3) yield grid_frequency ps_frequency = GaugeMetricFamily('rctmon_powerstorage_frequency', 'Power Storage frequency by phase', labels=['inverter', 'phase'], unit='hertz') if self.power_storage_frequency_l1 is not None: ps_frequency.add_metric([name, 'l1'], self.power_storage_frequency_l1) if self.power_storage_frequency_l2 is not None: ps_frequency.add_metric([name, 'l2'], self.power_storage_frequency_l2) if self.power_storage_frequency_l3 is not None: ps_frequency.add_metric([name, 'l3'], self.power_storage_frequency_l3) yield ps_frequency
def collect(self): # TODO handle missing labels, probably return 500? labels = self.get_labels() labels_p75 = labels.copy() # Work on copy of labels variable to avoid other metrics getting quantile label labels_p75.update({'quantile': '0.75'}) labels_p99 = labels.copy() # Work on copy of labels variable to avoid other metrics getting quantile label labels_p99.update({'quantile': '0.99'}) response = requests.get(args.uri) metrics = response.json()['metrics'] # iterate over all metrics for k in metrics: metric_name = re.sub('\.|-|\s', '_', k).lower() metric_name = 'stellar_core_' + metric_name if metrics[k]['type'] == 'timer': # we have a timer, expose as a Prometheus Summary # we convert stellar-core time units to seconds, as per Prometheus best practices metric_name = metric_name + '_seconds' if 'sum' in metrics[k]: # use libmedida sum value total_duration = metrics[k]['sum'] else: # compute sum value total_duration = (metrics[k]['mean'] * metrics[k]['count']) summary = SummaryMetricFamily(metric_name, 'libmedida metric type: ' + metrics[k]['type'], labels=labels.keys()) summary.add_metric(labels.values(), count_value=metrics[k]['count'], sum_value=(duration_to_seconds(total_duration, metrics[k]['duration_unit']))) # add stellar-core calculated quantiles to our summary summary.add_sample(metric_name, labels=labels_p75, value=(duration_to_seconds(metrics[k]['75%'], metrics[k]['duration_unit']))) summary.add_sample(metric_name, labels=labels_p99, value=(duration_to_seconds(metrics[k]['99%'], metrics[k]['duration_unit']))) yield summary elif metrics[k]['type'] == 'counter': # we have a counter, this is a Prometheus Gauge g = GaugeMetricFamily(metric_name, 'libmedida metric type: ' + metrics[k]['type'], labels=labels.keys()) g.add_metric(labels.values(), metrics[k]['count']) yield g elif metrics[k]['type'] == 'meter': # we have a meter, this is a Prometheus Counter c = CounterMetricFamily(metric_name, 'libmedida metric type: ' + metrics[k]['type'], labels=labels.keys()) c.add_metric(labels.values(), metrics[k]['count']) yield c # Export metrics from the info endpoint response = requests.get(args.info_uri) info = response.json()['info'] if not all([i in info for i in self.info_keys]): print('WARNING: info endpoint did not return all required fields') return # Ledger metrics for core_name, prom_name in self.ledger_metrics.items(): g = GaugeMetricFamily('stellar_core_ledger_{}'.format(prom_name), 'Stellar core ledger metric name: {}'.format(core_name), labels=labels.keys()) g.add_metric(labels.values(), info['ledger'][core_name]) yield g # Quorum metrics are reported under dynamic name for example: # "quorum" : { # "758110" : { # "agree" : 3, tmp = info['quorum'].values()[0] for metric in self.quorum_metrics: g = GaugeMetricFamily('stellar_core_quorum_{}'.format(metric), 'Stellar core quorum metric: {}'.format(metric), labels=labels.keys()) g.add_metric(labels.values(), tmp[metric]) yield g # Peers metrics g = GaugeMetricFamily('stellar_core_peers_authenticated_count', 'Stellar core authenticated_count count', labels=labels.keys()) g.add_metric(labels.values(), info['peers']['authenticated_count']) yield g g = GaugeMetricFamily('stellar_core_peers_pending_count', 'Stellar core pending_count count', labels=labels.keys()) g.add_metric(labels.values(), info['peers']['pending_count']) yield g g = GaugeMetricFamily('stellar_core_protocol_version', 'Stellar core protocol_version', labels=labels.keys()) g.add_metric(labels.values(), info['protocol_version']) yield g g = 
GaugeMetricFamily('stellar_core_synced', 'Stellar core sync status', labels=labels.keys()) if info['state'] == 'Synced!': g.add_metric(labels.values(), 1) else: g.add_metric(labels.values(), 0) yield g g = GaugeMetricFamily('stellar_core_started_on', 'Stellar core start time in epoch', labels=labels.keys()) date = datetime.strptime(info['startedOn'], "%Y-%m-%dT%H:%M:%SZ") g.add_metric(labels.values(), int(date.strftime('%s'))) yield g
class ClientsPerformanceMetrics(): """ Base class for FlashBlade Prometheus clients performance metrics """ def __init__(self, fb): self.fb = fb self.latency = GaugeMetricFamily( 'purefb_client_performance_latency_usec', 'FlashBlade latency', labels=['name', 'port', 'dimension']) self.iops = GaugeMetricFamily('purefb_client_performance_iops', 'FlashBlade IOPS', labels=['name', 'port', 'dimension']) self.ops_size = GaugeMetricFamily( 'purefb_client_performance_opns_bytes', 'FlashBlade client average bytes per operations', labels=['name', 'port', 'dimension']) self.throughput = GaugeMetricFamily( 'purefb_client_performance_throughput_bytes', 'FlashBlade client_throughput', labels=['name', 'port', 'dimension']) self.clients_performance = fb.get_clients_performance() def _latency(self): """ Create metrics of gauge type for client latency metrics. """ for cperf in self.clients_performance: client, port = cperf.name.split(':') self.latency.add_metric([client, port, 'read'], cperf.usec_per_read_op) self.latency.add_metric([client, port, 'write'], cperf.usec_per_write_op) self.latency.add_metric([client, port, 'other'], cperf.usec_per_other_op) def _iops(self): """ Create metrics of gauge type for client iops metrics. """ for cperf in self.clients_performance: client, port = cperf.name.split(':') self.iops.add_metric([client, port, 'read'], cperf.reads_per_sec) self.iops.add_metric([client, port, 'write'], cperf.writes_per_sec) self.iops.add_metric([client, port, 'other'], cperf.others_per_sec) def _ops_size(self): """ Create metrics of gauge type for client operations size metrics. """ for cperf in self.clients_performance: client, port = cperf.name.split(':') self.ops_size.add_metric([client, port, 'per_op'], cperf.bytes_per_op) self.ops_size.add_metric([client, port, 'read'], cperf.bytes_per_read) self.ops_size.add_metric([client, port, 'write'], cperf.bytes_per_write) def _throughput(self): """ Create metrics of gauge type for client throughput metrics. """ for cperf in self.clients_performance: client, port = cperf.name.split(':') self.throughput.add_metric([client, port, 'read'], cperf.read_bytes_per_sec) self.throughput.add_metric([client, port, 'write'], cperf.write_bytes_per_sec) def get_metrics(self): self._latency() self._iops() self._ops_size() self._throughput() yield self.latency yield self.iops yield self.ops_size yield self.throughput
def collect(self): sds011 = SDS011(self._sds011, use_query_mode=True) sds011.sleep(sleep=False) time.sleep(int(self._sleep)) sds011s = tuple(sds011.query()) sds011.sleep() pm25 = GaugeMetricFamily('airfilter_dust', 'dust of size 2,5', labels=['sensor', 'pm']) pm25.add_metric(['sds011', '2.5'], sds011s[0]) yield pm25 pm10 = GaugeMetricFamily('airfilter_dust', 'dust of size 10', labels=['sensor', 'pm']) pm10.add_metric(['sds011', '10'], sds011s[1]) yield pm10 if self._ccs811 == 'true': ccs811 = CCS811_RPi() configuration = 0b100000 ccs811.configureSensor(configuration) hdc1000 = SDL_Pi_HDC1000() hdc1000.turnHeaterOff() hdc1000.setTemperatureResolution( HDC1000_CONFIG_TEMPERATURE_RESOLUTION_14BIT) hdc1000.setHumidityResolution( HDC1000_CONFIG_HUMIDITY_RESOLUTION_14BIT) humidity = hdc1000.readHumidity() temperature = hdc1000.readTemperature() ccs811.setCompensation(temperature, humidity) humid = GaugeMetricFamily('airfilter_humidity', 'humidity reading', labels=['sensor']) humid.add_metric(['ccs811'], humidity) yield humid temp = GaugeMetricFamily('airfilter_temperature', 'temperature reading', labels=['sensor']) temp.add_metric(['ccs811'], temperature) yield temp statusbyte = ccs811.readStatus() status = GaugeMetricFamily('airfilter_statusbyte', 'statusbyte', labels=['sensor', 'statusbyte']) status.add_metric(['ccs811', bin(statusbyte)], 1) yield status error = ccs811.checkError(statusbyte) failure = GaugeMetricFamily('airfilter_error', '1 if error on sensor', labels=['sensor']) if (error): failure.add_metric(['ccs811'], 1) else: failure.add_metric(['ccs811'], 0) yield failure samples = GaugeMetricFamily('airfilter_samples', '0 if no new samples', labels=['sensor']) res = GaugeMetricFamily('airfilter_result', '1 if valid result', labels=['sensor']) eco2 = GaugeMetricFamily('airfilter_eco2', 'eco2 reading', labels=['sensor', 'unit']) tvoc = GaugeMetricFamily('airfilter_tvoc', 'tvoc reading', labels=['sensor', 'unit']) if (ccs811.checkDataReady(statusbyte)): samples.add_metric(['ccs811'], 1) yield samples result = ccs811.readAlg() if (result): res.add_metric(['ccs811'], 1) yield res eco2.add_metric(['ccs811', 'ppm'], result['eCO2']) yield eco2 tvoc.add_metric(['ccs811', 'ppb'], result['TVOC']) yield tvoc else: res.add_metric(['ccs811'], 0) yield res eco2.add_metric(['ccs811', 'ppm'], 0) yield eco2 tvoc.add_metric(['ccs811', 'ppb'], 0) yield tvoc else: samples.add_metric(['ccs811'], 0) yield samples res.add_metric(['ccs811'], 0) yield res eco2.add_metric(['ccs811', 'ppm'], 0) yield eco2 tvoc.add_metric(['ccs811', 'ppb'], 0) yield tvoc
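The two dust gauges above are yielded as separate families that both use the name 'airfilter_dust', which Prometheus treats as a duplicate family. A hedged sketch of one way to merge them into a single family; collect() would then yield build_dust_family(sds011s[0], sds011s[1]) in place of the two separate yields:

from prometheus_client.core import GaugeMetricFamily

def build_dust_family(pm25_value, pm10_value):
    # One family with two labelled samples, instead of two families sharing a name.
    dust = GaugeMetricFamily('airfilter_dust', 'dust concentration by particle size',
                             labels=['sensor', 'pm'])
    dust.add_metric(['sds011', '2.5'], pm25_value)  # PM2.5 reading
    dust.add_metric(['sds011', '10'], pm10_value)   # PM10 reading
    return dust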
def collect_cpu_usage(self, environments_instances_health):
    start = time.time()
    instance_cpu_usage = GaugeMetricFamily(
        self.metric_prefix + 'enhanced_cpu_usage_percent',
        'CPU utilization per instance and state',
        labels=['environment_name', 'instance_id', 'state'])
    cpu_states = ['User', 'Nice', 'System', 'Idle', 'IOWait', 'IRQ', 'SoftIRQ']
    for environment, instances_health in environments_instances_health:
        if instances_health != "None":
            for instance_health in instances_health:
                cpu_utilization = instance_health['System'].get('CPUUtilization')
                for state in cpu_states:
                    instance_cpu_usage.add_metric(
                        [environment, instance_health['InstanceId'], state],
                        cpu_utilization[state] if cpu_utilization else 0)
    end = time.time()
    self.metric_collector_duration.add_metric(['cpu_usage'], end - start)
    return instance_cpu_usage
def _collect(self): # type: () -> Generator[GaugeMetricFamily] m_uri_down = GaugeMetricFamily( 'heritrix3_crawl_job_uris_downloaded_total', 'Total URIs downloaded by a Heritrix3 crawl job', labels=["jobname", "deployment", "status", "id"]) # No hyphens in label names please! m_uri_known = GaugeMetricFamily( 'heritrix3_crawl_job_uris_known_total', 'Total URIs discovered by a Heritrix3 crawl job', labels=["jobname", "deployment", "status", "id"]) # No hyphens in label names please! m_uris = GaugeMetricFamily( 'heritrix3_crawl_job_uris_total', 'URI counters from a Heritrix3 crawl job, labeled by kind', labels=["jobname", "deployment", "id", "kind"]) # No hyphens in label names please! m_bytes = GaugeMetricFamily( 'heritrix3_crawl_job_bytes_total', 'Byte counters from a Heritrix3 crawl job, labeled by kind', labels=["jobname", "deployment", "id", "kind"]) # No hyphens in label names please! m_qs = GaugeMetricFamily( 'heritrix3_crawl_job_queues_total', 'Queue counters from a Heritrix3 crawl job, labeled by kind', labels=["jobname", "deployment", "id", "kind"]) # No hyphens in label names please! m_ts = GaugeMetricFamily( 'heritrix3_crawl_job_threads_total', 'Thread counters from a Heritrix3 crawl job, labeled by kind', labels=["jobname", "deployment", "id", "kind"]) # No hyphens in label names please! result = self.run_api_requests() for job in result: # Allow debugging: logger.debug("Input:\n%s" % json.dumps(job, indent=2)) # Get hold of the state and flags etc name = job['job_name'] id = job['id'] deployment = job['deployment'] state = job['state'] or {} status = state['status'] or None # Get the URI metrics try: # URIs: ji = state.get('details',{}).get('job',{}) docs_total = ji.get('uriTotalsReport',{}).get('downloadedUriCount', 0.0) known_total = ji.get('uriTotalsReport',{}).get('totalUriCount', 0.0) m_uri_down.add_metric([name, deployment, status, id], docs_total) m_uri_known.add_metric([name, deployment, status, id], known_total) # New-style metrics: m_uris.add_metric([name, deployment, id, 'downloaded'], docs_total) m_uris.add_metric([name, deployment, id, 'queued'], known_total) m_uris.add_metric([name, deployment, id, 'novel'], ji.get('sizeTotalsReport', {}).get('novelCount', 0.0)) m_uris.add_metric([name, deployment, id, 'deduplicated'], ji.get('sizeTotalsReport', {}).get('dupByHashCount', 0.0)) m_uris.add_metric([name, deployment, id, 'deepest-queue-depth'], ji.get('loadReport', {}).get('deepestQueueDepth', 0.0)) m_uris.add_metric([name, deployment, id, 'average-queue-depth'], ji.get('loadReport', {}).get('averageQueueDepth', 0.0)) # Bytes: m_bytes.add_metric([name, deployment, id, 'novel'], ji.get('sizeTotalsReport', {}).get('novel', 0.0)) m_bytes.add_metric([name, deployment, id, 'deduplicated'], ji.get('sizeTotalsReport', {}).get('dupByHash', 0.0)) m_bytes.add_metric([name, deployment, id, 'warc-novel-content'], ji.get('sizeTotalsReport', {}).get('warcNovelContentBytes', 0.0)) # Queues: m_qs.add_metric([name, deployment, id, 'total'], ji.get('frontierReport', {}).get('totalQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'in-process'], ji.get('frontierReport', {}).get('inProcessQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'ready'], ji.get('frontierReport', {}).get('readyQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'snoozed'], ji.get('frontierReport', {}).get('snoozedQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'active'], ji.get('frontierReport', {}).get('activeQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'inactive'], 
ji.get('frontierReport', {}).get('inactiveQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'ineligible'], ji.get('frontierReport', {}).get('ineligibleQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'retired'], ji.get('frontierReport', {}).get('retiredQueues', 0.0)) m_qs.add_metric([name, deployment, id, 'exhausted'], ji.get('frontierReport', {}).get('exhaustedQueues', 0.0)) # Threads: m_ts.add_metric([name, deployment, id, 'total'], ji.get('loadReport', {}).get('totalThreads', 0.0)) m_ts.add_metric([name, deployment, id, 'busy'], ji.get('loadReport', {}).get('busyThreads', 0.0)) m_ts.add_metric([name, deployment, id, 'toe-count'], ji.get('threadReport', {}).get('toeCount', 0.0)) # Congestion ratio can be literal 'null': congestion = ji.get('loadReport', {}).get('congestionRatio', 0.0) if congestion is not None: m_ts.add_metric([name, deployment, id, 'congestion-ratio'], congestion) # Thread Steps (could be an array or just one entry): steps = ji.get('threadReport', {}).get('steps', {}).get('value',[]) if isinstance(steps, basestring): steps = [steps] for step_value in steps: splut = re.split(' ', step_value, maxsplit=1) if len(splut) == 2: count, step = splut step = "step-%s" % step.lower() m_ts.add_metric([name, deployment, id, step], float(count)) else: logger.warning("Could not handle step value: %s" % step_value) # Thread Processors (could be an array or just one entry): procs = ji.get('threadReport', {}).get('processors', {}).get('value', []) if isinstance(procs, basestring): procs = [procs] for proc_value in procs: splut = re.split(' ', proc_value, maxsplit=1) if len(splut) == 2: count, proc = splut proc = "processor-%s" % proc.lower() m_ts.add_metric([name, deployment, id, proc], float(count)) else: logger.warning("Could not handle processor value: '%s'" % proc_value) except (KeyError, TypeError, ValueError) as e: logger.exception("Exception while parsing metrics!") logger.info("Printing raw JSON in case there's an underlying issue: %s" % json.dumps(job)[:1024]) # And return the metrics: yield m_uri_down yield m_uri_known yield m_uris yield m_bytes yield m_qs yield m_ts
def collect_health_status(self, environments_health):
    start = time.time()
    health_status = GaugeMetricFamily(
        self.metric_prefix + 'enhanced_health_status',
        'The health status of the environment',
        labels=['environment_name', 'color', 'health_status'])
    status_colors = [
        ('Green', 'Ok'), ('Yellow', 'Warning'), ('Red', 'Degraded'), ('Red', 'Severe'),
        ('Green', 'Info'), ('Grey', 'Pending'), ('Grey', 'Unknown'), ('Grey', 'Suspended')]
    for environment, health in environments_health:
        if health != "None" and 'ApplicationMetrics' in health:
            for color, status in status_colors:
                health_status.add_metric(
                    [environment, color, status],
                    1 if health['HealthStatus'] == status else 0)
    end = time.time()
    self.metric_collector_duration.add_metric(['health_status'], end - start)
    return health_status
def collect(self): self.reconnect() yield GaugeMetricFamily("tor_written_bytes", "Tor written data counter", value=int( self.tor.get_info("traffic/written"))) yield GaugeMetricFamily("tor_read_bytes", "Tor received data counter", value=int(self.tor.get_info("traffic/read"))) version = GaugeMetricFamily("tor_version", "Tor version as a label", labels=["version"]) version.add_metric([str(torctl.get_version())], 1) yield version version_status = GaugeMetricFamily( "tor_version_status", "Tor version status {new, old, unrecommended, recommended, new in series, obsolete, unknown} as a label", labels=["version_status"]) version_status.add_metric( [self.tor.get_info("status/version/current")], 1) yield version_status yield GaugeMetricFamily( "tor_network_liveness", "Indicates whether tor believes that the network is currently reachable", value=int(self.tor.get_info("network-liveness") == "up")) reachable = GaugeMetricFamily( "tor_reachable", "Indicates whether our OR/Dir port is reachable", labels=["port"]) for entry in self.tor.get_info( "status/reachability-succeeded").split(): k, v = entry.split("=") reachable.add_metric([k], int(v)) yield reachable yield GaugeMetricFamily( "tor_circuit_established", "Indicates whether Tor is capable of establishing circuits", value=int(self.tor.get_info("status/circuit-established"))) # For some reason, 0 actually means that Tor is active, keep it that way yield GaugeMetricFamily( "tor_dormant", "Indicates whether Tor is currently active and building circuits (note that 0 corresponds to Tor being active)", value=int(self.tor.get_info("dormant"))) effective_rate = self.tor.get_effective_rate(None) effective_burst_rate = self.tor.get_effective_rate(None, burst=True) if effective_rate is not None and effective_burst_rate is not None: yield GaugeMetricFamily("tor_effective_rate", "Shows Tor effective rate", value=int(effective_rate)) yield GaugeMetricFamily("tor_effective_burst_rate", "Shows Tor effective burst rate", value=int(effective_burst_rate)) try: fingerprint_value = self.tor.get_info("fingerprint") fingerprint = GaugeMetricFamily("tor_fingerprint", "Tor fingerprint as a label", labels=["fingerprint"]) fingerprint.add_metric([fingerprint_value], 1) yield fingerprint except (stem.ProtocolError, stem.OperationFailed): # happens when not running in server mode pass nickname = GaugeMetricFamily("tor_nickname", "Tor nickname as a label", labels=["nickname"]) nickname.add_metric([self.tor.get_conf("Nickname", "Unnamed")], 1) yield nickname # Connection counting # This won't work/will return wrong results if we are not running on # the same box as the Tor daemon is. # DisableDebuggerAttachment has to be set to 0 # TODO: Count individual OUT/DIR/Control connections, see arm sources # for reference try: tor_pid = self.tor.get_pid() connections = stem.util.connection.get_connections( process_pid=tor_pid) yield GaugeMetricFamily( "tor_connection_count", "Amount of connections the Tor daemon has open", value=len(connections)) # Let's hope this does not break when there is NTP sync or # something uptime = time.time() - stem.util.system.start_time(tor_pid) yield GaugeMetricFamily("tor_uptime", "Tor daemon uptime", value=uptime) except (OSError, IOError): # This happens if the PID does not exists (on another machine). pass try: has_flags = self.tor.get_network_status().flags except stem.DescriptorUnavailable: # The tor daemon fails with this for a few minutes after startup # (before figuring out its own flags?) 
has_flags = [] except stem.ControllerError: # Happens when the daemon is not running in server mode has_flags = [] flags = GaugeMetricFamily("tor_flags", "Has a Tor flag", labels=["flag"]) for flag in [ "Authority", "BadExit", "Exit", "Fast", "Guard", "HSDir", "NoEdConsensus", "Stable", "Running", "Valid", "V2Dir" ]: flags.add_metric([flag], int(flag in has_flags)) yield flags try: accs = self.tor.get_accounting_stats() yield GaugeMetricFamily("tor_accounting_read_bytes", "Tor accounting read bytes", accs.read_bytes) yield GaugeMetricFamily("tor_accounting_left_read_bytes", "Tor accounting read bytes left", accs.read_bytes_left) yield GaugeMetricFamily("tor_accounting_read_limit_bytes", "Tor accounting read bytes limit", accs.read_limit) yield GaugeMetricFamily("tor_accounting_write_bytes", "Tor accounting write bytes", accs.written_bytes) yield GaugeMetricFamily("tor_accounting_left_write_bytes", "Tor accounting write bytes left", accs.write_bytes_left) yield GaugeMetricFamily("tor_accounting_write_limit_bytes", "Tor accounting write bytes limit", accs.write_limit) except stem.ControllerError: # happens when accounting isn't enabled pass
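A sketch of how a controller-based collector like the Tor one above is typically wired up with stem and prometheus_client; the control port, the missing password or cookie handling, and the TorCollector class name are assumptions, not part of the original code:

import stem.control
from prometheus_client import start_http_server
from prometheus_client.core import REGISTRY

tor = stem.control.Controller.from_port(port=9051)
tor.authenticate()  # may need a password or cookie file depending on the torrc
REGISTRY.register(TorCollector(tor))  # TorCollector is a placeholder for the class above
start_http_server(9099)  # metrics served on http://localhost:9099/metrics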
def scrape(): global START today = datetime.utcnow().date() START = datetime.timestamp(datetime.combine(today, datetime.min.time())) tasks = retrieve_recent_koji_tasks() koji_tasks_total_family = CounterMetricFamily( 'koji_tasks_total', 'Count of all koji tasks', labels=TASK_LABELS ) for value, labels in koji_tasks_total(tasks): koji_tasks_total_family.add_metric(labels, value) koji_task_errors_total_family = CounterMetricFamily( 'koji_task_errors_total', 'Count of all koji task errors', labels=TASK_LABELS ) error_tasks = only(tasks, states=error_states) for value, labels in koji_tasks_total(error_tasks): koji_task_errors_total_family.add_metric(labels, value) koji_task_completions_total_family = CounterMetricFamily( 'koji_task_completions_total', 'Count of all koji task completed', labels=TASK_LABELS ) completed_tasks = only(tasks, states=completed_states) for value, labels in koji_tasks_total(completed_tasks): koji_task_completions_total_family.add_metric(labels, value) koji_in_progress_tasks_family = GaugeMetricFamily( 'koji_in_progress_tasks', 'Count of all in-progress koji tasks', labels=TASK_LABELS, ) in_progress_tasks = retrieve_open_koji_tasks() for value, labels in koji_tasks_total(in_progress_tasks): koji_in_progress_tasks_family.add_metric(labels, value) koji_waiting_tasks_family = GaugeMetricFamily( 'koji_waiting_tasks', 'Count of all waiting, unscheduled koji tasks', labels=TASK_LABELS, ) waiting_tasks = retrieve_waiting_koji_tasks() for value, labels in koji_tasks_total(waiting_tasks): koji_waiting_tasks_family.add_metric(labels, value) koji_task_duration_seconds_family = HistogramMetricFamily( 'koji_task_duration_seconds', 'Histogram of koji task durations', labels=TASK_LABELS, ) for buckets, duration_sum, labels in koji_task_duration_seconds( tasks, calculate_overall_duration ): koji_task_duration_seconds_family.add_metric(labels, buckets, sum_value=duration_sum) koji_task_waiting_duration_seconds_family = HistogramMetricFamily( 'koji_task_waiting_duration_seconds', 'Histogram of koji tasks durations while waiting', labels=TASK_LABELS, ) for buckets, duration_sum, labels in koji_task_duration_seconds( tasks, calculate_waiting_duration ): koji_task_waiting_duration_seconds_family.add_metric( labels, buckets, sum_value=duration_sum ) koji_task_in_progress_duration_seconds_family = HistogramMetricFamily( 'koji_task_in_progress_duration_seconds', 'Histogram of koji task durations while in-progress', labels=TASK_LABELS, ) for buckets, duration_sum, labels in koji_task_duration_seconds( tasks, calculate_in_progress_duration ): koji_task_in_progress_duration_seconds_family.add_metric( labels, buckets, sum_value=duration_sum ) koji_enabled_hosts_count_family = GaugeMetricFamily( 'koji_enabled_hosts_count', 'Count of all koji hosts by channel', labels=HOST_LABELS, ) koji_enabled_hosts_capacity_family = GaugeMetricFamily( 'koji_enabled_hosts_capacity', 'Reported capacity of all koji hosts by channel', labels=HOST_LABELS, ) koji_hosts_last_update_family = GaugeMetricFamily( 'koji_hosts_last_update', 'Gauge of last update from host', labels=BUILDER_LABELS, ) hosts = retrieve_hosts_by_channel() # result_object is a VirtualCall object from the use of the MultiCallSession from the Koji API for result_object, labels in koji_hosts_last_update(hosts): koji_hosts_last_update_family.add_metric(labels, result_object.result) for value, labels in koji_enabled_hosts_count(hosts): koji_enabled_hosts_count_family.add_metric(labels, value) for value, labels in koji_enabled_hosts_capacity(hosts): 
koji_enabled_hosts_capacity_family.add_metric(labels, value) koji_task_load_family = GaugeMetricFamily( 'koji_task_load', 'Task load of all koji builders by channel', labels=HOST_LABELS, ) task_load = retrieve_task_load_by_channel() for value, labels in koji_task_load(task_load): koji_task_load_family.add_metric(labels, value) # Replace this in one atomic operation to avoid race condition to the Expositor metrics.update( { 'koji_tasks_total': koji_tasks_total_family, 'koji_task_errors_total': koji_task_errors_total_family, 'koji_task_completions_total': koji_task_completions_total_family, 'koji_in_progress_tasks': koji_in_progress_tasks_family, 'koji_waiting_tasks': koji_waiting_tasks_family, 'koji_task_duration_seconds': koji_task_duration_seconds_family, 'koji_task_waiting_duration_seconds': koji_task_waiting_duration_seconds_family, 'koji_task_in_progress_duration_seconds': koji_task_in_progress_duration_seconds_family, 'koji_enabled_hosts_count': koji_enabled_hosts_count_family, 'koji_enabled_hosts_capacity': koji_enabled_hosts_capacity_family, 'koji_task_load': koji_task_load_family, 'koji_hosts_last_update': koji_hosts_last_update_family, } )
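scrape() above builds all families and hands them to an 'Expositor' through a single metrics.update() call, as noted in the race-condition comment. A minimal sketch of what such an Expositor and its scrape loop might look like; the class body, the loop, and the 30-second interval are assumptions:

import time
from prometheus_client.core import REGISTRY

metrics = {}  # mirrors the module-level dict that scrape() updates in one call

class Expositor(object):
    """Yields whatever metric families the most recent scrape() produced."""
    def collect(self):
        for family in metrics.values():
            yield family

REGISTRY.register(Expositor())
while True:
    scrape()
    time.sleep(30)  # assumed scrape interval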
def collect(self):
    gauge_nodes_states_total = GaugeMetricFamily(
        'slurm_nodes_state_total',
        'Slurm nodes states, total per state. From sinfo.',
        labels=['state'])
    node_counts = {}
    # Gather node counts per state, excluding unknown-status nodes (e.g. "down*")
    for state in ('down', 'drain', 'idle', 'alloc'):
        command = ("sinfo --format='%T %D' | grep " + state +
                   " | grep -v '*' | awk -F ' ' '{print $2}'")
        try:
            stdout, _ = subprocess.Popen(command, stdout=subprocess.PIPE,
                                         stderr=subprocess.STDOUT,
                                         shell=True).communicate()
            try:
                node_counts[state] = float(stdout)
            except ValueError:
                node_counts[state] = 0.0
            print("Slurm Exporter. nb_nodes_" + state + ": " + str(node_counts[state]))
            gauge_nodes_states_total.add_metric([state], node_counts[state])
        except OSError as e:
            # The stderr handle returned by communicate() is None here; sys.stderr is intended.
            print("Execution failed:", e, file=sys.stderr)
            node_counts[state] = 0.0
    # Deduce remaining nodes and assume they are in an unknown state
    try:
        stdout, _ = subprocess.Popen("sinfo --format=%D | grep -v NODES",
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.STDOUT,
                                     shell=True).communicate()
        try:
            nb_nodes = float(stdout)
        except ValueError:
            nb_nodes = 0.0
        nb_nodes_unknown = nb_nodes - sum(node_counts.values())
        print("Slurm Exporter. nb_nodes_unk: " + str(nb_nodes_unknown))
        gauge_nodes_states_total.add_metric(["unk"], nb_nodes_unknown)
    except OSError as e:
        print("Execution failed:", e, file=sys.stderr)
    yield gauge_nodes_states_total
def collect(self): # @stats is a pretty-printer object with __str__() returning a nice table, # plus some fields that contain data from that table. # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB'). stats = gc.get_stats(memory_pressure=False) # type: ignore # @s contains same fields as @stats, but as actual integers. s = stats._s # type: ignore # also note that field naming is completely braindead # and only vaguely correlates with the pretty-printed table. # >>>> gc.get_stats(False) # Total memory consumed: # GC used: 8.7MB (peak: 39.0MB) # s.total_gc_memory, s.peak_memory # in arenas: 3.0MB # s.total_arena_memory # rawmalloced: 1.7MB # s.total_rawmalloced_memory # nursery: 4.0MB # s.nursery_size # raw assembler used: 31.0kB # s.jit_backend_used # ----------------------------- # Total: 8.8MB # stats.memory_used_sum # # Total memory allocated: # GC allocated: 38.7MB (peak: 41.1MB) # s.total_allocated_memory, s.peak_allocated_memory # in arenas: 30.9MB # s.peak_arena_memory # rawmalloced: 4.1MB # s.peak_rawmalloced_memory # nursery: 4.0MB # s.nursery_size # raw assembler allocated: 1.0MB # s.jit_backend_allocated # ----------------------------- # Total: 39.7MB # stats.memory_allocated_sum # # Total time spent in GC: 0.073 # s.total_gc_time pypy_gc_time = CounterMetricFamily( "pypy_gc_time_seconds_total", "Total time spent in PyPy GC", labels=[], ) pypy_gc_time.add_metric([], s.total_gc_time / 1000) yield pypy_gc_time pypy_mem = GaugeMetricFamily( "pypy_memory_bytes", "Memory tracked by PyPy allocator", labels=["state", "class", "kind"], ) # memory used by JIT assembler pypy_mem.add_metric(["used", "", "jit"], s.jit_backend_used) pypy_mem.add_metric(["allocated", "", "jit"], s.jit_backend_allocated) # memory used by GCed objects pypy_mem.add_metric(["used", "", "arenas"], s.total_arena_memory) pypy_mem.add_metric(["allocated", "", "arenas"], s.peak_arena_memory) pypy_mem.add_metric(["used", "", "rawmalloced"], s.total_rawmalloced_memory) pypy_mem.add_metric(["allocated", "", "rawmalloced"], s.peak_rawmalloced_memory) pypy_mem.add_metric(["used", "", "nursery"], s.nursery_size) pypy_mem.add_metric(["allocated", "", "nursery"], s.nursery_size) # totals pypy_mem.add_metric(["used", "totals", "gc"], s.total_gc_memory) pypy_mem.add_metric(["allocated", "totals", "gc"], s.total_allocated_memory) pypy_mem.add_metric(["used", "totals", "gc_peak"], s.peak_memory) pypy_mem.add_metric(["allocated", "totals", "gc_peak"], s.peak_allocated_memory) yield pypy_mem
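gc.get_stats(memory_pressure=False) only exists on PyPy, so a collector like the one above is usually registered conditionally. A small sketch, where PyPyGCCollector is a placeholder name for a class wrapping the collect() above:

import platform
from prometheus_client.core import REGISTRY

# PyPyGCCollector is a placeholder name, not taken from the original code.
if platform.python_implementation() == 'PyPy':
    REGISTRY.register(PyPyGCCollector())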
def collect(self, name: str) -> Generator: if self.power_total is not None: pass power = GaugeMetricFamily('rctmon_grid_power', 'Power to or from the grid by phase', labels=['inverter', 'phase'], unit='watt') if self.power_l1 is not None: power.add_metric([name, 'l1'], self.power_l1) if self.power_l2 is not None: power.add_metric([name, 'l2'], self.power_l2) if self.power_l3 is not None: power.add_metric([name, 'l3'], self.power_l3) yield power voltage = GaugeMetricFamily('rctmon_grid_voltage', 'Grid voltage by phase', labels=['inverter', 'phase'], unit='volt') if self.voltage_l1 is not None: voltage.add_metric([name, 'l1'], self.voltage_l1) if self.voltage_l2 is not None: voltage.add_metric([name, 'l2'], self.voltage_l2) if self.voltage_l3 is not None: voltage.add_metric([name, 'l3'], self.voltage_l3) yield voltage p2p_voltage = GaugeMetricFamily('rctmon_grid_voltage_phase_to_phase', 'Grid voltage phase to phase', labels=['inverter', 'measurement'], unit='volt') if self.phase_to_phase_voltage_1 is not None: p2p_voltage.add_metric([name, '1'], self.phase_to_phase_voltage_1) if self.phase_to_phase_voltage_2 is not None: p2p_voltage.add_metric([name, '2'], self.phase_to_phase_voltage_2) if self.phase_to_phase_voltage_3 is not None: p2p_voltage.add_metric([name, '3'], self.phase_to_phase_voltage_3) yield p2p_voltage frequency = GaugeMetricFamily('rctmon_grid_frequency', 'Grid frequency', labels=['inverter'], unit='hertz') if self.frequency is not None: frequency.add_metric([name], self.frequency) yield frequency
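A note on the unit= keyword used above: prometheus_client appends the unit to the family name, so rctmon_grid_power with unit='watt' is exposed as rctmon_grid_power_watt. A tiny sketch to illustrate:

from prometheus_client.core import GaugeMetricFamily

g = GaugeMetricFamily('rctmon_grid_power', 'Power to or from the grid by phase',
                      labels=['inverter', 'phase'], unit='watt')
print(g.name)  # expected: rctmon_grid_power_watt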
class ArraySpaceMetrics():
    """ Base class for FlashBlade Prometheus array space metrics """

    def __init__(self, fb):
        self.fb = fb
        self.capacity = GaugeMetricFamily('purefb_array_capacity_bytes',
                                          'FlashBlade total capacity in bytes',
                                          labels=[])
        self.space = GaugeMetricFamily('purefb_array_space_bytes',
                                       'FlashBlade used space in bytes',
                                       labels=['dimension'])
        self.reduction = GaugeMetricFamily('purefb_array_space_data_reduction',
                                           'FlashBlade overall data reduction',
                                           labels=[])
        # Fetch the array space response once instead of issuing two identical calls.
        array_space_response = fb.get_array_space()
        self.array_space = array_space_response.space
        self.array_capacity = array_space_response.capacity

    def _space(self):
        """ Create metrics of gauge type for array space indicators. """
        if self.array_space is None:
            return
        self.space.add_metric(['unique'], self.array_space.unique)
        self.space.add_metric(['virtual'], self.array_space.virtual)
        self.space.add_metric(['total_physical'], self.array_space.total_physical)
        self.space.add_metric(['snapshots'], self.array_space.snapshots)

    def _capacity(self):
        """ Create metrics of gauge type for the array capacity indicator. """
        if self.array_capacity is None:
            return
        self.capacity.add_metric([], self.array_capacity)

    def _reduction(self):
        """ Create metrics of gauge type for the array data reduction indicator. """
        if self.array_space is None:
            return
        self.reduction.add_metric([], self.array_space.data_reduction)

    def get_metrics(self):
        self._capacity()
        self._space()
        self._reduction()
        yield self.capacity
        yield self.space
        yield self.reduction
def collect(self): self.logger.debug("starting collection ... ") global_error_metric = GaugeMetricFamily( "smart_system_error", "flag indicating that there is a problem with the helper daemon", ) try: sock = self._get_connected_socket() data = self._recv_smart_info(sock) except OSError: self.logger.error( "failed to get data from helper daemon at %s", self.socket_path, exc_info=True, ) global_error_metric.add_metric([], 1) return [global_error_metric] global_error_metric.add_metric([], 0) error_metrics = GaugeMetricFamily( "smart_access_error", "flag indicating that there is a problem accessing the device", labels=["port"], ) warning_metrics = GaugeMetricFamily( "smart_metric_error", "flag indicating that there is a problem converting metrics " "from the device", labels=["port"], ) attr_metrics = {} def get_attr_metric(device, id_, name): nonlocal attr_metrics try: metric_name, type_ = self.attrmap.get_metric_for_attribute( device, id_, name ) except KeyError: return None try: return attr_metrics[metric_name] except KeyError: class_ = { attrmeta.MetricType.GAUGE: GaugeMetricFamily, attrmeta.MetricType.COUNTER: CounterMetricFamily, }[type_] metric = class_( "smart_{}".format(metric_name), "S.M.A.R.T. metric based on attribute {}".format(name), labels=self.labels, ) attr_metrics[metric_name] = metric return metric for devinfo in data: port = devinfo["port"] if devinfo["error"]: error_metrics.add_metric( [port], 1. ) continue has_warnings = False error_metrics.add_metric( [port], 0. ) device = devinfo["model"] family = devinfo["family"] serial = devinfo["serial"] self.logger.debug("device %r", device) for attrinfo in devinfo["attrs"]: self.logger.debug("smart attribute %r", attrinfo) id_ = int(attrinfo["ID#"]) name = attrinfo["Name"] try: type_, _, _ = self.devicedb.get_info_for_attr(device, id_) except KeyError as exc: try: # check if attribute is explicitly configured # in that case, we want to warn self.attrmap.get_metric_for_attribute( device, id_, name ) except KeyError: # not configured -> no warning self.logger.debug( "omitting unconfigured attribute which is missing" " in device db: #%d (%s) on device %r" " (lookup failed for %s)", id_, name, device, exc, ) else: self.logger.warning( "explicitly configured attribute #%d (%s) on" " device %r is missing in devicedb -- cannot" " generate metric! (lookup failed for %s)", id_, name, device, exc, ) has_warnings = True continue metric = get_attr_metric(device, id_, name) if metric is None: continue self.logger.debug( "registering %s of #%d on metric %s", type_, id_, metric, ) metric.add_metric( [port, device, family, serial], float(attrinfo[type_]) ) if has_warnings: warning_metrics.add_metric( [port], int(has_warnings) ) return [ global_error_metric, error_metrics, warning_metrics, ] + list(attr_metrics.values())
def collect(self): nsip = self.nsip data = {} for entity in self.metrics.keys(): logger.info('Collecting metric %s for %s' % (entity, nsip)) try: data[entity] = collect_data(nsip, entity, self.username, self.password, self.protocol, self.nitro_timeout) except Exception as e: logger.warning('Could not collect metric: ' + str(e)) # Add labels to metrics and provide to Prometheus log_prefix_match = True for entity_name, entity in self.metrics.items(): if ('labels' in entity.keys()): label_names = [v[1] for v in entity['labels']] label_names.append('nsip') else: label_names = [] label_names.append('nsip') # Provide collected metric to Prometheus as a counter for ns_metric_name, prom_metric_name in entity.get('counters', []): c = CounterMetricFamily(prom_metric_name, ns_metric_name, labels=label_names) entity_stats = data.get(entity_name, []) if (type(entity_stats) is not list): entity_stats = [entity_stats] for data_item in entity_stats: if not data_item: continue if ns_metric_name not in data_item.keys(): logger.warning( 'Counter stats for %s not enabled in adc %s, so could not add to %s' % (ns_metric_name, nsip, entity_name)) break if ('labels' in entity.keys()): label_values = [ data_item[key] for key in [v[0] for v in entity['labels']] ] # populate and update k8s_ingress_lbvs metrics if in k8s-CIC enviroment if entity_name == "k8s_ingress_lbvs": if os.environ.get( 'KUBERNETES_SERVICE_HOST') is not None: prefix_match = update_lbvs_label( self.k8s_cic_prefix, label_values, ns_metric_name, log_prefix_match) if not prefix_match: log_prefix_match = False continue else: continue label_values.append(nsip) else: label_values = [nsip] try: c.add_metric(label_values, float(data_item[ns_metric_name])) except Exception as e: logger.error( 'Caught exception while adding counter %s to %s: %s' % (ns_metric_name, entity_name, str(e))) yield c # Provide collected metric to Prometheus as a gauge for ns_metric_name, prom_metric_name in entity.get('gauges', []): g = GaugeMetricFamily(prom_metric_name, ns_metric_name, labels=label_names) entity_stats = data.get(entity_name, []) if (type(entity_stats) is not list): entity_stats = [entity_stats] for data_item in entity_stats: if not data_item: continue if ns_metric_name not in data_item.keys(): logger.warning( 'Gauge stats for %s not enabled in adc %s, so could not add to %s' % (ns_metric_name, nsip, entity_name)) break if ('labels' in entity.keys()): label_values = [ data_item[key] for key in [v[0] for v in entity['labels']] ] # populate and update k8s_ingress_lbvs metrics if in k8s-CIC enviroment if entity_name == "k8s_ingress_lbvs": if os.environ.get( 'KUBERNETES_SERVICE_HOST') is not None: prefix_match = update_lbvs_label( self.k8s_cic_prefix, label_values, ns_metric_name, log_prefix_match) if not prefix_match: log_prefix_match = False continue else: continue label_values.append(nsip) else: label_values = [nsip] try: g.add_metric(label_values, float(data_item[ns_metric_name])) except Exception as e: logger.error( 'Caught exception while adding counter %s to %s: %s' % (ns_metric_name, entity_name, str(e))) yield g
def collect(self):
    g = GaugeMetricFamily("tcp_syn_stats", 'tcp syn statistics', labels=['type'])
    g.add_metric(['mean'], self.mean)
    g.add_metric(['median'], self.median)
    g.add_metric(['median_high'], self.median_high)
    g.add_metric(['median_low'], self.median_low)
    g.add_metric(['variance'], self.variance)
    g.add_metric(['threshold'], self.getThreshold())
    g.add_metric(['max'], self.max)
    g.add_metric(['sum'], self.sum)
    yield g
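The mean, median, and variance fields read by this collector can be produced with the standard statistics module. A hedged sketch of a method that might fill them from a window of samples; the sample source and the update path are assumptions, not part of the original class:

import statistics

def update_stats(self, samples):
    """Fill the fields consumed by collect(); `samples` is a list of floats."""
    self.mean = statistics.mean(samples)
    self.median = statistics.median(samples)
    self.median_high = statistics.median_high(samples)
    self.median_low = statistics.median_low(samples)
    self.variance = statistics.variance(samples)  # needs at least two samples
    self.max = max(samples)
    self.sum = sum(samples)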
async def collect(self): """ Collects channel metrics. """ channel_metrics = { 'variable_rtp_audio_in_raw_bytes': GaugeMetricFamily( 'rtp_audio_in_raw_bytes_total', 'Total number of bytes received via this channel.', labels=['id']), 'variable_rtp_audio_out_raw_bytes': GaugeMetricFamily( 'rtp_audio_out_raw_bytes_total', 'Total number of bytes sent via this channel.', labels=['id']), 'variable_rtp_audio_in_media_bytes': GaugeMetricFamily( 'rtp_audio_in_media_bytes_total', 'Total number of media bytes received via this channel.', labels=['id']), 'variable_rtp_audio_out_media_bytes': GaugeMetricFamily( 'rtp_audio_out_media_bytes_total', 'Total number of media bytes sent via this channel.', labels=['id']), 'variable_rtp_audio_in_packet_count': GaugeMetricFamily( 'rtp_audio_in_packets_total', 'Total number of packets received via this channel.', labels=['id']), 'variable_rtp_audio_out_packet_count': GaugeMetricFamily( 'rtp_audio_out_packets_total', 'Total number of packets sent via this channel.', labels=['id']), 'variable_rtp_audio_in_media_packet_count': GaugeMetricFamily( 'rtp_audio_in_media_packets_total', 'Total number of media packets received via this channel.', labels=['id']), 'variable_rtp_audio_out_media_packet_count': GaugeMetricFamily( 'rtp_audio_out_media_packets_total', 'Total number of media packets sent via this channel.', labels=['id']), 'variable_rtp_audio_in_skip_packet_count': GaugeMetricFamily( 'rtp_audio_in_skip_packets_total', 'Total number of inbound packets discarded by this channel.', labels=['id']), 'variable_rtp_audio_out_skip_packet_count': GaugeMetricFamily( 'rtp_audio_out_skip_packets_total', 'Total number of outbound packets discarded by this channel.', labels=['id']), 'variable_rtp_audio_in_jitter_packet_count': GaugeMetricFamily( 'rtp_audio_in_jitter_packets_total', 'Total number of ? packets in this channel.', labels=['id']), 'variable_rtp_audio_in_dtmf_packet_count': GaugeMetricFamily( 'rtp_audio_in_dtmf_packets_total', 'Total number of ? packets in this channel.', labels=['id']), 'variable_rtp_audio_out_dtmf_packet_count': GaugeMetricFamily( 'rtp_audio_out_dtmf_packets_total', 'Total number of ? packets in this channel.', labels=['id']), 'variable_rtp_audio_in_cng_packet_count': GaugeMetricFamily( 'rtp_audio_in_cng_packets_total', 'Total number of ? packets in this channel.', labels=['id']), 'variable_rtp_audio_out_cng_packet_count': GaugeMetricFamily( 'rtp_audio_out_cng_packets_total', 'Total number of ? packets in this channel.', labels=['id']), 'variable_rtp_audio_in_flush_packet_count': GaugeMetricFamily( 'rtp_audio_in_flush_packets_total', 'Total number of ? 
packets in this channel.', labels=['id']), 'variable_rtp_audio_in_largest_jb_size': GaugeMetricFamily( 'rtp_audio_in_jitter_buffer_bytes_max', 'Largest jitterbuffer size in this channel.', labels=['id']), 'variable_rtp_audio_in_jitter_min_variance': GaugeMetricFamily( 'rtp_audio_in_jitter_seconds_min', 'Minimal jitter in seconds.', labels=['id']), 'variable_rtp_audio_in_jitter_max_variance': GaugeMetricFamily( 'rtp_audio_in_jitter_seconds_max', 'Maximum jitter in seconds.', labels=['id']), 'variable_rtp_audio_in_jitter_loss_rate': GaugeMetricFamily( 'rtp_audio_in_jitter_loss_rate', 'Ratio of lost packets due to inbound jitter.', labels=['id']), 'variable_rtp_audio_in_jitter_burst_rate': GaugeMetricFamily( 'rtp_audio_in_jitter_burst_rate', 'Ratio of packet bursts due to inbound jitter.', labels=['id']), 'variable_rtp_audio_in_mean_interval': GaugeMetricFamily( 'rtp_audio_in_mean_interval_seconds', 'Mean interval in seconds of inbound packets', labels=['id']), 'variable_rtp_audio_in_flaw_total': GaugeMetricFamily( 'rtp_audio_in_flaw_total', 'Total number of flaws detected in the channel', labels=['id']), 'variable_rtp_audio_in_quality_percentage': GaugeMetricFamily( 'rtp_audio_in_quality_percent', 'Audio quality in percent', labels=['id']), 'variable_rtp_audio_in_mos': GaugeMetricFamily( 'rtp_audio_in_quality_mos', 'Audio quality as Mean Opinion Score, (between 1 and 5)', labels=['id']), 'variable_rtp_audio_rtcp_octet_count': GaugeMetricFamily( 'rtcp_audio_bytes_total', 'Total number of rtcp bytes in this channel.', labels=['id']), 'variable_rtp_audio_rtcp_packet_count': GaugeMetricFamily( 'rtcp_audio_packets_total', 'Total number of rtcp packets in this channel.', labels=['id']), } channel_info_metric = GaugeMetricFamily( 'rtp_channel_info', 'FreeSWITCH RTP channel info', labels=['id', 'name', 'user_agent']) millisecond_metrics = [ 'variable_rtp_audio_in_jitter_min_variance', 'variable_rtp_audio_in_jitter_max_variance', 'variable_rtp_audio_in_mean_interval', ] (_, result) = await self._esl.send('api show calls as json') for row in json.loads(result).get('rows', []): uuid = row['uuid'] await self._esl.send(f'api uuid_set_media_stats {uuid}') (_, result) = await self._esl.send(f'api uuid_dump {uuid} json') channelvars = json.loads(result) label_values = [uuid] for key, metric_value in channelvars.items(): if key in millisecond_metrics: metric_value = float(metric_value) / 1000. if key in channel_metrics: channel_metrics[key].add_metric( label_values, metric_value) user_agent = channelvars.get('variable_sip_user_agent', 'Unknown') channel_info_label_values = [uuid, row['name'], user_agent] channel_info_metric.add_metric( channel_info_label_values, 1) return itertools.chain( channel_metrics.values(), [channel_info_metric])
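prometheus_client drives collect() synchronously, so an async collector like the FreeSWITCH one above needs an adapter that runs the coroutine to completion before handing the families to the registry. A sketch under that assumption; the adapter class is not part of the original code:

import asyncio

class SyncCollectorAdapter(object):
    """Bridges an async collect() to the synchronous Prometheus registry."""

    def __init__(self, async_collector, loop=None):
        self._collector = async_collector
        self._loop = loop or asyncio.new_event_loop()

    def collect(self):
        # Drive the coroutine to completion and return its metric families.
        return self._loop.run_until_complete(self._collector.collect())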
def collect(self): global hub_sessions global hub_sessions_lock global client_sessions global client_sessions_lock hub_received_messages = CounterMetricFamily( 'metronome2_hub_received_messages', 'Messages received by the metronome hub', labels=['sid']) hub_holes_created = CounterMetricFamily('metronome2_hub_holes_created', 'Holes created within session', labels=['sid']) hub_holes_closed = CounterMetricFamily('metronome2_hub_holes_closed', 'Holes closed within session', labels=['sid']) hub_holes_timed_out = CounterMetricFamily( 'metronome2_hub_holes_timed_out', 'Holes timed out within session', labels=['sid']) hub_holes_current = GaugeMetricFamily('metronome2_hub_holes_current', 'Current holes within session', labels=['sid']) hub_payload_bytes = CounterMetricFamily( 'metronome2_hub_received_bytes', 'Payload bytes received by the hub', labels=['sid']) hub_intermessage_gap_mavg_seconds = GaugeMetricFamily( 'metronome2_hub_intermessage_gap_mavg', 'Moving average of intermessage gap', labels=['sid']) hub_receive_time_window_messages = GaugeMetricFamily( 'metronome2_hub_receive_time_window_messages', 'Messages received by time window', labels=['sid', 'window']) client_unexpected_increments = CounterMetricFamily( 'metronome2_client_seq_unexpected_increment', 'Unexpected sequence number increments', labels=['sid']) client_unexpected_decrements = CounterMetricFamily( 'metronome2_client_seq_unexpected_decrement', 'Unexpected sequence number decrements', labels=['sid']) client_sent_messages = CounterMetricFamily( 'metronome2_client_sent_messages', 'Messages sent by the metronome client', labels=['sid']) client_received_messages = CounterMetricFamily( 'metronome2_client_received_messages', 'Messages received by the metronome client', labels=['sid']) client_timely_received_messages = CounterMetricFamily( 'metronome2_client_timely_received_messages', 'Timely messages received by the metronome client', labels=['sid']) client_lost_messages = CounterMetricFamily( 'metronome2_client_lost_messages', 'Messages lost', labels=['sid']) client_inflight_messages = GaugeMetricFamily( 'metronome2_client_inflight_messages', 'Current messages in-flight', labels=['sid']) client_rtt_worst_seconds = GaugeMetricFamily( 'metronome2_client_rtt_worst', 'Worst RTT seen by client', labels=['sid']) client_rtt_best_seconds = GaugeMetricFamily( 'metronome2_client_rtt_best', 'Worst RTT seen by client', labels=['sid']) client_rtt_mavg_seconds = GaugeMetricFamily( 'metronome2_client_rtt_mavg', 'Moving average of RTT', labels=['sid']) client_payload_bytes = CounterMetricFamily( 'metronome2_client_received_bytes', 'Payload bytes received by the client', labels=['sid']) client_intermessage_gap_mavg_seconds = GaugeMetricFamily( 'metronome2_client_intermessage_gap_mavg', 'Moving average of intermessage gap', labels=['sid']) client_receive_time_window_messages = GaugeMetricFamily( 'metronome2_client_receive_time_window_messages', 'Messages received by time window', labels=['sid', 'window']) with hub_sessions_lock: for sid, session_info in hub_sessions.items(): hub_received_messages.add_metric( [sid], session_info.get('received_messages'), timestamp=session_info.get('timestamp')) hub_holes_created.add_metric( [sid], session_info.get('holes_created'), timestamp=session_info.get('timestamp')) hub_holes_closed.add_metric( [sid], session_info.get('holes_closed'), timestamp=session_info.get('timestamp')) hub_holes_timed_out.add_metric( [sid], session_info.get('holes_timed_out'), timestamp=session_info.get('timestamp')) 
hub_holes_current.add_metric( [sid], session_info.get('holes_current'), timestamp=session_info.get('timestamp')) hub_payload_bytes.add_metric( [sid], session_info.get('received_bytes'), timestamp=session_info.get('timestamp')) if session_info.get('intermessage_gap_mavg') is not None: hub_intermessage_gap_mavg_seconds.add_metric( [sid], session_info.get('intermessage_gap_mavg'), timestamp=session_info.get('timestamp')) if session_info.get('receive_time_windows') is not None: i = 0 for window in session_info.get('receive_time_windows'): hub_receive_time_window_messages.add_metric( [sid, str(i)], window, timestamp=session_info.get('timestamp')) i += 1 with client_sessions_lock: for sid, session_info in client_sessions.items(): client_unexpected_increments.add_metric( [sid], session_info.get('seq_unexpected_increment'), timestamp=session_info.get('timestamp')) client_unexpected_decrements.add_metric( [sid], session_info.get('seq_unexpected_decrement'), timestamp=session_info.get('timestamp')) client_sent_messages.add_metric( [sid], session_info.get('sent_messages'), timestamp=session_info.get('timestamp')) client_received_messages.add_metric( [sid], session_info.get('received_messages'), timestamp=session_info.get('timestamp')) client_timely_received_messages.add_metric( [sid], session_info.get('timely_received_messages'), timestamp=session_info.get('timestamp')) client_lost_messages.add_metric( [sid], session_info.get('lost_messages'), timestamp=session_info.get('timestamp')) client_inflight_messages.add_metric( [sid], session_info.get('inflight_messages'), timestamp=session_info.get('timestamp')) if session_info.get('rtt_worst') is not None: client_rtt_worst_seconds.add_metric( [sid], session_info.get('rtt_worst'), timestamp=session_info.get('timestamp')) if session_info.get('rtt_best') is not None: client_rtt_best_seconds.add_metric( [sid], session_info.get('rtt_best'), timestamp=session_info.get('timestamp')) if session_info.get('rtt_mavg') is not None: client_rtt_mavg_seconds.add_metric( [sid], session_info.get('rtt_mavg'), timestamp=session_info.get('timestamp')) if session_info.get('received_bytes') is not None: client_payload_bytes.add_metric( [sid], session_info.get('received_bytes'), timestamp=session_info.get('timestamp')) if session_info.get('intermessage_gap_mavg') is not None: client_intermessage_gap_mavg_seconds.add_metric( [sid], session_info.get('intermessage_gap_mavg'), timestamp=session_info.get('timestamp')) if session_info.get('receive_time_windows') is not None: i = 0 for window in session_info.get('receive_time_windows'): client_receive_time_window_messages.add_metric( [sid, str(i)], window, timestamp=session_info.get('timestamp')) i += 1 yield hub_received_messages yield hub_holes_created yield hub_holes_closed yield hub_holes_timed_out yield hub_holes_current yield hub_payload_bytes yield hub_intermessage_gap_mavg_seconds yield hub_receive_time_window_messages yield client_unexpected_increments yield client_unexpected_decrements yield client_sent_messages yield client_received_messages yield client_timely_received_messages yield client_lost_messages yield client_inflight_messages yield client_rtt_worst_seconds yield client_rtt_best_seconds yield client_rtt_mavg_seconds yield client_payload_bytes yield client_intermessage_gap_mavg_seconds yield client_receive_time_window_messages
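The metronome collector attaches explicit timestamps to its samples; add_metric() takes the timestamp as seconds since the epoch, so the stored session timestamps are assumed to already be POSIX seconds. The pattern in isolation:

import time
from prometheus_client.core import CounterMetricFamily

fam = CounterMetricFamily('demo_messages_total', 'Example of a timestamped sample',
                          labels=['sid'])
fam.add_metric(['session-1'], 42, timestamp=time.time())  # seconds since the epoch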
def collect(self): # Collect metrics from NetScalers data = {} for nsip in self.nsips: data[nsip] = {} for entity in self.metrics.keys(): # cycle through metrics json to get required entities whose stats need to be collected print('>>> Collecting stats for: %s::%s' % (nsip, entity)) try: data[nsip][entity] = collect_data(nsip, entity, self.username, self.password, self.secure) except Exception as e: print('>>> Caught exception while collecting data: ' + str(e)) # Provide collected stats to Prometheus as a counter/guage with desired labels for entity_name, entity in self.metrics.items(): if('labels' in entity.keys()): label_names = [v[1] for v in entity['labels']] label_names.append('nsip') else: label_names = [] label_names.append('nsip') for ns_metric_name, prom_metric_name in entity.get('counters', []): c = CounterMetricFamily(prom_metric_name, ns_metric_name, labels=label_names) for nsip in self.nsips: entity_stats = data[nsip].get(entity_name, []) if( type(entity_stats) is not list): entity_stats = [entity_stats] for data_item in entity_stats: if('labels' in entity.keys()): label_values = [data_item[key] for key in [v[0] for v in entity['labels']]] label_values.append(nsip) else: label_values = [nsip] try: c.add_metric(label_values, float(data_item[ns_metric_name])) except Exception as e: print('>>> Caught exception while adding counter %s to %s: %s' %(ns_metric_name, entity_name, str(e))) yield c for ns_metric_name, prom_metric_name in entity.get('gauges', []): g = GaugeMetricFamily(prom_metric_name, ns_metric_name, labels=label_names) for nsip in self.nsips: entity_stats = data[nsip].get(entity_name, []) if(type(entity_stats) is not list): entity_stats = [entity_stats] for data_item in entity_stats: if('labels' in entity.keys()): label_values = [data_item[key] for key in [v[0] for v in entity['labels']]] label_values.append(nsip) else: label_values = [nsip] try: g.add_metric(label_values, float(data_item[ns_metric_name])) except Exception as e: print('>>> Caught exception while adding guage %s to %s: %s' %(ns_metric_name, entity_name, str(e)) ) yield g
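Both NetScaler collectors above iterate a self.metrics mapping that pairs NITRO stat names with Prometheus names. A sketch of the shape that mapping appears to have, inferred from how 'labels', 'counters', and 'gauges' are read; the entity and field names below are illustrative guesses only:

# Illustrative only: entity and field names are guesses, not real NITRO stat names.
metrics = {
    'lbvserver': {
        'labels': [('name', 'lb_vserver_name')],  # (NITRO field, Prometheus label name)
        'counters': [('totalrequests', 'lb_vserver_requests_total')],
        'gauges': [('curclntconnections', 'lb_vserver_current_client_connections')],
    },
}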