def collect(self):
    """Yield one counter family with a sample per Redis key.

    Each key becomes the 'id' label value; keys whose stored value cannot
    be interpreted as a float are skipped.
    """
    c = CounterMetricFamily('redis', 'Help text', labels=['id'])
    for key in self.r.keys('*'):
        try:
            # get() returns None if the key expired between keys() and
            # get(); float(None) raises TypeError, which we also skip.
            c.add_metric([key], float(self.r.get(key)))
        except (ValueError, TypeError):
            # Bug fix: the original `except ValueError: None` was a no-op
            # expression statement; `pass` states the intent explicitly.
            pass
    yield c
def __init__(self):
    """Create the per-jobset counter families, both labelled by jobset name."""
    jobset_labels = ["name"]
    self.seconds = CounterMetricFamily(
        "hydra_jobset_seconds_total",
        "Total number of seconds the jobset has been building",
        labels=jobset_labels,
    )
    self.shares_used = CounterMetricFamily(
        "hydra_jobset_shares_used_total",
        "Total shares the jobset has consumed",
        labels=jobset_labels,
    )
def __init__(self):
    """Create the gauge/counter families describing each Hydra machine type."""
    labels = ["machineType"]
    # Current-state gauges.
    self.runnable = GaugeMetricFamily(
        "hydra_machine_type_runnable",
        "Number of currently runnable builds", labels=labels)
    self.running = GaugeMetricFamily(
        "hydra_machine_type_running",
        "Number of currently running builds", labels=labels)
    # Monotonic counters.
    self.wait_time = CounterMetricFamily(
        "hydra_machine_type_wait_time_total",
        "Number of seconds spent waiting", labels=labels)
    self.last_active = CounterMetricFamily(
        "hydra_machine_type_last_active_total",
        "Last time this machine type was active", labels=labels)
class JobsetScrapeImporter:
    """Translates a per-jobset status report into Prometheus counter families."""

    def __init__(self):
        labels = ["name"]
        self.seconds = CounterMetricFamily(
            "hydra_jobset_seconds_total",
            "Total number of seconds the jobset has been building",
            labels=labels,
        )
        self.shares_used = CounterMetricFamily(
            "hydra_jobset_shares_used_total",
            "Total shares the jobset has consumed",
            labels=labels,
        )

    def load_jobset(self, name, report):
        """Consume the jobset fields of *report*, recording samples under *name*."""
        label_values = [name]
        self.seconds.add_metric(label_values, report.destructive_read("seconds"))
        self.shares_used.add_metric(label_values, report.destructive_read("shareUsed"))
        # Anything still left in the report was not exported; log it for debugging.
        debug_remaining_state(report)

    def metrics(self):
        """Yield every metric family owned by this importer."""
        yield self.seconds
        yield self.shares_used
class MachineTypeScrapeImporter:
    """Translates per-machine-type status reports into Prometheus families."""

    def __init__(self):
        labels = ["machineType"]
        self.runnable = GaugeMetricFamily(
            "hydra_machine_type_runnable",
            "Number of currently runnable builds",
            labels=labels)
        self.running = GaugeMetricFamily(
            "hydra_machine_type_running",
            "Number of currently running builds",
            labels=labels)
        self.wait_time = CounterMetricFamily(
            "hydra_machine_type_wait_time_total",
            "Number of seconds spent waiting",
            labels=labels)
        self.last_active = CounterMetricFamily(
            "hydra_machine_type_last_active_total",
            "Last time this machine type was active",
            labels=labels)

    def load_machine_type(self, name, report):
        """Consume the machine-type fields of *report*, labelled with *name*."""
        label_values = [name]
        self.runnable.add_metric(label_values, report.destructive_read("runnable"))
        self.running.add_metric(label_values, report.destructive_read("running"))
        # waitTime and lastActive are not always present in the report;
        # missing keys are simply skipped.
        for family, key in ((self.wait_time, "waitTime"),
                            (self.last_active, "lastActive")):
            try:
                family.add_metric(label_values, report.destructive_read(key))
            except KeyError:
                pass
        debug_remaining_state(report)

    def metrics(self):
        """Yield every metric family owned by this importer."""
        for family in (self.runnable, self.running,
                       self.wait_time, self.last_active):
            yield family
def __init__(self):
    """Create the per-machine metric families, all labelled by host."""
    labels = [ "host" ]

    def gauge(name, doc):
        # Small local factory: every family shares the same label list.
        return GaugeMetricFamily(name, doc, labels=labels)

    def counter(name, doc):
        return CounterMetricFamily(name, doc, labels=labels)

    # NOTE: "consective" is a long-standing typo kept for attribute
    # compatibility; the exported metric name is spelled correctly.
    self.consective_failures = gauge(
        "hydra_machine_consecutive_failures",
        "Number of consecutive failed builds")
    self.current_jobs = gauge(
        "hydra_machine_current_jobs",
        "Number of current jobs")
    self.idle_since = gauge(
        "hydra_machine_idle_since",
        "When the current idle period started")
    self.disabled_until = gauge(
        "hydra_machine_disabled_until",
        "When the machine will be used again")
    self.enabled = gauge(
        "hydra_machine_enabled",
        "If the machine is enabled (1) or not (0)")
    self.last_failure = counter(
        "hydra_machine_last_failure",
        "timestamp of the last failure")
    self.number_steps_done = counter(
        "hydra_machine_steps_done_total",
        "Total count of the steps completed")
    self.total_step_build_time = counter(
        "hydra_machine_step_build_time_total",
        "Number of seconds spent building steps")
    self.total_step_time = counter(
        "hydra_machine_step_time_total",
        "Number of seconds spent on steps")
def _translate_to_prometheus(self, export_record: ExportRecord):
    """Convert one OpenTelemetry ExportRecord into a prometheus_client
    metric family, or return None for unsupported instrument types."""
    prometheus_metric = None
    label_values = []
    label_keys = []
    for label_tuple in export_record.labels:
        # Label names must be sanitized into valid Prometheus identifiers;
        # label values pass through unchanged.
        label_keys.append(self._sanitize(label_tuple[0]))
        label_values.append(label_tuple[1])
    # Optional exporter-wide prefix, then the sanitized instrument name.
    metric_name = ""
    if self._prefix != "":
        metric_name = self._prefix + "_"
    metric_name += self._sanitize(export_record.instrument.name)
    description = getattr(export_record.instrument, "description", "")
    if isinstance(export_record.instrument, Counter):
        prometheus_metric = CounterMetricFamily(
            name=metric_name, documentation=description, labels=label_keys)
        # Counter aggregators expose the running sum in `checkpoint`.
        prometheus_metric.add_metric(
            labels=label_values, value=export_record.aggregator.checkpoint)
    elif isinstance(export_record.instrument, Observer):
        prometheus_metric = GaugeMetricFamily(
            name=metric_name, documentation=description, labels=label_keys)
        # Observers export the most recent observation only.
        prometheus_metric.add_metric(
            labels=label_values, value=export_record.aggregator.checkpoint.last)
    # TODO: Add support for histograms when supported in OT
    elif isinstance(export_record.instrument, ValueRecorder):
        value = export_record.aggregator.checkpoint
        if isinstance(export_record.aggregator, MinMaxSumCountAggregator):
            # Min/max are dropped: Prometheus summaries carry only count+sum.
            prometheus_metric = SummaryMetricFamily(
                name=metric_name,
                documentation=description,
                labels=label_keys,
            )
            prometheus_metric.add_metric(
                labels=label_values,
                count_value=value.count,
                sum_value=value.sum,
            )
        else:
            # Fallback: export the raw checkpoint as an untyped metric.
            prometheus_metric = UnknownMetricFamily(
                name=metric_name,
                documentation=description,
                labels=label_keys,
            )
            prometheus_metric.add_metric(labels=label_values, value=value)
    else:
        logger.warning("Unsupported metric type. %s", type(export_record.instrument))
    return prometheus_metric
def test_counter_total(self):
    """A value-only counter family is exposed under its plain sample name."""
    family = CounterMetricFamily('c_total', 'help', value=1)
    self.custom_collector(family)
    sample = self.registry.get_sample_value('c_total', {})
    self.assertEqual(1, sample)
def collect(self):
    """Prometheus custom collector for metronome hub and client sessions.

    Builds one metric family per hub/client statistic, fills them from the
    module-level session dicts (under their respective locks), and yields
    every family. Optional per-session statistics are skipped when absent.
    """
    # NOTE: the session dicts/locks are module-level names that are only
    # read here, so the original ``global`` declarations were unnecessary
    # and have been removed.
    hub_received_messages = CounterMetricFamily(
        'metronome2_hub_received_messages',
        'Messages received by the metronome hub',
        labels=['sid']
    )
    hub_holes_created = CounterMetricFamily(
        'metronome2_hub_holes_created',
        'Holes created within session',
        labels=['sid']
    )
    hub_holes_closed = CounterMetricFamily(
        'metronome2_hub_holes_closed',
        'Holes closed within session',
        labels=['sid']
    )
    hub_holes_timed_out = CounterMetricFamily(
        'metronome2_hub_holes_timed_out',
        'Holes timed out within session',
        labels=['sid']
    )
    hub_holes_current = GaugeMetricFamily(
        'metronome2_hub_holes_current',
        'Current holes within session',
        labels=['sid']
    )
    hub_payload_bytes = CounterMetricFamily(
        'metronome2_hub_received_bytes',
        'Payload bytes received by the hub',
        labels=['sid']
    )
    hub_intermessage_gap_mavg_seconds = GaugeMetricFamily(
        'metronome2_hub_intermessage_gap_mavg',
        'Moving average of intermessage gap',
        labels=['sid']
    )
    hub_receive_time_window_messages = GaugeMetricFamily(
        'metronome2_hub_receive_time_window_messages',
        'Messages received by time window',
        labels=['sid', 'window']
    )
    client_unexpected_increments = CounterMetricFamily(
        'metronome2_client_seq_unexpected_increment',
        'Unexpected sequence number increments',
        labels=['sid']
    )
    client_unexpected_decrements = CounterMetricFamily(
        'metronome2_client_seq_unexpected_decrement',
        'Unexpected sequence number decrements',
        labels=['sid']
    )
    client_sent_messages = CounterMetricFamily(
        'metronome2_client_sent_messages',
        'Messages sent by the metronome client',
        labels=['sid']
    )
    client_received_messages = CounterMetricFamily(
        'metronome2_client_received_messages',
        'Messages received by the metronome client',
        labels=['sid']
    )
    client_timely_received_messages = CounterMetricFamily(
        'metronome2_client_timely_received_messages',
        'Timely messages received by the metronome client',
        labels=['sid']
    )
    client_lost_messages = CounterMetricFamily(
        'metronome2_client_lost_messages',
        'Messages lost',
        labels=['sid']
    )
    client_inflight_messages = GaugeMetricFamily(
        'metronome2_client_inflight_messages',
        'Current messages in-flight',
        labels=['sid']
    )
    client_rtt_worst_seconds = GaugeMetricFamily(
        'metronome2_client_rtt_worst',
        'Worst RTT seen by client',
        labels=['sid']
    )
    # Bug fix: help text previously read "Worst RTT seen by client",
    # copy-pasted from the metric above.
    client_rtt_best_seconds = GaugeMetricFamily(
        'metronome2_client_rtt_best',
        'Best RTT seen by client',
        labels=['sid']
    )
    client_rtt_mavg_seconds = GaugeMetricFamily(
        'metronome2_client_rtt_mavg',
        'Moving average of RTT',
        labels=['sid']
    )
    client_payload_bytes = CounterMetricFamily(
        'metronome2_client_received_bytes',
        'Payload bytes received by the client',
        labels=['sid']
    )
    client_intermessage_gap_mavg_seconds = GaugeMetricFamily(
        'metronome2_client_intermessage_gap_mavg',
        'Moving average of intermessage gap',
        labels=['sid']
    )
    client_receive_time_window_messages = GaugeMetricFamily(
        'metronome2_client_receive_time_window_messages',
        'Messages received by time window',
        labels=['sid', 'window']
    )

    with hub_sessions_lock:
        for sid, session_info in hub_sessions.items():
            # All samples of a session share the session's timestamp.
            ts = session_info.get('timestamp')
            hub_received_messages.add_metric(
                [sid], session_info.get('received_messages'), timestamp=ts
            )
            hub_holes_created.add_metric(
                [sid], session_info.get('holes_created'), timestamp=ts
            )
            hub_holes_closed.add_metric(
                [sid], session_info.get('holes_closed'), timestamp=ts
            )
            hub_holes_timed_out.add_metric(
                [sid], session_info.get('holes_timed_out'), timestamp=ts
            )
            hub_holes_current.add_metric(
                [sid], session_info.get('holes_current'), timestamp=ts
            )
            hub_payload_bytes.add_metric(
                [sid], session_info.get('received_bytes'), timestamp=ts
            )
            # Derived statistics only exist once enough data was seen.
            if session_info.get('intermessage_gap_mavg') is not None:
                hub_intermessage_gap_mavg_seconds.add_metric(
                    [sid], session_info.get('intermessage_gap_mavg'), timestamp=ts
                )
            if session_info.get('receive_time_windows') is not None:
                for i, window in enumerate(session_info.get('receive_time_windows')):
                    hub_receive_time_window_messages.add_metric(
                        [sid, str(i)], window, timestamp=ts
                    )

    with client_sessions_lock:
        for sid, session_info in client_sessions.items():
            ts = session_info.get('timestamp')
            client_unexpected_increments.add_metric(
                [sid], session_info.get('seq_unexpected_increment'), timestamp=ts
            )
            client_unexpected_decrements.add_metric(
                [sid], session_info.get('seq_unexpected_decrement'), timestamp=ts
            )
            client_sent_messages.add_metric(
                [sid], session_info.get('sent_messages'), timestamp=ts
            )
            client_received_messages.add_metric(
                [sid], session_info.get('received_messages'), timestamp=ts
            )
            client_timely_received_messages.add_metric(
                [sid], session_info.get('timely_received_messages'), timestamp=ts
            )
            client_lost_messages.add_metric(
                [sid], session_info.get('lost_messages'), timestamp=ts
            )
            client_inflight_messages.add_metric(
                [sid], session_info.get('inflight_messages'), timestamp=ts
            )
            # RTT/gap statistics only exist after at least one round-trip.
            if session_info.get('rtt_worst') is not None:
                client_rtt_worst_seconds.add_metric(
                    [sid], session_info.get('rtt_worst'), timestamp=ts
                )
            if session_info.get('rtt_best') is not None:
                client_rtt_best_seconds.add_metric(
                    [sid], session_info.get('rtt_best'), timestamp=ts
                )
            if session_info.get('rtt_mavg') is not None:
                client_rtt_mavg_seconds.add_metric(
                    [sid], session_info.get('rtt_mavg'), timestamp=ts
                )
            if session_info.get('received_bytes') is not None:
                client_payload_bytes.add_metric(
                    [sid], session_info.get('received_bytes'), timestamp=ts
                )
            if session_info.get('intermessage_gap_mavg') is not None:
                client_intermessage_gap_mavg_seconds.add_metric(
                    [sid], session_info.get('intermessage_gap_mavg'), timestamp=ts
                )
            if session_info.get('receive_time_windows') is not None:
                for i, window in enumerate(session_info.get('receive_time_windows')):
                    client_receive_time_window_messages.add_metric(
                        [sid, str(i)], window, timestamp=ts
                    )

    yield hub_received_messages
    yield hub_holes_created
    yield hub_holes_closed
    yield hub_holes_timed_out
    yield hub_holes_current
    yield hub_payload_bytes
    yield hub_intermessage_gap_mavg_seconds
    yield hub_receive_time_window_messages
    yield client_unexpected_increments
    yield client_unexpected_decrements
    yield client_sent_messages
    yield client_received_messages
    yield client_timely_received_messages
    yield client_lost_messages
    yield client_inflight_messages
    yield client_rtt_worst_seconds
    yield client_rtt_best_seconds
    yield client_rtt_mavg_seconds
    yield client_payload_bytes
    yield client_intermessage_gap_mavg_seconds
    yield client_receive_time_window_messages
def metrics_setup_sta(self, metrics):
    """Register the per-client (station) metric families in *metrics*."""
    sta_labels = ['mac', 'hostname', 'radio', 'essid']
    metrics['c_sta_rx_bytes'] = CounterMetricFamily(
        'unifi_sta_rx_bytes', 'Client RX bytes', labels=sta_labels)
    metrics['c_sta_tx_bytes'] = CounterMetricFamily(
        'unifi_sta_tx_bytes', 'Client TX bytes', labels=sta_labels)
    metrics['g_sta_rssi'] = GaugeMetricFamily(
        'unifi_sta_rssi', 'Client signal RSSI', labels=sta_labels)
def collect(self):
    """Scrape RaidPhysicalDriveStatus from the MQ Appliance REST API and
    yield per-drive metric families plus the exporter's scrape duration.

    NOTE(review): a fresh family with the same metric name is created and
    yielded for every drive in the loop, producing duplicate families in a
    single scrape — confirm this is intended.
    """
    start = time.time()

    # Perform REST API call to fetch data
    data = call_rest_api('/mgmt/status/default/RaidPhysicalDriveStatus',
                         self.ip, self.port, self.session, self.timeout)
    if data == '':
        # The helper signals a failed call with an empty string.
        return

    # Update Prometheus metrics
    for rpd in data['RaidPhysicalDriveStatus']:
        c = CounterMetricFamily(
            'mqa_raid_physical_drive_progress_percent_total',
            'The current progress percentage of the operation on the physical drive. Operations can be rebuild, copyback, patrol, or clear',
            labels=['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID', 'position'])
        c.add_metric([self.appliance, str(rpd['ControllerID']), str(rpd['DeviceID']),
                      str(rpd['ArrayID']), str(rpd['LogicalDriveID']), rpd['Position']],
                     rpd['ProgressPercent'])
        yield c

        # RawSize is multiplied by 1e6 — presumably the API reports
        # megabytes; TODO confirm against the appliance documentation.
        c = CounterMetricFamily(
            'mqa_raid_physical_drive_raw_size_bytes_total',
            'The exact size of the drive in bytes',
            labels=['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID', 'position'])
        c.add_metric([self.appliance, str(rpd['ControllerID']), str(rpd['DeviceID']),
                      str(rpd['ArrayID']), str(rpd['LogicalDriveID']), rpd['Position']],
                     rpd['RawSize'] * 1000000)
        yield c

        c = CounterMetricFamily(
            'mqa_raid_physical_drive_coerced_size_bytes_total',
            'The normalized size in megabytes. The value is rounded down to an even multiple, which allows you to swap drives of the same nominal size but might not be the same raw size',
            labels=['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID', 'position'])
        c.add_metric([self.appliance, str(rpd['ControllerID']), str(rpd['DeviceID']),
                      str(rpd['ArrayID']), str(rpd['LogicalDriveID']), rpd['Position']],
                     rpd['CoercedSize'] * 1000000)
        yield c

        # The API reports e.g. "n/a" when no sensor reading is available;
        # -1 is used as the "unknown" sentinel value.
        if rpd['Temperature'][:3] == 'n/a':
            temperature_celsius = -1
        else:
            temperature_celsius = int(rpd['Temperature'][:3])
        g = GaugeMetricFamily(
            'mqa_raid_physical_drive_temperature_celsius',
            'The temperature of the hard disk drive in celsius',
            labels=['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID', 'position'])
        g.add_metric([self.appliance, str(rpd['ControllerID']), str(rpd['DeviceID']),
                      str(rpd['ArrayID']), str(rpd['LogicalDriveID']), rpd['Position']],
                     temperature_celsius)
        yield g

        g = GaugeMetricFamily(
            'mqa_raid_physical_drive_failure',
            'If the hard disk failure state shows Yes, replace this drive as soon as possible to avoid possible data loss',
            labels=['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID', 'position'])
        g.add_metric([self.appliance, str(rpd['ControllerID']), str(rpd['DeviceID']),
                      str(rpd['ArrayID']), str(rpd['LogicalDriveID']), rpd['Position']],
                     0 if rpd['Failure'] == 'No' else 1)
        yield g

        # Static drive attributes are exported as an info metric.
        # NOTE(review): 'failure' appears in the first list but has no
        # matching entry in the value dict — confirm intended.
        i = InfoMetricFamily(
            'mqa_raid_physical_drive',
            'MQ Appliance raid physical drive information')
        i.add_metric(
            ['appliance', 'controllerID', 'deviceID', 'arrayID', 'logicalDriveID',
             'logicalDriveName', 'position', 'state', 'interfaceType', 'interfaceSpeed',
             'sasAddress', 'vendorID', 'productID', 'revision', 'specificInfo', 'failure'],
            {'appliance': self.appliance,
             'controllerID': str(rpd['ControllerID']),
             'deviceID': str(rpd['DeviceID']),
             'arrayID': str(rpd['ArrayID']),
             'logicalDriveID': str(rpd['LogicalDriveID']),
             'logicalDriveName': rpd['LogicalDriveName'],
             'position': rpd['Position'],
             'state': rpd['State'],
             'interfaceType': rpd['InterfaceType'],
             'interfaceSpeed': rpd['InterfaceSpeed'],
             'sasAddress': rpd['SASaddress'],
             'vendorID': rpd['VendorID'],
             'productID': rpd['ProductID'],
             'revision': rpd['Revision'],
             'specificInfo': rpd['SpecificInfo']})
        yield i

    # Self-observability: how long this scrape took.
    g = GaugeMetricFamily(
        'mqa_exporter_raid_physical_drive_elapsed_time_seconds',
        'Exporter eleapsed time to collect raid physical drive metrics',
        labels=['appliance'])
    g.add_metric([self.appliance], time.time() - start)
    yield g
def collect(self) -> Generator:
    '''
    Prometheus custom collector. Yields one family per battery reading that
    has been received so far; readings that are still None are skipped.
    '''
    if self.readings.bms_sn is not None:
        yield InfoMetricFamily('rctmon_bms_info',
                               'Information about the battery management system (BMS)',
                               {'inverter': self.parent.name, 'serial_number': self.readings.bms_sn})
    if self.readings.soc_min is not None:
        soc_min = GaugeMetricFamily('rctmon_battery_state_of_charge_min',
                                    'Battery minimum state of charge',
                                    labels=['inverter'], unit='percent')
        soc_min.add_metric([self.parent.name], self.readings.soc_min)
        yield soc_min
    if self.readings.battery_voltage is not None:
        battery_voltage = GaugeMetricFamily('rctmon_battery_voltage', 'Battery Voltage',
                                            labels=['inverter'])
        battery_voltage.add_metric([self.parent.name], self.readings.battery_voltage)
        yield battery_voltage
    if self.readings.battery_power is not None:
        battery_power = GaugeMetricFamily('rctmon_battery_power', 'Battery Power',
                                          labels=['inverter'])
        battery_power.add_metric([self.parent.name], self.readings.battery_power)
        yield battery_power
    if self.readings.battery_state is not None:
        battery_state = GaugeMetricFamily('rctmon_battery_state', 'Battery state machine state',
                                          labels=['inverter'])
        battery_state.add_metric([self.parent.name], self.readings.battery_state)
        yield battery_state
    if self.readings.soc_target is not None:
        battery_soc_target = GaugeMetricFamily('rctmon_battery_state_of_charge_target',
                                               'Battery target state of charge',
                                               labels=['inverter'], unit='percent')
        battery_soc_target.add_metric([self.parent.name], self.readings.soc_target)
        yield battery_soc_target
    if self.readings.soc is not None:
        battery_soc = GaugeMetricFamily('rctmon_battery_state_of_charge',
                                        'Battery state of charge',
                                        labels=['inverter'], unit='percent')
        battery_soc.add_metric([self.parent.name], self.readings.soc)
        yield battery_soc
    if self.readings.soh is not None:
        battery_soh = GaugeMetricFamily('rctmon_battery_state_of_health',
                                        'Battery state of health',
                                        labels=['inverter'], unit='percent')
        battery_soh.add_metric([self.parent.name], self.readings.soh)
        yield battery_soh
    if self.readings.temperature is not None:
        battery_temperature = GaugeMetricFamily('rctmon_battery_temperature', 'Battery temperature',
                                                labels=['inverter'])
        battery_temperature.add_metric([self.parent.name], self.readings.temperature)
        yield battery_temperature
    if self.readings.bat_status is not None:
        battery_bat_status = GaugeMetricFamily('rctmon_battery_bat_status', 'Battery status',
                                               labels=['inverter'])
        battery_bat_status.add_metric([self.parent.name], self.readings.bat_status)
        yield battery_bat_status
    if self.readings.impedance_fine is not None:
        battery_impedance_fine = GaugeMetricFamily('rctmon_battery_impedance_fine',
                                                   'Battery impedance (fine)',
                                                   labels=['inverter'])
        battery_impedance_fine.add_metric([self.parent.name], self.readings.impedance_fine)
        yield battery_impedance_fine
    if self.readings.discharged_amp_hours is not None:
        battery_discharge_amp_hours = CounterMetricFamily('rctmon_battery_discharge',
                                                          'Battery cumulative ' 'discharge',
                                                          labels=['inverter'], unit='amp_hours')
        battery_discharge_amp_hours.add_metric([self.parent.name], self.readings.discharged_amp_hours)
        yield battery_discharge_amp_hours
    if self.readings.stored_energy is not None:
        battery_stored_energy = CounterMetricFamily('rctmon_battery_stored_energy',
                                                    'Battery cumulative stored ' 'energy',
                                                    labels=['inverter'])
        battery_stored_energy.add_metric([self.parent.name], self.readings.stored_energy)
        yield battery_stored_energy
    if self.num_batteries and self.num_batteries > 0:
        cycles = CounterMetricFamily('rctmon_battery_module_cycles',
                                     'Number of cycles the battery has accumulated'
                                     ' over its lifetime',
                                     labels=['inverter', 'module'])
        for battery in self.batteries.values():
            if battery:
                yield InfoMetricFamily('rctmon_battery_module',
                                       'Information about individual battery modules',
                                       {'inverter': self.parent.name, 'module': str(battery.num),
                                        'serial_number': battery.serial})
                if battery.cycle_count is not None:
                    cycles.add_metric([self.parent.name, str(battery.num)], battery.cycle_count)
        # NOTE(review): reconstructed indentation places this yield after the
        # module loop so the family is emitted once per scrape — confirm
        # against the upstream source.
        yield cycles
def collect(self):
    """Custom collector exposing Promgen model counts and build metadata.

    See https://github.com/prometheus/client_python#custom-collectors
    """
    build_info = GaugeMetricFamily(
        "promgen_build_info", "Promgen Information", labels=["version", "python"]
    )
    build_info.add_metric([version.__version__, platform.python_version()], 1)
    yield build_info

    # Alert counters are derived from the highest primary key; an empty
    # table raises DoesNotExist, in which case the metric is omitted.
    try:
        yield CounterMetricFamily(
            "promgen_alerts_processed",
            "Alerts",
            models.Alert.objects.latest("id").id,
        )
    except models.Alert.DoesNotExist:
        pass
    try:
        yield CounterMetricFamily(
            "promgen_alerts_failed",
            "Failed Alerts",
            models.AlertError.objects.latest("id").id,
        )
    except models.AlertError.DoesNotExist:
        pass

    # Plain object-count gauges.
    shards = GaugeMetricFamily(
        "promgen_shards", "Registered Shards", models.Shard.objects.count()
    )
    yield shards
    exporters = GaugeMetricFamily(
        "promgen_exporters", "Registered Exporters", models.Exporter.objects.count()
    )
    yield exporters
    services = GaugeMetricFamily(
        "promgen_services", "Registered Services", models.Service.objects.count()
    )
    yield services
    projects = GaugeMetricFamily(
        "promgen_projects", "Registered Projects", models.Project.objects.count()
    )
    yield projects
    rules = GaugeMetricFamily(
        "promgen_rules", "Registered Rules", models.Rule.objects.count()
    )
    yield rules
    urls = GaugeMetricFamily(
        "promgen_urls", "Registered URLs", models.URL.objects.count()
    )
    yield urls

    # TODO Properly de-duplicate after refactoring
    hosts = GaugeMetricFamily(
        "promgen_hosts",
        "Registered Hosts",
        len(models.Host.objects.values("name").annotate(Count("name"))),
    )
    yield hosts

    notifier = GaugeMetricFamily(
        "promgen_notifiers", "Registered Notifiers", labels=["type", "sender"]
    )
    sender_rows = models.Sender.objects.values(
        "content_type__model", "sender"
    ).annotate(Count("sender"), count=Count("content_type"))
    for entry in sender_rows:
        notifier.add_metric(
            [entry["content_type__model"], entry["sender"]], entry["count"]
        )
    yield notifier
class MachineScrapeImporter:
    """Translates per-machine status reports into Prometheus metric families."""

    def __init__(self):
        # Every family shares the single "host" label.
        labels = [ "host" ]
        # NOTE: "consective" is a long-standing typo kept because the
        # attribute name is part of this class's interface; the exported
        # metric name is spelled correctly.
        self.consective_failures = GaugeMetricFamily(
            "hydra_machine_consecutive_failures",
            "Number of consecutive failed builds",
            labels=labels)
        self.current_jobs = GaugeMetricFamily(
            "hydra_machine_current_jobs",
            "Number of current jobs",
            labels=labels)
        self.idle_since = GaugeMetricFamily(
            "hydra_machine_idle_since",
            "When the current idle period started",
            labels=labels)
        self.disabled_until = GaugeMetricFamily(
            "hydra_machine_disabled_until",
            "When the machine will be used again",
            labels=labels)
        self.enabled = GaugeMetricFamily(
            "hydra_machine_enabled",
            "If the machine is enabled (1) or not (0)",
            labels=labels)
        self.last_failure = CounterMetricFamily(
            "hydra_machine_last_failure",
            "timestamp of the last failure",
            labels=labels)
        self.number_steps_done = CounterMetricFamily(
            "hydra_machine_steps_done_total",
            "Total count of the steps completed",
            labels=labels)
        self.total_step_build_time = CounterMetricFamily(
            "hydra_machine_step_build_time_total",
            "Number of seconds spent building steps",
            labels=labels)
        self.total_step_time = CounterMetricFamily(
            "hydra_machine_step_time_total",
            "Number of seconds spent on steps",
            labels=labels)

    def load_machine(self, name, report):
        """Consume the machine fields of *report*, recording samples under *name*."""
        # Fields that are deliberately not exported.
        report.unused_read("mandatoryFeatures")
        report.unused_read("supportedFeatures")
        report.unused_read("systemTypes")
        report.unused_read("avgStepBuildTime")
        report.unused_read("avgStepTime")
        labels = [name]
        self.consective_failures.add_metric(
            labels,
            report.destructive_read("consecutiveFailures")
        )
        self.current_jobs.add_metric(
            labels,
            report.destructive_read("currentJobs")
        )
        try:
            # idleSince is only present for idle machines.
            self.idle_since.add_metric(
                labels,
                report.destructive_read("idleSince")
            )
        except KeyError:
            pass
        self.disabled_until.add_metric(
            labels,
            report.destructive_read("disabledUntil")
        )
        self.enabled.add_metric(
            labels,
            # Normalize the boolean to the documented 1/0 gauge values.
            1 if report.destructive_read("enabled") else 0
        )
        self.last_failure.add_metric(
            labels,
            report.destructive_read("lastFailure")
        )
        self.number_steps_done.add_metric(
            labels,
            report.destructive_read("nrStepsDone")
        )
        # These two default to 0 when the machine has done no steps yet.
        self.total_step_build_time.add_metric(
            labels,
            report.destructive_read_default("totalStepBuildTime", default=0)
        )
        self.total_step_time.add_metric(
            labels,
            report.destructive_read_default("totalStepTime", default=0)
        )
        debug_remaining_state(report)

    def metrics(self):
        """Yield every metric family owned by this importer."""
        yield self.consective_failures
        yield self.current_jobs
        yield self.idle_since
        yield self.disabled_until
        yield self.enabled
        yield self.last_failure
        yield self.number_steps_done
        yield self.total_step_build_time
        yield self.total_step_time
def test_counter_labels(self):
    """Counter samples keep their label values, even when a label name
    collides with the metric name itself."""
    family = CounterMetricFamily('c_total', 'help', labels=['a', 'c_total'])
    family.add_metric(['b', 'd'], 2)
    self.custom_collector(family)
    value = self.registry.get_sample_value('c_total', {'a': 'b', 'c_total': 'd'})
    self.assertEqual(2, value)
def test_timestamps(self):
    """Exposition-format timestamps parse into Timestamp objects; the
    scientific-notation sample (1.5e3) compares equal to the float 1500.0."""
    families = text_string_to_metric_families("""# TYPE a counter
# HELP a help
a_total{foo="1"} 1 000
a_total{foo="2"} 1 0.0
a_total{foo="3"} 1 1.1
a_total{foo="4"} 1 12345678901234567890.1234567890
a_total{foo="5"} 1 1.5e3
# TYPE b counter
# HELP b help
b_total 2 1234567890
# EOF
""")
    a = CounterMetricFamily("a", "help", labels=["foo"])
    a.add_metric(["1"], 1, timestamp=Timestamp(0, 0))
    a.add_metric(["2"], 1, timestamp=Timestamp(0, 0))
    a.add_metric(["3"], 1, timestamp=Timestamp(1, 100000000))
    a.add_metric(["4"], 1, timestamp=Timestamp(12345678901234567890, 123456789))
    a.add_metric(["5"], 1, timestamp=1500.0)
    b = CounterMetricFamily("b", "help")
    b.add_metric([], 2, timestamp=Timestamp(1234567890, 0))
    self.assertEqual([a, b], list(families))
def collect(self):
    """Yield the thread's bucket contents as a counter family, sorted by key."""
    c = CounterMetricFamily('env_dashboard', 'Help text', labels=['id'])
    # Bug fix: dict.iteritems() is Python-2-only and raises AttributeError
    # on Python 3 (which this codebase targets); items() works on both.
    for key, value in sorted(self.thread.bucket.items()):
        c.add_metric([key], value)
    yield c
def trivial_counter(self, name, help, value):
    """Build a label-less counter family named ``hydra_<name>_total``."""
    family = CounterMetricFamily(f"hydra_{name}_total", help)
    family.add_metric([], value)
    return family
def collect(self):
    """Fetch stats from every configured NetScaler and expose them as the
    counters/gauges described by the metrics JSON.

    Collection errors are printed and tolerated: a NetScaler or entity that
    fails to answer simply contributes no samples for this scrape.
    """
    # Collect metrics from NetScalers
    data = {}
    for nsip in self.nsips:
        data[nsip] = {}
        for entity in self.metrics.keys():
            # cycle through metrics json to get required entities whose
            # stats need to be collected
            print('>>> Collecting stats for: %s::%s' % (nsip, entity))
            try:
                data[nsip][entity] = collect_data(nsip, entity, self.username, self.password, self.secure)
            except Exception as e:
                # Best-effort: log and continue with the remaining entities.
                print('>>> Caught exception while collecting data: ' + str(e))

    # Provide collected stats to Prometheus as a counter/guage with desired labels
    for entity_name, entity in self.metrics.items():
        # Labels are declared as (netscaler_field, prometheus_label) pairs;
        # 'nsip' is always appended as the final label.
        if('labels' in entity.keys()):
            label_names = [v[1] for v in entity['labels']]
            label_names.append('nsip')
        else:
            label_names = []
            label_names.append('nsip')
        for ns_metric_name, prom_metric_name in entity.get('counters', []):
            c = CounterMetricFamily(prom_metric_name, ns_metric_name, labels=label_names)
            for nsip in self.nsips:
                entity_stats = data[nsip].get(entity_name, [])
                # The API returns a dict for a single instance; normalize
                # to a list so the loop below handles both shapes.
                if( type(entity_stats) is not list):
                    entity_stats = [entity_stats]
                for data_item in entity_stats:
                    if('labels' in entity.keys()):
                        label_values = [data_item[key] for key in [v[0] for v in entity['labels']]]
                        label_values.append(nsip)
                    else:
                        label_values = [nsip]
                    try:
                        c.add_metric(label_values, float(data_item[ns_metric_name]))
                    except Exception as e:
                        print('>>> Caught exception while adding counter %s to %s: %s' %(ns_metric_name, entity_name, str(e)))
            yield c
        for ns_metric_name, prom_metric_name in entity.get('gauges', []):
            g = GaugeMetricFamily(prom_metric_name, ns_metric_name, labels=label_names)
            for nsip in self.nsips:
                entity_stats = data[nsip].get(entity_name, [])
                if(type(entity_stats) is not list):
                    entity_stats = [entity_stats]
                for data_item in entity_stats:
                    if('labels' in entity.keys()):
                        label_values = [data_item[key] for key in [v[0] for v in entity['labels']]]
                        label_values.append(nsip)
                    else:
                        label_values = [nsip]
                    try:
                        g.add_metric(label_values, float(data_item[ns_metric_name]))
                    except Exception as e:
                        # "guage" typo preserved: this is a runtime message.
                        print('>>> Caught exception while adding guage %s to %s: %s' %(ns_metric_name, entity_name, str(e)) )
            yield g
def metric_processed_jobs(self):
    """Return a counter family holding resque's total processed-job count.

    Redis returns the stored value as a string (or None when the key does
    not exist yet), so it is coerced to a float before being recorded.
    """
    raw = self._r.get(self._r_key('stat:processed')) or 0
    metric = CounterMetricFamily('resque_processed_jobs', "Total number of processed jobs")
    # Bug fix: the raw Redis value was previously passed through as a
    # string; Prometheus sample values must be numeric.
    metric.add_metric([], float(raw))
    return metric
def collect(self):
    """Scrape Blazegraph performance counters, triple count, last-update
    timestamp and allocator usage, yielding Prometheus metric families."""
    blazegraph_metrics = {
        '/Query Engine/queryStartCount': CounterMetricFamily(
            'blazegraph_queries_start',
            'Number of queries that have started since the start of the application.'),
        '/Query Engine/queryDoneCount': CounterMetricFamily(
            'blazegraph_queries_done',
            'Number of queries completed since the start of the application.'),
        '/Query Engine/queryErrorCount': CounterMetricFamily(
            'blazegraph_queries_error',
            'Number of queries in error since the start of the application.'),
        '/Query Engine/queriesPerSecond': GaugeMetricFamily(
            'blazegraph_queries_per_second',
            'Number of queries per second (rolling average).'),
        '/Query Engine/operatorActiveCount': GaugeMetricFamily(
            'blazegraph_operator_active_count',
            'Number of active blazegraph operators'),
        '/Query Engine/runningQueriesCount': GaugeMetricFamily(
            'blazegraph_running_queries_count',
            'Number of running queries'),
        # NOTE: "requets" is a historical typo in the exposed metric name;
        # renaming it would break existing dashboards and alerts.
        '/Query Engine/GeoSpatial/geoSpatialSearchRequests': GaugeMetricFamily(
            'blazegraph_geospatial_search_requets',
            'Number of geospatial search requests since the start of the application.'),
        '/Journal/bytesReadPerSec': GaugeMetricFamily(
            'blazegraph_journal_bytes_read_per_second', ''),
        '/Journal/bytesWrittenPerSec': GaugeMetricFamily(
            'blazegraph_journal_bytes_written_per_second', ''),
        '/Journal/extent': GaugeMetricFamily('blazegraph_journal_extent', ''),
        '/Journal/commitCount': CounterMetricFamily(
            'blazegraph_journal_commit_count', ''),
        '/Journal/commit/totalCommitSecs': GaugeMetricFamily(
            'blazegraph_journal_total_commit_seconds',
            'Total time spent in commit.'),
        '/Journal/commit/flushWriteSetSecs': GaugeMetricFamily(
            'blazegraph_journal_flush_write_set_seconds', ''),
        '/Journal/Concurrency Manager/Read Service/Average Active Count': GaugeMetricFamily(
            'blazegraph_journal_concurrency_read_average_active_count',
            'Average Number of Read Active Threads'),
        '/JVM/Memory/DirectBufferPool/default/bytesUsed': GaugeMetricFamily(
            'blazegraph_jvm_memory_direct_buffer_pool_default_bytes_used', ''),
        '/JVM/Memory/Runtime Free Memory': GaugeMetricFamily(
            'blazegraph_jvm_memory_runtime_free_memory',
            'Current amount of free memory in the JVM.'),
        '/JVM/Memory/Runtime Max Memory': GaugeMetricFamily(
            'blazegraph_jvm_memory_runtime_max_memory',
            'Max amount of memory the JVM can allocate.'),
        '/JVM/Memory/Runtime Total Memory': GaugeMetricFamily(
            'blazegraph_jvm_memory_runtime_total_memory',
            'Total amount of memory allocated to the JVM.'),
        '/JVM/Memory/Garbage Collectors/G1 Old Generation/Collection Count': CounterMetricFamily(
            'blazegraph_jvm_memory_gc_g1_old_collecton_count',
            'Number of old GC since JVM start.'),
        '/JVM/Memory/Garbage Collectors/G1 Old Generation/Cumulative Collection Time': GaugeMetricFamily(
            'blazegraph_jvm_memory_gc_g1_old_cumulative_collection_time',
            'Total time spent in old GC (seconds).'),
        '/JVM/Memory/Garbage Collectors/G1 Young Generation/Collection Count': CounterMetricFamily(
            'blazegraph_jvm_memory_gc_g1_young_collection_count',
            'Number of young GC since JVM start.'),
        '/JVM/Memory/Garbage Collectors/G1 Young Generation/Cumulative Collection Time': GaugeMetricFamily(
            'blazegraph_jvm_memory_gc_g1_young_cumulative_collection_time',
            'Total time spent in young GC (seconds).'),
    }

    for metric_name, metric_family in blazegraph_metrics.items():
        metric_value = self.get_counter(metric_name)
        # Bug fix: the previous code tested ``metric_name is None``, which
        # can never be true for a dict key, so unavailable counters were
        # silently never reported. Warn on a missing value instead.
        if metric_value is None:
            log.warning('Unknown metric %r', metric_name)
        try:
            value = float(metric_value)
        except (ValueError, TypeError):
            # Missing or unparseable counters are exported as NaN.
            value = float('nan')
        metric_family.add_metric([], value)

    triple_metric = GaugeMetricFamily('blazegraph_triples', 'Count of triples in Blazegraph')
    lag_metric = CounterMetricFamily('blazegraph_lastupdated', 'Last update timestamp')
    try:
        # One query returns both the triple count and the dateModified of
        # wikidata.org; each result binding carries exactly one of the keys.
        sparql_query = """ prefix schema: <http://schema.org/>
            SELECT * WHERE { {
                SELECT ( COUNT( * ) AS ?count ) { ?s ?p ?o }
            } UNION {
                SELECT * WHERE { <http://www.wikidata.org> schema:dateModified ?y }
            } }"""
        data = self.execute_sparql(sparql_query)
        for binding in data['results']['bindings']:
            if 'count' in binding:
                triple_count = binding['count']['value']
                triple_metric.add_metric([], float(triple_count))
            elif 'y' in binding:
                lastUpdated = parse(binding['y']['value'])
                # NOTE: strftime('%s') is platform-specific (glibc); kept to
                # preserve the exported value exactly.
                lag_metric.add_metric([], float(lastUpdated.strftime('%s')))
            else:
                raise ValueError(
                    'SPARQL binding returned with unexpected key')
    except requests.exceptions.RequestException:
        log.exception("Error querying endpoint")
        triple_metric.add_metric([], float('nan'))
        lag_metric.add_metric([], float('nan'))

    alloc_metric = GaugeMetricFamily(
        'blazegraph_allocators',
        'Number of used FixedAllocators in Blazegraph')
    alloc_free_metric = GaugeMetricFamily(
        'blazegraph_free_allocators',
        'Number of free FixedAllocators in Blazegraph')
    allocs = self.fetch_allocators()
    if allocs:
        alloc_metric.add_metric([], allocs)
        # Free = capacity (256 * 1024) minus the allocators in use.
        alloc_free_metric.add_metric([], 256 * 1024 - allocs)
    else:
        alloc_metric.add_metric([], float('nan'))
        alloc_free_metric.add_metric([], float('nan'))

    yield triple_metric
    yield lag_metric
    yield alloc_metric
    yield alloc_free_metric
    for metric in blazegraph_metrics.values():
        yield metric
def collect(self):
    """Yield SMS/modem metrics, each fetched live from the gammu API."""
    metric_specs = (
        (GaugeMetricFamily, 'outgoing_queue_length',
         "number of messages in database that wait to be processed by GSM/3G-modem",
         "sms.get_queue_length"),
        (CounterMetricFamily, 'sentitems_length',
         "number of messages in database Sentitems folder",
         "sms.get_sentitems_length"),
        (GaugeMetricFamily, 'gsmsignal',
         "GSM/3G signal strength in percent: values between 0-100",
         "signal.get_gsmsignal"),
    )
    for family_cls, metric_name, help_text, api_method in metric_specs:
        yield family_cls(metric_name, help_text, value=call_api(api_method))
def collect(self):
    """Bump the running request counter and expose its new value.

    Note: the counter is incremented on every scrape, i.e. each call to
    ``collect`` itself counts as one request.
    """
    self.http_requests_total += 1
    yield CounterMetricFamily('http_requests_total',
                              'Total HTTP requests',
                              value=self.http_requests_total)
def collect(self):
    """Scrape the Apache /server-status page and yield Prometheus metrics.

    Collects totals, rates, worker scoreboard, balancer/route status and
    (optionally) per-endpoint response-time histograms. On a failed page
    fetch or parse the error is logged and the scrape yields nothing.
    """
    # Counters
    accesses_total = CounterMetricFamily(
        'apache_accesses_total',
        'Total requests served count since startup',
        labels=['exporter_name'])
    traffic_total = CounterMetricFamily(
        'apache_traffic_bytes_total',
        'Total bytes transfered since startup',
        labels=['exporter_name'])
    balancer_acc = CounterMetricFamily(
        'apache_balancer_requests_total',
        'Total requests count',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    balancer_wr = CounterMetricFamily(
        'apache_balancer_write_bytes_total',
        'Total bytes written',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    balancer_rd = CounterMetricFamily(
        'apache_balancer_read_bytes_total',
        'Total bytes read',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    # Gauges
    requests_sec = GaugeMetricFamily('apache_requests_per_second',
                                     'Requests per second',
                                     labels=['exporter_name'])
    bytes_sec = GaugeMetricFamily('apache_io_bytes_per_second',
                                  'Bytes write/read per second',
                                  labels=['exporter_name'])
    bytes_request = GaugeMetricFamily('apache_io_bytes_per_request',
                                      'Bytes write/read per request',
                                      labels=['exporter_name'])
    route_ok = GaugeMetricFamily(
        'apache_balancer_route_ok',
        'Balancing status of the route is OK',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    route_dis = GaugeMetricFamily(
        'apache_balancer_route_disabled',
        'Balancing status of the route is DISABLED',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    route_err = GaugeMetricFamily(
        'apache_balancer_route_error',
        'Balancing status of the route is ERROR',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    route_unk = GaugeMetricFamily(
        'apache_balancer_route_unknown',
        'Balancing status of the route is UNKNOWN',
        labels=['cluster', 'host', 'route', 'exporter_name'])
    scoreboard = GaugeMetricFamily('apache_scoreboard_current',
                                   'Count of workers grouped by status',
                                   labels=['status', 'exporter_name'])
    latest_scrape = GaugeMetricFamily(
        'apache_latest_scrape_duration_seconds',
        'Latest scrape duration in seconds',
        labels=['metric_name', 'exporter_name'])
    operation_duration = GaugeMetricFamily(
        'apache_operation_duration_seconds',
        'Operation duration in seconds',
        labels=['operation', 'exporter_name'])
    # Histograms
    endpoint_response_time = HistogramMetricFamily(
        'apache_endpoint_response_time_seconds',
        'Response time by endpoints',
        labels=['method', 'endpoint', 'exporter_name'])

    # BUG FIX: was a try/bare-except around os.environ[...]; a bare except
    # also swallows KeyboardInterrupt/SystemExit. environ.get is equivalent.
    exporter_name = os.environ.get('APACHE_EXPORTER_NAME', 'none')

    # BUG FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended monotonic clock for measuring elapsed time.
    start = time.perf_counter()
    try:
        page = requests.get(self.url, verify=False)
        page.raise_for_status()
    except Exception as e:
        self.logger.error(f'Failed to load Apache status page. Exception: {e}')
        # BUG FIX: previously execution continued with `page` undefined and
        # crashed below with NameError; abort the scrape cleanly instead.
        return
    duration = float("%.3g" % (time.perf_counter() - start))
    operation_duration.add_metric(['load_page', exporter_name], duration)

    start = time.perf_counter()
    try:
        root = html.fromstring(page.content)
    except Exception as e:
        self.logger.error(f'Failed to parse page as html. Exception: {e}')
        # BUG FIX: same as above — `root` would be undefined below.
        return
    duration = float("%.3g" % (time.perf_counter() - start))
    operation_duration.add_metric(['parse_page', exporter_name], duration)

    # Total traffic and accesses and requests,bytes per second/request
    start = time.perf_counter()
    for x in range(1, 20):
        tmp_str = root.xpath("/html/body/dl[2]/dt[%d]" % x)[0].text.strip()
        if tmp_str.find('Total accesses:') >= 0:
            match = re.match('Total accesses: (.*) - Total Traffic: (.*)',
                             tmp_str)
            _accesses_total = match.group(1)
            _traffic_total = self.str_to_bytes(match.group(2))
            # Update metrics if they were found
            if _accesses_total is not None:
                accesses_total.add_metric([exporter_name], _accesses_total)
            if _traffic_total is not None:
                traffic_total.add_metric([exporter_name], _traffic_total)
            break
    duration = float("%.3g" % (time.perf_counter() - start))
    latest_scrape.add_metric(['apache_accesses_total', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_traffic_bytes_total', exporter_name],
                             duration)

    start = time.perf_counter()
    for x in range(1, 20):
        tmp_str = root.xpath("/html/body/dl[2]/dt[%d]" % x)[0].text.strip()
        if tmp_str.find('requests') >= 0 and tmp_str.find('second') >= 0:
            match = re.match('(.*) requests/sec - (.*/second) - (.*/request)',
                             tmp_str)
            _requests_sec = match.group(1)
            _bytes_sec = self.str_to_bytes(match.group(2))
            _bytes_request = self.str_to_bytes(match.group(3))
            # Update metrics if they were found
            if _requests_sec is not None:
                requests_sec.add_metric([exporter_name], _requests_sec)
            if _bytes_sec is not None:
                bytes_sec.add_metric([exporter_name], _bytes_sec)
            if _bytes_request is not None:
                bytes_request.add_metric([exporter_name], _bytes_request)
            break
    duration = float("%.3g" % (time.perf_counter() - start))
    latest_scrape.add_metric(['apache_requests_per_second', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_io_bytes_per_second', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_io_bytes_per_request', exporter_name],
                             duration)

    # Get workers statuses
    start = time.perf_counter()
    workers_map = {}
    workers = root.xpath('/html/body/pre')[0].text.strip()
    for symbol in range(0, len(workers)):
        if workers[symbol] in workers_map:
            workers_map[workers[symbol]] += 1
        else:
            workers_map[workers[symbol]] = 1
    # Scoreboard symbol -> human-readable worker status (a dispatch table
    # instead of the long if/elif chain; same mapping).
    status_names = {
        ".": "Open slot",
        "_": "Waiting for Connection",
        "S": "Starting up",
        "R": "Reading Request",
        "W": "Sending Reply",
        "K": "Keepalive",
        "D": "DNS Lookup",
        "C": "Closing connection",
        "L": "Logging",
        "G": "Gracefully finishing",
        "I": "Idle cleanup of worker",
    }
    for worker_status in workers_map:
        if worker_status != "\n":
            # Update workers scoreboard
            status = status_names.get(worker_status, "Unknown")
            scoreboard.add_metric([status, exporter_name],
                                  int(workers_map[worker_status]))
    duration = float("%.3g" % (time.perf_counter() - start))
    latest_scrape.add_metric(['apache_scoreboard_current', exporter_name],
                             duration)

    # Get balancing and routes status
    start = time.perf_counter()
    try:
        cluster_xpaths = json.loads(os.environ['APACHE_EXPORTER_CLUSTERS'])
    except Exception as e:
        self.logger.error(f'Cannot load APACHE_EXPORTER_CLUSTERS. {e}')
        # BUG FIX: was None, and iterating None below raised TypeError.
        cluster_xpaths = {}
    for cluster in cluster_xpaths:
        h = 0
        for row in cluster_xpaths and root.xpath(cluster_xpaths[cluster]):
            # Skip the header row of the balancer table.
            if h == 0:
                h += 1
                continue
            host = "%s" % row[1].text
            route = "%s" % row[3].text
            status = row[2].text
            acc = row[7].text
            wr = self.str_to_bytes(row[8].text)
            rd = self.str_to_bytes(row[9].text)
            # Update nodes statuses
            ok, dis, err, unk = 0, 0, 0, 0
            if status.find('Ok') >= 0:
                ok = 1
            elif status.find('Dis') >= 0:
                dis = 1
            elif status.find('Err') >= 0:
                err = 1
            else:
                unk = 1
            # Route statuses
            route_ok.add_metric([cluster, host, route, exporter_name], ok)
            route_dis.add_metric([cluster, host, route, exporter_name], dis)
            route_err.add_metric([cluster, host, route, exporter_name], err)
            route_unk.add_metric([cluster, host, route, exporter_name], unk)
            # Update requests, wr, rd counters
            balancer_acc.add_metric([cluster, host, route, exporter_name],
                                    int(acc))
            balancer_wr.add_metric([cluster, host, route, exporter_name],
                                   int(wr))
            balancer_rd.add_metric([cluster, host, route, exporter_name],
                                   int(rd))
    duration = float("%.3g" % (time.perf_counter() - start))
    latest_scrape.add_metric(['apache_balancer_route_ok', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_balancer_route_disabled', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_balancer_route_error', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_balancer_route_unknown', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_balancer_requests_total', exporter_name],
                             duration)
    latest_scrape.add_metric(['apache_balancer_write_bytes_total',
                              exporter_name], duration)
    latest_scrape.add_metric(['apache_balancer_read_bytes_total',
                              exporter_name], duration)

    # Get response time by endpoints
    start = time.perf_counter()
    header_row = True
    for row in root.xpath('/html/body/table[1]/tr'):
        last_column = len(row)
        if header_row:
            header_row = False
            # Locate the 'Req' (ms spent) and 'Request' (URL) columns.
            for col in range(0, last_column):
                header = row[col].text.upper()
                if header == 'REQ':
                    req_pos = col
                elif header == 'REQUEST':
                    request_pos = col
            continue
        try:
            duration = float(row[req_pos].text) / 1000
            url = ("%s" % row[request_pos].text).strip()
            method, url = self.sanitize_url(url)
            if method is not None and url is not None:
                self.put_histogram_values(method, url, duration)
        except Exception:
            # Best effort per row; also covers req_pos/request_pos being
            # unbound when the expected headers were not found.
            pass
    # group buckets into one list
    url_buckets = {}
    for i in self.url_count:
        if (i[0], i[1]) not in url_buckets:
            url_buckets[i[0], i[1]] = [[i[2], self.url_count[i]]]
        else:
            url_buckets[i[0], i[1]].append([i[2], self.url_count[i]])
    for t in url_buckets:
        if (t[0], t[1]) in self.url_sum:
            endpoint_response_time.add_metric(
                [t[0], t[1], exporter_name],
                buckets=url_buckets[t],
                sum_value=self.url_sum[t[0], t[1]])
    duration = float("%.3g" % (time.perf_counter() - start))
    latest_scrape.add_metric(
        ['apache_endpoint_response_time_seconds', exporter_name], duration)

    # counters
    yield accesses_total
    yield traffic_total
    yield balancer_acc
    yield balancer_wr
    yield balancer_rd
    # gauges
    yield requests_sec
    yield bytes_sec
    yield bytes_request
    yield route_ok
    yield route_dis
    yield route_err
    yield route_unk
    yield scoreboard
    yield latest_scrape
    yield operation_duration
    # histograms
    if self.endpoint_stats:
        yield endpoint_response_time
def collect(self):
    """Expose per-project OpenStack usage (vCPU, RAM MB, instance counts)."""
    vcpu = CounterMetricFamily('openstack_vcpu_usage', 'Help text',
                               labels=['project'])
    ram_mb = CounterMetricFamily('openstack_ram_mb_usage', 'Help text',
                                 labels=['project'])
    instances = CounterMetricFamily('openstack_instances_usage', 'Help text',
                                    labels=['project'])
    usages = get_usage()  # TODO!!!
    for usage in usages:
        project_name = usage['project_name']
        instances.add_metric([project_name], usage['server_usage_count'])
        # BUG FIX: was ([project_name, "vcpu"], ...) — two label values for a
        # single declared 'project' label.
        vcpu.add_metric([project_name], usage.get('total_vcpus_usage'))
        # BUG FIX: was [project.name] — 'project' is undefined (NameError).
        ram_mb.add_metric([project_name], usage.get('total_memory_mb_usage'))
    yield vcpu
    yield ram_mb
    yield instances
def get_counters(self, metrics: dict):
    """Translate a raw Rundeck metrics payload into Prometheus families.

    Walks each section of the payload (counters / gauges / meters /
    timers), normalises metric names to ``rundeck_*`` snake_case, skips
    derived rate values, and yields one metric family per datapoint.
    """
    for section, entries in metrics.items():
        if not isinstance(entries, dict):
            continue
        for raw_name, payload in entries.items():
            name = re.sub(r'[-.]', '_', raw_name)
            # Rates are derived values; they are not exported.
            if 'rate' in name.lower():
                continue
            if not name.startswith('rundeck'):
                name = 'rundeck_' + name
            if section == 'counters' and 'status' not in name:
                family = GaugeMetricFamily(name, 'Rundeck counters metrics')
                family.add_metric([], payload['count'])
                yield family
            elif section == 'gauges':
                value = payload['value']
                # Service gauges are monotonic, export them as counters.
                if 'services' in name:
                    family = CounterMetricFamily(name,
                                                 'Rundeck gauges metrics')
                else:
                    family = GaugeMetricFamily(name,
                                               'Rundeck gauges metrics')
                family.add_metric([], value if value is not None else 0)
                yield family
            elif section in ('meters', 'timers'):
                # Only the numeric 'count' field is exported.
                for stat_name, stat_value in payload.items():
                    if stat_name == 'count' and not isinstance(stat_value,
                                                               str):
                        family = CounterMetricFamily(
                            name, f"Rundeck {section} metrics")
                        family.add_metric([], stat_value)
                        yield family
def collect(self):
    """Collect NetScaler entity stats and yield Prometheus metric families.

    Skips the scrape when another stats/session operation is already in
    flight; access is serialised through ``self.stats_access_pending``.
    """
    if self.stats_access_pending or self.ns_session_pending:
        return
    if not self.login():
        return
    data = {}
    self.stats_access_pending = True
    for entity in self.metrics.keys():
        logger.debug('Collecting metric {} for {}'.format(
            entity, self.nsip))
        try:
            status, entity_data = self.collect_data(entity)
        except Exception:
            logger.error('Could not collect metric :{}'.format(entity))
            # BUG FIX: 'status' was unbound here, so the check below raised
            # UnboundLocalError; skip this entity and keep scraping.
            continue
        if status == self.FAILURE:
            self.ns_session_clear()
            # BUG FIX: release the in-flight flag before the early return,
            # otherwise every future collect() bails out forever.
            self.stats_access_pending = False
            return
        if entity_data:
            data[entity] = entity_data

    # Add labels to metrics and provide to Prometheus
    log_prefix_match = True
    for entity_name, entity in self.metrics.items():
        if 'labels' in entity.keys():
            label_names = [v[1] for v in entity['labels']]
            label_names.append('nsip')
        else:
            label_names = ['nsip']

        entity_stats = data.get(entity_name, [])
        if type(entity_stats) is not list:
            entity_stats = [entity_stats]

        # Provide collected metric to Prometheus as a counter
        for ns_metric_name, prom_metric_name in entity.get('counters', []):
            c = CounterMetricFamily(prom_metric_name,
                                    ns_metric_name,
                                    labels=label_names)
            for data_item in entity_stats:
                if not data_item:
                    continue
                if ns_metric_name not in data_item.keys():
                    logger.info(
                        'Counter stats {} not enabled for entity: {}'.format(
                            ns_metric_name, entity_name))
                    break
                if 'labels' in entity.keys():
                    label_values = [
                        data_item[key]
                        for key in [v[0] for v in entity['labels']]
                    ]
                    # populate and update k8s_ingress_lbvs metrics if in
                    # k8s-CIC environment
                    if entity_name == "k8s_ingress_lbvs":
                        if os.environ.get(
                                'KUBERNETES_SERVICE_HOST') is not None:
                            prefix_match = self.update_lbvs_label(
                                label_values, ns_metric_name,
                                log_prefix_match)
                            if not prefix_match:
                                log_prefix_match = False
                                continue
                        else:
                            continue
                    label_values.append(self.nsip)
                else:
                    label_values = [self.nsip]
                try:
                    c.add_metric(label_values,
                                 float(data_item[ns_metric_name]))
                except Exception as e:
                    logger.error(
                        'Caught exception while adding counter %s to %s: %s'
                        % (ns_metric_name, entity_name, str(e)))
            yield c

        # Provide collected metric to Prometheus as a gauge
        for ns_metric_name, prom_metric_name in entity.get('gauges', []):
            g = GaugeMetricFamily(prom_metric_name,
                                  ns_metric_name,
                                  labels=label_names)
            for data_item in entity_stats:
                if not data_item:
                    continue
                if ns_metric_name not in data_item.keys():
                    logger.info(
                        'Gauge stat {} not enabled for entity: {}'.format(
                            ns_metric_name, entity_name))
                    break
                if 'labels' in entity.keys():
                    label_values = [
                        data_item[key]
                        for key in [v[0] for v in entity['labels']]
                    ]
                    # populate and update k8s_ingress_lbvs metrics if in
                    # k8s-CIC environment
                    if entity_name == "k8s_ingress_lbvs":
                        if os.environ.get(
                                'KUBERNETES_SERVICE_HOST') is not None:
                            prefix_match = self.update_lbvs_label(
                                label_values, ns_metric_name,
                                log_prefix_match)
                            if not prefix_match:
                                log_prefix_match = False
                                continue
                        else:
                            continue
                    label_values.append(self.nsip)
                else:
                    label_values = [self.nsip]
                try:
                    g.add_metric(label_values,
                                 float(data_item[ns_metric_name]))
                except Exception as e:
                    logger.error(
                        'Caught exception while adding counter {} to {}: {}'
                        .format(ns_metric_name, entity_name, str(e)))
            yield g
    self.stats_access_pending = False
def collect(self):
    """Yield one Prometheus metric family per configured Druid datapoint.

    Walks the per-daemon metric configuration and exposes whatever
    datapoints the exporter has accumulated in ``self.counters`` /
    ``self.histograms``, then finishes with the exporter's own
    bookkeeping counter.
    """
    # Loop through all metrics configured, and get datapoints
    # for them saved by the exporter.
    for daemon in self.metrics_config.keys():
        for druid_metric_name in self.metrics_config[daemon]:
            metric_type = self.metrics_config[daemon][druid_metric_name][
                'type']
            if metric_type == 'gauge' or metric_type == 'counter':
                # Bare subscripts act as an existence probe: a missing key
                # means no datapoint was registered yet, so skip the metric.
                try:
                    self.counters[druid_metric_name]
                    self.counters[druid_metric_name][daemon]
                except KeyError:
                    continue
                if metric_type == 'gauge':
                    metric_family_obj = GaugeMetricFamily
                else:
                    metric_family_obj = CounterMetricFamily
                prometheus_metric = metric_family_obj(
                    self.metrics_config[daemon][druid_metric_name]
                    ['prometheus_metric_name'],
                    self.metrics_config[daemon][druid_metric_name]
                    ['description'],
                    labels=map(
                        lambda x: x.lower(), self.metrics_config[daemon]
                        [druid_metric_name]['labels']))
                label_values = list(
                    self.counters[druid_metric_name][daemon].keys())
                for label_value in label_values:
                    value = self.counters[druid_metric_name][daemon][
                        label_value]
                    prometheus_metric.add_metric(label_value, value)
            elif metric_type == 'histogram':
                try:
                    self.histograms[druid_metric_name]
                    self.histograms[druid_metric_name][daemon]
                except KeyError:
                    continue
                prometheus_metric = HistogramMetricFamily(
                    self.metrics_config[daemon][druid_metric_name]
                    ['prometheus_metric_name'],
                    self.metrics_config[daemon][druid_metric_name]
                    ['description'],
                    labels=map(
                        lambda x: x.lower(), self.metrics_config[daemon]
                        [druid_metric_name]['labels']))
                label_values = list(
                    self.histograms[druid_metric_name][daemon].keys())
                for label_value in label_values:
                    value = self.histograms[druid_metric_name][daemon][
                        label_value]
                    # Every entry except 'sum' is a histogram bucket bound.
                    buckets_without_sum = [[key, value]
                                           for key, value in value.items()
                                           if key != 'sum']
                    prometheus_metric.add_metric(
                        label_value,
                        buckets=buckets_without_sum,
                        sum_value=value['sum'])
            else:
                log.info(
                    'metric type not supported: {}'.format(metric_type))
                continue
            yield prometheus_metric
    # Exporter self-metric: total datapoints ingested so far.
    registered = CounterMetricFamily(
        'druid_exporter_datapoints_registered',
        'Number of datapoints successfully registered '
        'by the exporter.')
    registered.add_metric([], self.datapoints_registered)
    yield registered
def _setup_empty_prometheus_metrics(self):
    """
    The metrics we want to export.

    Builds fresh (empty) metric families from spec tables so every
    scrape starts from a clean slate; the resulting dict contents and
    insertion order match the original hand-written literal.
    """
    usage_labels = ["bucket", "owner", "category", "cluster"]
    bucket_labels = ["bucket", "owner", "zonegroup", "cluster"]
    user_labels = ["user", "cluster"]

    self._prometheus_metrics = {}

    # Per-category usage counters.
    for key, name, help_text in (
            ('ops', 'radosgw_usage_ops_total',
             'Number of operations'),
            ('successful_ops', 'radosgw_usage_successful_ops_total',
             'Number of successful operations'),
            ('bytes_sent', 'radosgw_usage_sent_bytes_total',
             'Bytes sent by the RADOSGW'),
            ('bytes_received', 'radosgw_usage_received_bytes_total',
             'Bytes received by the RADOSGW')):
        self._prometheus_metrics[key] = CounterMetricFamily(
            name, help_text, labels=list(usage_labels))

    # Per-bucket usage and quota gauges.
    for key, name, help_text in (
            ('bucket_usage_bytes', 'radosgw_usage_bucket_bytes',
             'Bucket used bytes'),
            ('bucket_utilized_bytes', 'radosgw_usage_bucket_utilized_bytes',
             'Bucket utilized bytes'),
            ('bucket_usage_objects', 'radosgw_usage_bucket_objects',
             'Number of objects in bucket'),
            ('bucket_quota_enabled', 'radosgw_usage_bucket_quota_enabled',
             'Quota enabled for bucket'),
            ('bucket_quota_max_size', 'radosgw_usage_bucket_quota_size',
             'Maximum allowed bucket size'),
            ('bucket_quota_max_size_bytes',
             'radosgw_usage_bucket_quota_size_bytes',
             'Maximum allowed bucket size in bytes'),
            ('bucket_quota_max_objects',
             'radosgw_usage_bucket_quota_size_objects',
             'Maximum allowed bucket size in number of objects')):
        self._prometheus_metrics[key] = GaugeMetricFamily(
            name, help_text, labels=list(bucket_labels))

    # Per-user quota and totals gauges.
    for key, name, help_text in (
            ('user_quota_enabled', 'radosgw_usage_user_quota_enabled',
             'User quota enabled for bucket'),
            ('user_quota_max_size', 'radosgw_usage_user_quota_size',
             'Maximum allowed bucket size for user'),
            ('user_quota_max_size_bytes',
             'radosgw_usage_user_quota_size_bytes',
             'Maximum allowed bucket size in bytes for user'),
            ('user_quota_max_objects',
             'radosgw_usage_user_quota_size_objects',
             'Maximum allowed bucket size in number of objects'),
            ('user_total_objects', 'radosgw_usage_user_total_objects',
             'Usage of objects by user'),
            ('user_total_bytes', 'radosgw_usage_user_total_bytes',
             'Usage of bytes by user')):
        self._prometheus_metrics[key] = GaugeMetricFamily(
            name, help_text, labels=list(user_labels))

    self._prometheus_metrics['scrape_duration_seconds'] = GaugeMetricFamily(
        'radosgw_usage_scrape_duration_seconds',
        'Ammount of time each scrape takes',
        labels=[])
def collect(self):
    """Scrape every configured NetScaler (NSIP) and yield metric families.

    First pulls raw stats for each (nsip, entity) pair via the Nitro API,
    then converts each configured counter/gauge into a labelled Prometheus
    metric family.
    """
    data = {}
    for nsip in self.nsips:
        data[nsip] = {}
        for entity in self.metrics.keys():
            logger.info('Collecting metric %s for %s' % (entity, nsip))
            try:
                data[nsip][entity] = collect_data(nsip, entity,
                                                  self.username,
                                                  self.password,
                                                  self.protocol,
                                                  self.nitro_timeout)
            except Exception as e:
                # Best effort: a failed entity simply stays absent from
                # `data` and is skipped below via .get(..., []).
                logger.warning('Could not collect metric: ' + str(e))
    # Add labels to metrics and provide to Prometheus
    # Used to log the lbvserver-prefix mismatch only once per scrape.
    log_prefix_match = True
    for entity_name, entity in self.metrics.items():
        if ('labels' in entity.keys()):
            # entity['labels'] is a list of (nitro_field, prom_label) pairs.
            label_names = [v[1] for v in entity['labels']]
            label_names.append('nsip')
        else:
            label_names = []
            label_names.append('nsip')
        # Provide collected metric to Prometheus as a counter
        for ns_metric_name, prom_metric_name in entity.get('counters', []):
            c = CounterMetricFamily(prom_metric_name,
                                    ns_metric_name,
                                    labels=label_names)
            for nsip in self.nsips:
                entity_stats = data[nsip].get(entity_name, [])
                # Single-instance entities come back as a dict; normalise.
                if (type(entity_stats) is not list):
                    entity_stats = [entity_stats]
                for data_item in entity_stats:
                    if not data_item:
                        continue
                    if ns_metric_name not in data_item.keys():
                        logger.warning(
                            'Counter stats for %s not enabled in netscalar %s, so could not add to %s'
                            % (ns_metric_name, nsip, entity_name))
                        break
                    if ('labels' in entity.keys()):
                        label_values = [
                            data_item[key]
                            for key in [v[0] for v in entity['labels']]
                        ]
                        # Inside a k8s CIC deployment, rewrite lbvserver
                        # labels; skip samples whose prefix doesn't match.
                        if os.environ.get(
                                'KUBERNETES_SERVICE_HOST') is not None:
                            if entity_name == "lbvserver":
                                prefix_match = update_lbvs_label(
                                    self.k8s_cic_prefix, label_values,
                                    ns_metric_name, log_prefix_match)
                                if not prefix_match:
                                    log_prefix_match = False
                                    continue
                        label_values.append(nsip)
                    else:
                        label_values = [nsip]
                    try:
                        c.add_metric(label_values,
                                     float(data_item[ns_metric_name]))
                    except Exception as e:
                        logger.error(
                            'Caught exception while adding counter %s to %s: %s'
                            % (ns_metric_name, entity_name, str(e)))
            yield c
        # Provide collected metric to Prometheus as a gauge
        for ns_metric_name, prom_metric_name in entity.get('gauges', []):
            g = GaugeMetricFamily(prom_metric_name,
                                  ns_metric_name,
                                  labels=label_names)
            for nsip in self.nsips:
                entity_stats = data[nsip].get(entity_name, [])
                if (type(entity_stats) is not list):
                    entity_stats = [entity_stats]
                for data_item in entity_stats:
                    if not data_item:
                        continue
                    if ns_metric_name not in data_item.keys():
                        logger.warning(
                            'Gauge stats for %s not enabled in netscalar %s, so could not add to %s'
                            % (ns_metric_name, nsip, entity_name))
                        break
                    if ('labels' in entity.keys()):
                        label_values = [
                            data_item[key]
                            for key in [v[0] for v in entity['labels']]
                        ]
                        # NOTE(review): unlike the counter branch above, the
                        # lbvserver rewrite here is NOT guarded by the
                        # KUBERNETES_SERVICE_HOST check — confirm whether
                        # that asymmetry is intentional.
                        if entity_name == "lbvserver":
                            prefix_match = update_lbvs_label(
                                self.k8s_cic_prefix, label_values,
                                ns_metric_name, log_prefix_match)
                            if not prefix_match:
                                log_prefix_match = False
                                continue
                        label_values.append(nsip)
                    else:
                        label_values = [nsip]
                    try:
                        g.add_metric(label_values,
                                     float(data_item[ns_metric_name]))
                    except Exception as e:
                        logger.error(
                            'Caught exception while adding counter %s to %s: %s'
                            % (ns_metric_name, entity_name, str(e)))
            yield g
def to_metric(self, desc, tag_values, agg_data):
    """ to_metric translate the data that OpenCensus create
    to Prometheus format, using Prometheus Metric object

    :type desc: dict
    :param desc: The map that describes view definition

    :type tag_values: tuple of :class:
        `~opencensus.tags.tag_value.TagValue`
    :param object of opencensus.tags.tag_value.TagValue:
        TagValue object used as label values

    :type agg_data: object of :class:
        `~opencensus.stats.aggregation_data.AggregationData`
    :param object of opencensus.stats.aggregation_data.AggregationData:
        Aggregated data that needs to be converted as Prometheus samples

    :rtype: :class:`~prometheus_client.core.CounterMetricFamily` or
        :class:`~prometheus_client.core.HistogramMetricFamily` or
        :class:`~prometheus_client.core.UnknownMetricFamily` or
        :class:`~prometheus_client.core.GaugeMetricFamily`
    :returns: A Prometheus metric object
    """
    metric_name = desc["name"]
    metric_description = desc["documentation"]
    label_keys = desc["labels"]
    metric_units = desc["units"]

    assert (len(tag_values) == len(label_keys))
    # Prometheus requires that all tag values be strings, hence
    # the need to cast None to the empty string before exporting. See
    # https://github.com/census-instrumentation/opencensus-python/issues/480
    # BUG FIX: compare against None explicitly so a legitimate falsy
    # tag value is not silently replaced by the empty string.
    tag_values = [tv if tv is not None else "" for tv in tag_values]

    if isinstance(agg_data, aggregation_data_module.CountAggregationData):
        metric = CounterMetricFamily(name=metric_name,
                                     documentation=metric_description,
                                     unit=metric_units,
                                     labels=label_keys)
        metric.add_metric(labels=tag_values, value=agg_data.count_data)
        return metric

    elif isinstance(agg_data,
                    aggregation_data_module.DistributionAggregationData):
        assert (agg_data.bounds == sorted(agg_data.bounds))
        # buckets are a list of buckets. Each bucket is another list with
        # a pair of bucket name and value, or a triple of bucket name,
        # value, and exemplar. buckets need to be in order.
        buckets = []
        cum_count = 0  # Prometheus buckets expect cumulative count.
        for ii, bound in enumerate(agg_data.bounds):
            cum_count += agg_data.counts_per_bucket[ii]
            bucket = [str(bound), cum_count]
            buckets.append(bucket)
        # Prometheus requires buckets to be sorted, and +Inf present.
        # In OpenCensus we don't have +Inf in the bucket bonds so need to
        # append it here.
        buckets.append(["+Inf", agg_data.count_data])
        metric = HistogramMetricFamily(name=metric_name,
                                       documentation=metric_description,
                                       labels=label_keys)
        metric.add_metric(
            labels=tag_values,
            buckets=buckets,
            sum_value=agg_data.sum,
        )
        return metric

    elif isinstance(agg_data, aggregation_data_module.SumAggregationData):
        # Sums map to Prometheus "unknown" (untyped) samples.
        metric = UnknownMetricFamily(name=metric_name,
                                     documentation=metric_description,
                                     labels=label_keys)
        metric.add_metric(labels=tag_values, value=agg_data.sum_data)
        return metric

    elif isinstance(agg_data,
                    aggregation_data_module.LastValueAggregationData):
        metric = GaugeMetricFamily(name=metric_name,
                                   documentation=metric_description,
                                   labels=label_keys)
        metric.add_metric(labels=tag_values, value=agg_data.value)
        return metric

    else:
        raise ValueError(f"unsupported aggregation type {type(agg_data)}")
def collect(self):
    """Export PyPy GC timing and allocator memory statistics.

    ``gc.get_stats`` returns a pretty-printer object whose ``_s``
    attribute carries the same figures as raw integers. Field naming on
    ``_s`` only vaguely matches the pretty-printed table; the mapping
    used below is:

        Total memory consumed:
            GC used:            total_gc_memory (peak: peak_memory)
              in arenas:        total_arena_memory
              rawmalloced:      total_rawmalloced_memory
              nursery:          nursery_size
            raw assembler used: jit_backend_used
        Total memory allocated:
            GC allocated:       total_allocated_memory
                                (peak: peak_allocated_memory)
              in arenas:        peak_arena_memory
              rawmalloced:      peak_rawmalloced_memory
              nursery:          nursery_size
            raw assembler:      jit_backend_allocated
        Total time spent in GC: total_gc_time
    """
    stats = gc.get_stats(memory_pressure=False)  # type: ignore
    s = stats._s  # type: ignore

    gc_time = CounterMetricFamily(
        "pypy_gc_time_seconds_total",
        "Total time spent in PyPy GC",
        labels=[],
    )
    # total_gc_time is reported in milliseconds; convert to seconds.
    gc_time.add_metric([], s.total_gc_time / 1000)
    yield gc_time

    mem = GaugeMetricFamily(
        "pypy_memory_bytes",
        "Memory tracked by PyPy allocator",
        labels=["state", "class", "kind"],
    )
    samples = (
        # (state, class, kind, value)
        ("used", "", "jit", s.jit_backend_used),
        ("allocated", "", "jit", s.jit_backend_allocated),
        ("used", "", "arenas", s.total_arena_memory),
        ("allocated", "", "arenas", s.peak_arena_memory),
        ("used", "", "rawmalloced", s.total_rawmalloced_memory),
        ("allocated", "", "rawmalloced", s.peak_rawmalloced_memory),
        ("used", "", "nursery", s.nursery_size),
        ("allocated", "", "nursery", s.nursery_size),
        ("used", "totals", "gc", s.total_gc_memory),
        ("allocated", "totals", "gc", s.total_allocated_memory),
        ("used", "totals", "gc_peak", s.peak_memory),
        ("allocated", "totals", "gc_peak", s.peak_allocated_memory),
    )
    for state, klass, kind, value in samples:
        mem.add_metric([state, klass, kind], value)
    yield mem
def get_vpu_class_info(_self):
    """Build one counter family per Informix VPU class from the stats table.

    Each family carries one sample per raw stat column, labelled by
    server host, class name and stat name.
    """
    records = _self.execute_sql('vpu_class')
    stat_columns = ('usecs_user', 'usecs_sys', 'readyqueue', 'num_ready',
                    'idle', 'semops', 'busy_waits', 'spins')
    vpu_classes = []
    for record in records:
        classname = record['classname']
        class_info = CounterMetricFamily(
            'node_ifx_vpu_class_{0}'.format(classname),
            'VPU info value for class {0}'.format(classname),
            labels=["ifxserver", "class", "metric"])
        for column in stat_columns:
            class_info.add_metric([_self.dbhostname, classname, column],
                                  str(record[column]))
        vpu_classes.append(class_info)
    return vpu_classes
def reset(self, label_keys):
    """Replace the current counter family with a fresh, empty one."""
    fresh = CounterMetricFamily(
        self.name,
        self.description,
        labels=label_keys,
    )
    self.metric = fresh
def _expose_counter(name, value, labels_keys, labels_values):
    """Return a counter family holding a single labelled sample."""
    counter = CounterMetricFamily(name, "", labels=labels_keys)
    counter.add_metric(labels_values, value)
    return counter
def collect(self):
    """Yield Prometheus metric families for collected Druid datapoints.

    Walks the exporter's histogram and counter registries for each
    supported daemon and yields one metric family per known metric,
    followed by a counter of datapoints the exporter itself registered.
    Metrics a daemon supports but has not reported yet are exposed as
    NaN; unsupported metrics are skipped.
    """
    # Query-time histograms common to Broker and Historical.
    for daemon in ['broker', 'historical']:
        query_metrics = self._get_query_histograms(daemon)
        # Bug fix: a redundant self._get_cache_counters(daemon) call was
        # made here and its result discarded; cache counters are built in
        # their dedicated loop below.
        for metric in query_metrics:
            if not self.histograms[metric]:
                continue
            if daemon in self.histograms[metric]:
                for datasource in self.histograms[metric][daemon]:
                    buckets = self.histograms[metric][daemon][datasource]
                    # The running 'sum' is stored alongside the buckets;
                    # split it out for add_metric's sum_value argument.
                    buckets_without_sum = [(k, v) for k, v in buckets.items()
                                           if k != 'sum']
                    query_metrics[metric].add_metric(
                        [datasource], buckets=buckets_without_sum,
                        sum_value=self.histograms[metric][daemon][datasource]['sum'])
                yield query_metrics[metric]

    # Cache metrics common to Broker and Historical.
    for daemon in ['broker', 'historical']:
        cache_metrics = self._get_cache_counters(daemon)
        for metric in cache_metrics:
            if not self.counters[metric] or daemon not in self.counters[metric]:
                if not self.supported_metric_names[daemon][metric]:
                    cache_metrics[metric].add_metric([], float('nan'))
                else:
                    continue
            else:
                cache_metrics[metric].add_metric([], self.counters[metric][daemon])
            yield cache_metrics[metric]

    # Query count metrics common to Broker and Historical.
    for daemon in ['broker', 'historical']:
        query_metrics = self._get_query_counters(daemon)
        for metric in query_metrics:
            if not self.counters[metric] or daemon not in self.counters[metric]:
                if not self.supported_metric_names[daemon][metric]:
                    query_metrics[metric].add_metric([], float('nan'))
                else:
                    continue
            else:
                query_metrics[metric].add_metric([], self.counters[metric][daemon])
            yield query_metrics[metric]

    # Daemon-specific counters, labelled zero, one, or two levels deep
    # depending on supported_metric_names.
    historical_health_metrics = self._get_historical_counters()
    coordinator_metrics = self._get_coordinator_counters()
    overlord_metrics = self._get_overlord_counters()
    for daemon, metrics in [('coordinator', coordinator_metrics),
                            ('historical', historical_health_metrics),
                            ('overlord', overlord_metrics)]:
        for metric in metrics:
            if not self.counters[metric] or daemon not in self.counters[metric]:
                if not self.supported_metric_names[daemon][metric]:
                    metrics[metric].add_metric([], float('nan'))
                else:
                    continue
            else:
                labels = self.supported_metric_names[daemon][metric]
                if not labels:
                    metrics[metric].add_metric(
                        [], self.counters[metric][daemon])
                elif len(labels) == 1:
                    for label in self.counters[metric][daemon]:
                        metrics[metric].add_metric(
                            [label], self.counters[metric][daemon][label])
                else:
                    for outer_label in self.counters[metric][daemon]:
                        for inner_label in self.counters[metric][daemon][outer_label]:
                            metrics[metric].add_metric(
                                [outer_label, inner_label],
                                self.counters[metric][daemon][outer_label][inner_label])
            yield metrics[metric]

    registered = CounterMetricFamily('druid_exporter_datapoints_registered',
                                     'Number of datapoints successfully registered '
                                     'by the exporter.')
    registered.add_metric([], self.datapoints_registered)
    yield registered
def collect(self): base_metric_name = "covid_19_" covid_states = requests.get('https://corona.lmao.ninja/v2/states') json_covid_states = covid_states.json() keys = json_covid_states[0].keys() # cases metric_name = base_metric_name + "cases" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['cases']) yield c # todays_cases metric_name = base_metric_name + "todayCases" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['todayCases']) yield c # deaths metric_name = base_metric_name + "deaths" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['deaths']) yield c # todayDeaths metric_name = base_metric_name + "todayDeaths" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['todayDeaths']) yield c # active metric_name = base_metric_name + "active" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['active']) yield c # tests metric_name = base_metric_name + "tests" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['tests']) yield c # testsPerOneMillion metric_name = base_metric_name + "testsPerOneMillion" c = CounterMetricFamily(metric_name, 'Help text', labels=['state']) for state in json_covid_states: c.add_metric([state['state']], state['testsPerOneMillion']) yield c
def test_fallback_to_state_machine_label_parsing(self):
    """Exercise _parse_sample's fallback to the state-machine label parser
    for label values containing '#', and the fast path when '#' is absent."""
    from unittest.mock import patch

    from prometheus_client.openmetrics.parser import _parse_sample

    # Dotted paths of the parser internals patched below.
    parse_sample_function = "prometheus_client.openmetrics.parser._parse_sample"
    parse_labels_function = "prometheus_client.openmetrics.parser._parse_labels"
    parse_remaining_function = "prometheus_client.openmetrics.parser._parse_remaining_text"
    state_machine_function = "prometheus_client.openmetrics.parser._parse_labels_with_state_machine"

    # With _parse_sample stubbed, confirm the family parser hands it the
    # raw sample line unchanged and uses its return value verbatim.
    parse_sample_return_value = Sample("a_total", {"foo": "foo # bar"}, 1)
    with patch(parse_sample_function, return_value=parse_sample_return_value) as mock:
        families = text_string_to_metric_families("""# TYPE a counter
# HELP a help
a_total{foo="foo # bar"} 1
# EOF
""")
        a = CounterMetricFamily("a", "help", labels=["foo"])
        a.add_metric(["foo # bar"], 1)
        self.assertEqual([a], list(families))
        mock.assert_called_once_with('a_total{foo="foo # bar"} 1')

    # First fallback case: '#' inside a label value forces the state
    # machine; the fast label parser must not be called at all.
    state_machine_return_values = [{
        "foo": "foo # bar"
    }, len('foo="foo # bar"}')]
    parse_remaining_values = [1, None, None]
    with patch(parse_labels_function) as mock1:
        with patch(state_machine_function, return_value=state_machine_return_values) as mock2:
            with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3:
                sample = _parse_sample('a_total{foo="foo # bar"} 1')
                s = Sample("a_total", {"foo": "foo # bar"}, 1)
                self.assertEqual(s, sample)
                mock1.assert_not_called()
                mock2.assert_called_once_with('foo="foo # bar"} 1')
                mock3.assert_called_once_with('1')

    # Second fallback case: '#' introducing an exemplar also routes
    # through the state machine; timestamp and exemplar must survive.
    state_machine_return_values = [{"le": "1.0"}, len('le="1.0"}')]
    parse_remaining_values = [
        0.0, Timestamp(123, 0),
        Exemplar({"a": "b"}, 0.5)
    ]
    with patch(parse_labels_function) as mock1:
        with patch(state_machine_function, return_value=state_machine_return_values) as mock2:
            with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3:
                sample = _parse_sample(
                    'a_bucket{le="1.0"} 0 123 # {a="b"} 0.5')
                s = Sample("a_bucket", {"le": "1.0"}, 0.0, Timestamp(123, 0), Exemplar({"a": "b"}, 0.5))
                self.assertEqual(s, sample)
                mock1.assert_not_called()
                mock2.assert_called_once_with(
                    'le="1.0"} 0 123 # {a="b"} 0.5')
                mock3.assert_called_once_with('0 123 # {a="b"} 0.5')

    # No-fallback case: without '#', the fast label parser handles the
    # line and the state machine is never consulted.
    parse_labels_return_values = {"foo": "foo#bar"}
    parse_remaining_values = [1, None, None]
    with patch(parse_labels_function, return_value=parse_labels_return_values) as mock1:
        with patch(state_machine_function) as mock2:
            with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3:
                sample = _parse_sample('a_total{foo="foo#bar"} 1')
                s = Sample("a_total", {"foo": "foo#bar"}, 1)
                self.assertEqual(s, sample)
                mock1.assert_called_once_with('foo="foo#bar"')
                mock2.assert_not_called()
                mock3.assert_called_once_with('1')
def collect(self): size = CounterMetricFamily('pg_master_data_size', 'size database', labels=['db_name']) size.add_metric([get_db['db_name']], get_db['db_size']) max_connections = CounterMetricFamily('pg_master_max_connections', 'max_connections', labels=['db_name']) max_connections.add_metric([get_db['db_name']], get_db['max_connections']) total_connections = CounterMetricFamily('pg_master_total_connections', 'total_connections', labels=['db_name']) total_connections.add_metric([get_db['db_name']], get_db['total_connections']) left_connections = CounterMetricFamily('pg_master_left_connections', 'left_connections', labels=['db_name']) left_connections.add_metric([get_db['db_name']], get_db['left_connections']) db_deadlocks = CounterMetricFamily('pg_master_db_deadlocks', 'db_deadlocks', labels=['db_name']) db_deadlocks.add_metric([get_db['db_name']], get_db['db_deadlocks']) replic_status = CounterMetricFamily('pg_master_replic_status', 'replic_status', labels=['db_name']) replic_status.add_metric([get_db['db_name']], get_db['replic_status']) replic_usesysid = CounterMetricFamily('pg_master_replic_usesysid', 'replic_usesysid', labels=['db_name', 'replic_ip']) replic_pid = CounterMetricFamily('pg_master_replic_pid', 'replic_pid', labels=['db_name', 'replic_ip']) replica_lags = CounterMetricFamily('pg_master_replica_lags', 'replica_lags', labels=['db_name', 'replic_ip']) for x in range(get_db['replic_status']): replic_usesysid.add_metric([get_db['db_name'], get_db['replic_ip'][x]], get_db['replic_usesysid'][x]) replic_pid.add_metric([get_db['db_name'], get_db['replic_ip'][x]], get_db['replic_pid'][x]) replica_lags.add_metric([get_db['db_name'], get_db['replic_ip'][x]], get_db['replica_lags'][x]) yield size yield max_connections yield total_connections yield left_connections yield db_deadlocks yield replica_lags yield replic_usesysid yield replic_pid yield replic_status