Beispiel #1
0
    def collect(self):
        """Yield Prometheus metric families describing a Deluge daemon.

        Connects over RPC to the host named by the ``DELUGE_HOST``
        environment variable (default ``127.0.0.1``) and yields, in order:

        * one family per entry of ``get_libtorrent_status_metrics_meta()``
          whose ``type`` is not ``None``, filled from
          ``core.get_session_status``;
        * upload/download byte counters split by traffic type;
        * a ``deluge_info`` gauge carrying version labels;
        * one gauge per numeric/boolean daemon config value;
        * torrent counts per label (only when at least one torrent has a
          label) and per state.

        The RPC connection is closed in a ``finally`` clause, so it is
        released even when an RPC call raises or the consumer stops
        iterating before the generator is exhausted.
        """
        deluge_host = os.environ.get('DELUGE_HOST', '127.0.0.1')
        client = DelugeRPCClient(deluge_host, self.rpc_port, self.rpc_user,
                                 self.rpc_password)
        client.connect()

        try:
            libtorrent_status_metrics = get_libtorrent_status_metrics_meta()
            libtorrent_status_metric_source_names = [
                x['source'] for x in libtorrent_status_metrics.values()
            ]

            libtorrent_status_metric_values = client.call(
                'core.get_session_status',
                libtorrent_status_metric_source_names)

            for metric, props in libtorrent_status_metrics.items():
                # Entries without a metric type are fetched but not exported
                # on their own (some are used by the transfer counters below).
                if props['type'] is None:
                    continue

                value = libtorrent_status_metric_values[props['source']]
                if 'conv' in props:
                    value = props['conv'](value)
                yield props['type']('deluge_libtorrent_{}'.format(metric),
                                    props['help'],
                                    value=value)

            for direction in ['upload', 'download']:
                transfer_metric = CounterMetricFamily(
                    'deluge_libtorrent_{}_bytes_total'.format(direction),
                    'Total bytes {}ed for all torrents.'.format(direction),
                    labels=['type'])
                for traffic_type in ['payload', 'ip_overhead', 'dht',
                                     'tracker']:
                    # The session status mapping is indexed with byte-string
                    # keys here, hence the explicit encode.
                    status_key = 'total_{}_{}'.format(
                        traffic_type, direction).encode('ascii')
                    transfer_metric.add_metric(
                        [traffic_type],
                        libtorrent_status_metric_values[status_key])
                yield transfer_metric

            yield new_metric_with_labels_and_value(
                GaugeMetricFamily,
                'deluge_info',
                'Deluge information',
                labels={
                    'version':
                    client.call('daemon.info').decode('utf-8'),
                    'libtorrent_version':
                    client.call('core.get_libtorrent_version').decode('utf-8'),
                },
                value=1)

            for key, value in client.call('core.get_config').items():
                # Only settings with a numeric representation can be gauges.
                if isinstance(value, (int, float, bool)):
                    setting = key.decode('utf-8')
                    yield GaugeMetricFamily(
                        'deluge_config_{}'.format(setting),
                        'Value of the deluge config setting {}'.format(
                            setting),
                        value=value)

            torrents_by_state = {
                'downloading': 0,
                'seeding': 0,
                'paused': 0,
                'checking': 0,
                'queued': 0,
                'error': 0,
                'active': 0,

                # not the prometheus way, but the states above (as defined by
                # deluge) are already overlapping, so sum() over them is
                # already meaningless
                'total': 0,
            }
            torrents_by_label = defaultdict(int)
            for torrent in client.core.get_torrents_status({}, [
                    b'label', b'state', b'download_payload_rate',
                    b'upload_payload_rate'
            ]).values():
                if b'label' in torrent:
                    torrents_by_label[torrent[b'label'].decode('utf-8')] += 1
                torrents_by_state[torrent[b'state'].decode(
                    'utf-8').lower()] += 1
                torrents_by_state['total'] += 1
                # "active" means any payload is moving in either direction.
                if torrent[b'download_payload_rate'] > 0 or torrent[
                        b'upload_payload_rate'] > 0:
                    torrents_by_state['active'] += 1

            if torrents_by_label:
                torrents_by_label_metric = GaugeMetricFamily(
                    'deluge_torrents_by_label',
                    'The number of torrents for each label assigned to a torrent using the deluge label plugin',
                    labels=['label'])
                for label, count in torrents_by_label.items():
                    torrents_by_label_metric.add_metric([label], count)
                yield torrents_by_label_metric

            torrents_metric = GaugeMetricFamily(
                'deluge_torrents',
                'The number of torrents in a specific state (note: some states overlap)',
                labels=['state'])
            for state, torrent_count in torrents_by_state.items():
                torrents_metric.add_metric([state], torrent_count)
            yield torrents_metric
        finally:
            # Runs on normal completion, on errors, and when the generator is
            # closed early, so the RPC connection is never left open.
            client.disconnect()
Beispiel #2
0
 def test_gauge_labels(self):
     """A labelled gauge sample can be read back from the registry."""
     family = GaugeMetricFamily('g', 'help', labels=['a'])
     family.add_metric(['b'], 2)
     self.custom_collector(family)

     observed = self.registry.get_sample_value('g', {'a': 'b'})
     self.assertEqual(2, observed)
    def collect(self):
        """Yield the Airflow task, DAG and scheduler gauge families.

        Each family is filled from the corresponding module-level ``get_*``
        query helper and yielded as soon as it is complete.
        """
        # ---- Task metrics ----
        task_states = get_task_state_info()
        status_by_task = GaugeMetricFamily(
            'airflow_task_status',
            'Shows the number of task instances with particular status',
            labels=['dag_id', 'task_id', 'owner', 'status'])
        for row in task_states:
            # A missing/empty state is reported under the literal 'none'.
            status_by_task.add_metric(
                [row.dag_id, row.task_id, row.owners, row.state or 'none'],
                row.value)
        yield status_by_task

        duration_by_task = GaugeMetricFamily(
            'airflow_task_duration',
            'Duration of successful tasks in seconds',
            labels=['task_id', 'dag_id', 'execution_date'])
        for row in get_task_duration_info():
            elapsed = row.end_date - row.start_date
            seconds = elapsed.total_seconds()
            duration_by_task.add_metric(
                [row.task_id, row.dag_id,
                 str(row.execution_date.date())], seconds)
        yield duration_by_task

        failures_by_task = GaugeMetricFamily('airflow_task_fail_count',
                                             'Count of failed tasks',
                                             labels=['dag_id', 'task_id'])
        for row in get_task_failure_counts():
            failures_by_task.add_metric([row.dag_id, row.task_id], row.count)
        yield failures_by_task

        # ---- DAG metrics ----
        dag_states = get_dag_state_info()
        status_by_dag = GaugeMetricFamily(
            'airflow_dag_status',
            'Shows the number of dag starts with this status',
            labels=['dag_id', 'owner', 'status'])
        for row in dag_states:
            status_by_dag.add_metric([row.dag_id, row.owners, row.state],
                                     row.count)
        yield status_by_dag

        duration_by_dag = GaugeMetricFamily(
            'airflow_dag_run_duration',
            'Duration of successful dag_runs in seconds',
            labels=['dag_id'])
        for row in get_dag_duration_info():
            elapsed = row.end_date - row.start_date
            seconds = elapsed.total_seconds()
            duration_by_dag.add_metric([row.dag_id], seconds)
        yield duration_by_dag

        # ---- Scheduler metrics ----
        delay_by_dag = GaugeMetricFamily('airflow_dag_scheduler_delay',
                                         'Airflow DAG scheduling delay',
                                         labels=['dag_id'])
        for row in get_dag_scheduler_delay():
            # Delay = gap between the execution date and the actual start.
            lag = row.start_date - row.execution_date
            seconds = lag.total_seconds()
            delay_by_dag.add_metric([row.dag_id], seconds)
        yield delay_by_dag

        delay_by_queue = GaugeMetricFamily(
            'airflow_task_scheduler_delay',
            'Airflow Task scheduling delay',
            labels=['queue'])
        for row in get_task_scheduler_delay():
            # Delay = gap between being queued and actually starting.
            lag = row.start_date - row.queued_dttm
            seconds = lag.total_seconds()
            delay_by_queue.add_metric([row.queue], seconds)
        yield delay_by_queue

        queued_family = GaugeMetricFamily(
            'airflow_num_queued_tasks',
            'Airflow Number of Queued Tasks',
        )
        queued_count = get_num_queued_tasks()
        queued_family.add_metric([], queued_count)
        yield queued_family
Beispiel #4
0
def gen_k8s_node_gpu_reserved():
    """Return a new, empty ``k8s_node_gpu_reserved`` gauge family.

    The family declares a single label, ``host_ip``.
    """
    label_names = ["host_ip"]
    gauge = GaugeMetricFamily("k8s_node_gpu_reserved",
                              "gpu reserved on k8s node",
                              labels=label_names)
    return gauge
Beispiel #5
0
def gen_k8s_api_gauge():
    """Return a new, empty ``k8s_api_server_count`` gauge family.

    The family declares the labels ``error`` and ``host_ip``, in that order.
    """
    label_names = ["error", "host_ip"]
    gauge = GaugeMetricFamily("k8s_api_server_count",
                              "count of k8s api server",
                              labels=label_names)
    return gauge
Beispiel #6
0
    def collect(self):
        """Yield current-sensor gauges for the appliance's power supplies.

        Fetches ``/mgmt/status/default/CurrentSensors`` through
        ``call_rest_api``. When that call returns an empty string nothing is
        yielded. Otherwise, for every reported sensor whose name is one of
        the four known power-supply sensors, two gauges are yielded (upper
        critical threshold, then current value), both divided by 1000.
        A final gauge reports how long the collection took.
        """
        started_at = time.time()

        # Perform REST API call to fetch data
        payload = call_rest_api('/mgmt/status/default/CurrentSensors',
                                self.ip, self.port, self.session,
                                self.timeout)
        if payload == '':
            return

        # (sensor name as reported, metric-name fragment, help-text fragment)
        known_sensors = (
            ('Power Supply 1 In Current', 'power_supply_1_in_current',
             'into power supply 1'),
            ('Power Supply 1 Out Current', 'power_supply_1_out_current',
             'out power supply 1'),
            ('Power Supply 2 In Current', 'power_supply_2_in_current',
             'into power supply 2'),
            ('Power Supply 2 Out Current', 'power_supply_2_out_current',
             'out power supply 2'),
        )

        # Update Prometheus metrics
        for sensor in payload['CurrentSensors']:
            for sensor_name, slug, phrase in known_sensors:
                if sensor['Name'] == sensor_name:
                    threshold = GaugeMetricFamily(
                        'mqa_current_sensors_{}_upper_critical_threshold_amperes'
                        .format(slug),
                        'Upper critical threshold for current going {}'.format(
                            phrase),
                        labels=['appliance', 'readingStatus'])
                    threshold.add_metric(
                        [self.appliance, sensor['ReadingStatus']],
                        sensor['UpperCriticalThreshold'] / 1000)
                    yield threshold

                    reading = GaugeMetricFamily(
                        'mqa_current_sensors_{}_amperes'.format(slug),
                        'Current going {}'.format(phrase),
                        labels=['appliance', 'readingStatus'])
                    reading.add_metric(
                        [self.appliance, sensor['ReadingStatus']],
                        sensor['Value'] / 1000)
                    yield reading

        elapsed = GaugeMetricFamily(
            'mqa_exporter_current_sensors_elapsed_time_seconds',
            'Exporter eleapsed time to collect current sensors metrics',
            labels=['appliance'])
        elapsed.add_metric([self.appliance], time.time() - started_at)
        yield elapsed
Beispiel #7
0
def gen_gpu_mem_util_gauge():
    """Return a new, empty ``nvidiasmi_utilization_memory`` gauge family.

    The family declares a single label, ``minor_number``.
    """
    label_names = ["minor_number"]
    gauge = GaugeMetricFamily("nvidiasmi_utilization_memory",
                              "gpu memory utilization of card",
                              labels=label_names)
    return gauge
Beispiel #8
0
    def collect_host_info(self) -> dict:
        """Fetch host information and wrap its numeric fields in gauges.

        Sends the ``host_get_info`` request and reads the ``output`` mapping
        of the response. Sample of that mapping (GET /host):

        "output": {
            "disk_available": 3057,
            "ipl_time": "IPL at 06/02/17 11:07:10 EDT",
            "vcpus_used": 6,
            "hypervisor_type": "zvm",
            "vcpus": 6,
            "zvm_host": "OPNSTK2",
            "memory_mb": 51200.0,
            "cpu_info": {
                "cec_model": "2817",
                "architecture": "s390x"
            },
            "disk_total": 3623,
            "zcc_userid": "ZCCUID",
            "hypervisor_hostname": "OPNSTK2",
            "hypervisor_version": 640,
            "disk_used": 566,
            "memory_mb_used": 0.0
        }

        Side effect: ``self.host`` is set to the ``hypervisor_hostname``
        field, which is also used as the ``host`` label value.

        Returns:
            A dict mapping each exported ``output`` field name to the gauge
            family holding its value.
        """
        res = self.send_request('host_get_info')

        # (output field, metric name, help text) -- insertion order matters,
        # it is the iteration order of the returned dict.
        gauge_specs = (
            ('vcpus', 'zvm_host_vcpus', 'The virtual CPUs'),
            ('vcpus_used', 'zvm_host_vcpus_used', 'The used vcpus'),
            ('memory_mb', 'zvm_host_memory_mb',
             'The total available size of the memory in MB.'),
            ('memory_mb_used', 'zvm_host_memory_mb_used',
             'The size of used memory in MB.'),
            ('disk_available', 'zvm_host_disk_available',
             'The total available size of the disks in the pool in Gigabytes(G).'),
            ('disk_total', 'zvm_host_disk_total',
             'The total size of the pool in Gigabytes (G).'),
            ('disk_used', 'zvm_host_disk_used',
             'The size of used disks in the pool in Gigabytes(G).'),
        )
        metric = {
            field: GaugeMetricFamily(name, help_text, labels=['host'])
            for field, name, help_text in gauge_specs
        }

        data = res['output']

        self.host = data['hypervisor_hostname']  # hypervisor_hostname?

        for field, family in metric.items():
            family.add_metric([self.host], data[field])

        # TODO: the non-numeric fields (zvm_host, hypervisor_hostname,
        # hypervisor_version, hypervisor_type, zcc_userid, ipl_time) and
        # cpu_info are not exported yet; an 'other_info' gauge carrying them
        # as labels with value 1 was sketched here but never enabled.
        return metric
Beispiel #9
0
    def collect(self):
        """Yield one gauge family holding each value of ``gc.get_count()``.

        Every value is added as a sample whose ``gen`` label is its position
        in the returned sequence, rendered as a string.
        """
        counts_family = GaugeMetricFamily("python_gc_counts",
                                          "GC object counts",
                                          labels=["gen"])
        current_counts = gc.get_count()
        for position, count in enumerate(current_counts):
            counts_family.add_metric([str(position)], count)

        yield counts_family
 def collect(self):
     """Yield one ``polardb_<key>`` gauge per aggregated report column.

     Runs four aggregate queries through ``mysql_get`` (account and
     creative totals for today, then account and creative totals for the
     day/hour returned by ``time_create()``), pairs each result with its
     key list via ``dictpro`` and yields a gauge for every resulting key.
     """
     account_keys = [
         "td_account_cost", "td_account_imp", "td_account_click",
         "td_account_activation", "td_account_register",
         "td_account_conversion", "td_account_retention",
         "td_account_download_completed", "td_account_awaken",
         "td_account_media_kuaishou_aclick",
         "td_account_media_kuaishou_bclick", "td_account_form",
         "td_account_adv_form", "td_account_adv_valid_clue"
     ]
     account_rows = list(
         mysql_get('''
     select SUM(cost) as td_account_cost , SUM(imp) as td_account_imp, SUM(click) as td_account_click, SUM(activation) as td_account_activation, SUM(register) as td_account_register , SUM(conversion) as td_account_conversion , SUM(retention) as td_account_retention, SUM(download_completed) as td_account_download_completed, SUM(awaken) as td_account_awaken, SUM(media_kuaishou_aclick) as td_account_media_kuaishou_aclick, SUM(media_kuaishou_bclick) as td_account_media_kuaishou_bclick, sum(form) as td_account_form, sum(adv_form) as td_account_adv_form, SUM(adv_valid_clue) as td_account_adv_valid_clue from alphadesk.report_realtime_account where pday=date_format(now(),'%Y%m%d');'''
                   ))

     creative_keys = [
         "td_creative_cost", "td_creative_imp", "td_creative_click",
         "td_creative_activation", "td_creative_register",
         "td_creative_conversion", "td_creative_retention",
         "td_creative_download_completed", "td_creative_awaken",
         "td_creative_media_kuaishou_aclick",
         "td_creative_media_kuaishou_bclick", "td_creative_form",
         "td_creative_adv_form", "td_creative_adv_valid_clue",
         "td_creative_drs_click"
     ]
     creative_rows = list(
         mysql_get('''
     select SUM(cost) as td_creative_cost , SUM(imp) as td_creative_imp, SUM(click) as td_creative_click, SUM(activation) as td_creative_activation, SUM(register) as td_creative_register , SUM(conversion) as td_creative_conversion , SUM(retention) as td_creative_retention, SUM(download_completed) as td_creative_download_completed, SUM(awaken) as td_creative_awaken, SUM(media_kuaishou_aclick) as td_creative_media_kuaishou_aclick, SUM(media_kuaishou_bclick) as td_creative_media_kuaishou_bclick, sum(form) as td_creative_form, sum(adv_form) as td_creative_adv_form, SUM(adv_valid_clue) as td_creative_adv_valid_clue,  SUM(drs_click) as td_creative_drs_click from alphadesk.report_realtime_creative where pday=date_format(now(),'%Y%m%d');'''
                   ))

     daily_account = dictpro(account_keys, account_rows)
     daily_creative = dictpro(creative_keys, creative_rows)

     # Hourly queries are restricted to the day/hour given by time_create().
     pday, phour = time_create()

     hour_account_keys = [
         'hour_account_imp', 'hour_account_cost', 'hour_account_clk'
     ]
     hour_account_rows = list(
         mysql_get('''
     select SUM(imp) as hour_account_imp,sum(cost) as hour_account_cost,SUM(click) as hour_account_clk from alphadesk.report_realtime_account where pday={} and phour={};
     '''.format(pday, phour)))
     hourly_account = dictpro(hour_account_keys, hour_account_rows)

     hour_creative_keys = [
         'hour_creative_imp', 'hour_creative_cost', 'hour_creative_clk',
         'hour_creative_drs_click'
     ]
     hour_creative_rows = list(
         mysql_get('''
     select SUM(imp) as hour_creative_imp,sum(cost) as hour_creative_cost,SUM(click) as hour_creative_clk, SUM(drs_click) as hour_creative_drs_click from alphadesk.report_realtime_creative where pday={} and phour={};
     '''.format(pday, phour)))
     hourly_creative = dictpro(hour_creative_keys, hour_creative_rows)

     # Same gauge construction for every result set, in the original order.
     data_gauges = {}
     for values in (daily_account, daily_creative, hourly_account,
                    hourly_creative):
         for key in values:
             data_gauges[key] = GaugeMetricFamily('polardb_{}'.format(key),
                                                  'td_polardb',
                                                  value=values[key])

     for family in data_gauges.values():
         yield family
 def __init__(self, fa):
     """Keep the ``fa`` handle and create the empty host/volume gauge family.

     The family ``purefa_host_volumes_info`` declares the labels ``host``
     and ``naaid``.
     """
     volume_labels = ['host', 'naaid']
     self.fa = fa
     self.map_host_vol = GaugeMetricFamily(
         'purefa_host_volumes_info',
         'FlashArray host volumes connections',
         labels=volume_labels)
Beispiel #12
0
 def metrics_setup_sta(self, metrics):
     """Register the per-client (station) metric families in ``metrics``.

     Adds the keys ``c_sta_rx_bytes``, ``c_sta_tx_bytes`` and ``g_sta_rssi``
     to the passed-in mapping; all three families share the same label
     names.
     """
     sta_labels = ('mac', 'hostname', 'radio', 'essid')

     # Each family receives its own fresh list of label names.
     metrics['c_sta_rx_bytes'] = CounterMetricFamily(
         'unifi_sta_rx_bytes', 'Client RX bytes', labels=list(sta_labels))
     metrics['c_sta_tx_bytes'] = CounterMetricFamily(
         'unifi_sta_tx_bytes', 'Client TX bytes', labels=list(sta_labels))
     metrics['g_sta_rssi'] = GaugeMetricFamily(
         'unifi_sta_rssi', 'Client signal RSSI', labels=list(sta_labels))
Beispiel #13
0
def metric_up_gauge(resource: str, succeeded=True):
    """Return a ``<resource>_up`` gauge reporting whether a fetch succeeded.

    Args:
        resource: Name used as the metric-name prefix and in the help text.
        succeeded: Outcome of the fetch; exported as ``int(succeeded)``.
    """
    return GaugeMetricFamily(
        resource + '_up',
        'Did the {} fetch succeed.'.format(resource),
        value=int(succeeded),
    )
    def collect(self):
        """Yield file-system capacity metrics for the appliance.

        Fetches ``/mgmt/status/default/FilesystemStatus`` through
        ``call_rest_api``. When that call returns an empty string nothing is
        yielded. Otherwise free/total values for the encrypted, temporary
        and internal storage are yielded (each multiplied by 1000000),
        followed by a gauge reporting how long the collection took.
        """
        started_at = time.time()

        # Perform REST API call to fetch data
        data = call_rest_api('/mgmt/status/default/FilesystemStatus',
                             self.ip, self.port, self.session, self.timeout)
        if data == '':
            return

        # (family class, metric name, help text, FilesystemStatus field),
        # listed in the order the families are emitted.
        storage_specs = (
            (GaugeMetricFamily, 'mqa_file_system_encrypted_bytes_free',
             'Free, or unused and available, encrypted storage space on the appliance',
             'FreeEncrypted'),
            (CounterMetricFamily, 'mqa_file_system_encrypted_bytes_total',
             'Total encrypted storage space on the appliance (the maximum capacity)',
             'TotalEncrypted'),
            (GaugeMetricFamily, 'mqa_file_system_temporary_bytes_free',
             'Free, or unused and available, temporary storage space on the appliance',
             'FreeTemporary'),
            (CounterMetricFamily, 'mqa_file_system_temporary_bytes_total',
             'Total temporary storage space on the appliance',
             'TotalTemporary'),
            (GaugeMetricFamily, 'mqa_file_system_internal_bytes_free',
             'Free, or unused and available, internal storage space on the appliance',
             'FreeInternal'),
            (CounterMetricFamily, 'mqa_file_system_internal_bytes_total',
             'Total internal storage space on the appliance',
             'TotalInternal'),
        )

        # Update Prometheus metrics
        for family_cls, metric_name, help_text, field in storage_specs:
            family = family_cls(metric_name, help_text, labels=['appliance'])
            family.add_metric([self.appliance],
                              data['FilesystemStatus'][field] * 1000000)
            yield family

        elapsed = GaugeMetricFamily(
            'mqa_exporter_file_system_elapsed_time_seconds',
            'Exporter eleapsed time to collect file system metrics',
            labels=['appliance'])
        elapsed.add_metric([self.appliance], time.time() - started_at)
        yield elapsed
Beispiel #15
0
 def collect(self):
     """Yield DSL gauges scraped from the router's engineer-mode page.

     The router host is taken from ``sys.argv[1]``. The page
     ``/html/engineer/ro_dsl.htm`` is fetched with the module-level
     ``cookie_jar`` and parsed by ``parse_metric_info``. A sync-state gauge
     is yielded first, then one gauge family per DSL figure with an
     ``upload`` sample (index 0 of the parsed value) and a ``download``
     sample (index 1).
     """
     host = sys.argv[1]

     # The pages in the engineer mode don't need a csrf_token, other requests do
     # however need it. A valid token can be extracted from the index.html
     page_url = "http://{}/html/engineer/ro_dsl.htm".format(host)
     response = requests.get(page_url, cookies=cookie_jar)
     ds = parse_metric_info(response.text)

     # Dirty approach to DSL metrics, only gauge is available
     # because we can only scrape error counts, not increment them
     state_family = GaugeMetricFamily(
         'speedport_state', 'DSL Sync state',
         labels=["host", "report"])  # State; 1=online, 0=anything else
     state_family.add_metric([host, ds["State"]],
                             1 if ds["State"] == "online" else 0)
     yield state_family

     # (metric name, help text, key in the parsed page data)
     directional_specs = (
         ('speedport_actual_data_rate_kpbs', 'Actual DSL Sync data rate',
          "ActualDataRate"),
         ('speedport_attainable_data_rate_kpbs',
          'Attainable DSL Sync data rate', "AttainableDataRate"),
         ('speedport_crc_error_count', 'Amount of CRC Errors',
          "CRCerrorcount"),
         ('speedport_fec_error_count', 'Amount of FEC Errors',
          "FECerrorcount"),
         ('speedport_hec_error_count', 'Amount of HEC Errors',
          "HECerrorcount"),
         ('speedport_line_attenuation', 'Line Attenuation',
          "LineAttenuation"),
         ('speedport_snr_margin', 'SNR Margin', "SNRMargin"),
         ('speedport_signal_level', 'Signal Level', "Signal-level"),
     )
     for metric_name, help_text, data_key in directional_specs:
         family = GaugeMetricFamily(metric_name,
                                    help_text,
                                    labels=["host", "method"])
         family.add_metric([host, "upload"], to_float(ds[data_key][0]))
         family.add_metric([host, "download"], to_float(ds[data_key][1]))
         yield family
Beispiel #16
0
    def get_hypervisor_metrics(self):
        """Yield gauge families describing every Nova hypervisor.

        One ``nova_hypervisor_up`` family carries the hypervisor metadata as
        labels (sample value ``1.0``); the remaining families each expose a
        single attribute of the hypervisor, labelled by hypervisor id.
        All families are filled from ``self.cloud.list_hypervisors()`` and
        then yielded in declaration order.

        Note: the "vcpus in use" family is named
        ``nova_hypervisor_vcpus_used``. It was previously registered under
        the name ``nova_hypervisor_vcpus``, which duplicated the "vcpus
        available" family and mixed both values under one metric name.
        """
        nova_hypervisor_up = GaugeMetricFamily(
            'nova_hypervisor_up',
            'Metadata about a Nova hypervisor',
            labels=[
                'nova_hypervisor_id',
                'nova_hypervisor_name',
                'nova_hypervisor_up',
                'nova_hypervisor_enabled',
                'nova_hypervisor_type',
                'nova_hypervisor_version',
                'nova_hypervisor_cpu_vendor',
                'nova_hypervisor_cpu_model',
                'nova_hypervisor_cpu_arch',
            ])
        metrics = [nova_hypervisor_up]

        # (hypervisor attribute, help text); the metric name is
        # 'nova_hypervisor_<attribute>'.
        attribute_specs = (
            ('vcpus', 'Number of vcpus available'),
            ('vcpus_used', 'Number of vcpus in use'),
            ('running_vms', 'Number of vms running on this hypervisor'),
            ('local_disk_size',
             'Amount of local disk available on this hypervisor'),
            ('local_disk_used',
             'Amount of local disk used on this hypervisor'),
            ('local_disk_free',
             'Amount of local disk free on this hypervisor'),
            ('memory_size', 'Amount of memory available on this hypervisor'),
            ('memory_used', 'Amount of memory used on this hypervisor'),
            ('memory_free', 'Amount of memory free on this hypervisor'),
            ('current_workload', 'Number of hypervisor tasks'),
        )
        attribute_families = []
        for attribute, help_text in attribute_specs:
            family = GaugeMetricFamily(
                'nova_hypervisor_{}'.format(attribute),
                help_text,
                labels=['nova_hypervisor_id'],
            )
            attribute_families.append((attribute, family))
            metrics.append(family)

        for hv in self.cloud.list_hypervisors():
            LOG.debug('gathering metrics for hypervisor %s (%s)', hv.name,
                      hv.id)
            # cpu_info may arrive either already parsed or as a JSON string.
            if not isinstance(hv.cpu_info, dict):
                cpu = json.loads(hv.cpu_info)
            else:
                cpu = hv.cpu_info

            hvid = str(hv.id)
            nova_hypervisor_up.add_metric([
                hvid,
                hv.name,
                'true' if hv.state == 'up' else 'false',
                'true' if hv.status == 'enabled' else 'false',
                hv.hypervisor_type,
                str(hv.hypervisor_version),
                cpu['vendor'],
                cpu['model'],
                cpu['arch'],
            ], 1.0)

            for attribute, family in attribute_families:
                family.add_metric([hvid], getattr(hv, attribute))

        yield from metrics
def scrape():
    """Rebuild all pub-push metric families and publish them together.

    Sets the module-level ``START`` to today's UTC date rendered with the
    ``'%Y-%m-%d %H:%M:%S'`` format (only the date component is formatted, so
    the time part is always ``00:00:00``), then builds five metric families
    from the push retrieval helpers and stores them in the shared ``metrics``
    mapping with a single ``update`` call.
    """
    global START
    START = datetime.datetime.utcnow().date().strftime('%Y-%m-%d %H:%M:%S')

    def _populate(family, source):
        # Copy every (value, labels) pair counted for ``source`` into
        # ``family`` and hand the family back.
        for count, label_values in pub_pushes_total(source):
            family.add_metric(label_values, count)
        return family

    recent = retrieve_recent_pub_pushes()

    total_family = _populate(
        CounterMetricFamily('pub_pushes_total',
                            'Count of all pub pushes',
                            labels=PUSH_LABELS),
        recent,
    )

    errors_family = _populate(
        CounterMetricFamily('pub_push_errors_total',
                            'Count of all pub push errors',
                            labels=PUSH_LABELS),
        only(recent, states=error_states),
    )

    in_progress_family = _populate(
        GaugeMetricFamily('pub_in_progress_pushes',
                          'Count of all in-progress pub pushes',
                          labels=PUSH_LABELS),
        retrieve_open_pub_pushes(),
    )

    waiting_family = _populate(
        GaugeMetricFamily('pub_waiting_pushes',
                          'Count of all waiting, unscheduled pub pushes',
                          labels=PUSH_LABELS),
        retrieve_waiting_pub_pushes(),
    )

    duration_family = HistogramMetricFamily(
        'pub_push_duration_seconds',
        'Histogram of pub push durations',
        labels=PUSH_LABELS,
    )
    for bucket_list, total_seconds, label_values in \
            pub_push_duration_seconds(recent):
        duration_family.add_metric(label_values,
                                   bucket_list,
                                   sum_value=total_seconds)

    # Publish everything through one update() call; the intent is that the
    # expositor never observes a half-refreshed set of families.
    fresh_families = {
        'pub_pushes_total': total_family,
        'pub_push_errors_total': errors_family,
        'pub_in_progress_pushes': in_progress_family,
        'pub_waiting_pushes': waiting_family,
        'pub_push_duration_seconds': duration_family,
    }
    metrics.update(fresh_families)
Beispiel #18
0
def gen_pai_pod_gauge():
    """Return a new ``pai_pod_count`` gauge family with no samples added."""
    pod_labels = [
        "service_name", "name", "namespace", "phase", "host_ip",
        "initialized", "pod_scheduled", "ready",
    ]
    return GaugeMetricFamily("pai_pod_count", "count of pai pod",
                             labels=pod_labels)
Beispiel #19
0
def gen_docker_daemon_counter():
    """Return a new ``docker_daemon_count`` family with no samples added.

    Despite the function name, the family is a gauge, labelled by ``error``.
    """
    daemon_labels = ["error"]
    return GaugeMetricFamily(
        "docker_daemon_count",
        "count of docker daemon",
        labels=daemon_labels,
    )
Beispiel #20
0
def gen_pai_job_pod_gauge():
    """Return a new ``pai_job_pod_count`` gauge family with no samples added."""
    job_pod_labels = [
        "job_name", "name", "phase", "host_ip",
        "initialized", "pod_bound", "pod_scheduled", "ready",
    ]
    return GaugeMetricFamily("pai_job_pod_count", "count of pai job pod",
                             labels=job_pod_labels)
Beispiel #21
0
def gen_k8s_node_gpu_available():
    """Return a new ``k8s_node_gpu_available`` gauge family keyed by host IP."""
    node_labels = ["host_ip"]
    return GaugeMetricFamily(
        "k8s_node_gpu_available",
        "gpu available on k8s node",
        labels=node_labels,
    )
Beispiel #22
0
def gen_pai_container_gauge():
    """Return a new ``pai_container_count`` gauge family with no samples added."""
    container_labels = [
        "service_name", "pod_name", "name", "namespace", "state",
        "host_ip", "ready",
    ]
    return GaugeMetricFamily("pai_container_count", "count of container pod",
                             labels=container_labels)
Beispiel #23
0
def gen_k8s_node_gpu_total():
    """Return a new ``k8s_node_gpu_total`` gauge family keyed by host IP."""
    node_labels = ["host_ip"]
    return GaugeMetricFamily(
        "k8s_node_gpu_total",
        "gpu total on k8s node",
        labels=node_labels,
    )
Beispiel #24
0
def gen_pai_node_gauge():
    """Return a new ``pai_node_count`` gauge family with no samples added."""
    node_labels = [
        "name",
        "disk_pressure",
        "memory_pressure",
        "out_of_disk",
        "ready",
        "unschedulable",
    ]
    return GaugeMetricFamily("pai_node_count", "count of pai node",
                             labels=node_labels)
Beispiel #25
0
 def test_gauge(self):
     """Pass a pre-valued gauge family to ``custom_collector`` and check that
     the registry reports sample ``g`` (no labels) as 1."""
     family = GaugeMetricFamily('g', 'help', value=1)
     self.custom_collector(family)
     observed = self.registry.get_sample_value('g', {})
     self.assertEqual(1, observed)
    def collect(self):
        """Yield host-level and per-process gauge families.

        First emits CPU, memory and root-filesystem usage percentages for this
        host, labelled with its hostname. Then, for every process name listed
        in ``self.config['check_processes']``, starts one ``CollectThread`` per
        entry returned by ``get_pid`` and, once all threads have been joined,
        emits per-PID gauges from the stats the threads stored in a shared
        dict.

        Any exception raised along the way is caught and printed, which ends
        the collection early instead of propagating to the caller.

        The body uses only syntax that is valid on both Python 2 and Python 3.
        """
        process_labels = ['pid', 'exe', 'cmd', 'host']

        def process_gauge(name, help_text, label_values, value):
            # Build a one-sample gauge family carrying the per-process labels.
            family = GaugeMetricFamily(name, help_text, labels=process_labels)
            family.add_metric(label_values, value=value)
            return family

        try:
            hostname = socket.gethostname()

            # Whole-machine CPU usage.
            all_cpu = GaugeMetricFamily(
                'offline_machine_cpu_percentage',
                'machine cpu percentage',
                labels=['host'])
            all_cpu.add_metric([hostname], value=psutil.cpu_percent())
            yield all_cpu

            # Whole-machine memory usage.
            all_mem = GaugeMetricFamily(
                'offline_machine_mem_percentage',
                'machine mem percentage',
                labels=['host'])
            all_mem.add_metric([hostname],
                               value=psutil.virtual_memory().percent)
            yield all_mem

            # Usage of the filesystem mounted at '/'.
            all_disk = GaugeMetricFamily(
                'offline_machine_disk_percentage',
                'machine disk percentage',
                labels=['host'])
            all_disk.add_metric([hostname],
                                value=psutil.disk_usage('/').percent)
            yield all_disk

            for process_name in self.config['check_processes']:
                print('process_name = %s ' % (process_name))

                # Filled in by the CollectThread workers.
                # presumably one stats dict per inspected PID -- TODO confirm
                metrics = {}
                workers = [
                    CollectThread(str(index), proc['pid'], metrics)
                    for index, proc in enumerate(get_pid(process_name))
                ]
                for worker in workers:
                    worker.start()
                for worker in workers:
                    worker.join()

                lowered_name = process_name.lower()

                # NOTE(review): a new family with the same metric name is
                # yielded for every process; confirm the consumer tolerates
                # repeated families.
                for process_metrics in metrics.values():
                    if not process_metrics:
                        # No stats were recorded for this entry, so there is
                        # no pid/cmdline to label with. Skip it rather than
                        # letting a KeyError abort the whole collection.
                        continue

                    label_values = [
                        process_metrics['pid'], process_name,
                        process_metrics['cmdline'], hostname
                    ]

                    yield process_gauge(
                        'offline_process_count',
                        lowered_name + ' Running process count.',
                        label_values, 1)
                    yield process_gauge(
                        'offline_process_running_time_seconds_total',
                        lowered_name + ' Total Running time in seconds.',
                        label_values, process_metrics['create_time'])
                    yield process_gauge(
                        'offline_process_cpu_percentage',
                        lowered_name + ' CPU Percentage.',
                        label_values, process_metrics['cpu_percent'])
                    yield process_gauge(
                        'offline_process_mem_percentage',
                        lowered_name + ' mem Percentage.',
                        label_values, process_metrics['memory_percent'])
                    yield process_gauge(
                        'offline_process_threads_number',
                        lowered_name + ' Total Number of Threads.',
                        label_values, process_metrics['num_threads'])
        except Exception as err:
            # Best-effort collector: report the failure and stop yielding.
            print('1 %s' % (err,))
Beispiel #27
0
    def to_metric(self, desc, tag_values, agg_data):
        """Convert one OpenCensus aggregation into a Prometheus metric family.

        :type desc: dict
        :param desc: View description; its ``name``, ``documentation`` and
            ``labels`` entries are read.

        :type tag_values: tuple of :class:
            `~opencensus.tags.tag_value.TagValue`
        :param tag_values: Label values, one per entry of ``desc['labels']``.
            Falsy values are exported as the empty string.

        :type agg_data: object of :class:
            `~opencensus.stats.aggregation_data.AggregationData`
        :param agg_data: Aggregated data to convert into Prometheus samples.

        :rtype: :class:`~prometheus_client.core.CounterMetricFamily` or
                :class:`~prometheus_client.core.HistogramMetricFamily` or
                :class:`~prometheus_client.core.UnknownMetricFamily` or
                :class:`~prometheus_client.core.GaugeMetricFamily`
        :returns: A Prometheus metric family holding a single sample set.
        :raises ValueError: if ``agg_data`` is none of the supported
            aggregation types.
        """
        name = desc['name']
        documentation = desc['documentation']
        label_names = desc['labels']

        assert (len(tag_values) == len(label_names))
        # Label values are exported as strings, so empty/None tag values are
        # replaced by "" before exporting. See
        # https://github.com/census-instrumentation/opencensus-python/issues/480
        label_values = [tv or "" for tv in tag_values]

        # Count -> counter.
        if isinstance(agg_data, aggregation_data_module.CountAggregationData):
            family = CounterMetricFamily(name=name,
                                         documentation=documentation,
                                         labels=label_names)
            family.add_metric(labels=label_values, value=agg_data.count_data)
            return family

        # Distribution -> histogram.
        if isinstance(agg_data,
                      aggregation_data_module.DistributionAggregationData):
            assert (agg_data.bounds == sorted(agg_data.bounds))
            # Each bucket is a [upper bound as str, cumulative count] pair,
            # listed in ascending bound order.
            running_total = 0
            buckets = []
            for index, upper_bound in enumerate(agg_data.bounds):
                running_total += agg_data.counts_per_bucket[index]
                buckets.append([str(upper_bound), running_total])
            # OpenCensus bounds carry no +Inf entry, so the catch-all bucket
            # holding the overall count is appended here.
            buckets.append(["+Inf", agg_data.count_data])

            family = HistogramMetricFamily(name=name,
                                           documentation=documentation,
                                           labels=label_names)
            family.add_metric(
                labels=label_values,
                buckets=buckets,
                sum_value=agg_data.sum,
            )
            return family

        # Sum -> untyped metric.
        if isinstance(agg_data, aggregation_data_module.SumAggregationData):
            family = UnknownMetricFamily(name=name,
                                         documentation=documentation,
                                         labels=label_names)
            family.add_metric(labels=label_values, value=agg_data.sum_data)
            return family

        # Last value -> gauge.
        if isinstance(agg_data,
                      aggregation_data_module.LastValueAggregationData):
            family = GaugeMetricFamily(name=name,
                                       documentation=documentation,
                                       labels=label_names)
            family.add_metric(labels=label_values, value=agg_data.value)
            return family

        raise ValueError("unsupported aggregation type %s" %
                         type(agg_data))
    def collect(self):
        """Yield Airflow task, DAG, scheduler and XCom gauge families.

        Families are yielded in this order: task status, task duration, task
        failure count, DAG status, DAG run duration, DAG scheduler delay,
        XCom parameters, task scheduler delay, number of queued tasks.

        For the DAG-level and XCom families, the pair returned by
        ``get_dag_labels`` is appended to the family's label names and to the
        sample's label values respectively.
        """
        # Task metrics
        task_info = get_task_state_info()

        t_state = GaugeMetricFamily(
            "airflow_task_status",
            "Shows the number of task instances with particular status",
            labels=["dag_id", "task_id", "owner", "status"],
        )

        for task in task_info:
            t_state.add_metric(
                [task.dag_id, task.task_id, task.owners, task.state or "none"],
                task.value,
            )
        yield t_state

        task_duration = GaugeMetricFamily(
            "airflow_task_duration",
            "Duration of successful tasks in seconds",
            labels=["task_id", "dag_id", "execution_date"],
        )
        for task in get_task_duration_info():
            task_duration_value = (task.end_date -
                                   task.start_date).total_seconds()
            task_duration.add_metric(
                [task.task_id, task.dag_id,
                 str(task.execution_date.date())],
                task_duration_value,
            )
        yield task_duration

        task_failure_count = GaugeMetricFamily(
            "airflow_task_fail_count",
            "Count of failed tasks",
            labels=["dag_id", "task_id"],
        )
        for task in get_task_failure_counts():
            task_failure_count.add_metric([task.dag_id, task.task_id],
                                          task.count)
        yield task_failure_count

        # Dag Metrics
        dag_info = get_dag_state_info()
        labels = ["dag_id", "owner", "status"]
        d_state = GaugeMetricFamily(
            "airflow_dag_status",
            "Shows the number of dag starts with this status",
            labels=labels,
        )
        for dag in dag_info:
            k, v = get_dag_labels(dag.dag_id)
            # The family's label names are re-pointed before every sample,
            # presumably so add_metric pairs the extended value list with the
            # matching extended name list.
            d_state._labelnames = labels + k
            d_state.add_metric([dag.dag_id, dag.owners, dag.state] + v,
                               dag.count)
        yield d_state

        labels = ["dag_id"]
        dag_duration = GaugeMetricFamily(
            "airflow_dag_run_duration",
            "Duration of successful dag_runs in seconds",
            labels=labels,
        )

        for dag in get_dag_duration_info():
            k, v = get_dag_labels(dag.dag_id)
            dag_duration._labelnames = labels + k

            dag_duration_value = (dag.end_date -
                                  dag.start_date).total_seconds()
            dag_duration.add_metric([dag.dag_id] + v, dag_duration_value)
        yield dag_duration

        # Scheduler Metrics
        labels = ["dag_id"]
        dag_scheduler_delay = GaugeMetricFamily(
            "airflow_dag_scheduler_delay",
            "Airflow DAG scheduling delay",
            labels=labels,
        )

        for dag in get_dag_scheduler_delay():
            k, v = get_dag_labels(dag.dag_id)
            dag_scheduler_delay._labelnames = labels + k

            dag_scheduling_delay_value = (dag.start_date -
                                          dag.execution_date).total_seconds()
            dag_scheduler_delay.add_metric([dag.dag_id] + v,
                                           dag_scheduling_delay_value)
        yield dag_scheduler_delay

        # XCOM parameters
        labels = ["dag_id", "task_id"]

        xcom_params = GaugeMetricFamily(
            "airflow_xcom_parameter",
            "Airflow Xcom Parameter",
            labels=labels,
        )

        xcom_config = load_xcom_config()
        for tasks in xcom_config.get("xcom_params", []):
            for param in get_xcom_params(tasks["task_id"]):
                xcom_value = extract_xcom_parameter(param.value)

                if tasks["key"] in xcom_value:
                    # Take the extra labels from the XCom row's own DAG.
                    # The previous code read `task.dag_id`, a variable left
                    # over from an earlier, unrelated loop (or undefined when
                    # those loops were empty).
                    k, v = get_dag_labels(param.dag_id)
                    xcom_params._labelnames = labels + k
                    xcom_params.add_metric([param.dag_id, param.task_id] + v,
                                           xcom_value[tasks["key"]])

        yield xcom_params

        task_scheduler_delay = GaugeMetricFamily(
            "airflow_task_scheduler_delay",
            "Airflow Task scheduling delay",
            labels=["queue"],
        )

        for task in get_task_scheduler_delay():
            task_scheduling_delay_value = (task.start_date -
                                           task.queued_dttm).total_seconds()
            task_scheduler_delay.add_metric([task.queue],
                                            task_scheduling_delay_value)
        yield task_scheduler_delay

        num_queued_tasks_metric = GaugeMetricFamily(
            "airflow_num_queued_tasks",
            "Airflow Number of Queued Tasks",
        )

        num_queued_tasks = get_num_queued_tasks()
        num_queued_tasks_metric.add_metric([], num_queued_tasks)
        yield num_queued_tasks_metric
Beispiel #29
0
 def add_gauge(self, name, desc, labels):
     """Create a gauge family and store it in ``self.gauges`` under ``name``."""
     family = GaugeMetricFamily(name, desc, labels=labels)
     self.gauges[name] = family
    def collect(self):
        """Fetch stats from every NetScaler and yield them as metric families.

        For each address in ``self.nsips`` and each entity name in
        ``self.metrics``, stats are fetched with ``collect_data``. A failed
        fetch is logged as a warning and leaves that entity without data for
        that address.

        For every entity, one family is yielded per configured
        ``'counters'`` entry (as ``CounterMetricFamily``) and then one per
        ``'gauges'`` entry (as ``GaugeMetricFamily``). Each sample carries the
        entity's configured labels, if any, followed by an ``nsip`` label.
        When a stat name is missing from a data item, a warning is logged and
        the remaining items of that address are skipped for that stat.
        """
        data = {}
        for nsip in self.nsips:
            data[nsip] = {}
            for entity in self.metrics:
                logger.info('Collecting metric %s for %s' % (entity, nsip))
                try:
                    data[nsip][entity] = collect_data(nsip, entity, self.username, self.password, self.protocol, self.nitro_timeout)
                except Exception as e:
                    logger.warning('Could not collect metric: ' + str(e))

        # Add labels to metrics and provide to Prometheus
        log_prefix_match = True
        in_kubernetes = os.environ.get('KUBERNETES_SERVICE_HOST') is not None

        # (config key, family class, word for the warning, word for the error)
        family_kinds = (
            ('counters', CounterMetricFamily, 'Counter', 'counter'),
            ('gauges', GaugeMetricFamily, 'Gauge', 'gauge'),
        )

        for entity_name, entity in self.metrics.items():
            has_labels = 'labels' in entity
            if has_labels:
                # Each configured label is a (stat key, exported name) pair.
                source_keys = [pair[0] for pair in entity['labels']]
                label_names = [pair[1] for pair in entity['labels']]
            else:
                source_keys = []
                label_names = []
            label_names.append('nsip')

            for kind_key, family_cls, kind_title, kind_word in family_kinds:
                for ns_metric_name, prom_metric_name in entity.get(kind_key, []):
                    family = family_cls(prom_metric_name, ns_metric_name, labels=label_names)
                    for nsip in self.nsips:
                        entity_stats = data[nsip].get(entity_name, [])
                        if not isinstance(entity_stats, list):
                            entity_stats = [entity_stats]

                        for data_item in entity_stats:
                            if not data_item:
                                continue

                            if ns_metric_name not in data_item:
                                logger.warning('%s stats for %s not enabled in netscalar %s, so could not add to %s' % (kind_title, ns_metric_name, nsip, entity_name))
                                break

                            if has_labels:
                                label_values = [data_item[key] for key in source_keys]
                                if in_kubernetes and entity_name == "lbvserver":
                                    # presumably rewrites label_values in
                                    # place -- TODO confirm against
                                    # update_lbvs_label
                                    prefix_match = update_lbvs_label(self.k8s_cic_prefix, label_values, ns_metric_name, log_prefix_match)
                                    if not prefix_match:
                                        log_prefix_match = False

                                label_values.append(nsip)
                            else:
                                label_values = [nsip]
                            try:
                                family.add_metric(label_values, float(data_item[ns_metric_name]))
                            except Exception as e:
                                logger.error('Caught exception while adding %s %s to %s: %s' % (kind_word, ns_metric_name, entity_name, str(e)))

                    yield family