def collect(self):
        """Yield the in-flight gauge, then every family of the static counters.

        The in-flight gauge has one sample per background process
        description (label ``name``) whose value is the number of processes
        currently registered under that description. While building it,
        ``update_metrics()`` is called on every registered process.
        """
        in_flight = GaugeMetricFamily(
            "synapse_background_process_in_flight_count",
            "Number of background processes in flight",
            labels=["name"],
        )

        # Snapshot both the mapping and each per-description collection while
        # holding the lock, so neither can change while we iterate below.
        snapshot = {}
        with _bg_metrics_lock:
            for description, running in six.iteritems(_background_processes):
                snapshot[description] = list(running)

        for description, running in six.iteritems(snapshot):
            in_flight.add_metric((description,), len(running))
            for proc in running:
                proc.update_metrics()

        yield in_flight

        # Finally re-export whatever each of the static counters collects.
        static_counters = (
            _background_process_ru_utime,
            _background_process_ru_stime,
            _background_process_db_txn_count,
            _background_process_db_txn_duration,
            _background_process_db_sched_duration,
        )
        for counter in static_counters:
            for family in counter.collect():
                yield family
Esempio n. 2
0
 def collect(self):
     """Yield a label-less gauge holding ``time.time() - last_ticked``."""
     reactor_gauge = GaugeMetricFamily(
         "python_twisted_reactor_last_seen",
         "Seconds since the Twisted reactor was last seen",
     )
     # ``last_ticked`` is a module-level value maintained outside this method.
     seconds_since_tick = time.time() - last_ticked
     reactor_gauge.add_metric([], seconds_since_tick)
     yield reactor_gauge
Esempio n. 3
0
 def collect(self):
     """Yield a gauge family with ticket counts per department and status.

     A session id is requested only when ``self.session_id`` is falsy and is
     then stored on the instance. Only rows whose ``department_id`` is in
     ``self.department_ids`` are exported.
     """
     logger.debug('Polling...')
     if not self.session_id:
         self.session_id = get_session_id(
             self.base_url, self.login, self.password)
     status_rows = get_tickes_count(self.base_url, self.session_id)

     family = GaugeMetricFamily(
         'support_tickets_total',
         'Number of tickets',
         labels=['project', 'status'],
     )
     for row in status_rows:
         if row['department_id'] not in self.department_ids:
             continue
         # Label order matches the family: project (department), then status.
         label_values = [row['department'], row['name']]
         family.add_metric(label_values, row['count'])
     yield family
 def __init__(self):
     """Create the four empty metric families, each labelled by ``machineType``."""
     label_names = ("machineType",)

     # Gauges: instantaneous build counts.
     self.runnable = GaugeMetricFamily(
         "hydra_machine_type_runnable",
         "Number of currently runnable builds",
         labels=list(label_names),
     )
     self.running = GaugeMetricFamily(
         "hydra_machine_type_running",
         "Number of currently running builds",
         labels=list(label_names),
     )

     # Counters.
     self.wait_time = CounterMetricFamily(
         "hydra_machine_type_wait_time_total",
         "Number of seconds spent waiting",
         labels=list(label_names),
     )
     self.last_active = CounterMetricFamily(
         "hydra_machine_type_last_active_total",
         "Last time this machine type was active",
         labels=list(label_names),
     )
    def collect(self):
        """Yield one gauge family per Azure category/service metric name.

        Each region reported for a category/service pair becomes one sample
        (label ``region``) whose value is looked up in ``STATUSES``. All
        samples that share a metric name are attached to a single family, so
        each metric name is emitted only once. Previously a new family was
        yielded per region, repeating the same name.

        After the families have been yielded, the elapsed wall-clock time is
        recorded on ``COLLECTION_TIME``.

        Raises:
            KeyError: if a reported status value is not a key of ``STATUSES``.
        """
        start = time.time()

        # Request data from Azure Status
        status = self._request_data()

        # Families keyed by metric name, in order of first appearance
        # (relies on insertion-ordered dicts).
        families = {}
        for categories in status[1].values():
            for category, services in categories.items():
                for service, regions in services.items():
                    metric_name = "azure_status_{}_{}_status".format(category, service).replace(".", "_")
                    for region, state in regions.items():
                        family = families.get(metric_name)
                        if family is None:
                            # Created lazily so that a service without any
                            # region still produces no family at all.
                            family = GaugeMetricFamily(
                                metric_name,
                                'Azure Status for {}'.format(metric_name),
                                labels=["region"],
                            )
                            families[metric_name] = family
                        family.add_metric([region], STATUSES[state])

        for family in families.values():
            yield family

        duration = time.time() - start
        COLLECTION_TIME.observe(duration)
Esempio n. 6
0
    def collect(self) -> Iterator[GaugeMetricFamily]:
        """Yield a ``slaves`` gauge with the number of active, idle and dead slaves.

        Classification per slave returned by ``self._get_slaves()``:

        * alive with a build id -> ``active``
        * alive without a build id -> ``idle``
        * not alive and not shut down -> ``dead``
        * not alive and shut down -> not counted

        Liveness is queried once per slave so that all branches of the
        classification see the same answer.
        """
        active, idle, dead = 0, 0, 0
        for slave in self._get_slaves():
            if slave.is_alive(use_cached=True):
                if slave.current_build_id is not None:
                    active += 1
                else:
                    idle += 1
            elif not slave.is_shutdown():
                # Slave is not alive and was not deliberately put in shutdown mode. Count it as dead.
                dead += 1
            # Otherwise the slave was deliberately and gracefully shut down;
            # such a slave is intentionally not categorized as 'dead'.

        slaves_gauge = GaugeMetricFamily('slaves', 'Total number of slaves', labels=['state'])
        slaves_gauge.add_metric(['active'], active)
        slaves_gauge.add_metric(['idle'], idle)
        slaves_gauge.add_metric(['dead'], dead)
        yield slaves_gauge
Esempio n. 7
0
    def collect(self):
        """Yield a single gauge family populated from ``self.caller()``.

        If the callback returns a dict, every ``key -> value`` pair becomes a
        sample with ``key`` as its label values; any other return value
        becomes one label-less sample. If the callback raises, the exception
        is logged and the (empty) family is still yielded.
        """
        gauge = GaugeMetricFamily(self.name, self.desc, labels=self.labels)

        try:
            result = self.caller()
        except Exception:
            logger.exception(
                "Exception running callback for LaterGauge(%s)",
                self.name,
            )
            yield gauge
            return

        if not isinstance(result, dict):
            gauge.add_metric([], result)
        else:
            for label_values, value in six.iteritems(result):
                gauge.add_metric(label_values, value)

        yield gauge
Esempio n. 8
0
    def collect(self):
        """Called by prometheus client when it reads metrics.

        Yields a ``<name>_total`` gauge with the number of registered
        callbacks per key, followed by one gauge per entry of
        ``self.sub_metrics`` holding the value each key's metrics object has
        for that attribute after all of the key's callbacks have run on it.

        Note: may be called by a separate thread.
        """
        in_flight = GaugeMetricFamily(self.name + "_total", self.desc, labels=self.labels)

        collected = {}

        # Take a copy of the keys under the lock so the registrations can
        # change while we iterate.
        with self._lock:
            registered_keys = list(self._registrations)

        for key in registered_keys:
            # Likewise copy this key's callbacks before invoking them
            # outside the lock.
            with self._lock:
                pending = set(self._registrations[key])

            in_flight.add_metric(key, len(pending))

            key_metrics = self._metrics_class()
            collected[key] = key_metrics
            for update in pending:
                update(key_metrics)

        yield in_flight

        for sub_name in self.sub_metrics:
            family_name = "_".join([self.name, sub_name])
            sub_gauge = GaugeMetricFamily(family_name, "", labels=self.labels)
            for key, key_metrics in six.iteritems(collected):
                sub_gauge.add_metric(key, getattr(key_metrics, sub_name))
            yield sub_gauge
 def collect(self):
     """Yield one gauge family per app attribute, then one per queue attribute.

     Apps are listed (with task stats embedded) before the first family is
     produced; the queue is listed only once all app families have been
     consumed. Each sample is labelled with the owning app's ``id``.
     """
     apps = self.client.list_apps(embed_task_stats=True)
     for family in self._attribute_families(
             self.APP_ATTIBUTES, 'apps',
             'from v2/apps?embed=apps.taskStats value of %s',
             apps, lambda app: app.id):
         yield family

     queue = self.client.list_queue()
     for family in self._attribute_families(
             self.QUEUE_ATTRIBUTES, 'queue',
             'from v2/queue value of %s',
             queue, lambda queue_item: queue_item.app.id):
         yield family

 def _attribute_families(self, attributes, kind, doc_template, items, item_id):
     """Yield a gauge family for each attribute, with one sample per item.

     Args:
         attributes: attribute descriptors, each passed to
             ``get_metric_key`` / ``get_metric_value``.
         kind: second argument given to ``get_metric_key``.
         doc_template: ``%``-format string for the family documentation.
         items: the objects to read values from; iterated once per attribute.
         item_id: callable returning the ``id`` label value of an item.

     Items for which ``get_metric_value`` returns ``None`` are skipped.
     """
     for attribute in attributes:
         metric_family = GaugeMetricFamily(
             self.get_metric_key(attribute, kind),
             documentation=doc_template % attribute,
             labels=["id"])
         for item in items:
             labels = [item_id(item)]
             value = self.get_metric_value(attribute, item)
             if value is None:
                 continue
             metric_family.add_metric(labels, value)
         yield metric_family
class MachineTypeScrapeImporter:
    """Collects per-machine-type values from reports into metric families.

    Each family has a single ``machineType`` label. Call
    ``load_machine_type`` once per machine type, then iterate ``metrics()``.
    """

    def __init__(self):
        """Create the four empty metric families."""
        label_names = ("machineType",)

        self.runnable = GaugeMetricFamily(
            "hydra_machine_type_runnable",
            "Number of currently runnable builds",
            labels=list(label_names),
        )
        self.running = GaugeMetricFamily(
            "hydra_machine_type_running",
            "Number of currently running builds",
            labels=list(label_names),
        )
        self.wait_time = CounterMetricFamily(
            "hydra_machine_type_wait_time_total",
            "Number of seconds spent waiting",
            labels=list(label_names),
        )
        self.last_active = CounterMetricFamily(
            "hydra_machine_type_last_active_total",
            "Last time this machine type was active",
            labels=list(label_names),
        )

    def load_machine_type(self, name, report):
        """Add one sample per family for machine type ``name`` from ``report``.

        ``runnable`` and ``running`` are read unconditionally; a ``KeyError``
        raised while adding ``waitTime`` or ``lastActive`` is ignored, so
        those samples are optional. Afterwards the report is handed to
        ``debug_remaining_state``.
        """
        self.runnable.add_metric([name], report.destructive_read("runnable"))
        self.running.add_metric([name], report.destructive_read("running"))

        optional_sources = (
            (self.wait_time, "waitTime"),
            (self.last_active, "lastActive"),
        )
        for family, report_key in optional_sources:
            try:
                family.add_metric([name], report.destructive_read(report_key))
            except KeyError:
                pass

        debug_remaining_state(report)

    def metrics(self):
        """Yield the families: runnable, running, wait_time, last_active."""
        for attribute in ("runnable", "running", "wait_time", "last_active"):
            yield getattr(self, attribute)
Esempio n. 11
0
 def convert_meter_metric(cls, marathon_key, marathon_metric):
     """Convert a meter metric into a ``<key>_rate`` gauge family.

     The family has a single ``window`` label with the samples ``1m``,
     ``5m``, ``15m`` and ``mean``, taken from the ``m1_rate``, ``m5_rate``,
     ``m15_rate`` and ``mean_rate`` entries of ``marathon_metric``.

     Raises:
         KeyError: if one of those entries is missing.
     """
     rate_key = '%s_rate' % cls.convert_metric_key(marathon_key)
     gauge = GaugeMetricFamily(
         name=rate_key,
         documentation='from %s' % marathon_key,
         labels=('window',),
     )
     # (window label value, key inside marathon_metric), in emission order.
     windows = (
         ('1m', 'm1_rate'),
         ('5m', 'm5_rate'),
         ('15m', 'm15_rate'),
         ('mean', 'mean_rate'),
     )
     for window, rate_field in windows:
         gauge.add_metric((window,), marathon_metric[rate_field])
     return gauge
 def __init__(self):
     """Create the empty per-machine metric families, all labelled by ``host``."""
     labels = ["host"]

     def gauge(name, documentation):
         # Gauge family with the shared label names.
         return GaugeMetricFamily(name, documentation, labels=labels)

     def counter(name, documentation):
         # Counter family with the shared label names.
         return CounterMetricFamily(name, documentation, labels=labels)

     self.consective_failures = gauge(
         "hydra_machine_consecutive_failures",
         "Number of consecutive failed builds")
     self.current_jobs = gauge(
         "hydra_machine_current_jobs",
         "Number of current jobs")
     self.idle_since = gauge(
         "hydra_machine_idle_since",
         "When the current idle period started")
     self.disabled_until = gauge(
         "hydra_machine_disabled_until",
         "When the machine will be used again")
     self.enabled = gauge(
         "hydra_machine_enabled",
         "If the machine is enabled (1) or not (0)")
     self.last_failure = counter(
         "hydra_machine_last_failure",
         "timestamp of the last failure")
     self.number_steps_done = counter(
         "hydra_machine_steps_done_total",
         "Total count of the steps completed")
     self.total_step_build_time = counter(
         "hydra_machine_step_build_time_total",
         "Number of seconds spent building steps")
     self.total_step_time = counter(
         "hydra_machine_step_time_total",
         "Number of seconds spent on steps")
Esempio n. 13
0
    def collect(self):
        """Fetch ``ncloc`` and ``coverage`` from the REST API and yield them.

        On a successful response a single ``sonar_metrics`` gauge family is
        yielded with one sample per (resource name, measure key). When the
        response is falsy (``requests`` treats error status codes as falsy)
        the failure is printed and nothing is yielded.

        Raises:
            KeyError: if a result lacks ``name``/``msr`` or a measure lacks
                ``key``/``val``.
        """
        req_string = self.rest_url + '/resources?metrics=ncloc,coverage'

        # The session is only needed for this one request; the context
        # manager closes it (and its connection pool) instead of leaking one
        # per scrape. The response body is already fully read at that point.
        with requests.Session() as session:
            session.trust_env = False
            session.auth = (self.sonar_user, self.sonar_password)
            # NOTE(review): TLS certificate verification is disabled here.
            session.verify = False
            res = session.get(req_string)

        # METRIC: detailed test results
        c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics',
                              labels=['name', 'key'])
        if res:
            results = res.json()
            for result in results:
                for msr in result['msr']:
                    c.add_metric([result['name'], msr['key']], msr['val'])

            yield c
        else:
            # Single-argument print() calls behave the same on Python 2 and 3.
            print("Error fetching from " + req_string)
            print(res)
Esempio n. 14
0
    def collect(self):
        """Fetch the configured metrics from the REST API and yield them.

        ``self.metrics`` is appended verbatim to the ``metrics=`` query
        parameter. On a successful response a single ``sonar_metrics`` gauge
        family is yielded with one sample per (resource name, measure key).
        A falsy response (``requests`` treats error status codes as falsy) or
        a ``KeyError`` while reading the payload is logged, and nothing is
        yielded.
        """
        req_string = self.rest_url + '/resources?metrics=' + self.metrics

        # The session is only needed for this one request; the context
        # manager closes it (and its connection pool) instead of leaking one
        # per scrape. The response body is already fully read at that point.
        with requests.Session() as session:
            session.trust_env = False
            session.auth = (self.sonar_user, self.sonar_password)
            # NOTE(review): TLS certificate verification is disabled here.
            session.verify = False
            res = session.get(req_string)

        # METRIC: detailed test results
        c = GaugeMetricFamily('sonar_metrics', 'SonarQube Metrics', labels=['name', 'key'])
        try:
            if res:
                results = res.json()
                for result in results:
                    for msr in result['msr']:
                        c.add_metric([result['name'], msr['key']], msr['val'])

                yield c
            else:
                logging.error("Error fetching from " + req_string)
                logging.error(res)
        except KeyError:
            # The payload did not have the expected name/msr/key/val fields.
            logging.error("Could not retrieve metrics from: " + self.metrics)
            logging.error("Check argument sonar_metrics")
Esempio n. 15
0
    def collect(self):
        """Yield user and system CPU seconds read from ``/proc/self/stat``.

        Yields nothing when ``HAVE_PROC_SELF_STAT`` is falsy. The tick counts
        are divided by ``self.ticks_per_sec`` to obtain seconds.
        """
        if not HAVE_PROC_SELF_STAT:
            return

        # Read everything up front so the file is closed before the first
        # yield rather than staying open while the generator is suspended.
        with open("/proc/self/stat") as s:
            line = s.read()

        # The second field is "(comm)" and comm itself may contain spaces or
        # parentheses, so split on the LAST ") ". The fields after it are
        # space separated, starting with the process state.
        raw_stats = line.rsplit(") ", 1)[1].split(" ")

        # Index 11 / 12 after the comm field are utime / stime (fields 14
        # and 15 of the stat line).
        user = GaugeMetricFamily("process_cpu_user_seconds_total", "")
        user.add_metric([], float(raw_stats[11]) / self.ticks_per_sec)
        yield user

        system = GaugeMetricFamily("process_cpu_system_seconds_total", "")
        system.add_metric([], float(raw_stats[12]) / self.ticks_per_sec)
        yield system
class MachineScrapeImporter:
    """Collects per-machine values from reports into metric families.

    Each family has a single ``host`` label. Call ``load_machine`` once per
    machine, then iterate ``metrics()``.
    """

    def __init__(self):
        """Create the empty metric families."""
        labels = ["host"]

        def gauge(name, documentation):
            # Gauge family with the shared label names.
            return GaugeMetricFamily(name, documentation, labels=labels)

        def counter(name, documentation):
            # Counter family with the shared label names.
            return CounterMetricFamily(name, documentation, labels=labels)

        self.consective_failures = gauge(
            "hydra_machine_consecutive_failures",
            "Number of consecutive failed builds")
        self.current_jobs = gauge(
            "hydra_machine_current_jobs",
            "Number of current jobs")
        self.idle_since = gauge(
            "hydra_machine_idle_since",
            "When the current idle period started")
        self.disabled_until = gauge(
            "hydra_machine_disabled_until",
            "When the machine will be used again")
        self.enabled = gauge(
            "hydra_machine_enabled",
            "If the machine is enabled (1) or not (0)")
        self.last_failure = counter(
            "hydra_machine_last_failure",
            "timestamp of the last failure")
        self.number_steps_done = counter(
            "hydra_machine_steps_done_total",
            "Total count of the steps completed")
        self.total_step_build_time = counter(
            "hydra_machine_step_build_time_total",
            "Number of seconds spent building steps")
        self.total_step_time = counter(
            "hydra_machine_step_time_total",
            "Number of seconds spent on steps")

    def load_machine(self, name, report):
        """Add one sample per family for machine ``name`` from ``report``.

        Several report keys are marked as read-but-unused first. A
        ``KeyError`` raised while adding ``idleSince`` is ignored; the two
        step-time totals default to 0. Afterwards the report is handed to
        ``debug_remaining_state``.
        """
        for ignored_key in (
                "mandatoryFeatures",
                "supportedFeatures",
                "systemTypes",
                "avgStepBuildTime",
                "avgStepTime",
        ):
            report.unused_read(ignored_key)

        labels = [name]

        for family, report_key in (
                (self.consective_failures, "consecutiveFailures"),
                (self.current_jobs, "currentJobs"),
        ):
            family.add_metric(labels, report.destructive_read(report_key))

        # idleSince is optional in the report.
        try:
            self.idle_since.add_metric(
                labels, report.destructive_read("idleSince"))
        except KeyError:
            pass

        self.disabled_until.add_metric(
            labels, report.destructive_read("disabledUntil"))

        # Export the enabled flag as 1 / 0.
        if report.destructive_read("enabled"):
            enabled_value = 1
        else:
            enabled_value = 0
        self.enabled.add_metric(labels, enabled_value)

        for family, report_key in (
                (self.last_failure, "lastFailure"),
                (self.number_steps_done, "nrStepsDone"),
        ):
            family.add_metric(labels, report.destructive_read(report_key))

        for family, report_key in (
                (self.total_step_build_time, "totalStepBuildTime"),
                (self.total_step_time, "totalStepTime"),
        ):
            family.add_metric(
                labels,
                report.destructive_read_default(report_key, default=0))

        debug_remaining_state(report)

    def metrics(self):
        """Yield every family in the order they are created in ``__init__``."""
        for attribute in (
                "consective_failures",
                "current_jobs",
                "idle_since",
                "disabled_until",
                "enabled",
                "last_failure",
                "number_steps_done",
                "total_step_build_time",
                "total_step_time",
        ):
            yield getattr(self, attribute)
Esempio n. 17
0
    def collect(self):
        """Reconnect to Tor and yield gauge families describing its state.

        Everything is read through the controller held in ``self.tor``,
        including the version (previously read through a separate global
        name). The connection count is skipped when looking up the
        connections raises ``OSError``; the flag gauges fall back to "no
        flags" when the network status raises ``stem.DescriptorUnavailable``.
        The effective-rate gauges are only emitted when both rates are
        available.
        """
        self.tor.reconnect()
        yield GaugeMetricFamily("tor_written_bytes",
                                "Tor written data counter",
                                value=int(
                                    self.tor.get_info("traffic/written")))
        yield GaugeMetricFamily("tor_read_bytes",
                                "Tor received data counter",
                                value=int(self.tor.get_info("traffic/read")))

        # Informational values are exported as a label on a constant-1 gauge.
        version = GaugeMetricFamily("tor_version",
                                    "Tor version as a label",
                                    labels=["version"])
        version.add_metric([str(self.tor.get_version())], 1)
        yield version
        version_status = GaugeMetricFamily(
            "tor_version_status",
            "Tor version status {new, old, unrecommended, recommended, new in series, obsolete, unknown} as a label",
            labels=["version_status"])
        version_status.add_metric(
            [self.tor.get_info("status/version/current")], 1)
        yield version_status
        yield GaugeMetricFamily(
            "tor_network_liveness",
            "Indicates whether tor believes that the network is currently reachable",
            value=int(self.tor.get_info("network-liveness") == "up"))
        reachable = GaugeMetricFamily(
            "tor_reachable",
            "Indicates whether our OR/Dir port is reachable",
            labels=["port"])
        # The reply is parsed as whitespace-separated "<port>=<int>" entries.
        for entry in self.tor.get_info(
                "status/reachability-succeeded").split():
            k, v = entry.split("=")
            reachable.add_metric([k], int(v))
        yield reachable
        yield GaugeMetricFamily(
            "tor_circuit_established",
            "Indicates whether Tor is capable of establishing circuits",
            value=int(self.tor.get_info("status/circuit-established")))
        # For some reason, 0 actually means that Tor is active, keep it that way
        yield GaugeMetricFamily(
            "tor_dormant",
            "Indicates whether Tor is currently active and building circuits (note that 0 corresponds to Tor being active)",
            value=int(self.tor.get_info("dormant")))

        effective_rate = self.tor.get_effective_rate(None)
        effective_burst_rate = self.tor.get_effective_rate(None, burst=True)
        if effective_rate is not None and effective_burst_rate is not None:
            yield GaugeMetricFamily("tor_effective_rate",
                                    "Shows Tor effective rate",
                                    value=int(effective_rate))
            yield GaugeMetricFamily("tor_effective_burst_rate",
                                    "Shows Tor effective burst rate",
                                    value=int(effective_burst_rate))

        fingerprint = GaugeMetricFamily("tor_fingerprint",
                                        "Tor fingerprint as a label",
                                        labels=["fingerprint"])
        fingerprint.add_metric([self.tor.get_info("fingerprint")], 1)
        yield fingerprint
        nickname = GaugeMetricFamily("tor_nickname",
                                     "Tor nickname as a label",
                                     labels=["nickname"])
        nickname.add_metric([self.tor.get_conf("Nickname", "Unnamed")], 1)
        yield nickname

        # Connection counting
        # This won't work/will return wrong results if we are not running on
        # the same box as the Tor daemon is.
        # DisableDebuggerAttachment has to be set to 0
        # TODO: Count individual OUT/DIR/Control connections, see arm sources
        # for reference
        try:
            connections = stem.util.connection.get_connections(
                process_pid=self.tor.get_pid())
            yield GaugeMetricFamily(
                "tor_connection_count",
                "Amount of connections the Tor daemon has open",
                value=len(connections))
        except OSError:
            # This happens if the PID does not exists (on another machine).
            pass
        try:
            has_flags = self.tor.get_network_status().flags
        except stem.DescriptorUnavailable:
            # The tor daemon fails with this for a few minutes after startup
            # (before figuring out its own flags?)
            has_flags = []
        flags = GaugeMetricFamily("tor_flags",
                                  "Has a Tor flag",
                                  labels=["flag"])
        # One 0/1 sample per known flag name.
        for flag in [
                "Authority", "BadExit", "Exit", "Fast", "Guard", "HSDir",
                "NoEdConsensus", "Stable", "Running", "Valid", "V2Dir"
        ]:
            flags.add_metric([flag], int(flag in has_flags))
        yield flags

        accs = self.tor.get_accounting_stats()
        yield GaugeMetricFamily("tor_accounting_read_bytes",
                                "Tor accounting read bytes", accs.read_bytes)
        yield GaugeMetricFamily("tor_accounting_left_read_bytes",
                                "Tor accounting read bytes left",
                                accs.read_bytes_left)
        yield GaugeMetricFamily("tor_accounting_read_limit_bytes",
                                "Tor accounting read bytes limit",
                                accs.read_limit)
        yield GaugeMetricFamily("tor_accounting_write_bytes",
                                "Tor accounting write bytes",
                                accs.written_bytes)
        yield GaugeMetricFamily("tor_accounting_left_write_bytes",
                                "Tor accounting write bytes left",
                                accs.write_bytes_left)
        yield GaugeMetricFamily("tor_accounting_write_limit_bytes",
                                "Tor accounting write bytes limit",
                                accs.write_limit)
Esempio n. 18
0
    def _create_metric_containers(self):
        """Build the gauge families for the sections enabled in ``self.collect_only``.

        Families are defined for the sections ``vms``, ``vmguests``,
        ``snapshots``, ``datastores`` and ``hosts``. A section is included
        only when its ``collect_only`` value is exactly ``True``.

        Returns:
            dict mapping each metric name to its ``GaugeMetricFamily``.

        Raises:
            KeyError: if ``collect_only`` enables a section not listed above.
        """
        vm_labels = ['vm_name', 'host_name', 'dc_name', 'cluster_name']
        ds_labels = ['ds_name', 'dc_name', 'ds_cluster']
        host_labels = ['host_name', 'dc_name', 'cluster_name']

        # section -> [(metric name, documentation, label names), ...]
        specs = {
            'vms': [
                ('vmware_vm_power_state',
                 'VMWare VM Power state (On / Off)',
                 vm_labels),
                ('vmware_vm_boot_timestamp_seconds',
                 'VMWare VM boot time in seconds',
                 vm_labels),
                ('vmware_vm_num_cpu',
                 'VMWare Number of processors in the virtual machine',
                 vm_labels),
            ],
            'vmguests': [
                ('vmware_vm_guest_disk_free',
                 'Disk metric per partition',
                 vm_labels + ['partition']),
                ('vmware_vm_guest_disk_capacity',
                 'Disk capacity metric per partition',
                 vm_labels + ['partition']),
            ],
            'snapshots': [
                ('vmware_vm_snapshots',
                 'VMWare current number of existing snapshots',
                 vm_labels),
                ('vmware_vm_snapshot_timestamp_seconds',
                 'VMWare Snapshot creation time in seconds',
                 vm_labels + ['vm_snapshot_name']),
            ],
            'datastores': [
                ('vmware_datastore_capacity_size',
                 'VMWare Datasore capacity in bytes',
                 ds_labels),
                ('vmware_datastore_freespace_size',
                 'VMWare Datastore freespace in bytes',
                 ds_labels),
                ('vmware_datastore_uncommited_size',
                 'VMWare Datastore uncommitted in bytes',
                 ds_labels),
                ('vmware_datastore_provisoned_size',
                 'VMWare Datastore provisoned in bytes',
                 ds_labels),
                ('vmware_datastore_hosts',
                 'VMWare Hosts number using this datastore',
                 ds_labels),
                ('vmware_datastore_vms',
                 'VMWare Virtual Machines count per datastore',
                 ds_labels),
                ('vmware_datastore_maintenance_mode',
                 'VMWare datastore maintenance mode (normal / inMaintenance / enteringMaintenance)',
                 ds_labels + ['mode']),
                ('vmware_datastore_type',
                 'VMWare datastore type (VMFS, NetworkFileSystem, NetworkFileSystem41, CIFS, VFAT, VSAN, VFFS)',
                 ds_labels + ['ds_type']),
                ('vmware_datastore_accessible',
                 'VMWare datastore accessible (true / false)',
                 ds_labels),
            ],
            'hosts': [
                ('vmware_host_power_state',
                 'VMWare Host Power state (On / Off)',
                 host_labels),
                ('vmware_host_connection_state',
                 'VMWare Host connection state (connected / disconnected / notResponding)',
                 host_labels + ['state']),
                ('vmware_host_maintenance_mode',
                 'VMWare Host maintenance mode (true / false)',
                 host_labels),
                ('vmware_host_boot_timestamp_seconds',
                 'VMWare Host boot time in seconds',
                 host_labels),
                ('vmware_host_cpu_usage',
                 'VMWare Host CPU usage in Mhz',
                 host_labels),
                ('vmware_host_cpu_max',
                 'VMWare Host CPU max availability in Mhz',
                 host_labels),
                ('vmware_host_memory_usage',
                 'VMWare Host Memory usage in Mbytes',
                 host_labels),
                ('vmware_host_memory_max',
                 'VMWare Host Memory Max availability in Mbytes',
                 host_labels),
            ],
        }

        # Instantiate every family up front; each one gets its own copy of
        # the label-name list.
        families_by_section = {}
        for section, entries in specs.items():
            section_families = {}
            for metric_name, documentation, label_names in entries:
                section_families[metric_name] = GaugeMetricFamily(
                    metric_name, documentation, labels=list(label_names))
            families_by_section[section] = section_families

        enabled_metrics = {}
        for section, enabled in self.collect_only.items():
            if enabled is True:
                enabled_metrics.update(families_by_section[section])

        return enabled_metrics
Esempio n. 19
0
    def collect(self):
        """Collect the configured stats and yield Prometheus metric families.

        For every entity in ``self.metrics`` the raw stats are fetched with
        ``self.collect_data``; one ``CounterMetricFamily`` is then yielded per
        entry of the entity's ``'counters'`` list and one ``GaugeMetricFamily``
        per entry of its ``'gauges'`` list.  Every sample carries the entity's
        configured labels plus a trailing ``nsip`` label.

        Nothing is yielded when another collection or a session teardown is
        already pending, when ``self.login()`` fails, or when a fetch reports
        ``self.FAILURE`` (the session is cleared in that case).

        ``self.stats_access_pending`` is held True while this generator runs
        and is always reset when it finishes, returns early, raises, or is
        closed by the consumer.
        """
        # Another scrape or a session teardown is in flight: skip this cycle
        # without touching the pending flag that belongs to the other caller.
        if self.stats_access_pending or self.ns_session_pending:
            return

        if not self.login():
            return

        self.stats_access_pending = True
        try:
            in_k8s = os.environ.get('KUBERNETES_SERVICE_HOST') is not None

            data = {}
            for entity in self.metrics.keys():
                logger.debug('Collecting metric {}'.format(entity))
                try:
                    status, entity_data = self.collect_data(entity)
                except Exception as e:
                    logger.error('Could not collect metric :{}'.format(e))
                    # No result for this entity: skip it instead of reusing
                    # the previous entity's status/data (or hitting unbound
                    # names when the very first fetch fails).
                    continue

                if status == self.FAILURE:
                    self.ns_session_clear()
                    return

                if entity_data:
                    data[entity] = entity_data

            lbvs_dict = None
            if 'k8s_ingress_lbvs' in self.metrics and in_k8s:
                try:
                    status, lbvs_dict = self.collect_lbvs_config()
                except Exception as e:
                    logger.error(
                        'Could not collect config entries for lbvs: {}'.format(e))
                else:
                    # Only trust ``status`` when the call actually returned.
                    if status == self.FAILURE:
                        self.ns_session_clear()
                        return

            # Add labels to metrics and provide to Prometheus
            log_prefix_match = True
            for entity_name, entity in self.metrics.items():
                has_labels = 'labels' in entity
                # Each label entry is indexed as (stat key, Prometheus label name).
                label_names = [v[1] for v in entity['labels']] if has_labels else []
                label_names.append('nsip')

                entity_stats = data.get(entity_name, [])
                if not isinstance(entity_stats, list):
                    entity_stats = [entity_stats]

                # Provide collected metric to Prometheus as a counter
                for ns_metric_name, prom_metric_name in entity.get('counters', []):
                    c = CounterMetricFamily(prom_metric_name,
                                            ns_metric_name,
                                            labels=label_names)
                    for data_item in entity_stats:
                        if not data_item:
                            continue

                        if ns_metric_name not in data_item.keys():
                            logger.info(
                                'Counter stats {} not enabled for entity: {}'.
                                format(ns_metric_name, entity_name))
                            break

                        if has_labels:
                            label_values = [
                                data_item[key]
                                for key in [v[0] for v in entity['labels']]
                            ]

                            # populate and update k8s_ingress_lbvs metrics
                            # only when running in a k8s-CIC environment
                            if entity_name == "k8s_ingress_lbvs":
                                if not in_k8s:
                                    continue
                                prefix_match = self.update_lbvs_label(
                                    label_values, lbvs_dict, log_prefix_match)
                                if not prefix_match:
                                    log_prefix_match = False
                                    continue
                            label_values.append(self.nsip)
                        else:
                            label_values = [self.nsip]
                        try:
                            c.add_metric(label_values,
                                         float(data_item[ns_metric_name]))
                        except Exception as e:
                            logger.error(
                                'Caught exception while adding counter {} to {}: {}'
                                .format(ns_metric_name, entity_name, str(e)))

                    yield c

                # Provide collected metric to Prometheus as a gauge
                for ns_metric_name, prom_metric_name in entity.get('gauges', []):
                    g = GaugeMetricFamily(prom_metric_name,
                                          ns_metric_name,
                                          labels=label_names)

                    for data_item in entity_stats:
                        if not data_item:
                            continue

                        if ns_metric_name not in data_item.keys():
                            logger.info(
                                'Gauge stat {} not enabled for entity: {}'.format(
                                    ns_metric_name, entity_name))
                            break

                        if has_labels:
                            label_values = [
                                data_item[key]
                                for key in [v[0] for v in entity['labels']]
                            ]

                            # populate and update k8s_ingress_lbvs metrics
                            # only when running in a k8s-CIC environment
                            if entity_name == "k8s_ingress_lbvs":
                                if not in_k8s:
                                    continue
                                prefix_match = self.update_lbvs_label(
                                    label_values, lbvs_dict, log_prefix_match)
                                if not prefix_match:
                                    log_prefix_match = False
                                    continue

                            label_values.append(self.nsip)
                        else:
                            label_values = [self.nsip]
                        try:
                            g.add_metric(label_values,
                                         float(data_item[ns_metric_name]))
                        except Exception as e:
                            logger.error(
                                'Caught exception while adding gauge {} to {}: {}'
                                .format(ns_metric_name, entity_name, str(e)))

                    yield g
        finally:
            # Always release the guard, otherwise one failed scrape would make
            # every later call return immediately at the top of this method.
            self.stats_access_pending = False
Esempio n. 20
0
    def _vmware_get_vm_perf_manager_metrics(self, vm_metrics):
        """Fill ``vm_metrics`` with performance gauges for powered-on VMs.

        This is a generator: it yields the objects it needs resolved (the
        inventory and counter ids, ``self.content``, the stats query together
        with the VM labels) and expects their results to be sent back, so it
        is presumably driven by Twisted's ``inlineCallbacks`` -- the decorator
        is not visible from here.

        One ``GaugeMetricFamily`` named ``vmware_vm_<counter>`` (dots replaced
        by underscores) is stored in ``vm_metrics`` per counter, and each
        returned series is added as one sample whose value is the sum of the
        series' values.
        """
        log('START: _vmware_get_vm_perf_manager_metrics')

        virtual_machines, counter_info = yield parallelize(
            self.vm_inventory, self.counter_ids)

        # Performance counters to export.
        wanted_counters = [
            'cpu.ready.summation',
            'cpu.usage.average',
            'cpu.usagemhz.average',
            'disk.usage.average',
            'disk.read.average',
            'disk.write.average',
            'mem.usage.average',
            'net.received.average',
            'net.transmitted.average',
        ]

        # Register one gauge family per counter under its Prometheus name.
        for counter in wanted_counters:
            family_name = 'vmware_vm_' + counter.replace('.', '_')
            vm_metrics[family_name] = GaugeMetricFamily(
                family_name,
                family_name,
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name'])

        # Build the metric ids to query and remember which family each
        # counter id feeds.
        metric_ids = []
        family_by_counter_id = {}
        for counter in wanted_counters:
            counter_id = counter_info[counter]
            metric_ids.append(
                vim.PerformanceManager.MetricId(counterId=counter_id,
                                                instance=''))
            family_by_counter_id[counter_id] = (
                'vmware_vm_' + counter.replace('.', '_'))

        # Only powered-on VMs are queried.
        query_specs = [
            vim.PerformanceManager.QuerySpec(maxSample=1,
                                             entity=vm['obj'],
                                             metricId=metric_ids,
                                             intervalId=20)
            for vm in virtual_machines.values()
            if vm.get('runtime.powerState') == 'poweredOn'
        ]

        content = yield self.content

        # QueryStats is pushed to a worker thread via deferToThread.
        stats_query = threads.deferToThread(content.perfManager.QueryStats,
                                            querySpec=query_specs)
        results, labels = yield parallelize(stats_query, self.vm_labels)

        for entity_stats in results:
            for series in entity_stats.value:
                family = vm_metrics[family_by_counter_id[series.id.counterId]]
                family.add_metric(
                    labels[entity_stats.entity._moId],
                    float(sum(series.value)),
                )
        log('FIN: _vmware_get_vm_perf_manager_metrics')
Esempio n. 21
0
 def metric_workers(self):
     """Return the ``resque_workers`` gauge holding ``len(self.workers)``."""
     worker_total = len(self.workers)
     gauge = GaugeMetricFamily('resque_workers', "Number of workers")
     gauge.add_metric([], worker_total)
     return gauge
Esempio n. 22
0
    def collect(self):
        """Query Bamboo over REST and yield Prometheus gauge families.

        Yields, in order:

        * ``bamboo_test_results`` -- one sample per test of the latest result
          of every job in ``self.bamboo_test_jobs`` (True when successful);
        * ``bamboo_build_state`` -- agent counts per state and host, only
          when the dashboard summary request succeeds;
        * ``build_results``, ``test_counts`` and one gauge per entry of
          ``METRICS`` for the favourite plans, using the result with the
          highest ``number`` of each plan.

        Failed requests are printed and otherwise ignored.
        """
        session = requests.Session()
        session.trust_env = False
        session.auth = (self.bamboo_user, self.bamboo_password)
        # NOTE(review): TLS certificate verification is disabled for every
        # request made here -- confirm this is intentional.
        session.verify = False

        # METRIC: detailed test results
        c = GaugeMetricFamily('bamboo_test_results', 'Bamboo Test Results', labels=['name', 'job', 'className', 'methodName'])
        for job in self.bamboo_test_jobs:
            res = session.get(self.web_url + '/rest/api/latest/result/' + job + '/latest.json?expand=testResults.allTests')
            if res:
                # Parse the body once and reuse it for every test entry.
                results = res.json()
                for testResult in results['testResults']['allTests']['testResult']:
                    c.add_metric([results['plan']['name'], job, testResult['className'], testResult['methodName']], testResult['status'] == 'successful')
            else:
                print("error fetching test results")
                print(res)
        yield c

        # METRIC: bamboo agent state
        c = GaugeMetricFamily('bamboo_build_state', 'Bamboo Build Dashboard', labels=['state', 'host'])
        res = session.get(self.web_url + '/build/admin/ajax/getDashboardSummary.action')
        if res:
            dashboard_summary = res.json()
            for host, values in self.tally_agent_info(dashboard_summary).items():
                for state, state_count in values.items():
                    c.add_metric([state, host], state_count)
            yield c
        else:
            print(res)

        # Collect results tagged as favourite, keyed by plan key
        d = {}
        r = session.get(
            self.web_url + '/rest/api/latest/result.json?favourite&expand=results.result.buildDurationInSeconds')
        if r.ok:
            # NOTE: this may return multiple results for the same plan - need to use highest build number
            results = r.json()
            for result in results['results']['result']:
                key = result['plan']['key']
                # Keep the stored result when it is the newer one; only a
                # higher (or equal) build number may replace it.
                if key in d and d[key]['number'] > result['number']:
                    continue  # don't overwrite with older build
                d[key] = result
        else:
            print(r)

        # METRIC: build status (favourites)
        METRICS = ['buildNumber', 'buildDurationInSeconds']
        TEST_METRICS = ['failedTestCount', 'skippedTestCount', 'quarantinedTestCount', 'successfulTestCount']

        statusMetric = GaugeMetricFamily('build_results', 'Status of flagged plans', labels=['name', 'state'])
        testMetric = GaugeMetricFamily('test_counts', 'Test result counts', labels=['shortName', 'countType'])
        metrics = {x: GaugeMetricFamily(x, x, labels=['shortName']) for x in METRICS}
        for result in d.values():
            short_name = result['plan']['shortName']
            statusMetric.add_metric([short_name, result['state']], result['successful'])
            for name in TEST_METRICS:
                testMetric.add_metric([short_name, name], result[name])
            for name, metric in metrics.items():
                metric.add_metric([short_name], result[name])

        yield statusMetric
        yield testMetric
        for metric in metrics.values():
            yield metric
Esempio n. 23
0
def gen_nvidia_gpu_mem_util_gauge():
    """Create an empty ``nvidiasmi_utilization_memory`` gauge family.

    The family has a single label, ``minor_number``.
    """
    label_names = ["minor_number"]
    return GaugeMetricFamily(
        "nvidiasmi_utilization_memory",
        "gpu memory utilization of card",
        labels=label_names,
    )
    def collect(self):
        '''Yield the task-status, DAG-status and DAG-run-duration gauges.

        * ``airflow_task_status`` -- one sample per row returned by
          ``get_task_state_info()``; a falsy task state is reported as
          ``'none'``.
        * ``airflow_dag_status`` -- one sample per row returned by
          ``get_dag_state_info()``.
        * ``airflow_dag_run_duration`` -- one sample per row returned by
          ``get_dag_duration_info()``, in seconds.
        '''

        # Task metrics
        task_info = get_task_state_info()
        t_state = GaugeMetricFamily(
            'airflow_task_status',
            'Shows the number of task starts with this status',
            labels=['dag_id', 'task_id', 'owner', 'status']
        )
        for task in task_info:
            t_state.add_metric([task.dag_id, task.task_id, task.owners, task.state or 'none'], task.value)
        yield t_state

        # Dag Metrics
        dag_info = get_dag_state_info()
        d_state = GaugeMetricFamily(
            'airflow_dag_status',
            'Shows the number of dag starts with this status',
            labels=['dag_id', 'owner', 'status']
        )
        for dag in dag_info:
            d_state.add_metric([dag.dag_id, dag.owners, dag.state], dag.count)
        yield d_state

        # DagRun metrics
        dag_duration = GaugeMetricFamily(
            'airflow_dag_run_duration',
            'Duration of currently running dag_runs in seconds',
            labels=['dag_id', 'run_id']
        )
        driver = Session.bind.driver
        # For these two drivers the duration is forwarded unchanged
        # (presumably already a number of seconds -- verify against the
        # query); for any other driver it is treated as a timedelta.
        duration_is_numeric = driver in ('mysqldb', 'pysqlite')
        for dag in get_dag_duration_info():
            if duration_is_numeric:
                seconds = dag.duration
            else:
                # total_seconds() keeps the days component; ``.seconds``
                # alone wraps around for runs longer than 24 hours.
                seconds = dag.duration.total_seconds()
            dag_duration.add_metric([dag.dag_id, dag.run_id], seconds)
        yield dag_duration
Esempio n. 25
0
    def collect(self):
        """Yield the ``python_gc_counts`` gauge built from ``gc.get_count()``.

        One sample is added per value returned by ``gc.get_count()``, labelled
        with its position in that tuple as ``gen``.
        """
        gauge = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
        for generation, tracked in enumerate(gc.get_count()):
            gen_label = str(generation)
            gauge.add_metric([gen_label], tracked)

        yield gauge
Esempio n. 26
0
 def metric_queues(self):
     """Return the ``resque_queues`` gauge holding ``len(self.queues)``."""
     queue_total = len(self.queues)
     gauge = GaugeMetricFamily('resque_queues', "Number of queues")
     gauge.add_metric([], queue_total)
     return gauge
 def _get_coordinator_counters(self):
     """Return fresh gauge families for the Druid coordinator metrics.

     The result maps each Druid metric name (e.g. ``'segment/count'``) to a
     new, empty ``GaugeMetricFamily`` carrying the Prometheus name, help text
     and label names used to export it.
     """
     return {
         'segment/assigned/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_assigned_count',
             'Number of segments assigned to be loaded in the cluster.',
             labels=['tier']),
         'segment/moved/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_moved_count',
             'Number of segments moved in the cluster.',
             labels=['tier']),
         'segment/dropped/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_dropped_count',
             'Number of segments dropped due to being overshadowed.',
             labels=['tier']),
         'segment/deleted/count':
         GaugeMetricFamily('druid_coordinator_segment_deleted_count',
                           'Number of segments dropped due to rules.',
                           labels=['tier']),
         'segment/unneeded/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_unneeded_count',
             'Number of segments dropped due to being marked as unused.',
             labels=['tier']),
         # The only family here declared without labels.
         'segment/overShadowed/count':
         GaugeMetricFamily('druid_coordinator_segment_overshadowed_count',
                           'Number of overShadowed segments.'),
         'segment/loadQueue/failed':
         GaugeMetricFamily(
             'druid_coordinator_segment_loadqueue_failed_count',
             'Number of segments that failed to load.',
             labels=['server']),
         'segment/loadQueue/count':
         GaugeMetricFamily('druid_coordinator_segment_loadqueue_count',
                           'Number of segments to load.',
                           labels=['server']),
         'segment/dropQueue/count':
         GaugeMetricFamily('druid_coordinator_segment_dropqueue_count',
                           'Number of segments to drop.',
                           labels=['server']),
         'segment/size':
         GaugeMetricFamily('druid_coordinator_segment_size_bytes',
                           'Size in bytes of available segments.',
                           labels=['datasource']),
         'segment/count':
         GaugeMetricFamily('druid_coordinator_segment_count',
                           'Number of served segments.',
                           labels=['datasource']),
         'segment/unavailable/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_unavailable_count',
             'Number of segments (not including replicas) left to load '
             'until segments that should be loaded in the cluster '
             'are available for queries.',
             labels=['datasource']),
         'segment/underReplicated/count':
         GaugeMetricFamily(
             'druid_coordinator_segment_under_replicated_count',
             'Number of segments (including replicas) left to load until '
             'segments that should be loaded in the cluster are '
             'available for queries.',
             labels=['tier', 'datasource']),
         # NOTE(review): a 'datasource' label on a jetty connection count
         # looks odd -- confirm against the code that feeds this family.
         'jetty/numOpenConnections':
         GaugeMetricFamily('druid_coordinator_jetty_numOpenConnections',
                           'Number of open jetty connections.',
                           labels=['datasource']),
     }
Esempio n. 28
0
def gen_k8s_component_gauge():
    """Create an empty ``k8s_component_count`` gauge family.

    Samples are labelled by ``service_name``, ``error`` and ``host_ip``.
    """
    label_names = ["service_name", "error", "host_ip"]
    return GaugeMetricFamily(
        "k8s_component_count",
        "count of k8s component",
        labels=label_names,
    )
Esempio n. 29
0
def gen_amd_gpu_util_gauge():
    """Create an empty ``rocmsmi_utilization_gpu`` gauge family.

    The family has a single label, ``minor_number``.
    """
    label_names = ["minor_number"]
    return GaugeMetricFamily(
        "rocmsmi_utilization_gpu",
        "gpu core utilization of card",
        labels=label_names,
    )
Esempio n. 30
0
    def collect(self):
        """Collect Blazegraph statistics and yield Prometheus metric families.

        Three sources are read:

        * ``self.get_counter(path)`` for every performance counter listed in
          ``blazegraph_metrics``; a value that cannot be converted to float is
          exported as NaN;
        * one SPARQL query (``self.execute_sparql``) returning the triple
          count and the ``schema:dateModified`` timestamp, exported as
          ``blazegraph_triples`` and ``blazegraph_lastupdated`` (epoch
          seconds); both become NaN when the request fails;
        * ``self.fetch_allocators()`` for the used/free allocator gauges; a
          falsy result is exported as NaN for both.

        Raises:
            ValueError: if a SPARQL binding carries neither ``count`` nor
                ``y``.
        """
        # Imported here so the block does not depend on file-level imports.
        import calendar
        import time

        blazegraph_metrics = {
            '/Query Engine/queryStartCount':
            CounterMetricFamily(
                'blazegraph_queries_start',
                'Number of queries that have started since the start of the application.'
            ),
            '/Query Engine/queryDoneCount':
            CounterMetricFamily(
                'blazegraph_queries_done',
                'Number of queries completed since the start of the application.'
            ),
            '/Query Engine/queryErrorCount':
            CounterMetricFamily(
                'blazegraph_queries_error',
                'Number of queries in error since the start of the application.'
            ),
            '/Query Engine/queriesPerSecond':
            GaugeMetricFamily(
                'blazegraph_queries_per_second',
                'Number of queries per second (rolling average).'),
            '/Query Engine/operatorActiveCount':
            GaugeMetricFamily('blazegraph_operator_active_count',
                              'Number of active blazegraph operators'),
            '/Query Engine/runningQueriesCount':
            GaugeMetricFamily('blazegraph_running_queries_count',
                              'Number of running queries'),
            '/Query Engine/GeoSpatial/geoSpatialSearchRequests':
            GaugeMetricFamily(
                'blazegraph_geospatial_search_requets',
                'Number of geospatial search requests since the start of the application.'
            ),
            '/Journal/bytesReadPerSec':
            GaugeMetricFamily('blazegraph_journal_bytes_read_per_second', ''),
            '/Journal/bytesWrittenPerSec':
            GaugeMetricFamily('blazegraph_journal_bytes_written_per_second',
                              ''),
            '/Journal/extent':
            GaugeMetricFamily('blazegraph_journal_extent', ''),
            '/Journal/commitCount':
            CounterMetricFamily('blazegraph_journal_commit_count', ''),
            '/Journal/commit/totalCommitSecs':
            GaugeMetricFamily('blazegraph_journal_total_commit_seconds',
                              'Total time spent in commit.'),
            '/Journal/commit/flushWriteSetSecs':
            GaugeMetricFamily('blazegraph_journal_flush_write_set_seconds',
                              ''),
            '/Journal/Concurrency Manager/Read Service/Average Active Count':
            GaugeMetricFamily(
                'blazegraph_journal_concurrency_read_average_active_count',
                'Average Number of Read Active Threads'),
            '/JVM/Memory/DirectBufferPool/default/bytesUsed':
            GaugeMetricFamily(
                'blazegraph_jvm_memory_direct_buffer_pool_default_bytes_used',
                ''),
            '/JVM/Memory/Runtime Free Memory':
            GaugeMetricFamily('blazegraph_jvm_memory_runtime_free_memory',
                              'Current amount of free memory in the JVM.'),
            '/JVM/Memory/Runtime Max Memory':
            GaugeMetricFamily('blazegraph_jvm_memory_runtime_max_memory',
                              'Max amount of memory the JVM can allocate.'),
            '/JVM/Memory/Runtime Total Memory':
            GaugeMetricFamily('blazegraph_jvm_memory_runtime_total_memory',
                              'Total amount of memory allocated to the JVM.'),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Collection Count':
            CounterMetricFamily(
                'blazegraph_jvm_memory_gc_g1_old_collecton_count',
                'Number of old GC since JVM start.'),
            '/JVM/Memory/Garbage Collectors/G1 Old Generation/Cumulative Collection Time':
            GaugeMetricFamily(
                'blazegraph_jvm_memory_gc_g1_old_cumulative_collection_time',
                'Total time spent in old GC (seconds).'),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Collection Count':
            CounterMetricFamily(
                'blazegraph_jvm_memory_gc_g1_young_collection_count',
                'Number of young GC since JVM start.'),
            '/JVM/Memory/Garbage Collectors/G1 Young Generation/Cumulative Collection Time':
            GaugeMetricFamily(
                'blazegraph_jvm_memory_gc_g1_young_cumulative_collection_time',
                'Total time spent in young GC (seconds).'),
        }

        # The keys above are string literals, so no "unknown metric" check is
        # needed before querying each counter.
        for metric_name, metric_family in blazegraph_metrics.items():
            metric_value = self.get_counter(metric_name)

            try:
                value = float(metric_value)
            except (ValueError, TypeError):
                # Missing or non-numeric counter: export NaN rather than fail.
                value = float('nan')

            metric_family.add_metric([], value)

        triple_metric = GaugeMetricFamily('blazegraph_triples',
                                          'Count of triples in Blazegraph')
        lag_metric = CounterMetricFamily('blazegraph_lastupdated',
                                         'Last update timestamp')

        try:
            sparql_query = """ prefix schema: <http://schema.org/>
                        SELECT * WHERE { {
                          SELECT ( COUNT( * ) AS ?count ) { ?s ?p ?o }
                        } UNION {
                          SELECT * WHERE { <http://www.wikidata.org> schema:dateModified ?y }
                        } }"""

            data = self.execute_sparql(sparql_query)

            for binding in data['results']['bindings']:
                if 'count' in binding:
                    triple_count = binding['count']['value']
                    triple_metric.add_metric([], float(triple_count))

                elif 'y' in binding:
                    lastUpdated = parse(binding['y']['value'])
                    # strftime('%s') is a platform extension and interprets
                    # the fields as local time even for timezone-aware
                    # values.  Convert explicitly: aware values go through
                    # UTC, naive values keep the local-time interpretation.
                    if lastUpdated.utcoffset() is None:
                        epoch = time.mktime(lastUpdated.timetuple())
                    else:
                        epoch = calendar.timegm(lastUpdated.utctimetuple())
                    lag_metric.add_metric([], float(epoch))
                else:
                    raise ValueError(
                        'SPARQL binding returned with unexpected key')

        except requests.exceptions.RequestException:
            log.exception("Error querying endpoint")
            triple_metric.add_metric([], float('nan'))
            lag_metric.add_metric([], float('nan'))

        alloc_metric = GaugeMetricFamily(
            'blazegraph_allocators',
            'Number of used FixedAllocators in Blazegraph')
        alloc_free_metric = GaugeMetricFamily(
            'blazegraph_free_allocators',
            'Number of free FixedAllocators in Blazegraph')

        allocs = self.fetch_allocators()
        if allocs:
            alloc_metric.add_metric([], allocs)
            # Free allocators are reported against a total of 256 * 1024.
            alloc_free_metric.add_metric([], 256 * 1024 - allocs)
        else:
            alloc_metric.add_metric([], float('nan'))
            alloc_free_metric.add_metric([], float('nan'))

        yield triple_metric
        yield lag_metric
        yield alloc_metric
        yield alloc_free_metric

        for metric in blazegraph_metrics.values():
            yield metric
Esempio n. 31
0
def gen_gpu_used_by_external_process_counter():
    """Create an empty ``gpu_used_by_external_process_count`` gauge family.

    Despite the function name the family is a gauge; samples are labelled by
    ``minor_number`` and ``pid``.
    """
    label_names = ["minor_number", "pid"]
    return GaugeMetricFamily(
        "gpu_used_by_external_process_count",
        "count of gpu used by external process",
        labels=label_names,
    )
Esempio n. 32
0
def metric_up_gauge(resource: str, succeeded=True):
    """Build the ``<resource>_up`` gauge for a fetch attempt.

    The gauge value is ``int(succeeded)``, so 1 for a truthy flag and 0 for a
    falsy one.
    """
    return GaugeMetricFamily(
        resource + '_up',
        'Did the {} fetch succeed.'.format(resource),
        value=int(succeeded),
    )
Esempio n. 33
0
def gen_process_mem_usage_gauge():
    """Create an empty ``process_mem_usage_byte`` gauge family.

    Samples are labelled by ``pid`` and ``cmd``.
    """
    help_text = "memory usage of process, to save space in prometheus, we only expose those who consume more than 500Mb of memory"
    label_names = ["pid", "cmd"]
    return GaugeMetricFamily(
        "process_mem_usage_byte", help_text, labels=label_names)
    def collect(self):
        """Scan every repository and yield four gauge families.

        Yields, in order: tags per repository, revisions per repository,
        layers per tag and summed layer size per tag.  A layer without a
        ``'size'`` key contributes 0 to the tag size.
        """
        tags_total = GaugeMetricFamily(
            'repository_tags_total',
            'Number of tags for each repo',
            labels=['repository'])
        revisions_total = GaugeMetricFamily(
            'repository_revisions_total',
            'Number of revisions for each repo',
            labels=['repository'])
        tag_layers_total = GaugeMetricFamily(
            'repository_tag_layers_total',
            'Number of layers in each tag',
            labels=['repository', 'tag'])
        tag_size_bytes = GaugeMetricFamily(
            'repository_tag_size_bytes',
            'Size of each tag',
            labels=['repository', 'tag'])

        repositories = self._find_repositories()

        logger.debug('Found %s repositories: %s', len(repositories),
                     repositories)

        for repository in repositories:
            logger.debug('Scanning %s for tags', repository)

            tags = self._scrape_tags(repository)
            tags_total.add_metric([repository], len(tags))

            revisions = self._scrape_revisions(repository)
            revisions_total.add_metric([repository], len(revisions))

            for tag in tags:
                manifest = self._scrape_manifest(repository, tag)
                tag_layers_total.add_metric([repository, tag],
                                            len(manifest['layers']))
                total_size = sum(
                    layer['size'] if 'size' in layer else 0
                    for layer in manifest['layers'])
                tag_size_bytes.add_metric([repository, tag], total_size)

        for family in (tags_total, revisions_total,
                       tag_layers_total, tag_size_bytes):
            yield family
Esempio n. 35
0
def gen_docker_daemon_counter():
    """Create an empty ``docker_daemon_count`` gauge family.

    Despite the function name the family is a gauge; its single label is
    ``error``.
    """
    label_names = ["error"]
    return GaugeMetricFamily(
        "docker_daemon_count",
        "count of docker daemon",
        labels=label_names,
    )
Esempio n. 36
0
  def collect(self):
    """Connect to the Deluge daemon and yield Prometheus metric families.

    Yields the libtorrent session metrics present in the session status, a
    ``deluge_info`` gauge, one gauge per numeric/boolean config value, the
    torrent counts per label (only when at least one torrent has a label) and
    per state, and -- when ``self.per_torrent_metrics_enabled`` is set -- the
    per-torrent metrics.

    The daemon host is read from the ``DELUGE_HOST`` environment variable
    (default ``127.0.0.1``).  The RPC client is disconnected when the
    generator finishes, raises, or is closed early by the consumer.
    """
    deluge_host = os.environ.get('DELUGE_HOST', '127.0.0.1')
    client = DelugeRPCClient(deluge_host, self.rpc_port, self.rpc_user, self.rpc_password)
    client.connect()

    try:
      libtorrent_metrics = get_libtorrent_metrics_meta()
      libtorrent_metric_values = client.call('core.get_session_status', [])

      for metric, metric_type in libtorrent_metrics.items():
        # The session status is looked up with byte-string keys.
        encoded_name = metric.encode('ascii')
        if encoded_name in libtorrent_metric_values:
          yield metric_type(
            'deluge_libtorrent_{}'.format(metric.replace('.', '_')),
            'libtorrent metric {}'.format(metric),
            value=libtorrent_metric_values[encoded_name]
          )

      yield new_metric_with_labels_and_value(GaugeMetricFamily, 'deluge_info', 'Deluge information',
        labels={
          'version': client.call('daemon.info').decode('utf-8'),
          'libtorrent_version': client.call('core.get_libtorrent_version').decode('utf-8'),
        },
        value=1
      )

      for key, value in client.call('core.get_config').items():
        if isinstance(value, (int, float, bool)):
          config_name = key.decode('utf-8')
          yield GaugeMetricFamily('deluge_config_{}'.format(config_name), 'Value of the deluge config setting {}'.format(config_name), value=value)

      torrents_by_state = {
        'downloading': 0,
        'seeding': 0,
        'paused': 0,
        'checking': 0,
        'queued': 0,
        'error': 0,
        'active': 0,

        # not the prometheus way, but the states above (as defined by deluge) are already overlapping, so sum() over them is already meaningless
        'total': 0,
      }
      torrents_by_label = defaultdict(int)
      for torrent in client.core.get_torrents_status({}, [b'label', b'state', b'download_payload_rate', b'upload_payload_rate']).values():
        if b'label' in torrent:
          torrents_by_label[torrent[b'label'].decode('utf-8')] += 1
        # Count states missing from the preset dict too, instead of
        # aborting the whole scrape with a KeyError.
        state = torrent[b'state'].decode('utf-8').lower()
        torrents_by_state[state] = torrents_by_state.get(state, 0) + 1
        torrents_by_state['total'] += 1
        if torrent[b'download_payload_rate'] > 0 or torrent[b'upload_payload_rate'] > 0:
          torrents_by_state['active'] += 1

      if torrents_by_label:
        torrents_by_label_metric = GaugeMetricFamily('deluge_torrents_by_label', 'The number of torrents for each label assigned to a torrent using the deluge label plugin', labels=['label'])
        for label, count in torrents_by_label.items():
          torrents_by_label_metric.add_metric([label], count)
        yield torrents_by_label_metric

      torrents_metric = GaugeMetricFamily('deluge_torrents', 'The number of torrents in a specific state (note: some states overlap)', labels=['state'])
      for state, torrent_count in torrents_by_state.items():
        torrents_metric.add_metric([state], torrent_count)
      yield torrents_metric

      if self.per_torrent_metrics_enabled:
        # (metric family type, torrent status key, help text)
        per_torrent_keys = [
          (CounterMetricFamily, b'total_done', 'The amount of data downloaded for this torrent'),
          (CounterMetricFamily, b'total_size', 'The size of this torrent'),
          (CounterMetricFamily, b'total_uploaded', 'The amount of data uploaded for this torrent'),
          (GaugeMetricFamily, b'num_peers', 'The number of peers currently connected to for this torrent'),
          (GaugeMetricFamily, b'num_seeds', 'The number of seeds currently connected to for this torrent'),
          (GaugeMetricFamily, b'total_peers', 'The number of peers in the swarm for this torrent'),
          (GaugeMetricFamily, b'total_seeds', 'The number of seeds in the swarm for this torrent'),
        ]
        per_torrent_metrics = dict(generate_per_torrent_metrics(per_torrent_keys))

        for torrent_hash, torrent in client.core.get_torrents_status({}, [key[1] for key in per_torrent_keys] + [b'name']).items():
          for metric_name, metric in per_torrent_metrics.items():
            metric.add_metric(
              [
                torrent[b'name'].decode('utf-8'),
                torrent_hash.decode('utf-8')
              ],
              torrent[metric_name]
            )

        for metric in per_torrent_metrics.values():
          yield metric
    finally:
      # Release the RPC connection even when a call above raised or the
      # consumer stopped iterating early.
      client.disconnect()
 def trivial_gauge(self, name, help, value):
     """Return a label-less gauge named ``hydra_<name>`` holding ``value``."""
     gauge = GaugeMetricFamily("hydra_{}".format(name), help)
     gauge.add_metric([], value)
     return gauge
Esempio n. 38
0
    def collect(self, vsphere_host, section='default'):
        """Collect vCenter metrics for one host and yield the metric families.

        This is a generator. All collection work happens before the first
        family is yielded.

        Args:
            vsphere_host: Host passed to ``self._vmware_connect`` and used in
                the log messages.
            section: Key into ``self.config``. An unknown section is logged
                and replaced by ``'default'``.

        Yields:
            The ``GaugeMetricFamily`` objects of every group ('vms',
            'datastores', 'hosts') whose ``collect_only`` flag is ``True``.
            Nothing is yielded when the connection cannot be established.
        """
        if section not in self.config:
            log("{} is not a valid section, using default".format(section))
            section = 'default'
        collect_only = self.config[section]['collect_only']

        def gauge(name, documentation, labels):
            # Return a (key, family) pair so the metric name is written once
            # and the dict key can never drift from the family name.
            return name, GaugeMetricFamily(name, documentation, labels=labels)

        metric_list = {
            'vms': dict([
                gauge('vmware_vm_power_state',
                      'VMWare VM Power state (On / Off)',
                      ['vm_name', 'host_name']),
                gauge('vmware_vm_boot_timestamp_seconds',
                      'VMWare VM boot time in seconds',
                      ['vm_name', 'host_name']),
                gauge('vmware_vm_snapshots',
                      'VMWare current number of existing snapshots',
                      ['vm_name']),
                gauge('vmware_vm_snapshot_timestamp_seconds',
                      'VMWare Snapshot creation time in seconds',
                      ['vm_name', 'vm_snapshot_name']),
                gauge('vmware_vm_num_cpu',
                      'VMWare Number of processors in the virtual machine',
                      ['vm_name', 'host_name']),
            ]),
            'datastores': dict([
                gauge('vmware_datastore_capacity_size',
                      'VMWare Datasore capacity in bytes',
                      ['ds_name']),
                gauge('vmware_datastore_freespace_size',
                      'VMWare Datastore freespace in bytes',
                      ['ds_name']),
                gauge('vmware_datastore_uncommited_size',
                      'VMWare Datastore uncommitted in bytes',
                      ['ds_name']),
                gauge('vmware_datastore_provisoned_size',
                      'VMWare Datastore provisoned in bytes',
                      ['ds_name']),
                gauge('vmware_datastore_hosts',
                      'VMWare Hosts number using this datastore',
                      ['ds_name']),
                gauge('vmware_datastore_vms',
                      'VMWare Virtual Machines number using this datastore',
                      ['ds_name']),
            ]),
            'hosts': dict([
                gauge('vmware_host_power_state',
                      'VMWare Host Power state (On / Off)',
                      ['host_name']),
                gauge('vmware_host_boot_timestamp_seconds',
                      'VMWare Host boot time in seconds',
                      ['host_name']),
                gauge('vmware_host_cpu_usage',
                      'VMWare Host CPU usage in Mhz',
                      ['host_name']),
                gauge('vmware_host_cpu_max',
                      'VMWare Host CPU max availability in Mhz',
                      ['host_name']),
                gauge('vmware_host_memory_usage',
                      'VMWare Host Memory usage in Mbytes',
                      ['host_name']),
                gauge('vmware_host_memory_max',
                      'VMWare Host Memory Max availability in Mbytes',
                      ['host_name']),
            ]),
        }

        # Only the families of groups explicitly enabled (value is exactly
        # True) are exposed.
        metrics = {}
        for key, value in collect_only.items():
            if value is True:
                metrics.update(metric_list[key])

        log("Start collecting vcenter metrics for {0}".format(vsphere_host))

        self.vmware_connection = self._vmware_connect(vsphere_host, section)
        if not self.vmware_connection:
            log("Cannot connect to vmware")
            return

        try:
            content = self.vmware_connection.RetrieveContent()

            if collect_only['vms'] is True:
                # Get performance metrics counter information
                counter_info = self._vmware_perf_metrics(content)

                # Fill VM information
                log("Starting VM performance metric collection")
                self._vmware_get_vms(content, metrics, counter_info)
                log("Finish starting vm performance vm collection")

                # Fill snapshots (count and age)
                log("Starting VM snapshot metric collection")
                vm_counts, vm_ages = self._vmware_get_snapshots(content)
                for v in vm_counts:
                    metrics['vmware_vm_snapshots'].add_metric(
                        [v['vm_name']], v['snapshot_count'])
                for vm_age in vm_ages:
                    for v in vm_age:
                        metrics[
                            'vmware_vm_snapshot_timestamp_seconds'
                        ].add_metric(
                            [v['vm_name'], v['vm_snapshot_name']],
                            v['vm_snapshot_timestamp_seconds'])
                log("Finished VM snapshot metric collection")

            # Fill datastores
            if collect_only['datastores'] is True:
                self._vmware_get_datastores(content, metrics)

            # Fill hosts information
            if collect_only['hosts'] is True:
                self._vmware_get_hosts(content, metrics)

            log("Stop collecting vcenter metrics for {0}".format(vsphere_host))
        finally:
            # Always wait for the worker threads and close the connection,
            # even when a collection step raised; otherwise the session
            # opened above would be leaked.
            self.threader.join()
            self._vmware_disconnect()

        for metric in metrics.values():
            yield metric
Esempio n. 39
0
def gen_nv_peer_mem_gauge():
    """Build the unlabelled ``nv_peer_mem_count`` gauge family."""
    name = "nv_peer_mem_count"
    documentation = "count of active nv_peer_mem (GPUDirect) module. 0 or 1"
    return GaugeMetricFamily(name, documentation)
Esempio n. 40
0
def gen_zombie_process_counter():
    """Build the ``zombie_process_count`` family, labelled by command.

    Despite the function name, the family created is a gauge.
    """
    label_names = ["command"]
    family = GaugeMetricFamily(
        "zombie_process_count",
        "count of zombie process",
        labels=label_names,
    )
    return family
Esempio n. 41
0
def gen_gpu_util_gauge():
    """Build the ``nvidiasmi_utilization_gpu`` gauge family.

    Samples are labelled by ``minor_number`` and ``uuid``.
    """
    label_names = ["minor_number", "uuid"]
    family = GaugeMetricFamily(
        "nvidiasmi_utilization_gpu",
        "gpu core utilization of card",
        labels=label_names,
    )
    return family
Esempio n. 42
0
def gen_gpu_used_by_zombie_container_counter():
    """Build the ``gpu_used_by_zombie_container_count`` family.

    Samples are labelled by ``minor_number`` and ``container_id``. Despite
    the function name, the family created is a gauge.
    """
    label_names = ["minor_number", "container_id"]
    family = GaugeMetricFamily(
        "gpu_used_by_zombie_container_count",
        "count of gpu used by zombie container",
        labels=label_names,
    )
    return family
Esempio n. 43
0
def gen_gpu_retired_page_count():
    """Build the ``nvidiasmi_retired_page_count`` gauge family.

    Samples are labelled by ``minor_number``, ``uuid`` and ``type``.
    """
    label_names = ["minor_number", "uuid", "type"]
    family = GaugeMetricFamily(
        "nvidiasmi_retired_page_count",
        "count of nvidia ecc retired page",
        labels=label_names,
    )
    return family
Esempio n. 44
0
 def add_gauge(self, name, desc, labels):
     """Create a gauge family and store it in ``self.gauges`` under ``name``.

     An existing entry with the same name is replaced.
     """
     family = GaugeMetricFamily(name, desc, labels=labels)
     self.gauges[name] = family
Esempio n. 45
0
def gen_gpu_ecc_counter():
    """Build the ``nvidiasmi_ecc_error_count`` family.

    Samples are labelled by ``minor_number`` and ``type``. Despite the
    function name, the family created is a gauge.
    """
    label_names = ["minor_number", "type"]
    family = GaugeMetricFamily(
        "nvidiasmi_ecc_error_count",
        "count of nvidia ecc error",
        labels=label_names,
    )
    return family
Esempio n. 46
0
def gen_gpu_mem_util_gauge():
    """Build the ``gpu_mem_utilization`` gauge family.

    Samples are labelled by ``minor_number`` and ``vender`` (the label is
    spelled this way in the exported metric).
    """
    label_names = ["minor_number", "vender"]
    family = GaugeMetricFamily(
        "gpu_mem_utilization",
        "gpu memory utilization of card",
        labels=label_names,
    )
    return family
Esempio n. 47
0
def gen_gpu_memory_leak_counter():
    """Build the ``nvidiasmi_memory_leak_count`` family, labelled by minor_number.

    Despite the function name, the family created is a gauge.
    """
    label_names = ["minor_number"]
    family = GaugeMetricFamily(
        "nvidiasmi_memory_leak_count",
        "count of nvidia memory leak",
        labels=label_names,
    )
    return family
Esempio n. 48
0
def gen_nvidia_gpu_temperature_gauge():
    """Build the ``nvidiasmi_temperature`` gauge family, labelled by minor_number."""
    label_names = ["minor_number"]
    family = GaugeMetricFamily(
        "nvidiasmi_temperature",
        "gpu temperature of card",
        labels=label_names,
    )
    return family
Esempio n. 49
0
    def collect(self) -> Iterable[Metric]:
        """Yield PyPy GC metrics read from ``gc.get_stats()``.

        Two families are produced, in this order:

        * ``pypy_gc_time_seconds_total`` - a counter with a single sample.
        * ``pypy_memory_bytes`` - a gauge labelled by ``state``, ``class``
          and ``kind``.
        """
        # `stats` pretty-prints as a table through __str__(); its public
        # fields are pre-formatted strings (e.g. '4.5MB'), so the numeric
        # values are read from the private `_s` object instead.
        stats = gc.get_stats(memory_pressure=False)  # type: ignore
        s = stats._s  # type: ignore

        # The field names only loosely match the rows of the printed table.
        # Mapping between the table and the fields used below:
        # >>>> gc.get_stats(False)
        # Total memory consumed:
        #     GC used:            8.7MB (peak: 39.0MB)        # s.total_gc_memory, s.peak_memory
        #        in arenas:            3.0MB                  # s.total_arena_memory
        #        rawmalloced:          1.7MB                  # s.total_rawmalloced_memory
        #        nursery:              4.0MB                  # s.nursery_size
        #     raw assembler used: 31.0kB                      # s.jit_backend_used
        #     -----------------------------
        #     Total:              8.8MB                       # stats.memory_used_sum
        #
        #     Total memory allocated:
        #     GC allocated:            38.7MB (peak: 41.1MB)  # s.total_allocated_memory, s.peak_allocated_memory
        #        in arenas:            30.9MB                 # s.peak_arena_memory
        #        rawmalloced:          4.1MB                  # s.peak_rawmalloced_memory
        #        nursery:              4.0MB                  # s.nursery_size
        #     raw assembler allocated: 1.0MB                  # s.jit_backend_allocated
        #     -----------------------------
        #     Total:                   39.7MB                 # stats.memory_allocated_sum
        #
        #     Total time spent in GC:  0.073                  # s.total_gc_time

        gc_time_family = CounterMetricFamily(
            "pypy_gc_time_seconds_total",
            "Total time spent in PyPy GC",
            labels=[],
        )
        # Presumably total_gc_time is in milliseconds, hence the division
        # for a metric named in seconds - TODO confirm.
        gc_time_family.add_metric([], s.total_gc_time / 1000)
        yield gc_time_family

        memory_family = GaugeMetricFamily(
            "pypy_memory_bytes",
            "Memory tracked by PyPy allocator",
            labels=["state", "class", "kind"],
        )
        # (state, class, kind, attribute of `s` holding the value)
        memory_samples = (
            # memory used by JIT assembler
            ("used", "", "jit", "jit_backend_used"),
            ("allocated", "", "jit", "jit_backend_allocated"),
            # memory used by GCed objects
            ("used", "", "arenas", "total_arena_memory"),
            ("allocated", "", "arenas", "peak_arena_memory"),
            ("used", "", "rawmalloced", "total_rawmalloced_memory"),
            ("allocated", "", "rawmalloced", "peak_rawmalloced_memory"),
            ("used", "", "nursery", "nursery_size"),
            ("allocated", "", "nursery", "nursery_size"),
            # totals
            ("used", "totals", "gc", "total_gc_memory"),
            ("allocated", "totals", "gc", "total_allocated_memory"),
            ("used", "totals", "gc_peak", "peak_memory"),
            ("allocated", "totals", "gc_peak", "peak_allocated_memory"),
        )
        for state, mem_class, kind, field_name in memory_samples:
            memory_family.add_metric(
                [state, mem_class, kind], getattr(s, field_name)
            )
        yield memory_family
Esempio n. 50
0
    def collect(self) -> Iterable[Metric]:
        """Yield the ``python_gc_counts`` gauge family.

        One sample is added per entry of ``gc.get_count()``, labelled with
        the entry's index as ``gen``.
        """
        family = GaugeMetricFamily(
            "python_gc_counts",
            "GC object counts",
            labels=["gen"],
        )
        counts = gc.get_count()
        for generation, count in enumerate(counts):
            family.add_metric([str(generation)], count)

        yield family
    def collect(self):
        """Fetch stats from every NetScaler and yield them as metric families.

        For each entity in ``self.metrics`` the configured 'counters' are
        yielded first (as ``CounterMetricFamily``), then the 'gauges' (as
        ``GaugeMetricFamily``). Every family carries the entity's configured
        labels plus a trailing ``nsip`` label.

        Collection is best-effort: a failed fetch or a stats item that cannot
        be turned into a sample is reported on stdout and skipped, so one bad
        item does not abort the whole scrape.
        """
        # Collect metrics from NetScalers. An entity whose fetch failed is
        # simply absent from data[nsip].
        data = {}
        for nsip in self.nsips:
            data[nsip] = {}
            for entity in self.metrics:
                print('>>> Collecting stats for: %s::%s' % (nsip, entity))
                try:
                    data[nsip][entity] = collect_data(
                        nsip, entity, self.username, self.password, self.secure)
                except Exception as e:
                    print('>>> Caught exception while collecting data: ' + str(e))

        # (config key, word used in the error message, family class).
        # Counters come before gauges to keep the original yield order; the
        # message word is kept exactly as it was printed before.
        family_kinds = (
            ('counters', 'counter', CounterMetricFamily),
            ('gauges', 'guage', GaugeMetricFamily),
        )

        # Provide collected stats to Prometheus with the desired labels
        for entity_name, entity in self.metrics.items():
            # Each 'labels' entry is indexed as (stats key, Prometheus label).
            label_pairs = entity['labels'] if 'labels' in entity else []
            label_keys = [pair[0] for pair in label_pairs]
            label_names = [pair[1] for pair in label_pairs]
            label_names.append('nsip')

            for config_key, kind, family_cls in family_kinds:
                for ns_metric_name, prom_metric_name in entity.get(config_key, []):
                    family = family_cls(
                        prom_metric_name, ns_metric_name, labels=label_names)
                    for nsip in self.nsips:
                        entity_stats = data[nsip].get(entity_name, [])
                        # A single stats object is treated like a one-item list.
                        if not isinstance(entity_stats, list):
                            entity_stats = [entity_stats]

                        for data_item in entity_stats:
                            try:
                                # The label lookup is inside the try so that an
                                # item missing a label key is skipped like any
                                # other bad sample instead of aborting collect().
                                label_values = [data_item[key] for key in label_keys]
                                label_values.append(nsip)
                                family.add_metric(
                                    label_values, float(data_item[ns_metric_name]))
                            except Exception as e:
                                print('>>> Caught exception while adding %s %s to %s: %s'
                                      % (kind, ns_metric_name, entity_name, str(e)))
                    yield family