Code example #1
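# Presumably a test fixture/factory (any decorator is not shown): builds a
# Check named "test" that re-sends its state every 60 s via Timedelta.from_s.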
def check():
    return Check(
        name="test",
        metrics=["foo"],
        report_queue=ReportQueue(),
        value_constraints=None,
        resend_interval=Timedelta.from_s(60),
    )
Code example #2
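    # Configures the NSCA reporter sink: builds the NscaConfig from the "nsca"
    # section, parses optional overrides, converts the resend interval string
    # (e.g. "3min") with Timedelta.from_string, and (re)initializes the checks.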
    async def _configure(
        self,
        *,
        checks,
        nsca: dict,
        reporting_host: str = DEFAULT_HOSTNAME,
        resend_interval: str = "3min",
        overrides: Optional[dict] = None,
        **_kwargs,
    ) -> None:
        self._reporting_host = reporting_host
        # ignore unknown keys in the NSCA config
        known_keys = {f.name for f in dataclass_fields(NscaConfig)}
        self._nsca_config = NscaConfig(
            **{cfg_key: v for cfg_key, v in nsca.items() if cfg_key in known_keys}
        )

        if overrides is not None:
            try:
                self._overrides = Overrides.from_config(overrides)
            except (ValueError, TypeError) as e:
                logger.error("Invalid overrides section in configuration: {}",
                             e)
                raise
        else:
            logger.debug(
                'Configuration did not contain an "overrides" section')

        try:
            self._global_resend_interval = Timedelta.from_string(
                resend_interval)
        except ValueError as e:
            logger.error(
                f'Invalid resend interval "{resend_interval}" in configuration: {e}'
            )
            raise

        if not self._checks:
            self._init_checks(checks)
        else:
            await self._update_checks(checks)

        c: Check
        self._has_value_checks = any(c._has_value_checks()
                                     for c in self._checks.values())

        logger.info(
            f"Configured NSCA reporter sink for host {self._reporting_host} and checks {', '.join(self._checks)!r}"
        )
        logger.debug(f"NSCA config: {self._nsca_config!r}")
Code example #3
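    # Forever: batch reports from the queue (flushing at most every 5 seconds)
    # and forward them to NSCA as NscaReport objects.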
    async def _send_reports_loop(self):
        while True:
            report: Report
            reports = [
                NscaReport(
                    host=self._reporting_host,
                    service=report.service,
                    state=report.state,
                    message=report.message,
                )
                async for report in self._report_queue.batch(
                    timeout=Timedelta.from_s(5)
                )
            ]
            await self._send_reports(*reports)
Code example #4
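    # Reconfiguration handler for the IPMI source: builds per-host collection
    # jobs, declares all resulting metrics, then cancels the old collection/log
    # loops and spawns fresh ones.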
    async def _on_config(self, **config):
        self.period = Timedelta.from_s(1)
        jobs = []
        global CMD_IPMI_SENSORE_BASE
        CMD_IPMI_SENSORE_BASE = build_cmd_ipmi_base(
            config.get("ipmi_sensors_cmd", IPMI_SENSORS),
            config.get("ipmi_sensors_params", {}),
        )

        for cfg in config['ipmi_hosts']:
            jobs.append(
                create_conf_and_metrics(
                    cfg,
                    config.get('interval', 1),
                )
            )
        results = []
        if jobs:
            results = await asyncio.gather(*jobs)
        all_metrics = {}
        complete_conf = []
        for metrics, conf in results:
            all_metrics.update(metrics)
            complete_conf.append(conf)
        await self.declare_metrics(all_metrics)
        logger.info("declared {} metrics".format(len(all_metrics)))

        await asyncio.gather(
            *(cancel_and_wait(task) for task in self.collection_loops),
            cancel_and_wait(self.log_loop),
        )
        logger.debug("Cancelled old log/collection loops")

        self.collection_loops = spawn_collection_loops(
            complete_conf, result_queue=self.result_queue,
        )
        logger.debug("Set up new collection loops")

        self.log_loop = asyncio.ensure_future(
            log_loop(complete_conf, log_interval=config.get("log_interval", 30))
        )
        logger.debug("Set up new log loop")
Code example #5
File: main.py  Project: metricq/metricq-source-http
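    # Configuration handler for the HTTP source: declares one metric per
    # configured host and starts a periodic collection loop for each.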
    async def _on_config(self, **config):
        self.period = Timedelta.from_s(1)
        new_conf, metrics = make_conf_and_metrics(
            config['hosts'],
            config.get('interval', 1),
            config.get('http_timeout', 5),
        )
        await self.declare_metrics(metrics)
        logger.info("declared {} metrics".format(len(metrics)))
        request_loops = []
        for metric_name, conf in new_conf.items():
            request_loops.append(
                collect_periodically(
                    metric_name,
                    conf,
                    self.result_queue,
                )
            )
        asyncio.gather(*request_loops)  # FIXME: close loops when _on_config is called multiple times
Code example #6
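    # Declares psutil-based system metrics (CPU usage, memory, swap, per-NIC
    # network and per-disk I/O rates) under a configurable prefix.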
    async def _on_config(self, **config):
        logger.info("config: {}", config)
        rate = config["rate"]
        self.period = Timedelta.from_s(1 / rate)
        try:
            self.prefix = config["prefix"]
            if self.prefix != "" and not self.prefix.endswith("."):
                self.prefix = self.prefix + "."
        except KeyError:
            logger.info("No explicit prefix given, using hostname")
            self.prefix = socket.gethostname() + "."

        meta = dict()

        # Initialize CPU usage:
        psutil.cpu_percent(percpu=True)
        meta["cpu.usage"] = {
            "rate": rate,
            "description": "CPU usage (100% = 1 logical CPU busy)",
            "unit": "%",
        }

        # Initialize memory
        for mem_name in psutil.virtual_memory()._fields:
            meta[f"mem.{mem_name}"] = {
                "rate": rate,
                "description": "See https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory",
                "unit": "%" if mem_name == "percent" else "B",
            }

        for swap_name in psutil.swap_memory()._fields:
            meta[f"swap.{swap_name}"] = {
                "rate": rate,
                "description": "See https://psutil.readthedocs.io/en/latest/#psutil.swap_memory",
                "unit": "%" if swap_name == "percent" else "B",
            }

        # Network
        self.prev_net_io = psutil.net_io_counters(pernic=True, nowrap=True)
        self.prev_timestamp = Timestamp.now()
        for nic_name in self.prev_net_io.keys():
            for sr in "sent", "recv":
                meta[f"net.{nic_name}.{sr}.bytes"] = {
                    "rate": rate,
                    "description": f"Total data {sr} on nic {nic_name}",
                    "unit": "B/s",
                }
                meta[f"net.{nic_name}.{sr}.packets"] = {
                    "rate": rate,
                    "description": f"Number of packets {sr} on nic {nic_name}",
                    "unit": "Hz",
                }

        # Disk
        self.prev_disk_io = psutil.disk_io_counters(perdisk=True, nowrap=True)
        for disk_name in self.prev_disk_io.keys():
            for rw in "read", "written":
                meta[f"disk.{disk_name}.{rw}.count"] = {
                    "rate": rate,
                    "description": f"Number of {rw}s on partition {disk_name}",
                    "unit": "Hz",
                }
                meta[f"disk.{disk_name}.{rw}.bytes"] = {
                    "rate": rate,
                    "description": f"Total data {rw} on partition {disk_name}",
                    "unit": "B/s",
                }

        await self.declare_metrics(
            {self.prefix + key: value for key, value in meta.items()}
        )
Code example #7
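# Presumably a factory/fixture: returns an empty StateTransitionHistory
# spanning a 60-second time window.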
def empty_transition_history():
    return StateTransitionHistory(time_window=Timedelta.from_s(60))
Code example #8
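    # Worker task for one BACnet device: fetches device and object properties,
    # declares the derived metrics, then polls values on a fixed deadline
    # schedule, optionally sending NaN when a metric times out.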
    async def _worker_task(self, object_group, worker_task_stop_future):
        start_time = Timestamp.now()
        interval = object_group["interval"]
        device_address_str = object_group["device_address_str"]
        object_type = object_group["object_type"]
        objects = [(object_type, instance)
                   for instance in object_group["object_instances"]]
        chunk_size = object_group.get("chunk_size")

        logger.debug(
            f"starting BACnetSource worker task for device {device_address_str}"
        )

        logger.debug(
            "This is {}the main thread.",
            "" if threading.current_thread() == threading.main_thread()
            else "not ",
        )

        # wait for a random time between 10 ms and 10.01 s
        random_wait_time = random.random() * 10 + 0.01
        await asyncio.sleep(random_wait_time)
        self._worker_tasks_count_starting += 1

        await self.event_loop.run_in_executor(
            None,
            functools.partial(
                self._bacnet_reader.request_device_properties,
                device_address_str,
                skip_when_cached=True,
                request_timeout=Timedelta.from_s(30),
            ),
        )
        await self.event_loop.run_in_executor(
            None,
            functools.partial(
                self._bacnet_reader.request_object_properties,
                device_address_str,
                objects,
                skip_when_cached=True,
                chunk_size=chunk_size,
                request_timeout=Timedelta.from_s(30),
            ),
        )

        device_info = self._bacnet_reader.get_device_info(
            device_address_str,
            device_identifier=object_group.get("device_identifier"))
        if device_info is None:
            logger.error("Missing device info for {}. Stopping worker task!",
                         device_address_str)
            self._worker_tasks_count_failed += 1
            return

        device_name = self._object_name_vendor_specific_mapping.get(
            device_info["objectName"], device_info["objectName"])

        device_name = substitute_all(
            device_name, self._object_name_vendor_specific_substitutions)

        metrics = {}
        missing_metrics = 0

        for object_instance in object_group["object_instances"]:
            metadata = {
                "rate": 1.0 / interval,
                "device": device_address_str,
                "objectType": object_type,
                "objectInstance": object_instance,
            }
            object_info = self._bacnet_reader.get_object_info(
                device_address_str, object_type, object_instance)
            if (object_info is None or "objectName" not in object_info
                    or "description" not in object_info):
                logger.error(
                    "No object info for ({}, {}) of {} available!",
                    object_type,
                    object_instance,
                    device_address_str,
                )
                missing_metrics += 1
                continue

            # Get vendor-specific-address from object cache
            object_name = object_info.get("3000", object_info["objectName"])

            object_name = self._object_name_vendor_specific_mapping.get(
                object_name, object_name)

            object_name = substitute_all(
                object_name, self._object_name_vendor_specific_substitutions)

            metric_id = (
                Template(object_group["metric_id"])
                .safe_substitute({
                    "objectName": object_name,
                    "deviceName": device_name,
                })
                .replace("'", ".")
                .replace("`", ".")
                .replace("´", ".")
                .replace(" ", "")
            )
            if "description" in object_group:
                description = (
                    Template(object_group["description"])
                    .safe_substitute({
                        "objectName": object_name,
                        "objectDescription": object_info["description"],
                        "deviceName": device_name,
                        "deviceDescription": device_info["description"],
                    })
                    .replace("'", ".")
                    .replace("`", ".")
                    .replace("´", ".")
                )
                metadata["description"] = substitute_all(
                    description,
                    self._object_description_vendor_specific_substitutions)
            if "units" in object_info:
                metadata["unit"] = object_info["units"]

            metrics[metric_id] = metadata

        try:
            await self.declare_metrics(metrics)
        except RPCError:
            logger.exception(
                f"Can't declare metadata for device {device_address_str}. Stopping worker task!"
            )
            self._worker_tasks_count_failed += 1
            return

        segmentationSupport = "unknown"
        device_address = Address(device_address_str)
        device_info = self._bacnet_reader.deviceInfoCache.get_device_info(
            device_address)
        if device_info:
            segmentationSupport = device_info.segmentationSupported

        start_duration = Timestamp.now() - start_time

        logger.info(
            f"Started BACnetSource worker task for device {device_address_str}! Took {start_duration.s - random_wait_time:.2f} s (waited {random_wait_time:.2f} s), {missing_metrics} metrics have no object info"
        )

        self._worker_tasks_count_running += 1
        deadline = Timestamp.now()
        while True:
            self._bacnet_reader.request_values(device_address_str,
                                               objects,
                                               chunk_size=chunk_size)

            if object_group.get("nan_at_timeout"):
                for metric_id in metrics:
                    now = Timestamp.now()
                    last_timestamp = self._last_time_send_by_metric.get(
                        metric_id, now)
                    if now - last_timestamp >= Timedelta.from_s(6 * interval):
                        timestamp_nan = last_timestamp + Timedelta.from_s(
                            5 * interval)
                        await self.send(metric_id, timestamp_nan, float("nan"))
                        self._last_time_send_by_metric[
                            metric_id] = timestamp_nan

                        logger.warning(
                            "Timeout for metric {} reached. Sending NaN! Device: {}",
                            metric_id,
                            device_address_str,
                        )
            try:
                deadline += Timedelta.from_s(interval)
                now = Timestamp.now()
                while now >= deadline:
                    logger.warning(
                        "Missed deadline {}, it is now {}. Device: {}, {}, chunk size: {}",
                        deadline,
                        now,
                        device_address_str,
                        segmentationSupport,
                        chunk_size,
                    )
                    deadline += Timedelta.from_s(interval)

                timeout = (deadline - now).s
                await asyncio.wait_for(asyncio.shield(worker_task_stop_future),
                                       timeout=timeout)
                worker_task_stop_future.result()
                logger.info("stopping BACnetSource worker task")
                break
            except asyncio.TimeoutError:
                # This is the normal case, just continue with the loop
                continue
Code example #9
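    # Main task: drains the result queue, maps vendor-specific object names,
    # builds metric ids from the configured template, and sends present values
    # until the stop future resolves; logs worker counts every 5 minutes.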
    async def task(self):
        self._main_task_stop_future = self.event_loop.create_future()

        logger.info(
            f"Current worker count (expected/starting/running/failed): ({self._worker_tasks_count_expected}/{self._worker_tasks_count_starting}/{self._worker_tasks_count_running}/{self._worker_tasks_count_failed})"
        )
        last_state_log = Timestamp.now()

        while True:
            queue_get_task = asyncio.create_task(self._result_queue.get())
            done, pending = await asyncio.wait(
                {queue_get_task, self._main_task_stop_future},
                return_when=asyncio.FIRST_COMPLETED,
            )

            if queue_get_task in done:
                result: Tuple[Timestamp, str, str, Dict] = queue_get_task.result()

                timestamp, device_name, device_address_string, result_values = result

                device_config = self._device_config[device_address_string]
                device_name = self._object_name_vendor_specific_mapping.get(
                    device_name, device_name)

                device_name = substitute_all(
                    device_name,
                    self._object_name_vendor_specific_substitutions)

                for object_name, object_result in result_values.items():
                    object_name = self._object_name_vendor_specific_mapping.get(
                        object_name, object_name)

                    object_name = substitute_all(
                        object_name,
                        self._object_name_vendor_specific_substitutions)

                    # TODO maybe support more placeholders
                    metric_id = (
                        Template(device_config["metric_id"])
                        .safe_substitute({
                            "objectName": object_name,
                            "deviceName": device_name,
                        })
                        .replace("'", ".")
                        .replace("`", ".")
                        .replace("´", ".")
                        .replace(" ", "")
                    )
                    if "presentValue" in object_result and isinstance(
                            object_result["presentValue"], (int, float)):
                        await self.send(metric_id, timestamp,
                                        object_result["presentValue"])
                        self._last_time_send_by_metric[metric_id] = timestamp

                self._result_queue.task_done()

            if Timestamp.now() - last_state_log > Timedelta.from_string(
                    "5min"):
                logger.info(
                    f"Current worker count (expected/starting/running/failed): ({self._worker_tasks_count_expected}/{self._worker_tasks_count_starting}/{self._worker_tasks_count_running}/{self._worker_tasks_count_failed})"
                )
                last_state_log = Timestamp.now()

            if self._main_task_stop_future in done:
                logger.info("stopping BACnetSource main task")
                break