def check():
    return Check(
        name="test",
        metrics=["foo"],
        report_queue=ReportQueue(),
        value_constraints=None,
        resend_interval=Timedelta.from_s(60),
    )
async def _configure(
    self,
    *,
    checks,
    nsca: dict,
    reporting_host: str = DEFAULT_HOSTNAME,
    resend_interval: str = "3min",
    overrides: Optional[dict] = None,
    **_kwargs,
) -> None:
    self._reporting_host = reporting_host
    self._nsca_config = NscaConfig(
        **{
            cfg_key: v
            for cfg_key, v in nsca.items()
            # ignore unknown keys in NSCA config
            if cfg_key in set(f.name for f in dataclass_fields(NscaConfig))
        }
    )

    if overrides is not None:
        try:
            self._overrides = Overrides.from_config(overrides)
        except (ValueError, TypeError) as e:
            logger.error("Invalid overrides section in configuration: {}", e)
            raise
    else:
        logger.debug('Configuration did not contain an "overrides" section')

    try:
        self._global_resend_interval = Timedelta.from_string(resend_interval)
    except ValueError as e:
        logger.error(
            f'Invalid resend interval "{resend_interval}" in configuration: {e}'
        )
        raise

    if not self._checks:
        self._init_checks(checks)
    else:
        await self._update_checks(checks)

    c: Check
    self._has_value_checks = any(
        c._has_value_checks() for c in self._checks.values()
    )

    logger.info(
        f"Configured NSCA reporter sink for host {self._reporting_host} "
        f"and checks {', '.join(self._checks)!r}"
    )
    logger.debug(f"NSCA config: {self._nsca_config!r}")
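# For illustration, a configuration that _configure() would accept might look
# like the sketch below. This is an assumption for readability only: the exact
# NscaConfig field names and the per-check schema are not shown in this file,
# so the keys "host", "port", and the check layout are hypothetical.
example_config = {
    "reporting_host": "compute-node-01",
    "resend_interval": "3min",
    "nsca": {
        "host": "nagios.example.com",  # hypothetical NscaConfig field
        "port": 5667,                  # hypothetical NscaConfig field
        "some_unknown_key": True,      # silently dropped by the filter above
    },
    "checks": {
        # hypothetical check schema
        "room.temperature.ok": {"metrics": ["room.temperature"]},
    },
}
# await sink._configure(**example_config)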
async def _send_reports_loop(self):
    while True:
        report: Report
        reports = [
            NscaReport(
                host=self._reporting_host,
                service=report.service,
                state=report.state,
                message=report.message,
            )
            async for report in self._report_queue.batch(
                timeout=Timedelta.from_s(5)
            )
        ]
        await self._send_reports(*reports)
async def _on_config(self, **config):
    self.period = Timedelta.from_s(1)
    jobs = []
    global CMD_IPMI_SENSORE_BASE
    CMD_IPMI_SENSORE_BASE = build_cmd_ipmi_base(
        config.get("ipmi_sensors_cmd", IPMI_SENSORS),
        config.get("ipmi_sensors_params", {}),
    )
    for cfg in config["ipmi_hosts"]:
        jobs.append(
            create_conf_and_metrics(
                cfg,
                config.get("interval", 1),
            )
        )
    results = []
    if jobs:
        results = await asyncio.gather(*jobs)

    all_metrics = {}
    complete_conf = []
    for metrics, conf in results:
        all_metrics = {**all_metrics, **metrics}
        complete_conf.append(conf)

    await self.declare_metrics(all_metrics)
    logger.info("declared {} metrics".format(len(all_metrics)))

    await asyncio.gather(
        *(cancel_and_wait(task) for task in self.collection_loops),
        cancel_and_wait(self.log_loop),
    )
    logger.debug("Cancelled old log/collection loops")

    self.collection_loops = spawn_collection_loops(
        complete_conf,
        result_queue=self.result_queue,
    )
    logger.debug("Set up new collection loops")

    self.log_loop = asyncio.ensure_future(
        log_loop(complete_conf, log_interval=config.get("log_interval", 30))
    )
    logger.debug("Set up new log loop")
async def _on_config(self, **config):
    self.period = Timedelta.from_s(1)
    new_conf, metrics = make_conf_and_metrics(
        config["hosts"],
        config.get("interval", 1),
        config.get("http_timeout", 5),
    )
    await self.declare_metrics(metrics)
    logger.info("declared {} metrics".format(len(metrics)))

    request_loops = []
    for metric_name, conf in new_conf.items():
        request_loops.append(
            collect_periodically(
                metric_name,
                conf,
                self.result_queue,
            )
        )
    asyncio.gather(*request_loops)
    # FIXME: close loops when _on_config is called multiple times
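# One way to address the FIXME above, sketched under the assumption that the
# cancel_and_wait() pattern of the IPMI source is acceptable here: keep the
# spawned request loops as tasks (the attribute name self._request_loop_tasks
# is hypothetical) and cancel them at the start of the next _on_config()
# before new ones are created.

async def cancel_and_wait(task: asyncio.Task) -> None:
    # Request cancellation, then wait until the task has actually finished,
    # swallowing the CancelledError raised on successful cancellation.
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass

# In _on_config():
#     await asyncio.gather(*(cancel_and_wait(t) for t in self._request_loop_tasks))
#     self._request_loop_tasks = [
#         asyncio.create_task(collect_periodically(name, conf, self.result_queue))
#         for name, conf in new_conf.items()
#     ]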
async def _on_config(self, **config):
    logger.info("config: {}", config)
    rate = config["rate"]
    self.period = Timedelta.from_s(1 / rate)
    try:
        self.prefix = config["prefix"]
        if self.prefix != "" and not self.prefix.endswith("."):
            self.prefix = self.prefix + "."
    except KeyError:
        logger.info("No explicit prefix given, using hostname")
        self.prefix = socket.gethostname() + "."

    meta = dict()

    # Initialize CPU usage: the first cpu_percent() call only establishes the
    # measurement baseline; subsequent calls report usage since the last call.
    psutil.cpu_percent(percpu=True)
    meta["cpu.usage"] = {
        "rate": rate,
        "description": "CPU usage (100% = 1 logical CPU busy)",
        "unit": "%",
    }

    # Initialize memory
    for mem_name in psutil.virtual_memory()._fields:
        meta[f"mem.{mem_name}"] = {
            "rate": rate,
            "description": "See https://psutil.readthedocs.io/en/latest/#psutil.virtual_memory",
            "unit": "%" if mem_name == "percent" else "B",
        }
    for swap_name in psutil.swap_memory()._fields:
        meta[f"swap.{swap_name}"] = {
            "rate": rate,
            "description": "See https://psutil.readthedocs.io/en/latest/#psutil.swap_memory",
            "unit": "%" if swap_name == "percent" else "B",
        }

    # Network
    self.prev_net_io = psutil.net_io_counters(pernic=True, nowrap=True)
    self.prev_timestamp = Timestamp.now()
    for nic_name in self.prev_net_io.keys():
        for sr in "sent", "recv":
            meta[f"net.{nic_name}.{sr}.bytes"] = {
                "rate": rate,
                "description": f"Total data {sr} on nic {nic_name}",
                "unit": "B/s",
            }
            meta[f"net.{nic_name}.{sr}.packets"] = {
                "rate": rate,
                "description": f"Number of packets {sr} on nic {nic_name}",
                "unit": "Hz",
            }

    # Disk
    self.prev_disk_io = psutil.disk_io_counters(perdisk=True, nowrap=True)
    for disk_name in self.prev_disk_io.keys():
        for rw in "read", "written":
            meta[f"disk.{disk_name}.{rw}.count"] = {
                "rate": rate,
                "description": f"Number of {rw}s on partition {disk_name}",
                "unit": "Hz",
            }
            meta[f"disk.{disk_name}.{rw}.bytes"] = {
                "rate": rate,
                "description": f"Total data {rw} on partition {disk_name}",
                "unit": "B/s",
            }

    await self.declare_metrics(
        {self.prefix + key: value for key, value in meta.items()}
    )
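# The prev_net_io/prev_disk_io/prev_timestamp baselines stored above exist so
# that the collection step can turn monotonically increasing counters into
# rates. A minimal sketch of that conversion for the network counters (the
# method name _net_rates is hypothetical; the actual update loop of this
# source is not shown here):

def _net_rates(self):
    now = Timestamp.now()
    net_io = psutil.net_io_counters(pernic=True, nowrap=True)
    duration_s = (now - self.prev_timestamp).s
    rates = {}
    for nic_name, counters in net_io.items():
        prev = self.prev_net_io.get(nic_name)
        if prev is None or duration_s <= 0:
            continue
        # Delta of the cumulative counters divided by elapsed time gives B/s.
        rates[f"net.{nic_name}.sent.bytes"] = (
            counters.bytes_sent - prev.bytes_sent
        ) / duration_s
        rates[f"net.{nic_name}.recv.bytes"] = (
            counters.bytes_recv - prev.bytes_recv
        ) / duration_s
    self.prev_net_io = net_io
    self.prev_timestamp = now
    return rates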
def empty_transition_history():
    return StateTransitionHistory(time_window=Timedelta.from_s(60))
async def _worker_task(self, object_group, worker_task_stop_future):
    start_time = Timestamp.now()
    interval = object_group["interval"]
    device_address_str = object_group["device_address_str"]
    object_type = object_group["object_type"]
    objects = [
        (object_type, instance)
        for instance in object_group["object_instances"]
    ]
    chunk_size = object_group.get("chunk_size")

    logger.debug(
        f"starting BACnetSource worker task for device {device_address_str}"
    )
    logger.debug(
        "This is {} the main thread.",
        ""
        if threading.current_thread() == threading.main_thread()
        else "not",
    )

    # wait for a random time between 10 ms and 10.01 s to spread out startup
    random_wait_time = random.random() * 10 + 0.01
    await asyncio.sleep(random_wait_time)

    self._worker_tasks_count_starting += 1

    # BACnet requests are blocking, so run them in the executor pool
    await self.event_loop.run_in_executor(
        None,
        functools.partial(
            self._bacnet_reader.request_device_properties,
            device_address_str,
            skip_when_cached=True,
            request_timeout=Timedelta.from_s(30),
        ),
    )
    await self.event_loop.run_in_executor(
        None,
        functools.partial(
            self._bacnet_reader.request_object_properties,
            device_address_str,
            objects,
            skip_when_cached=True,
            chunk_size=chunk_size,
            request_timeout=Timedelta.from_s(30),
        ),
    )

    device_info = self._bacnet_reader.get_device_info(
        device_address_str,
        device_identifier=object_group.get("device_identifier"),
    )
    if device_info is None:
        logger.error(
            "Missing device info for {}. Stopping worker task!",
            device_address_str,
        )
        self._worker_tasks_count_failed += 1
        return

    device_name = self._object_name_vendor_specific_mapping.get(
        device_info["objectName"], device_info["objectName"]
    )
    device_name = substitute_all(
        device_name, self._object_name_vendor_specific_substitutions
    )

    metrics = {}
    missing_metrics = 0
    for object_instance in object_group["object_instances"]:
        metadata = {
            "rate": 1.0 / interval,
            "device": device_address_str,
            "objectType": object_type,
            "objectInstance": object_instance,
        }
        object_info = self._bacnet_reader.get_object_info(
            device_address_str, object_type, object_instance
        )
        if (
            object_info is None
            or "objectName" not in object_info
            or "description" not in object_info
        ):
            logger.error(
                "No object info for ({}, {}) of {} available!",
                object_type,
                object_instance,
                device_address_str,
            )
            missing_metrics += 1
            continue

        # Get vendor-specific address from the object cache
        object_name = object_info.get("3000", object_info["objectName"])
        object_name = self._object_name_vendor_specific_mapping.get(
            object_name, object_name
        )
        object_name = substitute_all(
            object_name, self._object_name_vendor_specific_substitutions
        )

        metric_id = (
            Template(object_group["metric_id"])
            .safe_substitute(
                {"objectName": object_name, "deviceName": device_name}
            )
            .replace("'", ".")
            .replace("`", ".")
            .replace("´", ".")
            .replace(" ", "")
        )

        if "description" in object_group:
            description = (
                Template(object_group["description"])
                .safe_substitute(
                    {
                        "objectName": object_name,
                        "objectDescription": object_info["description"],
                        "deviceName": device_name,
                        "deviceDescription": device_info["description"],
                    }
                )
                .replace("'", ".")
                .replace("`", ".")
                .replace("´", ".")
            )
            metadata["description"] = substitute_all(
                description,
                self._object_description_vendor_specific_substitutions,
            )

        if "units" in object_info:
            metadata["unit"] = object_info["units"]

        metrics[metric_id] = metadata

    try:
        await self.declare_metrics(metrics)
    except RPCError:
        logger.exception(
            f"Can't declare metadata for device {device_address_str}. "
            "Stopping worker task!"
        )
        self._worker_tasks_count_failed += 1
        return

    segmentation_support = "unknown"
    device_address = Address(device_address_str)
    device_info = self._bacnet_reader.deviceInfoCache.get_device_info(
        device_address
    )
    if device_info:
        segmentation_support = device_info.segmentationSupported

    start_duration = Timestamp.now() - start_time
    logger.info(
        f"Started BACnetSource worker task for device {device_address_str}! "
        f"Took {start_duration.s - random_wait_time:.2f} s "
        f"(waited {random_wait_time:.2f} s), "
        f"{missing_metrics} metrics have no object info"
    )
    self._worker_tasks_count_running += 1

    deadline = Timestamp.now()
    while True:
        self._bacnet_reader.request_values(
            device_address_str, objects, chunk_size=chunk_size
        )
        if object_group.get("nan_at_timeout"):
            for metric_id in metrics:
                now = Timestamp.now()
                last_timestamp = self._last_time_send_by_metric.get(
                    metric_id, now
                )
                if now - last_timestamp >= Timedelta.from_s(6 * interval):
                    timestamp_nan = last_timestamp + Timedelta.from_s(
                        5 * interval
                    )
                    await self.send(metric_id, timestamp_nan, float("nan"))
                    self._last_time_send_by_metric[metric_id] = timestamp_nan
                    logger.warning(
                        "Timeout for metric {} reached. Sending NaN! Device: {}",
                        metric_id,
                        device_address_str,
                    )
        try:
            deadline += Timedelta.from_s(interval)
            now = Timestamp.now()
            while now >= deadline:
                logger.warning(
                    "Missed deadline {}, it is now {}. Device: {}, {}, chunk size: {}",
                    deadline,
                    now,
                    device_address_str,
                    segmentation_support,
                    chunk_size,
                )
                deadline += Timedelta.from_s(interval)
            timeout = (deadline - now).s
            await asyncio.wait_for(
                asyncio.shield(worker_task_stop_future), timeout=timeout
            )
            worker_task_stop_future.result()
            logger.info("stopping BACnetSource worker task")
            break
        except asyncio.TimeoutError:
            # This is the normal case: the stop future did not complete
            # within one interval, so just continue with the loop.
            continue
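# Note on the stop protocol used by _worker_task() above: wait_for() cancels
# its awaitable on timeout, so the stop future is wrapped in asyncio.shield()
# to keep each periodic timeout from cancelling the future itself. Whoever
# owns the workers stops them by resolving that future, which wakes every
# worker out of its shielded wait. A minimal sketch (the method name
# _stop_worker_tasks and the attribute holding the future are hypothetical):

def _stop_worker_tasks(self):
    if not self._worker_task_stop_future.done():
        # All workers waiting on this future return from wait_for() and
        # break out of their collection loops.
        self._worker_task_stop_future.set_result(None)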
async def task(self):
    self._main_task_stop_future = self.event_loop.create_future()
    logger.info(
        f"Current worker count (expected/starting/running/failed): "
        f"({self._worker_tasks_count_expected}/{self._worker_tasks_count_starting}/"
        f"{self._worker_tasks_count_running}/{self._worker_tasks_count_failed})"
    )
    last_state_log = Timestamp.now()
    while True:
        queue_get_task = asyncio.create_task(self._result_queue.get())
        done, pending = await asyncio.wait(
            {queue_get_task, self._main_task_stop_future},
            return_when=asyncio.FIRST_COMPLETED,
        )
        if queue_get_task in done:
            result: Tuple[Timestamp, str, str, Dict] = queue_get_task.result()
            timestamp, device_name, device_address_string, result_values = result
            device_config = self._device_config[device_address_string]
            device_name = self._object_name_vendor_specific_mapping.get(
                device_name, device_name
            )
            device_name = substitute_all(
                device_name, self._object_name_vendor_specific_substitutions
            )
            for object_name, object_result in result_values.items():
                object_name = self._object_name_vendor_specific_mapping.get(
                    object_name, object_name
                )
                object_name = substitute_all(
                    object_name,
                    self._object_name_vendor_specific_substitutions,
                )
                # TODO maybe support more placeholders
                metric_id = (
                    Template(device_config["metric_id"])
                    .safe_substitute(
                        {"objectName": object_name, "deviceName": device_name}
                    )
                    .replace("'", ".")
                    .replace("`", ".")
                    .replace("´", ".")
                    .replace(" ", "")
                )
                if "presentValue" in object_result and isinstance(
                    object_result["presentValue"], (int, float)
                ):
                    await self.send(
                        metric_id, timestamp, object_result["presentValue"]
                    )
                    self._last_time_send_by_metric[metric_id] = timestamp
            self._result_queue.task_done()
            if Timestamp.now() - last_state_log > Timedelta.from_string("5min"):
                logger.info(
                    f"Current worker count (expected/starting/running/failed): "
                    f"({self._worker_tasks_count_expected}/{self._worker_tasks_count_starting}/"
                    f"{self._worker_tasks_count_running}/{self._worker_tasks_count_failed})"
                )
                last_state_log = Timestamp.now()
        if self._main_task_stop_future in done:
            if queue_get_task in pending:
                # Avoid leaking the still-pending queue read on shutdown.
                queue_get_task.cancel()
            logger.info("stopping BACnetSource main task")
            break