def write_event(self, event: SctEvent, tee: Optional[Callable[[str], Any]] = None) -> None:
    """Record `event` in the log files and refresh the summary file.

    The formatted line is appended to `self.events_log` and to the file
    registered for the event severity in `self.events_logs_by_severity`.
    Afterwards the per-severity counter in `self.events_summary` is bumped
    and the whole summary is rewritten as JSON to `self.events_summary_log`.

    :param event: event to record
    :param tee: optional callable which also receives the text line; it is
        not called for `TestResultEvent` instances
    """
    if event.source_timestamp:
        prefix = f"{event.formatted_event_timestamp} <{event.formatted_source_timestamp}>"
    else:
        prefix = event.formatted_event_timestamp
    text = f"{prefix}: {str(event).strip()}"

    if tee and not isinstance(event, TestResultEvent):
        with verbose_suppress("%s: failed to tee %s to %s", self, event, tee):
            tee(text)

    line = text.encode("utf-8") + b"\n"

    def append_line(path) -> None:
        # A failed write is reported by verbose_suppress and does not stop the other writes.
        with verbose_suppress("%s: failed to write %s to %s", self, event, path):
            with path.open("ab+", buffering=0) as fobj:
                fobj.write(line)

    # Update events.log file (all events.)
    append_line(self.events_log)

    # Update {event.severity}.log file.
    append_line(self.events_logs_by_severity[event.severity])

    # Update summary.log file (statistics.)
    severity_name = Severity(event.severity).name
    self.events_summary[severity_name] += 1
    with verbose_suppress("%s: failed to update %s", self, self.events_summary_log):
        with self.events_summary_log.open("wb", buffering=0) as fobj:
            fobj.write(json.dumps(dict(self.events_summary), indent=4).encode("utf-8"))
def write_event(self, event: SctEvent) -> None:
    """Log `event` to the console and, if it asks for it, to the log files.

    Events whose severity is not `Severity.DEBUG` are sent to the `LOGGER`
    method named after `event.log_level` (`TestResultEvent` instances are
    never sent).  Events with a truthy `save_to_files` attribute are appended
    to `self.events_log` and to the per-severity file, when one is registered
    in `self.events_logs_by_severity`.

    :param event: event to record
    """
    if event.source_timestamp:
        message = f"{event.formatted_event_timestamp} <{event.formatted_source_timestamp}>: {str(event).strip()}"
    else:
        message = f"{event.formatted_event_timestamp}: {str(event).strip()}"
    message_bin = message.encode("utf-8") + b"\n"

    if event.severity != Severity.DEBUG:
        # Log event to the console.
        # logging.getLevelName() returns "Level N" for an unregistered level, and LOGGER
        # has no such method: fall back to None so the event is still written to the files
        # instead of failing with AttributeError.
        level_name = logging.getLevelName(event.log_level)
        tee = getattr(LOGGER, level_name.lower(), None)
        if tee and not isinstance(event, TestResultEvent):
            with verbose_suppress("%s: failed to tee %s to %s", self, event, tee):
                tee(message)

    # Write event to events.log file
    # NOTE(review): the per-severity write is assumed to be gated by `save_to_files` too -- confirm.
    if getattr(event, 'save_to_files', False):
        with verbose_suppress("%s: failed to write %s to %s", self, event, self.events_log):
            with self.events_log.open("ab+", buffering=0) as fobj:
                fobj.write(message_bin)

        if log_file := self.events_logs_by_severity.get(event.severity):
            with verbose_suppress("%s: failed to write %s to %s", self, event, log_file):
                with log_file.open("ab+", buffering=0) as fobj:
                    fobj.write(message_bin)
def publish_event(self, event, timeout=PUBLISH_EVENT_TIMEOUT) -> None:
    """Store `event` in the raw events log and put it to the publishing queue.

    The JSON form of the event is appended to `self.raw_events_log` while
    `self._raw_events_lock` is held.  The pickled event is then put to
    `self._queue` and, if that succeeds, `self._events_counter` is incremented.
    Both steps are wrapped in `verbose_suppress`.

    :param event: event to publish; must provide `to_json()` and be picklable
    :param timeout: timeout passed to the queue `put()` call
    """
    with verbose_suppress("%s: failed to write %s to %s", self, event, self.raw_events_log):
        with self._raw_events_lock:
            with open(self.raw_events_log, "ab+", buffering=0) as raw_log:
                raw_line = event.to_json().encode("utf-8") + b"\n"
                raw_log.write(raw_line)

    with verbose_suppress("%s: failed to publish %s", self, event):
        payload = pickle.dumps(event)
        self._queue.put(payload, timeout=timeout)
        # Reached only when the event was accepted by the queue.
        self._events_counter.value += 1
def run(self):
    """Publish queued events on a ZeroMQ PUB socket and verify their delivery.

    A PUB socket is bound to a random TCP port (stored in `self._sub_port`)
    and a SUB socket of the same context subscribes to it, so every sent
    message can be read back and compared with what was sent.  The loop runs
    while `self._running` is set or `self._queue` still holds events.
    """
    with suppress_interrupt(), verbose_suppress("EventsDevice failed"):
        with zmq.Context() as ctx, ctx.socket(zmq.PUB) as pub, ctx.socket(zmq.SUB) as sub:
            self._sub_port.value = pub.bind_to_random_port("tcp://*")
            self._running.set()

            LOGGER.info("EventsDevice listen on %s", self.subscribe_address)

            # Delivery verification subscriber.
            sub.connect(self.subscribe_address)
            sub.subscribe(b"")

            time.sleep(self.start_delay)

            while self._running.is_set() or not self._queue.empty():
                try:
                    # Queue items are pickled events (they are unpickled below for the log messages only.)
                    event = self._queue.get(timeout=self.pub_queue_wait_timeout)
                    try:
                        pub.send(event)
                    except zmq.ZMQError:
                        LOGGER.exception("EventsDevice failed to send %s", pickle.loads(event))
                    else:
                        try:
                            if sub.poll(timeout=self.sub_polling_timeout) and sub.recv(zmq.NOBLOCK) == event:
                                continue  # everything is OK, we can go to send next event in the queue.
                        except zmq.ZMQError:
                            pass
                        # Nothing was received back in time, or what was received differs from what was sent.
                        LOGGER.error("EventsDevice failed to verify delivery of %s", pickle.loads(event))
                    # Reached only after a failed send or a failed verification.
                    # NOTE(review): nesting level of this sleep is reconstructed from flattened source -- confirm.
                    time.sleep(self.pub_queue_events_rate)
                except queue.Empty:
                    # No event arrived within the wait timeout: re-check the loop condition.
                    pass
def run(self) -> None:
    """Post every inbound annotation to the Grafana URL.

    Blocks until `self.url_set` is set, then sends each annotation from
    `self.inbound_events()` as JSON to `self._grafana_post_url` using
    `self.api_auth`.  A failed post (including a non-2xx response or a
    timeout) is reported by `verbose_suppress` and the loop goes on with
    the next annotation.
    """
    # Waiting until the monitor URL is set, and we can start using the API.
    self.url_set.wait()

    # `requests` never times out unless told to: without an explicit timeout one
    # unresponsive server would block this loop (and all queued annotations) forever.
    post_timeout = 30  # seconds

    for annotation in self.inbound_events():  # events from GrafanaAggregator
        with verbose_suppress("GrafanaEventPostman failed to post an annotation %s", annotation):
            response = requests.post(
                self._grafana_post_url,
                json=annotation,
                auth=self.api_auth,
                timeout=post_timeout,
            )
            response.raise_for_status()
def run(self) -> None:
    """Forward inbound annotations, dropping excessive duplicates per time window.

    Annotations are counted by `self.unique_key()` within a time window of
    `self.time_window` seconds.  An annotation is put to `self.outbound_queue`
    unless its key was already seen more than `self.max_duplicates` times in
    the current window.  Counters are reset when the window expires.
    """
    seen_in_window: Dict[AnnotationKey, int] = defaultdict(int)
    window_deadline = time.perf_counter()

    for annotation in self.inbound_events():  # pylint: disable=no-member; pylint doesn't understand generics
        with verbose_suppress("GrafanaEventAggregator failed to process an annotation %s", annotation):
            key = self.unique_key(annotation)

            overdue = time.perf_counter() - window_deadline
            if overdue > 0:
                # The current time window expired.
                LOGGER.debug("GrafanaEventAggregator start a new time window (%s sec)", self.time_window)
                seen_in_window.clear()

                # It can be more than one time window expired since last event seen.
                expired_windows = overdue // self.time_window + 1
                window_deadline += expired_windows * self.time_window

            seen_in_window[key] += 1
            if seen_in_window[key] <= self.max_duplicates:
                # Put the annotation to the posting queue.
                self.outbound_queue.put(annotation)
def get_logger_event_summary(_registry: Optional[EventsProcessesRegistry] = None) -> dict:
    """Return the content of the events summary file as a dict.

    The file path is taken from the `events_summary_log` attribute of the
    object returned by `get_events_logger()`.

    :param _registry: registry passed through to `get_events_logger()`
    :return: parsed JSON content, or an empty dict if reading/parsing failed
    """
    summary_path = get_events_logger(_registry=_registry).events_summary_log

    summary = {}
    with verbose_suppress("Failed to read %s", summary_path):
        with summary_path.open() as summary_file:
            summary = json.load(summary_file)
    return summary
def run(self) -> None:
    """Create the log files and write every inbound event to them.

    All the files (`events_log`, `events_summary_log` and the per-severity
    ones) are touched first, then each event from `self.inbound_events()` is
    passed to `self.write_event()` with `LOGGER.info` as the tee callable.
    """
    LOGGER.info("Writing to %s", self.events_log)

    log_files = [
        self.events_log,
        self.events_summary_log,
        *self.events_logs_by_severity.values(),
    ]
    for path in log_files:
        path.touch()

    for event_tuple in self.inbound_events():
        with verbose_suppress("EventsFileLogger failed to process %s", event_tuple):
            # try to unpack event from EventsDevice; the first item is not used here
            _event_class, event = event_tuple
            self.write_event(event=event, tee=LOGGER.info)
def run(self) -> None:
    """Export every inbound event to the `sct_events_gauge` gauge.

    The gauge is labelled with the event class, the event `type`, `subtype`
    and `node` attributes (empty string when missing) and the severity; its
    value is set to the event timestamp.
    """
    label_names = ["event_type", "type", "subtype", "severity", "node", ]
    events_gauge = nemesis_metrics_obj().create_gauge(
        "sct_events_gauge",
        "Gauge for SCT events",
        label_names,
    )

    for event_tuple in self.inbound_events():
        with verbose_suppress("PrometheusDumper failed to process %s", event_tuple):
            event_class, event = event_tuple  # try to unpack event from EventsDevice
            labelled_gauge = events_gauge.labels(  # pylint: disable=no-member
                event_class,
                getattr(event, "type", ""),
                getattr(event, "subtype", ""),
                event.severity,
                getattr(event, "node", ""),
            )
            labelled_gauge.set(event.timestamp)
def run(self) -> None:
    """Kill the test when an event of `Severity.CRITICAL` arrives.

    `TestResultEvent` events and events of any other severity are skipped.
    For the rest a `TestFailure` is raised and caught right away, so that
    `self.kill_test()` gets a real `sys.exc_info()` triple.
    """
    for event_tuple in self.inbound_events():
        with verbose_suppress("EventsAnalyzer failed to process %s", event_tuple):
            event_class, event = event_tuple  # try to unpack event from EventsDevice

            # Don't kill the test cause of TestResultEvent: it was done already when this event was sent out.
            if event_class == "TestResultEvent":
                continue
            if event.severity != Severity.CRITICAL:
                continue

            if event_class in LOADERS_EVENTS:
                reason = f"Stress command failed: {event}"
            else:
                reason = f"Got critical event: {event}"

            try:
                raise TestFailure(reason)
            except TestFailure:
                self.kill_test(sys.exc_info())
def run(self) -> None:
    """Convert every inbound event to an `Annotation` and queue it.

    The annotation tags are the event class, the severity name, the literal
    "events" and, when present and truthy, the event `type` and `subtype`.
    The time is the event timestamp multiplied by 1000 and truncated to int.
    """
    for event_tuple in self.inbound_events():  # pylint: disable=no-member; pylint doesn't understand generics
        with verbose_suppress("GrafanaAnnotator failed to process %s", event_tuple):
            event_class, event = event_tuple  # try to unpack event from EventsDevice

            tags = [event_class, event.severity.name, "events", ]
            for optional_attr in ("type", "subtype"):
                if tag := getattr(event, optional_attr, None):
                    tags.append(tag)

            annotation = Annotation({
                "time": int(event.timestamp * 1000.0),
                "tags": tags,
                "isRegion": False,
                "text": str(event),
            })
            self.outbound_queue.put(annotation)
class EventsFileLogger(BaseEventsProcess[Tuple[str, Any], None], multiprocessing.Process):
    """Events process which writes inbound events to the console and to log files.

    The files live in the `events_log_base_dir` of the events main device:
    one file for all events, one file per severity and a JSON summary file
    with per-severity counters.
    """

    def __init__(self, _registry: EventsProcessesRegistry):
        """Compute the log file paths and initialize the process.

        :param _registry: registry used to find the events main device
        """
        base_dir: Path = get_events_main_device(_registry=_registry).events_log_base_dir

        self.events_log = base_dir / EVENTS_LOG
        self.events_logs_by_severity = {
            Severity.CRITICAL: base_dir / CRITICAL_LOG,
            Severity.ERROR: base_dir / ERROR_LOG,
            Severity.WARNING: base_dir / WARNING_LOG,
            Severity.NORMAL: base_dir / NORMAL_LOG,
            Severity.DEBUG: base_dir / DEBUG_LOG,
        }
        # Per-severity-name counters; dumped to the summary file by write_event().
        self.events_summary = collections.defaultdict(int)
        self.events_summary_log = base_dir / SUMMARY_LOG

        super().__init__(_registry=_registry)

    def run(self) -> None:
        """Create the log files and write every inbound event using `write_event()`."""
        LOGGER.info("Writing to %s", self.events_log)

        for log_file in chain(
            (
                self.events_log,
                self.events_summary_log,
            ),
            self.events_logs_by_severity.values(),
        ):
            log_file.touch()

        for event_tuple in self.inbound_events():
            with verbose_suppress("EventsFileLogger failed to process %s", event_tuple):
                _, event = event_tuple  # try to unpack event from EventsDevice
                self.write_event(event=event)

    def write_event(self, event: SctEvent) -> None:
        """Log `event` to the console and, if it asks for it, to the log files.

        Events whose severity is not `Severity.DEBUG` are sent to the `LOGGER`
        method named after `event.log_level` (`TestResultEvent` instances are
        never sent).  Events with a truthy `save_to_files` attribute are
        appended to `self.events_log` and to the per-severity file (when one
        is registered), and the summary file is rewritten with the updated
        counters.

        :param event: event to record
        """
        if event.source_timestamp:
            message = f"{event.formatted_event_timestamp} <{event.formatted_source_timestamp}>: {str(event).strip()}"
        else:
            message = f"{event.formatted_event_timestamp}: {str(event).strip()}"
        message_bin = message.encode("utf-8") + b"\n"

        if event.severity != Severity.DEBUG:
            # Log event to the console.
            # logging.getLevelName() returns "Level N" for an unregistered level, and LOGGER
            # has no such method: fall back to None so the event is still written to the files
            # instead of failing with AttributeError.
            level_name = logging.getLevelName(event.log_level)
            tee = getattr(LOGGER, level_name.lower(), None)
            if tee and not isinstance(event, TestResultEvent):
                with verbose_suppress("%s: failed to tee %s to %s", self, event, tee):
                    tee(message)

        # Write event to events.log file
        # NOTE(review): the per-severity and summary updates are assumed to be gated
        # by `save_to_files` too -- confirm.
        if getattr(event, 'save_to_files', False):
            with verbose_suppress("%s: failed to write %s to %s", self, event, self.events_log):
                with self.events_log.open("ab+", buffering=0) as fobj:
                    fobj.write(message_bin)

            if log_file := self.events_logs_by_severity.get(event.severity):
                with verbose_suppress("%s: failed to write %s to %s", self, event, log_file):
                    with log_file.open("ab+", buffering=0) as fobj:
                        fobj.write(message_bin)

            # Update summary.log file (statistics.)
            self.events_summary[Severity(event.severity).name] += 1
            with verbose_suppress("%s: failed to update %s", self, self.events_summary_log):
                with self.events_summary_log.open("wb", buffering=0) as fobj:
                    fobj.write(json.dumps(dict(self.events_summary), indent=4).encode("utf-8"))