def monitor_system_status(self):
    """
    Relay messages from the private 'status' comms queue to this SocketIO namespace.

    Each queue entry is expected to carry a 'msg' payload and a 'msg_type'
    event name; the payload is emitted under that event name on
    ``self.namespace``. The loop ends when ``self.stop`` is set (it is only
    checked when a message arrives) or when an exception is raised, which is
    logged and swallowed. On the way out, ``self.background_task`` is reset to
    ``None`` while holding ``self.connections_lock``.
    """
    status_queue = CommsQueue('status', private=True)

    try:
        for envelope in status_queue.listen():
            # The stop flag is only evaluated once a message wakes the listener up
            if self.stop:
                break

            payload, msg_type = envelope['msg'], envelope['msg_type']
            self.socketio.emit(msg_type, payload, namespace=self.namespace)

            LOGGER.info(
                f"SocketIO:{self.namespace} - Sending {msg_type} event to all connected users."
            )
    except Exception:
        LOGGER.exception(f"SocketIO:{self.namespace}")
    finally:
        LOGGER.info(
            f"SocketIO:{self.namespace} - No more users connected to status monitoring, exiting thread..."
        )

        # Clear the task handle under the lock that guards the connection state
        with self.connections_lock:
            self.background_task = None
def monitor_alerts(self, user_info):
    """
    Push alert messages from the private 'alerts' comms queue to one client.

    :param user_info: dict describing the connected user; this method reads
                      its 'sid', 'classification', 'display' and 'uname' keys.

    An alert is emitted to the room named after the user's sid only when
    ``classification.is_accessible`` accepts the user's classification against
    the alert's classification (``classification.UNRESTRICTED`` when the alert
    has none). The loop stops as soon as the sid is no longer present in
    ``self.connections`` (checked when a message arrives). Exceptions are
    logged and swallowed.
    """
    sid = user_info['sid']
    alert_queue = CommsQueue('alerts', private=True)

    try:
        for envelope in alert_queue.listen():
            # Client is gone: stop listening on its behalf
            if sid not in self.connections:
                break

            alert, msg_type = envelope['msg'], envelope['msg_type']

            alert_classification = alert.get('classification', classification.UNRESTRICTED)
            if not classification.is_accessible(user_info['classification'], alert_classification):
                # This user is not cleared to see this alert
                continue

            self.socketio.emit(msg_type, alert, room=sid, namespace=self.namespace)
            LOGGER.info(
                f"SocketIO:{self.namespace} - {user_info['display']} - "
                f"Sending {msg_type} event for alert matching ID: {alert['alert_id']}"
            )

            # Audit entry is only written for alerts that were actually delivered
            if AUDIT:
                AUDIT_LOG.info(
                    f"{user_info['uname']} [{user_info['classification']}]"
                    f" :: AlertMonitoringNamespace.get_alert(alert_id={alert['alert_id']})"
                )
    except Exception:
        LOGGER.exception(f"SocketIO:{self.namespace} - {user_info['display']}")
    finally:
        LOGGER.info(
            f"SocketIO:{self.namespace} - {user_info['display']} - Connection to client was terminated"
        )
#!/usr/bin/env python
# Debugging helper: pretty-print every message received on a comms (pubsub) queue.
#
# Usage: pubsub_reader.py [queue_name]

import sys

from assemblyline.remote.datatypes.queues.comms import CommsQueue
from pprint import pprint

if __name__ == "__main__":
    # The queue name is the first (and only) command line argument
    queue_name = sys.argv[1] if len(sys.argv) > 1 else None

    if queue_name is None:
        print(
            "\nERROR: You must specify a queue name.\n\npubsub_reader.py [queue_name]"
        )
        # sys.exit instead of the bare exit(): the latter is injected by the
        # `site` module and is missing under `python -S` or in frozen builds
        sys.exit(1)

    print(f"Listening for messages on '{queue_name}' queue.")

    q = CommsQueue(queue_name)

    try:
        # Restart the listener if it ever returns so the reader keeps running
        while True:
            for msg in q.listen():
                pprint(msg)
    except KeyboardInterrupt:
        print('Exiting')
    finally:
        q.close()
class HeartbeatManager(ServerBase):
    """
    Turn metrics messages received on the metrics comms queue into heartbeats.

    Incoming messages are accumulated in per-(name, type, host) counters. A
    background job periodically moves those counters into a rolling window,
    aggregates the window per (name, type) and sends one heartbeat per
    aggregated component through a HeartbeatFormatter.
    """

    def __init__(self, config=None):
        """
        :param config: configuration object; when falsy, ``forge.get_config()`` is used.
        """
        super().__init__('assemblyline.heartbeat_manager')
        self.config = config or forge.get_config()
        self.datastore = forge.get_datastore()
        self.metrics_queue = CommsQueue(METRICS_QUEUE)
        self.scheduler = BackgroundScheduler(daemon=True)
        self.hm = HeartbeatFormatter("heartbeat_manager", self.log, config=self.config)

        # Counters received since the last export, keyed by (name, type, host).
        # Written by the listener loop and swapped out by the export job,
        # hence the lock.
        self.counters_lock = Lock()
        self.counters = {}

        # Per-key list of the most recent exported counters and the time at
        # which each key is considered expired
        self.rolling_window = {}
        self.window_ttl = {}
        self.ttl = self.config.core.metrics.export_interval * 2

        # Number of export intervals that fit in 60 seconds
        self.window_size = int(60 / self.config.core.metrics.export_interval)
        if self.window_size != 60 / self.config.core.metrics.export_interval:
            self.log.warning(
                "Cannot calculate a proper window size for reporting heartbeats. "
                "Metrics reported during hearbeat will be wrong.")

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="heartbeat_manager")
        else:
            self.apm_client = None

    def try_run(self):
        """
        Start the periodic heartbeat export job, then consume metrics messages.

        Each message must provide truthy 'name', 'type' and 'host' values;
        messages that do not are dropped. The remaining keys of the message
        are accumulated into the counter for (name, type, host).
        """
        self.scheduler.add_job(
            self._export_hearbeats,
            'interval',
            seconds=self.config.core.metrics.export_interval)
        self.scheduler.start()

        while self.running:
            for msg in self.metrics_queue.listen():
                # APM Transaction start
                if self.apm_client:
                    self.apm_client.begin_transaction('heartbeat')

                m_name = msg.pop('name', None)
                m_type = msg.pop('type', None)
                m_host = msg.pop('host', None)
                msg.pop('instance', None)

                # Validate before touching m_type: calling .upper() on a
                # missing type would raise and take the listener down
                if not m_name or not m_type or not m_host:
                    # APM Transaction end
                    if self.apm_client:
                        self.apm_client.end_transaction(
                            'process_message', 'invalid_message')
                    continue

                self.log.debug(f"Received {m_type.upper()} metrics message")

                with self.counters_lock:
                    c_key = (m_name, m_type, m_host)
                    if c_key not in self.counters or m_type in NON_AGGREGATED:
                        # First message for this key, or a type whose values
                        # replace rather than accumulate
                        self.counters[c_key] = Counter(msg)
                    else:
                        # Some fields of an aggregated type must keep their
                        # latest value instead of being summed
                        non_agg_values = {}
                        if m_type in NON_AGGREGATED_COUNTERS:
                            non_agg_values = {
                                k: v
                                for k, v in msg.items()
                                if k in NON_AGGREGATED_COUNTERS[m_type]
                            }
                        self.counters[c_key].update(Counter(msg))
                        for k, v in non_agg_values.items():
                            self.counters[c_key][k] = v

                # APM Transaction end
                if self.apm_client:
                    self.apm_client.end_transaction('process_message', 'success')

    def _export_hearbeats(self):
        """
        Scheduled job: roll the current counters into the window and send heartbeats.

        Any exception is logged and swallowed so a failing run does not
        propagate out of the job.
        """
        try:
            self.heartbeat()
            self.log.info("Expiring unused counters...")

            # APM Transaction start
            if self.apm_client:
                self.apm_client.begin_transaction('heartbeat')

            c_time = time.time()
            # Iterate over a copy of the keys since entries are deleted in the loop
            for k in list(self.window_ttl.keys()):
                if self.window_ttl.get(k, c_time) < c_time:
                    c_name, c_type, c_host = k
                    self.log.info(
                        f"Counter {c_name} [{c_type}] for host {c_host} is expired"
                    )
                    del self.window_ttl[k]
                    del self.rolling_window[k]

            self.log.info("Saving current counters to rolling window ...")
            # Swap the live counters for an empty dict so the listener can
            # keep counting while this run works on its own copy
            with self.counters_lock:
                counter_copy, self.counters = self.counters, {}

            for w_key, counter in counter_copy.items():
                _, m_type, _ = w_key
                if w_key not in self.rolling_window or m_type in NON_AGGREGATED:
                    self.rolling_window[w_key] = [counter]
                else:
                    self.rolling_window[w_key].append(counter)
                    self.rolling_window[w_key] = self.rolling_window[w_key][
                        -self.window_size:]

                self.window_ttl[w_key] = time.time() + self.ttl

            self.log.info("Compiling service list...")
            aggregated_counters = {}
            # Seed every enabled service with zeroed counters so each of them
            # gets a heartbeat even when no metrics were received for it
            for service in [
                    s['name']
                    for s in self.datastore.list_all_services(as_obj=False)
                    if s['enabled']
            ]:
                data = {
                    'cache_hit': 0,
                    'cache_miss': 0,
                    'cache_skipped': 0,
                    'execute': 0,
                    'fail_recoverable': 0,
                    'fail_nonrecoverable': 0,
                    'scored': 0,
                    'not_scored': 0,
                    'instances': 0
                }
                aggregated_counters[(service, 'service')] = Counter(data)

            self.log.info("Aggregating heartbeat data...")
            for component_parts, counters_list in self.rolling_window.items():
                c_name, c_type, _ = component_parts

                # Expiring data outside of the window
                counters_list = counters_list[-self.window_size:]

                key = (c_name, c_type)
                if key not in aggregated_counters:
                    aggregated_counters[key] = Counter()

                # One rolling window entry per host: count it as an instance
                aggregated_counters[key]['instances'] += 1

                for c in counters_list:
                    aggregated_counters[key].update(c)

            self.log.info("Generating heartbeats...")
            for aggregated_parts, counter in aggregated_counters.items():
                agg_c_name, agg_c_type = aggregated_parts
                with elasticapm.capture_span(name=f"{agg_c_type}.{agg_c_name}",
                                             span_type="send_heartbeat"):
                    metrics_data = {}
                    for key, value in counter.items():
                        # Skip counts, they will be paired with a time entry and we only want to count it once
                        if key.endswith('.c'):
                            continue
                        # We have an entry that is a timer, should also have a .c count
                        elif key.endswith('.t'):
                            # Drop exactly the '.t' suffix. str.rstrip('.t')
                            # strips any trailing '.'/'t' characters and would
                            # mangle names such as 'submit.t'.
                            name = key[:-2]
                            count = counter.get(name + ".c", 0)
                            metrics_data[name] = value / max(count, 1)
                            metrics_data[name + "_count"] = count
                        # Plain old metric, no modifications needed
                        else:
                            metrics_data[key] = value

                    agg_c_instances = metrics_data.pop('instances', 1)
                    metrics_data.pop('instances_count', None)
                    self.hm.send_heartbeat(agg_c_type, agg_c_name, metrics_data,
                                           agg_c_instances)

            # APM Transaction end
            if self.apm_client:
                self.apm_client.end_transaction('send_heartbeats', 'success')

        except Exception:
            self.log.exception(
                "Unknown exception occurred during heartbeat creation:")
class MetricsServer(ServerBase):
    """
    Aggregate metrics messages from the metrics comms queue and index them in Elasticsearch.

    There can only be one metrics server of this type running because it
    runs off a pubsub queue.
    """

    def __init__(self, config=None):
        """
        :param config: configuration object; when falsy, ``forge.get_config()`` is used.

        Exits the process with status 1 when no Elasticsearch hosts are
        configured for metrics.
        """
        super().__init__('assemblyline.metrics_aggregator', shutdown_timeout=65)
        self.config = config or forge.get_config()
        self.elastic_hosts = self.config.core.metrics.elasticsearch.hosts
        self.is_datastream = False

        if not self.elastic_hosts:
            self.log.error(
                "No elasticsearch cluster defined to store metrics. All gathered stats will be ignored..."
            )
            sys.exit(1)

        self.scheduler = BackgroundScheduler(daemon=True)
        # Both are created in try_run()
        self.metrics_queue = None
        self.es = None

        # Counters received since the last aggregation, keyed by (name, type).
        # Written by the listener loop and swapped out by the aggregation job,
        # hence the lock.
        self.counters_lock = Lock()
        self.counters = {}

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="metrics_aggregator")
        else:
            self.apm_client = None

    def try_run(self):
        """
        Connect to Elasticsearch, start the 60 second aggregation job, then consume metrics messages.

        Each message must provide truthy 'name' and 'type' values; messages
        that do not are dropped. The remaining keys of the message are
        accumulated into the counter for (name, type).
        """
        # If our connection to the metrics database requires a custom ca cert, prepare it
        ca_certs = None
        if self.config.core.metrics.elasticsearch.host_certificates:
            # delete=False: the file is handed to the Elasticsearch client by
            # path below, so it has to outlive this with-block
            with tempfile.NamedTemporaryFile(delete=False) as ca_certs_file:
                ca_certs = ca_certs_file.name
                ca_certs_file.write(
                    self.config.core.metrics.elasticsearch.host_certificates.encode())

        self.metrics_queue = CommsQueue(METRICS_QUEUE)
        self.es = elasticsearch.Elasticsearch(
            hosts=self.elastic_hosts,
            connection_class=elasticsearch.RequestsHttpConnection,
            ca_certs=ca_certs)

        # Determine if ES will support data streams (>= 7.9)
        self.is_datastream = version.parse(
            self.es.info()['version']['number']) >= version.parse("7.9")

        self.scheduler.add_job(self._create_aggregated_metrics,
                               'interval',
                               seconds=60)
        self.scheduler.start()

        while self.running:
            for msg in self.metrics_queue.listen():
                # APM Transaction start
                if self.apm_client:
                    self.apm_client.begin_transaction('metrics')

                m_name = msg.pop('name', None)
                m_type = msg.pop('type', None)
                msg.pop('host', None)
                msg.pop('instance', None)

                # Validate before touching m_type: calling .upper() on a
                # missing type would raise and take the listener down
                if not m_name or not m_type:
                    # APM Transaction end
                    if self.apm_client:
                        self.apm_client.end_transaction(
                            'process_message', 'invalid_message')
                    continue

                self.log.debug(f"Received {m_type.upper()} metrics message")

                with self.counters_lock:
                    c_key = (m_name, m_type)
                    if c_key not in self.counters or m_type in NON_AGGREGATED:
                        # First message for this key, or a type whose values
                        # replace rather than accumulate
                        self.counters[c_key] = Counter(msg)
                    else:
                        # Some fields of an aggregated type must keep their
                        # latest value instead of being summed
                        non_agg_values = {}
                        if m_type in NON_AGGREGATED_COUNTERS:
                            non_agg_values = {
                                k: v
                                for k, v in msg.items()
                                if k in NON_AGGREGATED_COUNTERS[m_type]
                            }
                        self.counters[c_key].update(Counter(msg))
                        for k, v in non_agg_values.items():
                            self.counters[c_key][k] = v

                # APM Transaction end
                if self.apm_client:
                    self.apm_client.end_transaction('process_message', 'success')

    def _create_aggregated_metrics(self):
        """
        Scheduled job: turn the counters gathered since the last run into one
        Elasticsearch document per (name, type) component.
        """
        self.log.info("Copying counters ...")
        # APM Transaction start
        if self.apm_client:
            self.apm_client.begin_transaction('metrics')

        # Swap the live counters for an empty dict so the listener can keep
        # counting while this run works on its own copy
        with self.counters_lock:
            counter_copy, self.counters = self.counters, {}

        self.log.info("Aggregating metrics ...")
        timestamp = now_as_iso()
        for component, counts in counter_copy.items():
            component_name, component_type = component
            output_metrics = {'name': component_name, 'type': component_type}

            for key, value in counts.items():
                # Skip counts, they will be paired with a time entry and we only want to count it once
                if key.endswith('.c'):
                    continue
                # We have an entry that is a timer, should also have a .c count
                elif key.endswith('.t'):
                    # Drop exactly the '.t' suffix. str.rstrip('.t') strips
                    # any trailing '.'/'t' characters and would mangle names
                    # such as 'submit.t'.
                    name = key[:-2]
                    count = counts.get(name + ".c", 0)
                    # Guard the average against a zero count instead of
                    # raising ZeroDivisionError and losing the whole run
                    output_metrics[name] = value / max(count, 1)
                    output_metrics[name + "_count"] = count
                # Plain old metric, no modifications needed
                else:
                    output_metrics[key] = value

            ensure_indexes(self.log,
                           self.es,
                           self.config.core.metrics.elasticsearch,
                           [component_type],
                           datastream_enabled=self.is_datastream)

            index = f"al_metrics_{component_type}"
            # Were data streams created for the index specified?
            try:
                if self.es.indices.get_index_template(name=f"{index}_ds"):
                    output_metrics['@timestamp'] = timestamp
                    index = f"{index}_ds"
            except elasticsearch.exceptions.TransportError:
                # Best effort: on a transport error keep writing to the plain index
                pass

            output_metrics['timestamp'] = timestamp
            output_metrics = cleanup_metrics(output_metrics)

            self.log.info(output_metrics)
            with_retries(self.log, self.es.index, index=index, body=output_metrics)

        self.log.info("Metrics aggregated. Waiting for next run...")

        # APM Transaction end
        if self.apm_client:
            self.apm_client.end_transaction('aggregate_metrics', 'success')