Example #1
    def monitor_system_status(self):
        # Broadcast every message from the private 'status' pub/sub queue to all
        # clients connected to this SocketIO namespace until asked to stop.
        q = CommsQueue('status', private=True)
        try:
            for msg in q.listen():
                if self.stop:
                    break

                message = msg['msg']
                msg_type = msg['msg_type']
                self.socketio.emit(msg_type, message, namespace=self.namespace)
                LOGGER.info(
                    f"SocketIO:{self.namespace} - Sending {msg_type} event to all connected users."
                )

        except Exception:
            LOGGER.exception(f"SocketIO:{self.namespace}")
        finally:
            LOGGER.info(
                f"SocketIO:{self.namespace} - No more users connected to status monitoring, exiting thread..."
            )
            with self.connections_lock:
                self.background_task = None
Example #2
    def monitor_alerts(self, user_info):
        # Relay alert messages to a single client (identified by its SocketIO
        # sid), dropping alerts the user's classification cannot access.
        sid = user_info['sid']
        q = CommsQueue('alerts', private=True)
        try:
            for msg in q.listen():
                if sid not in self.connections:
                    break

                alert = msg['msg']
                msg_type = msg['msg_type']
                if classification.is_accessible(
                        user_info['classification'],
                        alert.get('classification',
                                  classification.UNRESTRICTED)):
                    self.socketio.emit(msg_type,
                                       alert,
                                       room=sid,
                                       namespace=self.namespace)
                    LOGGER.info(
                        f"SocketIO:{self.namespace} - {user_info['display']} - "
                        f"Sending {msg_type} event for alert matching ID: {alert['alert_id']}"
                    )

                    if AUDIT:
                        AUDIT_LOG.info(
                            f"{user_info['uname']} [{user_info['classification']}]"
                            f" :: AlertMonitoringNamespace.get_alert(alert_id={alert['alert_id']})"
                        )

        except Exception:
            LOGGER.exception(
                f"SocketIO:{self.namespace} - {user_info['display']}")
        finally:
            LOGGER.info(
                f"SocketIO:{self.namespace} - {user_info['display']} - Connection to client was terminated"
            )
#!/usr/bin/env python

import sys

from assemblyline.remote.datatypes.queues.comms import CommsQueue
from pprint import pprint

if __name__ == "__main__":
    queue_name = None
    if len(sys.argv) > 1:
        queue_name = sys.argv[1]

    if queue_name is None:
        print(
            "\nERROR: You must specify a queue name.\n\nUsage: pubsub_reader.py <queue_name>"
        )
        sys.exit(1)

    print(f"Listening for messages on '{queue_name}' queue.")

    q = CommsQueue(queue_name)

    try:
        while True:
            for msg in q.listen():
                pprint(msg)
    except KeyboardInterrupt:
        print('Exiting')
    finally:
        q.close()
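A publisher counterpart to the reader above can be sketched as follows. This is a minimal sketch, assuming CommsQueue exposes a publish() method mirroring listen(), and that subscribers expect the msg/msg_type envelope consumed by the monitors in the examples above.

#!/usr/bin/env python

import sys

from assemblyline.remote.datatypes.queues.comms import CommsQueue

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("\nERROR: You must specify a queue name and a message.\n\nUsage: pubsub_writer.py <queue_name> <message>")
        sys.exit(1)

    queue_name, message = sys.argv[1], sys.argv[2]

    q = CommsQueue(queue_name)
    try:
        # Assumption: publish() fans the message out to every current listener.
        q.publish({'msg': message, 'msg_type': 'Info'})
        print(f"Published message to '{queue_name}' queue.")
    finally:
        q.close()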
class HeartbeatManager(ServerBase):
    def __init__(self, config=None):
        super().__init__('assemblyline.heartbeat_manager')
        self.config = config or forge.get_config()
        self.datastore = forge.get_datastore()
        self.metrics_queue = CommsQueue(METRICS_QUEUE)
        self.scheduler = BackgroundScheduler(daemon=True)
        self.hm = HeartbeatFormatter("heartbeat_manager",
                                     self.log,
                                     config=self.config)

        self.counters_lock = Lock()
        self.counters = {}
        self.rolling_window = {}
        self.window_ttl = {}
        # Counters expire after two missed export intervals; the rolling window
        # spans one minute's worth of export intervals.
        self.ttl = self.config.core.metrics.export_interval * 2
        self.window_size = int(60 / self.config.core.metrics.export_interval)
        if self.window_size != 60 / self.config.core.metrics.export_interval:
            self.log.warning(
                "Cannot calculate a proper window size for reporting heartbeats. "
                "Metrics reported during hearbeat will be wrong.")

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="heartbeat_manager")
        else:
            self.apm_client = None

    def try_run(self):
        self.scheduler.add_job(
            self._export_heartbeats,
            'interval',
            seconds=self.config.core.metrics.export_interval)
        self.scheduler.start()

        while self.running:
            for msg in self.metrics_queue.listen():
                # APM Transaction start
                if self.apm_client:
                    self.apm_client.begin_transaction('heartbeat')

                m_name = msg.pop('name', None)
                m_type = msg.pop('type', None)
                m_host = msg.pop('host', None)
                msg.pop('instance', None)

                self.log.debug(f"Received {m_type.upper()} metrics message")
                if not m_name or not m_type or not m_host:
                    # APM Transaction end
                    if self.apm_client:
                        self.apm_client.end_transaction(
                            'process_message', 'invalid_message')

                    continue

                with self.counters_lock:
                    c_key = (m_name, m_type, m_host)
                    if c_key not in self.counters or m_type in NON_AGGREGATED:
                        self.counters[c_key] = Counter(msg)
                    else:
                        non_agg_values = {}
                        if m_type in NON_AGGREGATED_COUNTERS:
                            non_agg_values = {
                                k: v
                                for k, v in msg.items()
                                if k in NON_AGGREGATED_COUNTERS[m_type]
                            }
                        self.counters[c_key].update(Counter(msg))
                        for k, v in non_agg_values.items():
                            self.counters[c_key][k] = v

                # APM Transaction end
                if self.apm_client:
                    self.apm_client.end_transaction('process_message',
                                                    'success')

    def _export_heartbeats(self):
        try:
            self.heartbeat()
            self.log.info("Expiring unused counters...")
            # APM Transaction start
            if self.apm_client:
                self.apm_client.begin_transaction('heartbeat')

            c_time = time.time()
            for k in list(self.window_ttl.keys()):
                if self.window_ttl.get(k, c_time) < c_time:
                    c_name, c_type, c_host = k
                    self.log.info(
                        f"Counter {c_name} [{c_type}] for host {c_host} is expired"
                    )
                    del self.window_ttl[k]
                    del self.rolling_window[k]

            self.log.info("Saving current counters to rolling window ...")
            with self.counters_lock:
                counter_copy, self.counters = self.counters, {}

            for w_key, counter in counter_copy.items():
                _, m_type, _ = w_key
                if w_key not in self.rolling_window or m_type in NON_AGGREGATED:
                    self.rolling_window[w_key] = [counter]
                else:
                    self.rolling_window[w_key].append(counter)

                self.rolling_window[w_key] = self.rolling_window[w_key][
                    -self.window_size:]
                self.window_ttl[w_key] = time.time() + self.ttl

            self.log.info("Compiling service list...")
            aggregated_counters = {}
            for service in [
                    s['name']
                    for s in self.datastore.list_all_services(as_obj=False)
                    if s['enabled']
            ]:
                data = {
                    'cache_hit': 0,
                    'cache_miss': 0,
                    'cache_skipped': 0,
                    'execute': 0,
                    'fail_recoverable': 0,
                    'fail_nonrecoverable': 0,
                    'scored': 0,
                    'not_scored': 0,
                    'instances': 0
                }
                aggregated_counters[(service, 'service')] = Counter(data)

            self.log.info("Aggregating heartbeat data...")
            for component_parts, counters_list in self.rolling_window.items():
                c_name, c_type, c_host = component_parts

                # Expiring data outside of the window
                counters_list = counters_list[-self.window_size:]

                key = (c_name, c_type)
                if key not in aggregated_counters:
                    aggregated_counters[key] = Counter()

                aggregated_counters[key]['instances'] += 1

                for c in counters_list:
                    aggregated_counters[key].update(c)

            self.log.info("Generating heartbeats...")
            for aggregated_parts, counter in aggregated_counters.items():
                agg_c_name, agg_c_type = aggregated_parts
                with elasticapm.capture_span(name=f"{agg_c_type}.{agg_c_name}",
                                             span_type="send_heartbeat"):

                    metrics_data = {}
                    for key, value in counter.items():
                        # Skip '.c' counts; they are handled alongside their paired '.t' timer entry
                        if key.endswith('.c'):
                            continue
                        # Timer entry: pair it with its '.c' count to compute an average
                        elif key.endswith('.t'):
                            # Slice off the '.t' suffix; rstrip('.t') would also
                            # eat trailing 't' characters in the metric name
                            name = key[:-2]
                            metrics_data[name] = value / max(
                                counter.get(name + ".c", 1), 1)
                            metrics_data[name + "_count"] = counter.get(
                                name + ".c", 0)
                        # Plain old metric, no modifications needed
                        else:
                            metrics_data[key] = value

                    agg_c_instances = metrics_data.pop('instances', 1)
                    metrics_data.pop('instances_count', None)
                    self.hm.send_heartbeat(agg_c_type, agg_c_name,
                                           metrics_data, agg_c_instances)

            # APM Transaction end
            if self.apm_client:
                self.apm_client.end_transaction('send_heartbeats', 'success')

        except Exception:
            self.log.exception(
                "Unknown exception occurred during heartbeat creation:")
class MetricsServer(ServerBase):
    """
    There can only be one of these type of metrics server running because it runs of a pubsub queue.
    """
    def __init__(self, config=None):
        super().__init__('assemblyline.metrics_aggregator',
                         shutdown_timeout=65)
        self.config = config or forge.get_config()
        self.elastic_hosts = self.config.core.metrics.elasticsearch.hosts
        self.is_datastream = False

        if not self.elastic_hosts:
            self.log.error(
                "No elasticsearch cluster defined to store metrics. All gathered stats will be ignored..."
            )
            sys.exit(1)

        self.scheduler = BackgroundScheduler(daemon=True)
        self.metrics_queue = None
        self.es = None
        self.counters_lock = Lock()
        self.counters = {}

        if self.config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=self.config.core.metrics.apm_server.server_url,
                service_name="metrics_aggregator")
        else:
            self.apm_client = None

    def try_run(self):
        # If our connection to the metrics database requires a custom ca cert, prepare it
        ca_certs = None
        if self.config.core.metrics.elasticsearch.host_certificates:
            with tempfile.NamedTemporaryFile(delete=False) as ca_certs_file:
                ca_certs = ca_certs_file.name
                ca_certs_file.write(self.config.core.metrics.elasticsearch.
                                    host_certificates.encode())

        self.metrics_queue = CommsQueue(METRICS_QUEUE)
        self.es = elasticsearch.Elasticsearch(
            hosts=self.elastic_hosts,
            connection_class=elasticsearch.RequestsHttpConnection,
            ca_certs=ca_certs)
        # Determine if ES will support data streams (>= 7.9)
        self.is_datastream = version.parse(
            self.es.info()['version']['number']) >= version.parse("7.9")

        self.scheduler.add_job(self._create_aggregated_metrics,
                               'interval',
                               seconds=60)
        self.scheduler.start()

        while self.running:
            for msg in self.metrics_queue.listen():
                # APM Transaction start
                if self.apm_client:
                    self.apm_client.begin_transaction('metrics')

                m_name = msg.pop('name', None)
                m_type = msg.pop('type', None)
                msg.pop('host', None)
                msg.pop('instance', None)

                self.log.debug(f"Received {m_type.upper()} metrics message")
                if not m_name or not m_type:
                    # APM Transaction end
                    if self.apm_client:
                        self.apm_client.end_transaction(
                            'process_message', 'invalid_message')

                    continue

                with self.counters_lock:
                    c_key = (m_name, m_type)
                    if c_key not in self.counters or m_type in NON_AGGREGATED:
                        self.counters[c_key] = Counter(msg)
                    else:
                        non_agg_values = {}
                        if m_type in NON_AGGREGATED_COUNTERS:
                            non_agg_values = {
                                k: v
                                for k, v in msg.items()
                                if k in NON_AGGREGATED_COUNTERS[m_type]
                            }
                        self.counters[c_key].update(Counter(msg))
                        for k, v in non_agg_values.items():
                            self.counters[c_key][k] = v

                # APM Transaction end
                if self.apm_client:
                    self.apm_client.end_transaction('process_message',
                                                    'success')

    def _create_aggregated_metrics(self):
        self.log.info("Copying counters ...")
        # APM Transaction start
        if self.apm_client:
            self.apm_client.begin_transaction('metrics')

        with self.counters_lock:
            counter_copy, self.counters = self.counters, {}

        self.log.info("Aggregating metrics ...")
        timestamp = now_as_iso()
        for component, counts in counter_copy.items():
            component_name, component_type = component
            output_metrics = {'name': component_name, 'type': component_type}

            for key, value in counts.items():
                # Skip '.c' counts; they are handled alongside their paired '.t' timer entry
                if key.endswith('.c'):
                    continue
                # Timer entry: pair it with its '.c' count to compute an average
                elif key.endswith('.t'):
                    # Slice off the '.t' suffix; rstrip('.t') would also
                    # eat trailing 't' characters in the metric name
                    name = key[:-2]
                    output_metrics[name] = counts[key] / max(
                        counts.get(name + ".c", 1), 1)
                    output_metrics[name + "_count"] = counts.get(
                        name + ".c", 0)
                # Plain old metric, no modifications needed
                else:
                    output_metrics[key] = value

            ensure_indexes(self.log,
                           self.es,
                           self.config.core.metrics.elasticsearch,
                           [component_type],
                           datastream_enabled=self.is_datastream)

            index = f"al_metrics_{component_type}"
            # Were data streams created for the index specified?
            try:
                if self.es.indices.get_index_template(name=f"{index}_ds"):
                    output_metrics['@timestamp'] = timestamp
                    index = f"{index}_ds"
            except elasticsearch.exceptions.TransportError:
                pass
            output_metrics['timestamp'] = timestamp
            output_metrics = cleanup_metrics(output_metrics)

            self.log.info(output_metrics)
            with_retries(self.log,
                         self.es.index,
                         index=index,
                         body=output_metrics)

        self.log.info("Metrics aggregated. Waiting for next run...")

        # APM Transaction end
        if self.apm_client:
            self.apm_client.end_transaction('aggregate_metrics', 'success')
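Finally, the merge step in both try_run() loops treats most metrics as additive counters but overwrites gauge-like values, since point-in-time readings must not be summed across messages. A reduced sketch of that behaviour, under a hypothetical NON_AGGREGATED_COUNTERS configuration:

from collections import Counter

# Hypothetical configuration: for 'service' messages, 'busy' and 'idle' are
# point-in-time gauges and keep only the most recently reported value.
NON_AGGREGATED_COUNTERS = {'service': {'busy', 'idle'}}

def merge(existing: Counter, msg: dict, m_type: str) -> Counter:
    gauges = {k: v for k, v in msg.items()
              if k in NON_AGGREGATED_COUNTERS.get(m_type, set())}
    existing.update(Counter(msg))   # additive merge for true counters
    for k, v in gauges.items():
        existing[k] = v             # gauges are replaced, not summed
    return existing

c = merge(Counter({'execute': 5, 'busy': 2}), {'execute': 3, 'busy': 4}, 'service')
print(c)  # Counter({'execute': 8, 'busy': 4})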