def alert_on(data):
    info = get_user_info(request, session)

    if info.get('uname', None) is None:
        return

    LOGGER.info("[%s@%s] SocketIO:Alert - Event received => %s"
                % (info.get('uname', None), info['ip'], data))
    if AUDIT:
        AUDIT_LOG.info("%s [%s] :: %s(start)" % (info.get('uname', None),
                                                 info.get('classification', classification.UNRESTRICTED),
                                                 "socketsrv_alert_on"))

    emit('connected', data)

    q = CommsQueue('alerts', private=True)
    try:
        for msg in q.listen():
            if msg['type'] == "message":
                data = json.loads(msg['data'])
                if classification.is_accessible(info.get('classification', classification.UNRESTRICTED),
                                                data.get('body', {}).get('classification',
                                                                         classification.UNRESTRICTED)):
                    emit('AlertCreated', data)
    except Exception:
        LOGGER.exception("[%s@%s] SocketIO:Alert" % (info.get('uname', None), info['ip']))
    finally:
        LOGGER.info("[%s@%s] SocketIO:Alert - Connection to client was terminated"
                    % (info.get('uname', None), info['ip']))
        if AUDIT:
            AUDIT_LOG.info("%s [%s] :: %s(stop)" % (info.get('uname', None),
                                                    info.get('classification', classification.UNRESTRICTED),
                                                    "socketsrv_alert_on"))

def _rpc_executor_thread_main(self):
    qname = 'Controller.' + self.mac
    self.log.info("Listening for RPCs on " + qname)
    rpc = CommsQueue(qname)
    while self._should_run:
        try:
            self.log.debug('Checking for RPCs')
            raw = next(rpc.listen())
            if not raw or 'data' not in raw:
                continue

            self.log.info("RAW RPC:\n%s" % pprint.pformat(raw))
            raw = json.loads(raw['data'])

            msg = None
            error = None
            try:
                msg = ControllerRequest.parse(raw)
            except Exception as e:  # pylint:disable=W0703
                self.log.exception('While processing rpc: %s', raw)
                error = str(e)

            if msg:
                self.jobs.push(msg)
            else:
                reply_to_rpc(raw, response_body=error, succeeded=False)
        except KeyboardInterrupt:
            self._should_run = False
            self.log.error('Thread got CTL-C in consumer thread.')
            return
        except Exception:
            self.log.exception('Unhandled Exception in consumer thread.')
            time.sleep(2)
            continue

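# Hedged usage sketch (not from the original source): a client could drive the RPC
# loop above by publishing a Message onto the controller's 'Controller.<mac>' comms
# queue and waiting for the reply, e.g. via send_rpc_comms_queue() defined further
# down in this file. The mtype value, sender name and {'command': 'ping'} body are
# hypothetical placeholders; the real payload is whatever ControllerRequest.parse()
# accepts.
def ping_controller(mac):
    msg = Message(to='Controller.' + mac,
                  mtype='ControllerRequest',     # hypothetical message type
                  sender='admin_cli',            # hypothetical sender name
                  body={'command': 'ping'})      # hypothetical request body
    return send_rpc_comms_queue(msg, response_timeout=10)
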
def main():
    q = CommsQueue('status')
    try:
        while True:
            for msg in q.listen():
                print msg
                if not msg or msg.get('type', None) != 'message':
                    continue
                data = json.loads(msg['data'])
                print pformat(data)
    except KeyboardInterrupt:
        print 'Exiting'
        q.close()

def heartbeat(self):
    while not self.drain:
        with self.lock:
            heartbeat = {
                'shard': self.shard,
                'entries': len(self.entries),
                'errors': len(self.errors),
                'results': len(self.results),
                'resources': {
                    "cpu_usage.percent": psutil.cpu_percent(),
                    "mem_usage.percent": psutil.phymem_usage().percent,
                    "disk_usage.percent": psutil.disk_usage('/').percent,
                    "disk_usage.free": psutil.disk_usage('/').free,
                },
                'services': self._service_info(),
                'queues': {
                    'max_inflight': self.high,
                    'control': self.control_queue.length(),
                    'ingest': q.length(self.ingest_queue),
                    'response': q.length(self.response_queue),
                },
            }
            heartbeat['hostinfo'] = self.hostinfo
            msg = message.Message(to="*", sender='dispatcher',
                                  mtype=message.MT_DISPHEARTBEAT, body=heartbeat)
            CommsQueue('status').publish(msg.as_dict())
        time.sleep(1)

def send_heartbeat(self):
    self.log.debug(r'heartbeat.')
    heartbeat = self.heartbeat()
    msg = Message(to='*', mtype=MT_SVCHEARTBEAT, sender=self.mac, body=heartbeat)
    CommsQueue('status').publish(msg.as_dict())

def monitoring_on(data):
    info = get_user_info(request, session)

    if info.get('uname', None) is None:
        return

    LOGGER.info("[%s@%s] SocketIO:Monitor - Event received => %s"
                % (info.get('uname', None), info['ip'], data))
    emit('connected', data)

    q = CommsQueue('status', private=True)
    try:
        for msg in q.listen():
            if msg['type'] == "message":
                data = json.loads(msg['data'])
                emit(data['mtype'], data)
    except Exception:
        LOGGER.exception("[%s@%s] SocketIO:Monitor" % (info.get('uname', None), info['ip']))
    finally:
        LOGGER.info("[%s@%s] SocketIO:Monitor - Connection to client was terminated"
                    % (info.get('uname', None), info['ip']))

def _heartbeat_thread_main(self):
    while self._should_run:
        # TODO: add locking
        self.log.debug('Sending heartbeat')
        heartbeat = self._build_heartbeat()
        msg = Message(to="*", sender='controller',
                      mtype=MT_CONTROLLERHEARTBEAT, body=heartbeat)
        CommsQueue('status').publish(msg.as_dict())
        time.sleep(config.system.update_interval)

def submission_on(data):
    info = get_user_info(request, session)

    if info.get('uname', None) is None:
        return

    LOGGER.info("[%s@%s] SocketIO:Submission - Event received => %s"
                % (info.get('uname', None), info['ip'], data))
    if AUDIT:
        AUDIT_LOG.info("%s [%s] :: %s(start)" % (info.get('uname', None),
                                                 info.get('classification', classification.UNRESTRICTED),
                                                 "socketsrv_submission_on"))

    emit('connected', data)

    q = CommsQueue('traffic', private=True)
    try:
        for msg in q.listen():
            if msg['type'] == "message":
                body = json.loads(msg['data'])
                submission_classification = body.get('body', {}).get('classification',
                                                                     classification.UNRESTRICTED)
                message = {
                    'body': body,
                    'mtype': 'SubmissionIngested',
                    'reply_to': None,
                    'sender': u'middleman',
                    'succeeded': True,
                    'to': u'*'
                }

                if classification.is_accessible(info.get('classification', classification.UNRESTRICTED),
                                                submission_classification):
                    emit('SubmissionIngested', message)
    except Exception:
        LOGGER.exception("[%s@%s] SocketIO:Submission" % (info.get('uname', None), info['ip']))
    finally:
        LOGGER.info("[%s@%s] SocketIO:Submission - Connection to client was terminated"
                    % (info.get('uname', None), info['ip']))
        if AUDIT:
            AUDIT_LOG.info("%s [%s] :: %s(stop)" % (info.get('uname', None),
                                                    info.get('classification', classification.UNRESTRICTED),
                                                    "socketsrv_submission_on"))

def save_alert(psid, alert, datastore, counter, event_id):
    if psid:
        alert_action({'action': 'update', 'alert': alert})
        counter.increment('alert.updated')
    else:
        datastore.save_alert(event_id, alert)
        counter.increment('alert.saved')

    msg = message.Message(to="*", sender='alerter',
                          mtype=message.MT_ALERT_CREATED, body=alert)
    CommsQueue('alerts').publish(msg.as_dict())

def listen_loop():
    # TODO: This should resume if there is a connection failure/problem.
    try:
        status = CommsQueue('status')
        for msg in status.listen():
            if msg['type'] != 'message':
                continue
            msg = Message.parse(json.loads(msg['data']))
            if msg.mtype != MT_SVCHEARTBEAT:
                continue

            t = time.time()
            for k in service_list(msg.body):
                with self.lock:
                    service = self.services.get(k, None)
                    if not service:
                        service = self._add_service(k)
                    if service:
                        service.metadata['last_heartbeat_at'] = t
    except Exception:
        log.exception('In listen_loop')
        raise

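# Hedged sketch (assumption, not part of the original source): the TODO above asks
# for the loop to resume after a connection failure. One minimal approach is to
# re-enter listen_loop() with a short back-off whenever it raises; the wrapper name
# and the 5-second delay are illustrative choices, and it reuses the same `log` and
# `time` objects the function above already relies on.
def run_listen_loop_forever(retry_delay=5):
    while True:
        try:
            listen_loop()
        except Exception:
            log.exception('listen_loop died; restarting in %ds' % retry_delay)
            time.sleep(retry_delay)
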
def send_minimal_heartbeat(service_name, num_workers):
    """Send just enough heartbeat that the dispatcher knows we are up."""
    logger.info('Sending heartbeat.')
    heartbeat = {
        'services': {
            'details': {
                service_name: {
                    'num_workers': num_workers
                }
            }
        }
    }
    msg = Message(to='*', mtype=MT_SVCHEARTBEAT, sender='runservice_live', body=heartbeat)
    CommsQueue('status').publish(msg.as_dict())

def reply_to_rpc(request_msg, response_body, succeeded=True):
    if isinstance(request_msg, Message):
        rpc_channel = request_msg.reply_to
        to = request_msg.sender
        mtype = request_msg.mtype
        sender = request_msg.to
    else:  # raw dictionary
        rpc_channel = request_msg['reply_to']
        to = request_msg['sender']
        mtype = request_msg['mtype']
        sender = request_msg['to']

    msg = Message(
        to=to,
        mtype=mtype,
        sender=sender,
        succeeded=succeeded,
        body=response_body).as_dict()

    if rpc_channel == '*' or rpc_channel == u'*':
        CommsQueue('status').publish(msg)
    else:
        NamedQueue(rpc_channel).push(msg)

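# Hedged usage sketch (not from the original source): a worker that has handled a
# request parsed by _rpc_executor_thread_main above could answer it like this. The
# `handle_request` helper is a hypothetical placeholder for whatever work the RPC
# actually performs; reply_to_rpc routes the answer back via the message's reply_to
# channel, or broadcasts it on 'status' when reply_to is '*'.
def _answer_rpc(raw_rpc):
    try:
        result = handle_request(raw_rpc)   # hypothetical worker logic
        reply_to_rpc(raw_rpc, response_body=result, succeeded=True)
    except Exception as e:
        reply_to_rpc(raw_rpc, response_body=str(e), succeeded=False)
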
def get_metrics_sink():
    from assemblyline.al.common.queue import CommsQueue
    return CommsQueue('SsMetrics')

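# Hedged usage sketch (not from the original source): the MetricsServer below pops
# 'name', 'type', 'host' and 'instance' from each message and folds the remaining
# keys into a Counter, so a producer could publish a snapshot like this. It assumes
# CommsQueue.publish() serializes the dict the same way the heartbeat publishers
# above do; the service name and counter values are illustrative only.
import socket

def publish_service_counters(counts):
    payload = dict(counts)                  # e.g. {'svc.execute_start': 12, ...}
    payload.update({
        'name': 'MyService',                # hypothetical service name
        'type': 'service',                  # must match a key in METRIC_TYPES
        'host': socket.gethostname(),
        'instance': 0,
    })
    get_metrics_sink().publish(payload)
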
class MetricsServer(object):

    SRV_METRICS = [
        'svc.cache_hit', 'svc.cache_miss', 'svc.cache_skipped',
        'svc.execute_start', 'svc.execute_done', 'svc.execute_fail_recov',
        'svc.execute_fail_nonrecov', 'svc.job_scored', 'svc.job_not_scored'
    ]

    INGEST_METRICS = [
        'ingest.duplicates', 'ingest.bytes_ingested', 'ingest.submissions_ingested',
        'ingest.error', 'ingest.timed_out', 'ingest.submissions_completed',
        'ingest.files_completed', 'ingest.bytes_completed', 'ingest.skipped',
        'ingest.whitelisted'
    ]

    DISPATCH_METRICS = ['dispatch.files_completed']

    ALERT_METRICS = [
        'alert.received', 'alert.err_no_submission', 'alert.heavy_ignored',
        'alert.proto_http', 'alert.proto_smtp', 'alert.proto_other', 'alert.saved'
    ]

    METRIC_TYPES = {
        'alerter': ALERT_METRICS,
        'ingester': INGEST_METRICS,
        'dispatcher': DISPATCH_METRICS,
        'service': SRV_METRICS
    }

    def __init__(self, metrics_channel_name, logger, elastic_ip_p, elastic_port_p):
        self.metrics_channel_name = metrics_channel_name
        self.elastic_ip = elastic_ip_p
        self.elastic_port = elastic_port_p
        self.scheduler = Scheduler()
        self.metrics_queue = None
        self.es = None
        self.log = logger

        self.METRIC_TYPES.update(forge.get_config().core.metricsd.extra_metrics)

        self.counters_lock = Lock()
        self.counters = {}

    def serve_forever(self):
        self.metrics_queue = CommsQueue(self.metrics_channel_name)
        self.es = elasticsearch.Elasticsearch([{'host': self.elastic_ip,
                                                'port': self.elastic_port}])
        self.scheduler.add_interval_job(self._create_aggregated_metrics,
                                        seconds=60,
                                        kwargs={"my_logger": self.log})
        self.scheduler.start()

        while True:
            for msg in self.metrics_queue.listen():
                if not msg or msg.get('type', None) != 'message':
                    continue

                metrics = json.loads(msg['data'])
                metrics_name = metrics.pop('name', None)
                metrics_type = metrics.pop('type', None)
                metrics_host = metrics.pop('host', None)
                _ = metrics.pop('instance', None)
                if not metrics_name or not metrics_type or not metrics_host:
                    continue

                with self.counters_lock:
                    if (metrics_name, metrics_type, metrics_host) not in self.counters:
                        self.counters[(metrics_name, metrics_type, metrics_host)] = Counter(metrics)
                    else:
                        self.counters[(metrics_name, metrics_type, metrics_host)] += Counter(metrics)

    def _create_aggregated_metrics(self, my_logger):
        my_logger.info("Copying counters.")
        with self.counters_lock:
            counter_copy = copy.deepcopy(self.counters)
            self.counters = {}

        my_logger.info("Aggregating metrics.")
        timestamp = now_as_iso()
        for component, counts in counter_copy.iteritems():
            component_name, component_type, component_host = component
            output_metrics = {'name': component_name,
                              'type': component_type,
                              'host': component_host}
            if component_type in self.METRIC_TYPES:
                output_metrics.update({k: counts.get(k, 0)
                                       for k in self.METRIC_TYPES[component_type]})
            else:
                my_logger.info("Skipping unknown component type: {cpt}".format(cpt=component_type))
                continue
            output_metrics['timestamp'] = timestamp
            output_metrics = cleanup_metrics(output_metrics)

            my_logger.info(output_metrics)
            try:
                self.es.create("al_metrics-%s" % timestamp[:10].replace("-", "."),
                               component_type, output_metrics)
            except Exception as e:
                my_logger.exception(e)

        my_logger.info("Metrics aggregated... Waiting for next run.")

class MessageError(Exception):
    pass


MT_DISPHEARTBEAT = 'DispHeartbeat'
MT_INGESTHEARTBEAT = 'IngestHeartbeat'
MT_SVCHEARTBEAT = 'SvcHeartbeat'
MT_CONTROLLERHEARTBEAT = 'CtlHeartbeat'
MT_HARDDRIVE_FAILURES = "HardDriveFailures"
MT_ALERT_CREATED = "AlertCreated"


def send_rpc_comms_queue(msg, response_timeout=10, async=False):
    assert isinstance(msg, Message)
    if not async:
        msg.reply_to = reply_queue_name('ssrpc')
        CommsQueue(msg.to).publish(msg.as_dict())
        result = NamedQueue(msg.reply_to).pop(timeout=response_timeout)
        if not result:
            return None
        return Message.parse(result)
    else:
        msg.reply_to = '*'
        CommsQueue(msg.to).publish(msg.as_dict())
        return


def send_rpc(msg, response_timeout=10, async=False):
    assert isinstance(msg, Message)
    if not async:
        msg.reply_to = reply_queue_name('ssrpc')
        NamedQueue(msg.to).push(msg.as_dict())