def start(self):
    """Start the tshark capture subprocess and the line-reader thread.

    Idempotent: returns immediately when the driver is already running.

    Raises:
        RuntimeError: when no tshark installation can be located.
    """
    if self.running:
        return
    self.logger.info("start tshark driver on interface %s for ports %s, with bpf filter %s",
                     self.interface, self.ports, self.bpf_filter)
    self.running = True
    # metrics recorders must be created in the process that uses them
    self.data_mr = MetricsRecorder("sniffer.driver.data")
    self.error_mr = MetricsRecorder("sniffer.driver.error")

    # capture only traffic on the configured server ports, optionally
    # narrowed further by the user supplied bpf filter
    port_filter = " or ".join(["tcp port {}".format(port) for port in self.ports])
    if self.bpf_filter:
        port_filter = "({}) and ({})".format(port_filter, self.bpf_filter)

    tshark_home = get_tshark_home()
    if not tshark_home:
        # fixed message grammar ("is not find" -> "was not found")
        raise RuntimeError("tshark was not found")
    self.logger.info("find tshark at %s", tshark_home)

    command = (is_linux() and "sudo " or "") + """%(tshark_home)s/tshark -o ssl.desegment_ssl_application_data:TRUE -o ssl.desegment_ssl_records:TRUE -o ssl.keys_list:"0.0.0.0","443","http","/home/threathunter/private.key" -f "%(port_filter)s" -i %(interface)s -Y "http.request or http.response" -T fields -Eseparator=/t -e http -e http.request -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e http.request.method -e http.host -e http.request.uri -e http.request.full_uri -e http.user_agent -e http.content_length -e http.content_type -e http.response.code -e http.response.phrase -e http.content_encoding -e http.cookie -e http.set_cookie -e http.referer -e data.data -e text """ % ({"tshark_home": tshark_home, "port_filter": port_filter, "interface": self.interface})

    # pf_ring settings so multiple capture processes can share the NIC
    environments = dict()
    environments["PCAP_PF_RING_CLUSTER_ID"] = "14"
    environments["PCAP_PF_RING_APPNAME"] = "tshark-" + self.interface
    environments["PCAP_PF_RING_USE_CLUSTER_PER_FLOW_4_TUPLE"] = "1"
    environments["LD_LIBRARY_PATH"] = "/usr/local/lib64"

    self.logger.info("start tshark command: %s", command)
    self.sub_task = pexpect.spawn(command, env=environments, timeout=3600)
    import atexit
    atexit.register(self.stop)

    # establish client: give tshark a moment to come up, then read its output
    gevent.sleep(2)
    self.client_task = run_in_thread(self.process_input)
    return
def notice_clean():
    """Cron entry: clean the notice table and report job metrics.

    Records a run/success/fail metric plus the elapsed minutes, then sleeps
    so the metrics have a chance to be flushed before the process exits.
    """
    global logger
    logger_name = "nebula.notice.cleaner"
    start_time = time.time()
    error_type = None
    job_name = "Clean table notice"
    # pre-initialize so the finally block never hits a NameError when
    # init_env / MetricsRecorder construction fails (original defect)
    notice_clean_recorder = None
    status = "fail"
    try:
        logger = utils.init_env(logger_name)
        notice_clean_recorder = MetricsRecorder("cronjob.notice_clean",
                                                expire=86400 * 60,  # 2 months
                                                interval=300,       # 5 min to merge
                                                type="sum",
                                                db="nebula.offline")
        cj = NoticeCleanCronJob()
        notice_clean_recorder.record(1, {"status": "run"})
        cj.start()
        status = "success"
    except Exception as e:
        # str(e) instead of the deprecated e.message (empty/absent for many
        # exception types)
        if logger:
            logger.exception(str(e))
        status = "fail"
        error_type = str(e)
    finally:
        costs = (time.time() - start_time) / 60.0
        if logger:
            logger.info("Cronjob(%s) start at %s has been %s , costs %s min.",
                        job_name, start_time, status, costs)
        if notice_clean_recorder:
            notice_clean_recorder.record(1, {"status": status,
                                             "error_type": error_type,
                                             "costs": costs})
        # wait for metrics to write.
        gevent.sleep(60)
def __init__(self, id, parser, driver, cpu=None, is_process=True):
    """Bind the parser/driver pair and prepare all per-instance state."""
    self.id = id
    self.parser = parser
    self.driver = driver
    self.cpu = cpu
    self.is_process = is_process

    # lifecycle flag and the three background greenlets (created on start)
    self._running = False
    self._rpc_task = None
    self._events_task = None
    self._health_task = None

    # bounded hand-off queue between the parser loop and the rpc sender
    self.queue = gevent.queue.Queue(maxsize=10000)

    self.logger = settings.init_logging("main.{}".format(self.id))

    # one metrics recorder per pipeline stage
    self.error_mr = MetricsRecorder("sniffer.main.error")
    self.msg_mr = MetricsRecorder("sniffer.main.msg")
    self.event_mr = MetricsRecorder("sniffer.main.event")
    self.rpc_mr = MetricsRecorder("sniffer.main.rpc")
    self.main_mr = MetricsRecorder("sniffer.main.loop")

    self.urltree = URLTree()
def notice_stat(timestamp):
    """Cron entry: aggregate one hour of notices into notice_stat rows.

    Args:
        timestamp: hour to convert, formatted per ``settings.LogPath_Format``;
            when falsy, the previous hour is used.

    The working hour/day are published through ``settings.Working_TS`` and
    ``settings.Working_DAY`` for the cron job to consume.
    """
    global logger
    logger_name = "nebula.notice_stat.writer"
    job_name = "generate notice stat"
    error_type = None
    start_time = time.time()
    # pre-initialize so the finally block never hits a NameError when
    # setup inside the try fails (original defect)
    notice_stat_recorder = None
    status = "fail"

    # 获取需要转换notice -> notice_stat 的时间戳, 如果不指定,默认转换的notice的时间范围是上个小时内
    if not timestamp:
        timestamp = utils.get_last_hour()
    t = datetime.strptime(timestamp, settings.LogPath_Format)
    settings.Working_TS = time.mktime((t.year, t.month, t.day, t.hour, t.minute, t.second, 0, 0, 0))
    settings.Working_DAY = int(time.mktime((t.year, t.month, t.day, 0, 0, 0, 0, 0, 0)))
    click.echo(u"所使用的工作小时的时间戳是:%s, 既:%s" % (settings.Working_TS, datetime.fromtimestamp(settings.Working_TS)))
    click.echo(u"所处的日期是%s, 既:%s" % (settings.Working_DAY, datetime.fromtimestamp(settings.Working_DAY*1.0)))
    try:
        logger = utils.init_env(logger_name)
        notice_stat_recorder = MetricsRecorder("cronjob.notice_stat",
                                               expire=86400 * 60,  # 2 months
                                               interval=300,       # 5 min to merge
                                               type="sum",
                                               db="nebula.offline")
        utils.get_strategies_weigh()
        cj = NoticeStatCronJob()
        notice_stat_recorder.record(1, {"status": "run", "workingts": settings.Working_TS})
        cj.start()
        status = "success"
    except Exception as e:
        logger.exception(traceback.format_exc())
        status = "fail"
        # str(e) instead of the deprecated e.message
        error_type = str(e)
    finally:
        costs = (time.time() - start_time) / 60.0
        logger.info("Cronjob(%s) working ts: %s has been %s, costs: %s min.",
                    job_name, settings.Working_TS, status, costs)
        if notice_stat_recorder:
            notice_stat_recorder.record(1, {"status": status,
                                            "workingts": settings.Working_TS,
                                            "error_type": error_type})
        # wait for metrics to write.
        gevent.sleep(60)
class Main(object):
    """One sniffer worker: pulls raw messages from a driver, turns them into
    events through the parser, and ships the events over babel RPC.

    Three background greenlets cooperate:
      * ``event_processor``  -- driver msg -> parsed events -> internal queue
      * ``rpc_processor``    -- internal queue -> http/misc babel clients
      * ``health_processor`` -- stops the instance when the driver dies
    """

    def __init__(self, id, parser, driver, cpu=None, is_process=True):
        self.parser = parser
        self.driver = driver
        self.id = id
        self._running = False
        self._rpc_task = None
        self._events_task = None
        self._health_task = None
        # bounded hand-off queue between parsing and sending
        self.queue = gevent.queue.Queue(maxsize=10000)
        self.cpu = cpu
        self.is_process = is_process
        self.logger = settings.init_logging("main.{}".format(self.id))
        self.error_mr = MetricsRecorder("sniffer.main.error")
        self.msg_mr = MetricsRecorder("sniffer.main.msg")
        self.event_mr = MetricsRecorder("sniffer.main.event")
        self.rpc_mr = MetricsRecorder("sniffer.main.rpc")
        self.main_mr = MetricsRecorder("sniffer.main.loop")
        self.urltree = URLTree()

    def add_error_metrics(self, data_type):
        """Record one error metric tagged with this instance id."""
        tags = {"id": self.id, "type": data_type}
        self.error_mr.record(1, tags)

    def start(self):
        """Start the driver and the three worker greenlets; idempotent."""
        if self._running:
            return
        self.main_mr.record(1, {"id": self.id, "type": "start"})
        # cpu binding
        self.logger.info("process %s binding to cpu %s", os.getpid(), self.cpu)
        if is_linux() and self.cpu and self.is_process:
            subprocess.Popen(
                ["taskset", "-cp", "{}".format(self.cpu), "{}".format(os.getpid())],
                stderr=subprocess.PIPE, stdout=subprocess.PIPE).communicate()

        self._running = True
        self.logger.info("sniffer instance is starting driver")
        if self.driver:
            self.driver.start()

        self.logger.info("sniffer instance is starting rpc task")
        self._rpc_task = gevent.spawn(self.rpc_processor)
        self._rpc_task.start()

        # parse event for httpmsg
        self.logger.info("sniffer instance is starting events task")
        self._events_task = gevent.spawn(self.event_processor)
        self._events_task.start()

        self.logger.info("sniffer instance is starting healthy task")
        self._health_task = gevent.spawn(self.health_processor)
        self._health_task.start()

        self.urltree.synchronize()

    def stop(self):
        """Kill the worker greenlets and stop the driver."""
        self._running = False
        self.logger.info("sniffer instance is stopping rpc task")
        self.main_mr.record(1, {"id": self.id, "type": "stop"})
        if self._rpc_task:
            self._rpc_task.kill()
        self.logger.info("sniffer instance is stopping events task")
        if self._events_task:
            self._events_task.kill()
        self.logger.info("sniffer instance is stopping healthy task")
        if self._health_task:
            self._health_task.kill()
        self.logger.info("sniffer instance is stopping driver")
        if self.driver:
            self.driver.stop()

    def close(self):
        self.stop()

    def __del__(self):
        # __del__ may run during interpreter shutdown when logging/gevent
        # internals are already torn down -- never let it raise (fixed)
        try:
            self.stop()
        except Exception:
            pass

    def event_processor(self):
        """Parsing loop: driver msg -> events -> internal queue."""
        idle_run = 0
        while self._running:
            # no events coming
            if idle_run > 0 and idle_run % 5 == 0:
                # idle sleep for 0.5 seconds
                gevent.sleep(0.5)
                if idle_run % 100 == 0:
                    self.logger.debug("no msg in the last short time")
                    self.main_mr.record(1, {"id": self.id, "type": "idle"})
            try:
                msg = self.driver.get_msg_nowait()
            except Exception:
                # no msg yet
                msg = None
            if not msg:
                idle_run += 1
                continue
            idle_run = 0

            # msg common processing
            try:
                self.msg_mr.record(1, {"id": self.id, "type": "input"})
                self.logger.debug("start to process msg %s", msg)

                # 开始bones折叠 -- refresh the folding tree before normalizing
                self.urltree.synchronize()
                uri_stem = msg.uri_stem
                page = msg.page
                if msg.is_static:
                    # 静态页面特殊逻辑: collapse the path, keep only the suffix
                    new_url = msg.host + '/****.' + msg.page.rsplit('.', 1)[-1]
                    msg.uri_stem = msg.page = new_url
                elif page == uri_stem:
                    # no normalization yet
                    new_page, new_params = self.urltree.normalize_url(page)
                    if new_page != page:
                        msg.uri_stem = new_page
                        msg.page = new_page
                        new_params = '&'.join(
                            ['%s=%s' % (k, v) for k, v in new_params.iteritems()])
                        old_params = msg.uri_query
                        if old_params:
                            new_params = old_params + '&' + new_params
                        msg.uri_query = new_params

                # msg specific processing per customer
                if self.parser.filter(msg):
                    self.logger.debug("filtered by customparsers")
                    self.msg_mr.record(1, {"id": self.id, "type": "drop"})
                    continue
                self.logger.debug("msg has passed the filter")

                events = []
                if isinstance(msg, HttpMsg):
                    # parse 实际入口: http msg -> list of events
                    events = self.parser.get_events_from_http_msg(msg)
                elif isinstance(msg, TextMsg):
                    events = self.parser.get_events_from_text_msg(msg)
                else:
                    self.logger.error("fail to process this type of event")
                    self.add_error_metrics("parse failure")
                    continue

                http_events = [e for e in events
                               if e.name in {"HTTP_DYNAMIC", "HTTP_STATIC"}]
                if not http_events:
                    continue
                # 取第一个是因为所有的客户处理模块中第一个处理函数都是 extract_http_log_event()
                http_event = http_events[0]

                # try autoparsers
                for g in get_current_generators():
                    result = g.parse_event(http_event, msg)
                    if result:
                        events.append(result)
                if not events:
                    continue

                self.logger.debug("msg has generated %d events", len(events))
                self.msg_mr.record(1, {"id": self.id, "type": "output"})
                self.event_mr.record(len(events), {"id": self.id, "type": "input"})

                # processing id and pid: the http event keeps its own id as
                # parent; every other event points back to it through pid
                httpid = "0" * 24
                for ev in events:
                    if ev.name in {"HTTP_DYNAMIC", "HTTP_STATIC"}:
                        ev.property_values["pid"] = "0" * 24
                        httpid = ev.property_values["id"]
                for ev in events:
                    if ev.name not in {"HTTP_DYNAMIC", "HTTP_STATIC"}:
                        ev.property_values["id"] = str(ObjectId())
                        ev.property_values["pid"] = httpid

                # processing uid/did/sid: propagate ids found in any event
                id_dict = {"uid": "", "did": "", "sid": ""}
                for ev in events:
                    for key in id_dict.keys():
                        if ev.property_values.get(key):
                            id_dict[key] = ev.property_values[key]
                    if ev.name == "ACCOUNT_LOGIN":
                        id_dict["uid"] = ev.property_values["user_name"]
                        store_user_session_mapping(id_dict["uid"], id_dict["sid"])
                    if ev.name == "ACCOUNT_REGISTRATION":
                        id_dict["uid"] = ev.property_values["user_name"]
                        store_user_session_mapping(id_dict["uid"], id_dict["sid"])
                if not id_dict["uid"] or id_dict["uid"].startswith("fake"):
                    t = get_user_from_session(id_dict["sid"])
                    if t:
                        id_dict["uid"] = t
                self.logger.debug("get id for this batch of events %s", id_dict)
                for ev in events:
                    ev.property_values.update(id_dict)

                # body should not be too long
                _max_length = max_body_length_config.get()
                for ev in events:
                    if "s_body" in ev.property_values:
                        ev.property_values["s_body"] = ev.property_values["s_body"][:_max_length]
                    if "c_body" in ev.property_values:
                        ev.property_values["c_body"] = ev.property_values["c_body"][:_max_length]

                for ev in events:
                    self.logger.debug("get event %s", ev)
                    self.queue.put_nowait(ev)
                self.event_mr.record(len(events), {"id": self.id, "type": "output"})
            except Exception:
                # was a bare ``except:``; narrowed so kill()/KeyboardInterrupt
                # are not swallowed by the loop
                self.add_error_metrics("main process failure")
                self.msg_mr.record(1, {"id": self.id, "type": "drop"})
                self.logger.error("fail to process, error %s", traceback.format_exc())

    def health_processor(self):
        """Stop the instance as soon as the driver reports dead."""
        while self._running:
            if self.driver and not self.driver.is_alive():
                self._running = False
            gevent.sleep(5)

    def rpc_processor(self):
        """Send queued events to the proper babel client (http vs misc)."""
        mode = configcontainer.get_config("sniffer").get_string(
            "sniffer.servicemode", "redis")
        if mode == "redis":
            import redisserviceclient
            http_client = redisserviceclient.get_httplog_rpc_client()
            misc_client = redisserviceclient.get_misclog_rpc_client()
        elif mode == "rabbitmq":
            import rabbitmqserviceclient
            amqp_url = configcontainer.get_config("sniffer").get_string(
                "sniffer.amqp_url", "")
            http_client = rabbitmqserviceclient.get_httplog_rpc_client(amqp_url)
            misc_client = rabbitmqserviceclient.get_misclog_rpc_client(amqp_url)
        else:
            self.add_error_metrics("invalid service")
            raise RuntimeError("invalid service mode")
        http_client.start()
        misc_client.start()

        idle_run = 0
        events_sent = 0
        event = None
        # (removed an unused iteration counter from the original)
        while self._running:
            try:
                events_sent = 0
                event = self.queue.get_nowait()
                self.rpc_mr.record(1, {"id": self.id, "type": "input",
                                       "mode": mode, "name": event.name})
                if event.name == "HTTP_DYNAMIC" or event.name == "HTTP_STATIC":
                    if event.property_values["is_static"]:
                        # remove redundant values
                        event.property_values["s_body"] = ""
                        event.property_values["c_body"] = ""
                        event.property_values["cookie"] = ""
                    event.key = event.property_values["c_ip"]
                    http_client.send(event, event.key, False)
                    self.logger.debug("sending an http event on key %s", event.key)
                else:
                    misc_client.send(event, event.key, False)
                    self.logger.debug("sending an %s event on key %s",
                                      event.name, event.key)
                # single success-record (the original duplicated this block
                # verbatim in both branches)
                self.rpc_mr.record(1, {"id": self.id, "type": "output",
                                       "mode": mode, "name": event.name})
                events_sent = 1
                event = None
            except gevent.queue.Empty:
                pass
            except Exception as err:
                traceback.print_exc()
                self.add_error_metrics("send event")
                self.rpc_mr.record(1, {"id": self.id, "type": "error", "mode": mode,
                                       "name": event.name if event else ""})
                self.logger.error("fail to send event, error %s", err)
            finally:
                # sleep while idle, backing off up to 0.5s
                if not events_sent:
                    idle_run += 1
                    idle_run = min(idle_run, 5)
                    gevent.sleep(0.1 * idle_run)
                else:
                    idle_run = 0
conditions.append(get_op_tip(left, ops[op], right)) tip += '{}s内,{}{}的总数等于{};'.format(interval, source_event, conditions and '在满足{}的情况下'.format(",".join(conditions)) or "", v) elif left.subtype == "getvariable": counter = dbcontext.nebula_ui_variables_dict.get(tuple(left.variable)) counter_name = counter.name v = values.get(counter_name) if v: tip += '{}等于"{}";'.format(counter_name, v) else: continue tip += "满足策略{}".format(name) # print tip return tip metrics_recorder_stats = MetricsRecorder("web.notices.stats", expire=86400*60, interval=300, type="sum", db="default") def add_metrics(notice): metrics_recorder_stats.record(1, {"test": 1 if notice.test else 0}) def process_notify(event): if not event: return try: data = { "timestamp": event.timestamp, "key": event.key, "scene_name": event.property_values["sceneName"],
def start(self):
    """Bring up the syslog server together with its metrics recorders."""
    self.logger.info("start syslog driver")
    # recorders are created here so they live in the serving process
    self.data_mr = MetricsRecorder("sniffer.driver.data")
    self.error_mr = MetricsRecorder("sniffer.driver.error")
    self.server.start()
class TsharkHttpsDriver(Driver):
    """Driver that captures HTTP(S) traffic by running a tshark subprocess.

    tshark emits one tab-separated line per http request/response; request
    and response are paired through an in-memory TTL cache keyed by the
    (client_ip, client_port, server_ip, server_port) tuple.
    """

    def __init__(self, interface="lo0", ports=(80, 8080, 8443),
                 key_place="/Users/lw/sslprivatekey/server.key.unencrypted",
                 bpf_filter=None):
        Driver.__init__(self, "tshark.{}".format(interface))
        # configured server ports take precedence over the defaults
        self.ports = configcontainer.get_config("sniffer").get_string(
            "filter.traffic.server_ports", "") or ports
        self.ports = expand_ports(self.ports)
        self.key_place = key_place
        self.interface = interface
        self.bpf_filter = bpf_filter
        self.sub_task = None
        self.client_task = None
        self.running = False
        # cache used for building the http message (request waits for its response)
        self.cache = Cache(50000, ttl=30)
        self.TIMEOUT = 30  # 30s timeout
        self.last_check = millis_now()
        self.data_mr = None
        self.error_mr = None
        self.fixed_tags = {"ports": str(self.ports), "interface": self.interface}

    def start(self):
        """Start the tshark subprocess and the line-reader thread; idempotent.

        Raises:
            RuntimeError: when no tshark installation can be located.
        """
        if self.running:
            return
        self.logger.info("start tshark driver on interface %s for ports %s, with bpf filter %s",
                         self.interface, self.ports, self.bpf_filter)
        self.running = True
        self.data_mr = MetricsRecorder("sniffer.driver.data")
        self.error_mr = MetricsRecorder("sniffer.driver.error")

        port_filter = " or ".join(["tcp port {}".format(port) for port in self.ports])
        if self.bpf_filter:
            port_filter = "({}) and ({})".format(port_filter, self.bpf_filter)

        tshark_home = get_tshark_home()
        if not tshark_home:
            # fixed message grammar ("is not find" -> "was not found")
            raise RuntimeError("tshark was not found")
        self.logger.info("find tshark at %s", tshark_home)

        command = (is_linux() and "sudo " or "") + """%(tshark_home)s/tshark -o ssl.desegment_ssl_application_data:TRUE -o ssl.desegment_ssl_records:TRUE -o ssl.keys_list:"0.0.0.0","443","http","/home/threathunter/private.key" -f "%(port_filter)s" -i %(interface)s -Y "http.request or http.response" -T fields -Eseparator=/t -e http -e http.request -e ip.src -e tcp.srcport -e ip.dst -e tcp.dstport -e http.request.method -e http.host -e http.request.uri -e http.request.full_uri -e http.user_agent -e http.content_length -e http.content_type -e http.response.code -e http.response.phrase -e http.content_encoding -e http.cookie -e http.set_cookie -e http.referer -e data.data -e text """ % ({"tshark_home": tshark_home, "port_filter": port_filter, "interface": self.interface})

        # pf_ring settings so multiple capture processes can share the NIC
        environments = dict()
        environments["PCAP_PF_RING_CLUSTER_ID"] = "14"
        environments["PCAP_PF_RING_APPNAME"] = "tshark-" + self.interface
        environments["PCAP_PF_RING_USE_CLUSTER_PER_FLOW_4_TUPLE"] = "1"
        environments["LD_LIBRARY_PATH"] = "/usr/local/lib64"

        self.logger.info("start tshark command: %s", command)
        self.sub_task = pexpect.spawn(command, env=environments, timeout=3600)
        import atexit
        atexit.register(self.stop)

        # establish client
        gevent.sleep(2)
        self.client_task = run_in_thread(self.process_input)
        return

    def stop(self):
        """Stop the reader thread and terminate the tshark subprocess."""
        self.logger.info("stop tshark driver on interface %s for ports %s, with bpf filter %s",
                         self.interface, self.ports, self.bpf_filter)
        self.running = False
        if self.client_task:
            self.client_task.join(timeout=2)
            self.client_task = None
        if self.sub_task is not None:
            try:
                if self.sub_task.isalive():
                    # tshark may run under sudo: kill the whole process group
                    os.killpg(self.sub_task.pid, signal.SIGTERM)
                    self.sub_task.wait()
            except Exception as ex:
                traceback.print_exc()
                self.logger.error("fail to kill subprocess %s", ex)
            self.sub_task = None

    def is_alive(self):
        """True while the tshark subprocess is up."""
        return self.sub_task is not None and self.sub_task.isalive()

    def process_input(self):
        """Reader loop: pull lines from tshark until EOF or stop()."""
        while self.running:
            try:
                self.sub_task.expect('\n')
                line = self.sub_task.before
                self.process_http_line(line)
            except pexpect.EOF:
                break
            except Exception as err:
                self.logger.error("fail to process line %s", err)
                self.add_dropped_msgs(1)
                continue

    def process_http_line(self, line):
        """Parse one tshark output line; pair requests with responses.

        Returns the completed :class:`HttpMsg` when a response closes a
        cached request, otherwise ``None``.
        """
        try:
            if not line or not line.startswith("http"):
                self.logger.error("invalid http data, could be error msg: %s", line)
                return
            self.add_data_metrics("input")
            self.logger.debug("tshark get line %s", line)

            fields = line.split("\t", 20)
            if len(fields) < 21:
                self.add_drop_data_metrics("wrong number fields")
                self.add_error_metrics("wrong number fields")
                raise RuntimeError("invalid fields")
            try:
                flag, is_request, src, srcport, dst, dstport, method, host, uri, \
                    full_uri, user_agent, content_length, content_type, code, phase, \
                    content_encoding, cookie, set_cookie, referer, data, text = tuple(fields)
            except Exception as error:
                self.add_drop_data_metrics("split fields")
                self.add_error_metrics("split fields")
                self.logger.error("fail to split fields from line, error: %s", error)
                raise RuntimeError("fail to parse fields: {}".format(fields))

            # key is always ordered (client, server) so both directions match
            if is_request:
                key = (src, srcport, dst, dstport)
            else:
                key = (dst, dstport, src, srcport)

            if is_request:
                self.add_data_metrics("input_request")
                if self.cache.get(key):
                    # there are previous request not processed
                    self.logger.warn("dropping previous incomplete request for key: {}".format(key))
                    del self.cache[key]
                    self.add_dropped_msgs(1)
                    self.add_error_metrics("stale request")
                http_record = dict()
                http_record["source_ip"] = src
                http_record["source_port"] = int(srcport)
                http_record["dest_ip"] = dst
                http_record["dest_port"] = int(dstport)
                http_record["method"] = method.upper()
                http_record["host"] = host
                http_record["uri"] = uri or ""
                http_record["user_agent"] = user_agent
                http_record["referer"] = referer or ""
                http_record["req_body_len"] = int(content_length or 0)
                http_record["req_content_type"] = content_type or ""
                if "POST" == http_record["method"] and http_record["req_body_len"] > 0:
                    body = extract_body_from_data(data, text, content_encoding)
                    http_record["log_body"] = True
                else:
                    body = ""
                    http_record["log_body"] = False
                http_record["req_body"] = body
                current = millis_now()
                http_record["ts"] = current
                cookie = threathunter_common.util.text(cookie or "")
                http_record["req_headers"] = {"COOKIE": cookie}
                # add an entry for background check
                self.cache[key] = http_record
            else:
                # process the response
                self.add_data_metrics("input_response")
                http_record = self.cache.get(key, {})
                if not http_record:
                    self.logger.warn("can't find matching request")
                    self.add_drop_data_metrics("no matching request")
                    self.add_error_metrics("no matching request")
                    return
                http_record["status_code"] = int(code or 0)
                http_record["status_msg"] = phase
                http_record["resp_body_len"] = int(content_length or 0)
                # (the original assigned resp_content_type twice; kept once)
                http_record["resp_content_type"] = content_type or ""
                if http_record["log_body"] and http_record["method"] == "POST" \
                        and http_record["resp_body_len"] > 0:
                    body = extract_body_from_data(data, text, content_encoding)
                else:
                    body = ""
                http_record["resp_body"] = body
                if not http_record["resp_body_len"]:
                    # in case the gzip response
                    http_record["resp_body_len"] = len(body)
                set_cookie = threathunter_common.util.text(set_cookie or "")
                http_record["resp_headers"] = {"SET-COOKIE": set_cookie}
                # NOTE(review): the original assigned False then immediately
                # True; the dead False assignment was removed, final value kept
                http_record["debug_processing"] = True
                http_record["req_time"] = millis_now() - http_record["ts"]

                self.logger.debug("get http data from tshark: %s", http_record)
                try:
                    new_msg = HttpMsg(**http_record)
                except BeFilteredException as bpf:
                    self.add_drop_data_metrics(bpf.type)
                    return
                except Exception as err:
                    self.add_drop_data_metrics(str(err))
                    self.add_error_metrics("msg_parse")
                    return
                self.logger.debug("get http msg from tshark: %s", new_msg)
                self.add_data_metrics("output")
                self.put_msg(new_msg)
                del self.cache[key]
                return new_msg
        except Exception as err:
            self.add_error_metrics("data_parse")
            self.add_drop_data_metrics("data_parse")
            self.logger.error("fail to process tshark, error: %s", err)
            traceback.print_exc()
            raise err

    # ## For metrics
    def add_data_metrics(self, data_type, subtype=""):
        tags = {"source_type": "tshark", "interface": self.interface,
                "type": data_type, "port": str(self.ports), "subtype": subtype}
        self.data_mr.record(1, tags)

    def add_input_data_metrics(self):
        return self.add_data_metrics("input")

    def add_output_data_metrics(self):
        return self.add_data_metrics("output")

    def add_drop_data_metrics(self, reason=""):
        return self.add_data_metrics("drop", reason)

    def add_error_metrics(self, data_type):
        tags = {"source_type": "tshark", "interface": self.interface,
                "type": data_type, "port": str(self.ports)}
        self.error_mr.record(1, tags)
class ServiceClient(object):
    """Babel RPC client bound to one service meta (redis or rabbitmq impl)."""

    # class-level metrics shared by every client instance
    send_metrics = MetricsRecorder("babel.client.sendcount", db="fx", type="count")
    costrange_metrics = MetricsRecorder("babel.client.costrange", db="fx", type="count")
    cost_max_metrics = MetricsRecorder("babel.client.cost.max", db="fx", type="max")
    cost_min_metrics = MetricsRecorder("babel.client.cost.min", db="fx", type="min")
    cost_avg_metrics = MetricsRecorder("babel.client.cost.avg", db="fx", type="avg")
    timeout_metrics = MetricsRecorder("babel.client.timeout", db="fx", type="count")
    error_metrics = MetricsRecorder("babel.client.error", db="fx", type="count")

    def __init__(self, service_meta, **kwargs):
        # 如果初始化失败是否raise异常 (add by wxt 2015-12-16)
        self.raise_if_connect_error = kwargs.get('raise_if_connect_error', True)
        self.running = False
        try:
            self.service_meta = service_meta
            # flush pending batched notifies at interpreter exit
            if self.service_meta.callmode == "notify":
                atexit.register(self.batch_notify_flush)

            self.client_id = kwargs.get("client_id")
            if not self.client_id:
                self.client_id = gen_uuid()
            self.client_id = '_client.{}.{}'.format(self.service_meta.name, self.client_id)

            if service_meta.serverimpl == "rabbitmq":
                from . import babelrabbitmq
                self.impl = babelrabbitmq
            elif service_meta.serverimpl == "redis":
                from . import babelredis
                self.impl = babelredis
            else:
                raise RuntimeError("serverimpl {} not implemented yet".format(service_meta.serverimpl))

            # both data-center options are mandatory
            if "sdc" not in self.service_meta.options:
                raise RuntimeError("sdc not in service meta")
            self.sdc = self.service_meta.options.get("sdc", "")
            if "cdc" not in self.service_meta.options:
                raise RuntimeError("cdc not in service meta")
            self.cdc = self.service_meta.options.get("cdc", "")
            if "," in self.service_meta.options["sdc"] and self.service_meta.callmode != "notify":
                raise RuntimeError("only notify supports multiple data center")

            self._sender = self.impl.get_client_sender(service_meta, **kwargs)
            if service_meta.callmode != "notify":
                # need a queue to receive response
                self._receiver = self.impl.get_client_receiver(service_meta, self.client_id, **kwargs)
            else:
                self._receiver = None

            if service_meta.coder != "mail":
                raise RuntimeError("coder {} is not supported yet".format(service_meta.coder))

            self.requestid_base = AtomicLong(0)
            self.request_cache = RequestCache()
            self.response_task = None
            self.metrics_tags = {
                "service": service_meta.name,
                "delivery": service_meta.delivermode,
                "call": service_meta.callmode,
                "impl": service_meta.serverimpl,
                "clientid": self.client_id,
                "sdc": self.sdc,
                "cdc": self.cdc
            }
            # caching requests for batch_notify
            self.batch_cache = list()
            # single assignment (the original set this twice)
            self.running = True
        except Exception as e:
            if self.raise_if_connect_error:
                print(e)
                raise RuntimeError('babel connect error')
def start(self):
    """Start the redis list driver and its polling greenlet."""
    self.logger.info("start redis list driver")
    self.running = True
    # metrics must be created in the process that uses them
    self.data_mr = MetricsRecorder("sniffer.driver.data")
    self.error_mr = MetricsRecorder("sniffer.driver.error")
    # keep a reference to the loop greenlet (the original discarded it in a
    # local, leaving no handle to inspect or kill the loop later)
    self.loop_task = gevent.spawn(self.redis_loop)
import utils
import settings
from model import Incident

logger = logging.getLogger("nebula.dbwriter.incident")

# timing/counting globals -- presumably instrumentation hooks; verify callers
Test_Get_Event_Count = 0
Test_Get_All_Event_Time = None
Test_Save_All_Time = None

# incident columns that must never be NULL when persisted
Not_Null_Cols = ("ip", "start_time", "strategies", "hit_tags", "risk_score",
                 "uri_stems", "hosts", "dids", "associated_users")

Incident_Metrics_Recorder = MetricsRecorder("store.incident",
                                            expire=86400 * 60,  # 2 months
                                            interval=300,       # 5 min to merge
                                            type="sum",
                                            db="nebula.offline")


def add_incident_metrics(status):
    """Record one incident-store metric tagged with *status*."""
    Incident_Metrics_Recorder.record(1, {"status": status})


def add_success_metrics():
    add_incident_metrics("success")


def add_receive_metrics():
    add_incident_metrics("receive")


def add_fail_metrics():
    add_incident_metrics("fail")
class BroHttpDriver(Driver):
    """Driver that captures HTTP traffic through an embedded bro process.

    Communication with bro uses the broker message bus: http events arrive
    on ``EVENT_TOPIC``, configuration/control is published on ``CMD_TOPIC``.
    """

    EVENT_TOPIC = "/sniffer/events"
    CMD_TOPIC = "/sniffer/cmds"

    def __init__(self, interface, ports=None, embedded_bro=True, bro_home=None,
                 idx=1, start_port=None, bpf_filter=""):
        Driver.__init__(self)
        if ports is None:
            ports = [80, 81, 1080, 3128, 8000, 8080, 8888, 9001]
        self.embedded_bro = embedded_bro
        self.bro_home = get_bro_home(bro_home)
        self.interface = interface
        self.bpf_filter = bpf_filter
        self.logger = settings.init_logging('bro.{}'.format(idx))
        self.ports = configcontainer.get_config("sniffer").get_string(
            "filter.traffic.server_ports", "") or ports
        self.ports = expand_ports(self.ports)
        self.idx = idx
        # NOTE(review): start_port must not be None here -- confirm callers
        self.bro_port = start_port + idx
        self.last_netstat_ts = millis_now()
        self.sub_task = None
        self.client_task = None
        self.last_update = 0
        self.filtered_clients = []
        self.encrypt_keys = []
        self.encrypt_salt = ""
        # bro broker endpoint and its (event / status) subscribers
        self.ep = None
        self.sub = None
        self.ss = None
        self.data_mr = None
        self.error_mr = None
        self.running = False

    def start(self):
        """Write the bro control script, launch bro, start the reader loop."""
        if self.running:
            return
        self.running = True
        self.logger.info('bro starting:{}'.format(self.running))
        # metrics should initialize in its own process
        self.data_mr = MetricsRecorder("sniffer.driver.data")
        self.error_mr = MetricsRecorder("sniffer.driver.error")

        # establish bro
        if self.embedded_bro:
            if not self.interface:
                self.add_error_metrics("invalid params")
                raise RuntimeError("null interface")
            if not self.ports:
                self.add_error_metrics("invalid params")
                raise RuntimeError("null ports")
            self.logger.info(
                "trying to start bro driver on interface %s for ports %s",
                self.interface, self.ports)

            tmp_bro_file_name = os.path.join(
                "tmp", "worker-{}-{}".format(self.interface, self.idx))
            # out.write replaces the py2-only ``print >> out`` chevrons;
            # identical bytes are written
            out = open(tmp_bro_file_name, "w")
            out.write("@load policy/frameworks/control/controllee\n")
            out.write("@load policy/misc/loaded-scripts.bro\n")
            out.write("redef Control::controllee_listen = F;\n")
            out.write("redef Broker::default_listen_address = \"127.0.0.1\";\n")
            out.write("redef Broker::default_port = %s/tcp;\n" % self.bro_port)
            ports_str = "".join("{}/tcp,".format(_) for _ in self.ports)
            out.write("const ports = {\n")
            out.write(ports_str + "\n")
            out.write("};\n")
            out.write("redef likely_server_ports += { ports };\n")
            out.close()

            executable = os.path.join(self.bro_home, "bin/bro")
            #script = os.path.join(self.bro_home, "share/bro/base/protocols/http/main.bro")
            script = os.path.join(settings.Conf_Sniffer_Path, "http.bro")
            environments = dict()
            environments["PCAP_PF_RING_CLUSTER_ID"] = "13"
            environments["PCAP_PF_RING_APPNAME"] = "bro-" + self.interface
            environments["PCAP_PF_RING_USE_CLUSTER_PER_FLOW_4_TUPLE"] = "1"
            self.logger.info('init bro, bro home is {}'.format(self.bro_home))
            self.logger.info('init bro, bro executable is {}'.format(executable))
            self.logger.info('init bro, bro interface is {}'.format(self.interface))
            self.logger.info('init bro, bro bpf filter is {}'.format(self.bpf_filter))
            self.logger.info('init bro, bro temp file is {}'.format(tmp_bro_file_name))
            self.logger.info('init bro, bro script is {}'.format(script))

            if self.bpf_filter:
                # use bpf filter
                self.sub_task = subprocess.Popen([
                    executable, "-C", "-b", "-i", self.interface, "-f",
                    self.bpf_filter, tmp_bro_file_name, script
                ], shell=False, preexec_fn=os.setsid,
                    stderr=sys.stderr, stdout=sys.stdout, env=environments)
            else:
                self.sub_task = subprocess.Popen([
                    executable, "-C", "-b", "-i", self.interface,
                    tmp_bro_file_name, script
                ], shell=False, preexec_fn=os.setsid,
                    stderr=sys.stderr, stdout=sys.stdout, env=environments)
            atexit.register(self.stop)

        # establish client
        gevent.sleep(5)
        self.connect_bro()
        self.config_bro()
        self.client_task = gevent.spawn(self.process_input)
        self.logger.info("driver start")
        self.client_task.start()
        return

    def connect_bro(self):
        """Peer with the local bro broker endpoint; raise when it fails."""
        self.logger.info("connect to bro on port %s", self.bro_port)
        self.ep = broker.Endpoint()
        self.sub = self.ep.make_subscriber(BroHttpDriver.EVENT_TOPIC)
        self.ss = self.ep.make_status_subscriber(True)
        self.ep.peer("127.0.0.1", self.bro_port)
        self.logger.info("connect to bro on port 1 %s", self.bro_port)
        st = self.ss.get()
        self.logger.info("connect to bro on port 2 %s", self.bro_port)
        if not (type(st) == broker.Status and st.code() == broker.SC.PeerAdded):
            self.logger.info("connect to bro failed")
            raise RuntimeError("connect to bro failed")
        self.logger.info("connect to bro successed")

    def config_bro(self):
        """Push the current filter/suffix configuration into bro."""
        self.logger.debug("sending config to bro")
        self.ep.publish(BroHttpDriver.CMD_TOPIC,
                        broker.bro.Event("Control::net_stats_request"))
        self.ep.publish(BroHttpDriver.CMD_TOPIC,
                        broker.bro.Event("update_staticresourcesuffix",
                                         str(suffix_config.get())))
        self.ep.publish(BroHttpDriver.CMD_TOPIC,
                        broker.bro.Event("update_filteredhosts",
                                         str(filtered_hosts_config.get())))
        self.ep.publish(BroHttpDriver.CMD_TOPIC,
                        broker.bro.Event("update_filteredurls",
                                         str(filtered_urls_config.get())))
        self.ep.publish(BroHttpDriver.CMD_TOPIC,
                        broker.bro.Event("update_filteredservers",
                                         str(filtered_servers_config.get())))

    def process_httpevent(self, data):
        """Convert one bro HttpData record into an HttpMsg and enqueue it."""
        try:
            self.add_input_data_metrics()
            if not data.method or not data.host:
                self.add_error_metrics("null host")
                self.add_drop_data_metrics("null host")
                self.logger.error("error data method:{} or host{}".format(
                    data.method, data.host))
                return
            self.logger.debug("start process_httpevent")
            args = dict()
            args["method"] = data.method
            args["host"] = data.host
            args["uri"] = data.uri or ""
            args["uriraw"] = data.uri
            args["referer"] = data.referrer
            args["user_agent"] = data.user_agent
            args["status_code"] = data.status_code
            args["status_msg"] = data.status_msg
            args["source_ip"] = data.orig_ip
            args["source_port"] = data.orig_port.split("/")[0]
            args["dest_ip"] = data.resp_ip
            args["dest_port"] = data.resp_port.split("/")[0]
            # headers arrive serialized as "k@@@v$$$k@@@v..."
            args["req_headers"] = {}
            for header in data.req_headers.split("$$$"):
                if not header:
                    continue
                parts = header.split("@@@")
                args["req_headers"][parts[0]] = parts[1]
            args["resp_headers"] = {}
            for header in data.resp_headers.split("$$$"):
                if not header:
                    continue
                parts = header.split("@@@")
                args["resp_headers"][parts[0]] = parts[1]
            args["req_body"] = data.req_body
            args["resp_body"] = data.resp_body
            args["log_body"] = data.log_body
            args["req_body_len"] = data.req_body_len
            args["resp_body_len"] = data.resp_body_len
            args['req_content_type'] = data.req_content_type
            args['resp_content_type'] = data.res_content_type
            ts = float(data.ts)
            secs = int(ts)
            msecs = int(1000 * (ts - secs))
            args["ts_secs"] = secs
            args["ts_msecs"] = msecs
            args["debug_processing"] = False
            try:
                msg = HttpMsg(**args)
                if ":" in msg.source_ip or not msg.source_ip:
                    # was a leftover debug ``print data``; log instead of stdout
                    self.logger.warn("suspicious source_ip, data: %s", data)
                self.logger.debug("get msg %s", msg)
                self.put_msg(msg)
                self.add_output_data_metrics()
            except BeFilteredException as bfe:
                self.add_drop_data_metrics(bfe.type)
                return
            except Exception as err:
                self.add_drop_data_metrics(str(err))
                self.add_error_metrics("msg_parse")
                return
        except Exception as ex:
            # format_exc(), not print_exc(): print_exc returns None, so the
            # original logged the literal string "None"
            self.logger.error("process_httpevent error:{}".format(
                traceback.format_exc()))
            self.add_drop_data_metrics("data_parse")
            self.add_error_metrics("data_parse")
            self.logger.error("error while receiving data %s", str(ex))
            self.add_dropped_msgs(1)

    def stop(self):
        """Terminate bro and the reader loop; idempotent."""
        self.logger.warn("bro stop...")
        if not self.running:
            self.logger.warn("bro running:{}".format(self.running))
            return
        self.running = False
        gevent.sleep(2)
        self.client_task = None
        if self.sub_task is not None:
            try:
                self.logger.warn("bro killpg({})...".format(self.sub_task.pid))
                os.killpg(self.sub_task.pid, signal.SIGTERM)
                self.logger.warn("bro killpg({}) down.".format(self.sub_task.pid))
            except Exception as ex:
                self.logger.error("fail to kill the process, %s", ex)
                traceback.print_exc()
            self.sub_task.wait()
            self.sub_task = None
        self.logger.warn("bro stop down.")

    def is_alive(self):
        """Check the broker peering and the bro subprocess; refresh config
        every 30 seconds as a side effect."""
        current = millis_now()
        if not self.embedded_bro:
            return True
        if self.ss and self.ss.available():
            st = self.ss.get()
            if not (type(st) == broker.Status and st.code() == broker.SC.PeerAdded):
                self.logger.info("connect to bro failed")
                return False
        if current - self.last_netstat_ts > 30 * 1000:
            self.config_bro()
            self.last_netstat_ts = current
        return self.sub_task is not None and self.sub_task.poll() is None

    def process_input(self):
        """Reader loop: drain http events from the broker subscription."""
        while self.running:
            try:
                self.logger.debug("bro process_input:{}".format(self.running))
                if self.sub.available():
                    (t, d) = self.sub.get()
                    events = broker.bro.Event(d)
                    self.logger.debug("received {}{}".format(
                        events.name(), events.args()))
                    for e in events.args():
                        try:
                            data = HttpData(e)
                        except Exception:
                            self.logger.error(
                                "bro.Event to HttpData except: {}".format(
                                    traceback.format_exc()))
                            # fixed: skip the event instead of calling
                            # process_httpevent with an unbound/stale value
                            continue
                        self.process_httpevent(data)
                    if not self.running:
                        self.logger.error("bro process_input stopped.")
                        break
                else:
                    self.logger.debug("bro sub not available.")
                    gevent.sleep(1)
            except Exception:
                self.logger.error("bro process_input except: {}".format(
                    traceback.format_exc()))
        self.logger.error("bro process_input stopped.")

    # ## For metrics
    def add_data_metrics(self, data_type, subtype=""):
        tags = {
            "source_type": "bro",
            "interface": self.interface,
            "type": data_type,
            "port": str(self.ports),
            "subtype": subtype
        }
        self.data_mr.record(1, tags)

    def add_input_data_metrics(self):
        return self.add_data_metrics("input")

    def add_output_data_metrics(self):
        return self.add_data_metrics("output")

    def add_drop_data_metrics(self, reason=""):
        return self.add_data_metrics("drop", reason)

    def add_error_metrics(self, data_type):
        tags = {
            "source_type": "bro",
            "interface": self.interface,
            "type": data_type,
            "port": str(self.ports)
        }
        self.error_mr.record(1, tags)
def start(self):
    """Start the sniffer driver.

    In embedded-bro mode this writes a small bro controllee script,
    spawns a bro subprocess on ``self.interface`` restricted to
    ``self.ports`` (optionally narrowed by ``self.bpf_filter``), then
    connects the broker client and spawns the event-consuming greenlet.
    Idempotent: returns immediately if already running.

    Raises:
        RuntimeError: if ``self.interface`` or ``self.ports`` is empty
            while running in embedded-bro mode.
    """
    if self.running:
        return
    self.running = True
    self.logger.info('bro starting:{}'.format(self.running))
    # metrics should initialize in its own process
    self.data_mr = MetricsRecorder("sniffer.driver.data")
    self.error_mr = MetricsRecorder("sniffer.driver.error")

    # establish bro
    if self.embedded_bro:
        if not self.interface:
            self.add_error_metrics("invalid params")
            raise RuntimeError("null interface")
        if not self.ports:
            self.add_error_metrics("invalid params")
            raise RuntimeError("null ports")

        self.logger.info(
            "trying to start bro driver on interface %s for ports %s",
            self.interface, self.ports)

        # Generate the per-worker bro configuration script.
        tmp_bro_file_name = os.path.join(
            "tmp", "worker-{}-{}".format(self.interface, self.idx))
        ports_str = "".join("{}/tcp,".format(_) for _ in self.ports)
        # Fix: use a context manager instead of the py2-only
        # file()/print>> pair so the handle is closed even if a write
        # fails part-way through.
        with open(tmp_bro_file_name, "w") as out:
            out.write("@load policy/frameworks/control/controllee\n")
            out.write("@load policy/misc/loaded-scripts.bro\n")
            out.write("redef Control::controllee_listen = F;\n")
            out.write(
                "redef Broker::default_listen_address = \"127.0.0.1\";\n")
            out.write(
                "redef Broker::default_port = %s/tcp;\n" % self.bro_port)
            out.write("const ports = {\n")
            out.write(ports_str + "\n")
            out.write("};\n")
            out.write("redef likely_server_ports += { ports };\n")

        executable = os.path.join(self.bro_home, "bin/bro")
        #script = os.path.join(self.bro_home, "share/bro/base/protocols/http/main.bro")
        script = os.path.join(settings.Conf_Sniffer_Path, "http.bro")

        # PF_RING settings so multiple sniffer workers can share the NIC.
        environments = dict()
        environments["PCAP_PF_RING_CLUSTER_ID"] = "13"
        environments["PCAP_PF_RING_APPNAME"] = "bro-" + self.interface
        environments["PCAP_PF_RING_USE_CLUSTER_PER_FLOW_4_TUPLE"] = "1"

        self.logger.info('init bro, bro home is {}'.format(self.bro_home))
        self.logger.info(
            'init bro, bro executable is {}'.format(executable))
        self.logger.info('init bro, bro interface is {}'.format(
            self.interface))
        self.logger.info('init bro, bro bpf filter is {}'.format(
            self.bpf_filter))
        self.logger.info(
            'init bro, bro temp file is {}'.format(tmp_bro_file_name))
        self.logger.info('init bro, bro script is {}'.format(script))

        # Fix: build the argv once instead of duplicating the whole
        # Popen call in both branches; -C ignores bad checksums, -b is
        # bare mode, -i selects the live interface.
        cmd = [executable, "-C", "-b", "-i", self.interface]
        if self.bpf_filter:
            # use bpf filter
            cmd += ["-f", self.bpf_filter]
        cmd += [tmp_bro_file_name, script]
        self.sub_task = subprocess.Popen(
            cmd,
            shell=False,
            preexec_fn=os.setsid,  # own process group so stop() can killpg
            stderr=sys.stderr,
            stdout=sys.stdout,
            env=environments)
        atexit.register(self.stop)

    # establish client
    gevent.sleep(5)
    self.connect_bro()
    self.config_bro()
    self.client_task = gevent.spawn(self.process_input)
    self.logger.info("driver start")
    # gevent.spawn already schedules the greenlet; the extra start()
    # call is kept for parity with the original flow (harmless no-op).
    self.client_task.start()
    return
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import json

from tornado.web import RequestHandler

from threathunter_common.metrics.metricsrecorder import MetricsRecorder
from threathunter_common.metrics.metricsagent import get_latency_str_for_millisecond

import settings

# All web-API metrics share one-week retention and one-minute intervals.
_METRICS_EXPIRE = 86400 * 7
_METRICS_INTERVAL = 60


def _api_recorder(name, agg):
    """Build a web-API MetricsRecorder with the shared retention settings."""
    return MetricsRecorder(
        name,
        db="default",
        type=agg,
        expire=_METRICS_EXPIRE,
        interval=_METRICS_INTERVAL)


cost_range_metrics = _api_recorder("web.api.cost.range", "count")
cost_max_metrics = _api_recorder("web.api.cost.max", "max")
cost_min_metrics = _api_recorder("web.api.cost.min", "min")
cost_avg_metrics = _api_recorder("web.api.cost.avg", "avg")
user_access_metrics = _api_recorder("web.api.user.count", "count")


class BaseHandler(RequestHandler):
    """Common base class for the web API request handlers."""

    def data_received(self, chunk):
        # Streamed request bodies are not consumed by these handlers.
        pass

    def get_current_user(self):
        # Tornado auth hook: the logged-in user lives in a signed cookie.
        return self.get_secure_cookie("user")
def start(self):
    """Bring up the logstash driver.

    Creates the per-process data/error metrics recorders and starts the
    underlying server loop.
    """
    self.logger.info("start logstash driver")
    # Recorders are created here so each worker process owns its own.
    self.data_mr = MetricsRecorder("sniffer.driver.data")
    self.error_mr = MetricsRecorder("sniffer.driver.error")
    self.server.start_running()
class ServiceServer(object):
    """Babel service server.

    Pulls request mails off a receiver (rabbitmq or redis), decodes the
    payload, invokes the user-supplied handler, and — unless the service
    is a fire-and-forget "notify" service — sends the handler's result
    back to the calling client.  Per-stage latencies and errors are
    recorded through the class-level metrics recorders.
    """

    # Metrics recorders shared by every ServiceServer in this process.
    processed_metrics = MetricsRecorder("babel.server.processcount", type="count", db="fx", interval=5)
    cost_range_metrics = MetricsRecorder("babel.server.costrange", type="count", db="fx", interval=5)
    cost_avg_metrics = MetricsRecorder("babel.server.cost.avg", type="avg", db="fx", interval=5)
    cost_max_metrics = MetricsRecorder("babel.server.cost.max", type="max", db="fx", interval=5)
    cost_min_metrics = MetricsRecorder("babel.server.cost.min", type="min", db="fx", interval=5)
    error_metrics = MetricsRecorder("babel.server.error", type="count", db="fx", interval=5)

    def __init__(self, service_meta, func=None, workers=5, **kwargs):
        """Build a server for *service_meta*.

        :param service_meta: service description; must use the "mail"
            coder and carry exactly one data center in its "sdc" option.
        :param func: request handler; may also be supplied to start().
        :param workers: worker thread pool size (1 = run requests inline).
        :raises RuntimeError: on unsupported serverimpl/coder or bad sdc.
        """
        self.service_meta = service_meta
        self.server_id = kwargs.get("server_id")
        self.func = func
        self.sender_cache = dict()
        self.kwargs = kwargs
        self.workers = workers
        # Fix: define every attribute close() inspects up front, so that
        # calling close() before start() cannot raise AttributeError
        # (previously accept_task was never set, and worker_pool only
        # existed when workers > 1).
        self.accept_task = None
        self.worker_pool = None
        if not self.server_id:
            self.server_id = gen_uuid()

        if service_meta.serverimpl == "rabbitmq":
            from . import babelrabbitmq
            self.impl = babelrabbitmq
        elif service_meta.serverimpl == "redis":
            from . import babelredis
            self.impl = babelredis
        else:
            raise RuntimeError("serverimpl {} not implemented yet".format(
                service_meta.serverimpl))

        if "sdc" not in self.service_meta.options:
            raise RuntimeError("sdc not in service meta")
        self.sdc = self.service_meta.options.get("sdc", "")
        if "," in self.service_meta.options["sdc"]:
            raise RuntimeError("server should only have one dc")

        if service_meta.coder != "mail":
            raise RuntimeError("coder {} is not supported yet".format(
                service_meta.coder))

        self._receiver = self.impl.get_server_receiver(service_meta, **kwargs)
        #if not func:
        #raise RuntimeError("the service implementation should not by empty")
        if self.workers > 1:
            self.worker_pool = ThreadPool(processes=self.workers)
        self.running = True
        self.metrics_tags = {
            "service": service_meta.name,
            "delivery": service_meta.delivermode,
            "call": service_meta.callmode,
            "impl": service_meta.serverimpl,
            "serverid": self.server_id,
            "sdc": self.sdc
        }

    def _get_sender(self, cdc, client_id):
        """Return (and cache) a sender for replying to *client_id* in *cdc*."""
        key = "{}.{}".format(cdc, client_id)
        result = self.sender_cache.get(key)
        if result:
            return result
        result = self.impl.get_server_sender(cdc, client_id, **self.kwargs)
        self.sender_cache[key] = result
        return result

    def start(self, func=None, sync=False):
        """Start consuming requests.

        :param func: handler to use if none was given to __init__.
        :param sync: if True run the accept loop in this thread,
            otherwise run it in a background thread.
        """
        if not self.func:
            self.func = func
        self.running = True
        self._receiver.start_consuming()
        if sync:
            self.accept()
        else:
            self.accept_task = run_in_thread(self.accept)

    def start_sync(self, func=None):
        """Start and block in the accept loop (start(sync=True) shortcut)."""
        self.start(func=func, sync=True)

    def close(self):
        """Stop consuming, drain the worker pool and release the receiver."""
        if not self.running:
            return
        self.running = False
        self._receiver.stop_consuming()
        if self.accept_task:
            self.accept_task.join()
            self.accept_task = None
        if self.worker_pool:
            self.worker_pool.close()
            self.worker_pool.join()
            self.worker_pool = None
        if self._receiver:
            self._receiver.close()
            self._receiver = None

    def work(self, *args, **kwargs):
        """Dispatch one process_mail call inline or to the thread pool."""
        if self.workers == 1:
            # run in this thread
            self.process_mail(*args, **kwargs)
        else:
            self.worker_pool.apply(self.process_mail, args, kwargs)

    def accept(self):
        """Main loop: pull mails from the receiver until close() is called."""
        while self.running:
            try:
                request_mail = self._receiver.get(True, 0.5)
                request_mail = Mail.from_json(request_mail)
                accept_ts = millis_now()
                # Fix: dispatch through work() so a server configured
                # with workers=1 (no thread pool) does not dereference a
                # None worker_pool.
                self.work((request_mail, accept_ts))
            except Queue.Empty:
                pass
            except Exception as error:
                tags = {"type": "accept"}
                tags.update(self.metrics_tags)
                ServiceServer.error_metrics.record(1, tags)
                print(error)

        self._receiver.close()
        # Drain whatever the receiver buffered before shutdown.
        # NOTE(review): dump_cache() items are assumed to be argument
        # tuples for process_mail (they were splatted by the original
        # worker_pool.apply call) — confirm against the impl modules.
        for request_mail in self._receiver.dump_cache():
            self.work(*request_mail)

    def process_mail(self, args):
        """Process one request end to end.

        :param args: ``(request_mail, accept_ts)`` tuple from accept().

        Records queue latency, invokes the handler, records handler
        latency, and for non-notify services sends the result (or the
        error text on failure) back to the client.
        """
        try:
            cdc = ""
            client_id = ""
            try:
                request_mail, accept_ts = args
                cdc = request_mail.get_header("cdc", "")
                client_id = request_mail.f

                # Queue-to-processing latency.
                process_start_ts = millis_now()
                tags = {
                    "type": "accept2process",
                    "cdc": cdc,
                    "clientid": client_id
                }
                tags.update(self.metrics_tags)
                cost = process_start_ts - accept_ts
                ServiceServer.cost_avg_metrics.record(cost, tags)
                ServiceServer.cost_max_metrics.record(cost, tags)
                ServiceServer.cost_min_metrics.record(cost, tags)

                events = extract_data_from_mail(request_mail)
                if isinstance(events, list):
                    # NOTE(review): only the last event's result survives
                    # and is returned to the client — confirm intended.
                    for e in events:
                        result = self.func(e)
                else:
                    result = self.func(events)

                # Handler-invocation latency.
                finish_func_ts = millis_now()
                tags = {
                    "type": "invokefunction",
                    "cdc": cdc,
                    "clientid": client_id
                }
                tags.update(self.metrics_tags)
                cost = finish_func_ts - process_start_ts
                ServiceServer.cost_avg_metrics.record(cost, tags)
                ServiceServer.cost_max_metrics.record(cost, tags)
                ServiceServer.cost_min_metrics.record(cost, tags)
                success = True
            except Exception as err:
                tags = {
                    "type": "invokefunction",
                    "cdc": cdc,
                    "clientid": client_id
                }
                tags.update(self.metrics_tags)
                ServiceServer.error_metrics.record(1, tags)
                success = False
                # The error text is sent back to the caller as the result.
                result = str(err)

            if self.service_meta.callmode != "notify":
                sender = self._get_sender(cdc, client_id)
                # NOTE(review): if the inner try failed before unpacking,
                # request_mail/finish_func_ts are unbound here and the
                # outer except absorbs the NameError, as in the original.
                response_mail = Mail.new_mail(self.server_id, client_id,
                                              request_mail.requestid)
                populate_data_into_mail(response_mail, result)
                sender.send(response_mail.get_json(), False, 1)

                # Response-sending latency.
                after_response_ts = millis_now()
                tags = {
                    "type": "sendresponse",
                    "cdc": cdc,
                    "clientid": client_id
                }
                tags.update(self.metrics_tags)
                cost = after_response_ts - finish_func_ts
                ServiceServer.cost_avg_metrics.record(cost, tags)
                ServiceServer.cost_max_metrics.record(cost, tags)
                ServiceServer.cost_min_metrics.record(cost, tags)
        except Exception as error:
            tags = {"type": "sendresponse", "cdc": cdc, "clientid": client_id}
            tags.update(self.metrics_tags)
            ServiceServer.error_metrics.record(1, tags)
            success = False