def log_event(event_tuple, packet=None, skip_write=False, skip_condensing=False): try: sec, usec, src_ip, src_port, dst_ip, dst_port, proto, trail_type, trail, info, reference = event_tuple if ignore_event(event_tuple): return if not (any(check_whitelisted(_) for _ in (src_ip, dst_ip)) and trail_type != TRAIL.DNS): # DNS requests/responses can't be whitelisted based on src_ip/dst_ip if not skip_write: localtime = "%s.%06d" % (time.strftime(TIME_FORMAT, time.localtime(int(sec))), usec) if not skip_condensing: if any(_ in info for _ in CONDENSE_ON_INFO_KEYWORDS): with _condensing_lock: key = (src_ip, trail) if key not in _condensed_events: _condensed_events[key] = [] _condensed_events[key].append(event_tuple) return current_bucket = sec / config.PROCESS_COUNT if getattr(_thread_data, "log_bucket", None) != current_bucket: # log throttling _thread_data.log_bucket = current_bucket _thread_data.log_trails = set() else: if any(_ in _thread_data.log_trails for _ in ((src_ip, trail), (dst_ip, trail))): return else: _thread_data.log_trails.add((src_ip, trail)) _thread_data.log_trails.add((dst_ip, trail)) event = "%s %s %s\n" % (safe_value(localtime), safe_value(config.SENSOR_NAME), " ".join(safe_value(_) for _ in event_tuple[2:])) if not config.DISABLE_LOCAL_LOG_STORAGE: handle = get_event_log_handle(sec) os.write(handle, event) if config.LOG_SERVER: remote_host, remote_port = config.LOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto("%s %s" % (sec, event), (remote_host, int(remote_port))) if config.SYSLOG_SERVER: extension = "src=%s spt=%s dst=%s dpt=%s trail=%s ref=%s" % (src_ip, src_port, dst_ip, dst_port, trail, reference) _ = CEF_FORMAT.format(syslog_time=time.strftime("%b %d %H:%M:%S", time.localtime(int(sec))), host=HOSTNAME, device_vendor=NAME, device_product="sensor", device_version=VERSION, signature_id=time.strftime("%Y-%m-%d", time.localtime(os.path.getctime(TRAILS_FILE))), name=info, severity=0, extension=extension) remote_host, remote_port = config.SYSLOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto(_, (remote_host, int(remote_port))) if config.DISABLE_LOCAL_LOG_STORAGE and not any(config.LOG_SERVER, config.SYSLOG_SERVER) or config.console: sys.stderr.write(event) sys.stderr.flush() if config.plugin_functions: for _ in config.plugin_functions: _(event_tuple, packet) except (OSError, IOError): if config.SHOW_DEBUG: traceback.print_exc()
def update_trails(force=False, offline=False): """ Update trails from feeds """ success = False trails = TrailsDict() duplicates = {} try: if not os.path.isdir(USERS_DIR): os.makedirs(USERS_DIR, 0o755) except Exception as ex: exit( "[!] something went wrong during creation of directory '%s' ('%s')" % (USERS_DIR, ex)) _chown(USERS_DIR) if config.UPDATE_SERVER: print("[i] retrieving trails from provided 'UPDATE_SERVER' server...") content = retrieve_content(config.UPDATE_SERVER) if not content or content.count(',') < 2: print("[x] unable to retrieve data from '%s'" % config.UPDATE_SERVER) else: with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+", open if six.PY2 else codecs.open) as f: f.write(content) trails = load_trails() else: trail_files = set() for dirpath, dirnames, filenames in os.walk( os.path.abspath(os.path.join(ROOT_DIR, "trails"))): for filename in filenames: trail_files.add( os.path.abspath(os.path.join(dirpath, filename))) if config.CUSTOM_TRAILS_DIR: for dirpath, dirnames, filenames in os.walk( os.path.abspath( os.path.join( ROOT_DIR, os.path.expanduser(config.CUSTOM_TRAILS_DIR)))): for filename in filenames: trail_files.add( os.path.abspath(os.path.join(dirpath, filename))) if not trails and ( force or not os.path.isfile(config.TRAILS_FILE) or (time.time() - os.stat(config.TRAILS_FILE).st_mtime) >= config.UPDATE_PERIOD or os.stat(config.TRAILS_FILE).st_size == 0 or any( os.stat(_).st_mtime > os.stat(config.TRAILS_FILE).st_mtime for _ in trail_files)): if not config.no_updates: print("[i] updating trails (this might take a while)...") else: print("[i] checking trails...") if not offline and (force or config.USE_FEED_UPDATES): _ = os.path.abspath(os.path.join(ROOT_DIR, "trails", "feeds")) if _ not in sys.path: sys.path.append(_) filenames = sorted(glob.glob(os.path.join(_, "*.py"))) else: filenames = [] _ = os.path.abspath(os.path.join(ROOT_DIR, "trails")) if _ not in sys.path: sys.path.append(_) filenames += [os.path.join(_, "custom")] filenames += [ os.path.join(_, "static") ] # Note: higher priority than previous one because of dummy user trails (FE) filenames = [_ for _ in filenames if "__init__.py" not in _] if config.DISABLED_FEEDS: filenames = [ filename for filename in filenames if os.path.splitext(os.path.split(filename)[-1])[0] not in re.split(r"[^\w]+", config.DISABLED_FEEDS) ] for i in xrange(len(filenames)): filename = filenames[i] try: module = __import__( os.path.basename(filename).split(".py")[0]) except (ImportError, SyntaxError) as ex: print( "[x] something went wrong during import of feed file '%s' ('%s')" % (filename, ex)) continue for name, function in inspect.getmembers( module, inspect.isfunction): if name == "fetch": url = module.__url__ # Note: to prevent "SyntaxError: can not delete variable 'module' referenced in nested scope" print(" [o] '%s'%s" % (url, " " * 20 if len(url) < 20 else "")) sys.stdout.write( "[?] progress: %d/%d (%d%%)\r" % (i, len(filenames), i * 100 // len(filenames))) sys.stdout.flush() if config.DISABLED_TRAILS_INFO_REGEX and re.search( config.DISABLED_TRAILS_INFO_REGEX, getattr(module, "__info__", "")): continue try: results = function() for item in results.items(): if item[0].startswith( "www.") and '/' not in item[0]: item = [item[0][len("www."):], item[1]] if item[0] in trails: if item[0] not in duplicates: duplicates[item[0]] = set( (trails[item[0]][1], )) duplicates[item[0]].add(item[1][1]) if not ( item[0] in trails and (any(_ in item[1][0] for _ in LOW_PRIORITY_INFO_KEYWORDS) or trails[item[0]][1] in HIGH_PRIORITY_REFERENCES)) or ( item[1][1] in HIGH_PRIORITY_REFERENCES and "history" not in item[1][0] ) or any( _ in item[1][0] for _ in HIGH_PRIORITY_INFO_KEYWORDS): trails[item[0]] = item[1] if not results and not any( _ in url for _ in ("abuse.ch", "cobaltstrike")): print( "[x] something went wrong during remote data retrieval ('%s')" % url) except Exception as ex: print( "[x] something went wrong during processing of feed file '%s' ('%s')" % (filename, ex)) try: sys.modules.pop(module.__name__) del module except Exception: pass # custom trails from remote location if config.CUSTOM_TRAILS_URL: print(" [o] '(remote custom)'%s" % (" " * 20)) for url in re.split(r"[;,]", config.CUSTOM_TRAILS_URL): url = url.strip() if not url: continue url = ("http://%s" % url) if "//" not in url else url content = retrieve_content(url) if not content: print( "[x] unable to retrieve data (or empty response) from '%s'" % url) else: __info__ = "blacklisted" __reference__ = "(remote custom)" # urlparse.urlsplit(url).netloc for line in content.split('\n'): line = line.strip() if not line or line.startswith('#'): continue line = re.sub(r"\s*#.*", "", line) if '://' in line: line = re.search(r"://(.*)", line).group(1) line = line.rstrip('/') if line in trails and any( _ in trails[line][1] for _ in ("custom", "static")): continue if '/' in line: trails[line] = (__info__, __reference__) line = line.split('/')[0] elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", line): trails[line] = (__info__, __reference__) else: trails[line.strip('.')] = (__info__, __reference__) for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content): prefix, mask = match.groups() mask = int(mask) if mask > 32: continue start_int = addr_to_int(prefix) & make_mask(mask) end_int = start_int | ((1 << 32 - mask) - 1) if 0 <= end_int - start_int <= 1024: address = start_int while start_int <= address <= end_int: trails[int_to_addr(address)] = ( __info__, __reference__) address += 1 print("[i] post-processing trails (this might take a while)...") # basic cleanup for key in list(trails.keys()): if key not in trails: continue if config.DISABLED_TRAILS_INFO_REGEX: if re.search(config.DISABLED_TRAILS_INFO_REGEX, trails[key][0]): del trails[key] continue try: _key = key.decode(UNICODE_ENCODING) if isinstance( key, bytes) else key _key = _key.encode("idna") if six.PY3: _key = _key.decode(UNICODE_ENCODING) if _key != key: # for domains with non-ASCII letters (e.g. phishing) trails[_key] = trails[key] del trails[key] key = _key except: pass if not key or re.search(r"(?i)\A\.?[a-z]+\Z", key) and not any( _ in trails[key][1] for _ in ("custom", "static")): del trails[key] continue if re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key): if any( _ in trails[key][0] for _ in ("parking site", "sinkhole") ) and key in duplicates: # Note: delete (e.g.) junk custom trails if static trail is a sinkhole del duplicates[key] if trails[key][0] == "malware": trails[key] = ("potential malware site", trails[key][1]) if config.get("IP_MINIMUM_FEEDS", 3) > 1: if (key not in duplicates or len(duplicates[key]) < config.get("IP_MINIMUM_FEEDS", 3) ) and re.search(r"\b(custom|static)\b", trails[key][1]) is None: del trails[key] continue if any(int(_) > 255 for _ in key.split('.')): del trails[key] continue if trails[key][0] == "ransomware": trails[key] = ("ransomware (malware)", trails[key][1]) if key.startswith("www.") and '/' not in key: _ = trails[key] del trails[key] key = key[len("www."):] if key: trails[key] = _ if '?' in key and not key.startswith('/'): _ = trails[key] del trails[key] key = key.split('?')[0] if key: trails[key] = _ if '//' in key: _ = trails[key] del trails[key] key = key.replace('//', '/') trails[key] = _ if key != key.lower(): _ = trails[key] del trails[key] key = key.lower() trails[key] = _ if key in duplicates: _ = trails[key] others = sorted(duplicates[key] - set((_[1], ))) if others and " (+" not in _[1]: trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others))) read_whitelist() for key in list(trails.keys()): match = re.search(r"\A(\d+\.\d+\.\d+\.\d+)\b", key) if check_whitelisted(key) or any( key.startswith(_) for _ in BAD_TRAIL_PREFIXES): del trails[key] elif match and (bogon_ip(match.group(1)) or cdn_ip(match.group(1))) and not any( _ in trails[key][0] for _ in ("parking", "sinkhole")): del trails[key] else: try: key.decode("utf8") if hasattr( key, "decode") else key.encode("utf8") trails[key][0].decode("utf8") if hasattr( trails[key][0], "decode") else trails[key][0].encode("utf8") trails[key][1].decode("utf8") if hasattr( trails[key][1], "decode") else trails[key][1].encode("utf8") except UnicodeError: del trails[key] try: if trails: with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+", open if six.PY2 else codecs.open) as f: writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL) for trail in trails: row = (trail, trails[trail][0], trails[trail][1]) writer.writerow(row) success = True except Exception as ex: print( "[x] something went wrong during trails file write '%s' ('%s')" % (config.TRAILS_FILE, ex)) print("[i] update finished%s" % (40 * " ")) if success: print("[i] trails stored to '%s'" % config.TRAILS_FILE) return trails
def log_event(event_tuple, packet=None, skip_write=False, skip_condensing=False): global _condensing_thread if _condensing_thread is None: _condensing_thread = threading.Thread(target=flush_condensed_events) _condensing_thread.daemon = True _condensing_thread.start() try: sec, usec, src_ip, src_port, dst_ip, dst_port, proto, trail_type, trail, info, reference = event_tuple if ignore_event(event_tuple): return if not ( any(check_whitelisted(_) for _ in (src_ip, dst_ip)) and trail_type != TRAIL.DNS ): # DNS requests/responses can't be whitelisted based on src_ip/dst_ip if not skip_write: localtime = "%s.%06d" % (time.strftime( TIME_FORMAT, time.localtime(int(sec))), usec) if not skip_condensing: if any(_ in info for _ in CONDENSE_ON_INFO_KEYWORDS): with _condensing_lock: key = (src_ip, trail) if key not in _condensed_events: _condensed_events[key] = [] _condensed_events[key].append(event_tuple) return current_bucket = sec // config.PROCESS_COUNT if getattr(_thread_data, "log_bucket", None) != current_bucket: # log throttling _thread_data.log_bucket = current_bucket _thread_data.log_trails = set() else: if any(_ in _thread_data.log_trails for _ in ((src_ip, trail), (dst_ip, trail))): return else: _thread_data.log_trails.add((src_ip, trail)) _thread_data.log_trails.add((dst_ip, trail)) event = "%s %s %s\n" % ( safe_value(localtime), safe_value(config.SENSOR_NAME), " ".join(safe_value(_) for _ in event_tuple[2:])) if not config.DISABLE_LOCAL_LOG_STORAGE: handle = get_event_log_handle(sec) os.write(handle, event.encode(UNICODE_ENCODING)) if config.LOG_SERVER: if config.LOG_SERVER.count(':') > 1: remote_host, remote_port = config.LOG_SERVER.replace( '[', '').replace(']', '').rsplit(':', 1) # Reference: https://github.com/squeaky-pl/zenchmarks/blob/master/vendor/twisted/internet/tcp.py _AI_NUMERICSERV = getattr(socket, "AI_NUMERICSERV", 0) _NUMERIC_ONLY = socket.AI_NUMERICHOST | _AI_NUMERICSERV _address = socket.getaddrinfo( remote_host, int(remote_port) if str(remote_port or "").isdigit() else 0, 0, 0, 0, _NUMERIC_ONLY)[0][4] else: remote_host, remote_port = config.LOG_SERVER.split(':') _address = (remote_host, int(remote_port)) s = socket.socket( socket.AF_INET if len(_address) == 2 else socket.AF_INET6, socket.SOCK_DGRAM) s.sendto(("%s %s" % (sec, event)).encode(UNICODE_ENCODING), _address) if config.SYSLOG_SERVER: extension = "src=%s spt=%s dst=%s dpt=%s trail=%s ref=%s" % ( src_ip, src_port, dst_ip, dst_port, trail, reference) _ = CEF_FORMAT.format( syslog_time=time.strftime("%b %d %H:%M:%S", time.localtime(int(sec))), host=HOSTNAME, device_vendor=NAME, device_product="sensor", device_version=VERSION, signature_id=time.strftime( "%Y-%m-%d", time.localtime(os.path.getctime( config.TRAILS_FILE))), name=info, severity=0, extension=extension) remote_host, remote_port = config.SYSLOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto(_.encode(UNICODE_ENCODING), (remote_host, int(remote_port))) if (config.DISABLE_LOCAL_LOG_STORAGE and not any( (config.LOG_SERVER, config.SYSLOG_SERVER)) ) or config.console: sys.stderr.write(event) sys.stderr.flush() if config.plugin_functions: for _ in config.plugin_functions: _(event_tuple, packet) except (OSError, IOError): if config.SHOW_DEBUG: traceback.print_exc()
def log_event(event_tuple, packet=None, skip_write=False, skip_condensing=False): try: sec, usec, src_ip, src_port, dst_ip, dst_port, proto, trail_type, trail, info, reference = event_tuple if not any(check_whitelisted(_) for _ in (src_ip, dst_ip)): if not skip_write: localtime = "%s.%06d" % (time.strftime( TIME_FORMAT, time.localtime(int(sec))), usec) if not skip_condensing: if (sec - getattr(_thread_data, "condensed_events_flush_sec", 0)) > CONDENSED_EVENTS_FLUSH_PERIOD: _thread_data.condensed_events_flush_sec = sec for key in getattr(_thread_data, "condensed_events", []): condensed = False events = _thread_data.condensed_events[key] first_event = events[0] condensed_event = [_ for _ in first_event] for i in xrange(1, len(events)): current_event = events[i] for j in xrange( 3, 7 ): # src_port, dst_ip, dst_port, proto if current_event[j] != condensed_event[j]: condensed = True if not isinstance( condensed_event[j], set): condensed_event[j] = set( (condensed_event[j], )) condensed_event[j].add( current_event[j]) if condensed: for i in xrange(len(condensed_event)): if isinstance(condensed_event[i], set): condensed_event[i] = ','.join( str(_) for _ in sorted( condensed_event[i])) log_event(condensed_event, skip_condensing=True) _thread_data.condensed_events = {} if any(_ in info for _ in CONDENSE_ON_INFO_KEYWORDS): if not hasattr(_thread_data, "condensed_events"): _thread_data.condensed_events = {} key = (src_ip, trail) if key not in _thread_data.condensed_events: _thread_data.condensed_events[key] = [] _thread_data.condensed_events[key].append(event_tuple) return event = "%s %s %s\n" % ( safe_value(localtime), safe_value(config.SENSOR_NAME), " ".join(safe_value(_) for _ in event_tuple[2:])) if not config.DISABLE_LOCAL_LOG_STORAGE: handle = get_event_log_handle(sec) os.write(handle, event) if config.LOG_SERVER: remote_host, remote_port = config.LOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto("%s %s" % (sec, event), (remote_host, int(remote_port))) if config.SYSLOG_SERVER: extension = "src=%s spt=%s dst=%s dpt=%s trail=%s ref=%s" % ( src_ip, src_port, dst_ip, dst_port, trail, reference) _ = CEF_FORMAT.format( syslog_time=time.strftime("%b %d %H:%M:%S", time.gmtime(int(sec))), host=HOSTNAME, device_vendor=NAME, device_product="sensor", device_version=VERSION, signature_id=time.strftime( "%Y-%m-%d", time.gmtime(os.path.getctime(TRAILS_FILE))), name=info, severity=0, extension=extension) remote_host, remote_port = config.SYSLOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto(_, (remote_host, int(remote_port))) if config.DISABLE_LOCAL_LOG_STORAGE and not any( config.LOG_SERVER, config.SYSLOG_SERVER) or config.console: sys.stderr.write(event) sys.stderr.flush() if config.plugin_functions: for _ in config.plugin_functions: _(event_tuple, packet) except (OSError, IOError): if config.SHOW_DEBUG: traceback.print_exc()
trails[key] = _ if key != key.lower(): _ = trails[key] del trails[key] key = key.lower() trails[key] = _ if key in duplicates: _ = trails[key] others = sorted(duplicates[key] - set((_[1], ))) if others and " (+" not in _[1]: trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others))) read_whitelist() for key in trails.keys(): if check_whitelisted(key) or any( key.startswith(_) for _ in BAD_TRAIL_PREFIXES): del trails[key] elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and (bogon_ip(key) or cdn_ip(key)): del trails[key] else: try: key.decode("utf8") trails[key][0].decode("utf8") trails[key][1].decode("utf8") except UnicodeDecodeError: del trails[key] try: if trails:
def _process_packet(packet, sec, usec, ip_offset): """ Processes single (raw) IP layer data """ global _connect_sec global _last_syn global _last_logged_syn global _last_udp global _last_logged_udp global _last_dns_exhaustion global _subdomains_sec try: if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES: _result_cache.clear() if config.USE_HEURISTICS: if _locks.connect_sec: _locks.connect_sec.acquire() connect_sec = _connect_sec _connect_sec = sec if _locks.connect_sec: _locks.connect_sec.release() if sec > connect_sec: for key in _connect_src_dst: if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD: _src_ip, _dst_ip = key.split('~') if not check_whitelisted(_src_ip): for _ in _connect_src_details[key]: log_event((sec, usec, _src_ip, _[2], _dst_ip, _[3], PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"), packet) _connect_src_dst.clear() _connect_src_details.clear() ip_data = packet[ip_offset:] ip_version = ord(ip_data[0]) >> 4 localhost_ip = LOCALHOST_IP[ip_version] if ip_version == 0x04: # IPv4 ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20]) iph_length = (ip_header[0] & 0xf) << 2 protocol = ip_header[6] src_ip = socket.inet_ntoa(ip_header[8]) dst_ip = socket.inet_ntoa(ip_header[9]) elif ip_version == 0x06: # IPv6 # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/ ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40]) iph_length = 40 protocol = ip_header[4] src_ip = inet_ntoa6(ip_header[6]) dst_ip = inet_ntoa6(ip_header[7]) else: return if protocol == socket.IPPROTO_TCP: # TCP src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14]) if flags != 2 and config.plugin_functions: if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True) elif src_ip in trails and dst_ip != localhost_ip: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True) if flags == 2: # SYN set (only) _ = _last_syn _last_syn = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_syn: # skip bursts return if dst_ip in trails: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails and dst_ip != localhost_ip: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) if config.USE_HEURISTICS: if dst_ip != localhost_ip: key = "%s~%s" % (src_ip, dst_ip) if key not in _connect_src_dst: _connect_src_dst[key] = set() _connect_src_details[key] = set() _connect_src_dst[key].add(dst_port) _connect_src_details[key].add((sec, usec, src_port, dst_port)) else: tcph_length = doff_reserved >> 4 h_size = iph_length + (tcph_length << 2) tcp_data = ip_data[h_size:] if tcp_data.startswith("HTTP/"): if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet) else: index = tcp_data.find("<title>") if index >= 0: title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)] if all(_ in title.lower() for _ in ("this domain", "has been seized")): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet) content_type = None first_index = tcp_data.find("\r\nContent-Type:") if first_index >= 0: first_index = first_index + len("\r\nContent-Type:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: content_type = tcp_data[first_index:last_index].strip().lower() if content_type and content_type in SUSPICIOUS_CONTENT_TYPES: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, content_type, "content type (suspicious)", "(heuristic)"), packet) method, path = None, None index = tcp_data.find("\r\n") if index >= 0: line = tcp_data[:index] if line.count(' ') == 2 and " HTTP/" in line: method, path, _ = line.split(' ') if method and path: post_data = None host = dst_ip first_index = tcp_data.find("\r\nHost:") if first_index >= 0: first_index = first_index + len("\r\nHost:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: host = tcp_data[first_index:last_index] host = host.strip().lower() if host.endswith(":80"): host = host[:-3] if host and host[0].isalpha() and dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet) elif config.CHECK_HOST_DOMAINS and not host.replace('.', "").isdigit(): _check_domain(host, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet) index = tcp_data.find("\r\n\r\n") if index >= 0: post_data = tcp_data[index + 4:] if config.USE_HEURISTICS and dst_port == 80 and path.startswith("http://") and not _check_domain_whitelisted(urlparse.urlparse(path).netloc.split(':')[0]): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, path, "potential proxy probe (suspicious)", "(heuristic)"), packet) return elif "://" in path: url = path.split("://", 1)[1] if '/' not in url: url = "%s/" % url host, path = url.split('/', 1) if host.endswith(":80"): host = host[:-3] path = "/%s" % path proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif method == "CONNECT": if '/' in path: host, path = path.split('/', 1) path = "/%s" % path else: host, path = path, '/' if host.endswith(":80"): host = host[:-3] url = "%s%s" % (host, path) proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) else: url = "%s%s" % (host, path) if config.USE_HEURISTICS: user_agent, result = None, None first_index = tcp_data.find("\r\nUser-Agent:") if first_index >= 0: first_index = first_index + len("\r\nUser-Agent:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: user_agent = tcp_data[first_index:last_index] user_agent = urllib.unquote(user_agent).strip() if user_agent: result = _result_cache.get(user_agent) if result is None: if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS): match = re.search(SUSPICIOUS_UA_REGEX, user_agent) if match: def _(value): return value.replace('(', "\\(").replace(')', "\\)") parts = user_agent.split(match.group(0), 1) if len(parts) > 1 and parts[0] and parts[-1]: result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent)) else: result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts) if not result: _result_cache[user_agent] = False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet) checks = [path.rstrip('/')] if '?' in path: checks.append(path.split('?')[0].rstrip('/')) _ = os.path.splitext(checks[-1]) if _[1]: checks.append(_[0]) if checks[-1].count('/') > 1: checks.append(checks[-1][:checks[-1].rfind('/')]) checks.append(checks[0][checks[0].rfind('/'):].split('?')[0]) for check in filter(None, checks): for _ in ("", host): check = "%s%s" % (_, check) if check in trails: parts = url.split(check) other = ("(%s)" % _ if _ else _ for _ in parts) trail = check.join(other) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1])) return if config.USE_HEURISTICS: unquoted_path = urllib.unquote(path) unquoted_post_data = urllib.unquote(post_data or "") for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS: replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char] path = path.replace(char, replacement) if post_data: post_data = post_data.replace(char, replacement) if not _check_domain_whitelisted(host): if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS): if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_path) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_path, re.I | re.DOTALL): found = desc break _result_cache[unquoted_path] = found or "" if found: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % found, "(heuristic)"), packet) return if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_post_data) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_post_data, re.I | re.DOTALL): found = desc break _result_cache[unquoted_post_data] = found or "" if found: trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip()) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "%s (suspicious)" % found, "(heuristic)"), packet) return if '.' in path: _ = urlparse.urlparse("http://%s" % url) # dummy scheme path = path.lower() filename = _.path.split('/')[-1] name, extension = os.path.splitext(filename) trail = "%s(%s)" % (host, path) if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '=' not in _.query and len(name) < 10: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet) elif filename in WEB_SHELLS: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential web shell (suspicious)", "(heuristic)"), packet) else: for desc, regex in SUSPICIOUS_HTTP_PATH_REGEXES: if re.search(regex, filename, re.I): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % desc, "(heuristic)"), packet) break elif protocol == socket.IPPROTO_UDP: # UDP _ = ip_data[iph_length:iph_length + 4] if len(_) < 4: return src_port, dst_port = struct.unpack("!HH", _) _ = _last_udp _last_udp = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_udp: # skip bursts return if src_port != 53 and dst_port != 53: # not DNS if dst_ip in trails: trail = dst_ip elif src_ip in trails: trail = src_ip else: trail = None if trail: _ = _last_logged_udp _last_logged_udp = _last_udp if _ != _last_logged_udp: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet) else: dns_data = ip_data[iph_length + 8:] # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf if len(dns_data) > 6: qdcount = struct.unpack("!H", dns_data[4:6])[0] if qdcount > 0: offset = 12 query = "" while len(dns_data) > offset: length = ord(dns_data[offset]) if not length: query = query[:-1] break query += dns_data[offset + 1:offset + length + 1] + '.' offset += length + 1 query = query.lower() if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES): return parts = query.split('.') if ord(dns_data[2]) == 0x01: # standard query type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5]) if len(parts) > 2: domain = '.'.join(parts[-2:]) if not _check_domain_whitelisted(domain): # e.g. <hash>.hashserver.cs.trendmicro.com if (sec - (_subdomains_sec or 0)) > DAILY_SECS: _subdomains.clear() _dns_exhausted_domains.clear() _subdomains_sec = sec subdomains = _subdomains.get(domain) if not subdomains: subdomains = _subdomains[domain] = set() if len(subdomains) < DNS_EXHAUSTION_THRESHOLD: subdomains.add('.'.join(parts[:-2])) else: if (sec - (_last_dns_exhaustion or 0)) > 60: trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet) _dns_exhausted_domains.add(domain) _last_dns_exhaustion = sec return # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types if type_ not in (12, 28) and class_ == 1: # Type not in (PTR, AAAA), Class IN if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet) elif config.USE_HEURISTICS: if ord(dns_data[2]) & 0x80: # standard response if ord(dns_data[3]) == 0x80: # recursion available, no error _ = offset + 5 try: while _ < len(dns_data): if ord(dns_data[_]) & 0xc0 != 0 and dns_data[_ + 2] == "\00" and dns_data[_ + 3] == "\x01": # Type A break else: _ += 12 + struct.unpack("!H", dns_data[_ + 10: _ + 12])[0] _ = dns_data[_ + 12:_ + 16] if _: answer = socket.inet_ntoa(_) if answer in trails: _ = trails[answer] if "sinkhole" in _[0]: trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % _[0].split(" ")[1], "(heuristic)"), packet) # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn) elif "parking" in _[0]: trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "parked site (suspicious)", "(heuristic)"), packet) except IndexError: pass elif ord(dns_data[3]) == 0x83: # recursion available, no such name if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails): if parts[-1].isdigit(): return if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])): # generic check for DNSBL IP lookups for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)): if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600: NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()] else: NO_SUCH_NAME_COUNTERS[_][1] += 1 NO_SUCH_NAME_COUNTERS[_][2].add(query) if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD: if _.startswith("*."): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet) for item in NO_SUCH_NAME_COUNTERS[_][2]: try: del NO_SUCH_NAME_COUNTERS[item] except KeyError: pass else: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet) try: del NO_SUCH_NAME_COUNTERS[_] except KeyError: pass break if len(parts) > 2: part = parts[0] if parts[0] != "www" else parts[1] trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) elif len(parts) == 2: part = parts[0] trail = "(%s).%s" % (parts[0], parts[1]) else: part = query trail = query if part and '-' not in part: result = _result_cache.get(part) if result is None: # Reference: https://github.com/exp0se/dga_detector probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part)) entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities) if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD: result = "entropy threshold no such domain (suspicious)" if not result: if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD: result = "consonant threshold no such domain (suspicious)" _result_cache[part] = result or False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet) elif protocol in IPPROTO_LUT: # non-TCP/UDP (e.g. ICMP) if protocol == socket.IPPROTO_ICMP: if ord(ip_data[iph_length]) != 0x08: # Non-echo request return elif protocol == socket.IPPROTO_ICMPV6: if ord(ip_data[iph_length]) != 0x80: # Non-echo request return if dst_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) except struct.error: pass except Exception: if config.SHOW_DEBUG: traceback.print_exc()
trails[key] = _ if key != key.lower(): _ = trails[key] del trails[key] key = key.lower() trails[key] = _ if key in duplicates: _ = trails[key] others = sorted(duplicates[key] - set((_[1],))) if others and " (+" not in _[1]: trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others))) read_whitelist() for key in trails.keys(): if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES): del trails[key] elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and (bogon_ip(key) or cdn_ip(key)): del trails[key] else: try: key.decode("utf8") trails[key][0].decode("utf8") trails[key][1].decode("utf8") except UnicodeDecodeError: del trails[key] try: if trails: with _fopen(TRAILS_FILE, "w+b") as f: writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
trails[key] = _ if key != key.lower(): _ = trails[key] del trails[key] key = key.lower() trails[key] = _ if key in duplicates: _ = trails[key] others = sorted(duplicates[key] - set((_[1],))) if others and " (+" not in _[1]: trails[key] = (_[0], "%s (+%s)" % (_[1], ",".join(others))) read_whitelist() for key in trails.keys(): if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES): del trails[key] elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and cdn_ip(key): del trails[key] else: try: key.decode("utf8") trails[key][0].decode("utf8") trails[key][1].decode("utf8") except UnicodeDecodeError: del trails[key] try: if trails: with _fopen(TRAILS_FILE, "w+b") as f: writer = csv.writer(f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
def _process_packet(packet, sec, usec, ip_offset): """ Processes single (raw) IP layer data """ global _connect_sec global _last_syn global _last_logged_syn global _last_udp global _last_logged_udp global _last_dns_exhaustion global _subdomains_sec try: if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES: _result_cache.clear() if config.USE_HEURISTICS: if _locks.connect_sec: _locks.connect_sec.acquire() connect_sec = _connect_sec _connect_sec = sec if _locks.connect_sec: _locks.connect_sec.release() if sec > connect_sec: for key in _connect_src_dst: if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD: _src_ip, _dst_ip = key.split('~') if not check_whitelisted(_src_ip): for _ in _connect_src_details[key]: log_event((sec, usec, _src_ip, _[2], _dst_ip, _[3], PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"), packet) _connect_src_dst.clear() _connect_src_details.clear() ip_data = packet[ip_offset:] ip_version = ord(ip_data[0]) >> 4 localhost_ip = LOCALHOST_IP[ip_version] if ip_version == 0x04: # IPv4 ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20]) iph_length = (ip_header[0] & 0xf) << 2 fragment_offset = ip_header[4] & 0x1fff if fragment_offset != 0: return protocol = ip_header[6] src_ip = socket.inet_ntoa(ip_header[8]) dst_ip = socket.inet_ntoa(ip_header[9]) elif ip_version == 0x06: # IPv6 # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/ ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40]) iph_length = 40 protocol = ip_header[4] src_ip = inet_ntoa6(ip_header[6]) dst_ip = inet_ntoa6(ip_header[7]) else: return if protocol == socket.IPPROTO_TCP: # TCP src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14]) if flags != 2 and config.plugin_functions: if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True) elif src_ip in trails and dst_ip != localhost_ip: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True) if flags == 2: # SYN set (only) _ = _last_syn _last_syn = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_syn: # skip bursts return if dst_ip in trails or "%s:%s" % (dst_ip, dst_port) in trails: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: trail = dst_ip if dst_ip in trails else "%s:%s" % (dst_ip, dst_port) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP if ':' not in trail else TRAIL.ADDR, trail, trails[trail][0], trails[trail][1]), packet) elif (src_ip in trails or "%s:%s" % (src_ip, src_port) in trails) and dst_ip != localhost_ip: _ = _last_logged_syn _last_logged_syn = _last_syn if _ != _last_logged_syn: trail = src_ip if src_ip in trails else "%s:%s" % (src_ip, src_port) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP if ':' not in trail else TRAIL.ADDR, trail, trails[trail][0], trails[trail][1]), packet) if config.USE_HEURISTICS: if dst_ip != localhost_ip: key = "%s~%s" % (src_ip, dst_ip) if key not in _connect_src_dst: _connect_src_dst[key] = set() _connect_src_details[key] = set() _connect_src_dst[key].add(dst_port) _connect_src_details[key].add((sec, usec, src_port, dst_port)) else: tcph_length = doff_reserved >> 4 h_size = iph_length + (tcph_length << 2) tcp_data = ip_data[h_size:] if tcp_data.startswith("HTTP/"): if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet) else: index = tcp_data.find("<title>") if index >= 0: title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)] if all(_ in title.lower() for _ in ("this domain", "has been seized")): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet) content_type = None first_index = tcp_data.find("\r\nContent-Type:") if first_index >= 0: first_index = first_index + len("\r\nContent-Type:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: content_type = tcp_data[first_index:last_index].strip().lower() if content_type and content_type in SUSPICIOUS_CONTENT_TYPES: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, content_type, "content type (suspicious)", "(heuristic)"), packet) method, path = None, None index = tcp_data.find("\r\n") if index >= 0: line = tcp_data[:index] if line.count(' ') == 2 and " HTTP/" in line: method, path, _ = line.split(' ') if method and path: post_data = None host = dst_ip first_index = tcp_data.find("\r\nHost:") path = path.lower() if first_index >= 0: first_index = first_index + len("\r\nHost:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: host = tcp_data[first_index:last_index] host = host.strip().lower() if host.endswith(":80"): host = host[:-3] if host and host[0].isalpha() and dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet) elif config.CHECK_HOST_DOMAINS: _check_domain(host, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet) index = tcp_data.find("\r\n\r\n") if index >= 0: post_data = tcp_data[index + 4:] if config.USE_HEURISTICS and dst_port == 80 and path.startswith("http://") and any(_ in path for _ in SUSPICIOUS_PROXY_PROBE_PRE_CONDITION) and not _check_domain_whitelisted(path.split('/')[2]): trail = re.sub(r"(http://[^/]+/)(.+)", r"\g<1>(\g<2>)", path) trail = re.sub(r"(http://)([^/(]+)", lambda match: "%s%s" % (match.group(1), match.group(2).split(':')[0].rstrip('.')), trail) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential proxy probe (suspicious)", "(heuristic)"), packet) return elif "://" in path: url = path.split("://", 1)[1] if '/' not in url: url = "%s/" % url host, path = url.split('/', 1) if host.endswith(":80"): host = host[:-3] path = "/%s" % path proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) elif method == "CONNECT": if '/' in path: host, path = path.split('/', 1) path = "/%s" % path else: host, path = path, '/' if host.endswith(":80"): host = host[:-3] url = "%s%s" % (host, path) proxy_domain = host.split(':')[0] _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet) else: url = "%s%s" % (host, path) if config.USE_HEURISTICS: user_agent, result = None, None first_index = tcp_data.find("\r\nUser-Agent:") if first_index >= 0: first_index = first_index + len("\r\nUser-Agent:") last_index = tcp_data.find("\r\n", first_index) if last_index >= 0: user_agent = tcp_data[first_index:last_index] user_agent = urllib.unquote(user_agent).strip() if user_agent: result = _result_cache.get(user_agent) if result is None: if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS): match = re.search(SUSPICIOUS_UA_REGEX, user_agent) if match: def _(value): return value.replace('(', "\\(").replace(')', "\\)") parts = user_agent.split(match.group(0), 1) if len(parts) > 1 and parts[0] and parts[-1]: result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent)) else: result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts) if not result: _result_cache[user_agent] = False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet) if not _check_domain_whitelisted(host): checks = [path.rstrip('/')] if '?' in path: checks.append(path.split('?')[0].rstrip('/')) _ = os.path.splitext(checks[-1]) if _[1]: checks.append(_[0]) if checks[-1].count('/') > 1: checks.append(checks[-1][:checks[-1].rfind('/')]) checks.append(checks[0][checks[0].rfind('/'):].split('?')[0]) for check in filter(None, checks): for _ in ("", host): check = "%s%s" % (_, check) if check in trails: parts = url.split(check) other = ("(%s)" % _ if _ else _ for _ in parts) trail = check.join(other) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1])) return if "%s/" % host in trails: trail = "%s/" % host log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[trail][0], trails[trail][1])) return if config.USE_HEURISTICS: unquoted_path = urllib.unquote(path) unquoted_post_data = urllib.unquote(post_data or "") for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS: replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char] path = path.replace(char, replacement) if post_data: post_data = post_data.replace(char, replacement) if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS): if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_path) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_path, re.I | re.DOTALL): found = desc break _result_cache[unquoted_path] = found or "" if found: trail = "%s(%s)" % (host, path) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % found, "(heuristic)"), packet) return if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION): found = _result_cache.get(unquoted_post_data) if found is None: for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES: if re.search(regex, unquoted_post_data, re.I | re.DOTALL): found = desc break _result_cache[unquoted_post_data] = found or "" if found: trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip()) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "%s (suspicious)" % found, "(heuristic)"), packet) return if '.' in path: _ = urlparse.urlparse("http://%s" % url) # dummy scheme path = path.lower() filename = _.path.split('/')[-1] name, extension = os.path.splitext(filename) trail = "%s(%s)" % (host, path) if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '=' not in _.query and len(name) < 10: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet) elif filename in WEB_SHELLS: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential web shell (suspicious)", "(heuristic)"), packet) else: for desc, regex in SUSPICIOUS_HTTP_PATH_REGEXES: if re.search(regex, filename, re.I): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "%s (suspicious)" % desc, "(heuristic)"), packet) break elif protocol == socket.IPPROTO_UDP: # UDP _ = ip_data[iph_length:iph_length + 4] if len(_) < 4: return src_port, dst_port = struct.unpack("!HH", _) _ = _last_udp _last_udp = (sec, src_ip, src_port, dst_ip, dst_port) if _ == _last_udp: # skip bursts return if src_port != 53 and dst_port != 53: # not DNS if dst_ip in trails: trail = dst_ip elif src_ip in trails: trail = src_ip else: trail = None if trail: _ = _last_logged_udp _last_logged_udp = _last_udp if _ != _last_logged_udp: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet) else: dns_data = ip_data[iph_length + 8:] # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf if len(dns_data) > 6: qdcount = struct.unpack("!H", dns_data[4:6])[0] if qdcount > 0: offset = 12 query = "" while len(dns_data) > offset: length = ord(dns_data[offset]) if not length: query = query[:-1] break query += dns_data[offset + 1:offset + length + 1] + '.' offset += length + 1 query = query.lower() if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES): return parts = query.split('.') if ord(dns_data[2]) & 0xfe == 0x00: # standard query (both recursive and non-recursive) type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5]) if len(parts) > 2: if len(parts) > 3 and len(parts[-2]) <= 3: domain = '.'.join(parts[-3:]) else: domain = '.'.join(parts[-2:]) if not _check_domain_whitelisted(domain): # e.g. <hash>.hashserver.cs.trendmicro.com if (sec - (_subdomains_sec or 0)) > DAILY_SECS: _subdomains.clear() _dns_exhausted_domains.clear() _subdomains_sec = sec subdomains = _subdomains.get(domain) if not subdomains: subdomains = _subdomains[domain] = set() if not re.search(r"\A\d+\-\d+\-\d+\-\d+\Z", parts[0]): if len(subdomains) < DNS_EXHAUSTION_THRESHOLD: subdomains.add('.'.join(parts[:-2])) else: if (sec - (_last_dns_exhaustion or 0)) > 60: trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet) _dns_exhausted_domains.add(domain) _last_dns_exhaustion = sec return # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types if type_ not in (12, 28) and class_ == 1: # Type not in (PTR, AAAA), Class IN if dst_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet) elif config.USE_HEURISTICS: if ord(dns_data[2]) & 0x80: # standard response if ord(dns_data[3]) == 0x80: # recursion available, no error _ = offset + 5 try: while _ < len(dns_data): if ord(dns_data[_]) & 0xc0 != 0 and dns_data[_ + 2] == "\00" and dns_data[_ + 3] == "\x01": # Type A break else: _ += 12 + struct.unpack("!H", dns_data[_ + 10: _ + 12])[0] _ = dns_data[_ + 12:_ + 16] if _: answer = socket.inet_ntoa(_) if answer in trails: _ = trails[answer] if "sinkhole" in _[0]: trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % _[0].split(" ")[1], "(heuristic)"), packet) # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn) elif "parking" in _[0]: trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:])) log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "parked site (suspicious)", "(heuristic)"), packet) except IndexError: pass elif ord(dns_data[3]) == 0x83: # recursion available, no such name if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails): if parts[-1].isdigit(): return if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])): # generic check for DNSBL IP lookups for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)): if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600: NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()] else: NO_SUCH_NAME_COUNTERS[_][1] += 1 NO_SUCH_NAME_COUNTERS[_][2].add(query) if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD: if _.startswith("*."): log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet) for item in NO_SUCH_NAME_COUNTERS[_][2]: try: del NO_SUCH_NAME_COUNTERS[item] except KeyError: pass else: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet) try: del NO_SUCH_NAME_COUNTERS[_] except KeyError: pass break if len(parts) > 2: part = parts[0] if parts[0] != "www" else parts[1] trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:])) elif len(parts) == 2: part = parts[0] trail = "(%s).%s" % (parts[0], parts[1]) else: part = query trail = query if part and '-' not in part: result = _result_cache.get(part) if result is None: # Reference: https://github.com/exp0se/dga_detector probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part)) entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities) if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD: result = "entropy threshold no such domain (suspicious)" if not result: if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD: result = "consonant threshold no such domain (suspicious)" _result_cache[part] = result or False if result: log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet) elif protocol in IPPROTO_LUT: # non-TCP/UDP (e.g. ICMP) if protocol == socket.IPPROTO_ICMP: if ord(ip_data[iph_length]) != 0x08: # Non-echo request return elif protocol == socket.IPPROTO_ICMPV6: if ord(ip_data[iph_length]) != 0x80: # Non-echo request return if dst_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet) elif src_ip in trails: log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet) except struct.error: pass except Exception: if config.SHOW_DEBUG: traceback.print_exc()
def log_event(event_tuple, packet=None, skip_write=False, skip_condensing=False): try: sec, usec, src_ip, src_port, dst_ip, dst_port, proto, trail_type, trail, info = event_tuple[:10] if not any(check_whitelisted(_) for _ in (src_ip, dst_ip)): if not skip_write: localtime = "%s.%06d" % (time.strftime(TIME_FORMAT, time.localtime(int(sec))), usec) if not skip_condensing: if (sec - getattr(_thread_data, "condensed_events_flush_sec", 0)) > CONDENSED_EVENTS_FLUSH_PERIOD: _thread_data.condensed_events_flush_sec = sec for key in getattr(_thread_data, "condensed_events", []): condensed = False events = _thread_data.condensed_events[key] first_event = events[0] condensed_event = [_ for _ in first_event] for i in xrange(1, len(events)): current_event = events[i] for j in xrange(3, 7): # src_port, dst_ip, dst_port, proto if current_event[j] != condensed_event[j]: condensed = True if not isinstance(condensed_event[j], set): condensed_event[j] = set((condensed_event[j],)) condensed_event[j].add(current_event[j]) if condensed: for i in xrange(len(condensed_event)): if isinstance(condensed_event[i], set): condensed_event[i] = ','.join(str(_) for _ in sorted(condensed_event[i])) log_event(condensed_event, skip_condensing=True) _thread_data.condensed_events = {} if any(_ in info for _ in CONDENSE_ON_INFO_KEYWORDS): if not hasattr(_thread_data, "condensed_events"): _thread_data.condensed_events = {} key = (src_ip, trail) if key not in _thread_data.condensed_events: _thread_data.condensed_events[key] = [] _thread_data.condensed_events[key].append(event_tuple) return event = "%s %s %s\n" % (safe_value(localtime), safe_value(config.SENSOR_NAME), " ".join(safe_value(_) for _ in event_tuple[2:])) if not config.DISABLE_LOCAL_LOG_STORAGE: handle = get_event_log_handle(sec) os.write(handle, event) if config.LOG_SERVER: remote_host, remote_port = config.LOG_SERVER.split(':') s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.sendto("%s %s" % (sec, event), (remote_host, int(remote_port))) if config.DISABLE_LOCAL_LOG_STORAGE and not config.LOG_SERVER or config.console: sys.stderr.write(event) sys.stderr.flush() if config.plugin_functions: for _ in config.plugin_functions: _(event_tuple, packet) except (OSError, IOError): if config.SHOW_DEBUG: traceback.print_exc()