def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto):
    """
    Checks a queried domain name against known trails and, optionally, the
    long-name heuristic

    Every suffix of the name (from the whole name down to its last label) is
    looked up in `trails`; the first hit is logged and ends the check.
    Otherwise, if config.USE_HEURISTICS is set, a first label longer than
    SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and containing no '-' is logged as
    "long domain name (suspicious)" unless its last two labels are in
    WHITELIST.
    """

    labels = query.split('.')

    # Known trails: longest suffix first
    for start in xrange(len(labels)):
        suffix = '.'.join(labels[start:])
        if suffix not in trails:
            continue

        if suffix == query:
            trail = suffix
        else:
            # Non-matching leading part is shown in parentheses
            tail = ".%s" % suffix
            trail = "(%s)%s" % (query[:-len(tail)], tail)

        entry = trails[suffix]
        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, entry[0], entry[1]))
        return

    if not config.USE_HEURISTICS:
        return

    first_label = labels[0]
    if len(first_label) <= SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD or '-' in first_label:
        return

    label_count = len(labels)
    trail = None

    if label_count > 2:
        base = '.'.join(labels[-2:])
        if base not in WHITELIST:
            trail = "(%s).%s" % ('.'.join(labels[:-2]), base)
    elif label_count == 2:
        if '.'.join(labels) not in WHITELIST:
            trail = "(%s).%s" % (labels[0], labels[1])
    else:
        trail = query

    if trail:
        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against known trails and the long-label heuristic

    The name is lower-cased and stripped of any ":port" part. Plain IP
    addresses and names already cached as negative in `_result_cache` are
    skipped. Otherwise every suffix of the name is looked up in `trails`
    and the first acceptable hit is reported through log_event(). If there
    is no hit and config.USE_HEURISTICS is set, an overly long first label
    (without '-') is reported as "long domain (suspicious)". Names that
    produced no event are cached as False.

    query  -- domain name (may be None or empty, in which case nothing is done)
    packet -- optional raw packet, passed through to log_event()
    """

    # None/empty input: the original guarded the normalisation with
    # `if query:` but then dereferenced `query` unconditionally
    if not query:
        return

    query = query.lower()
    if ':' in query:
        query = query.split(':', 1)[0]

    if query.replace('.', "").isdigit():  # IP address
        return

    if _result_cache.get(query) is False:  # already known to be uninteresting
        return

    result = False

    if not _check_domain_whitelisted(query) and all(_ in VALID_DNS_CHARS for _ in query):
        parts = query.split('.')

        for i in xrange(0, len(parts)):
            domain = '.'.join(parts[i:])
            if domain not in trails:
                continue

            if domain == query:
                trail = domain
            else:
                # Non-matching leading part is shown in parentheses
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            info = trails[domain][0]

            # Name server of a suspicious/sinkholed domain (e.g. ns2.nobel.su)
            if re.search(r"(?i)\Ad?ns\d*\.", query) and any(_ in info for _ in ("suspicious", "sinkhole")):
                continue

            # Bare provider domain of a dynamic/free hosting service (e.g. noip.com)
            if query == trail and any(_ in info for _ in ("dynamic", "free web")):
                continue

            result = True
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
            break

        if not result and config.USE_HEURISTICS:
            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                if len(parts) > 2:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                elif len(parts) == 2:
                    trail = "(%s).%s" % (parts[0], parts[1])
                else:
                    trail = query

                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                    result = True
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    if result is False:
        _result_cache[query] = False
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against known trails and DNS heuristics

    Unless the name is whitelisted, every suffix of it is looked up in
    `trails`; the first hit is reported through log_event() and ends the
    check. Otherwise, if config.USE_HEURISTICS is set, either an overly
    long first label (without '-') or a name containing "sinkhole" is
    reported as suspicious.

    query  -- domain name (None/empty is ignored)
    packet -- optional raw packet, passed through to log_event()
    """

    if not query:
        return

    # Normalise once. The original built suffixes from query.lower() but
    # compared and sliced against the raw query, so a mixed-case exact match
    # (e.g. "Evil.com") was reported as the malformed trail "().evil.com"
    query = query.lower()

    if _check_domain_whitelisted(query):
        return

    parts = query.split('.')

    for i in xrange(0, len(parts)):
        domain = '.'.join(parts[i:])
        if domain in trails:
            if domain == query:
                trail = domain
            else:
                # Non-matching leading part is shown in parentheses
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
            return

    if config.USE_HEURISTICS:
        if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
            if len(parts) > 2:
                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
            elif len(parts) == 2:
                trail = "(%s).%s" % (parts[0], parts[1])
            else:
                trail = query

            if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)
        elif "sinkhole" in query:
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, query, "potential sinkhole domain (suspicious)", "(heuristic)"), packet)
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto):
    """
    Checks a domain name against known trails and DNS heuristics

    Every suffix of the name is looked up in `trails`; the first hit is
    reported through log_event() and ends the check. Otherwise, if
    config.USE_HEURISTICS is set, either an overly long first label
    (without '-', base domain not in WHITELIST) or a name containing
    "sinkhole" is reported as suspicious.

    query -- domain name (None/empty is ignored)
    """

    if not query:
        return

    # Normalise once. The original built suffixes from query.lower() but
    # compared and sliced against the raw query, so a mixed-case exact match
    # (e.g. "Evil.com") was reported as the malformed trail "().evil.com"
    query = query.lower()
    parts = query.split('.')

    for i in xrange(0, len(parts)):
        domain = '.'.join(parts[i:])
        if domain in trails:
            if domain == query:
                trail = domain
            else:
                # Non-matching leading part is shown in parentheses
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]))
            return

    if config.USE_HEURISTICS:
        if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
            trail = None

            if len(parts) > 2:
                if '.'.join(parts[-2:]) not in WHITELIST:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
            elif len(parts) == 2:
                if '.'.join(parts) not in WHITELIST:
                    trail = "(%s).%s" % (parts[0], parts[1])
            else:
                trail = query

            if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))
        elif "sinkhole" in query:
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, query, "potential sinkhole domain (suspicious)", "(heuristic)"))
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against known trails and the long-label heuristic

    Names already cached as negative in `_result_cache` are skipped.
    Otherwise, unless the name is whitelisted or contains characters
    outside VALID_DNS_CHARS, every suffix of it is looked up in `trails`
    and the first hit is reported through log_event(). If there is no hit
    and config.USE_HEURISTICS is set, an overly long first label (without
    '-') is reported as "long domain (suspicious)". Names that produced no
    event are cached as False.

    query  -- domain name (None/empty is ignored)
    packet -- optional raw packet, passed through to log_event()
    """

    if not query:
        return

    # Normalise once. The original built suffixes from query.lower() but
    # compared and sliced against the raw query, so a mixed-case exact match
    # (e.g. "Evil.com") was reported as the malformed trail "().evil.com".
    # This also keeps the negative cache keyed on a single spelling per name
    query = query.lower()

    if _result_cache.get(query) is False:  # already known to be uninteresting
        return

    result = False

    if not _check_domain_whitelisted(query) and all(_ in VALID_DNS_CHARS for _ in query):
        parts = query.split('.')

        for i in xrange(0, len(parts)):
            domain = '.'.join(parts[i:])
            if domain in trails:
                if domain == query:
                    trail = domain
                else:
                    # Non-matching leading part is shown in parentheses
                    suffix = ".%s" % domain
                    trail = "(%s)%s" % (query[:-len(suffix)], suffix)

                result = True
                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
                break

        if not result and config.USE_HEURISTICS:
            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                if len(parts) > 2:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                elif len(parts) == 2:
                    trail = "(%s).%s" % (parts[0], parts[1])
                else:
                    trail = query

                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                    result = True
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    if result is False:
        _result_cache[query] = False
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) IP layer data

    Parses the IPv4/IPv6 header located at `ip_offset` inside `packet` and,
    depending on the transport protocol, matches addresses, DNS names, HTTP
    hosts/URLs and user agents against `trails` (plus the optional
    heuristics enabled in `config`), reporting every hit through
    log_event().

    packet    -- raw packet data
    sec, usec -- packet timestamp (seconds and microseconds)
    ip_offset -- offset of the IP header inside `packet`

    Returns None. struct.error (raised while unpacking headers) is ignored;
    any other exception is printed only when config.SHOW_DEBUG is set.
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp

    try:
        # Keep the shared result cache bounded
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # Timestamp moved to a later second: report collected SYN
            # statistics and start over
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length + 14])

            # Non-SYN traffic is passed on with skip_write=True, and only
            # when plugin functions are configured
            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                previous = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if previous == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    previous = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if previous != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    previous = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if previous != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                # Collect per source~destination SYN data for the port
                # scanning check done at the top of this function
                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                if config.USE_DEEP_HEURISTICS:
                    if tcp_data.startswith("HTTP/"):  # HTTP response
                        if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                        else:
                            index = tcp_data.find("<title>")
                            if index >= 0:
                                title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                                if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                # HTTP request line ("<method> <path> HTTP/x.y")
                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    if "://" in path:  # absolute URL in the request line
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            result = _result_cache.get(user_agent)
                            if result is None:  # not evaluated yet
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        def _escape(value):
                                            # Literal parentheses are escaped because unescaped
                                            # ones mark the non-matching part of the trail
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            result = _result_cache[user_agent] = "%s (%s)" % (_escape(match.group(0)), _escape(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _escape(match.group(0)).join(("(%s)" if part else "%s") % _escape(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    # Path variants (as is, without the query string, without
                    # the extension, parent directory) to look up in trails
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    root, ext = os.path.splitext(checks[-1])
                    if ext:
                        checks.append(root)

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Bare path first, then host + path
                        for candidate in (check, "%s%s" % (host, check)):
                            if candidate in trails:
                                parts = url.split(candidate)
                                other = ("(%s)" % part if part else part for part in parts)
                                trail = candidate.join(other)
                                # packet is passed like in every other call here (it used to be omitted)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[candidate][0], trails[candidate][1]), packet)
                                return

                    if config.USE_HEURISTICS:
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            if '.' in path:
                                parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                filename = parsed.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not parsed.query and len(name) < 10:
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename == "suspendedpage.cgi":
                                    # `trail` was never assigned on this path; the resulting
                                    # UnboundLocalError was swallowed below and the event lost
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet)

        elif protocol == socket.IPPROTO_UDP:  # UDP
            ports = ip_data[iph_length:iph_length + 4]
            if len(ports) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", ports)

            previous = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if previous == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    previous = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if previous != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                dns_data = ip_data[iph_length + 8:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83):  # standard response, recursion available, no such name
                                if not _check_domain_whitelisted(query):
                                    parts = query.split('.')

                                    if parts[-1].isdigit():
                                        return

                                    if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                        hour = sec // 3600  # counters are kept per hour

                                        # Counted per exact name and per "*.<last two labels>"
                                        for counter in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                            if counter not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[counter][0] != hour:
                                                NO_SUCH_NAME_COUNTERS[counter] = [hour, 1, set()]
                                            else:
                                                NO_SUCH_NAME_COUNTERS[counter][1] += 1
                                                NO_SUCH_NAME_COUNTERS[counter][2].add(query)

                                                if NO_SUCH_NAME_COUNTERS[counter][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                    if counter.startswith("*."):
                                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(counter[1:], "") for item in NO_SUCH_NAME_COUNTERS[counter][2]), counter[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                        for item in NO_SUCH_NAME_COUNTERS[counter][2]:
                                                            try:
                                                                del NO_SUCH_NAME_COUNTERS[item]
                                                            except KeyError:
                                                                pass
                                                    else:
                                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, counter, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                    try:
                                                        del NO_SUCH_NAME_COUNTERS[counter]
                                                    except KeyError:
                                                        pass

                                                    break

                                        if len(parts) > 2:
                                            part = parts[0] if parts[0] != "www" else parts[1]
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                        elif len(parts) == 2:
                                            part = parts[0]
                                            trail = "(%s).%s" % (parts[0], parts[1])
                                        else:
                                            part = query
                                            trail = query

                                        result = _result_cache.get(part)

                                        if part:
                                            if result is None:  # not evaluated yet
                                                # Reference: https://github.com/exp0se/dga_detector
                                                probabilities = (float(part.count(c)) / len(part) for c in set(part))
                                                entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                    result = "entropy threshold no such domain (suspicious)"

                                                if not result:
                                                    if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                        result = "consonant threshold no such domain (suspicious)"

                                                _result_cache[part] = result or False

                                            if result:
                                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        # Deliberately ignored (header could not be unpacked)
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Locates the IPv4 header behind the link layer header (PPP, Ethernet or
    Linux cooked capture, depending on `_datalink`) and, depending on the
    transport protocol, matches addresses, DNS names, HTTP hosts/URLs and
    user agents against `trails` (plus the optional heuristics enabled in
    `config`), reporting every hit through log_event().

    packet    -- raw captured packet, link layer header included
    sec, usec -- packet timestamp (seconds and microseconds)

    Returns None. Exceptions are printed only when config.SHOW_DEBUG is
    set, otherwise they are silently dropped.
    """

    global _connect_sec

    try:
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            ppp_protocol = packet[2:4]
            if ppp_protocol == "\x00\x21":  # IP
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                packet = packet[2:]
            eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
            # "!H" already converts from network byte order; the additional
            # socket.ntohs() used before made this match only on
            # little-endian hosts
            eth_protocol = eth_header[3]
            if eth_protocol == 0x0800:  # IP
                ip_offset = ETH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ip_offset:ip_offset + 20])
        ip_length = ip_header[2]
        # All offsets below are relative to the real IP header position
        # (ETH_LENGTH was hard-coded before, which is wrong for PPP)
        packet = packet[:ip_offset + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            i = ip_offset + iph_length
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[i:i + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    # Collect per source:destination SYN data
                    key = "%s:%s" % (src_ip, dst_ip)
                    if key not in _connect_src_dst:
                        _connect_src_dst[key] = set()
                        _connect_src_details[key] = set()
                    _connect_src_dst[key].add(dst_port)
                    _connect_src_details[key].add((sec, usec, src_port, dst_port))

                    # Timestamp moved to a later second: report and start over
                    if sec > _connect_sec:
                        for key in _connect_src_dst:
                            if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                                _src_ip, _dst_ip = key.split(':')
                                for _sec, _usec, _src_port, _dst_port in _connect_src_details[key]:
                                    log_event((_sec, _usec, _src_ip, _src_port, _dst_ip, _dst_port, "TCP", TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                        _connect_sec = sec
                        _connect_src_dst.clear()
                        _connect_src_details.clear()

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = ip_offset + iph_length + (tcph_length << 2)
                data = packet[h_size:]

                # HTTP request line ("<method> <path> HTTP/x.y")
                method, path = None, None
                index = data.find("\n")
                if index >= 0:
                    line = data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method = line.split(' ')[0].upper()
                        path = line.split(' ')[1].lower()

                if method and path:
                    host = dst_ip
                    index = data.find("\r\nHost:")
                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = data[index:data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    if "://" in path:  # absolute URL in the request line
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    else:
                        url = "%s%s" % (host, path)

                    user_agent = None
                    index = data.find("\r\nUser-Agent:")
                    if index >= 0:
                        index = index + len("\r\nUser-Agent:")
                        user_agent = urllib.unquote(data[index:data.find("\r\n", index)]).strip()

                    if config.USE_HEURISTICS:
                        found = False

                        if user_agent:
                            if user_agent not in _user_agent_cache:
                                found = _user_agent_cache[user_agent] = re.search(SUSPICIOUS_UA_REGEX, user_agent) is not None and not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS)
                            else:
                                found = _user_agent_cache[user_agent]

                            if found:
                                # NOTE(review): both replace() calls are no-ops as written;
                                # presumably parentheses were meant to be escaped - confirm
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent.replace('(', "(").replace(')', ")"), "suspicious user agent", "(heuristic)"))

                        if not found and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # Path variants (as is, without the query string, without
                    # the extension, parent directory) to look up in trails
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    root, ext = os.path.splitext(checks[-1])
                    if ext:
                        checks.append(root)

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Bare path first, then host + path
                        for candidate in (check, "%s%s" % (host, check)):
                            if candidate in trails:
                                parts = url.split(candidate)
                                other = ("(%s)" % part if part else part for part in parts)
                                trail = candidate.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[candidate][0], trails[candidate][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST and not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS) and re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                            trail = "%s(%s)" % (host, path)
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = parsed.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and not parsed.query and len(name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            i = ip_offset + iph_length
            ports = packet[i:i + 4]
            if len(ports) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", ports)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = ip_offset + iph_length + 8
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question
                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP")

                        elif config.USE_HEURISTICS:
                            if (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                hour = sec // 3600  # counters are kept per hour

                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != hour:
                                    NO_SUCH_NAME_COUNTERS[query] = [hour, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST and '.'.join(query.split('.')[-2:]) not in WHITELIST:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                i = ip_offset + iph_length
                # packet[i] is a one-character string; comparing it with an
                # int directly was always unequal, so every ICMP packet got
                # dropped here
                if ord(packet[i]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Locates the IPv4 header inside a captured link-layer frame (PPP, Linux
    cooked capture, Ethernet or VLAN tagged Ethernet), then inspects the TCP,
    UDP or other IP payload and reports every match through log_event().

    packet -- raw captured frame (indexed with ord(), i.e. a Python 2 str)
    sec, usec -- capture timestamp copied into every logged event

    Returns None. Module-level state touched: _connect_sec, _connect_src_dst,
    _connect_src_details, _user_agent_cache and NO_SUCH_NAME_COUNTERS.
    Exceptions raised while parsing are swallowed; the traceback is printed
    only when config.SHOW_DEBUG is set.
    """

    global _connect_sec

    try:
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            if ord(packet[2]) == 0 and ord(packet[3]) == 0x21:  # IPv4
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                packet = packet[2:]

            # Reference: ftp://ftp.heanet.ie/disk1/sourceforge/t/tp/tpcat/tpcat%20python%20source/TPCAT.py
            if ord(packet[12]) == 8 and ord(packet[13]) == 0:  # IPv4
                ip_offset = ETH_LENGTH
            elif ord(packet[12]) == 0x81 and ord(packet[13]) == 0:  # VLAN
                if ord(packet[16]) == 8 and ord(packet[17]) == 0:  # IPv4
                    ip_offset = VLANH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ip_offset:ip_offset + 20])
        ip_length = ip_header[2]

        # All offsets below are derived from ip_offset (not from ETH_LENGTH) so
        # that PPP and VLAN frames, whose link-layer header has a different
        # size, are truncated and parsed at the right position. For plain
        # Ethernet ip_offset == ETH_LENGTH, hence nothing changes there.
        packet = packet[:ip_offset + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2
        payload_offset = ip_offset + iph_length  # first byte after the IP header
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[payload_offset:payload_offset + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    # remember every destination port tried per (source, destination) pair
                    key = "%s:%s" % (src_ip, dst_ip)
                    if key not in _connect_src_dst:
                        _connect_src_dst[key] = set()
                        _connect_src_details[key] = set()

                    _connect_src_dst[key].add(dst_port)
                    _connect_src_details[key].add((sec, usec, src_port, dst_port))

                    # once the capture clock moves past the last evaluated second,
                    # report pairs exceeding the threshold and start a new window
                    if sec > _connect_sec:
                        for key in _connect_src_dst:
                            if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                                _src_ip, _dst_ip = key.split(':')
                                for _sec, _usec, _src_port, _dst_port in _connect_src_details[key]:
                                    log_event((_sec, _usec, _src_ip, _src_port, _dst_ip, _dst_port, "TCP", TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                        _connect_sec = sec
                        _connect_src_dst.clear()
                        _connect_src_details.clear()

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = payload_offset + (tcph_length << 2)
                data = packet[h_size:]

                # an HTTP request line has exactly two spaces: "<method> <path> HTTP/x.y"
                method, path = None, None
                index = data.find("\n")
                if index >= 0:
                    line = data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method = line.split(' ')[0].upper()
                        path = line.split(' ')[1].lower()

                if method and path:
                    host = dst_ip  # fallback used when no Host header is present
                    index = data.find("\r\nHost:")
                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = data[index:data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    if "://" in path:  # absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'

                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    else:
                        url = "%s%s" % (host, path)

                    user_agent = None
                    index = data.find("\r\nUser-Agent:")
                    if index >= 0:
                        index = index + len("\r\nUser-Agent:")
                        user_agent = urllib.unquote(data[index:data.find("\r\n", index)]).strip()

                    if config.USE_HEURISTICS:
                        found = False

                        if user_agent:
                            # regex verdict is memoized per distinct User-Agent value
                            if user_agent not in _user_agent_cache:
                                found = _user_agent_cache[user_agent] = re.search(SUSPICIOUS_UA_REGEX, user_agent) is not None and not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS)
                            else:
                                found = _user_agent_cache[user_agent]

                            if found:
                                # NOTE(review): both replace() calls are no-ops as written
                                # (parentheses replaced by themselves) - presumably some
                                # escaping was intended; confirm before changing
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent.replace('(', "(").replace(')', ")"), "suspicious user agent", "(heuristic)"))

                        if not found and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # candidate keys for the trails lookup: full path, path without
                    # query, path without extension and finally the parent directory
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # first the bare path, then the same value prefixed with host
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                # wrap everything around the matched part in parentheses
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST and not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS) and re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                            trail = "%s(%s)" % (host, path)
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = _.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and not _.query and len(name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = packet[payload_offset:payload_offset + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = payload_offset + 8  # skip the UDP header
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # first question name: length-prefixed labels up to a zero length byte
                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP")

                        elif config.USE_HEURISTICS:
                            if (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                # counter entry is [hour bucket, hits inside that bucket]
                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                    NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST and '.'.join(query.split('.')[-2:]) not in WHITELIST:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(packet[payload_offset]) != 8:  # Echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Parses an Ethernet frame (or a Linux cooked capture frame after dropping
    its first two bytes), and for IP traffic inspects the TCP, UDP or other IP
    payload, reporting every match through log_event().

    packet -- raw captured frame (indexed with ord(), i.e. a Python 2 str)
    sec, usec -- capture timestamp copied into every logged event

    Returns None. Updates the module-level NO_SUCH_NAME_COUNTERS. Exceptions
    raised while parsing are caught and printed together with the traceback.
    """

    try:
        if _datalink == pcapy.DLT_LINUX_SLL:
            packet = packet[2:]

        eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
        eth_protocol = socket.ntohs(eth_header[3])

        if eth_protocol == IPPROTO:  # IP
            ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ETH_LENGTH:ETH_LENGTH + 20])
            ip_length = ip_header[2]
            packet = packet[:ETH_LENGTH + ip_length]  # truncate
            iph_length = (ip_header[0] & 0xF) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])

            if protocol == socket.IPPROTO_TCP:  # TCP
                i = iph_length + ETH_LENGTH
                src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[i:i + 14])

                if flags == 2:  # SYN set (only)
                    if dst_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                    elif src_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if flags & 8 != 0:  # PSH set
                    tcph_length = doff_reserved >> 4
                    h_size = ETH_LENGTH + iph_length + (tcph_length << 2)
                    data = packet[h_size:]

                    if len(data) > 0 and "HTTP/" in data:
                        # an HTTP request line has exactly two spaces: "<method> <path> HTTP/x.y"
                        index = data.find("\r\n")
                        if index >= 0:
                            line = data[:index]
                            if line.count(' ') == 2 and " HTTP/" in line:
                                path = line.split(' ')[1].lower()
                            else:
                                return
                        else:
                            return

                        # requests without a Host header are not inspected any further
                        index = data.find("\r\nHost:")
                        if index >= 0:
                            index = index + len("\r\nHost:")
                            host = data[index:data.find("\r\n", index)]
                            host = host.strip()
                            host = re.sub(r":80\Z", "", host)
                        else:
                            return

                        url = "%s%s" % (host, path)

                        # candidate keys for the trails lookup: full path, path without
                        # query, path without extension and finally the parent directory
                        checks = [path.rstrip('/')]
                        if '?' in path:
                            checks.append(path.split('?')[0].rstrip('/'))

                        _ = os.path.splitext(checks[-1])
                        if _[1]:
                            checks.append(_[0])

                        if checks[-1].count('/') > 1:
                            checks.append(checks[-1][:checks[-1].rfind('/')])

                        for check in filter(None, checks):
                            # first the bare path, then the same value prefixed with host
                            for _ in ("", host):
                                check = "%s%s" % (_, check)
                                if check in trails:
                                    # wrap everything around the matched part in parentheses
                                    parts = url.split(check)
                                    other = ("(%s)" % _ if _ else _ for _ in parts)
                                    trail = check.join(other)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                    return

                        if config.USE_HEURISTICS:
                            if re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                return

                            if '.' in path:
                                _ = urlparse.urlparse(url)
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and len(name) < 6:
                                    trail = "%s(%s)" % (host, path)
                                    # os.path.splitext() keeps the leading dot in extension, hence
                                    # no extra '.' in the message (it used to read e.g. "..exe")
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                                elif filename in SUSPICIOUS_FILENAMES:
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

            elif protocol == socket.IPPROTO_UDP:  # UDP
                i = iph_length + ETH_LENGTH
                _ = packet[i:i + 4]
                if len(_) < 4:
                    return

                src_port, dst_port = struct.unpack("!HH", _)

                if src_port != 53:
                    if dst_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                    elif src_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if dst_port == 53 or src_port == 53:
                    h_size = ETH_LENGTH + iph_length + 8  # skip the UDP header
                    data = packet[h_size:]

                    # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                    if len(data) > 6:
                        qdcount = struct.unpack("!H", data[4:6])[0]
                        if qdcount > 0:
                            offset = 12
                            query = ""

                            # first question name: length-prefixed labels up to a zero length byte
                            while len(data) > offset:
                                length = ord(data[offset])
                                if not length:
                                    query = query[:-1]
                                    break
                                query += data[offset + 1:offset + length + 1] + '.'
                                offset += length + 1

                            if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                                return

                            if ord(data[2]) == 0x01:  # standard query
                                type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                                # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                                if type_ != 12 and class_ == 1:  # Type != PTR, Class IN
                                    parts = query.split('.')

                                    # try the full name first, then every parent domain
                                    for i in xrange(0, len(parts)):
                                        domain = '.'.join(parts[i:])
                                        if domain in trails:
                                            if domain == query:
                                                trail = domain
                                            else:
                                                # put the non-matching subdomain prefix in parentheses
                                                _ = ".%s" % domain
                                                trail = "(%s)%s" % (query[:-len(_)], _)
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, trails[domain][0], trails[domain][1]))
                                            return

                                    if config.USE_HEURISTICS and len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                                        trail = None

                                        if len(parts) > 2:
                                            if '.'.join(parts[-2:]) not in WHITELIST:
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                        elif len(parts) == 2:
                                            if '.'.join(parts) not in WHITELIST:
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                        else:
                                            trail = query

                                        if trail:
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))

                            elif config.USE_HEURISTICS and (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                # counter entry is [hour bucket, hits inside that bucket]
                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                    NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

            elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception as ex:
        # single parenthesized argument prints identically as a Python 2
        # print statement and as a print() call
        print("[x] '%s'" % ex)
        print(traceback.format_exc())
def _process_ip(ip_data, sec, usec):
    """
    Processes single (raw) IP layer data

    Parses the IPv4 header at the start of ip_data and inspects the TCP, UDP
    or other IP payload, reporting every match through log_event().

    ip_data -- raw bytes starting at the IPv4 header (indexed with ord(),
               i.e. a Python 2 str)
    sec, usec -- capture timestamp copied into every logged event

    Returns None. Module-level state touched: _connect_sec, _connect_src_dst,
    _connect_src_details, _result_cache and NO_SUCH_NAME_COUNTERS.
    struct.error is ignored silently; any other exception is swallowed too,
    with the traceback printed only when config.SHOW_DEBUG is set.
    """

    global _connect_sec

    try:
        # crude bound on the memoization cache: drop everything once too large
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # swap in the current second under the (optional) lock
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # once the capture clock moves to a later second, report pairs
            # exceeding the threshold and start a new collection window
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split(':')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
        ip_length = ip_header[2]
        ip_data = ip_data[:ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails and dst_ip != LOCALHOST_IP:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    if dst_ip != LOCALHOST_IP:
                        # remember every destination port tried per (source, destination) pair
                        key = "%s:%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()

                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response coming back from port 80: look for sinkhole / seizure markers
                if src_port == 80 and tcp_data.startswith("HTTP/"):
                    if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "Server: Apache 1.0/SinkSoft")) or "\r\n\r\nsinkhole" in tcp_data:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"))
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                            if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"))

                # an HTTP request line has exactly two spaces: "<method> <path> HTTP/x.y"
                method, path = None, None
                index = tcp_data.find("\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method = line.split(' ')[0].upper()
                        path = line.split(' ')[1].lower()

                if method and path:
                    post_data = None
                    host = dst_ip  # fallback used when no Host header is present
                    index = tcp_data.find("\r\nHost:")
                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = tcp_data[index:tcp_data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)

                        # An empty Host value (e.g. "Host:\r\n") used to raise
                        # IndexError on [-1]; the outer handler swallowed it and
                        # every check below was skipped for that request. Guard
                        # the empty case so inspection carries on.
                        hostname = host.split(':')[0]
                        if hostname and not hostname[-1].isdigit() and dst_ip in trails:
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, hostname), trails[dst_ip][0], trails[dst_ip][1]))
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    # everything from the blank line after the headers onwards
                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index:]

                    if "://" in path:  # absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'

                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        match = re.search("(?i)\r\nUser-Agent:([^\r\n]+)", tcp_data)
                        if match:
                            user_agent = urllib.unquote(match.group(1)).strip()

                        if user_agent:
                            # cached value is either False or the ready-made trail
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        # text around the first regex hit goes into
                                        # parentheses, with literal ones escaped
                                        result = _result_cache[user_agent] = match.group(0).join(("(%s)" if _ else "%s") % _.replace('(', "\\(").replace(')', "\\)") for _ in user_agent.split(match.group(0), 1))
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "suspicious user agent", "(heuristic)"))

                        if not result and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # candidate keys for the trails lookup: full path, path without
                    # query, path without extension and finally the parent directory
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # first the bare path, then the same value prefixed with host
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                # wrap everything around the matched part in parentheses
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST:
                            if not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(path)
                                if result is None:
                                    result = _result_cache[path] = re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)) is not None

                                if result:
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                            # NOTE(review): assumed to sit under the host whitelist
                            # check together with the path check above - confirm
                            if post_data and not any(_ in post_data for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(post_data)
                                if result is None:
                                    result = _result_cache[post_data] = re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(post_data)) is not None

                                if result:
                                    trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                        if '.' in path:
                            _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = _.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and '.'.join(host.split('.')[-2:]) not in WHITELIST and not _.query and len(name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                dns_data = ip_data[iph_length + 8:]  # skip the UDP header

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # first question name: length-prefixed labels up to a zero length byte
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP)

                        elif config.USE_HEURISTICS:
                            if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83):  # standard response, recursion available, no such name
                                parts = query.split('.')

                                if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                    # counted both per exact name and per "*.<last two labels>";
                                    # counter entry is [hour bucket, hits, set of queried names]
                                    for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                        if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                            NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                        else:
                                            NO_SUCH_NAME_COUNTERS[_][1] += 1
                                            NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                            if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and _ not in WHITELIST and '.'.join(_.split('.')[-2:]) not in WHITELIST:
                                                if _.startswith("*."):
                                                    # single event listing all collected subdomain prefixes
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain name (suspicious)", "(heuristic)"))
                                                    for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[item]
                                                        except KeyError:
                                                            pass
                                                else:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain name (suspicious)", "(heuristic)"))

                                                # reported counter starts from scratch
                                                try:
                                                    del NO_SUCH_NAME_COUNTERS[_]
                                                except KeyError:
                                                    pass

                                                break

                                # NOTE(review): assumed to run for DNSBL style lookups too
                                # (i.e. not nested under the check above) - confirm
                                # Reference: https://github.com/exp0se/dga_detector
                                for part in parts:
                                    if part:
                                        consonants = re.findall("(?i)[bcdfghjklmnpqrstvwxyz]", part)
                                        if len(consonants) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, query, "high consonant no such domain name (suspicious)", "(heuristic)"))
                                            break

                                        # Shannon entropy (in bits) of the label's characters
                                        probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                        entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                        if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, query, "high entropy no such domain name (suspicious)", "(heuristic)"))
                                            break

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except struct.error:
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_ip(ip_data, sec, usec):
    """
    Processes single (raw) IP layer data

    Unpacks the IPv4 header found at the start of `ip_data` and, depending on
    the carried protocol, reports findings through log_event():

    * TCP, SYN only: source/destination address lookups in `trails` and
      bookkeeping for the port scanning heuristic
    * TCP, anything else: HTTP response checks (sinkhole / seized domain) and
      HTTP request checks (Host header, proxy/CONNECT domain, user agent,
      URL trails, suspicious request/POST data, direct downloads)
    * UDP: address lookups and DNS query/response inspection
    * other protocols listed in IPPROTO_LUT (e.g. ICMP): address lookups

    :param ip_data: raw byte string starting at the IPv4 header (indexed with
        ord(), i.e. a Python 2 `str`)
    :param sec: seconds part of the packet timestamp (presumably capture
        time - confirm against caller)
    :param usec: microseconds part of the packet timestamp
    :return: None (results are only reported through log_event())

    struct.error (e.g. truncated headers) is ignored silently; any other
    exception is swallowed as well, with a traceback printed only when
    config.SHOW_DEBUG is set.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    copy of this function - confirm against the canonical file.
    """

    global _connect_sec

    try:
        # crude bound on the memoization cache shared by all heuristics
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # swap in the current second under the (optional) lock
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # a new second has started: evaluate and reset the SYN bookkeeping
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split(":")
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ",".join(_src_ports), _dst_ip, ",".join(_dst_ports), PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
        ip_length = ip_header[2]
        ip_data = ip_data[:ip_length]  # truncate
        iph_length = (ip_header[0] & 0xF) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails and dst_ip != LOCALHOST_IP:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    if dst_ip != LOCALHOST_IP:
                        # remember distinct destination ports per src/dst pair
                        key = "%s:%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response coming from port 80
                if src_port == 80 and tcp_data.startswith("HTTP/"):
                    if (
                        any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "Server: Apache 1.0/SinkSoft"))
                        or "\r\n\r\nsinkhole" in tcp_data
                    ):
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"))
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                            if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"))

                # HTTP request line: "<method> <path> HTTP/x.y"
                method, path = None, None
                index = tcp_data.find("\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(" ") == 2 and " HTTP/" in line:
                        method = line.split(" ")[0].upper()
                        path = line.split(" ")[1].lower()

                if method and path:
                    post_data = None
                    host = dst_ip  # fallback when no usable Host header is present

                    index = tcp_data.find("\r\nHost:")
                    if index >= 0:
                        index = index + len("\r\nHost:")
                        end = tcp_data.find("\r\n", index)

                        # an unterminated header would make find() return -1
                        # and the slice silently drop the last byte; keep the
                        # fallback host in that case (same as _process_packet)
                        if end >= 0:
                            host = tcp_data[index:end]
                            host = host.strip()
                            host = re.sub(r":80\Z", "", host)

                            # guard against an empty value ("Host:\r\n"),
                            # which used to raise IndexError here and abort
                            # every later check for this request
                            hostname = host.split(":")[0]
                            if hostname and not hostname[-1].isdigit() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, hostname), trails[dst_ip][0], trails[dst_ip][1]))
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index:]

                    if "://" in path:
                        # absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if "/" not in url:
                            url = "%s/" % url

                        host, path = url.split("/", 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(":")[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    elif method == "CONNECT":
                        if "/" in path:
                            host, path = path.split("/", 1)
                            path = "/%s" % path
                        else:
                            host, path = path, "/"

                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(":")[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        match = re.search("(?i)\r\nUser-Agent:([^\r\n]+)", tcp_data)
                        if match:
                            user_agent = urllib.unquote(match.group(1)).strip()

                        if user_agent:
                            # cached value is either the trail string or False
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        # parenthesize the non-matching remainder(s), escaping literal parentheses
                                        result = _result_cache[user_agent] = match.group(0).join(
                                            ("(%s)" if _ else "%s") % _.replace("(", "\\(").replace(")", "\\)")
                                            for _ in user_agent.split(match.group(0), 1)
                                        )
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "suspicious user agent", "(heuristic)"))

                        if not result and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # candidate URL trails: full path, path without query,
                    # path without extension, parent directory
                    checks = [path.rstrip("/")]
                    if "?" in path:
                        checks.append(path.split("?")[0].rstrip("/"))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count("/") > 1:
                        checks.append(checks[-1][:checks[-1].rfind("/")])

                    for check in filter(None, checks):
                        # first the bare path, then the host-prefixed one
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST:
                            if not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(path)
                                if result is None:
                                    result = _result_cache[path] = (
                                        re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)) is not None
                                    )

                                if result:
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                            if post_data and not any(_ in post_data for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(post_data)
                                if result is None:
                                    result = _result_cache[post_data] = (
                                        re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(post_data)) is not None
                                    )

                                if result:
                                    trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                        # NOTE(review): placed outside the `host not in WHITELIST`
                        # block because it carries its own whitelist test - confirm
                        if "." in path:
                            _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = _.path.split("/")[-1]
                            name, extension = os.path.splitext(filename)
                            if (
                                extension
                                and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS
                                and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS)
                                and ".".join(host.split(".")[-2:]) not in WHITELIST
                                and not _.query
                                and len(name) < 10
                            ):
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                dns_data = ip_data[iph_length + 8:]  # skip the 8 byte UDP header

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # question name: length-prefixed labels ended by a zero byte
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]  # drop the trailing dot
                                break
                            query += dns_data[offset + 1:offset + length + 1] + "."
                            offset += length + 1

                        if (
                            " " in query
                            or "." not in query
                            or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES)
                        ):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP)

                        elif config.USE_HEURISTICS:
                            if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83):  # standard response, recursion available, no such name
                                parts = query.split(".")

                                if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                    # count per exact name and per "*.<last two labels>" wildcard
                                    for _ in filter(None, (query, "*.%s" % ".".join(parts[-2:]) if query.count(".") > 1 else None)):
                                        # counters are bucketed by hour (sec / 3600)
                                        if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                            NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                        else:
                                            NO_SUCH_NAME_COUNTERS[_][1] += 1
                                            NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                            if (
                                                NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD
                                                and _ not in WHITELIST
                                                and ".".join(_.split(".")[-2:]) not in WHITELIST
                                            ):
                                                if _.startswith("*."):
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ",".join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain name (suspicious)", "(heuristic)"))
                                                    for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[item]
                                                        except KeyError:
                                                            pass
                                                else:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain name (suspicious)", "(heuristic)"))

                                                try:
                                                    del NO_SUCH_NAME_COUNTERS[_]
                                                except KeyError:
                                                    pass

                                                break

                                    # Reference: https://github.com/exp0se/dga_detector
                                    for part in parts:
                                        if part:
                                            consonants = re.findall("(?i)[bcdfghjklmnpqrstvwxyz]", part)
                                            if len(consonants) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, query, "high consonant no such domain name (suspicious)", "(heuristic)"))
                                                break

                                            # Shannon entropy (in bits) of the label's characters
                                            probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                            entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                            if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, query, "high entropy no such domain name (suspicious)", "(heuristic)"))
                                                break

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, "-", dst_ip, "-", IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, "-", dst_ip, "-", IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except struct.error:
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) packet starting from its IP layer

    The IP header (IPv4 or IPv6, chosen by the version nibble) is read from
    `packet[ip_offset:]`. Depending on the carried protocol the function
    reports findings through log_event():

    * TCP, SYN only: address lookups in `trails` (identical consecutive SYNs
      are skipped) and bookkeeping for the port scanning heuristic
    * TCP, anything else: optional HTTP response checks (sinkhole / seized
      domain) and HTTP request checks (Host header, proxy/CONNECT domain,
      user agent, URL trails, suspicious request/POST data, direct downloads)
    * UDP: address lookups for non-DNS traffic; for port 53 traffic DNS
      exhaustion, domain lookups, sinkholed answers and "no such name"
      heuristics
    * other protocols listed in IPPROTO_LUT (e.g. ICMP, ICMPv6): address lookups

    :param packet: raw packet as a byte string (indexed with ord(), i.e. a
        Python 2 `str`); also handed to log_event() together with each event
    :param sec: seconds part of the packet timestamp (presumably capture
        time - confirm against caller)
    :param usec: microseconds part of the packet timestamp
    :param ip_offset: offset of the IP header inside `packet`
    :return: None (results are only reported through log_event())

    struct.error (e.g. truncated headers) is ignored silently; any other
    exception is swallowed as well, with a traceback printed only when
    config.SHOW_DEBUG is set.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    copy of this function - confirm against the canonical file.
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp
    global _last_dns_exhaustion
    global _subdomains_sec

    try:
        # crude bound on the memoization cache shared by all heuristics
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # swap in the current second under the (optional) lock
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # a new second has started: evaluate and reset the SYN bookkeeping
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14])

            # non-SYN traffic is only forwarded (skip_write=True) when plugins are configured
            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                _ = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if _ == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        # remember distinct destination ports per src/dst pair
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                if config.USE_DEEP_HEURISTICS:
                    # HTTP response inspection
                    if tcp_data.startswith("HTTP/"):
                        if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                        else:
                            index = tcp_data.find("<title>")
                            if index >= 0:
                                title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                                if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                # HTTP request line: "<method> <path> HTTP/x.y"
                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip  # fallback when no usable Host header is present
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    if "://" in path:
                        # absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            # cached value is either the trail string or False
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        # escapes literal parentheses, as "(...)" is used for marking inside trails
                                        def _(value):
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            # match sits in the middle: "<match> (<whole user agent>)"
                                            result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    # candidate URL trails: full path, path without query,
                    # path without extension, parent directory
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # first the bare path, then the host-prefixed one
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                # packet is passed along like in every other log_event() call here
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]), packet)
                                return

                    if config.USE_HEURISTICS:
                        # regexes run against the decoded values, while the
                        # reported trail uses the force-encoded ones
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    # cached value is the description or "" for "nothing found"
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            # NOTE(review): assumed to sit inside the whitelist
                            # check (it has no whitelist test of its own) - confirm
                            if '.' in path:
                                _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)

                                # built up front as all branches below report it;
                                # formerly only the first branch assigned it, so the
                                # two *.cgi branches failed with UnboundLocalError
                                trail = "%s(%s)" % (host, path)

                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not _.query and len(name) < 10:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename == "suspendedpage.cgi":
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet)
                                elif filename == "defaultwebpage.cgi":
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "non-existent page (suspicious)", "(heuristic)"), packet)

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            _ = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if _ == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    _ = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if _ != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                dns_data = ip_data[iph_length + 8:]  # skip the 8 byte UDP header

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # question name: length-prefixed labels ended by a zero byte
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]  # drop the trailing dot
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        parts = query.split('.')

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            if len(parts) > 2:
                                domain = '.'.join(parts[-2:])

                                if not _check_domain_whitelisted(domain):  # e.g. <hash>.hashserver.cs.trendmicro.com
                                    # subdomain bookkeeping is reset once DAILY_SECS have passed
                                    if (sec - (_subdomains_sec or 0)) > DAILY_SECS:
                                        _subdomains.clear()
                                        _dns_exhausted_domains.clear()
                                        _subdomains_sec = sec

                                    subdomains = _subdomains.get(domain)

                                    if not subdomains:
                                        subdomains = _subdomains[domain] = set()

                                    if len(subdomains) < DNS_EXHAUSTION_THRESHOLD:
                                        subdomains.add('.'.join(parts[:-2]))
                                    else:
                                        # report at most once per 60 seconds
                                        if (sec - (_last_dns_exhaustion or 0)) > 60:
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet)
                                            _dns_exhausted_domains.add(domain)
                                            _last_dns_exhaustion = sec

                                        return

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if ord(dns_data[2]) & 0x80:  # standard response
                                if ord(dns_data[3]) == 0x80:  # recursion available, no error
                                    if (ord(dns_data[offset + 5]) & 0xc0) and (dns_data[offset + 15] == "\x00") and (dns_data[offset + 16] == "\x04"):  # QNAME compression, IPv4 result address
                                        answer = socket.inet_ntoa(dns_data[offset + 17:offset + 21])
                                        if answer in trails and "sinkhole" in trails[answer][0]:
                                            trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % trails[answer][0].split(" ")[1], "(heuristic)"), packet)  # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn)

                                elif ord(dns_data[3]) == 0x83:  # recursion available, no such name
                                    if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails):
                                        if parts[-1].isdigit():
                                            return

                                        if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                            # count per exact name and per "*.<last two labels>" wildcard
                                            for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                                # counters are bucketed by hour (sec / 3600)
                                                if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                                    NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                                else:
                                                    NO_SUCH_NAME_COUNTERS[_][1] += 1
                                                    NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                                    if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                        if _.startswith("*."):
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                            for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                                try:
                                                                    del NO_SUCH_NAME_COUNTERS[item]
                                                                except KeyError:
                                                                    pass
                                                        else:
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[_]
                                                        except KeyError:
                                                            pass

                                                        break

                                            # label that gets scored below, along with the trail to report
                                            if len(parts) > 2:
                                                part = parts[0] if parts[0] != "www" else parts[1]
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            elif len(parts) == 2:
                                                part = parts[0]
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                            else:
                                                part = query
                                                trail = query

                                            if part and '-' not in part:
                                                # cached value is either the description or False
                                                result = _result_cache.get(part)

                                                if result is None:
                                                    # Reference: https://github.com/exp0se/dga_detector
                                                    # Shannon entropy (in bits) of the label's characters
                                                    probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                                    entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                    if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                        result = "entropy threshold no such domain (suspicious)"

                                                    if not result:
                                                        if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                            result = "consonant threshold no such domain (suspicious)"

                                                    _result_cache[part] = result or False

                                                if result:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Strips the link-layer header (PPP, Linux cooked or Ethernet), parses the
    IPv4 header and then, depending on the transport protocol, reports (via
    log_event) any match against the known trails together with a couple of
    heuristic findings (port scanning, suspicious HTTP requests, long domain
    names, excessive "no such name" DNS responses).

    packet -- raw captured frame, starting at the link-layer header
    sec    -- capture timestamp (seconds part)
    usec   -- capture timestamp (microseconds part)

    Returns None. Any exception raised while parsing is swallowed (traceback
    is printed only when DEBUG is set) so that a single malformed packet does
    not stop the capture.
    """

    global _connect_sec

    try:
        # Length of the link-layer header preceding the IP header (None if
        # the frame does not carry IP)
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            ppp_protocol = packet[2:4]
            if ppp_protocol == "\x00\x21":  # IP
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                # drop the two leading bytes so that the rest can be parsed
                # with the same layout as a regular Ethernet header
                packet = packet[2:]

            eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
            # NOTE(review): '!' unpack followed by ntohs() gives 8 for
            # EtherType 0x0800 only on little-endian hosts - confirm whether
            # big-endian sensors need to be supported
            eth_protocol = socket.ntohs(eth_header[3])
            if eth_protocol == 8:  # IP
                ip_offset = ETH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ip_offset:ip_offset + 20])
        ip_length = ip_header[2]

        # Note: offsets below are relative to ip_offset (not to ETH_LENGTH) so
        # that PPP frames, which have a different header length, are parsed
        # from the right position too
        packet = packet[:ip_offset + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xF) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            i = ip_offset + iph_length
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[i:i + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                # On the first SYN of a new second, report and reset the
                # connection attempts collected so far
                if sec > _connect_sec:
                    for pair in _connect_src_dst:
                        if len(_connect_src_dst[pair]) > PORT_SCANNING_THRESHOLD:
                            # Report the endpoints that were recorded for this
                            # entry (not the ones of the packet that happened
                            # to trigger the flush)
                            scan_src, scan_dst = pair.split(':', 1)
                            for _sec, _usec, _src_port, _dst_port in _connect_src_details[pair]:
                                log_event((_sec, _usec, scan_src, _src_port, scan_dst, _dst_port, "TCP", TRAIL.IP, scan_src, "potential port scanning", "(heuristic)"))

                    _connect_sec = sec
                    _connect_src_dst.clear()
                    _connect_src_details.clear()

                key = "%s:%s" % (src_ip, dst_ip)
                if key not in _connect_src_dst:
                    _connect_src_dst[key] = set()
                    _connect_src_details[key] = set()

                _connect_src_dst[key].add(dst_port)
                _connect_src_details[key].add((sec, usec, src_port, dst_port))

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = ip_offset + iph_length + (tcph_length << 2)
                data = packet[h_size:]

                if len(data) > 0 and "HTTP/" in data:
                    # Request line has to look like "<method> <path> HTTP/x.y"
                    index = data.find("\r\n")
                    if index < 0:
                        return

                    line = data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        path = line.split(' ')[1].lower()
                    else:
                        return

                    index = data.find("\r\nHost:")
                    if index < 0:
                        return

                    index = index + len("\r\nHost:")
                    host = data[index:data.find("\r\n", index)]
                    host = host.strip()
                    host = re.sub(r":80\Z", "", host)

                    url = "%s%s" % (host, path)

                    # Progressively less specific forms of the requested path
                    # (without trailing slash, query string, extension and
                    # last path segment)
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    root, ext = os.path.splitext(checks[-1])
                    if ext:
                        checks.append(root)

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Each form is looked up both alone and prefixed with
                        # the host
                        for candidate in (check, "%s%s" % (host, check)):
                            if candidate in trails:
                                parts = url.split(candidate)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = candidate.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, trail, trails[candidate][0], trails[candidate][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)) and host not in WHITELIST:
                            trail = "%s(%s)" % (host, path)
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, trail, "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            _ = urlparse.urlparse(url)
                            filename = _.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and len(name) < 6:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            i = ip_offset + iph_length
            _ = packet[i:i + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = ip_offset + iph_length + 8  # 8 bytes skipped for the UDP header
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the (first)
                        # question into a dotted name
                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]  # drop the trailing dot
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ != 12 and class_ == 1:  # Type != PTR, Class IN
                                parts = query.split('.')

                                # Check the name itself and then each of its
                                # parent domains
                                for idx in xrange(0, len(parts)):
                                    domain = '.'.join(parts[idx:])
                                    if domain in trails:
                                        if domain == query:
                                            trail = domain
                                        else:
                                            suffix = ".%s" % domain
                                            trail = "(%s)%s" % (query[:-len(suffix)], suffix)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, trails[domain][0], trails[domain][1]))
                                        return

                                if config.USE_HEURISTICS and len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                                    trail = None

                                    if len(parts) > 2:
                                        if '.'.join(parts[-2:]) not in WHITELIST:
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                    elif len(parts) == 2:
                                        if '.'.join(parts) not in WHITELIST:
                                            trail = "(%s).%s" % (parts[0], parts[1])
                                    else:
                                        trail = query

                                    if trail:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))

                        elif config.USE_HEURISTICS and (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                            # Counters are kept per queried name and restarted
                            # whenever the hour (sec / 3600) changes
                            if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                            else:
                                NO_SUCH_NAME_COUNTERS[query][1] += 1

                                if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        # Deliberate best-effort: truncated/malformed packets must not stop
        # the processing of the subsequent ones
        if DEBUG:
            traceback.print_exc()