Exemple #1
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto):
    """
    Looks up a domain name in the known trails, falling back to the long-name heuristic

    The name itself and then each of its parent domains are tried (most specific
    first); the first one present in trails is logged and ends the check. When
    nothing matches and config.USE_HEURISTICS is set, a name whose first label is
    longer than SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and contains no '-' is logged
    as suspicious, unless its last two labels are listed in WHITELIST.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple.
    """

    labels = query.split('.')

    for start in range(len(labels)):
        candidate = '.'.join(labels[start:])
        if candidate not in trails:
            continue

        if candidate != query:
            # Keep the part in front of the matched domain visible, in parentheses
            suffix = ".%s" % candidate
            trail = "(%s)%s" % (query[:-len(suffix)], suffix)
        else:
            trail = candidate

        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[candidate][0], trails[candidate][1]))
        return

    # No trail matched - only the (optional) heuristic is left
    if not config.USE_HEURISTICS:
        return

    head = labels[0]
    if len(head) <= SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD or '-' in head:
        return

    trail = None
    count = len(labels)

    if count == 2:
        if '.'.join(labels) not in WHITELIST:
            trail = "(%s).%s" % (labels[0], labels[1])
    elif count > 2:
        tail = '.'.join(labels[-2:])
        if tail not in WHITELIST:
            trail = "(%s).%s" % ('.'.join(labels[:-2]), tail)
    else:
        trail = query

    if trail:
        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))
Exemple #2
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against the known trails and (optionally) the long-name heuristic

    The query is lower-cased and stripped of a trailing ":port" part first.
    Missing/empty names and names made only of digits and dots (IP addresses)
    are ignored. Names that previously produced no event are skipped through
    _result_cache.

    Otherwise the name and each of its parent domains are looked up in trails
    (most specific first) and the first acceptable hit is logged. Without a hit,
    and with config.USE_HEURISTICS set, an over-long first label is logged as
    suspicious.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple; packet is handed over to log_event().
    """

    # Normalize: lower case, without the port part
    query = (query or "").lower().split(':', 1)[0]

    # Previously only the normalization was guarded, so a missing query still
    # reached the .replace() call below and raised AttributeError
    if not query:
        return

    if query.replace('.', "").isdigit():  # IP address
        return

    # Only an explicit False marks a name already found harmless (None == not cached)
    if _result_cache.get(query) == False:
        return

    result = False
    if not _check_domain_whitelisted(query) and all(_ in VALID_DNS_CHARS for _ in query):
        parts = query.split('.')

        for start in range(len(parts)):
            domain = '.'.join(parts[start:])
            if domain not in trails:
                continue

            if domain == query:
                trail = domain
            else:
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            info = trails.get(domain, " ")[0]

            # Name-server style host (ns*/dns*) below a suspicious/sinkhole trail, e.g. ns2.nobel.su
            nameserver_noise = re.search(r"(?i)\Ad?ns\d*\.", query) and any(_ in info for _ in ("suspicious", "sinkhole"))

            # Exact hit on a dynamic/free web trail itself, e.g. noip.com
            provider_noise = (query == trail) and any(_ in info for _ in ("dynamic", "free web"))

            if not nameserver_noise and not provider_noise:
                result = True
                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
                break

        if not result and config.USE_HEURISTICS:
            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                trail = None

                if len(parts) > 2:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                elif len(parts) == 2:
                    trail = "(%s).%s" % (parts[0], parts[1])
                else:
                    trail = query

                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                    result = True
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    # Remember harmless names so that repeated lookups return early
    if not result:
        _result_cache[query] = False
Exemple #3
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against the known trails and (optionally) heuristics

    Names accepted by _check_domain_whitelisted() are ignored. Otherwise the
    name and each of its parent domains are looked up in trails (most specific
    first); the first hit is logged and ends the check. Without a hit, and with
    config.USE_HEURISTICS set, either an over-long first label or the word
    "sinkhole" inside the name is logged as suspicious.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple; packet is handed over to log_event().
    """

    # Fold the case once for the whole check. Previously only the split parts
    # were lower-cased, so a mixed-case query never equalled its own domain and
    # was logged with an empty prefix (e.g. "().evil.com")
    query = query.lower()

    if _check_domain_whitelisted(query):
        return

    parts = query.split('.')

    for start in range(len(parts)):
        domain = '.'.join(parts[start:])
        if domain not in trails:
            continue

        if domain == query:
            trail = domain
        else:
            # Keep the part in front of the matched domain visible, in parentheses
            suffix = ".%s" % domain
            trail = "(%s)%s" % (query[:-len(suffix)], suffix)

        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
        return

    if not config.USE_HEURISTICS:
        return

    if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
        if len(parts) > 2:
            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
        elif len(parts) == 2:
            trail = "(%s).%s" % (parts[0], parts[1])
        else:
            trail = query

        if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    elif "sinkhole" in query:
        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, query, "potential sinkhole domain (suspicious)", "(heuristic)"), packet)
Exemple #4
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto):
    """
    Checks a domain name against the known trails and (optionally) heuristics

    The name and each of its parent domains are looked up in trails (most
    specific first); the first hit is logged and ends the check. Without a hit,
    and with config.USE_HEURISTICS set, either an over-long first label (unless
    the last two labels are listed in WHITELIST) or the word "sinkhole" inside
    the name is logged as suspicious.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple.
    """

    # Fold the case once for the whole check. Previously only the split parts
    # were lower-cased, so a mixed-case query never equalled its own domain and
    # was logged with an empty prefix (e.g. "().evil.com")
    query = query.lower()
    parts = query.split('.')

    for start in range(len(parts)):
        domain = '.'.join(parts[start:])
        if domain not in trails:
            continue

        if domain == query:
            trail = domain
        else:
            # Keep the part in front of the matched domain visible, in parentheses
            suffix = ".%s" % domain
            trail = "(%s)%s" % (query[:-len(suffix)], suffix)

        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]))
        return

    if not config.USE_HEURISTICS:
        return

    if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
        trail = None

        if len(parts) > 2:
            if '.'.join(parts[-2:]) not in WHITELIST:
                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
        elif len(parts) == 2:
            if '.'.join(parts) not in WHITELIST:
                trail = "(%s).%s" % (parts[0], parts[1])
        else:
            trail = query

        if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))

    elif "sinkhole" in query:
        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, query, "potential sinkhole domain (suspicious)", "(heuristic)"))
Exemple #5
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against the known trails and (optionally) the long-name heuristic

    Names that previously produced no event are skipped through _result_cache.
    Names accepted by _check_domain_whitelisted() or containing characters
    outside VALID_DNS_CHARS are not inspected. Otherwise the name and each of
    its parent domains are looked up in trails (most specific first) and the
    first hit is logged. Without a hit, and with config.USE_HEURISTICS set, an
    over-long first label is logged as suspicious.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple; packet is handed over to log_event().
    """

    # Fold the case once for the whole check. Previously only the split parts
    # were lower-cased, so a mixed-case query never equalled its own domain and
    # was logged with an empty prefix (e.g. "().evil.com"); it also kept the
    # cache keyed by spelling instead of by name
    query = query.lower()

    # Only an explicit False marks a name already found harmless (None == not cached)
    if _result_cache.get(query) == False:
        return

    result = False
    if not _check_domain_whitelisted(query) and all(_ in VALID_DNS_CHARS for _ in query):
        parts = query.split('.')

        for start in range(len(parts)):
            domain = '.'.join(parts[start:])
            if domain not in trails:
                continue

            if domain == query:
                trail = domain
            else:
                # Keep the part in front of the matched domain visible, in parentheses
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            result = True
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
            break

        if not result and config.USE_HEURISTICS:
            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                if len(parts) > 2:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                elif len(parts) == 2:
                    trail = "(%s).%s" % (parts[0], parts[1])
                else:
                    trail = query

                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                    result = True
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    # Remember harmless names so that repeated lookups return early
    if not result:
        _result_cache[query] = False
Exemple #6
0
def _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, proto, packet=None):
    """
    Checks a domain name against the known trails and (optionally) the long-name heuristic

    Names that previously produced no event are skipped through _result_cache.
    Names accepted by _check_domain_whitelisted() or containing characters
    outside VALID_DNS_CHARS are not inspected. Otherwise the name and each of
    its parent domains are looked up in trails (most specific first) and the
    first hit is logged. Without a hit, and with config.USE_HEURISTICS set, an
    over-long first label is logged as suspicious.

    sec, usec, src_ip, src_port, dst_ip, dst_port and proto are copied as-is
    into the logged event tuple; packet is handed over to log_event().
    """

    # Fold the case once for the whole check. Previously only the split parts
    # were lower-cased, so a mixed-case query never equalled its own domain and
    # was logged with an empty prefix (e.g. "().evil.com"); it also kept the
    # cache keyed by spelling instead of by name
    query = query.lower()

    # Only an explicit False marks a name already found harmless (None == not cached)
    if _result_cache.get(query) == False:
        return

    result = False
    if not _check_domain_whitelisted(query) and all(_ in VALID_DNS_CHARS for _ in query):
        parts = query.split('.')

        for start in range(len(parts)):
            domain = '.'.join(parts[start:])
            if domain not in trails:
                continue

            if domain == query:
                trail = domain
            else:
                # Keep the part in front of the matched domain visible, in parentheses
                suffix = ".%s" % domain
                trail = "(%s)%s" % (query[:-len(suffix)], suffix)

            result = True
            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, trails[domain][0], trails[domain][1]), packet)
            break

        if not result and config.USE_HEURISTICS:
            if len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                if len(parts) > 2:
                    trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                elif len(parts) == 2:
                    trail = "(%s).%s" % (parts[0], parts[1])
                else:
                    trail = query

                if trail and not any(_ in trail for _ in WHITELIST_LONG_DOMAIN_NAME_KEYWORDS):
                    result = True
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, proto, TRAIL.DNS, trail, "long domain (suspicious)", "(heuristic)"), packet)

    # Remember harmless names so that repeated lookups return early
    if not result:
        _result_cache[query] = False
Exemple #7
0
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) IP layer data

    Decodes the IPv4/IPv6 header located at ip_offset inside packet and inspects
    the TCP, UDP or other (e.g. ICMP) payload that follows it. Every finding is
    reported through log_event(); nothing is returned.

    sec and usec are copied into every logged event (presumably the capture
    timestamp split into seconds and microseconds - confirm against the caller).

    Parsing errors (struct.error) are silently ignored; any other exception is
    swallowed as well and only printed when config.SHOW_DEBUG is set.
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp

    try:
        # Crude size limit: drop the whole cache once it has grown too large
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # A new second has started: evaluate (and reset) the SYN statistics collected so far
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14])

            # Non-SYN traffic with known trail IPs is only passed along with skip_write=True
            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                _ = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if _ == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                # Collect per source~destination port statistics for the port scanning check above
                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response inspection (sinkhole markers, seized domain pages)
                if config.USE_DEEP_HEURISTICS:
                    if tcp_data.startswith("HTTP/"):
                        if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                        else:
                            index = tcp_data.find("<title>")
                            if index >= 0:
                                title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                                if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                # HTTP request line, e.g. "GET /index.html HTTP/1.1"
                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    # Absolute URL in the request line and CONNECT both carry the real target host
                    if "://" in path:
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            # Cached value: None (not seen yet), False (harmless) or the trail to log
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        def _(value):
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    # Candidate URL trails: the path, then without query, extension and last segment
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Each candidate is tried bare first and then prefixed with the host
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                # packet is passed along like in every other log_event() call of this function
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]), packet)
                                return

                    if config.USE_HEURISTICS:
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    # Cached value: None (not seen yet), "" (harmless) or the matched description
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \\(%s %s\\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            if '.' in path:
                                parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                filename = parsed.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)

                                # Built up front for both branches: the "suspended page" branch used
                                # to log a trail that was never assigned on this code path, which
                                # raised UnboundLocalError and silently lost the event
                                trail = "%s(%s)" % (host, path)

                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not parsed.query and len(name) < 10:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename == "suspendedpage.cgi":
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet)

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            _ = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if _ == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    _ = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if _ != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                # Skip the (8 byte) UDP header
                dns_data = ip_data[iph_length + 8:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question into a dotted name
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83):  # standard response, recursion available, no such name
                                if not _check_domain_whitelisted(query):
                                    parts = query.split('.')
                                    if parts[-1].isdigit():
                                        return

                                    if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                        # Counters are kept per name and per "*.<last two labels>" wildcard;
                                        # each entry is [hour bucket, count, set of seen names]
                                        for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                            if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                                NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                            else:
                                                NO_SUCH_NAME_COUNTERS[_][1] += 1
                                                NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                                if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                    if _.startswith("*."):
                                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                        for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                            try:
                                                                del NO_SUCH_NAME_COUNTERS[item]
                                                            except KeyError:
                                                                pass
                                                    else:
                                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                    try:
                                                        del NO_SUCH_NAME_COUNTERS[_]
                                                    except KeyError:
                                                        pass

                                                    break

                                        # part is the label that gets scored, trail is what gets logged
                                        if len(parts) > 2:
                                            part = parts[0] if parts[0] != "www" else parts[1]
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                        elif len(parts) == 2:
                                            part = parts[0]
                                            trail = "(%s).%s" % (parts[0], parts[1])
                                        else:
                                            part = query
                                            trail = query

                                        result = _result_cache.get(part)

                                        if part:
                                            if result is None:
                                                # Reference: https://github.com/exp0se/dga_detector
                                                probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                                entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                    result = "entropy threshold no such domain (suspicious)"

                                                if not result:
                                                    if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                        result = "consonant threshold no such domain (suspicious)"

                                                _result_cache[part] = result or False

                                        if result:
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        # Truncated/malformed packet - nothing to inspect
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #8
0
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Locates the IPv4 header inside the captured frame (PPP or
    Ethernet-like framing), then inspects TCP, UDP (including DNS on port
    53) and the remaining protocols listed in IPPROTO_LUT. Every match
    against `trails`, and every heuristic hit when config.USE_HEURISTICS
    is set, is reported through log_event().

    packet -- raw captured frame (Python 2 byte string)
    sec    -- seconds part of the capture timestamp
    usec   -- microseconds part of the capture timestamp

    Always returns None. Any exception raised while parsing (e.g. a
    truncated packet) is swallowed; the traceback is printed only when
    config.SHOW_DEBUG is set.
    """

    global _connect_sec

    try:
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            ppp_protocol = packet[2:4]
            if ppp_protocol == "\x00\x21":  # IP
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                # NOTE(review): presumably drops 2 bytes so the cooked header lines up with the Ethernet layout - confirm
                packet = packet[2:]

            eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
            # "!" already converts to host byte order, hence compare with the
            # IPv4 EtherType directly (an extra ntohs() works only on little-endian hosts)
            if eth_header[3] == 0x0800:  # IP
                ip_offset = ETH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ip_offset:ip_offset + 20])

        ip_length = ip_header[2]
        packet = packet[:ip_offset + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        # Start of the transport header; based on the detected link-layer
        # header length (not a fixed ETH_LENGTH) so PPP frames parse correctly
        l4_offset = ip_offset + iph_length

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[l4_offset:l4_offset + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    # Remember which destination ports each (source, destination) pair has tried
                    key = "%s:%s" % (src_ip, dst_ip)
                    if key not in _connect_src_dst:
                        _connect_src_dst[key] = set()
                        _connect_src_details[key] = set()
                    _connect_src_dst[key].add(dst_port)
                    _connect_src_details[key].add((sec, usec, src_port, dst_port))

                    # Once the timestamp moves past the last flush, report the
                    # pairs exceeding the threshold and start over
                    if sec > _connect_sec:
                        for key in _connect_src_dst:
                            if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                                _src_ip, _dst_ip = key.split(':')
                                for _sec, _usec, _src_port, _dst_port in _connect_src_details[key]:
                                    log_event((_sec, _usec, _src_ip, _src_port, _dst_ip, _dst_port, "TCP", TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                        _connect_sec = sec
                        _connect_src_dst.clear()
                        _connect_src_details.clear()

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = l4_offset + (tcph_length << 2)
                data = packet[h_size:]

                # Treat the payload as HTTP only if its first line looks like "<method> <path> HTTP/x"
                method, path = None, None
                index = data.find("\n")
                if index >= 0:
                    line = data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        request_parts = line.split(' ')
                        method = request_parts[0].upper()
                        path = request_parts[1].lower()

                if method and path:
                    host = dst_ip  # fallback when there is no Host header
                    index = data.find("\r\nHost:")

                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = data[index:data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    if "://" in path:
                        # Absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    else:
                        url = "%s%s" % (host, path)

                    user_agent = None
                    index = data.find("\r\nUser-Agent:")
                    if index >= 0:
                        index = index + len("\r\nUser-Agent:")
                        user_agent = urllib.unquote(data[index:data.find("\r\n", index)]).strip()

                    if config.USE_HEURISTICS:
                        found = False
                        if user_agent:
                            # Verdict per user agent is cached to avoid repeating the regex search
                            if user_agent not in _user_agent_cache:
                                found = _user_agent_cache[user_agent] = re.search(SUSPICIOUS_UA_REGEX, user_agent) is not None and not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS)
                            else:
                                found = _user_agent_cache[user_agent]

                            if found:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent.replace('(', "&#40;").replace(')', "&#41;"), "suspicious user agent", "(heuristic)"))

                        if not found and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # Candidate trail keys derived from the path: as is, without
                    # query string, without extension and without last segment
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    root, extension = os.path.splitext(checks[-1])
                    if extension:
                        checks.append(root)

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Each candidate is looked up bare and prefixed with the host
                        for candidate in (check, "%s%s" % (host, check)):
                            if candidate in trails:
                                # Put everything around the matched part of the URL into parentheses
                                parts = url.split(candidate)
                                other = ("(%s)" % part if part else part for part in parts)
                                trail = candidate.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[candidate][0], trails[candidate][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST and not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS) and re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                            trail = "%s(%s)" % (host, path)
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = parsed.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and not parsed.query and len(name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            ports = packet[l4_offset:l4_offset + 4]
            if len(ports) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", ports)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = l4_offset + 8  # skip the UDP header
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question into a dotted name
                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]  # drop the trailing dot
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP")

                        elif config.USE_HEURISTICS:
                            if (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                # Counters are kept per queried name and restarted when the hour changes
                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                    NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST and '.'.join(query.split('.')[-2:]) not in WHITELIST:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                # packet[...] is a 1-character string, so it has to go through
                # ord() before being compared with the numeric ICMP type
                if ord(packet[l4_offset]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #9
0
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Locates the IPv4 header inside the captured frame (PPP, plain Ethernet
    or VLAN tagged Ethernet), then inspects TCP, UDP (including DNS on
    port 53) and the remaining protocols listed in IPPROTO_LUT. Every
    match against `trails`, and every heuristic hit when
    config.USE_HEURISTICS is set, is reported through log_event().

    packet -- raw captured frame (Python 2 byte string)
    sec    -- seconds part of the capture timestamp
    usec   -- microseconds part of the capture timestamp

    Always returns None. Any exception raised while parsing (e.g. a
    truncated packet) is swallowed; the traceback is printed only when
    config.SHOW_DEBUG is set.
    """

    global _connect_sec

    try:
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            if ord(packet[2]) == 0 and ord(packet[3]) == 0x21:  # IPv4
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                # NOTE(review): presumably drops 2 bytes so the cooked header lines up with the Ethernet layout - confirm
                packet = packet[2:]

            # Reference: ftp://ftp.heanet.ie/disk1/sourceforge/t/tp/tpcat/tpcat%20python%20source/TPCAT.py

            if ord(packet[12]) == 8 and ord(packet[13]) == 0:  # IPv4
                ip_offset = ETH_LENGTH

            elif ord(packet[12]) == 0x81 and ord(packet[13]) == 0:  # VLAN
                if ord(packet[16]) == 8 and ord(packet[17]) == 0:  # IPv4
                    ip_offset = VLANH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ip_offset:ip_offset + 20])

        ip_length = ip_header[2]
        packet = packet[:ip_offset + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        # Start of the transport header; based on the detected link-layer
        # header length (not a fixed ETH_LENGTH) so PPP and VLAN tagged
        # frames parse correctly
        l4_offset = ip_offset + iph_length

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[l4_offset:l4_offset + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    # Remember which destination ports each (source, destination) pair has tried
                    key = "%s:%s" % (src_ip, dst_ip)
                    if key not in _connect_src_dst:
                        _connect_src_dst[key] = set()
                        _connect_src_details[key] = set()
                    _connect_src_dst[key].add(dst_port)
                    _connect_src_details[key].add((sec, usec, src_port, dst_port))

                    # Once the timestamp moves past the last flush, report the
                    # pairs exceeding the threshold and start over
                    if sec > _connect_sec:
                        for key in _connect_src_dst:
                            if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                                _src_ip, _dst_ip = key.split(':')
                                for _sec, _usec, _src_port, _dst_port in _connect_src_details[key]:
                                    log_event((_sec, _usec, _src_ip, _src_port, _dst_ip, _dst_port, "TCP", TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                        _connect_sec = sec
                        _connect_src_dst.clear()
                        _connect_src_details.clear()

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = l4_offset + (tcph_length << 2)
                data = packet[h_size:]

                # Treat the payload as HTTP only if its first line looks like "<method> <path> HTTP/x"
                method, path = None, None
                index = data.find("\n")
                if index >= 0:
                    line = data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        request_parts = line.split(' ')
                        method = request_parts[0].upper()
                        path = request_parts[1].lower()

                if method and path:
                    host = dst_ip  # fallback when there is no Host header
                    index = data.find("\r\nHost:")

                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = data[index:data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, "%s%s" % (host, path), "suspicious http request (missing host header)", "(heuristic)"))

                    if "://" in path:
                        # Absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP")
                    else:
                        url = "%s%s" % (host, path)

                    user_agent = None
                    index = data.find("\r\nUser-Agent:")
                    if index >= 0:
                        index = index + len("\r\nUser-Agent:")
                        user_agent = urllib.unquote(data[index:data.find("\r\n", index)]).strip()

                    if config.USE_HEURISTICS:
                        found = False
                        if user_agent:
                            # Verdict per user agent is cached to avoid repeating the regex search
                            if user_agent not in _user_agent_cache:
                                found = _user_agent_cache[user_agent] = re.search(SUSPICIOUS_UA_REGEX, user_agent) is not None and not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS)
                            else:
                                found = _user_agent_cache[user_agent]

                            if found:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent.replace('(', "&#40;").replace(')', "&#41;"), "suspicious user agent", "(heuristic)"))

                        if not found and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.HTTP, url, "suspicious http request (missing user agent header)", "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.UA, user_agent, "suspicious user agent (too short)", "(heuristic)"))

                    # Candidate trail keys derived from the path: as is, without
                    # query string, without extension and without last segment
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    root, extension = os.path.splitext(checks[-1])
                    if extension:
                        checks.append(root)

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # Each candidate is looked up bare and prefixed with the host
                        for candidate in (check, "%s%s" % (host, check)):
                            if candidate in trails:
                                # Put everything around the matched part of the URL into parentheses
                                parts = url.split(candidate)
                                other = ("(%s)" % part if part else part for part in parts)
                                trail = candidate.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[candidate][0], trails[candidate][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST and not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS) and re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                            trail = "%s(%s)" % (host, path)
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = parsed.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and not parsed.query and len(name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            ports = packet[l4_offset:l4_offset + 4]
            if len(ports) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", ports)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = l4_offset + 8  # skip the UDP header
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question into a dotted name
                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]  # drop the trailing dot
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP")

                        elif config.USE_HEURISTICS:
                            if (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                # Counters are kept per queried name and restarted when the hour changes
                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                    NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST and '.'.join(query.split('.')[-2:]) not in WHITELIST:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(packet[l4_offset]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #10
0
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet

    Parses the link-layer and IPv4 headers of the captured frame and then,
    depending on the IP protocol, checks the traffic against `trails`:

    * TCP: SYN-only segments are looked up by destination/source IP;
      segments with PSH set that carry an HTTP request are looked up by URL
      and (with config.USE_HEURISTICS) checked for suspicious request
      patterns, direct downloads and suspicious file names.
    * UDP: everything not sent from port 53 is looked up by IP; for port 53
      traffic the first question name is looked up in `trails` (queries) or
      counted for the "excessive no such domain name" heuristic (responses).
    * Any other protocol listed in IPPROTO_LUT is looked up by IP only.

    Every hit is reported through log_event(); nothing is returned. Any
    exception raised while parsing is caught and printed.

    :param packet: raw captured frame, link-layer header included
    :param sec: seconds value associated with the packet (passed to log_event,
        also used to bucket the no-such-name counters)
    :param usec: microseconds value associated with the packet (passed to log_event)
    """

    try:
        # Drop two leading bytes for "Linux cooked" captures so that the
        # fixed offsets used below line up
        # NOTE(review): presumably compensates for the longer SLL header - confirm
        if _datalink == pcapy.DLT_LINUX_SLL:
            packet = packet[2:]

        # The format string consumes exactly 14 bytes; the last field is the protocol
        eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
        eth_protocol = socket.ntohs(eth_header[3])

        if eth_protocol == IPPROTO:  # IP
            ip_header = struct.unpack("!BBHHHBBH4s4s", packet[ETH_LENGTH:ETH_LENGTH + 20])
            ip_length = ip_header[2]
            packet = packet[:ETH_LENGTH + ip_length]  # truncate
            iph_length = (ip_header[0] & 0xF) << 2  # header length field is in 32-bit words
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])

            if protocol == socket.IPPROTO_TCP:  # TCP
                i = iph_length + ETH_LENGTH
                src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", packet[i:i+14])

                if flags == 2:  # SYN set (only)
                    if dst_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                    elif src_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if flags & 8 != 0:  # PSH set
                    tcph_length = doff_reserved >> 4  # data offset, in 32-bit words
                    h_size = ETH_LENGTH + iph_length + (tcph_length << 2)
                    data = packet[h_size:]

                    if len(data) > 0 and "HTTP/" in data:
                        # Request line has to look like "<method> <path> HTTP/x.y"
                        index = data.find("\r\n")
                        if index >= 0:
                            line = data[:index]
                            if line.count(' ') == 2 and " HTTP/" in line:
                                path = line.split(' ')[1].lower()
                            else:
                                return
                        else:
                            return

                        # Requests without a Host header are not inspected any further
                        index = data.find("\r\nHost:")
                        if index >= 0:
                            index = index + len("\r\nHost:")
                            host = data[index:data.find("\r\n", index)]
                            host = host.strip()
                            host = re.sub(r":80\Z", "", host)
                        else:
                            return

                        url = "%s%s" % (host, path)

                        # Candidate trail keys, from most to least specific: path without
                        # trailing slash, without query string, without file extension
                        # and finally the parent directory
                        checks = [path.rstrip('/')]
                        if '?' in path:
                            checks.append(path.split('?')[0].rstrip('/'))

                        _ = os.path.splitext(checks[-1])
                        if _[1]:
                            checks.append(_[0])

                        if checks[-1].count('/') > 1:
                            checks.append(checks[-1][:checks[-1].rfind('/')])

                        for check in filter(None, checks):
                            # First pass tries the bare path, second pass the host-prefixed one
                            for _ in ("", host):
                                check = "%s%s" % (_, check)
                                if check in trails:
                                    # Parenthesize the parts of the URL that are not the matched trail
                                    parts = url.split(check)
                                    other = ("(%s)" % _ if _ else _ for _ in parts)
                                    trail = check.join(other)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                    return

                        if config.USE_HEURISTICS:
                            if re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)):
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                return

                            if ('.') in path:
                                _ = urlparse.urlparse(url)
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(host.split('.')[-2:]) not in WHITELIST and len(name) < 6:
                                    trail = "%s(%s)" % (host, path)
                                    # os.path.splitext() keeps the leading dot in `extension`,
                                    # hence no extra dot in the message
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"))
                                elif filename in SUSPICIOUS_FILENAMES:
                                    trail = "%s(%s)" % (host, path)
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "TCP", TRAIL.URL, trail, "suspicious page", "(heuristic)"))

            elif protocol == socket.IPPROTO_UDP:  # UDP
                i = iph_length + ETH_LENGTH
                _ = packet[i:i + 4]
                if len(_) < 4:
                    return

                src_port, dst_port = struct.unpack("!HH", _)

                # Traffic sent from port 53 is skipped here (no plain IP lookup)
                if src_port != 53:
                    if dst_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                    elif src_ip in trails:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if dst_port == 53 or src_port == 53:
                    h_size = ETH_LENGTH + iph_length + 8  # 8 == size of the UDP header
                    data = packet[h_size:]

                    # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                    if len(data) > 6:
                        qdcount = struct.unpack("!H", data[4:6])[0]
                        if qdcount > 0:
                            offset = 12
                            query = ""

                            # Walk the length-prefixed labels of the first question name;
                            # a zero length byte terminates the name
                            while len(data) > offset:
                                length = ord(data[offset])
                                if not length:
                                    query = query[:-1]
                                    break
                                query += data[offset + 1:offset + length + 1] + '.'
                                offset += length + 1

                            if ' ' in query or '.' not in query or query.endswith(".in-addr.arpa") or query.endswith(".local"):
                                return

                            if ord(data[2]) == 0x01:  # standard query
                                # Query type and class follow the terminating zero byte
                                type_, class_ = struct.unpack("!HH", data[offset + 1:offset + 5])

                                # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                                if type_ != 12 and class_ == 1:  # Type != PTR, Class IN
                                    parts = query.split('.')

                                    # Try the full name first, then every parent domain
                                    for i in xrange(0, len(parts)):
                                        domain = '.'.join(parts[i:])
                                        if domain in trails:
                                            if domain == query:
                                                trail = domain
                                            else:
                                                _ = ".%s" % domain
                                                trail = "(%s)%s" % (query[:-len(_)], _)

                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, trails[domain][0], trails[domain][1]))
                                            return

                                    if config.USE_HEURISTICS and len(parts[0]) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[0]:
                                        trail = None

                                        if len(parts) > 2:
                                            if '.'.join(parts[-2:]) not in WHITELIST:
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                        elif len(parts) == 2:
                                            if '.'.join(parts) not in WHITELIST:
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                        else:
                                            trail = query

                                        if trail:
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, trail, "long domain name (suspicious)", "(heuristic)"))

                            elif config.USE_HEURISTICS and (ord(data[2]) & 0x80) and (ord(data[3]) == 0x83):  # standard response, recursion available, no such name
                                # Counter entry is [hour bucket, hits]; it restarts whenever the bucket changes
                                if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[query][0] != sec / 3600:
                                    NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                                else:
                                    NO_SUCH_NAME_COUNTERS[query][1] += 1

                                    if NO_SUCH_NAME_COUNTERS[query][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, "UDP", TRAIL.DNS, query, "excessive no such domain name (suspicious)", "(heuristic)"))

            elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except Exception as ex:
        # Single-argument parenthesized print gives the same output on Python 2 and 3
        print("[x] '%s'" % ex)
        print(traceback.format_exc())
Exemple #11
0
def _process_ip(ip_data, sec, usec):
    """
    Processes single (raw) IP layer data

    Parses the IPv4 header found at the start of `ip_data` and inspects the
    payload according to its protocol, reporting every hit via log_event():

    * TCP, SYN only: source/destination IP lookup in `trails`; with
      config.USE_HEURISTICS the connection is also recorded for the port
      scanning heuristic, which is evaluated whenever `sec` advances.
    * TCP, anything else: sinkhole / seized-domain detection in HTTP
      responses sent from port 80 and, for HTTP requests, Host, proxy
      target, User-Agent, URL and request body checks.
    * UDP: IP lookup for everything not sent from port 53; for port 53
      traffic the first question name is handed to _check_domain()
      (queries) or fed to the "no such name" heuristics (responses).
    * Other protocols listed in IPPROTO_LUT: IP lookup only (for ICMP only
      when the first byte after the IP header equals 8).

    struct.error (e.g. truncated data) is ignored silently; any other
    exception is swallowed as well and only printed when
    config.SHOW_DEBUG is set. Nothing is returned.

    :param ip_data: raw packet data starting at the IP header
    :param sec: seconds value associated with the packet (passed to
        log_event, also used to window the heuristics)
    :param usec: microseconds value associated with the packet (passed to
        log_event)
    """

    global _connect_sec

    try:
        # Crude bound on cache size: once over the limit everything is dropped
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # Swap in the current `sec` under the (optional) lock; the previous
            # value tells whether `sec` has advanced since the last packet
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            if sec > connect_sec:
                # Port scanning heuristic: report every "src:dst" pair that
                # collected more than PORT_SCANNING_THRESHOLD distinct
                # destination ports since the previous flush
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split(':')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(
                                str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(
                                str(_[3]) for _ in _connect_src_details[key])
                            log_event(
                                (sec, usec, _src_ip, ','.join(_src_ports),
                                 _dst_ip, ','.join(_dst_ports), PROTO.TCP,
                                 TRAIL.IP, _src_ip, "potential port scanning",
                                 "(heuristic)"))

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])

        ip_length = ip_header[2]
        ip_data = ip_data[:ip_length]  # truncate
        iph_length = (ip_header[0] & 0xf) << 2  # header length field is in 32-bit words
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack(
                "!HHLLBB", ip_data[iph_length:iph_length + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0],
                               trails[dst_ip][1]))
                elif src_ip in trails and dst_ip != LOCALHOST_IP:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0],
                               trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    # Bookkeeping for the port scanning heuristic evaluated
                    # at the top of this function
                    if dst_ip != LOCALHOST_IP:
                        key = "%s:%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add(
                            (sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4  # data offset, in 32-bit words
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response coming from port 80: look for sinkhole
                # markers or a "domain seized" page title
                if src_port == 80 and tcp_data.startswith("HTTP/"):
                    if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")]
                           for _ in ("X-Sinkhole:",
                                     "Server: Apache 1.0/SinkSoft"
                                     )) or "\r\n\r\nsinkhole" in tcp_data:
                        log_event(
                            (sec, usec, src_ip, src_port, dst_ip, dst_port,
                             PROTO.TCP, TRAIL.IP, src_ip,
                             "sinkhole response (malware)", "(heuristic)"))
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            title = tcp_data[index + len("<title>"):tcp_data.
                                             find("</title>", index)]
                            if all(_ in title.lower()
                                   for _ in ("this domain",
                                             "has been seized")):
                                log_event((sec, usec, src_ip, src_port, dst_ip,
                                           dst_port, PROTO.TCP, TRAIL.IP,
                                           title, "seized domain (suspicious)",
                                           "(heuristic)"))

                # HTTP request: first line has to look like
                # "<method> <path> HTTP/x.y"
                method, path = None, None
                index = tcp_data.find("\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method = line.split(' ')[0].upper()
                        path = line.split(' ')[1].lower()

                if method and path:
                    post_data = None
                    host = dst_ip  # fallback when there is no Host header
                    index = tcp_data.find("\r\nHost:")

                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = tcp_data[index:tcp_data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                        # Guard against an empty Host value (or one made of
                        # ":port" only): indexing [-1] on it would raise
                        # IndexError, which the handler at the bottom swallows,
                        # silently skipping every check that follows
                        host_name = host.split(':')[0]
                        if host_name and not host_name[-1].isdigit(
                        ) and dst_ip in trails:
                            log_event(
                                (sec, usec, src_ip, src_port, dst_ip, dst_port,
                                 PROTO.TCP, TRAIL.IP,
                                 "%s (%s)" % (dst_ip, host_name),
                                 trails[dst_ip][0], trails[dst_ip][1]))
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event(
                            (sec, usec, src_ip, src_port, dst_ip, dst_port,
                             PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path),
                             "suspicious http request (missing host header)",
                             "(heuristic)"))

                    # Everything from the blank line on is treated as request body
                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index:]

                    if "://" in path:
                        # Request line carries a full URL: take host and path
                        # from it and check the target domain as well
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip,
                                      src_port, dst_ip, dst_port, PROTO.TCP)
                    elif method == "CONNECT":
                        # CONNECT target is given in place of the path
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip,
                                      src_port, dst_ip, dst_port, PROTO.TCP)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None
                        match = re.search("(?i)\r\nUser-Agent:([^\r\n]+)",
                                          tcp_data)
                        if match:
                            user_agent = urllib.unquote(match.group(1)).strip()

                        if user_agent:
                            # Cached verdict: None == not seen yet,
                            # False == not suspicious, string == trail to log
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent
                                           for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX,
                                                      user_agent)
                                    if match:
                                        # Trail is the User-Agent with the parts
                                        # around the (first) match parenthesized
                                        # and literal parentheses escaped
                                        result = _result_cache[
                                            user_agent] = match.group(0).join(
                                                ("(%s)" if _ else "%s") %
                                                _.replace('(', "\\(").replace(
                                                    ')', "\\)")
                                                for _ in user_agent.split(
                                                    match.group(0), 1))
                                if not result:
                                    _result_cache[user_agent] = False
                            if result:
                                log_event(
                                    (sec, usec, src_ip, src_port, dst_ip,
                                     dst_port, PROTO.TCP, TRAIL.UA, result,
                                     "suspicious user agent", "(heuristic)"))

                        if not result and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event((
                                    sec, usec, src_ip, src_port, dst_ip,
                                    dst_port, PROTO.TCP, TRAIL.HTTP, url,
                                    "suspicious http request (missing user agent header)",
                                    "(heuristic)"))
                            elif len(user_agent
                                     ) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event(
                                    (sec, usec, src_ip, src_port, dst_ip,
                                     dst_port, PROTO.TCP, TRAIL.UA, user_agent,
                                     "suspicious user agent (too short)",
                                     "(heuristic)"))

                    # Candidate trail keys, from most to least specific: path
                    # without trailing slash, without query string, without
                    # file extension and finally the parent directory
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        # First pass tries the bare path, second pass the
                        # host-prefixed one
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                # Parenthesize the parts of the URL that are
                                # not the matched trail
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event(
                                    (sec, usec, src_ip, src_port, dst_ip,
                                     dst_port, PROTO.TCP, TRAIL.URL, trail,
                                     trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        # Percent-encode the listed characters in the path
                        # before it is matched and logged
                        if any(char in path for char in
                               SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST:
                            if not any(
                                    _ in path
                                    for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(path)
                                if result is None:
                                    result = _result_cache[path] = re.search(
                                        SUSPICIOUS_HTTP_REQUEST_REGEX,
                                        urllib.unquote(path)) is not None
                                if result:
                                    trail = "%s(%s)" % (host, path)
                                    log_event(
                                        (sec, usec, src_ip, src_port, dst_ip,
                                         dst_port, PROTO.TCP, TRAIL.URL, trail,
                                         "suspicious http request",
                                         "(heuristic)"))
                                    return

                            if post_data and not any(
                                    _ in post_data
                                    for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(post_data)
                                if result is None:
                                    result = _result_cache[
                                        post_data] = re.search(
                                            SUSPICIOUS_HTTP_REQUEST_REGEX,
                                            urllib.unquote(
                                                post_data)) is not None
                                if result:
                                    # Backslashes are spelled out explicitly
                                    # (same resulting string as "\(" / "\)")
                                    trail = "%s(%s \\(%s %s\\))" % (
                                        host, path, method, post_data.strip())
                                    log_event(
                                        (sec, usec, src_ip, src_port, dst_ip,
                                         dst_port, PROTO.TCP, TRAIL.URL, trail,
                                         "suspicious http request",
                                         "(heuristic)"))
                                    return

                        if '.' in path:
                            _ = urlparse.urlparse("http://%s" %
                                                  url)  # dummy scheme
                            filename = _.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(
                                    _ in path
                                    for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS
                            ) and '.'.join(
                                    host.split('.')[-2:]
                            ) not in WHITELIST and not _.query and len(
                                    name) < 10:
                                trail = "%s(%s)" % (host, path)
                                log_event(
                                    (sec, usec, src_ip, src_port, dst_ip,
                                     dst_port, PROTO.TCP, TRAIL.URL, trail,
                                     "direct %s download (suspicious)" %
                                     extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event(
                                    (sec, usec, src_ip, src_port, dst_ip,
                                     dst_port, PROTO.TCP, TRAIL.URL, trail,
                                     "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            # Traffic sent from port 53 is skipped here (no plain IP lookup)
            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               PROTO.UDP, TRAIL.IP, dst_ip, trails[dst_ip][0],
                               trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0],
                               trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                dns_data = ip_data[iph_length + 8:]  # 8 == size of the UDP header

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Walk the length-prefixed labels of the first question
                        # name; a zero length byte terminates the name
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length +
                                              1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or any(
                                query.endswith(_)
                                for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            # Query type and class follow the terminating zero byte
                            type_, class_ = struct.unpack(
                                "!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (
                                    12, 28
                            ) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip,
                                              src_port, dst_ip, dst_port,
                                              PROTO.UDP)

                        elif config.USE_HEURISTICS:
                            if (ord(dns_data[2]) & 0x80) and (
                                    ord(dns_data[3]) == 0x83
                            ):  # standard response, recursion available, no such name
                                parts = query.split('.')
                                if not (
                                        len(parts) > 4
                                        and all(_.isdigit() and int(_) < 256
                                                for _ in parts[:4])
                                ):  # generic check for DNSBL IP lookups
                                    # Count the failure under the full name and,
                                    # for names with more than one dot, under a
                                    # "*.<last two labels>" wildcard key. Entry
                                    # layout: [hour bucket, hits, queries seen]
                                    for _ in filter(
                                            None,
                                        (query, "*.%s" % '.'.join(parts[-2:])
                                         if query.count('.') > 1 else None)):
                                        if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[
                                                _][0] != sec / 3600:
                                            NO_SUCH_NAME_COUNTERS[_] = [
                                                sec / 3600, 1,
                                                set()
                                            ]
                                        else:
                                            NO_SUCH_NAME_COUNTERS[_][1] += 1
                                            NO_SUCH_NAME_COUNTERS[_][2].add(
                                                query)

                                            if NO_SUCH_NAME_COUNTERS[_][
                                                    1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and _ not in WHITELIST and '.'.join(
                                                        _.split('.')[-2:]
                                                    ) not in WHITELIST:
                                                if _.startswith("*."):
                                                    # Wildcard hit: list the
                                                    # collected prefixes in
                                                    # parentheses before the
                                                    # common suffix
                                                    log_event((
                                                        sec, usec, src_ip,
                                                        src_port, dst_ip,
                                                        dst_port, PROTO.UDP,
                                                        TRAIL.DNS, "%s%s" %
                                                        ("(%s)" % ','.join(
                                                            item.replace(
                                                                _[1:], "")
                                                            for item in
                                                            NO_SUCH_NAME_COUNTERS[
                                                                _][2]), _[1:]),
                                                        "excessive no such domain name (suspicious)",
                                                        "(heuristic)"))
                                                    # Per-name counters covered by
                                                    # the wildcard are dropped
                                                    for item in NO_SUCH_NAME_COUNTERS[
                                                            _][2]:
                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[
                                                                item]
                                                        except KeyError:
                                                            pass
                                                else:
                                                    log_event((
                                                        sec, usec, src_ip,
                                                        src_port, dst_ip,
                                                        dst_port, PROTO.UDP,
                                                        TRAIL.DNS, _,
                                                        "excessive no such domain name (suspicious)",
                                                        "(heuristic)"))

                                                # Reported counter is removed
                                                # (counting restarts on the
                                                # next failure)
                                                try:
                                                    del NO_SUCH_NAME_COUNTERS[
                                                        _]
                                                except KeyError:
                                                    pass

                                                break

                                    # Reference: https://github.com/exp0se/dga_detector
                                    # Only the first offending label is reported
                                    for part in parts:
                                        if part:
                                            consonants = re.findall(
                                                "(?i)[bcdfghjklmnpqrstvwxyz]",
                                                part)
                                            if len(
                                                    consonants
                                            ) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                log_event((
                                                    sec, usec, src_ip,
                                                    src_port, dst_ip, dst_port,
                                                    PROTO.UDP, TRAIL.DNS,
                                                    query,
                                                    "high consonant no such domain name (suspicious)",
                                                    "(heuristic)"))
                                                break

                                            # Shannon entropy (in bits) of the
                                            # label's character distribution
                                            probabilities = (
                                                float(part.count(c)) /
                                                len(part)
                                                for c in set(_ for _ in part))
                                            entropy = -sum(
                                                p * math.log(p) / math.log(2.0)
                                                for p in probabilities)
                                            if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                log_event((
                                                    sec, usec, src_ip,
                                                    src_port, dst_ip, dst_port,
                                                    PROTO.UDP, TRAIL.DNS,
                                                    query,
                                                    "high entropy no such domain name (suspicious)",
                                                    "(heuristic)"))
                                                break

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 8:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-',
                           IPPROTO_LUT[protocol], TRAIL.IP, dst_ip,
                           trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-',
                           IPPROTO_LUT[protocol], TRAIL.IP, src_ip,
                           trails[src_ip][0], trails[src_ip][1]))

    except struct.error:
        # Raised by struct.unpack() when the data is shorter than the format
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #12
0
def _process_ip(ip_data, sec, usec):
    """
    Processes single (raw) IP layer data

    Parses the IPv4 header at the start of ip_data and, depending on the
    carried protocol (TCP, UDP or anything listed in IPPROTO_LUT), matches
    addresses, DNS queries and HTTP requests against the global trails and,
    when config.USE_HEURISTICS is set, against the heuristic checks. Every
    hit is reported through log_event() as an 11-item tuple.

    ip_data -- raw packet data starting at the IPv4 header
    sec     -- seconds part of the packet timestamp; also drives the
               per-second (port scanning) and per-hour (no such name)
               bookkeeping
    usec    -- sub-second part of the packet timestamp (presumably
               microseconds); only passed on to log_event()

    Returns None. Nothing propagates to the caller: struct.error (raised by
    struct.unpack() on short data) is ignored silently and any other
    exception is swallowed, with its traceback printed only when
    config.SHOW_DEBUG is set.
    """

    global _connect_sec

    try:
        # Keep the shared result cache bounded by dropping it wholesale
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # Swap in the current second under the (optional) lock
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # First packet of a newer second: report every source that sent
            # SYNs to more than PORT_SCANNING_THRESHOLD distinct ports of one
            # destination, then start collecting from scratch
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split(":")
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ",".join(_src_ports), _dst_ip, ",".join(_dst_ports),
                                       PROTO.TCP, TRAIL.IP, _src_ip, "potential port scanning", "(heuristic)"))

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])

        ip_length = ip_header[2]
        ip_data = ip_data[:ip_length]  # truncate
        iph_length = (ip_header[0] & 0xF) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack(
                "!HHLLBB", ip_data[iph_length:iph_length + 14])

            # Leading fields shared by every event reported for this segment
            event = (sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event(event + (TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails and dst_ip != LOCALHOST_IP:
                    log_event(event + (TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

                if config.USE_HEURISTICS:
                    if dst_ip != LOCALHOST_IP:
                        # Remember the connection attempt for the port scanning check above
                        key = "%s:%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response coming from port 80: look for sinkhole / seizure banners
                if src_port == 80 and tcp_data.startswith("HTTP/"):
                    # partition() yields the whole payload when the blank line
                    # ending the headers is missing (slicing with find() == -1
                    # would silently cut the last byte off)
                    headers = tcp_data.partition("\r\n\r\n")[0]
                    if (any(_ in headers for _ in ("X-Sinkhole:", "Server: Apache 1.0/SinkSoft"))
                            or "\r\n\r\nsinkhole" in tcp_data):
                        log_event(event + (TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"))
                    else:
                        index = tcp_data.find("<title>")
                        if index >= 0:
                            start = index + len("<title>")
                            end = tcp_data.find("</title>", index)
                            # An unterminated title runs to the end of the payload
                            title = tcp_data[start:end] if end >= 0 else tcp_data[start:]
                            if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                log_event(event + (TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"))

                # HTTP request line: "<method> <path> HTTP/<version>"
                method, path = None, None
                index = tcp_data.find("\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(" ") == 2 and " HTTP/" in line:
                        tokens = line.split(" ")
                        method = tokens[0].upper()
                        path = tokens[1].lower()

                if method and path:
                    post_data = None
                    host = dst_ip  # used when the request carries no Host header
                    index = tcp_data.find("\r\nHost:")

                    if index >= 0:
                        index += len("\r\nHost:")
                        end = tcp_data.find("\r\n", index)
                        # Header line may be the (unterminated) end of the payload
                        host = tcp_data[index:end] if end >= 0 else tcp_data[index:]
                        host = re.sub(r":80\Z", "", host.strip())
                        hostname = host.split(":")[0]
                        # Guard against an empty host name ("Host:" or "Host: :8080");
                        # indexing it used to raise IndexError, which aborted all
                        # remaining checks for this request
                        if hostname and not hostname[-1].isdigit() and dst_ip in trails:
                            log_event(event + (TRAIL.IP, "%s (%s)" % (dst_ip, hostname),
                                               trails[dst_ip][0], trails[dst_ip][1]))
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event(event + (TRAIL.HTTP, "%s%s" % (host, path),
                                           "suspicious http request (missing host header)", "(heuristic)"))

                    # Everything from the blank line onwards (separator included)
                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index:]

                    if "://" in path:
                        # Absolute URL in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if "/" not in url:
                            url = "%s/" % url

                        host, path = url.split("/", 1)
                        host = re.sub(r":80\Z", "", host)
                        path = "/%s" % path
                        proxy_domain = host.split(":")[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    elif method == "CONNECT":
                        if "/" in path:
                            host, path = path.split("/", 1)
                            path = "/%s" % path
                        else:
                            host, path = path, "/"
                        host = re.sub(r":80\Z", "", host)
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(":")[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None
                        match = re.search("(?i)\r\nUser-Agent:([^\r\n]+)", tcp_data)
                        if match:
                            user_agent = urllib.unquote(match.group(1)).strip()

                        if user_agent:
                            # Cached value is either the formatted trail or False
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        # Parenthesize what surrounds the matched part,
                                        # escaping parentheses already present
                                        result = _result_cache[user_agent] = match.group(0).join(
                                            ("(%s)" if _ else "%s") % _.replace("(", "\\(").replace(")", "\\)")
                                            for _ in user_agent.split(match.group(0), 1))
                                if not result:
                                    _result_cache[user_agent] = False
                            if result:
                                log_event(event + (TRAIL.UA, result, "suspicious user agent", "(heuristic)"))

                        if not result and config.CHECK_SHORT_OR_MISSING_USER_AGENT:
                            if user_agent is None:
                                log_event(event + (TRAIL.HTTP, url,
                                                   "suspicious http request (missing user agent header)",
                                                   "(heuristic)"))
                            elif len(user_agent) < SUSPICIOUS_UA_LENGTH_THRESHOLD:
                                log_event(event + (TRAIL.UA, user_agent,
                                                   "suspicious user agent (too short)", "(heuristic)"))

                    # Candidate trail keys derived from the path: without trailing
                    # slash, without query string, without extension and without
                    # the last path segment
                    checks = [path.rstrip("/")]
                    if "?" in path:
                        checks.append(path.split("?")[0].rstrip("/"))

                    root, ext = os.path.splitext(checks[-1])
                    if ext:
                        checks.append(root)

                    if checks[-1].count("/") > 1:
                        checks.append(checks[-1][:checks[-1].rfind("/")])

                    for check in filter(None, checks):
                        for prefix in ("", host):
                            # check is rebound on purpose: first round tests the bare
                            # path, second round the host-prefixed one
                            check = "%s%s" % (prefix, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % part if part else part for part in parts)
                                trail = check.join(other)
                                log_event(event + (TRAIL.URL, trail, trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if host not in WHITELIST:
                            if not any(_ in path for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(path)
                                if result is None:
                                    result = _result_cache[path] = (
                                        re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(path)) is not None)
                                if result:
                                    trail = "%s(%s)" % (host, path)
                                    log_event(event + (TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                            if post_data and not any(_ in post_data for _ in WHITELIST_HTTP_REQUEST_KEYWORDS):
                                result = _result_cache.get(post_data)
                                if result is None:
                                    result = _result_cache[post_data] = (
                                        re.search(SUSPICIOUS_HTTP_REQUEST_REGEX, urllib.unquote(post_data))
                                        is not None)
                                if result:
                                    # "\\(" spells the same backslash + parenthesis as the
                                    # former "\(" without relying on an invalid escape
                                    trail = "%s(%s \\(%s %s\\))" % (host, path, method, post_data.strip())
                                    log_event(event + (TRAIL.URL, trail, "suspicious http request", "(heuristic)"))
                                    return

                        if "." in path:
                            parsed = urlparse.urlparse("http://%s" % url)  # dummy scheme
                            filename = parsed.path.split("/")[-1]
                            name, extension = os.path.splitext(filename)
                            if (extension
                                    and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS
                                    and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS)
                                    and ".".join(host.split(".")[-2:]) not in WHITELIST
                                    and not parsed.query
                                    and len(name) < 10):
                                trail = "%s(%s)" % (host, path)
                                log_event(event + (TRAIL.URL, trail,
                                                   "direct %s download (suspicious)" % extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event(event + (TRAIL.URL, trail, "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            ports = ip_data[iph_length:iph_length + 4]
            if len(ports) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", ports)

            # Leading fields shared by every event reported for this datagram
            event = (sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP)

            if src_port != 53:
                if dst_ip in trails:
                    log_event(event + (TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event(event + (TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                dns_data = ip_data[iph_length + 8:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        # Join the length-prefixed labels of the first question
                        # with dots; a zero length ends the name
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + "."
                            offset += length + 1

                        if (" " in query
                                or "." not in query
                                or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES)):
                            return

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP)

                        elif config.USE_HEURISTICS:
                            # standard response, recursion available, no such name
                            if (ord(dns_data[2]) & 0x80) and (ord(dns_data[3]) == 0x83):
                                parts = query.split(".")
                                # generic check for DNSBL IP lookups
                                if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):
                                    # Count the failure for the name itself and, for names
                                    # with more than one dot, for "*.<last two labels>"
                                    wildcard = "*.%s" % ".".join(parts[-2:]) if query.count(".") > 1 else None
                                    for counter_key in filter(None, (query, wildcard)):
                                        # Counter layout: [hour bucket, hits, set of queries]
                                        # NOTE(review): sec / 3600 only yields a whole-hour
                                        # bucket under Python 2 integer division - confirm
                                        # sec is always an int
                                        if (counter_key not in NO_SUCH_NAME_COUNTERS
                                                or NO_SUCH_NAME_COUNTERS[counter_key][0] != sec / 3600):
                                            NO_SUCH_NAME_COUNTERS[counter_key] = [sec / 3600, 1, set()]
                                        else:
                                            NO_SUCH_NAME_COUNTERS[counter_key][1] += 1
                                            NO_SUCH_NAME_COUNTERS[counter_key][2].add(query)

                                            if (NO_SUCH_NAME_COUNTERS[counter_key][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD
                                                    and counter_key not in WHITELIST
                                                    and ".".join(counter_key.split(".")[-2:]) not in WHITELIST):
                                                if counter_key.startswith("*."):
                                                    seen = NO_SUCH_NAME_COUNTERS[counter_key][2]
                                                    suffix = counter_key[1:]  # ".<last two labels>"
                                                    log_event(event + (
                                                        TRAIL.DNS,
                                                        "%s%s" % ("(%s)" % ",".join(item.replace(suffix, "")
                                                                                    for item in seen), suffix),
                                                        "excessive no such domain name (suspicious)",
                                                        "(heuristic)"))
                                                    # Individual counters are covered by this report
                                                    for item in seen:
                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[item]
                                                        except KeyError:
                                                            pass
                                                else:
                                                    log_event(event + (
                                                        TRAIL.DNS, counter_key,
                                                        "excessive no such domain name (suspicious)",
                                                        "(heuristic)"))

                                                try:
                                                    del NO_SUCH_NAME_COUNTERS[counter_key]
                                                except KeyError:
                                                    pass

                                                break

                                    # Reference: https://github.com/exp0se/dga_detector
                                    for part in parts:
                                        if part:
                                            consonants = re.findall("(?i)[bcdfghjklmnpqrstvwxyz]", part)
                                            if len(consonants) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                log_event(event + (
                                                    TRAIL.DNS, query,
                                                    "high consonant no such domain name (suspicious)",
                                                    "(heuristic)"))
                                                break

                                            # Shannon entropy (in bits) of the label's characters
                                            probabilities = (float(part.count(c)) / len(part) for c in set(part))
                                            entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                            if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                log_event(event + (
                                                    TRAIL.DNS, query,
                                                    "high entropy no such domain name (suspicious)",
                                                    "(heuristic)"))
                                                break

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 8:  # Non-echo request
                    return

            # No ports at this layer, hence the "-" placeholders
            event = (sec, usec, src_ip, "-", dst_ip, "-", IPPROTO_LUT[protocol])

            if dst_ip in trails:
                log_event(event + (TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event(event + (TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]))

    except struct.error:
        # Data too short for one of the fixed-size headers
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #13
0
def _process_packet(packet, sec, usec, ip_offset):
    """
    Processes single (raw) IP layer data

    Parses the IPv4/IPv6 header located at `ip_offset` inside `packet` and,
    depending on the carried protocol (TCP, UDP or any other protocol listed
    in IPPROTO_LUT), reports matches against `trails` and heuristic findings
    by calling log_event().

    packet    -- raw captured data (string of bytes)
    sec       -- seconds part of the capture timestamp
    usec      -- microseconds part of the capture timestamp
    ip_offset -- offset inside `packet` at which the IP header starts

    Returns None. struct.error (e.g. truncated headers) is silently ignored;
    any other exception is swallowed and its traceback is printed only when
    config.SHOW_DEBUG is set.
    """

    global _connect_sec
    global _last_syn
    global _last_logged_syn
    global _last_udp
    global _last_logged_udp
    global _last_dns_exhaustion
    global _subdomains_sec

    try:
        # Keep the shared result cache bounded by wiping it once it outgrows the limit
        if len(_result_cache) > MAX_RESULT_CACHE_ENTRIES:
            _result_cache.clear()

        if config.USE_HEURISTICS:
            # The lock is optional (may be falsy); when present it guards the swap of _connect_sec
            if _locks.connect_sec:
                _locks.connect_sec.acquire()

            connect_sec = _connect_sec
            _connect_sec = sec

            if _locks.connect_sec:
                _locks.connect_sec.release()

            # On the first packet of a newer second: report src~dst pairs that SYN'ed
            # more distinct destination ports than the threshold, then start over
            if sec > connect_sec:
                for key in _connect_src_dst:
                    if len(_connect_src_dst[key]) > PORT_SCANNING_THRESHOLD:
                        _src_ip, _dst_ip = key.split('~')
                        if _src_ip not in WHITELIST:
                            _src_ports = set(str(_[2]) for _ in _connect_src_details[key])
                            _dst_ports = set(str(_[3]) for _ in _connect_src_details[key])
                            log_event((sec, usec, _src_ip, ','.join(_src_ports), _dst_ip, ','.join(_dst_ports), PROTO.TCP, TRAIL.IP, "-", "potential port scanning", "(heuristic)"), packet)

                _connect_src_dst.clear()
                _connect_src_details.clear()

        ip_data = packet[ip_offset:]
        ip_version = ord(ip_data[0]) >> 4  # version is the high nibble of the first byte
        localhost_ip = LOCALHOST_IP[ip_version]

        if ip_version == 0x04:  # IPv4
            ip_header = struct.unpack("!BBHHHBBH4s4s", ip_data[:20])
            iph_length = (ip_header[0] & 0xf) << 2  # low nibble (header length in 32-bit words) * 4
            protocol = ip_header[6]
            src_ip = socket.inet_ntoa(ip_header[8])
            dst_ip = socket.inet_ntoa(ip_header[9])
        elif ip_version == 0x06:  # IPv6
            # Reference: http://chrisgrundemann.com/index.php/2012/introducing-ipv6-understanding-ipv6-addresses/
            ip_header = struct.unpack("!BBHHBB16s16s", ip_data[:40])
            iph_length = 40  # fixed size used here; extension headers are not walked
            protocol = ip_header[4]
            src_ip = inet_ntoa6(ip_header[6])
            dst_ip = inet_ntoa6(ip_header[7])
        else:
            return

        if protocol == socket.IPPROTO_TCP:  # TCP
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack("!HHLLBB", ip_data[iph_length:iph_length+14])

            # Non-SYN traffic touching a known trail is handed to log_event() only when plugins
            # are configured, with skip_write=True (presumably so that it is not written to the log)
            if flags != 2 and config.plugin_functions:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet, skip_write=True)
                elif src_ip in trails and dst_ip != localhost_ip:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet, skip_write=True)

            if flags == 2:  # SYN set (only)
                _ = _last_syn
                _last_syn = (sec, src_ip, src_port, dst_ip, dst_port)
                if _ == _last_syn:  # skip bursts
                    return

                if dst_ip in trails:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)

                elif src_ip in trails and dst_ip != localhost_ip:
                    _ = _last_logged_syn
                    _last_logged_syn = _last_syn
                    if _ != _last_logged_syn:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                # Bookkeeping consumed by the port scanning check at the top of this function
                if config.USE_HEURISTICS:
                    if dst_ip != localhost_ip:
                        key = "%s~%s" % (src_ip, dst_ip)
                        if key not in _connect_src_dst:
                            _connect_src_dst[key] = set()
                            _connect_src_details[key] = set()
                        _connect_src_dst[key].add(dst_port)
                        _connect_src_details[key].add((sec, usec, src_port, dst_port))

            else:
                tcph_length = doff_reserved >> 4  # data offset (in 32-bit words) is the high nibble
                h_size = iph_length + (tcph_length << 2)
                tcp_data = ip_data[h_size:]

                # HTTP response inspection (sinkhole markers, seized domain notices)
                if config.USE_DEEP_HEURISTICS:
                    if tcp_data.startswith("HTTP/"):
                        if any(_ in tcp_data[:tcp_data.find("\r\n\r\n")] for _ in ("X-Sinkhole:", "X-Malware-Sinkhole:", "Server: You got served", "Server: Apache 1.0/SinkSoft", "sinkdns.org")) or "\r\n\r\nsinkhole" in tcp_data:
                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, src_ip, "sinkhole response (malware)", "(heuristic)"), packet)
                        else:
                            index = tcp_data.find("<title>")
                            if index >= 0:
                                title = tcp_data[index + len("<title>"):tcp_data.find("</title>", index)]
                                if all(_ in title.lower() for _ in ("this domain", "has been seized")):
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, title, "seized domain (suspicious)", "(heuristic)"), packet)

                # HTTP request line detection ("<method> <path> HTTP/x.y")
                method, path = None, None
                index = tcp_data.find("\r\n")
                if index >= 0:
                    line = tcp_data[:index]
                    if line.count(' ') == 2 and " HTTP/" in line:
                        method, path, _ = line.split(' ')

                if method and path:
                    post_data = None
                    host = dst_ip  # fallback used when no Host header is present
                    first_index = tcp_data.find("\r\nHost:")

                    if first_index >= 0:
                        first_index = first_index + len("\r\nHost:")
                        last_index = tcp_data.find("\r\n", first_index)
                        if last_index >= 0:
                            host = tcp_data[first_index:last_index]
                            host = host.strip()
                            if host.endswith(":80"):
                                host = host[:-3]
                            if host and host[0].isalpha() and dst_ip in trails:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.IP, "%s (%s)" % (dst_ip, host.split(':')[0]), trails[dst_ip][0], trails[dst_ip][1]), packet)
                    elif config.USE_HEURISTICS and config.CHECK_MISSING_HOST:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, "%s%s" % (host, path), "missing host header (suspicious)", "(heuristic)"), packet)

                    # Everything after the blank line terminating the headers is treated as the body
                    index = tcp_data.find("\r\n\r\n")
                    if index >= 0:
                        post_data = tcp_data[index + 4:]

                    if "://" in path:  # absolute URI in the request line (proxy style request)
                        url = path.split("://", 1)[1]

                        if '/' not in url:
                            url = "%s/" % url

                        host, path = url.split('/', 1)
                        if host.endswith(":80"):
                            host = host[:-3]
                        path = "/%s" % path
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    elif method == "CONNECT":
                        if '/' in path:
                            host, path = path.split('/', 1)
                            path = "/%s" % path
                        else:
                            host, path = path, '/'
                        if host.endswith(":80"):
                            host = host[:-3]
                        url = "%s%s" % (host, path)
                        proxy_domain = host.split(':')[0]
                        _check_domain(proxy_domain, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, packet)
                    else:
                        url = "%s%s" % (host, path)

                    if config.USE_HEURISTICS:
                        user_agent, result = None, None

                        first_index = tcp_data.find("\r\nUser-Agent:")
                        if first_index >= 0:
                            first_index = first_index + len("\r\nUser-Agent:")
                            last_index = tcp_data.find("\r\n", first_index)
                            if last_index >= 0:
                                user_agent = tcp_data[first_index:last_index]
                                user_agent = urllib.unquote(user_agent).strip()

                        if user_agent:
                            # Cached verdict: None (not seen yet), False (clean) or the trail to report
                            result = _result_cache.get(user_agent)
                            if result is None:
                                if not any(_ in user_agent for _ in WHITELIST_UA_KEYWORDS):
                                    match = re.search(SUSPICIOUS_UA_REGEX, user_agent)
                                    if match:
                                        def _(value):
                                            return value.replace('(', "\\(").replace(')', "\\)")

                                        parts = user_agent.split(match.group(0), 1)

                                        if len(parts) > 1 and parts[0] and parts[-1]:
                                            result = _result_cache[user_agent] = "%s (%s)" % (_(match.group(0)), _(user_agent))
                                        else:
                                            result = _result_cache[user_agent] = _(match.group(0)).join(("(%s)" if part else "%s") % _(part) for part in parts)
                                if not result:
                                    _result_cache[user_agent] = False

                            if result:
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.UA, result, "user agent (suspicious)", "(heuristic)"), packet)

                    # Candidate forms of the path to look up in trails: as is, without the query,
                    # without the extension and without the last path segment
                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        for _ in ("", host):  # first the bare path, then host + path
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                # packet is passed along like in every other log_event() call of this function
                                log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, trails[check][0], trails[check][1]), packet)
                                return

                    if config.USE_HEURISTICS:
                        # Regexes are run against the decoded values, while the reported path/post data
                        # get selected characters replaced by their forced encoded form
                        unquoted_path = urllib.unquote(path)
                        unquoted_post_data = urllib.unquote(post_data or "")
                        for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                            replacement = SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS[char]
                            path = path.replace(char, replacement)
                            if post_data:
                                post_data = post_data.replace(char, replacement)

                        if not _check_domain_whitelisted(host):
                            if not any(_ in unquoted_path.lower() for _ in WHITELIST_HTTP_REQUEST_PATHS):
                                # Cheap substring pre-condition before running the (costlier) regexes
                                if any(_ in unquoted_path for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_path)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_path, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_path] = found or ""
                                    if found:
                                        trail = "%s(%s)" % (host, path)
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                                if any(_ in unquoted_post_data for _ in SUSPICIOUS_HTTP_REQUEST_PRE_CONDITION):
                                    found = _result_cache.get(unquoted_post_data)
                                    if found is None:
                                        for desc, regex in SUSPICIOUS_HTTP_REQUEST_REGEXES:
                                            if re.search(regex, unquoted_post_data, re.I | re.DOTALL):
                                                found = desc
                                                break
                                        _result_cache[unquoted_post_data] = found or ""
                                    if found:
                                        trail = "%s(%s \(%s %s\))" % (host, path, method, post_data.strip())
                                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.HTTP, trail, "potential %s (suspicious)" % found, "(heuristic)"), packet)
                                        return

                            if '.' in path:
                                _ = urlparse.urlparse("http://%s" % url)  # dummy scheme
                                filename = _.path.split('/')[-1]
                                name, extension = os.path.splitext(filename)
                                # Built before branching: all three branches below report it and no earlier
                                # assignment of `trail` can reach this point (each one is followed by return)
                                trail = "%s(%s)" % (host, path)
                                if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and not any(_ in path for _ in WHITELIST_DIRECT_DOWNLOAD_KEYWORDS) and not _.query and len(name) < 10:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "direct %s download (suspicious)" % extension, "(heuristic)"), packet)
                                elif filename == "suspendedpage.cgi":
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "suspended page (suspicious)", "(heuristic)"), packet)
                                elif filename == "defaultwebpage.cgi":
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.TCP, TRAIL.URL, trail, "non-existent page (suspicious)", "(heuristic)"), packet)

        elif protocol == socket.IPPROTO_UDP:  # UDP
            _ = ip_data[iph_length:iph_length + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            _ = _last_udp
            _last_udp = (sec, src_ip, src_port, dst_ip, dst_port)
            if _ == _last_udp:  # skip bursts
                return

            if src_port != 53 and dst_port != 53:  # not DNS
                if dst_ip in trails:
                    trail = dst_ip
                elif src_ip in trails:
                    trail = src_ip
                else:
                    trail = None

                if trail:
                    _ = _last_logged_udp
                    _last_logged_udp = _last_udp
                    if _ != _last_logged_udp:
                        log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, trail, trails[trail][0], trails[trail][1]), packet)

            else:
                dns_data = ip_data[iph_length + 8:]  # skip the 8 byte UDP header

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(dns_data) > 6:
                    qdcount = struct.unpack("!H", dns_data[4:6])[0]
                    if qdcount > 0:
                        offset = 12  # question section follows the fixed 12 byte DNS header
                        query = ""

                        # Read length-prefixed labels up to the terminating zero length byte;
                        # afterwards `offset` points at that terminator
                        while len(dns_data) > offset:
                            length = ord(dns_data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += dns_data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        query = query.lower()

                        if not query or '.' not in query or not all(_ in VALID_DNS_CHARS for _ in query) or any(_ in query for _ in (".intranet.",)) or any(query.endswith(_) for _ in IGNORE_DNS_QUERY_SUFFIXES):
                            return

                        parts = query.split('.')

                        if ord(dns_data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack("!HH", dns_data[offset + 1:offset + 5])

                            if len(parts) > 2:
                                domain = '.'.join(parts[-2:])

                                if not _check_domain_whitelisted(domain):  # e.g. <hash>.hashserver.cs.trendmicro.com
                                    # Per-domain subdomain bookkeeping is reset once older than DAILY_SECS
                                    if (sec - (_subdomains_sec or 0)) > DAILY_SECS:
                                        _subdomains.clear()
                                        _dns_exhausted_domains.clear()
                                        _subdomains_sec = sec

                                    subdomains = _subdomains.get(domain)

                                    if not subdomains:
                                        subdomains = _subdomains[domain] = set()

                                    if len(subdomains) < DNS_EXHAUSTION_THRESHOLD:
                                        subdomains.add('.'.join(parts[:-2]))
                                    else:
                                        # Report at most once per 60 seconds
                                        if (sec - (_last_dns_exhaustion or 0)) > 60:
                                            trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "potential dns exhaustion (suspicious)", "(heuristic)"), packet)
                                            _dns_exhausted_domains.add(domain)
                                            _last_dns_exhaustion = sec

                                        return

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ not in (12, 28) and class_ == 1:  # Type not in (PTR, AAAA), Class IN
                                if dst_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, "%s (%s)" % (dst_ip, query), trails[dst_ip][0], trails[dst_ip][1]), packet)
                                elif src_ip in trails:
                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

                                _check_domain(query, sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, packet)

                        elif config.USE_HEURISTICS:
                            if ord(dns_data[2]) & 0x80:  # standard response
                                if ord(dns_data[3]) == 0x80:  # recursion available, no error
                                    if (ord(dns_data[offset + 5]) & 0xc0) and (dns_data[offset + 15] == "\x00") and (dns_data[offset + 16] == "\x04"):  # QNAME compression, IPv4 result address
                                        answer = socket.inet_ntoa(dns_data[offset + 17:offset + 21])
                                        if answer in trails and "sinkhole" in trails[answer][0]:
                                            trail = "(%s).%s" % ('.'.join(parts[:-1]), '.'.join(parts[-1:]))
                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, "sinkholed by %s (malware)" % trails[answer][0].split(" ")[1], "(heuristic)"), packet)  # (e.g. kitro.pl, devomchart.com, jebena.ananikolic.su, vuvet.cn)
                                elif ord(dns_data[3]) == 0x83:  # recursion available, no such name
                                    if '.'.join(parts[-2:]) not in _dns_exhausted_domains and not _check_domain_whitelisted(query) and not _check_domain_member(query, trails):
                                        if parts[-1].isdigit():
                                            return

                                        if not (len(parts) > 4 and all(_.isdigit() and int(_) < 256 for _ in parts[:4])):  # generic check for DNSBL IP lookups
                                            # Counters are kept per exact query and per "*.<last two labels>" wildcard;
                                            # each entry is [hour bucket, hit count, set of queries seen]
                                            for _ in filter(None, (query, "*.%s" % '.'.join(parts[-2:]) if query.count('.') > 1 else None)):
                                                if _ not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[_][0] != sec / 3600:
                                                    NO_SUCH_NAME_COUNTERS[_] = [sec / 3600, 1, set()]
                                                else:
                                                    NO_SUCH_NAME_COUNTERS[_][1] += 1
                                                    NO_SUCH_NAME_COUNTERS[_][2].add(query)

                                                    if NO_SUCH_NAME_COUNTERS[_][1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD:
                                                        if _.startswith("*."):
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, "%s%s" % ("(%s)" % ','.join(item.replace(_[1:], "") for item in NO_SUCH_NAME_COUNTERS[_][2]), _[1:]), "excessive no such domain (suspicious)", "(heuristic)"), packet)
                                                            for item in NO_SUCH_NAME_COUNTERS[_][2]:
                                                                try:
                                                                    del NO_SUCH_NAME_COUNTERS[item]
                                                                except KeyError:
                                                                    pass
                                                        else:
                                                            log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, _, "excessive no such domain (suspicious)", "(heuristic)"), packet)

                                                        try:
                                                            del NO_SUCH_NAME_COUNTERS[_]
                                                        except KeyError:
                                                            pass

                                                        break

                                            # `part` is the label that gets scored, `trail` is what gets reported
                                            if len(parts) > 2:
                                                part = parts[0] if parts[0] != "www" else parts[1]
                                                trail = "(%s).%s" % ('.'.join(parts[:-2]), '.'.join(parts[-2:]))
                                            elif len(parts) == 2:
                                                part = parts[0]
                                                trail = "(%s).%s" % (parts[0], parts[1])
                                            else:
                                                part = query
                                                trail = query

                                            if part and '-' not in part:
                                                result = _result_cache.get(part)

                                                if result is None:
                                                    # Reference: https://github.com/exp0se/dga_detector
                                                    # Entropy (in bits) of the character distribution of the label
                                                    probabilities = (float(part.count(c)) / len(part) for c in set(_ for _ in part))
                                                    entropy = -sum(p * math.log(p) / math.log(2.0) for p in probabilities)
                                                    if entropy > SUSPICIOUS_DOMAIN_ENTROPY_THRESHOLD:
                                                        result = "entropy threshold no such domain (suspicious)"

                                                    if not result:
                                                        if sum(_ in CONSONANTS for _ in part) > SUSPICIOUS_DOMAIN_CONSONANT_THRESHOLD:
                                                            result = "consonant threshold no such domain (suspicious)"

                                                    _result_cache[part] = result or False

                                                if result:
                                                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port, PROTO.UDP, TRAIL.DNS, trail, result, "(heuristic)"), packet)

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if protocol == socket.IPPROTO_ICMP:
                if ord(ip_data[iph_length]) != 0x08:  # Non-echo request
                    return
            elif protocol == socket.IPPROTO_ICMPV6:
                if ord(ip_data[iph_length]) != 0x80:  # Non-echo request
                    return

            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, dst_ip, trails[dst_ip][0], trails[dst_ip][1]), packet)
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-', IPPROTO_LUT[protocol], TRAIL.IP, src_ip, trails[src_ip][0], trails[src_ip][1]), packet)

    except struct.error:
        # Truncated/malformed headers are expected on the wire; nothing to report
        pass

    except Exception:
        if config.SHOW_DEBUG:
            traceback.print_exc()
Exemple #14
0
def _process_packet(packet, sec, usec):
    """
    Processes single (raw) packet
    """

    global _connect_sec

    try:
        ip_offset = None

        if _datalink == pcapy.DLT_PPP:
            ppp_protocol = packet[2:4]
            if ppp_protocol == "\x00\x21":  # IP
                ip_offset = PPPH_LENGTH
        else:
            if _datalink == pcapy.DLT_LINUX_SLL:
                packet = packet[2:]

            eth_header = struct.unpack("!HH8sH", packet[:ETH_LENGTH])
            eth_protocol = socket.ntohs(eth_header[3])
            if eth_protocol == 8:  # IP
                ip_offset = ETH_LENGTH

        if ip_offset is None:
            return

        ip_header = struct.unpack("!BBHHHBBH4s4s",
                                  packet[ip_offset:ip_offset + 20])

        ip_length = ip_header[2]
        packet = packet[:ETH_LENGTH + ip_length]  # truncate
        iph_length = (ip_header[0] & 0xF) << 2
        protocol = ip_header[6]
        src_ip = socket.inet_ntoa(ip_header[8])
        dst_ip = socket.inet_ntoa(ip_header[9])

        if protocol == socket.IPPROTO_TCP:  # TCP
            i = iph_length + ETH_LENGTH
            src_port, dst_port, _, _, doff_reserved, flags = struct.unpack(
                "!HHLLBB", packet[i:i + 14])

            if flags == 2:  # SYN set (only)
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               "TCP", TRAIL.IP, dst_ip, trails[dst_ip][0],
                               trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               "TCP", TRAIL.IP, src_ip, trails[src_ip][0],
                               trails[src_ip][1]))

                if sec > _connect_sec:
                    for key in _connect_src_dst:
                        if len(_connect_src_dst[key]
                               ) > PORT_SCANNING_THRESHOLD:
                            for _sec, _usec, _src_port, _dst_port in _connect_src_details[
                                    key]:
                                log_event(
                                    (_sec, _usec, src_ip, _src_port, dst_ip,
                                     _dst_port, "TCP", TRAIL.IP, src_ip,
                                     "potential port scanning", "(heuristic)"))

                    _connect_sec = sec
                    _connect_src_dst.clear()
                    _connect_src_details.clear()

                key = "%s:%s" % (src_ip, dst_ip)
                if key not in _connect_src_dst:
                    _connect_src_dst[key] = set()
                    _connect_src_details[key] = set()
                _connect_src_dst[key].add(dst_port)
                _connect_src_details[key].add((sec, usec, src_port, dst_port))

            if flags & 8 != 0:  # PSH set
                tcph_length = doff_reserved >> 4
                h_size = ETH_LENGTH + iph_length + (tcph_length << 2)
                data = packet[h_size:]

                if len(data) > 0 and "HTTP/" in data:
                    index = data.find("\r\n")
                    if index >= 0:
                        line = data[:index]
                        if line.count(' ') == 2 and " HTTP/" in line:
                            path = line.split(' ')[1].lower()
                        else:
                            return
                    else:
                        return

                    index = data.find("\r\nHost:")
                    if index >= 0:
                        index = index + len("\r\nHost:")
                        host = data[index:data.find("\r\n", index)]
                        host = host.strip()
                        host = re.sub(r":80\Z", "", host)
                    else:
                        return

                    url = "%s%s" % (host, path)

                    checks = [path.rstrip('/')]
                    if '?' in path:
                        checks.append(path.split('?')[0].rstrip('/'))

                    _ = os.path.splitext(checks[-1])
                    if _[1]:
                        checks.append(_[0])

                    if checks[-1].count('/') > 1:
                        checks.append(checks[-1][:checks[-1].rfind('/')])

                    for check in filter(None, checks):
                        for _ in ("", host):
                            check = "%s%s" % (_, check)
                            if check in trails:
                                parts = url.split(check)
                                other = ("(%s)" % _ if _ else _ for _ in parts)
                                trail = check.join(other)
                                log_event((sec, usec, src_ip, src_port, dst_ip,
                                           dst_port, "TCP", TRAIL.HTTP, trail,
                                           trails[check][0], trails[check][1]))
                                return

                    if config.USE_HEURISTICS:
                        if any(char in path for char in
                               SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS):
                            for char in SUSPICIOUS_HTTP_REQUEST_FORCE_ENCODE_CHARS:
                                path = path.replace(char, urllib.quote(char))

                        if re.search(SUSPICIOUS_HTTP_REQUEST_REGEX,
                                     urllib.unquote(
                                         path)) and host not in WHITELIST:
                            trail = "%s(%s)" % (host, path)
                            log_event(
                                (sec, usec, src_ip, src_port, dst_ip, dst_port,
                                 "TCP", TRAIL.HTTP, trail,
                                 "suspicious http request", "(heuristic)"))
                            return

                        if '.' in path:
                            _ = urlparse.urlparse(url)
                            filename = _.path.split('/')[-1]
                            name, extension = os.path.splitext(filename)
                            if extension and extension in SUSPICIOUS_DIRECT_DOWNLOAD_EXTENSIONS and '.'.join(
                                    host.split('.')
                                [-2:]) not in WHITELIST and len(name) < 6:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip,
                                           dst_port, "TCP", TRAIL.HTTP, trail,
                                           "direct %s download (suspicious)" %
                                           extension, "(heuristic)"))
                            elif filename in SUSPICIOUS_FILENAMES:
                                trail = "%s(%s)" % (host, path)
                                log_event((sec, usec, src_ip, src_port, dst_ip,
                                           dst_port, "TCP", TRAIL.HTTP, trail,
                                           "suspicious page", "(heuristic)"))

        elif protocol == socket.IPPROTO_UDP:  # UDP
            i = iph_length + ETH_LENGTH
            _ = packet[i:i + 4]
            if len(_) < 4:
                return

            src_port, dst_port = struct.unpack("!HH", _)

            if src_port != 53:
                if dst_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               "UDP", TRAIL.IP, dst_ip, trails[dst_ip][0],
                               trails[dst_ip][1]))
                elif src_ip in trails:
                    log_event((sec, usec, src_ip, src_port, dst_ip, dst_port,
                               "UDP", TRAIL.IP, src_ip, trails[src_ip][0],
                               trails[src_ip][1]))

            if dst_port == 53 or src_port == 53:
                h_size = ETH_LENGTH + iph_length + 8
                data = packet[h_size:]

                # Reference: http://www.ccs.neu.edu/home/amislove/teaching/cs4700/fall09/handouts/project1-primer.pdf
                if len(data) > 6:
                    qdcount = struct.unpack("!H", data[4:6])[0]
                    if qdcount > 0:
                        offset = 12
                        query = ""

                        while len(data) > offset:
                            length = ord(data[offset])
                            if not length:
                                query = query[:-1]
                                break
                            query += data[offset + 1:offset + length + 1] + '.'
                            offset += length + 1

                        if ' ' in query or '.' not in query or query.endswith(
                                ".in-addr.arpa") or query.endswith(".local"):
                            return

                        if ord(data[2]) == 0x01:  # standard query
                            type_, class_ = struct.unpack(
                                "!HH", data[offset + 1:offset + 5])

                            # Reference: http://en.wikipedia.org/wiki/List_of_DNS_record_types
                            if type_ != 12 and class_ == 1:  # Type != PTR, Class IN
                                parts = query.split('.')

                                for i in xrange(0, len(parts)):
                                    domain = '.'.join(parts[i:])
                                    if domain in trails:
                                        if domain == query:
                                            trail = domain
                                        else:
                                            _ = ".%s" % domain
                                            trail = "(%s)%s" % (
                                                query[:-len(_)], _)

                                        log_event((sec, usec, src_ip, src_port,
                                                   dst_ip, dst_port, "UDP",
                                                   TRAIL.DNS, trail,
                                                   trails[domain][0],
                                                   trails[domain][1]))
                                        return

                                if config.USE_HEURISTICS and len(
                                        parts[0]
                                ) > SUSPICIOUS_DOMAIN_LENGTH_THRESHOLD and '-' not in parts[
                                        0]:
                                    trail = None

                                    if len(parts) > 2:
                                        if '.'.join(
                                                parts[-2:]) not in WHITELIST:
                                            trail = "(%s).%s" % ('.'.join(
                                                parts[:-2]), '.'.join(
                                                    parts[-2:]))
                                    elif len(parts) == 2:
                                        if '.'.join(parts) not in WHITELIST:
                                            trail = "(%s).%s" % (parts[0],
                                                                 parts[1])
                                    else:
                                        trail = query

                                    if trail:
                                        log_event(
                                            (sec, usec, src_ip, src_port,
                                             dst_ip, dst_port, "UDP",
                                             TRAIL.DNS, trail,
                                             "long domain name (suspicious)",
                                             "(heuristic)"))

                        elif config.USE_HEURISTICS and (
                                ord(data[2]) & 0x80
                        ) and (
                                ord(data[3]) == 0x83
                        ):  # standard response, recursion available, no such name
                            if query not in NO_SUCH_NAME_COUNTERS or NO_SUCH_NAME_COUNTERS[
                                    query][0] != sec / 3600:
                                NO_SUCH_NAME_COUNTERS[query] = [sec / 3600, 1]
                            else:
                                NO_SUCH_NAME_COUNTERS[query][1] += 1

                                if NO_SUCH_NAME_COUNTERS[query][
                                        1] > NO_SUCH_NAME_PER_HOUR_THRESHOLD and query not in WHITELIST:
                                    log_event((
                                        sec, usec, src_ip, src_port, dst_ip,
                                        dst_port, "UDP", TRAIL.DNS, query,
                                        "excessive no such domain name (suspicious)",
                                        "(heuristic)"))

        elif protocol in IPPROTO_LUT:  # non-TCP/UDP (e.g. ICMP)
            if dst_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-',
                           IPPROTO_LUT[protocol], TRAIL.IP, dst_ip,
                           trails[dst_ip][0], trails[dst_ip][1]))
            elif src_ip in trails:
                log_event((sec, usec, src_ip, '-', dst_ip, '-',
                           IPPROTO_LUT[protocol], TRAIL.IP, src_ip,
                           trails[src_ip][0], trails[src_ip][1]))

    except Exception:
        if DEBUG:
            traceback.print_exc()