def flush_condensed_events():
    while True:
        time.sleep(CONDENSED_EVENTS_FLUSH_PERIOD)

        with _condensing_lock:
            for key in _condensed_events:
                condensed = False
                events = _condensed_events[key]
                first_event = events[0]
                condensed_event = [_ for _ in first_event]

                for i in xrange(1, len(events)):
                    current_event = events[i]
                    for j in xrange(3, 7):  # src_port, dst_ip, dst_port, proto
                        if current_event[j] != condensed_event[j]:
                            condensed = True
                            if not isinstance(condensed_event[j], set):
                                condensed_event[j] = set((condensed_event[j],))
                            condensed_event[j].add(current_event[j])

                if condensed:
                    for i in xrange(len(condensed_event)):
                        if isinstance(condensed_event[i], set):
                            condensed_event[i] = ','.join(str(_) for _ in sorted(condensed_event[i]))

                log_event(condensed_event, skip_condensing=True)

            _condensed_events.clear()
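# Note (added for illustration, not part of the original module): _condensed_events is
# assumed to map a condensing key to a list of event tuples whose indices 3..6 hold
# src_port, dst_ip, dst_port and proto (per the loop above); the leading fields are
# assumed to be timestamp and source address data. A hypothetical bucket before a flush:
#
#   _condensed_events[key] = [
#       (sec, usec, "192.0.2.1", 51000, "198.51.100.7", 80, "TCP", ...),
#       (sec, usec, "192.0.2.1", 51001, "198.51.100.7", 80, "TCP", ...),
#   ]
#
# After condensing, the differing column (src_port here) becomes "51000,51001" and the
# merged event is handed to log_event() with skip_condensing=True so it is not queued
# for condensing again.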
def process(current):
    if not current:
        return ""

    if not any(current[_] for _ in current):
        if len(current) > 1:
            items = []
            previous = None
            start = None

            for _ in sorted(current) + [six.unichr(65535)]:
                if previous is not None:
                    if ord(_) == ord(previous) + 1:
                        pass
                    else:
                        if start != previous:
                            if start == '0' and previous == '9':
                                items.append(r"\d")
                            else:
                                items.append("%s-%s" % (re.escape(start), re.escape(previous)))
                        else:
                            items.append(re.escape(previous))
                        start = _

                if start is None:
                    start = _

                previous = _

            return ("[%s]" % "".join(items)) if len(items) > 1 or '-' in items[0] else "".join(items)
        else:
            return re.escape(list(current)[0])  # Note: list() for Python 3 compatibility (dict.keys() is not indexable)
    else:
        return ("(?:%s)" if len(current) > 1 else "%s") % ('|'.join("%s%s" % (re.escape(_), process(current[_])) for _ in sorted(current))).replace('|'.join(str(_) for _ in xrange(10)), r"\d")
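# Illustrative sketch (assumption, not part of the original module): process() appears to
# walk a nested character trie in which each stored string's path ends at an empty dict,
# emitting a compact alternation with character classes and a \d shorthand. A hypothetical
# builder for such a trie:
def _build_char_trie(items):
    head = {}
    for item in items:
        node = head
        for char in item:
            node = node.setdefault(char, {})
    return head

# e.g. process(_build_char_trie(["cat", "car", "dog"])) yields "(?:ca[rt]|dog)"
# (exact reconstruction assumes no stored string is a strict prefix of another).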
def retrieve_content(url, data=None, headers=None):
    """
    Retrieves page content from given URL
    """

    try:
        req = _urllib.request.Request("".join(url[i].replace(' ', "%20") if i > url.find('?') else url[i] for i in xrange(len(url))), data, headers or {"User-agent": USER_AGENT, "Accept-encoding": "gzip, deflate"})
        resp = _urllib.request.urlopen(req, timeout=TIMEOUT)
        retval = resp.read()
        encoding = resp.headers.get("Content-Encoding")

        if encoding:
            if encoding.lower() == "deflate":
                data = io.BytesIO(zlib.decompress(retval, -15))
            elif encoding.lower() == "gzip":
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(retval))
            retval = data.read()
    except Exception as ex:
        retval = ex.read() if hasattr(ex, "read") else (get_ex_message(ex) or "")

        if url.startswith("https://") and isinstance(retval, str) and "handshake failure" in retval:
            return retrieve_content(url.replace("https://", "http://"), data, headers)

    retval = retval or b""

    if six.PY3 and isinstance(retval, bytes):
        retval = retval.decode(UNICODE_ENCODING, errors="replace")

    return retval
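# Illustrative usage (hypothetical URL; relies on the module-level USER_AGENT, TIMEOUT and
# UNICODE_ENCODING constants): retrieve_content() returns page text, or an empty string /
# error body on failure, so callers can simply test its truthiness.
def _retrieve_content_demo():
    content = retrieve_content("https://example.com/blocklist.txt")
    if content:
        return [line for line in content.split('\n') if line and not line.startswith('#')]
    return []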
def update_trails(force=False, offline=False):
    """
    Update trails from feeds
    """

    success = False
    trails = TrailsDict()
    duplicates = {}

    try:
        if not os.path.isdir(USERS_DIR):
            os.makedirs(USERS_DIR, 0o755)
    except Exception as ex:
        exit("[!] something went wrong during creation of directory '%s' ('%s')" % (USERS_DIR, ex))

    _chown(USERS_DIR)

    if config.UPDATE_SERVER:
        print("[i] retrieving trails from provided 'UPDATE_SERVER' server...")
        content = retrieve_content(config.UPDATE_SERVER)

        if not content or content.count(',') < 2:
            print("[x] unable to retrieve data from '%s'" % config.UPDATE_SERVER)
        else:
            with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+", open if six.PY2 else codecs.open) as f:
                f.write(content)
            trails = load_trails()
    else:
        trail_files = set()

        for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(ROOT_DIR, "trails"))):
            for filename in filenames:
                trail_files.add(os.path.abspath(os.path.join(dirpath, filename)))

        if config.CUSTOM_TRAILS_DIR:
            for dirpath, dirnames, filenames in os.walk(os.path.abspath(os.path.join(ROOT_DIR, os.path.expanduser(config.CUSTOM_TRAILS_DIR)))):
                for filename in filenames:
                    trail_files.add(os.path.abspath(os.path.join(dirpath, filename)))

        if not trails and (force or not os.path.isfile(config.TRAILS_FILE) or (time.time() - os.stat(config.TRAILS_FILE).st_mtime) >= config.UPDATE_PERIOD or os.stat(config.TRAILS_FILE).st_size == 0 or any(os.stat(_).st_mtime > os.stat(config.TRAILS_FILE).st_mtime for _ in trail_files)):
            if not config.no_updates:
                print("[i] updating trails (this might take a while)...")
            else:
                print("[i] checking trails...")

            if not offline and (force or config.USE_FEED_UPDATES):
                _ = os.path.abspath(os.path.join(ROOT_DIR, "trails", "feeds"))
                if _ not in sys.path:
                    sys.path.append(_)

                filenames = sorted(glob.glob(os.path.join(_, "*.py")))
            else:
                filenames = []

            _ = os.path.abspath(os.path.join(ROOT_DIR, "trails"))
            if _ not in sys.path:
                sys.path.append(_)

            filenames += [os.path.join(_, "custom")]
            filenames += [os.path.join(_, "static")]  # Note: higher priority than previous one because of dummy user trails (FE)

            filenames = [_ for _ in filenames if "__init__.py" not in _]

            if config.DISABLED_FEEDS:
                filenames = [filename for filename in filenames if os.path.splitext(os.path.split(filename)[-1])[0] not in re.split(r"[^\w]+", config.DISABLED_FEEDS)]

            for i in xrange(len(filenames)):
                filename = filenames[i]

                try:
                    module = __import__(os.path.basename(filename).split(".py")[0])
                except (ImportError, SyntaxError) as ex:
                    print("[x] something went wrong during import of feed file '%s' ('%s')" % (filename, ex))
                    continue

                for name, function in inspect.getmembers(module, inspect.isfunction):
                    if name == "fetch":
                        url = module.__url__  # Note: to prevent "SyntaxError: can not delete variable 'module' referenced in nested scope"

                        print(" [o] '%s'%s" % (url, " " * 20 if len(url) < 20 else ""))
                        sys.stdout.write("[?] progress: %d/%d (%d%%)\r" % (i, len(filenames), i * 100 // len(filenames)))
                        sys.stdout.flush()

                        if config.DISABLED_TRAILS_INFO_REGEX and re.search(config.DISABLED_TRAILS_INFO_REGEX, getattr(module, "__info__", "")):
                            continue

                        try:
                            results = function()
                            for item in results.items():
                                if item[0].startswith("www.") and '/' not in item[0]:
                                    item = [item[0][len("www."):], item[1]]
                                if item[0] in trails:
                                    if item[0] not in duplicates:
                                        duplicates[item[0]] = set((trails[item[0]][1],))
                                    duplicates[item[0]].add(item[1][1])
                                if not (item[0] in trails and (any(_ in item[1][0] for _ in LOW_PRIORITY_INFO_KEYWORDS) or trails[item[0]][1] in HIGH_PRIORITY_REFERENCES)) or (item[1][1] in HIGH_PRIORITY_REFERENCES and "history" not in item[1][0]) or any(_ in item[1][0] for _ in HIGH_PRIORITY_INFO_KEYWORDS):
                                    trails[item[0]] = item[1]

                            if not results and not any(_ in url for _ in ("abuse.ch", "cobaltstrike")):
                                print("[x] something went wrong during remote data retrieval ('%s')" % url)
                        except Exception as ex:
                            print("[x] something went wrong during processing of feed file '%s' ('%s')" % (filename, ex))

                try:
                    sys.modules.pop(module.__name__)
                    del module
                except Exception:
                    pass

            # custom trails from remote location
            if config.CUSTOM_TRAILS_URL:
                print(" [o] '(remote custom)'%s" % (" " * 20))

                for url in re.split(r"[;,]", config.CUSTOM_TRAILS_URL):
                    url = url.strip()
                    if not url:
                        continue

                    url = ("http://%s" % url) if "//" not in url else url
                    content = retrieve_content(url)

                    if not content:
                        print("[x] unable to retrieve data (or empty response) from '%s'" % url)
                    else:
                        __info__ = "blacklisted"
                        __reference__ = "(remote custom)"  # urlparse.urlsplit(url).netloc

                        for line in content.split('\n'):
                            line = line.strip()

                            if not line or line.startswith('#'):
                                continue

                            line = re.sub(r"\s*#.*", "", line)

                            if '://' in line:
                                line = re.search(r"://(.*)", line).group(1)

                            line = line.rstrip('/')

                            if line in trails and any(_ in trails[line][1] for _ in ("custom", "static")):
                                continue

                            if '/' in line:
                                trails[line] = (__info__, __reference__)
                                line = line.split('/')[0]
                            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", line):
                                trails[line] = (__info__, __reference__)
                            else:
                                trails[line.strip('.')] = (__info__, __reference__)

                        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
                            prefix, mask = match.groups()
                            mask = int(mask)

                            if mask > 32:
                                continue

                            start_int = addr_to_int(prefix) & make_mask(mask)
                            end_int = start_int | ((1 << 32 - mask) - 1)

                            if 0 <= end_int - start_int <= 1024:
                                address = start_int

                                while start_int <= address <= end_int:
                                    trails[int_to_addr(address)] = (__info__, __reference__)
                                    address += 1

            print("[i] post-processing trails (this might take a while)...")

            # basic cleanup
            for key in list(trails.keys()):
                if key not in trails:
                    continue

                if config.DISABLED_TRAILS_INFO_REGEX:
                    if re.search(config.DISABLED_TRAILS_INFO_REGEX, trails[key][0]):
                        del trails[key]
                        continue

                try:
                    _key = key.decode(UNICODE_ENCODING) if isinstance(key, bytes) else key
                    _key = _key.encode("idna")
                    if six.PY3:
                        _key = _key.decode(UNICODE_ENCODING)

                    if _key != key:  # for domains with non-ASCII letters (e.g. phishing)
                        trails[_key] = trails[key]
                        del trails[key]
                        key = _key
                except:
                    pass

                if not key or re.search(r"(?i)\A\.?[a-z]+\Z", key) and not any(_ in trails[key][1] for _ in ("custom", "static")):
                    del trails[key]
                    continue

                if re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key):
                    if any(_ in trails[key][0] for _ in ("parking site", "sinkhole")) and key in duplicates:  # Note: delete (e.g.) junk custom trails if static trail is a sinkhole
                        del duplicates[key]

                    if trails[key][0] == "malware":
                        trails[key] = ("potential malware site", trails[key][1])

                    if config.get("IP_MINIMUM_FEEDS", 3) > 1:
                        if (key not in duplicates or len(duplicates[key]) < config.get("IP_MINIMUM_FEEDS", 3)) and re.search(r"\b(custom|static)\b", trails[key][1]) is None:
                            del trails[key]
                            continue

                    if any(int(_) > 255 for _ in key.split('.')):
                        del trails[key]
                        continue

                if trails[key][0] == "ransomware":
                    trails[key] = ("ransomware (malware)", trails[key][1])

                if key.startswith("www.") and '/' not in key:
                    _ = trails[key]
                    del trails[key]
                    key = key[len("www."):]
                    if key:
                        trails[key] = _

                if '?' in key and not key.startswith('/'):
                    _ = trails[key]
                    del trails[key]
                    key = key.split('?')[0]
                    if key:
                        trails[key] = _

                if '//' in key:
                    _ = trails[key]
                    del trails[key]
                    key = key.replace('//', '/')
                    trails[key] = _

                if key != key.lower():
                    _ = trails[key]
                    del trails[key]
                    key = key.lower()
                    trails[key] = _

                if key in duplicates:
                    _ = trails[key]
                    others = sorted(duplicates[key] - set((_[1],)))
                    if others and " (+" not in _[1]:
                        trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

            read_whitelist()

            for key in list(trails.keys()):
                match = re.search(r"\A(\d+\.\d+\.\d+\.\d+)\b", key)

                if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                    del trails[key]
                elif match and (bogon_ip(match.group(1)) or cdn_ip(match.group(1))) and not any(_ in trails[key][0] for _ in ("parking", "sinkhole")):
                    del trails[key]
                else:
                    try:
                        key.decode("utf8") if hasattr(key, "decode") else key.encode("utf8")
                        trails[key][0].decode("utf8") if hasattr(trails[key][0], "decode") else trails[key][0].encode("utf8")
                        trails[key][1].decode("utf8") if hasattr(trails[key][1], "decode") else trails[key][1].encode("utf8")
                    except UnicodeError:
                        del trails[key]

            try:
                if trails:
                    with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+", open if six.PY2 else codecs.open) as f:
                        writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                        for trail in trails:
                            row = (trail, trails[trail][0], trails[trail][1])
                            writer.writerow(row)

                    success = True
            except Exception as ex:
                print("[x] something went wrong during trails file write '%s' ('%s')" % (config.TRAILS_FILE, ex))

            print("[i] update finished%s" % (40 * " "))

            if success:
                print("[i] trails stored to '%s'" % config.TRAILS_FILE)

    return trails
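# Illustrative usage (sketch, not part of the original module): update_trails() returns a
# dict-like mapping of trail -> (info, reference), the same triples written to
# config.TRAILS_FILE as CSV rows.
def _update_trails_demo():
    trails = update_trails(force=True)
    for trail in list(trails)[:10]:
        info, reference = trails[trail][0], trails[trail][1]
        print("%s -> %s (%s)" % (trail, info, reference))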
def _events(self, params):
    session = self.get_session()

    if session is None:
        self.send_response(_http_client.UNAUTHORIZED)
        self.send_header(HTTP_HEADER.CONNECTION, "close")
        return None

    start, end, size, total = None, None, -1, None
    content = None
    log_exists = False
    dates = params.get("date", "")

    if ".." in dates:
        pass
    elif '_' not in dates:
        try:
            date = datetime.datetime.strptime(dates, "%Y-%m-%d").strftime("%Y-%m-%d")
            event_log_path = os.path.join(config.LOG_DIR, "%s.log" % date)

            if os.path.exists(event_log_path):
                range_handle = open(event_log_path, "rb")
                log_exists = True
        except ValueError:
            print("[!] invalid date format in request")
            log_exists = False
    else:
        logs_data = ""
        date_interval = dates.split("_", 1)

        try:
            start_date = datetime.datetime.strptime(date_interval[0], "%Y-%m-%d").date()
            end_date = datetime.datetime.strptime(date_interval[1], "%Y-%m-%d").date()

            for i in xrange(int((end_date - start_date).days) + 1):
                date = start_date + datetime.timedelta(i)
                event_log_path = os.path.join(config.LOG_DIR, "%s.log" % date.strftime("%Y-%m-%d"))

                if os.path.exists(event_log_path):
                    log_handle = open(event_log_path, "rb")
                    logs_data += log_handle.read()
                    log_handle.close()

            range_handle = io.BytesIO(logs_data)
            log_exists = True
        except ValueError:
            print("[!] invalid date format in request")
            log_exists = False

    if log_exists:
        range_handle.seek(0, 2)
        total = range_handle.tell()
        range_handle.seek(0)

        if self.headers.get(HTTP_HEADER.RANGE):
            match = re.search(r"bytes=(\d+)-(\d+)", self.headers[HTTP_HEADER.RANGE])
            if match:
                start, end = int(match.group(1)), int(match.group(2))
                max_size = end - start + 1
                end = min(total - 1, end)
                size = end - start + 1

                if start == 0 or not session.range_handle:
                    session.range_handle = range_handle

                if session.netfilters is None and not session.mask_custom:
                    session.range_handle.seek(start)
                    self.send_response(_http_client.PARTIAL_CONTENT)
                    self.send_header(HTTP_HEADER.CONNECTION, "close")
                    self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")
                    self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes %d-%d/%d" % (start, end, total))
                    content = session.range_handle.read(size)
                else:
                    self.send_response(_http_client.OK)
                    self.send_header(HTTP_HEADER.CONNECTION, "close")
                    self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")

                    buffer, addresses, netmasks, regex = io.StringIO(), set(), [], ""

                    for netfilter in session.netfilters or []:
                        if not netfilter:
                            continue
                        if '/' in netfilter:
                            netmasks.append(netfilter)
                        elif re.search(r"\A[\d.]+\Z", netfilter):
                            addresses.add(netfilter)
                        elif "\\." in netfilter:
                            regex = r"\b(%s)\b" % netfilter
                        else:
                            print("[!] invalid network filter '%s'" % netfilter)
                            return

                    for line in session.range_handle:
                        display = session.netfilters is None
                        ip = None
                        line = line.decode(UNICODE_ENCODING, "ignore")

                        if regex:
                            match = re.search(regex, line)
                            if match:
                                ip = match.group(1)
                                display = True

                        if not display and (addresses or netmasks):
                            for match in re.finditer(r"\b(\d+\.\d+\.\d+\.\d+)\b", line):
                                if not display:
                                    ip = match.group(1)
                                else:
                                    break

                                if ip in addresses:
                                    display = True
                                    break
                                elif netmasks:
                                    for _ in netmasks:
                                        prefix, mask = _.split('/')
                                        if addr_to_int(ip) & make_mask(int(mask)) == addr_to_int(prefix):
                                            addresses.add(ip)
                                            display = True
                                            break

                        if session.mask_custom and "(custom)" in line:
                            line = re.sub(r'("[^"]+"|[^ ]+) \(custom\)', "- (custom)", line)

                        if display:
                            if ",%s" % ip in line or "%s," % ip in line:
                                line = re.sub(r" ([\d.,]+,)?%s(,[\d.,]+)? " % re.escape(ip), " %s " % ip, line)

                            buffer.write(line)
                            if buffer.tell() >= max_size:
                                break

                    content = buffer.getvalue()
                    end = start + len(content) - 1
                    self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes %d-%d/%d" % (start, end, end + 1 + max_size * (len(content) >= max_size)))

                if len(content) < max_size:
                    session.range_handle.close()
                    session.range_handle = None

        if size == -1:
            self.send_response(_http_client.OK)
            self.send_header(HTTP_HEADER.CONNECTION, "close")
            self.send_header(HTTP_HEADER.CONTENT_TYPE, "text/plain")
            self.end_headers()

            with range_handle as f:
                while True:
                    data = f.read(io.DEFAULT_BUFFER_SIZE)
                    if not data:
                        break
                    else:
                        self.wfile.write(data)
    else:
        self.send_response(_http_client.OK)  # instead of _http_client.NO_CONTENT (compatibility reasons)
        self.send_header(HTTP_HEADER.CONNECTION, "close")

        if self.headers.get(HTTP_HEADER.RANGE):
            self.send_header(HTTP_HEADER.CONTENT_RANGE, "bytes 0-0/0")

    return content
def inet_ntoa6(packed_ip):
    _ = packed_ip.hex() if hasattr(packed_ip, "hex") else packed_ip.encode("hex")
    return compress_ipv6(':'.join(_[i:i + 4] for i in xrange(0, len(_), 4)))
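# Illustrative usage (relies on compress_ipv6() defined elsewhere in this project):
def _inet_ntoa6_demo():
    import socket
    packed = socket.inet_pton(socket.AF_INET6, "2001:db8::1")
    return inet_ntoa6(packed)  # expected to yield the compressed form "2001:db8::1"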