Beispiel #1
0
                _ = trails[key]
                del trails[key]
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1],)))
                if others and " (+" not in _[1]:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        for key in trails.keys():
            if key in WHITELIST or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and cdn_ip(key):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                    for trail in trails:
                        writer.writerow((trail, trails[trail][0], trails[trail][1]))
Beispiel #2
0
def update_trails(force=False, offline=False):
    """
    Update trails from feeds

    Makes sure USERS_DIR exists, then (re)builds the trails collection in one
    of two ways:

    * if config.UPDATE_SERVER is set, the content retrieved from it is written
      to config.TRAILS_FILE as-is and loaded back with load_trails();
    * otherwise the feed modules (trails/feeds/*.py), the "custom" and
      "static" modules and the optional config.CUSTOM_TRAILS_URL locations are
      processed, the result is cleaned up, filtered against the whitelist and
      written to config.TRAILS_FILE as CSV rows (trail, info, reference).

    In the second mode the rebuild only happens when it is forced, or when the
    trails file is missing, empty, older than config.UPDATE_PERIOD seconds, or
    older than any file found under the trails directories.

    Args:
        force: rebuild regardless of the trails file state; also enables the
            feed modules even when config.USE_FEED_UPDATES is falsy.
        offline: skip the modules under trails/feeds (only "custom" and
            "static" are imported).
            NOTE(review): config.CUSTOM_TRAILS_URL is still retrieved when
            offline is set - confirm that this is intended.

    Returns:
        The trails collection. When no rebuild was needed (and no update
        server is configured) this is the initially created, unpopulated
        TrailsDict.
    """

    success = False
    trails = TrailsDict()
    duplicates = {}  # trail -> set of references of all feeds reporting it

    try:
        if not os.path.isdir(USERS_DIR):
            os.makedirs(USERS_DIR, 0o755)
    except Exception as ex:
        exit(
            "[!] something went wrong during creation of directory '%s' ('%s')"
            % (USERS_DIR, ex))

    _chown(USERS_DIR)

    if config.UPDATE_SERVER:
        print("[i] retrieving trails from provided 'UPDATE_SERVER' server...")
        content = retrieve_content(config.UPDATE_SERVER)
        # a usable response needs at least two commas (one CSV row has three fields)
        if not content or content.count(',') < 2:
            print("[x] unable to retrieve data from '%s'" %
                  config.UPDATE_SERVER)
        else:
            with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+",
                        open if six.PY2 else codecs.open) as f:
                f.write(content)
            trails = load_trails()

    else:
        # every file below the trails directories takes part in the staleness check
        trail_files = set()
        for dirpath, dirnames, filenames in os.walk(
                os.path.abspath(os.path.join(ROOT_DIR, "trails"))):
            for filename in filenames:
                trail_files.add(
                    os.path.abspath(os.path.join(dirpath, filename)))

        if config.CUSTOM_TRAILS_DIR:
            for dirpath, dirnames, filenames in os.walk(
                    os.path.abspath(
                        os.path.join(
                            ROOT_DIR,
                            os.path.expanduser(config.CUSTOM_TRAILS_DIR)))):
                for filename in filenames:
                    trail_files.add(
                        os.path.abspath(os.path.join(dirpath, filename)))

        if not trails and (
                force or not os.path.isfile(config.TRAILS_FILE) or
            (time.time() - os.stat(config.TRAILS_FILE).st_mtime) >=
                config.UPDATE_PERIOD
                or os.stat(config.TRAILS_FILE).st_size == 0 or any(
                    os.stat(_).st_mtime > os.stat(config.TRAILS_FILE).st_mtime
                    for _ in trail_files)):
            if not config.no_updates:
                print("[i] updating trails (this might take a while)...")
            else:
                print("[i] checking trails...")

            if not offline and (force or config.USE_FEED_UPDATES):
                _ = os.path.abspath(os.path.join(ROOT_DIR, "trails", "feeds"))
                if _ not in sys.path:
                    sys.path.append(_)

                filenames = sorted(glob.glob(os.path.join(_, "*.py")))
            else:
                filenames = []

            _ = os.path.abspath(os.path.join(ROOT_DIR, "trails"))
            if _ not in sys.path:
                sys.path.append(_)

            filenames += [os.path.join(_, "custom")]
            filenames += [
                os.path.join(_, "static")
            ]  # Note: higher priority than previous one because of dummy user trails (FE)

            filenames = [_ for _ in filenames if "__init__.py" not in _]

            if config.DISABLED_FEEDS:
                # split the option once instead of once per candidate file
                disabled_feeds = set(
                    re.split(r"[^\w]+", config.DISABLED_FEEDS))
                filenames = [
                    filename for filename in filenames
                    if os.path.splitext(os.path.split(filename)[-1])[0] not in
                    disabled_feeds
                ]

            for i, filename in enumerate(filenames):
                try:
                    module = __import__(
                        os.path.basename(filename).split(".py")[0])
                except (ImportError, SyntaxError) as ex:
                    print(
                        "[x] something went wrong during import of feed file '%s' ('%s')"
                        % (filename, ex))
                    continue

                for name, function in inspect.getmembers(
                        module, inspect.isfunction):
                    if name == "fetch":
                        url = module.__url__  # Note: to prevent "SyntaxError: can not delete variable 'module' referenced in nested scope"

                        print(" [o] '%s'%s" %
                              (url, " " * 20 if len(url) < 20 else ""))
                        sys.stdout.write(
                            "[?] progress: %d/%d (%d%%)\r" %
                            (i, len(filenames), i * 100 // len(filenames)))
                        sys.stdout.flush()

                        if config.DISABLED_TRAILS_INFO_REGEX and re.search(
                                config.DISABLED_TRAILS_INFO_REGEX,
                                getattr(module, "__info__", "")):
                            continue

                        try:
                            results = function()
                            for item in results.items():
                                # "www.example.com" and "example.com" are the same trail
                                if item[0].startswith(
                                        "www.") and '/' not in item[0]:
                                    item = [item[0][len("www."):], item[1]]
                                if item[0] in trails:
                                    if item[0] not in duplicates:
                                        duplicates[item[0]] = set(
                                            (trails[item[0]][1], ))
                                    duplicates[item[0]].add(item[1][1])
                                # keep the stored entry when the new one is low
                                # priority (or the stored one comes from a high
                                # priority reference), unless the new one is
                                # itself high priority
                                if not (
                                        item[0] in trails and
                                    (any(_ in item[1][0]
                                         for _ in LOW_PRIORITY_INFO_KEYWORDS)
                                     or trails[item[0]][1]
                                     in HIGH_PRIORITY_REFERENCES)) or (
                                         item[1][1] in HIGH_PRIORITY_REFERENCES
                                         and "history" not in item[1][0]
                                     ) or any(
                                         _ in item[1][0]
                                         for _ in HIGH_PRIORITY_INFO_KEYWORDS):
                                    trails[item[0]] = item[1]
                            if not results and not any(
                                    _ in url
                                    for _ in ("abuse.ch", "cobaltstrike")):
                                print(
                                    "[x] something went wrong during remote data retrieval ('%s')"
                                    % url)
                        except Exception as ex:
                            print(
                                "[x] something went wrong during processing of feed file '%s' ('%s')"
                                % (filename, ex))

                # best effort: drop the feed module again once it has been used
                try:
                    sys.modules.pop(module.__name__)
                    del module
                except Exception:
                    pass

            # custom trails from remote location
            if config.CUSTOM_TRAILS_URL:
                print(" [o] '(remote custom)'%s" % (" " * 20))
                for url in re.split(r"[;,]", config.CUSTOM_TRAILS_URL):
                    url = url.strip()
                    if not url:
                        continue

                    url = ("http://%s" % url) if "//" not in url else url
                    content = retrieve_content(url)

                    if not content:
                        print(
                            "[x] unable to retrieve data (or empty response) from '%s'"
                            % url)
                    else:
                        __info__ = "blacklisted"
                        __reference__ = "(remote custom)"  # urlparse.urlsplit(url).netloc
                        for line in content.split('\n'):
                            line = line.strip()
                            if not line or line.startswith('#'):
                                continue
                            line = re.sub(r"\s*#.*", "", line)
                            if '://' in line:
                                line = re.search(r"://(.*)", line).group(1)
                            line = line.rstrip('/')

                            # never override local custom/static entries
                            if line in trails and any(
                                    _ in trails[line][1]
                                    for _ in ("custom", "static")):
                                continue

                            # URLs (and CIDR notations) and plain IPv4 addresses
                            # are stored verbatim, domain names without
                            # leading/trailing dots
                            if '/' in line or re.search(
                                    r"\A\d+\.\d+\.\d+\.\d+\Z", line):
                                trails[line] = (__info__, __reference__)
                            else:
                                trails[line.strip('.')] = (__info__,
                                                           __reference__)

                        # expand small CIDR ranges (at most 1025 addresses) into
                        # individual IP trails
                        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)",
                                                 content):
                            prefix, mask = match.groups()
                            mask = int(mask)
                            if mask > 32:
                                continue
                            start_int = addr_to_int(prefix) & make_mask(mask)
                            end_int = start_int | ((1 << 32 - mask) - 1)
                            if 0 <= end_int - start_int <= 1024:
                                address = start_int
                                while start_int <= address <= end_int:
                                    trails[int_to_addr(address)] = (
                                        __info__, __reference__)
                                    address += 1

            print("[i] post-processing trails (this might take a while)...")

            # basic cleanup
            for key in list(trails.keys()):
                # entries can disappear while earlier keys are being renamed
                if key not in trails:
                    continue

                if config.DISABLED_TRAILS_INFO_REGEX:
                    if re.search(config.DISABLED_TRAILS_INFO_REGEX,
                                 trails[key][0]):
                        del trails[key]
                        continue

                try:
                    _key = key.decode(UNICODE_ENCODING) if isinstance(
                        key, bytes) else key
                    _key = _key.encode("idna")
                    if six.PY3:
                        _key = _key.decode(UNICODE_ENCODING)
                    if _key != key:  # for domains with non-ASCII letters (e.g. phishing)
                        trails[_key] = trails[key]
                        del trails[key]
                        key = _key
                except Exception:
                    # best effort: keys that can't be IDNA encoded are kept
                    # as they are (KeyboardInterrupt/SystemExit still propagate)
                    pass

                # drop empty keys and bare words (e.g. a lone TLD) unless they
                # come from the custom/static trails
                if not key or re.search(r"(?i)\A\.?[a-z]+\Z", key) and not any(
                        _ in trails[key][1] for _ in ("custom", "static")):
                    del trails[key]
                    continue

                if re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key):
                    if any(
                            _ in trails[key][0]
                            for _ in ("parking site", "sinkhole")
                    ) and key in duplicates:  # Note: delete (e.g.) junk custom trails if static trail is a sinkhole
                        del duplicates[key]

                    if trails[key][0] == "malware":
                        trails[key] = ("potential malware site",
                                       trails[key][1])

                    # IP trails must be reported by enough feeds, unless they
                    # come from the custom/static trails
                    if config.get("IP_MINIMUM_FEEDS", 3) > 1:
                        if (key not in duplicates or len(duplicates[key]) <
                                config.get("IP_MINIMUM_FEEDS", 3)
                            ) and re.search(r"\b(custom|static)\b",
                                            trails[key][1]) is None:
                            del trails[key]
                            continue

                    if any(int(_) > 255 for _ in key.split('.')):
                        del trails[key]
                        continue

                if trails[key][0] == "ransomware":
                    trails[key] = ("ransomware (malware)", trails[key][1])

                if key.startswith("www.") and '/' not in key:
                    _ = trails[key]
                    del trails[key]
                    key = key[len("www."):]
                    if not key:
                        # nothing left to store; also avoids a lookup of a no
                        # longer existing entry further below
                        continue
                    trails[key] = _

                if '?' in key and not key.startswith('/'):
                    _ = trails[key]
                    del trails[key]
                    key = key.split('?')[0]
                    if not key:
                        continue
                    trails[key] = _

                if '//' in key:
                    _ = trails[key]
                    del trails[key]
                    key = key.replace('//', '/')
                    trails[key] = _

                if key != key.lower():
                    _ = trails[key]
                    del trails[key]
                    key = key.lower()
                    trails[key] = _

                # append the other reporting feeds to the reference (only once)
                if key in duplicates:
                    _ = trails[key]
                    others = sorted(duplicates[key] - set((_[1], )))
                    if others and " (+" not in _[1]:
                        trails[key] = (_[0],
                                       "%s (+%s)" % (_[1], ','.join(others)))

            read_whitelist()

            for key in list(trails.keys()):
                match = re.search(r"\A(\d+\.\d+\.\d+\.\d+)\b", key)
                if check_whitelisted(key) or any(
                        key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                    del trails[key]
                elif match and (bogon_ip(match.group(1))
                                or cdn_ip(match.group(1))) and not any(
                                    _ in trails[key][0]
                                    for _ in ("parking", "sinkhole")):
                    del trails[key]
                else:
                    # drop entries that can't be represented as UTF-8
                    try:
                        key.decode("utf8") if hasattr(
                            key, "decode") else key.encode("utf8")
                        trails[key][0].decode("utf8") if hasattr(
                            trails[key][0],
                            "decode") else trails[key][0].encode("utf8")
                        trails[key][1].decode("utf8") if hasattr(
                            trails[key][1],
                            "decode") else trails[key][1].encode("utf8")
                    except UnicodeError:
                        del trails[key]

            try:
                if trails:
                    with _fopen(config.TRAILS_FILE, "w+b" if six.PY2 else "w+",
                                open if six.PY2 else codecs.open) as f:
                        writer = csv.writer(f,
                                            delimiter=',',
                                            quotechar='\"',
                                            quoting=csv.QUOTE_MINIMAL)
                        for trail in trails:
                            row = (trail, trails[trail][0], trails[trail][1])
                            writer.writerow(row)

                    success = True
            except Exception as ex:
                print(
                    "[x] something went wrong during trails file write '%s' ('%s')"
                    % (config.TRAILS_FILE, ex))

            print("[i] update finished%s" % (40 * " "))

            if success:
                print("[i] trails stored to '%s'" % config.TRAILS_FILE)

    return trails
Beispiel #3
0
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1], )))
                if others and " (+" not in _[1]:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        for key in trails.keys():
            if check_whitelisted(key) or any(
                    key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z",
                           key) and (bogon_ip(key) or cdn_ip(key)):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f,
                                        delimiter=',',
                                        quotechar='\"',
Beispiel #4
0
                _ = trails[key]
                del trails[key]
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1],)))
                if others and " (+" not in _[1]:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        for key in trails.keys():
            if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and (bogon_ip(key) or cdn_ip(key)):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                    for trail in trails:
                        writer.writerow((trail, trails[trail][0], trails[trail][1]))
Beispiel #5
0
                del trails[key]
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1], )))
                if others and " (+" not in _[1]:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        for key in trails.keys():
            if check_whitelisted(key) or any(
                    key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and cdn_ip(key):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f,
                                        delimiter=',',
                                        quotechar='\"',
Beispiel #6
0
                _ = trails[key]
                del trails[key]
                key = key.lower()
                trails[key] = _
            if key in duplicates:
                _ = trails[key]
                others = sorted(duplicates[key] - set((_[1],)))
                if others:
                    trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

        read_whitelist()

        for key in trails.keys():
            if key in WHITELIST or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                del trails[key]
            elif key.replace('.', "").isdigit() and cdn_ip(key):
                del trails[key]
            else:
                try:
                    key.decode("utf8")
                    trails[key][0].decode("utf8")
                    trails[key][1].decode("utf8")
                except UnicodeDecodeError:
                    del trails[key]

        try:
            if trails:
                with _fopen(TRAILS_FILE, "w+b") as f:
                    writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                    for trail in trails:
                        writer.writerow((trail, trails[trail][0], trails[trail][1]))
Beispiel #7
0
                    _ = trails[key]
                    del trails[key]
                    key = key.lower()
                    trails[key] = _
                if key in duplicates:
                    _ = trails[key]
                    others = sorted(duplicates[key] - set((_[1],)))
                    if others and " (+" not in _[1]:
                        trails[key] = (_[0], "%s (+%s)" % (_[1], ','.join(others)))

            read_whitelist()

            for key in trails.keys():
                if check_whitelisted(key) or any(key.startswith(_) for _ in BAD_TRAIL_PREFIXES):
                    del trails[key]
                elif re.search(r"\A\d+\.\d+\.\d+\.\d+\Z", key) and (bogon_ip(key) or cdn_ip(key)):
                    del trails[key]
                else:
                    try:
                        key.decode("utf8")
                        trails[key][0].decode("utf8")
                        trails[key][1].decode("utf8")
                    except UnicodeDecodeError:
                        del trails[key]

            try:
                if trails:
                    with _fopen(TRAILS_FILE, "w+b") as f:
                        writer = csv.writer(f, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)
                        for trail in trails:
                            writer.writerow((trail, trails[trail][0], trails[trail][1]))