Exemple #1
0
def startSpiderFootScanner(loggingQueue, *args, **kwargs):
    """Configure worker logging, then construct a SpiderFootScanner.

    Args:
        loggingQueue: queue passed to ``logger.logWorkerSetup``.
        *args: positional arguments forwarded to ``SpiderFootScanner``.
        **kwargs: keyword arguments forwarded to ``SpiderFootScanner``.

    Returns:
        The newly created ``SpiderFootScanner`` instance.
    """
    # Logging is wired up first so the scanner is built with it in place.
    logger.logWorkerSetup(loggingQueue)
    scanner = SpiderFootScanner(*args, **kwargs)
    return scanner
Exemple #2
0
def main() -> None:
    """Command-line entry point.

    Builds the default web UI and global configuration, parses the
    command-line arguments, sets up logging, loads the modules and
    correlation rules, opens the database, and then performs exactly one
    of the following actions (checked in this order):

    * ``-C`` run the correlation rules against an existing scan ID;
    * ``-M`` list the available modules;
    * ``-T`` list the available event types;
    * ``-l`` start the web server on the given ``IP:port``;
    * otherwise hand over to ``start_scan``.

    The process exits via ``sys.exit`` with ``0`` after a completed
    action and ``-1`` after a fatal start-up error.
    """
    # web server config
    sfWebUiConfig = {
        'host': '127.0.0.1',
        'port': 5001,
        'root': '/',
        'cors_origins': [],
    }

    # 'Global' configuration options
    # These can be overridden on a per-module basis, and some will
    # be overridden from saved configuration settings stored in the DB.
    sfConfig = {
        '_debug': False,  # Debug
        '_maxthreads': 3,  # Number of modules to run concurrently
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        '_useragent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds':
        'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers':
        "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__database': f"{SpiderFootHelpers.dataPath()}/spiderfoot.db",
        '__modules__': None,  # List of modules. Will be set after start-up.
        '__correlationrules__':
        None,  # List of correlation rules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
    }

    # Human-readable descriptions of the global options above.
    sfOptdescs = {
        '_debug': "Enable debugging?",
        '_maxthreads': "Max number of modules to run concurrently",
        '_useragent':
        "User-Agent string to use for HTTP requests. Prefix with an '@' to randomly select the User Agent from a file containing user agent strings for each request, e.g. @C:\\useragents.txt or @/home/bob/useragents.txt. Or supply a URL to load the list from there.",
        '_dnsserver':
        "Override the default resolver with another DNS server. For example, 8.8.8.8 is Google's open DNS server.",
        '_fetchtimeout':
        "Number of seconds before giving up on a HTTP request.",
        '_internettlds': "List of Internet TLDs.",
        '_internettlds_cache':
        "Hours to cache the Internet TLD list. This can safely be quite a long time given that the list doesn't change too often.",
        '_genericusers':
        "List of usernames that if found as usernames or as part of e-mail addresses, should be treated differently to non-generics.",
        '_socks1type': "SOCKS Server Type. Can be '4', '5', 'HTTP' or 'TOR'",
        '_socks2addr': 'SOCKS Server IP Address.',
        '_socks3port':
        'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
        # NOTE(review): this description was a '******' placeholder; the text
        # below mirrors the '_socks5pwd' entry - confirm the wording upstream.
        '_socks4user':
        "SOCKS Username. Valid only for SOCKS4 and SOCKS5 servers.",
        '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
        '_modulesenabled':
        "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
    }

    # Legacy way to run the server
    p = argparse.ArgumentParser(
        description=
        f"SpiderFoot {__version__}: Open Source Intelligence Automation.")
    p.add_argument("-d",
                   "--debug",
                   action='store_true',
                   help="Enable debug output.")
    p.add_argument("-l", metavar="IP:port", help="IP and port to listen on.")
    p.add_argument("-m",
                   metavar="mod1,mod2,...",
                   type=str,
                   help="Modules to enable.")
    p.add_argument("-M",
                   "--modules",
                   action='store_true',
                   help="List available modules.")
    p.add_argument("-C",
                   "--correlate",
                   metavar="scanID",
                   help="Run correlation rules against a scan ID.")
    p.add_argument("-s", metavar="TARGET", help="Target for the scan.")
    p.add_argument(
        "-t",
        metavar="type1,type2,...",
        type=str,
        help="Event types to collect (modules selected automatically).")
    p.add_argument("-u",
                   choices=["all", "footprint", "investigate", "passive"],
                   type=str,
                   help="Select modules automatically by use case")
    p.add_argument("-T",
                   "--types",
                   action='store_true',
                   help="List available event types.")
    p.add_argument("-o",
                   choices=["tab", "csv", "json"],
                   type=str,
                   help="Output format. Tab is default.")
    p.add_argument("-H",
                   action='store_true',
                   help="Don't print field headers, just data.")
    p.add_argument("-n", action='store_true', help="Strip newlines from data.")
    p.add_argument("-r",
                   action='store_true',
                   help="Include the source data field in tab/csv output.")
    p.add_argument(
        "-S",
        metavar="LENGTH",
        type=int,
        help="Maximum data length to display. By default, all data is shown.")
    p.add_argument("-D",
                   metavar='DELIMITER',
                   type=str,
                   help="Delimiter to use for CSV output. Default is ,.")
    p.add_argument(
        "-f",
        action='store_true',
        help="Filter out other event types that weren't requested with -t.")
    p.add_argument("-F",
                   metavar="type1,type2,...",
                   type=str,
                   help="Show only a set of event types, comma-separated.")
    p.add_argument(
        "-x",
        action='store_true',
        help=
        "STRICT MODE. Will only enable modules that can directly consume your target, and if -t was specified only those events will be consumed by modules. This overrides -t and -m options."
    )
    p.add_argument("-q",
                   action='store_true',
                   help="Disable logging. This will also hide errors!")
    p.add_argument("-V",
                   "--version",
                   action='store_true',
                   help="Display the version of SpiderFoot and exit.")
    # Single-dash long option; argparse exposes it as ``args.max_threads``.
    p.add_argument("-max-threads",
                   type=int,
                   help="Max number of modules to run concurrently.")
    args = p.parse_args()

    if args.version:
        print(
            f"SpiderFoot {__version__}: Open Source Intelligence Automation.")
        sys.exit(0)

    # A missing (None) or zero value keeps the default thread count.
    if args.max_threads:
        sfConfig['_maxthreads'] = args.max_threads

    # ``store_true`` always yields a bool, so it can be assigned directly.
    sfConfig['_debug'] = args.debug

    if args.q:
        sfConfig['__logging'] = False

    loggingQueue = mp.Queue()
    logListenerSetup(loggingQueue, sfConfig)
    logWorkerSetup(loggingQueue)
    log = logging.getLogger(f"spiderfoot.{__name__}")
    sft = SpiderFoot(sfConfig)

    # Add descriptions of the global config options
    sfConfig['__globaloptdescs__'] = sfOptdescs

    # Load each module in the modules directory with a .py extension
    # (Exception rather than BaseException, so Ctrl-C and SystemExit are not
    # misreported as a load failure.)
    try:
        mod_dir = sft.myPath() + '/modules/'
        sfModules = SpiderFootHelpers.loadModulesAsDict(
            mod_dir, ['sfp_template.py'])
    except Exception as e:
        log.critical(f"Failed to load modules: {e}", exc_info=True)
        sys.exit(-1)

    if not sfModules:
        log.critical(f"No modules found in modules directory: {mod_dir}")
        sys.exit(-1)

    # Load each correlation rule in the correlations directory with
    # a .yaml extension
    try:
        correlations_dir = sft.myPath() + '/correlations/'
        correlationRulesRaw = SpiderFootHelpers.loadCorrelationRulesRaw(
            correlations_dir, ['template.yaml'])
    except Exception as e:
        log.critical(f"Failed to load correlation rules: {e}", exc_info=True)
        sys.exit(-1)

    # Initialize database handle
    try:
        dbh = SpiderFootDb(sfConfig)
    except Exception as e:
        log.critical(f"Failed to initialize database: {e}", exc_info=True)
        sys.exit(-1)

    # Sanity-check the rules and parse them
    sfCorrelationRules = []
    if not correlationRulesRaw:
        # Not fatal: start-up continues with an empty rule set.
        log.error(
            f"No correlation rules found in correlations directory: {correlations_dir}"
        )
    else:
        try:
            correlator = SpiderFootCorrelator(dbh, correlationRulesRaw)
            sfCorrelationRules = correlator.get_ruleset()
        except Exception as e:
            log.critical(f"Failure initializing correlation rules: {e}",
                         exc_info=True)
            sys.exit(-1)

    # Add modules and correlation rules to sfConfig so they can be used elsewhere
    sfConfig['__modules__'] = sfModules
    sfConfig['__correlationrules__'] = sfCorrelationRules

    if args.correlate:
        if not correlationRulesRaw:
            log.error(
                "Unable to perform correlations as no correlation rules were found."
            )
            sys.exit(-1)

        try:
            log.info(
                f"Running {len(correlationRulesRaw)} correlation rules against scan, {args.correlate}."
            )
            corr = SpiderFootCorrelator(dbh, correlationRulesRaw,
                                        args.correlate)
            corr.run_correlations()
        except Exception as e:
            log.critical(f"Unable to run correlation rules: {e}",
                         exc_info=True)
            sys.exit(-1)
        sys.exit(0)

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules):
            # Names containing a double underscore are not listed.
            if "__" in m:
                continue
            print(f"{m:25}  {sfModules[m]['descr']}")
        sys.exit(0)

    if args.types:
        dbh = SpiderFootDb(sfConfig, init=True)
        log.info("Types available:")
        # Key on the second column of each row, keeping the first as the value.
        types = {r[1]: r[0] for r in dbh.eventTypes()}

        for t in sorted(types):
            print(f"{t:45}  {types[t]}")
        sys.exit(0)

    if args.l:
        # Anything other than exactly one ':' fails the two-way unpack.
        try:
            host, port = args.l.split(":")
        except ValueError:
            log.critical("Invalid ip:port format.")
            sys.exit(-1)

        sfWebUiConfig['host'] = host
        sfWebUiConfig['port'] = port

        start_web_server(sfWebUiConfig, sfConfig, loggingQueue)
        sys.exit(0)

    start_scan(sfConfig, sfModules, args, loggingQueue)