Esempio n. 1
0
 def test_eventTypes_should_return_a_list(self):
     """
     Test eventTypes(self)
     """
     sfdb = SpiderFootDb(self.default_options, False)
     event_types = sfdb.eventTypes()
     self.assertIsInstance(event_types, list)
Esempio n. 2
0
    def newscan(self):
        """
        Configure a new scan
        """

        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()
        templ = Template(filename='dyn/newscan.tmpl', lookup=self.lookup)
        return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                            modules=self.config['__modules__'], scanname="",
                            selectedmods="", scantarget="")
Esempio n. 3
0
    def eventtypes(self):
        """List all event types.

        Returns:
            str: list of event types
        """

        cherrypy.response.headers['Content-Type'] = "application/json; charset=utf-8"

        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()
        ret = list()

        for r in types:
            ret.append([r[1], r[0]])

        return sorted(ret, key=itemgetter(0))
Esempio n. 4
0
    def clonescan(self, id):
        """
        Clone an existing scan (pre-selected options in the newscan page)

        Args:
            id (str): scan ID to clone

        Returns:
            None
        """

        dbh = SpiderFootDb(self.config)
        types = dbh.eventTypes()
        info = dbh.scanInstanceGet(id)

        if not info:
            return self.error("Invalid scan ID.")

        scanconfig = dbh.scanConfigGet(id)
        scanname = info[0]
        scantarget = info[1]
        targetType = None

        if scanname == "" or scantarget == "" or len(scanconfig) == 0:
            return self.error("Something went wrong internally.")

        targetType = SpiderFootHelpers.targetTypeFromString(scantarget)
        if targetType is None:
            # It must be a name, so wrap quotes around it
            scantarget = """ + scantarget + """

        modlist = scanconfig['_modulesenabled'].split(',')

        templ = Template(filename='spiderfoot/templates/newscan.tmpl', lookup=self.lookup)
        return templ.render(pageid='NEWSCAN', types=types, docroot=self.docroot,
                            modules=self.config['__modules__'], selectedmods=modlist,
                            scanname=str(scanname),
                            scantarget=str(scantarget), version=__version__)
Esempio n. 5
0
def start_scan(sfConfig, sfModules, args):
    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    if args.types:
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45}  {1}'.format(t, types[t])))
        sys.exit(0)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = sf.targetType(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m:
        log.warning(
            "You didn't specify any modules or types, so all will be enabled.")
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides',
                                           []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    # Run the scan
    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")
    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    if args.o == "json":
        print("[", end='')

    # Start running a new scan
    scanName = target
    scanId = sf.genScanInstanceId()
    try:
        p = mp.Process(target=SpiderFootScanner,
                       args=(scanName, scanId, target, targetType, modlist,
                             cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # If field headers weren't disabled, print them
    if not args.H and args.o != "json":
        if args.D:
            delim = args.D
        else:
            if args.o in ["tab", None]:
                delim = "\t"

            if args.o == "csv":
                delim = ","

        if not args.r:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}'.format("Source", delim, "Type",
                                                     delim, "Data"))
        else:
            if delim != "\t":
                print(delim.join(["Source", "Type", "Source Data", "Data"]))
            else:
                print('{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data"))

    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return None
Esempio n. 6
0
def main():
    # web server config
    sfWebUiConfig = {'host': '127.0.0.1', 'port': 5001, 'root': '/'}

    # 'Global' configuration options
    # These can be overriden on a per-module basis, and some will
    # be overridden from saved configuration settings stored in the DB.
    sfConfig = {
        '_debug': False,  # Debug
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        '_useragent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds':
        'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers':
        "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__version__': '3.3',
        '__database': 'spiderfoot.db',
        '__modules__': None,  # List of modules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
        '_torctlport': 9051
    }

    sfOptdescs = {
        '_debug': "Enable debugging?",
        '_useragent':
        "User-Agent string to use for HTTP requests. Prefix with an '@' to randomly select the User Agent from a file containing user agent strings for each request, e.g. @C:\\useragents.txt or @/home/bob/useragents.txt. Or supply a URL to load the list from there.",
        '_dnsserver':
        "Override the default resolver with another DNS server. For example, 8.8.8.8 is Google's open DNS server.",
        '_fetchtimeout':
        "Number of seconds before giving up on a HTTP request.",
        '_internettlds': "List of Internet TLDs.",
        '_internettlds_cache':
        "Hours to cache the Internet TLD list. This can safely be quite a long time given that the list doesn't change too often.",
        '_genericusers':
        "List of usernames that if found as usernames or as part of e-mail addresses, should be treated differently to non-generics.",
        '_socks1type': "SOCKS Server Type. Can be '4', '5', 'HTTP' or 'TOR'",
        '_socks2addr': 'SOCKS Server IP Address.',
        '_socks3port':
        'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
        '_socks4user':
        '******',
        '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
        '_torctlport':
        "The port TOR is taking control commands on. This is necessary for SpiderFoot to tell TOR to re-circuit when it suspects anonymity is compromised.",
        '_modulesenabled':
        "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
    }

    # Legacy way to run the server
    args = None
    p = argparse.ArgumentParser(
        description='SpiderFoot 3.3: Open Source Intelligence Automation.')
    p.add_argument("-d",
                   "--debug",
                   action='store_true',
                   help="Enable debug output.")
    p.add_argument("-l", metavar="IP:port", help="IP and port to listen on.")
    p.add_argument("-m",
                   metavar="mod1,mod2,...",
                   type=str,
                   help="Modules to enable.")
    p.add_argument("-M",
                   "--modules",
                   action='store_true',
                   help="List available modules.")
    p.add_argument("-s", metavar="TARGET", help="Target for the scan.")
    p.add_argument(
        "-t",
        metavar="type1,type2,...",
        type=str,
        help="Event types to collect (modules selected automatically).")
    p.add_argument("-T",
                   "--types",
                   action='store_true',
                   help="List available event types.")
    p.add_argument(
        "-o",
        metavar="tab|csv|json",
        type=str,
        help="Output format. Tab is default. If using json, -q is enforced.")
    p.add_argument("-H",
                   action='store_true',
                   help="Don't print field headers, just data.")
    p.add_argument("-n", action='store_true', help="Strip newlines from data.")
    p.add_argument("-r",
                   action='store_true',
                   help="Include the source data field in tab/csv output.")
    p.add_argument(
        "-S",
        metavar="LENGTH",
        type=int,
        help="Maximum data length to display. By default, all data is shown.")
    p.add_argument("-D",
                   metavar='DELIMITER',
                   type=str,
                   help="Delimiter to use for CSV output. Default is ,.")
    p.add_argument(
        "-f",
        action='store_true',
        help="Filter out other event types that weren't requested with -t.")
    p.add_argument("-F",
                   metavar="type1,type2,...",
                   type=str,
                   help="Show only a set of event types, comma-separated.")
    p.add_argument(
        "-x",
        action='store_true',
        help=
        "STRICT MODE. Will only enable modules that can directly consume your target, and if -t was specified only those events will be consumed by modules. This overrides -t and -m options."
    )
    p.add_argument("-q",
                   action='store_true',
                   help="Disable logging. This will also hide errors!")
    args = p.parse_args()

    if args.debug:
        sfConfig['_debug'] = True
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)
        sfConfig['_debug'] = False

    if args.q or args.o == "json":
        log.setLevel(logging.NOTSET)
        sfConfig['__logging'] = False

    sfModules = dict()
    sft = SpiderFoot(sfConfig)

    # Load each module in the modules directory with a .py extension
    mod_dir = sft.myPath() + '/modules/'

    if not os.path.isdir(mod_dir):
        log.critical(f"Modules directory does not exist: {mod_dir}")
        sys.exit(-1)

    for filename in os.listdir(mod_dir):
        if not filename.endswith(".py"):
            continue
        if not filename.startswith("sfp_"):
            continue

        # Skip the module template and debugging modules
        if filename in ('sfp_template.py', 'sfp_stor_print.py'):
            continue

        modName = filename.split('.')[0]

        # Load and instantiate the module
        sfModules[modName] = dict()
        mod = __import__('modules.' + modName, globals(), locals(), [modName])
        sfModules[modName]['object'] = getattr(mod, modName)()
        try:
            sfModules[modName]['name'] = sfModules[modName]['object'].meta[
                'name']
            sfModules[modName]['cats'] = sfModules[modName]['object'].meta.get(
                'categories', list())
            sfModules[modName]['group'] = sfModules[modName][
                'object'].meta.get('useCases', list())
            sfModules[modName]['labels'] = sfModules[modName][
                'object'].meta.get('flags', list())
            sfModules[modName]['descr'] = sfModules[modName]['object'].meta[
                'summary']
            sfModules[modName]['provides'] = sfModules[modName][
                'object'].producedEvents()
            sfModules[modName]['consumes'] = sfModules[modName][
                'object'].watchedEvents()
            sfModules[modName]['meta'] = sfModules[modName]['object'].meta
            if hasattr(sfModules[modName]['object'], 'opts'):
                sfModules[modName]['opts'] = sfModules[modName]['object'].opts
            if hasattr(sfModules[modName]['object'], 'optdescs'):
                sfModules[modName]['optdescs'] = sfModules[modName][
                    'object'].optdescs
        except BaseException as e:
            log.critical(f"Failed to load {modName}: {e}")
            sys.exit(-1)

    if not sfModules:
        log.critical(f"No modules found in modules directory: {mod_dir}")
        sys.exit(-1)

    # Add module info to sfConfig so it can be used by the UI
    sfConfig['__modules__'] = sfModules
    # Add descriptions of the global config options
    sfConfig['__globaloptdescs__'] = sfOptdescs

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    if args.types:
        dbh = SpiderFootDb(sfConfig, init=True)
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45}  {1}'.format(t, types[t])))
        sys.exit(0)

    if args.l:
        try:
            (host, port) = args.l.split(":")
        except BaseException:
            log.critical("Invalid ip:port format.")
            sys.exit(-1)

        sfWebUiConfig['host'] = host
        sfWebUiConfig['port'] = port

        start_web_server(sfWebUiConfig, sfConfig)
        exit(0)

    start_scan(sfConfig, sfModules, args)
Esempio n. 7
0
def main() -> None:
    # web server config
    sfWebUiConfig = {
        'host': '127.0.0.1',
        'port': 5001,
        'root': '/',
        'cors_origins': [],
    }

    # 'Global' configuration options
    # These can be overriden on a per-module basis, and some will
    # be overridden from saved configuration settings stored in the DB.
    sfConfig = {
        '_debug': False,  # Debug
        '_maxthreads': 3,  # Number of modules to run concurrently
        '__logging': True,  # Logging in general
        '__outputfilter': None,  # Event types to filter from modules' output
        '_useragent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',  # User-Agent to use for HTTP requests
        '_dnsserver': '',  # Override the default resolver
        '_fetchtimeout': 5,  # number of seconds before giving up on a fetch
        '_internettlds':
        'https://publicsuffix.org/list/effective_tld_names.dat',
        '_internettlds_cache': 72,
        '_genericusers':
        "abuse,admin,billing,compliance,devnull,dns,ftp,hostmaster,inoc,ispfeedback,ispsupport,list-request,list,maildaemon,marketing,noc,no-reply,noreply,null,peering,peering-notify,peering-request,phish,phishing,postmaster,privacy,registrar,registry,root,routing-registry,rr,sales,security,spam,support,sysadmin,tech,undisclosed-recipients,unsubscribe,usenet,uucp,webmaster,www",
        '__database': f"{SpiderFootHelpers.dataPath()}/spiderfoot.db",
        '__modules__': None,  # List of modules. Will be set after start-up.
        '__correlationrules__':
        None,  # List of correlation rules. Will be set after start-up.
        '_socks1type': '',
        '_socks2addr': '',
        '_socks3port': '',
        '_socks4user': '',
        '_socks5pwd': '',
    }

    sfOptdescs = {
        '_debug': "Enable debugging?",
        '_maxthreads': "Max number of modules to run concurrently",
        '_useragent':
        "User-Agent string to use for HTTP requests. Prefix with an '@' to randomly select the User Agent from a file containing user agent strings for each request, e.g. @C:\\useragents.txt or @/home/bob/useragents.txt. Or supply a URL to load the list from there.",
        '_dnsserver':
        "Override the default resolver with another DNS server. For example, 8.8.8.8 is Google's open DNS server.",
        '_fetchtimeout':
        "Number of seconds before giving up on a HTTP request.",
        '_internettlds': "List of Internet TLDs.",
        '_internettlds_cache':
        "Hours to cache the Internet TLD list. This can safely be quite a long time given that the list doesn't change too often.",
        '_genericusers':
        "List of usernames that if found as usernames or as part of e-mail addresses, should be treated differently to non-generics.",
        '_socks1type': "SOCKS Server Type. Can be '4', '5', 'HTTP' or 'TOR'",
        '_socks2addr': 'SOCKS Server IP Address.',
        '_socks3port':
        'SOCKS Server TCP Port. Usually 1080 for 4/5, 8080 for HTTP and 9050 for TOR.',
        '_socks4user':
        '******',
        '_socks5pwd': "SOCKS Password. Valid only for SOCKS5 servers.",
        '_modulesenabled':
        "Modules enabled for the scan."  # This is a hack to get a description for an option not actually available.
    }

    # Legacy way to run the server
    args = None
    p = argparse.ArgumentParser(
        description=
        f"SpiderFoot {__version__}: Open Source Intelligence Automation.")
    p.add_argument("-d",
                   "--debug",
                   action='store_true',
                   help="Enable debug output.")
    p.add_argument("-l", metavar="IP:port", help="IP and port to listen on.")
    p.add_argument("-m",
                   metavar="mod1,mod2,...",
                   type=str,
                   help="Modules to enable.")
    p.add_argument("-M",
                   "--modules",
                   action='store_true',
                   help="List available modules.")
    p.add_argument("-C",
                   "--correlate",
                   metavar="scanID",
                   help="Run correlation rules against a scan ID.")
    p.add_argument("-s", metavar="TARGET", help="Target for the scan.")
    p.add_argument(
        "-t",
        metavar="type1,type2,...",
        type=str,
        help="Event types to collect (modules selected automatically).")
    p.add_argument("-u",
                   choices=["all", "footprint", "investigate", "passive"],
                   type=str,
                   help="Select modules automatically by use case")
    p.add_argument("-T",
                   "--types",
                   action='store_true',
                   help="List available event types.")
    p.add_argument("-o",
                   choices=["tab", "csv", "json"],
                   type=str,
                   help="Output format. Tab is default.")
    p.add_argument("-H",
                   action='store_true',
                   help="Don't print field headers, just data.")
    p.add_argument("-n", action='store_true', help="Strip newlines from data.")
    p.add_argument("-r",
                   action='store_true',
                   help="Include the source data field in tab/csv output.")
    p.add_argument(
        "-S",
        metavar="LENGTH",
        type=int,
        help="Maximum data length to display. By default, all data is shown.")
    p.add_argument("-D",
                   metavar='DELIMITER',
                   type=str,
                   help="Delimiter to use for CSV output. Default is ,.")
    p.add_argument(
        "-f",
        action='store_true',
        help="Filter out other event types that weren't requested with -t.")
    p.add_argument("-F",
                   metavar="type1,type2,...",
                   type=str,
                   help="Show only a set of event types, comma-separated.")
    p.add_argument(
        "-x",
        action='store_true',
        help=
        "STRICT MODE. Will only enable modules that can directly consume your target, and if -t was specified only those events will be consumed by modules. This overrides -t and -m options."
    )
    p.add_argument("-q",
                   action='store_true',
                   help="Disable logging. This will also hide errors!")
    p.add_argument("-V",
                   "--version",
                   action='store_true',
                   help="Display the version of SpiderFoot and exit.")
    p.add_argument("-max-threads",
                   type=int,
                   help="Max number of modules to run concurrently.")
    args = p.parse_args()

    if args.version:
        print(
            f"SpiderFoot {__version__}: Open Source Intelligence Automation.")
        sys.exit(0)

    if args.max_threads:
        sfConfig['_maxthreads'] = args.max_threads

    if args.debug:
        sfConfig['_debug'] = True
    else:
        sfConfig['_debug'] = False

    if args.q:
        sfConfig['__logging'] = False

    loggingQueue = mp.Queue()
    logListenerSetup(loggingQueue, sfConfig)
    logWorkerSetup(loggingQueue)
    log = logging.getLogger(f"spiderfoot.{__name__}")
    sft = SpiderFoot(sfConfig)

    # Add descriptions of the global config options
    sfConfig['__globaloptdescs__'] = sfOptdescs

    # Load each module in the modules directory with a .py extension
    try:
        mod_dir = sft.myPath() + '/modules/'
        sfModules = SpiderFootHelpers.loadModulesAsDict(
            mod_dir, ['sfp_template.py'])
    except BaseException as e:
        log.critical(f"Failed to load modules: {e}", exc_info=True)
        sys.exit(-1)

    if not sfModules:
        log.critical(f"No modules found in modules directory: {mod_dir}")
        sys.exit(-1)

    # Load each correlation rule in the correlations directory with
    # a .yaml extension
    try:
        correlations_dir = sft.myPath() + '/correlations/'
        correlationRulesRaw = SpiderFootHelpers.loadCorrelationRulesRaw(
            correlations_dir, ['template.yaml'])
    except BaseException as e:
        log.critical(f"Failed to load correlation rules: {e}", exc_info=True)
        sys.exit(-1)

    # Initialize database handle
    try:
        dbh = SpiderFootDb(sfConfig)
    except Exception as e:
        log.critical(f"Failed to initialize database: {e}", exc_info=True)
        sys.exit(-1)

    # Sanity-check the rules and parse them
    sfCorrelationRules = list()
    if not correlationRulesRaw:
        log.error(
            f"No correlation rules found in correlations directory: {correlations_dir}"
        )
    else:
        try:
            correlator = SpiderFootCorrelator(dbh, correlationRulesRaw)
            sfCorrelationRules = correlator.get_ruleset()
        except Exception as e:
            log.critical(f"Failure initializing correlation rules: {e}",
                         exc_info=True)
            sys.exit(-1)

    # Add modules and correlation rules to sfConfig so they can be used elsewhere
    sfConfig['__modules__'] = sfModules
    sfConfig['__correlationrules__'] = sfCorrelationRules

    if args.correlate:
        if not correlationRulesRaw:
            log.error(
                "Unable to perform correlations as no correlation rules were found."
            )
            sys.exit(-1)

        try:
            log.info(
                f"Running {len(correlationRulesRaw)} correlation rules against scan, {args.correlate}."
            )
            corr = SpiderFootCorrelator(dbh, correlationRulesRaw,
                                        args.correlate)
            corr.run_correlations()
        except Exception as e:
            log.critical(f"Unable to run correlation rules: {e}",
                         exc_info=True)
            sys.exit(-1)
        sys.exit(0)

    if args.modules:
        log.info("Modules available:")
        for m in sorted(sfModules.keys()):
            if "__" in m:
                continue
            print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
        sys.exit(0)

    if args.types:
        dbh = SpiderFootDb(sfConfig, init=True)
        log.info("Types available:")
        typedata = dbh.eventTypes()
        types = dict()
        for r in typedata:
            types[r[1]] = r[0]

        for t in sorted(types.keys()):
            print(('{0:45}  {1}'.format(t, types[t])))
        sys.exit(0)

    if args.l:
        try:
            (host, port) = args.l.split(":")
        except BaseException:
            log.critical("Invalid ip:port format.")
            sys.exit(-1)

        sfWebUiConfig['host'] = host
        sfWebUiConfig['port'] = port

        start_web_server(sfWebUiConfig, sfConfig, loggingQueue)
        sys.exit(0)

    start_scan(sfConfig, sfModules, args, loggingQueue)
Esempio n. 8
0
def start_scan(sfConfig: dict, sfModules: dict, args, loggingQueue) -> None:
    """Start scan

    Args:
        sfConfig (dict): SpiderFoot config options
        sfModules (dict): modules
        args (argparse.Namespace): command line args
        loggingQueue (Queue): main SpiderFoot logging queue
    """
    log = logging.getLogger(f"spiderfoot.{__name__}")

    global dbh
    global scanId

    dbh = SpiderFootDb(sfConfig, init=True)
    sf = SpiderFoot(sfConfig)

    if not args.s:
        log.error(
            "You must specify a target when running in scan mode. Try --help for guidance."
        )
        sys.exit(-1)

    if args.x and not args.t:
        log.error("-x can only be used with -t. Use --help for guidance.")
        sys.exit(-1)

    if args.x and args.m:
        log.error(
            "-x can only be used with -t and not with -m. Use --help for guidance."
        )
        sys.exit(-1)

    if args.r and (args.o and args.o not in ["tab", "csv"]):
        log.error("-r can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.H and (args.o and args.o not in ["tab", "csv"]):
        log.error("-H can only be used when your output format is tab or csv.")
        sys.exit(-1)

    if args.D and args.o != "csv":
        log.error("-D can only be used when using the csv output format.")
        sys.exit(-1)

    target = args.s
    # Usernames and names - quoted on the commandline - won't have quotes,
    # so add them.
    if " " in target:
        target = f"\"{target}\""
    if "." not in target and not target.startswith("+") and '"' not in target:
        target = f"\"{target}\""
    targetType = SpiderFootHelpers.targetTypeFromString(target)

    if not targetType:
        log.error(f"Could not determine target type. Invalid target: {target}")
        sys.exit(-1)

    target = target.strip('"')

    modlist = list()
    if not args.t and not args.m and not args.u:
        log.warning(
            "You didn't specify any modules, types or use case, so all modules will be enabled."
        )
        for m in list(sfModules.keys()):
            if "__" in m:
                continue
            modlist.append(m)

    signal.signal(signal.SIGINT, handle_abort)
    # If the user is scanning by type..
    # 1. Find modules producing that type
    if args.t:
        types = args.t
        modlist = sf.modulesProducing(types)
        newmods = deepcopy(modlist)
        newmodcpy = deepcopy(newmods)

        # 2. For each type those modules consume, get modules producing
        while len(newmodcpy) > 0:
            for etype in sf.eventsToModules(newmodcpy):
                xmods = sf.modulesProducing([etype])
                for mod in xmods:
                    if mod not in modlist:
                        modlist.append(mod)
                        newmods.append(mod)
            newmodcpy = deepcopy(newmods)
            newmods = list()

    # Easier if scanning by module
    if args.m:
        modlist = list(filter(None, args.m.split(",")))

    # Select modules if the user selected usercase
    if args.u:
        usecase = args.u[0].upper() + args.u[
            1:]  # Make the first Letter Uppercase
        for mod in sfConfig['__modules__']:
            if usecase == 'All' or usecase in sfConfig['__modules__'][mod][
                    'group']:
                modlist.append(mod)

    # Add sfp__stor_stdout to the module list
    typedata = dbh.eventTypes()
    types = dict()
    for r in typedata:
        types[r[1]] = r[0]

    sfp__stor_stdout_opts = sfConfig['__modules__']['sfp__stor_stdout']['opts']
    sfp__stor_stdout_opts['_eventtypes'] = types
    if args.f:
        if args.f and not args.t:
            log.error("You can only use -f with -t. Use --help for guidance.")
            sys.exit(-1)
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.F:
        sfp__stor_stdout_opts['_requested'] = args.F.split(",")
        sfp__stor_stdout_opts['_showonlyrequested'] = True
    if args.o:
        if args.o not in ["tab", "csv", "json"]:
            log.error(
                "Invalid output format selected. Must be 'tab', 'csv' or 'json'."
            )
            sys.exit(-1)
        sfp__stor_stdout_opts['_format'] = args.o
    if args.t:
        sfp__stor_stdout_opts['_requested'] = args.t.split(",")
    if args.n:
        sfp__stor_stdout_opts['_stripnewline'] = True
    if args.r:
        sfp__stor_stdout_opts['_showsource'] = True
    if args.S:
        sfp__stor_stdout_opts['_maxlength'] = args.S
    if args.D:
        sfp__stor_stdout_opts['_csvdelim'] = args.D
    if args.x:
        tmodlist = list()
        modlist = list()
        xmods = sf.modulesConsuming([targetType])
        for mod in xmods:
            if mod not in modlist:
                tmodlist.append(mod)

        # Remove any modules not producing the type requested
        rtypes = args.t.split(",")
        for mod in tmodlist:
            for r in rtypes:
                if not sfModules[mod]['provides']:
                    continue
                if r in sfModules[mod].get('provides',
                                           []) and mod not in modlist:
                    modlist.append(mod)

    if len(modlist) == 0:
        log.error("Based on your criteria, no modules were enabled.")
        sys.exit(-1)

    modlist += ["sfp__stor_db", "sfp__stor_stdout"]

    if sfConfig['__logging']:
        log.info(f"Modules enabled ({len(modlist)}): {','.join(modlist)}")

    cfg = sf.configUnserialize(dbh.configGet(), sfConfig)

    # Debug mode is a variable that gets stored to the DB, so re-apply it
    if args.debug:
        cfg['_debug'] = True
    else:
        cfg['_debug'] = False

    # If strict mode is enabled, filter the output from modules.
    if args.x and args.t:
        cfg['__outputfilter'] = args.t.split(",")

    # Prepare scan output headers
    if args.o == "json":
        print("[", end='')
    elif not args.H:
        delim = "\t"

        if args.o == "tab":
            delim = "\t"

        if args.o == "csv":
            if args.D:
                delim = args.D
            else:
                delim = ","

        if args.r:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}{5}{6}'.format(
                    "Source", delim, "Type", delim, "Source Data", delim,
                    "Data")
            else:
                headers = delim.join(["Source", "Type", "Source Data", "Data"])
        else:
            if delim == "\t":
                headers = '{0:30}{1}{2:45}{3}{4}'.format(
                    "Source", delim, "Type", delim, "Data")
            else:
                headers = delim.join(["Source", "Type", "Data"])

        print(headers)

    # Start running a new scan
    scanName = target
    scanId = SpiderFootHelpers.genScanInstanceId()
    try:
        p = mp.Process(target=startSpiderFootScanner,
                       args=(loggingQueue, scanName, scanId, target,
                             targetType, modlist, cfg))
        p.daemon = True
        p.start()
    except BaseException as e:
        log.error(f"Scan [{scanId}] failed: {e}")
        sys.exit(-1)

    # Poll for scan status until completion
    while True:
        time.sleep(1)
        info = dbh.scanInstanceGet(scanId)
        if not info:
            continue
        if info[5] in [
                "ERROR-FAILED", "ABORT-REQUESTED", "ABORTED", "FINISHED"
        ]:
            if sfConfig['__logging']:
                log.info(f"Scan completed with status {info[5]}")
            if args.o == "json":
                print("]")
            sys.exit(0)

    return
Esempio n. 9
0
    sf = SpiderFoot(sfConfig)
    dbh = SpiderFootDb(sfConfig, init=True)

    if not args.l:
        if args.modules:
            log.info("Modules available:")
            for m in sorted(sfModules.keys()):
                if "__" in m:
                    continue
                print(('{0:25}  {1}'.format(m, sfModules[m]['descr'])))
            sys.exit(0)

        if args.types:
            log.info("Types available:")
            typedata = dbh.eventTypes()
            types = dict()
            for r in typedata:
                types[r[1]] = r[0]

            for t in sorted(types.keys()):
                print(('{0:45}  {1}'.format(t, types[t])))
            sys.exit(0)

        if not args.s:
            log.error(
                "You must specify a target when running in scan mode. Try --help for guidance."
            )
            sys.exit(-1)

        if args.x and not args.t: