Esempio n. 1
0
    def test_cache_get_should_return_a_string(self):
        """
        Round-trip a value through the SpiderFoot cache.

        Test cachePut(self, label, data)
        Test cacheGet(self, label, timeoutHrs)
        """
        spiderfoot = SpiderFoot(dict())
        cache_label, cache_data = 'test-cache-label', 'test-cache-data'

        spiderfoot.cachePut(cache_label, cache_data)

        # Use the configured cache period, falling back to 0 when it is unset.
        timeout_hrs = spiderfoot.opts.get('cacheperiod', 0)
        cached = spiderfoot.cacheGet(cache_label, timeout_hrs)

        self.assertIsInstance(cached, str)
        self.assertEqual(cache_data, cached)
Esempio n. 2
0
class SpiderFootScanner():
    """SpiderFootScanner object.

    Validates the scan parameters, records the scan in the database, loads
    the requested modules and dispatches events between them until the scan
    finishes, fails or is aborted.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    # Class-level defaults for the name-mangled private attributes.
    # NOTE: list() and dict() below each create ONE object that is shared by
    # every instance unless an instance rebinds the attribute; mutating such
    # a default in place affects all scanners in the same process.
    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    __moduleList = list()
    __target = None
    __moduleInstances = dict()
    __modconfig = dict()
    __scanName = None

    def __init__(self,
                 scanName: str,
                 scanId: str,
                 targetValue: str,
                 targetType: str,
                 moduleList: list,
                 globalOpts: dict,
                 start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList
        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create a unique ID for this scan in the back-end DB.
        if scanId:
            self.__scanId = scanId
        else:
            self.__scanId = SpiderFootHelpers.genScanInstanceId()

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}") from None

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            proxy_port = int(self.__config.get('_socks3port') or 0)

            if not proxy_port:
                if proxy_type in ['4', '5']:
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Set up the Internet TLD list.
        # If the cached does not exist or has expired, reload it from scratch.
        tld_data = self.__sf.cacheGet("internet_tlds",
                                      self.__config['_internettlds_cache'])
        if tld_data is None:
            tld_data = self.__sf.optValueToData(self.__config['_internettlds'])
            if tld_data is None:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Could not update TLD list"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError("Could not update TLD list")
            self.__sf.cachePut("internet_tlds", tld_data)

        self.__config['_internettlds'] = tld_data.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        self.__sharedThreadPool = SpiderFootThreadPool(
            threads=self.__config.get("_maxthreads", 3),
            name='sharedThreadPool')

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        return self.__scanId

    @property
    def status(self) -> str:
        return self.__status

    def __setStatus(self,
                    status: str,
                    started: float = None,
                    ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING", "STARTING", "STARTED", "RUNNING",
                "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self) -> None:
        """Start running a scan.

        Raises:
            AssertionError: Never actually raised.
        """
        failed = True

        try:
            self.__setStatus("STARTING", time.time() * 1000, None)
            self.__sf.status(
                f"Scan [{self.__scanId}] for '{self.__target.targetValue}' initiated."
            )

            self.eventQueue = queue.Queue()

            self.__sharedThreadPool.start()

            # moduleList = list of modules the user wants to run
            self.__sf.debug(f"Loading {len(self.__moduleList)} modules ...")
            for modName in self.__moduleList:
                if not modName:
                    continue

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                try:
                    mod = getattr(module, modName)()
                    mod.__name__ = modName
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    continue

                # Set up the module options, scan ID, database handle and listeners
                try:
                    # Configuration is a combined global config with module-specific options
                    self.__modconfig[modName] = deepcopy(
                        self.__config['__modules__'][modName]['opts'])
                    for opt in list(self.__config.keys()):
                        self.__modconfig[modName][opt] = deepcopy(
                            self.__config[opt])

                    # clear any listener relationships from the past
                    mod.clearListeners()
                    mod.setScanId(self.__scanId)
                    mod.setSharedThreadPool(self.__sharedThreadPool)
                    mod.setDbh(self.__dbh)
                    mod.setup(self.__sf, self.__modconfig[modName])
                except Exception:
                    self.__sf.error(
                        f"Module {modName} initialization failed: {traceback.format_exc()}"
                    )
                    mod.errorState = True
                    continue

                # Override the module's local socket module to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    try:
                        mod._updateSocket(socket)
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} socket setup failed: {e}")
                        continue

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    try:
                        mod.setOutputFilter(self.__config['__outputfilter'])
                    except Exception as e:
                        self.__sf.error(
                            f"Module {modName} output filter setup failed: {e}"
                        )
                        continue

                # Give modules a chance to 'enrich' the original target with aliases of that target.
                try:
                    newTarget = mod.enrichTarget(self.__target)
                    if newTarget is not None:
                        self.__target = newTarget
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} target enrichment failed: {e}")
                    continue

                # Register the target with the module
                try:
                    mod.setTarget(self.__target)
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} failed to set target '{self.__target}': {e}"
                    )
                    continue

                # Set up the outgoing event queue
                try:
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()
                except Exception as e:
                    self.__sf.error(
                        f"Module {modName} event queue setup failed: {e}")
                    continue

                self.__moduleInstances[modName] = mod
                self.__sf.status(f"{modName} module loaded.")

            self.__sf.debug(
                f"Scan [{self.__scanId}] loaded {len(self.__moduleInstances)} modules."
            )

            if not self.__moduleInstances:
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                self.__dbh.close()
                return

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            psMod.outgoingEventQueue = self.eventQueue
            psMod.incomingEventQueue = queue.Queue()

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # If in interactive mode, loop through this shared global variable
            # waiting for inputs, and process them until my status is set to
            # FINISHED.

            # Check in case the user requested to stop the scan between modules
            # initializing
            scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
            if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                raise AssertionError("ABORT-REQUESTED")

            # start threads
            self.waitForThreads()
            failed = False

        except (KeyboardInterrupt, AssertionError):
            self.__sf.status(f"Scan [{self.__scanId}] aborted.")
            self.__setStatus("ABORTED", None, time.time() * 1000)

        except BaseException as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan."
                + "Please report this as a bug: " + +repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        finally:
            if not failed:
                self.__setStatus("FINISHED", None, time.time() * 1000)
                self.runCorrelations()
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
            self.__dbh.close()

    def runCorrelations(self) -> None:
        """Run the configured correlation rules against this scan's results."""

        rules = self.__config['__correlationrules__']
        self.__sf.status(f"Running {len(rules)} correlation rules.")

        # Map each rule ID to its raw YAML source for the correlator.
        ruleset = {rule['id']: rule['rawYaml'] for rule in rules}

        correlator = SpiderFootCorrelator(self.__dbh, ruleset, self.__scanId)
        correlator.run_correlations()

    def waitForThreads(self) -> None:
        """Wait for threads.

        Starts every loaded module, then polls the shared event queue and
        forwards a deep copy of each event to every healthy module that
        watches its type (or watches "*"). Once the queue is empty and
        threadsFinished() reports idle twice in a row, a 'FINISHED' marker is
        queued for each healthy module; this is repeated for three passes
        before the loop ends. On exit, for any reason, all modules are told
        to stop and the shared thread pool is shut down.

        Raises:
            TypeError: queue tried to process a malformed event
            AssertionError: scan halted for some reason
        """

        # Loop iteration counter; only used to throttle logging and the
        # abort check below.
        counter = 0

        try:
            # Nothing to do when no event queue has been created.
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()
            # Number of 'FINISHED' rounds still to be sent before exiting.
            final_passes = 3

            # watch for newly-generated events
            while True:

                # log status of threads every 10 iterations
                log_status = counter % 10 == 0
                counter += 1

                # The abort request is polled from the database on the same
                # schedule as the status logging, not on every iteration.
                if log_status:
                    scanstatus = self.__dbh.scanInstanceGet(self.__scanId)
                    if scanstatus and scanstatus[5] == "ABORT-REQUESTED":
                        raise AssertionError("ABORT-REQUESTED")

                try:
                    # Non-blocking read; queue.Empty is the idle path below.
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            if final_passes < 1:
                                break
                            # Queue the 'FINISHED' marker for every healthy
                            # module (presumably what triggers each module's
                            # finished() hook - confirm in the plugin class).
                            for mod in self.__moduleInstances.values():
                                if not mod.errorState and mod.incomingEventQueue is not None:
                                    mod.incomingEventQueue.put('FINISHED')
                            sleep(.1)
                            # Wait for the modules to go idle again, logging
                            # only every 100th check.
                            while not self.threadsFinished(log_status):
                                log_status = counter % 100 == 0
                                counter += 1
                                sleep(.01)
                            final_passes -= 1

                    else:
                        # save on CPU
                        sleep(.1)
                    continue

                # Anything on the queue that is not an event is a bug.
                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    if not mod.errorState and mod.incomingEventQueue is not None:
                        watchedEvents = mod.watchedEvents()
                        if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                            # Each module receives its own copy of the event.
                            mod.incomingEventQueue.put(deepcopy(sfEvent))

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True
            self.__sharedThreadPool.shutdown(wait=True)

    def threadsFinished(self, log_status: bool = False) -> bool:
        """Check if all threads are complete.

        Args:
            log_status (bool): print thread queue status to debug log

        Returns:
            bool: True if all threads are finished
        """
        if self.eventQueue is None:
            return True

        modules_waiting = dict()
        for m in self.__moduleInstances.values():
            try:
                if m.incomingEventQueue is not None:
                    modules_waiting[m.__name__] = m.incomingEventQueue.qsize()
            except Exception:
                with suppress(Exception):
                    m.errorState = True
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)

        modules_running = []
        for m in self.__moduleInstances.values():
            try:
                if m.running:
                    modules_running.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        modules_errored = []
        for m in self.__moduleInstances.values():
            try:
                if m.errorState:
                    modules_errored.append(m.__name__)
            except Exception:
                with suppress(Exception):
                    m.errorState = True

        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        for mod in self.__moduleInstances.values():
            if mod.errorState and mod.incomingEventQueue is not None:
                self.__sf.debug(
                    f"Clearing and unsetting incomingEventQueue for errored module {mod.__name__}."
                )
                with suppress(Exception):
                    while 1:
                        mod.incomingEventQueue.get_nowait()
                mod.incomingEventQueue = None

        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                try:
                    while True:
                        mod.incomingEventQueue.get_nowait()
                except Exception:
                    pass

        if log_status:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if not events_queued:
                events_queued = 'None'
            self.__sf.debug(
                f"Events queued: {sum([m[-1] for m in modules_waiting]):,} ({events_queued})"
            )
            if modules_running:
                self.__sf.debug(
                    f"Modules running: {len(modules_running):,} ({', '.join(modules_running)})"
                )
            if modules_errored:
                self.__sf.debug(
                    f"Modules errored: {len(modules_errored):,} ({', '.join(modules_errored)})"
                )

        if all(queues_empty) and not modules_running:
            return True
        return False
Esempio n. 3
0
class SpiderFootScanner():
    """SpiderFootScanner object.

    Validates the scan parameters, records the scan in the database and
    prepares the scan configuration before the modules are started.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    # Class-level defaults for the name-mangled private attributes.
    # NOTE: list() and dict() below each create ONE object that is shared by
    # every instance unless an instance rebinds the attribute; mutating such
    # a default in place affects all scanners in the same process.
    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    __moduleList = list()
    __target = None
    __moduleInstances = dict()
    __modconfig = dict()
    __scanName = None

    def __init__(self,
                 scanName,
                 scanId,
                 targetValue,
                 targetType,
                 moduleList,
                 globalOpts,
                 start=True):
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # Create a unique ID for this scan in the back-end DB.
        if scanId:
            self.__scanId = scanId
        else:
            self.__scanId = SpiderFootHelpers.genScanInstanceId()

        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}")

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data

        # If a proxy server was specified, set it up
        proxy_type = self.__config.get('_socks1type')
        if proxy_type:
            # TODO: allow DNS lookup to be configurable when using a proxy
            # - proxy DNS lookup: socks5h:// and socks4a://
            # - local DNS lookup: socks5:// and socks4://
            if proxy_type == '4':
                proxy_proto = 'socks4://'
            elif proxy_type == '5':
                proxy_proto = 'socks5://'
            elif proxy_type == 'HTTP':
                proxy_proto = 'http://'
            elif proxy_type == 'TOR':
                proxy_proto = 'socks5h://'
            else:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Invalid proxy type: {proxy_type}"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(f"Invalid proxy type: {proxy_type}")

            proxy_host = self.__config.get('_socks2addr', '')

            if not proxy_host:
                self.__sf.status(
                    f"Scan [{self.__scanId}] failed: Proxy type is set ({proxy_type}) but proxy address value is blank"
                )
                self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
                raise ValueError(
                    f"Proxy type is set ({proxy_type}) but proxy address value is blank"
                )

            proxy_port = int(self.__config.get('_socks3port') or 0)

            if not proxy_port:
                if proxy_type == '4' or proxy_type == '5':
                    proxy_port = 1080
                elif proxy_type.upper() == 'HTTP':
                    proxy_port = 8080
                elif proxy_type.upper() == 'TOR':
                    proxy_port = 9050

            proxy_username = self.__config.get('_socks4user', '')
            proxy_password = self.__config.get('_socks5pwd', '')

            if proxy_username or proxy_password:
                proxy_auth = f"{proxy_username}:{proxy_password}"
                proxy = f"{proxy_proto}{proxy_auth}@{proxy_host}:{proxy_port}"
            else:
                proxy = f"{proxy_proto}{proxy_host}:{proxy_port}"

            self.__sf.debug(f"Using proxy: {proxy}")
            self.__sf.socksProxy = proxy
        else:
            self.__sf.socksProxy = None

        # Override the default DNS server
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        # Get internet TLDs
        tlddata = self.__sf.cacheGet("internet_tlds",
                                     self.__config['_internettlds_cache'])

        # If it wasn't loadable from cache, load it from scratch
        if tlddata is None:
            self.__config['_internettlds'] = self.__sf.optValueToData(
                self.__config['_internettlds'])
            self.__sf.cachePut("internet_tlds", self.__config['_internettlds'])
        else:
            self.__config["_internettlds"] = tlddata.splitlines()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        # Used when module threading is enabled
        self.eventQueue = None

        if start:
            self.__startScan()

    @property
    def scanId(self):
        return self.__scanId

    @property
    def status(self):
        return self.__status

    def __setStatus(self, status, started=None, ended=None):
        """Set the status of the currently running scan (if any).

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        if status not in [
                "INITIALIZING", "STARTING", "STARTED", "RUNNING",
                "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
                "ERROR-FAILED"
        ]:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __startScan(self, threaded=True):
        """Load the requested modules, fire the initial events and run the scan.

        The scan status moves from STARTING to RUNNING and ends as FINISHED,
        ABORTED (a module reported a stop request before the scan got going)
        or ERROR-FAILED (any exception was raised while setting up or
        running). The database handle is closed before returning.

        Args:
            threaded (bool): when True, modules exchange events through queues
                and are driven by waitForThreads(); when False, modules are
                registered directly as listeners of one another.
        """
        aborted = False

        self.__setStatus("STARTING", time.time() * 1000, None)
        self.__sf.status(f"Scan [{self.__scanId}] initiated.")

        if threaded:
            # Single queue that every module publishes its events to
            self.eventQueue = queue.Queue()

        try:
            # moduleList = list of modules the user wants to run
            for modName in self.__moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(),
                                        locals(), [modName])
                except ImportError:
                    # A module that cannot be imported is skipped, not fatal
                    self.__sf.error(f"Failed to load module: {modName}")
                    continue

                # The module file is expected to define a class named after itself
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.__config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.__modconfig[modName] = deepcopy(
                    self.__config['__modules__'][modName]['opts'])
                for opt in list(self.__config.keys()):
                    self.__modconfig[modName][opt] = deepcopy(
                        self.__config[opt])

                # clear any listener relationships from the past
                mod.clearListeners()
                mod.setup(self.__sf, self.__modconfig[modName])
                mod.setDbh(self.__dbh)
                mod.setScanId(self.__scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(self.__target)
                if newTarget is not None:
                    self.__target = newTarget
                self.__moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.__config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.__config['__outputfilter']:
                    mod.setOutputFilter(self.__config['__outputfilter'])

                # Register the target with the module
                mod.setTarget(self.__target)

                if threaded:
                    # Set up the outgoing event queue
                    mod.outgoingEventQueue = self.eventQueue
                    mod.incomingEventQueue = queue.Queue()

                self.__sf.status(modName + " module loaded.")

            # sort modules by priority
            self.__moduleInstances = OrderedDict(
                sorted(self.__moduleInstances.items(),
                       key=lambda m: m[-1]._priority))

            if not threaded:
                # Register listener modules and then start all modules sequentially
                for module in list(self.__moduleInstances.values()):

                    for listenerModule in list(
                            self.__moduleInstances.values()):
                        # Careful not to register twice or you will get duplicate events
                        if listenerModule in module._listenerModules:
                            continue
                        # Note the absence of a check for whether a module can register
                        # to itself. That is intentional because some modules will
                        # act on their own notifications (e.g. sfp_dns)!
                        if listenerModule.watchedEvents() is not None:
                            module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(self.__target)
            psMod.setDbh(self.__dbh)
            psMod.clearListeners()
            if threaded:
                psMod.outgoingEventQueue = self.eventQueue
                psMod.incomingEventQueue = queue.Queue()
            else:
                for mod in list(self.__moduleInstances.values()):
                    if mod.watchedEvents() is not None:
                        psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.__targetType == 'INTERNET_NAME':
                if self.__sf.isDomain(self.__targetValue,
                                      self.__config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME',
                                                 self.__targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # Check in case the user requested to stop the scan between modules
            # initializing
            for mod in list(self.__moduleInstances.values()):
                if mod.checkForStop():
                    self.__setStatus('ABORTING')
                    aborted = True
                    break

            # start threads
            if threaded and not aborted:
                self.waitForThreads()

            if aborted:
                self.__sf.status(f"Scan [{self.__scanId}] aborted.")
                self.__setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
                self.__setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            # BaseException is deliberate: even KeyboardInterrupt/SystemExit
            # must leave the scan marked as failed rather than stuck RUNNING.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan. "
                + "Please report this as a bug: " + repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)))
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
        finally:
            # Close the handle even if reporting the failure above raised
            self.__dbh.close()

    def waitForThreads(self):
        counter = 0

        try:
            if not self.eventQueue:
                return

            # start one thread for each module
            for mod in self.__moduleInstances.values():
                mod.start()

            # watch for newly-generated events
            while True:

                # log status of threads every 100 iterations
                log_status = counter % 100 == 0
                counter += 1

                try:
                    sfEvent = self.eventQueue.get_nowait()
                    self.__sf.debug(
                        f"waitForThreads() got event, {sfEvent.eventType}, from eventQueue."
                    )
                except queue.Empty:
                    # check if we're finished
                    if self.threadsFinished(log_status):
                        sleep(.1)
                        # but are we really?
                        if self.threadsFinished(log_status):
                            break
                    else:
                        # save on CPU
                        sleep(.01)
                    continue

                if not isinstance(sfEvent, SpiderFootEvent):
                    raise TypeError(
                        f"sfEvent is {type(sfEvent)}; expected SpiderFootEvent"
                    )

                # for every module
                for mod in self.__moduleInstances.values():
                    # if it's been aborted
                    if mod._stopScanning:
                        # break out of the while loop
                        raise AssertionError(f"{mod.__name__} requested stop")

                    # send it the new event if applicable
                    watchedEvents = mod.watchedEvents()
                    if sfEvent.eventType in watchedEvents or "*" in watchedEvents:
                        mod.incomingEventQueue.put(deepcopy(sfEvent))

        except (KeyboardInterrupt, AssertionError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] aborted, {e}.")

        finally:
            # tell the modules to stop
            for mod in self.__moduleInstances.values():
                mod._stopScanning = True

    def threadsFinished(self, log_status=False):
        if self.eventQueue is None:
            return True

        modules_waiting = {
            m.__name__: m.incomingEventQueue.qsize()
            for m in self.__moduleInstances.values()
        }
        modules_waiting = sorted(modules_waiting.items(),
                                 key=lambda x: x[-1],
                                 reverse=True)
        modules_running = [
            m.__name__ for m in self.__moduleInstances.values() if m.running
        ]
        queues_empty = [qsize == 0 for m, qsize in modules_waiting]

        if not modules_running and not queues_empty:
            self.__sf.debug("Clearing queues for stalled/aborted modules.")
            for mod in self.__moduleInstances.values():
                try:
                    while True:
                        mod.incomingEventQueue.get_nowait()
                except Exception:
                    pass

        if log_status and modules_running:
            events_queued = ", ".join([
                f"{mod}: {qsize:,}" for mod, qsize in modules_waiting[:5]
                if qsize > 0
            ])
            if events_queued:
                self.__sf.info(f"Events queued: {events_queued}")

        if all(queues_empty) and not modules_running:
            return True
        return False
Esempio n. 4
0
class SpiderFootScanner():
    """Run one SpiderFoot scan against a single target.

    Constructing an instance stores the arguments in ``temp`` and immediately
    calls ``startScan()``, which performs the whole scan before returning.
    """

    # Temporary storage for the constructor arguments; read back by startScan()
    temp = None

    def __init__(self, scanName, scanTarget, targetType, scanId, moduleList,
                 globalOpts, moduleOpts):
        """Initialize SpiderFootScanner object and immediately start a scan
        of the specified target.

        Args:
            scanName (str): name of the scan
            scanTarget (str): scan target
            targetType (str): scan target type
            scanId (str): scan identifier
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            moduleOpts (dict): unused

        Returns:
            None

        Raises:
            TypeError: scanName, scanTarget or scanId is not a str,
                moduleList is not a list, or globalOpts is not a dict
        """

        if not isinstance(scanName, str):
            raise TypeError("scanName is %s; expected str()" % type(scanName))
        if not isinstance(scanTarget, str):
            raise TypeError("scanTarget is %s; expected str()" % type(scanTarget))
        if not isinstance(scanId, str):
            raise TypeError("scanId is %s; expected str()" % type(scanId))
        if not isinstance(moduleList, list):
            raise TypeError("moduleList is %s; expected list()" % type(moduleList))
        if not isinstance(globalOpts, dict):
            raise TypeError("globalOpts is %s; expected dict()" % type(globalOpts))

        # Copy the options so later changes here do not affect the caller's dict
        self.temp = dict()
        self.temp['config'] = deepcopy(globalOpts)
        self.temp['targetValue'] = scanTarget
        self.temp['targetType'] = targetType
        self.temp['moduleList'] = moduleList
        self.temp['scanName'] = scanName
        self.temp['scanId'] = scanId
        self.startScan()

    def setStatus(self, status, started=None, ended=None):
        """Set the status of the currently running scan (if any).

        The status is stored on the instance and written to the database
        through ``scanInstanceSet()``.

        Args:
            status (str): scan status ("RUNNING", "STARTING", "STARTED", "ABORT-REQUESTED", "ABORTED", "FINISHED", "ERROR-FAILED")
            started: scan start time passed on to the database; this class
                passes ``time.time() * 1000`` or None
            ended: scan end time passed on to the database; this class
                passes ``time.time() * 1000`` or None

        Returns:
            None
        """
        self.status = status
        self.dbh.scanInstanceSet(self.scanId, started, ended, status)
        return None

    def run(self):
        """Start running a scan."""
        self.startScan()

    def getId(self):
        """Return the scan ID, or None if no scan ID has been set yet."""
        if hasattr(self, 'scanId'):
            return self.scanId
        return None

    def startScan(self):
        """Start running a scan.

        Creates the scan record, applies the global proxy/DNS/user-agent/TLD
        options, loads and wires up the requested modules, then sends the
        ROOT and initial target events. The final status is FINISHED,
        ABORTED or ERROR-FAILED, and the database handle is closed before
        returning.

        Returns:
            None
        """
        self.moduleInstances = dict()
        self.sf = SpiderFoot(self.temp['config'])
        self.config = deepcopy(self.temp['config'])
        self.dbh = SpiderFootDb(self.temp['config'])
        self.targetValue = self.temp['targetValue']
        self.targetType = self.temp['targetType']
        self.moduleList = self.temp['moduleList']
        self.modconfig = dict()
        self.scanName = self.temp['scanName']
        self.scanId = self.temp['scanId']
        aborted = False
        self.sf.setDbh(self.dbh)

        # Register the scan ID and create the scan record in the back-end DB.
        self.sf.setGUID(self.scanId)
        self.dbh.scanInstanceCreate(self.scanId,
                                    self.scanName, self.targetValue)
        self.setStatus("STARTING", time.time() * 1000, None)

        # Create our target
        try:
            target = SpiderFootTarget(self.targetValue, self.targetType)
        except BaseException as e:
            self.sf.status("Scan [%s] failed: %s" % (self.scanId, e))
            self.setStatus("ERROR-FAILED", None, time.time() * 1000)
            # This path skips the close at the end of the method, so release
            # the database handle here.
            self.dbh.close()
            return None

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        self.dbh.scanConfigSet(self.scanId,
                               self.sf.configSerialize(deepcopy(self.config)))

        self.sf.status("Scan [" + self.scanId + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = self.config['_socks4user'] or ''
                socksPassword = self.config['_socks5pwd'] or ''
                # Credentials are only included when both parts are present
                creds = ""
                if socksUsername and socksPassword:
                    creds = socksUsername + ":" + socksPassword + "@"
                proxy = creds + socksAddr + ":" + str(socksPort)

                # Any other proxy type leaves the address without a scheme
                if self.config['_socks1type'] == '4':
                    proxy = 'socks4://' + proxy
                elif self.config['_socks1type'] == '5':
                    proxy = 'socks5://' + proxy
                elif self.config['_socks1type'] == 'HTTP':
                    proxy = 'http://' + proxy
                elif self.config['_socks1type'] == 'TOR':
                    proxy = 'socks5h://' + proxy

                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")

                self.sf.updateSocket(proxy)
            else:
                self.sf.revertSocket()

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(
                self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds",
                                       self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(
                    self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            for modName in self.moduleList:
                if modName == '':
                    continue

                try:
                    module = __import__('modules.' + modName, globals(), locals(),
                                        [modName])
                except ImportError:
                    # A module that cannot be imported is skipped, not fatal
                    self.sf.error("Failed to load module: " + modName, False)
                    continue

                # The module file is expected to define a class named after itself
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Module may have been renamed or removed
                if modName not in self.config['__modules__']:
                    continue

                # Set up the module
                # Configuration is a combined global config with module-specific options
                self.modconfig[modName] = deepcopy(self.config['__modules__'][modName]['opts'])
                for opt in list(self.config.keys()):
                    self.modconfig[modName][opt] = deepcopy(self.config[opt])

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.modconfig[modName])
                mod.setDbh(self.dbh)
                mod.setScanId(self.scanId)

                # Give modules a chance to 'enrich' the original target with
                # aliases of that target.
                newTarget = mod.enrichTarget(target)
                if newTarget is not None:
                    target = newTarget
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                # Set up event output filters if requested
                if self.config['__outputfilter']:
                    mod.setOutputFilter(self.config['__outputfilter'])

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in list(self.moduleInstances.values()):
                # Register the target with the module
                module.setTarget(target)

                for listenerModule in list(self.moduleInstances.values()):
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            # Now we are ready to roll..
            self.setStatus("RUNNING")

            # Create a pseudo module for the root event to originate from
            psMod = SpiderFootPlugin()
            psMod.__name__ = "SpiderFoot UI"
            psMod.setTarget(target)
            psMod.setDbh(self.dbh)
            psMod.clearListeners()
            for mod in list(self.moduleInstances.values()):
                if mod.watchedEvents() is not None:
                    psMod.registerListener(mod)

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("ROOT", self.targetValue, "", None)
            psMod.notifyListeners(rootEvent)
            firstEvent = SpiderFootEvent(self.targetType, self.targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

            # Special case.. check if an INTERNET_NAME is also a domain
            if self.targetType == 'INTERNET_NAME':
                if self.sf.isDomain(self.targetValue, self.config['_internettlds']):
                    firstEvent = SpiderFootEvent('DOMAIN_NAME', self.targetValue,
                                                 "SpiderFoot UI", rootEvent)
                    psMod.notifyListeners(firstEvent)

            # Check in case the user requested to stop the scan between modules
            # initializing
            for module in list(self.moduleInstances.values()):
                if module.checkForStop():
                    self.setStatus('ABORTING')
                    aborted = True
                    break

            if aborted:
                self.sf.status("Scan [" + self.scanId + "] aborted.")
                self.setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.sf.status("Scan [" + self.scanId + "] completed.")
                self.setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            # Anything raised above marks the scan as failed rather than
            # leaving it in a non-final state.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.scanId + "] failed: " + str(e))
            self.setStatus("ERROR-FAILED", None, time.time() * 1000)

        self.dbh.close()
Esempio n. 5
0
class SpiderFootScanner:
    """Run one SpiderFoot scan; the scan is started explicitly with startScan()."""

    # Module name -> module instance while a scan is running, otherwise None
    moduleInstances = None
    # Last status recorded for this scanner's scan
    status = "UNKNOWN"
    # GUID of the scan started by this object; None until startScan() runs
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        """Store the scan parameters without starting the scan.

        Args:
            name: name of the scan
            target: scan target
            moduleList: names of the modules to run
            globalOpts: scan options; a deep copy is kept
            moduleOpts: unused
        """
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

    def scanStatus(self, id):
        """Return the status of the scan with the given ID.

        Args:
            id: scan ID to look up

        Returns:
            the current status, or "UNKNOWN" if the ID is not this
            scanner's own scan ID
        """
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    # Stop a scan (id variable is unnecessary for now given that only one simultaneous
    # scan is permitted.)
    def stopScan(self, id):
        """Ask every loaded module to stop scanning.

        Does nothing when the ID is not this scanner's own scan ID or when
        no modules are currently loaded.

        Args:
            id: ID of the scan to stop
        """
        if id != self.myId:
            return None

        if self.moduleInstances is None:
            return None

        for modName in self.moduleInstances.keys():
            self.moduleInstances[modName].stopScanning()

    def startScan(self):
        """Start running a scan.

        Creates the scan record, applies the global proxy/DNS/user-agent/TLD
        options, loads the requested modules, registers them as listeners
        of one another and calls ``start()`` on each in turn. The final
        status is FINISHED, ABORTED or ERROR-FAILED. The database handle is
        closed and the module instances are dropped before returning.
        """
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        self.config['__guid__'] = dbh.scanInstanceGenGUID(self.target)
        self.sf.setScanId(self.config['__guid__'])
        self.myId = self.config['__guid__']
        dbh.scanInstanceCreate(self.config['__guid__'], self.name, self.target)
        dbh.scanInstanceSet(self.config['__guid__'], time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(self.config['__guid__'], self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + self.config['__guid__'] + "] initiated.")
        # moduleList = list of modules the user wants to run
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                # SOCKS4 is the fallback for any unrecognised proxy type
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                if self.config['_socks1type'] == '5':
                    # Credentials are only read for SOCKS5
                    socksType = socks.PROXY_TYPE_SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']

                if self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP

                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort,
                                      socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the
                # SOCKS ones
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo

                self.sf.updateSocket(socket)

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds", self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            # A bit hacky: we pass the database object as part of the config. This
            # object should only be used by the internal SpiderFoot modules writing
            # to the database, which at present is only sfp__stor_db.
            # Individual modules cannot create their own SpiderFootDb instance or
            # we'll get database locking issues, so it all goes through this.
            self.config['__sfdb__'] = dbh

            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(), [modName])
                # The module file is expected to define a class named after itself
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Set up the module
                # Configuration is a combined global config with module-specific options.
                # NOTE: modConfig is the module's own opts dict inside self.config
                # (no copy is made), so the global options are merged into it in place.
                modConfig = self.config['__modules__'][modName]['opts']
                for opt, value in self.config.items():
                    modConfig[opt] = value

                mod.clearListeners()  # clear any listener relationships from the past
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(self.config['__guid__'], status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target, "SpiderFoot UI")
            dbh.scanEventStore(self.config['__guid__'], rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(self.config['__guid__'], status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + self.config['__guid__'] + "] aborted.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + self.config['__guid__'] + "] completed.")
                dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            # Anything raised above marks the scan as failed rather than
            # leaving it in a non-final state.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value, exc_traceback)), False)
            self.sf.status("Scan [" + self.config['__guid__'] + "] failed: " + str(e))
            dbh.scanInstanceSet(self.config['__guid__'], None, time.time() * 1000, 'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        # Drop per-scan state so stopScan() becomes a no-op after the scan
        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)
Esempio n. 6
0
class SpiderFootScanner():
    """SpiderFootScanner object.

    Validates the scan parameters, records the scan in the back-end database,
    applies the global proxy, DNS, user agent and TLD options and, when asked
    to, runs the scan by loading the requested modules and sending them the
    initial events.

    Attributes:
        scanId (str): unique ID of the scan
        status (str): status of the scan
    """

    # Class-level defaults only. The mutable containers are created per
    # instance in __init__ so that scanners never share module state.
    __scanId = None
    __status = None
    __config = None
    __sf = None
    __dbh = None
    __targetValue = None
    __targetType = None
    __moduleList = None
    __target = None
    __moduleInstances = None
    __modconfig = None
    __scanName = None

    def __init__(self,
                 scanName: str,
                 scanId: str,
                 targetValue: str,
                 targetType: str,
                 moduleList: list,
                 globalOpts: dict,
                 start: bool = True) -> None:
        """Initialize SpiderFootScanner object.

        Args:
            scanName (str): name of the scan
            scanId (str): unique ID of the scan
            targetValue (str): scan target
            targetType (str): scan target type
            moduleList (list): list of modules to run
            globalOpts (dict): scan options
            start (bool): start the scan immediately

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid, the target could not be
                created, or the configured proxy type is not supported

        Todo:
             Eventually change this to be able to control multiple scan instances
        """
        # Fresh containers for every scanner; a dict defined on the class
        # would be shared (and mutated) by all instances.
        self.__moduleInstances = dict()
        self.__modconfig = dict()

        if not isinstance(globalOpts, dict):
            raise TypeError(
                f"globalOpts is {type(globalOpts)}; expected dict()")
        if not globalOpts:
            raise ValueError("globalOpts is empty")

        # Work on a private copy so the caller's options are never modified
        self.__config = deepcopy(globalOpts)
        self.__dbh = SpiderFootDb(self.__config)

        if not isinstance(scanName, str):
            raise TypeError(f"scanName is {type(scanName)}; expected str()")
        if not scanName:
            raise ValueError("scanName value is blank")

        self.__scanName = scanName

        if not isinstance(scanId, str):
            raise TypeError(f"scanId is {type(scanId)}; expected str()")
        if not scanId:
            raise ValueError("scanId value is blank")

        if not isinstance(targetValue, str):
            raise TypeError(
                f"targetValue is {type(targetValue)}; expected str()")
        if not targetValue:
            raise ValueError("targetValue value is blank")

        self.__targetValue = targetValue

        if not isinstance(targetType, str):
            raise TypeError(
                f"targetType is {type(targetType)}; expected str()")
        if not targetType:
            raise ValueError("targetType value is blank")

        self.__targetType = targetType

        if not isinstance(moduleList, list):
            raise TypeError(
                f"moduleList is {type(moduleList)}; expected list()")
        if not moduleList:
            raise ValueError("moduleList is empty")

        self.__moduleList = moduleList

        self.__sf = SpiderFoot(self.__config)
        self.__sf.dbh = self.__dbh

        # scanId was validated above as a non-empty str, so it is used as-is
        # to identify this scan in the back-end DB.
        self.__scanId = scanId
        self.__sf.scanId = self.__scanId
        self.__dbh.scanInstanceCreate(self.__scanId, self.__scanName,
                                      self.__targetValue)

        # Create our target
        try:
            self.__target = SpiderFootTarget(self.__targetValue,
                                             self.__targetType)
        except (TypeError, ValueError) as e:
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)
            raise ValueError(f"Invalid target: {e}") from e

        # Save the config current set for this scan
        self.__config['_modulesenabled'] = self.__moduleList
        self.__dbh.scanConfigSet(
            self.__scanId, self.__sf.configSerialize(deepcopy(self.__config)))

        # Process global options that point to other places for data
        self.__configureProxy()
        self.__configureDns()

        # Set the user agent
        self.__config['_useragent'] = self.__sf.optValueToData(
            self.__config['_useragent'])

        self.__loadInternetTlds()

        self.__setStatus("INITIALIZING", time.time() * 1000, None)

        if start:
            self.__startScan()

    @property
    def scanId(self) -> str:
        """Unique identifier for this scan"""
        return self.__scanId

    @property
    def status(self) -> str:
        """Status of this scan"""
        return self.__status

    def __configureProxy(self) -> None:
        """Set the proxy URL on the SpiderFoot object from the _socks* options.

        The proxy is cleared (set to None) when no proxy type is configured.

        Raises:
            ValueError: the configured proxy type is not supported
        """
        proxyType = self.__config['_socks1type']
        if not proxyType:
            self.__sf.socksProxy = None
            return

        socksAddr = self.__config['_socks2addr']
        socksPort = int(self.__config['_socks3port'])
        socksUsername = self.__config['_socks4user'] or ''
        socksPassword = self.__config['_socks5pwd'] or ''

        proxy = f"{socksAddr}:{socksPort}"
        if socksUsername or socksPassword:
            proxy = f"{socksUsername}:{socksPassword}@{proxy}"

        # URL scheme for each supported value of the _socks1type option
        schemes = {
            '4': 'socks4://',
            '5': 'socks5://',
            'HTTP': 'http://',
            'TOR': 'socks5h://',
        }
        scheme = schemes.get(proxyType)
        if scheme is None:
            raise ValueError(f"Invalid SOCKS proxy type: {proxyType}")
        proxy = scheme + proxy

        # NOTE(review): this writes the proxy credentials to the debug log;
        # consider masking the password.
        self.__sf.debug(
            f"SOCKS: {socksAddr}:{socksPort} ({socksUsername}:{socksPassword})"
        )

        self.__sf.socksProxy = proxy

    def __configureDns(self) -> None:
        """Override the system DNS resolver, or restore it if none is set."""
        if self.__config['_dnsserver']:
            res = dns.resolver.Resolver()
            res.nameservers = [self.__config['_dnsserver']]
            dns.resolver.override_system_resolver(res)
        else:
            dns.resolver.restore_system_resolver()

    def __loadInternetTlds(self) -> None:
        """Populate the '_internettlds' option from cache or from its source."""
        tlddata = self.__sf.cacheGet("internet_tlds",
                                     self.__config['_internettlds_cache'])

        # If it wasn't loadable from cache, load it from scratch
        if tlddata is None:
            self.__config['_internettlds'] = self.__sf.optValueToData(
                self.__config['_internettlds'])
            self.__sf.cachePut("internet_tlds", self.__config['_internettlds'])
        else:
            self.__config["_internettlds"] = tlddata.splitlines()

    def __setStatus(self, status: str, started: float = None,
                    ended: float = None) -> None:
        """Set the status of the currently running scan (if any).

        The status is stored on this object and written to the scan instance
        record in the database together with the given timestamps.

        Args:
            status (str): scan status
            started (float): timestamp at start of scan
            ended (float): timestamp at end of scan

        Returns:
            None

        Raises:
            TypeError: arg type was invalid
            ValueError: arg value was invalid
        """
        if not isinstance(status, str):
            raise TypeError(f"status is {type(status)}; expected str()")

        validStatuses = (
            "INITIALIZING", "STARTING", "STARTED", "RUNNING",
            "ABORT-REQUESTED", "ABORTED", "ABORTING", "FINISHED",
            "ERROR-FAILED",
        )
        if status not in validStatuses:
            raise ValueError(f"Invalid scan status {status}")

        self.__status = status
        self.__dbh.scanInstanceSet(self.__scanId, started, ended, status)

    def __loadModules(self) -> None:
        """Import, configure and store an instance of each requested module."""
        # moduleList = list of modules the user wants to run
        for modName in self.__moduleList:
            if modName == '':
                continue

            try:
                module = __import__('modules.' + modName, globals(),
                                    locals(), [modName])
            except ImportError:
                self.__sf.error("Failed to load module: " + modName, False)
                continue

            mod = getattr(module, modName)()
            mod.__name__ = modName

            # Module may have been renamed or removed
            if modName not in self.__config['__modules__']:
                continue

            # Set up the module
            # Configuration is a combined global config with module-specific options
            self.__modconfig[modName] = deepcopy(
                self.__config['__modules__'][modName]['opts'])
            for opt, value in self.__config.items():
                self.__modconfig[modName][opt] = deepcopy(value)

            # clear any listener relationships from the past
            mod.clearListeners()
            mod.setup(self.__sf, self.__modconfig[modName])
            mod.setDbh(self.__dbh)
            mod.setScanId(self.__scanId)

            # Give modules a chance to 'enrich' the original target with
            # aliases of that target.
            newTarget = mod.enrichTarget(self.__target)
            if newTarget is not None:
                self.__target = newTarget
            self.__moduleInstances[modName] = mod

            # Override the module's local socket module to be the SOCKS one.
            # Same truthiness test as the proxy setup in __init__.
            if self.__config['_socks1type']:
                mod._updateSocket(socket)

            # Set up event output filters if requested
            if self.__config['__outputfilter']:
                mod.setOutputFilter(self.__config['__outputfilter'])

            self.__sf.status(modName + " module loaded.")

    def __registerListeners(self) -> None:
        """Give every module the target and register the listener modules."""
        for module in self.__moduleInstances.values():
            # Register the target with the module
            module.setTarget(self.__target)

            for listenerModule in self.__moduleInstances.values():
                # Careful not to register twice or you will get duplicate events
                if listenerModule in module._listenerModules:
                    continue
                # Note the absence of a check for whether a module can register
                # to itself. That is intentional because some modules will
                # act on their own notifications (e.g. sfp_dns)!
                if listenerModule.watchedEvents() is not None:
                    module.registerListener(listenerModule)

    def __sendInitialEvents(self) -> None:
        """Send the ROOT event and the target event(s) to the modules."""
        # Create a pseudo module for the root event to originate from
        psMod = SpiderFootPlugin()
        psMod.__name__ = "SpiderFoot UI"
        psMod.setTarget(self.__target)
        psMod.setDbh(self.__dbh)
        psMod.clearListeners()
        for mod in self.__moduleInstances.values():
            if mod.watchedEvents() is not None:
                psMod.registerListener(mod)

        # Create the "ROOT" event which un-triggered modules will link events to
        rootEvent = SpiderFootEvent("ROOT", self.__targetValue, "", None)
        psMod.notifyListeners(rootEvent)
        firstEvent = SpiderFootEvent(self.__targetType, self.__targetValue,
                                     "SpiderFoot UI", rootEvent)
        psMod.notifyListeners(firstEvent)

        # Special case.. check if an INTERNET_NAME is also a domain
        if (self.__targetType == 'INTERNET_NAME'
                and self.__sf.isDomain(self.__targetValue,
                                       self.__config['_internettlds'])):
            firstEvent = SpiderFootEvent('DOMAIN_NAME', self.__targetValue,
                                         "SpiderFoot UI", rootEvent)
            psMod.notifyListeners(firstEvent)

    def __startScan(self) -> None:
        """Start running a scan.

        Any exception raised while the scan runs is logged and recorded as
        an "ERROR-FAILED" status instead of being propagated. The database
        handle is closed once the scan has ended.
        """
        aborted = False

        self.__setStatus("STARTING", time.time() * 1000, None)
        self.__sf.status(f"Scan [{self.__scanId}] initiated.")

        try:
            self.__loadModules()
            self.__registerListeners()

            # Now we are ready to roll..
            self.__setStatus("RUNNING")

            self.__sendInitialEvents()

            # Check in case the user requested to stop the scan between modules
            # initializing. Stops at the first module reporting a stop.
            if any(module.checkForStop()
                   for module in self.__moduleInstances.values()):
                self.__setStatus('ABORTING')
                aborted = True

            if aborted:
                self.__sf.status(f"Scan [{self.__scanId}] aborted.")
                self.__setStatus("ABORTED", None, time.time() * 1000)
            else:
                self.__sf.status(f"Scan [{self.__scanId}] completed.")
                self.__setStatus("FINISHED", None, time.time() * 1000)
        except BaseException as e:
            # Deliberately broad: this is the top-level boundary of the scan
            # and the failure must be recorded against the scan instance.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.__sf.error(
                f"Unhandled exception ({e.__class__.__name__}) encountered during scan. "
                + "Please report this as a bug: " + repr(
                    traceback.format_exception(exc_type, exc_value,
                                               exc_traceback)), False)
            self.__sf.status(f"Scan [{self.__scanId}] failed: {e}")
            self.__setStatus("ERROR-FAILED", None, time.time() * 1000)

        self.__dbh.close()
Esempio n. 7
0
class SpiderFootScanner:
    """Controller for a single scan run.

    Loads the requested modules, registers them with each other as event
    listeners, starts them one after another and records the progress of the
    scan in the back-end database.

    Attributes:
        moduleInstances (dict): module instances keyed by module name while
            a scan is running; None before a scan starts and after it ends
        status (str): status of the most recent scan; "UNKNOWN" until a scan
            has been started
        myId (str): ID of the most recent scan; None until a scan has been
            started
    """

    moduleInstances = None
    status = "UNKNOWN"
    myId = None

    def __init__(self, name, target, moduleList, globalOpts, moduleOpts):
        """Store the scan parameters.

        Args:
            name: name the scan is recorded under in the database
            target: scan target
            moduleList: names of the modules to run
            globalOpts: scan options; deep-copied so the caller's object is
                not modified by the scan
            moduleOpts: accepted for interface compatibility; not used
        """
        self.config = deepcopy(globalOpts)
        self.sf = SpiderFoot(self.config)
        self.target = target
        self.moduleList = moduleList
        self.name = name

    def scanStatus(self, id):
        """Return the status of the scan with the given ID.

        Args:
            id: ID of the scan being asked about

        Returns:
            str: the scan status, or "UNKNOWN" if the ID is not this scan's
        """
        if id != self.myId:
            return "UNKNOWN"
        return self.status

    def stopScan(self, id):
        """Ask every loaded module to stop scanning.

        The id argument is unnecessary for now given that only one
        simultaneous scan is permitted. Nothing happens if the ID is not
        this scan's or if no modules are loaded.

        Args:
            id: ID of the scan to stop

        Returns:
            None
        """
        if id != self.myId:
            return None

        if self.moduleInstances is None:
            return None

        for mod in self.moduleInstances.values():
            mod.stopScanning()

    def startScan(self):
        """Run the scan from start to finish.

        Creates the scan instance in the database, applies the global
        options (SOCKS proxy, DNS server, user agent, internet TLDs), loads
        and wires up the modules and starts them sequentially. Exceptions
        raised once the scan has been initiated are logged and recorded as
        an "ERROR-FAILED" status rather than propagated.
        """
        self.moduleInstances = dict()
        dbh = SpiderFootDb(self.config)
        self.sf.setDbh(dbh)
        aborted = False

        # Create a unique ID for this scan and create it in the back-end DB.
        scanId = dbh.scanInstanceGenGUID(self.target)
        self.config['__guid__'] = scanId
        self.sf.setScanId(scanId)
        self.myId = scanId
        dbh.scanInstanceCreate(scanId, self.name, self.target)
        dbh.scanInstanceSet(scanId, time.time() * 1000, None, 'STARTING')
        self.status = "STARTING"

        # Save the config current set for this scan
        self.config['_modulesenabled'] = self.moduleList
        dbh.scanConfigSet(scanId, self.sf.configSerialize(self.config))

        self.sf.status("Scan [" + scanId + "] initiated.")
        try:
            # Process global options that point to other places for data

            # If a SOCKS server was specified, set it up
            if self.config['_socks1type'] != '':
                # SOCKS4 is the fallback for unrecognised proxy types
                socksType = socks.PROXY_TYPE_SOCKS4
                socksDns = self.config['_socks6dns']
                socksAddr = self.config['_socks2addr']
                socksPort = int(self.config['_socks3port'])
                socksUsername = ''
                socksPassword = ''

                if self.config['_socks1type'] == '4':
                    socksType = socks.PROXY_TYPE_SOCKS4
                elif self.config['_socks1type'] == '5':
                    # Credentials are only read for SOCKS5
                    socksType = socks.PROXY_TYPE_SOCKS5
                    socksUsername = self.config['_socks4user']
                    socksPassword = self.config['_socks5pwd']
                elif self.config['_socks1type'] == 'HTTP':
                    socksType = socks.PROXY_TYPE_HTTP

                # NOTE(review): this writes the proxy credentials to the
                # debug log; consider masking the password.
                self.sf.debug("SOCKS: " + socksAddr + ":" + str(socksPort) +
                              "(" + socksUsername + ":" + socksPassword + ")")
                socks.setdefaultproxy(socksType, socksAddr, socksPort,
                                      socksDns, socksUsername, socksPassword)

                # Override the default socket and getaddrinfo calls with the
                # SOCKS ones
                socket.socket = socks.socksocket
                socket.create_connection = socks.create_connection
                socket.getaddrinfo = socks.getaddrinfo

                self.sf.updateSocket(socket)

            # Override the default DNS server
            if self.config['_dnsserver'] != "":
                res = dns.resolver.Resolver()
                res.nameservers = [self.config['_dnsserver']]
                dns.resolver.override_system_resolver(res)
            else:
                dns.resolver.restore_system_resolver()

            # Set the user agent
            self.config['_useragent'] = self.sf.optValueToData(
                self.config['_useragent'])

            # Get internet TLDs
            tlddata = self.sf.cacheGet("internet_tlds",
                                       self.config['_internettlds_cache'])
            # If it wasn't loadable from cache, load it from scratch
            if tlddata is None:
                self.config['_internettlds'] = self.sf.optValueToData(
                    self.config['_internettlds'])
                self.sf.cachePut("internet_tlds", self.config['_internettlds'])
            else:
                self.config["_internettlds"] = tlddata.splitlines()

            # A bit hacky: we pass the database object as part of the config. This
            # object should only be used by the internal SpiderFoot modules writing
            # to the database, which at present is only sfp__stor_db.
            # Individual modules cannot create their own SpiderFootDb instance or
            # we'll get database locking issues, so it all goes through this.
            self.config['__sfdb__'] = dbh

            # moduleList = list of modules the user wants to run
            for modName in self.moduleList:
                if modName == '':
                    continue

                module = __import__('modules.' + modName, globals(), locals(),
                                    [modName])
                mod = getattr(module, modName)()
                mod.__name__ = modName

                # Set up the module
                # Configuration is a combined global config with module-specific
                # options. A shallow copy is used so the module's entry in
                # self.config is not overwritten with the whole global config
                # (a deep copy is avoided because the config carries dbh).
                modConfig = dict(self.config['__modules__'][modName]['opts'])
                modConfig.update(self.config)

                # clear any listener relationships from the past
                mod.clearListeners()
                mod.setup(self.sf, self.target, modConfig)
                self.moduleInstances[modName] = mod

                # Override the module's local socket module
                # to be the SOCKS one.
                if self.config['_socks1type'] != '':
                    mod._updateSocket(socket)

                self.sf.status(modName + " module loaded.")

            # Register listener modules and then start all modules sequentially
            for module in self.moduleInstances.values():
                for listenerModule in self.moduleInstances.values():
                    # Careful not to register twice or you will get duplicate events
                    if listenerModule in module._listenerModules:
                        continue
                    # Note the absence of a check for whether a module can register
                    # to itself. That is intentional because some modules will
                    # act on their own notifications (e.g. sfp_dns)!
                    if listenerModule.watchedEvents() is not None:
                        module.registerListener(listenerModule)

            dbh.scanInstanceSet(scanId, status='RUNNING')
            self.status = "RUNNING"

            # Create the "ROOT" event which un-triggered modules will link events to
            rootEvent = SpiderFootEvent("INITIAL_TARGET", self.target,
                                        "SpiderFoot UI")
            dbh.scanEventStore(scanId, rootEvent)

            # Start the modules sequentially.
            for module in self.moduleInstances.values():
                # Check in case the user requested to stop the scan between modules initializing
                if module.checkForStop():
                    dbh.scanInstanceSet(scanId, status='ABORTING')
                    self.status = "ABORTING"
                    aborted = True
                    break
                # Many modules' start() method will return None, as most will rely on
                # notifications during the scan from other modules.
                module.start()

            # Check if any of the modules ended due to being stopped.
            # Every module is asked, even after one has reported a stop.
            for module in self.moduleInstances.values():
                if module.checkForStop():
                    aborted = True

            if aborted:
                self.sf.status("Scan [" + scanId + "] aborted.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000,
                                    'ABORTED')
                self.status = "ABORTED"
            else:
                self.sf.status("Scan [" + scanId + "] completed.")
                dbh.scanInstanceSet(scanId, None, time.time() * 1000,
                                    'FINISHED')
                self.status = "FINISHED"
        except BaseException as e:
            # Deliberately broad: this is the top-level boundary of the scan
            # and the failure must be recorded against the scan instance.
            exc_type, exc_value, exc_traceback = sys.exc_info()
            self.sf.error("Unhandled exception (" + e.__class__.__name__ + ") " +
                          "encountered during scan. Please report this as a bug: " +
                          repr(traceback.format_exception(exc_type, exc_value,
                                                          exc_traceback)), False)
            self.sf.status("Scan [" + scanId + "] failed: " + str(e))
            dbh.scanInstanceSet(scanId, None, time.time() * 1000,
                                'ERROR-FAILED')
            self.status = "ERROR-FAILED"

        # Release the per-scan state and the database handle
        self.moduleInstances = None
        dbh.close()
        self.sf.setDbh(None)
        self.sf.setScanId(None)